init sources

This commit is contained in:
2017-09-27 21:15:46 +02:00
parent 72164ae8f1
commit e655faaf62
32 changed files with 1110702 additions and 0 deletions

8
src/DoresA/.gitignore vendored Normal file
View File

@@ -0,0 +1,8 @@
/.idea/
/bin/
/data/
/data_test/
/include/
/lib/
/__pycache__/

71
src/DoresA/db.py Normal file
View File

@@ -0,0 +1,71 @@
import MySQLdb as mariadb
from pymongo import MongoClient
# Module-level handles shared by every helper in this module. They are created
# at import time: importing this module calls MongoClient(...) and
# mariadb.connect(...) immediately.
mongo_client = MongoClient('localhost', 27017)
db = mongo_client.doresa
pdns_logs_mongo = db.pdns_logs
# NOTE(review): the SQL password is hard-coded and committed with the source;
# consider loading it from the environment or an untracked config file and
# rotating the credential.
sql_connection = mariadb.connect(user='doresa', passwd='3qfACEZzbXY4b', db='doresa')
sql_cursor = sql_connection.cursor()
def mariadb_insert_log(csv_entry):
    """Insert a single log row into the ``pdns_logs`` SQL table and commit.

    ``csv_entry`` is read at positions 0 through 4; those values are bound,
    in order, to the ``timestamp``, ``domain``, ``type``, ``record`` and
    ``ttl`` columns of a parameterized INSERT.
    """
    statement = 'INSERT INTO pdns_logs (timestamp, domain, type, record, ttl) VALUES (%s, %s, %s, %s, %s)'
    row = tuple(csv_entry[position] for position in range(5))
    sql_cursor.execute(statement, row)
    sql_connection.commit()
def mariadb_insert_logs(csv_entries):
    """Insert many log rows into ``pdns_logs`` with one multi-row INSERT.

    Each element of ``csv_entries`` is read at positions 0 through 4; those
    values are bound, in order, to the ``timestamp``, ``domain``, ``type``,
    ``record`` and ``ttl`` columns. The statement is committed after it has
    been executed.

    An empty ``csv_entries`` is a no-op: nothing is executed or committed,
    because a VALUES clause without any bound row is not a valid statement.
    """
    if not csv_entries:
        return

    # One "(%s, ...)" group per row, joined once instead of growing the
    # statement with repeated string concatenation.
    row_placeholder = '(%s, %s, %s, %s, %s)'
    inserts_sql = (
        'INSERT INTO pdns_logs (timestamp, domain, type, record, ttl) VALUES '
        + ', '.join([row_placeholder] * len(csv_entries))
    )

    # Flatten the rows into one parameter list matching the placeholders.
    values = [csv_entry[i] for csv_entry in csv_entries for i in range(5)]

    sql_cursor.execute(inserts_sql, values)
    sql_connection.commit()
def mariadb_create_table():
    """Create the ``pdns_logs`` table through the shared SQL cursor.

    The table has an auto-increment integer primary key ``id``, the text
    columns ``timestamp``, ``domain``, ``type`` and ``record``, and an
    integer ``ttl`` column. No explicit commit is issued here.
    """
    ddl = """
CREATE TABLE pdns_logs (
id INTEGER AUTO_INCREMENT PRIMARY KEY,
timestamp VARCHAR(50),
domain VARCHAR(255),
type VARCHAR(50),
record VARCHAR(255),
ttl INTEGER);"""
    sql_cursor.execute(ddl)
def mongodb_insert_log(log_entry):
    """Store one log entry as a document in the ``pdns_logs`` collection.

    ``log_entry`` is read at positions 0 through 4, which become the
    ``timestamp``, ``domain``, ``type``, ``record`` and ``ttl`` fields.
    """
    field_names = ('timestamp', 'domain', 'type', 'record', 'ttl')
    document = {
        name: log_entry[position]
        for position, name in enumerate(field_names)
    }
    pdns_logs_mongo.insert_one(document)
def mongodb_insert_logs(log_entries):
    """Store many log entries in the ``pdns_logs`` collection in one call.

    Each element of ``log_entries`` is read at positions 0 through 4, which
    become the ``timestamp``, ``domain``, ``type``, ``record`` and ``ttl``
    fields of one document.

    When ``log_entries`` yields no entries nothing is sent to MongoDB,
    because ``insert_many`` rejects an empty document list.
    """
    db_entries = [
        {
            'timestamp': log_entry[0],
            'domain': log_entry[1],
            'type': log_entry[2],
            'record': log_entry[3],
            'ttl': log_entry[4],
        }
        for log_entry in log_entries
    ]
    if not db_entries:
        return
    pdns_logs_mongo.insert_many(db_entries)
def close():
    """Release the shared SQL cursor, SQL connection and MongoDB client.

    The handles are closed in the order cursor, connection, client. Each
    close is attempted even if an earlier one raises, so one failing handle
    does not leak the others; the first exception still propagates.
    """
    try:
        # mariadb
        try:
            sql_cursor.close()
        finally:
            sql_connection.close()
    finally:
        # mongodb
        mongo_client.close()

180
src/DoresA/detect_cusum.py Normal file
View File

@@ -0,0 +1,180 @@
"""Cumulative sum algorithm (CUSUM) to detect abrupt changes in data."""
from __future__ import division, print_function
import numpy as np
__author__ = 'Marcos Duarte, https://github.com/demotu/BMC'
__version__ = "1.0.4"
__license__ = "MIT"
def detect_cusum(x, threshold=1, drift=0, ending=False, show=True, ax=None):
    """Cumulative sum algorithm (CUSUM) to detect abrupt changes in data.

    Parameters
    ----------
    x : 1D array_like
        data.
    threshold : positive number, optional (default = 1)
        amplitude threshold for the change in the data.
    drift : positive number, optional (default = 0)
        drift term that prevents any change in the absence of change.
    ending : bool, optional (default = False)
        True (1) to estimate when the change ends; False (0) otherwise.
    show : bool, optional (default = True)
        True (1) plots data in matplotlib figure, False (0) don't plot.
    ax : a matplotlib.axes.Axes instance, optional (default = None).

    Returns
    -------
    ta : 1D array_like [indi, indf], int
        alarm time (index of when the change was detected).
    tai : 1D array_like, int
        index of when the change started.
    taf : 1D array_like, int
        index of when the change ended (if `ending` is True).
    amp : 1D array_like, float
        amplitude of changes (if `ending` is True).

    Notes
    -----
    Tuning of the CUSUM algorithm according to Gustafsson (2000)[1]_:
    Start with a very large `threshold`.
    Choose `drift` to one half of the expected change, or adjust `drift` such
    that `g` = 0 more than 50% of the time.
    Then set the `threshold` so the required number of false alarms (this can
    be done automatically) or delay for detection is obtained.
    If faster detection is sought, try to decrease `drift`.
    If fewer false alarms are wanted, try to increase `drift`.
    If there is a subset of the change times that does not make sense,
    try to increase `drift`.

    Note that by default repeated sequential changes, i.e., changes that have
    the same beginning (`tai`) are not deleted because the changes were
    detected by the alarm (`ta`) at different instants. This is how the
    classical CUSUM algorithm operates.

    If you want to delete the repeated sequential changes and keep only the
    beginning of the first sequential change, set the parameter `ending` to
    True. In this case, the index of the ending of the change (`taf`) and the
    amplitude of the change (or of the total amplitude for a repeated
    sequential change) are calculated and only the first change of the repeated
    sequential changes is kept. In this case, it is likely that `ta`, `tai`,
    and `taf` will have less values than when `ending` was set to False.

    See this IPython Notebook [2]_.

    References
    ----------
    .. [1] Gustafsson (2000) Adaptive Filtering and Change Detection.
    .. [2] http://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/DetectCUSUM.ipynb

    Examples
    --------
    >>> from detect_cusum import detect_cusum
    >>> x = np.random.randn(300)/5
    >>> x[100:200] += np.arange(0, 4, 4/100)
    >>> ta, tai, taf, amp = detect_cusum(x, 2, .02, True, True)

    >>> x = np.random.randn(300)
    >>> x[100:200] += 6
    >>> detect_cusum(x, 4, 1.5, True, True)

    >>> x = 2*np.sin(2*np.pi*np.arange(0, 3, .01))
    >>> ta, tai, taf, amp = detect_cusum(x, 1, .05, True, True)
    """
    # Work on a float64 copy so integer input does not truncate the sums.
    x = np.atleast_1d(x).astype('float64')
    # gp / gn hold the running cumulative sums for positive / negative change.
    gp, gn = np.zeros(x.size), np.zeros(x.size)
    # Unpacking the rows of a 3x0 integer array yields three empty int arrays.
    ta, tai, taf = np.array([[], [], []], dtype=int)
    # tap / tan remember the last index at which gp / gn was clipped to zero,
    # i.e. the candidate start of the next positive / negative change.
    tap, tan = 0, 0
    amp = np.array([])
    # Find changes (online form)
    for i in range(1, x.size):
        s = x[i] - x[i-1]
        gp[i] = gp[i-1] + s - drift  # cumulative sum for + change
        gn[i] = gn[i-1] - s - drift  # cumulative sum for - change
        if gp[i] < 0:
            gp[i], tap = 0, i
        if gn[i] < 0:
            gn[i], tan = 0, i
        if gp[i] > threshold or gn[i] > threshold:  # change detected!
            ta = np.append(ta, i)    # alarm index
            tai = np.append(tai, tap if gp[i] > threshold else tan)  # start
            gp[i], gn[i] = 0, 0      # reset alarm
    # THE CLASSICAL CUSUM ALGORITHM ENDS HERE

    # Estimation of when the change ends (offline form)
    if tai.size and ending:
        # Run the detector on the reversed signal (without plotting); the
        # change starts found there are mapped back to indices of `x` and
        # used as the change endings.
        _, tai2, _, _ = detect_cusum(x[::-1], threshold, drift, show=False)
        taf = x.size - tai2[::-1] - 1
        # Eliminate repeated changes, changes that have the same beginning
        tai, ind = np.unique(tai, return_index=True)
        ta = ta[ind]
        # taf = np.unique(taf, return_index=False)  # correct later
        # Make the number of starts and endings agree before pairing them.
        if tai.size != taf.size:
            if tai.size < taf.size:
                taf = taf[[np.argmax(taf >= i) for i in ta]]
            else:
                ind = [np.argmax(i >= ta[::-1])-1 for i in taf]
                ta = ta[ind]
                tai = tai[ind]
        # Delete intercalated changes (the ending of the change is after
        # the beginning of the next change)
        ind = taf[:-1] - tai[1:] > 0
        if ind.any():
            ta = ta[~np.append(False, ind)]
            tai = tai[~np.append(False, ind)]
            taf = taf[~np.append(ind, False)]
        # Amplitude of changes
        amp = x[taf] - x[tai]

    if show:
        _plot(x, threshold, drift, ending, ax, ta, tai, taf, gp, gn)

    return ta, tai, taf, amp
def _plot(x, threshold, drift, ending, ax, ta, tai, taf, gp, gn):
    """Plot results of the detect_cusum function, see its help.

    Parameters
    ----------
    x, threshold, drift, ending, ta, tai, taf, gp, gn
        Input data, settings and results exactly as computed by
        `detect_cusum`.
    ax : None, a single Axes, or a pair of Axes
        None creates a new figure with two stacked axes (time series on top,
        cumulative sums below). A list/tuple/array of two axes is used as
        (time series axes, cumulative sums axes). A single Axes receives the
        time series only and the cumulative-sum panel is skipped.
        Previously any non-None `ax` raised NameError because the two axes
        were only created in the `ax is None` branch.

    If matplotlib cannot be imported a message is printed and nothing is
    plotted.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print('matplotlib is not available.')
        return

    if ax is None:
        _, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))
    elif isinstance(ax, (list, tuple, np.ndarray)):
        ax1, ax2 = ax
    else:
        # A single Axes can only host one panel: draw the time series on it.
        ax1, ax2 = ax, None

    t = range(x.size)

    # Upper panel: the data with start / ending / alarm markers.
    ax1.plot(t, x, 'b-', lw=2)
    if len(ta):
        ax1.plot(tai, x[tai], '>', mfc='g', mec='g', ms=10,
                 label='Start')
        if ending:
            ax1.plot(taf, x[taf], '<', mfc='g', mec='g', ms=10,
                     label='Ending')
        ax1.plot(ta, x[ta], 'o', mfc='r', mec='r', mew=1, ms=5,
                 label='Alarm')
        ax1.legend(loc='best', framealpha=.5, numpoints=1)
    ax1.set_xlim(-.01*x.size, x.size*1.01-1)
    ax1.set_xlabel('Data #', fontsize=14)
    ax1.set_ylabel('Amplitude', fontsize=14)
    # Ignore NaN/inf samples when choosing the y-limits.
    ymin, ymax = x[np.isfinite(x)].min(), x[np.isfinite(x)].max()
    yrange = ymax - ymin if ymax > ymin else 1
    ax1.set_ylim(ymin - 0.1*yrange, ymax + 0.1*yrange)
    ax1.set_title('Time series and detected changes ' +
                  '(threshold= %.3g, drift= %.3g): N changes = %d'
                  % (threshold, drift, len(tai)))

    # Lower panel: the two cumulative sums against the threshold line.
    if ax2 is not None:
        ax2.plot(t, gp, 'y-', label='+')
        ax2.plot(t, gn, 'm-', label='-')
        ax2.set_xlim(-.01*x.size, x.size*1.01-1)
        ax2.set_xlabel('Data #', fontsize=14)
        ax2.set_ylim(-0.01*threshold, 1.1*threshold)
        ax2.axhline(threshold, color='r')
        # NOTE(review): this repeats the label already set on ax1 above;
        # possibly ax2.set_ylabel was intended. Kept as-is to leave the
        # rendered figure unchanged.
        ax1.set_ylabel('Amplitude', fontsize=14)
        ax2.set_title('Time series of the cumulative sums of ' +
                      'positive and negative changes')
        ax2.legend(loc='best', framealpha=.5, numpoints=1)

    plt.tight_layout()
    plt.show()

BIN
src/DoresA/detect_cusum.pyc Normal file

Binary file not shown.

5
src/DoresA/dns.py Normal file
View File

@@ -0,0 +1,5 @@
import socket
def reverse(ip):
    """Do a reverse lookup of ``ip`` with ``socket.gethostbyaddr``.

    The lookup result is returned unchanged, i.e. the
    ``(hostname, aliaslist, ipaddrlist)`` triple documented for
    ``socket.gethostbyaddr``; any exception it raises propagates.
    """
    lookup_result = socket.gethostbyaddr(ip)
    return lookup_result

92
src/DoresA/domain.py Normal file
View File

@@ -0,0 +1,92 @@
import enchant
import numpy as np
# Shared spell-checking dictionary used by the helpers in this module.
# The 'en_US' dictionary has to be installed: list the available ones with
# `aspell dicts` in a shell or enchant.list_languages() in Python.
# If it is missing, see http://pythonhosted.org/pyenchant/tutorial.html
dictionary = enchant.Dict('en_US')
def check_if_english_word(string):
    """Return whether ``string`` is accepted by the module's en_US dictionary.

    An empty (or None) ``string`` yields ``False`` instead of being handed to
    ``dictionary.check``, which raises ValueError for an empty word.
    """
    if not string:
        return False
    return dictionary.check(string)
# TODO strip off protocol and TLD (if needed)
def find_longest_meaningful_substring(string, min_length=4, max_length=10):
    """Return the longest substring of ``string`` that is a dictionary word.

    Only substrings whose length lies in ``[min_length, max_length]`` are
    considered (defaults 4 and 10, the previously hard-coded bounds). When
    several words share the maximal length, the left-most one is returned.
    An empty string is returned when no candidate is a dictionary word or
    when ``string`` is shorter than ``min_length``.

    Candidates are tried longest-first and the search stops at the first
    hit, which gives the same result as scanning every substring but needs
    far fewer dictionary lookups.
    """
    longest_candidate = min(max_length, len(string))
    # Never ask the dictionary about an empty word.
    shortest_candidate = max(min_length, 1)
    for length in range(longest_candidate, shortest_candidate - 1, -1):
        for start in range(len(string) - length + 1):
            candidate = string[start:start + length]
            if dictionary.check(candidate):
                return candidate
    return ''
# TODO strip off protocol and TLD (if needed)
def ratio_lms_to_fqdn(string):
    """Return len(longest meaningful substring) / len(``string``).

    The longest meaningful substring is obtained from
    ``find_longest_meaningful_substring``. An empty ``string`` yields ``0.0``
    instead of raising ZeroDivisionError. The denominator is converted to
    float so the result is a true ratio under Python 2 as well.
    """
    if not string:
        return 0.0
    lms = find_longest_meaningful_substring(string)
    return len(lms) / float(len(string))
def test():
    """Print the LMS ratio of a sample host name, then exit the interpreter."""
    sample_ratio = ratio_lms_to_fqdn('www.google.de')
    print(sample_ratio)
    exit()
# TODO evaluate this (what about special chars)
def ratio_numerical_to_alpha(string):
    """Return the ratio of non-alphabetic to alphabetic characters.

    Every character for which ``str.isalpha()`` is false is counted as
    "numerical" -- this includes dots, hyphens and other special characters,
    not only digits (see the TODO above).

    Without any alphabetic character the ratio is undefined; instead of
    raising ZeroDivisionError the function returns ``0.0`` for an empty
    string and ``float('inf')`` otherwise.
    """
    alpha = sum(1 for char in string if char.isalpha())
    numerical = len(string) - alpha
    if alpha == 0:
        return float('inf') if numerical else 0.0
    # float() keeps this a true division under Python 2 as well.
    return numerical / float(alpha)
# Based on
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
def levenshtein(source, target):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimal number of single-element insertions,
    deletions and substitutions that turn ``source`` into ``target``. The
    result is a plain ``int``.

    The dynamic programme keeps only one row of the distance matrix and
    processes each row with vectorised numpy operations.
    """
    if len(source) < len(target):
        return levenshtein(target, source)

    # So now we have len(source) >= len(target).
    if len(target) == 0:
        return len(source)

    # We call tuple() to force strings to be used as sequences
    # ('c', 'a', 't', 's') - numpy uses them as values by default.
    source = np.array(tuple(source))
    target = np.array(tuple(target))

    # Column indices 0..len(target); also the first row of the matrix.
    offsets = np.arange(target.size + 1)
    previous_row = offsets
    for s in source:
        # Step down from the previous row (cost 1 per column).
        current_row = previous_row + 1

        # Substitution or matching: diagonal step, cost 1 when the items
        # differ and 0 when they are the same.
        current_row[1:] = np.minimum(
            current_row[1:],
            previous_row[:-1] + (target != s))

        # Step right within the current row. The value in column j may come
        # from ANY column k <= j at cost (j - k), so a single shifted
        # comparison is not enough (it returned 5 instead of 4 for
        # 'zzabc' vs 'abcyy'). min_k(row[k] + j - k) equals
        # j + min_k(row[k] - k), which is a running minimum.
        current_row = np.minimum.accumulate(current_row - offsets) + offsets

        previous_row = current_row

    return int(previous_row[-1])
# Run the ad-hoc check in test() when this module is executed as a script.
if __name__ == "__main__":
    test()

14
src/DoresA/location.py Normal file
View File

@@ -0,0 +1,14 @@
from geolite2 import geolite2
def get_country_by_ip(ip):
    """Return the English country name GeoLite2 reports for ``ip``.

    Returns ``None`` when the database has no record for ``ip`` or when the
    record carries no English country name. Previously a record without a
    ``country`` / ``names`` / ``en`` entry raised KeyError.
    """
    with geolite2 as gl2:
        result = gl2.reader().get(ip)

    if not result:
        return None
    return result.get('country', {}).get('names', {}).get('en')
# Executing this module as a script does nothing except exit immediately.
if __name__ == "__main__":
    exit()

BIN
src/DoresA/location.pyc Normal file

Binary file not shown.

View File

@@ -0,0 +1,6 @@
(DoresA) 18:51:55 felix@x230 ~/sources/MastersThesis/src/DoresA (master 0 0 0 0 5 9|) % python analyse.py
2017-04-07 |################################| 24/24
2017-04-08 |################################| 24/24
2017-04-09 |################################| 24/24
total duration: 289.71997332572937s
python analyse.py 225.60s user 2.53s system 78% cpu 4:50.28 total

View File

@@ -0,0 +1,386 @@
(DoresA) 18:57:32 felix@x230 ~/sources/MastersThesis/src/DoresA % python analyse.py
2017-04-07 |## | 2/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 316")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 317")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 318")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 319")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 320")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 321")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 322")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 323")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 324")
cursor.execute(inserts_sql, values)
2017-04-07 |#### | 3/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'ip6:fd1b:212c:a5f9::/48' for column 'ttl' at row 653")
cursor.execute(inserts_sql, values)
2017-04-07 |##### | 4/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 814")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 815")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 816")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 817")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 818")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 819")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 820")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 821")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 822")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 431")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 666")
cursor.execute(inserts_sql, values)
2017-04-07 |###### | 5/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 348")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 857")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 858")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 859")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 860")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 861")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 862")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 863")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 864")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 865")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 947")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 948")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 949")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 950")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 951")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 952")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 953")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 954")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 955")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_incspfcheck.mailspike.net' for column 'ttl' at row 133")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 288")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 10")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'ip6:fd1b:212c:a5f9::/48' for column 'ttl' at row 236")
cursor.execute(inserts_sql, values)
2017-04-07 |######## | 6/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 186")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 688")
cursor.execute(inserts_sql, values)
2017-04-07 |######### | 7/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070245' for column 'ttl' at row 114")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070243' for column 'ttl' at row 115")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704060237' for column 'ttl' at row 116")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704042147' for column 'ttl' at row 117")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704060253' for column 'ttl' at row 118")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704060726' for column 'ttl' at row 119")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704060251' for column 'ttl' at row 120")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070317' for column 'ttl' at row 121")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 139")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_incspfcheck.mailspike.net' for column 'ttl' at row 322")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 695")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 150")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'ip6:fd1b:212c:a5f9::/48' for column 'ttl' at row 161")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'a' for column 'ttl' at row 314")
cursor.execute(inserts_sql, values)
2017-04-07 |############ | 9/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'ip6:fd1b:212c:a5f9::/48' for column 'ttl' at row 797")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 73")
cursor.execute(inserts_sql, values)
2017-04-07 |############# | 10/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070801' for column 'ttl' at row 87")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070837' for column 'ttl' at row 88")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070718' for column 'ttl' at row 89")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704042147' for column 'ttl' at row 90")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704060726' for column 'ttl' at row 91")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070810' for column 'ttl' at row 92")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070800' for column 'ttl' at row 93")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070811' for column 'ttl' at row 94")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 310")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 849")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 850")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 851")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 852")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 853")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 854")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 855")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 856")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 857")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 514")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'u' for column 'ttl' at row 477")
cursor.execute(inserts_sql, values)
2017-04-07 |############## | 11/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 936")
cursor.execute(inserts_sql, values)
2017-04-07 |################ | 12/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 444")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 747")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 748")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 749")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 750")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 751")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 752")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 753")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 754")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 755")
cursor.execute(inserts_sql, values)
2017-04-07 |################# | 13/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 883")
cursor.execute(inserts_sql, values)
2017-04-07 |################## | 14/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 774")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 438")
cursor.execute(inserts_sql, values)
2017-04-07 |#################### | 15/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'ip6:fd1b:212c:a5f9::/48' for column 'ttl' at row 506")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 543")
cursor.execute(inserts_sql, values)
2017-04-07 |########################## | 20/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704071914' for column 'ttl' at row 408")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070801' for column 'ttl' at row 409")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704071917' for column 'ttl' at row 410")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704071648' for column 'ttl' at row 411")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704060726' for column 'ttl' at row 412")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070811' for column 'ttl' at row 413")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704070810' for column 'ttl' at row 414")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'TIME=01704042147' for column 'ttl' at row 415")
cursor.execute(inserts_sql, values)
2017-04-07 |################################| 24/24
2017-04-08 |##################### | 16/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_netblocks.google.com' for column 'ttl' at row 159")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'ip4:64.18.0.0/20' for column 'ttl' at row 179")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'ip4:172.217.0.0/19' for column 'ttl' at row 255")
cursor.execute(inserts_sql, values)
2017-04-08 |################################| 24/24
2017-04-09 |######### | 7/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 654")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 655")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 656")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 657")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 658")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 659")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 660")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 661")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 662")
cursor.execute(inserts_sql, values)
2017-04-09 |############ | 9/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 770")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 785")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 786")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 787")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 788")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 789")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 790")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 791")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 792")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 793")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 851")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 852")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 853")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 854")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 855")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 856")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 858")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 859")
cursor.execute(inserts_sql, values)
2017-04-09 |################# | 13/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 633")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_incspfcheck.mailspike.net' for column 'ttl' at row 965")
cursor.execute(inserts_sql, values)
2017-04-09 |#################### | 15/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_incspfcheck.mailspike.net' for column 'ttl' at row 487")
cursor.execute(inserts_sql, values)
2017-04-09 |##################### | 16/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 576")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 440")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 441")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 442")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 443")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 444")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 445")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 446")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 447")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 448")
cursor.execute(inserts_sql, values)
2017-04-09 |###################### | 17/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_incspfcheck.mailspike.net' for column 'ttl' at row 745")
cursor.execute(inserts_sql, values)
2017-04-09 |############################ | 21/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:1337' for column 'ttl' at row 236")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 720")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 721")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 722")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 723")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 724")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 725")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 726")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 727")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 728")
cursor.execute(inserts_sql, values)
2017-04-09 |############################# | 22/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 909")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 910")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 911")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 912")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 913")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 914")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 915")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 916")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 917")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_incspfcheck.mailspike.net' for column 'ttl' at row 10")
cursor.execute(inserts_sql, values)
2017-04-09 |################################| 24/24analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:80' for column 'ttl' at row 857")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 974")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 975")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 976")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 977")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 978")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 979")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 980")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 981")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 982")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'include:_incspfcheck.mailspike.net' for column 'ttl' at row 161")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2800' for column 'ttl' at row 459")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2770' for column 'ttl' at row 460")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2720' for column 'ttl' at row 461")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2710' for column 'ttl' at row 462")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2730' for column 'ttl' at row 463")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2790' for column 'ttl' at row 464")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2780' for column 'ttl' at row 465")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2750' for column 'ttl' at row 466")
cursor.execute(inserts_sql, values)
analyse.py:122: Warning: (1366, "Incorrect integer value: 'UDP:2740' for column 'ttl' at row 467")
cursor.execute(inserts_sql, values)
total duration: 460.3384804725647s
python analyse.py 274.65s user 1.37s system 59% cpu 7:41.26 total

View File

@@ -0,0 +1,6 @@
(DoresA) 17:58:58 felix@x230 ~/sources/MastersThesis/src/DoresA (master 0 0 0 0 5 9|) % python analyse.py
2017-04-07 |################################| 24/24
2017-04-08 |################################| 24/24
2017-04-09 |################################| 24/24
total duration: 3155.8810420036316s
python analyse.py 2161.28s user 186.72s system 74% cpu 52:36.84 total

View File

@@ -0,0 +1 @@
{"last_check":"2017-09-27T12:13:16Z","pypi_version":"9.0.1"}

View File

@@ -0,0 +1,12 @@
maxminddb==1.3.0
maxminddb-geolite2==2017.803
mysqlclient==1.3.12
numpy==1.13.1
pandas==0.20.3
progress==1.3
pyenchant==1.6.11
pymongo==3.5.1
python-dateutil==2.6.1
python-geoip==1.2
pytz==2017.2
six==1.10.0

Binary file not shown.

View File

@@ -0,0 +1 @@
Database and Contents Copyright (c) 2017 MaxMind, Inc.

View File

@@ -0,0 +1,3 @@
This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/4.0/.
This database incorporates GeoNames [http://www.geonames.org] geographical data, which is made available under the Creative Commons Attribution 3.0 License. To view a copy of this license, visit http://www.creativecommons.org/licenses/by/3.0/us/.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

28316
src/DoresA/res/raw/domains.txt Executable file

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

1000000
src/DoresA/res/raw/top-1m.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,61 @@
############################################################################
# abuse.ch ZeuS domain blocklist "BadDomains" (excluding hijacked sites) #
# #
# For questions please refer to https://zeustracker.abuse.ch/blocklist.php #
############################################################################
afobal.cl
alvoportas.com.br
az-armaturen.su
bestdove.in.ua
blogerjijer.pw
bright.su
citricbenz.website
danislenefc.info
dau43vt5wtrd.tk
dzitech.net
ebesee.com
gmailsecurityteam.com
goodbytoname.com
gzhueyuatex.com
hruner.com
hui-ain-apparel.tk
ice.ip64.net
istyle.ge
ivansaru.418.com1.ru
jangasm.org
jump1ng.net
kesikelyaf.com
kntksales.tk
lion.web2.0campus.net
liuz112.ddns.net
luenhinpearl.com
machine.cu.ma
maminoleinc.tk
metalexvietnamreed.tk
nasscomminc.tk
ns511849.ip-192-99-19.net
ns513726.ip-192-99-148.net
nsdic.pp.ru
p-alpha.ooo.al
pandyi.com
panel.vargakragard.se
platinum-casino.ru
projects.globaltronics.net
sanyai-love.rmu.ac.th
server.bovine-mena.com
servmill.com
ssl.sinergycosmetics.com
sus.nieuwmoer.info
telefonfiyatlari.org
update.odeen.eu
update.rifugiopontese.it
updateacces.org
vodahelp.sytes.net
www.antibasic.ga
www.nikey.cn
www.poloatmer.ru
www.riverwalktrader.co.za
www.slapintins.publicvm.com
www.witkey.com
zabava-bel.ru

View File

@@ -0,0 +1,16 @@
#!/bin/bash
# Download the Alexa top-1M list and store the COUNT highest ranked domains
# in res/benign_domains.txt.
# Must be started from the directory that contains res/ (paths are relative).

# how much to take
COUNT=1000

# the raw downloads are kept in res/raw; stop if it is missing so that the
# rm / curl below never run in an unexpected directory
cd res/raw || exit 1

# cleanup: remove the previous download (here and one level up in res/, where
# earlier versions of this script looked for it); -f ignores missing files
rm -f alexa.zip top-1m.csv ../alexa.zip ../top-1m.csv

curl -o alexa.zip http://s3.amazonaws.com/alexa-static/top-1m.csv.zip
# -o: overwrite existing files without prompting
unzip -o alexa.zip

# keep only the second comma separated column of the first COUNT rows.
# The result lives in res/ (one level above res/raw) and is overwritten so
# that re-running the script does not append duplicate entries.
head -n "$COUNT" top-1m.csv | cut -f2 -d"," > ../benign_domains.txt

View File

@@ -0,0 +1,30 @@
#!/bin/bash
# Rebuild res/malicious_domains.txt from three public sources:
# malwaredomains.com, PhishTank and the abuse.ch ZeuS tracker.
# Must be started from the directory that contains res/ (paths are relative).

cd res || exit 1

# cleanup: start with an empty result file
: > malicious_domains.txt

# the raw downloads are kept in res/raw
cd raw || exit 1

# remove the previous downloads (here and one level up in res/, where earlier
# versions of this script looked for them); -f ignores missing files
rm -f malwaredomains.zip domains.txt phishtank.csv zeus.txt
rm -f ../malwaredomains.zip ../domains.txt ../phishtank.csv ../zeus.txt

# malwaredomains.com
# (the archive name has to match the one passed to unzip below)
curl -o malwaredomains.zip http://malware-domains.com/files/domains.zip
# -o: overwrite existing files without prompting
unzip -o malwaredomains.zip
# start at line 5 and take the third tab separated column
tail -n +5 domains.txt | cut -f3 >> ../malicious_domains.txt

# Phishtank
curl -o phishtank.csv http://data.phishtank.com/data/online-valid.csv
# start at line 2 to drop the CSV header row, take the second column
# NOTE(review): this column presumably holds full URLs rather than bare
# domains, and a URL containing a comma would be truncated by cut - confirm
tail -n +2 phishtank.csv | cut -f2 -d"," >> ../malicious_domains.txt

# ZeuS Tracker
# the URL is quoted because it contains '?', which the shell treats as a glob
curl -o zeus.txt "https://zeustracker.abuse.ch/blocklist.php?download=baddomains"
# start at line 7, i.e. after the comment banner of the blocklist
tail -n +7 zeus.txt >> ../malicious_domains.txt

# remove empty lines (a backup of the unfiltered file is kept as .bak)
sed -i.bak '/^$/d' ../malicious_domains.txt

View File

@@ -0,0 +1,123 @@
import csv
import gzip
import glob
import time
import datetime
import pandas
from progress.bar import Bar
import db
# first day of the analysed period and the number of consecutive days in it
analysis_start_date = datetime.date(2017, 4, 7)
analysis_days_amount = 3

# one 'YYYY-MM-DD' string per analysed day,
# e.g. analysis_days = ['2017-04-07', '2017-04-08', '2017-04-09']
analysis_days = [
    (analysis_start_date + datetime.timedelta(days=offset)).isoformat()
    for offset in range(analysis_days_amount)
]
# mongodb
# mariadb
def main():
    """Import the log files of all analysis days into MariaDB and MongoDB.

    Runs ``check_duplicates()`` first, then walks over every day in
    ``analysis_days`` and every hour of that day, reads each gzip compressed
    CSV log file of that hour and hands its rows to the ``db`` module in
    batches of 1000. One progress bar is shown per day and the total duration
    is printed at the end.

    Raises:
        Exception: propagated from ``check_duplicates()`` when the log files
            are inconsistent.
    """
    check_duplicates()

    start = time.time()

    try:
        for day in analysis_days:
            log_files_hour = get_log_files_for_hours_of_day(day)

            progress_bar = Bar(day, max=24)

            for hour in range(24):
                progress_bar.next()

                for hour_file in log_files_hour[hour]:
                    with gzip.open(hour_file, 'rt', newline='') as file:
                        # read everything while the file is open, batch()
                        # needs a sequence with a known length
                        all_rows = list(csv.reader(file))

                    # batch mode (batches of 1000 entries); batch() never
                    # yields an empty list, so empty files cause no insert.
                    # Single mode alternative: db.mariadb_insert_log(row) /
                    # db.mongodb_insert_log(row) for every row.
                    for log_entries in batch(all_rows, 1000):
                        db.mariadb_insert_logs(log_entries)
                        db.mongodb_insert_logs(log_entries)

            progress_bar.finish()

        print('total duration: ' + str(time.time() - start) + 's')
    finally:
        # release the database connections even if an insert failed
        # NOTE(review): assumes the db module provides close() - confirm
        db.close()
def batch(iterable, n=1):
    """Yield consecutive slices of at most ``n`` items.

    Args:
        iterable: a sequence supporting ``len()`` and slicing.
        n: maximum number of items per slice.

    Yields:
        Slices of ``iterable`` in order; the last one may be shorter than
        ``n``. Nothing is yielded for an empty sequence.
    """
    size = len(iterable)
    for offset in range(0, size, n):
        # a slice end beyond the sequence is clamped to its length
        yield iterable[offset:offset + n]
def check_duplicates():
    """Check that the per-day log file counts add up to the overall count.

    The number of files found for each day in ``analysis_days`` is summed and
    compared with the number of files found for an empty date string (which
    leaves the date part of the file pattern unrestricted).

    Raises:
        Exception: if the two numbers differ.
    """
    files_of_days = sum(len(get_log_files_for_day(day)) for day in analysis_days)
    files_overall = len(get_log_files_for_day(''))

    if files_of_days == files_overall:
        return
    raise Exception('Log files inconsistency')
# TODO a slot only finds files whose name carries exactly the slot's start
# time (HH-MM); files starting at another minute inside the slot are missed
def get_log_files_for_range_of_day(date, minutes_range):
    """Return the log files of one day grouped into slots of equal length.

    Args:
        date: day as it appears in the file names, e.g. ``'2017-04-07'``.
        minutes_range: length of one slot in minutes; the day (1440 minutes)
            is split into ``int(1440 / minutes_range)`` slots.

    Returns:
        dict mapping the slot index (starting at 0) to the list of paths
        matching ``data/*<date>_<HH>-<MM>*.csv.gz`` for the slot's start time.

    Raises:
        ZeroDivisionError: if ``minutes_range`` is 0.
    """
    slot_files = {}
    slots_amount = int(1440 / minutes_range)

    for slot in range(slots_amount):
        hours, minutes = divmod(slot * minutes_range, 60)
        time_range = '%02d-%02d' % (hours, minutes)
        # resolve the pattern right away, like get_log_files_for_hours_of_day
        slot_files[slot] = glob.glob('data/*' + date + '_' + time_range + '*.csv.gz')

    return slot_files
def get_log_files_for_hours_of_day(date):
    """Return the log files of one day grouped by hour.

    Args:
        date: day as it appears in the file names, e.g. ``'2017-04-07'``.

    Returns:
        dict mapping every hour (0-23) to the list of paths matching
        ``data/*<date>_<HH>*.csv.gz``; hours without files map to ``[]``.
    """
    prefix = 'data/*' + date + '_'
    return {
        hour: glob.glob(prefix + ('%02d' % hour) + '*.csv.gz')
        for hour in range(24)
    }
def get_log_files_for_day(date):
    """Return all log files whose name contains ``date``.

    Args:
        date: day as it appears in the file names, e.g. ``'2017-04-07'``;
            an empty string does not restrict the date part of the pattern.

    Returns:
        list of paths matching ``data/*<date>*.csv.gz``.
    """
    pattern = ''.join(('data/*', date, '*.csv.gz'))
    return glob.glob(pattern)
# start the import only when the file is executed as a script
if __name__ == "__main__":
    main()

42
src/DoresA/time.py Normal file
View File

@@ -0,0 +1,42 @@
from detect_cusum import detect_cusum
import numpy as np
def cusum():
    """Run ``detect_cusum`` on a small fixed series and print one change.

    The first element of the ``detect_cusum`` result is used as a list of
    indices into the series; the values at its first two indices are printed.
    NOTE(review): the second positional argument (3) is presumably the
    detection threshold - confirm against the detect_cusum implementation.
    """
    test_set = np.array((1, 2, 1, 1, 2, 2, 2, 3, 4, 2, 5, 10))
    change = detect_cusum(test_set, 3, show=False)[0]

    idx_from = change[0]
    idx_to = change[1]
    message = ''.join([
        'change from ', str(test_set[idx_from]), ' index(', str(idx_from), ') to ',
        str(test_set[idx_to]), ' index(', str(idx_to), ')',
    ])
    print(message)
def test_eucl_dist(a, b):
return np.linalg.norm(a - b)
def variance(a):
    """Return the variance of ``a`` as computed by ``np.var``."""
    result = np.var(a)
    return result
def test_decision_tree():
    """Fit a decision tree on the iris data set and render it with graphviz.

    The tree is exported in DOT format and rendered to a file named ``iris``;
    ``view=True`` additionally asks graphviz to open the result.
    """
    from sklearn.datasets import load_iris
    from sklearn import tree

    dataset = load_iris()
    classifier = tree.DecisionTreeClassifier().fit(dataset.data, dataset.target)

    import graphviz

    dot_source = tree.export_graphviz(classifier, out_file=None)
    graphviz.Source(dot_source).render('iris', view=True)
def test():
    """Run the experiment that is currently enabled (the CUSUM demo)."""
    # disabled variance experiment:
    # a = np.array((1, 2, 3))
    # b = np.array((0, 1, 2))
    # print(variance(a))
    cusum()
# run the experiments only when the file is executed as a script
if __name__ == "__main__":
    test()

9
src/DoresA/ttl.py Normal file
View File

@@ -0,0 +1,9 @@
import numpy as np
def standard_deviation(array):
    """Return the standard deviation of ``array`` as computed by ``np.std``."""
    result = np.std(array)
    return result
# nothing to do when executed as a script: end with exit status 0
if __name__ == "__main__":
    # exit() is a helper installed by the site module for interactive use and
    # is not guaranteed to exist; raising SystemExit is the equivalent that
    # always works
    raise SystemExit