diff --git a/literature/Technical Trends in Phishing Attacks.pdf b/literature/Technical Trends in Phishing Attacks.pdf new file mode 100644 index 0000000..96790c1 Binary files /dev/null and b/literature/Technical Trends in Phishing Attacks.pdf differ diff --git a/src/DoresA/Dockerfile b/src/DoresA/Dockerfile new file mode 100644 index 0000000..8adcf82 --- /dev/null +++ b/src/DoresA/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3 +ENV PYTHONUNBUFFERED 1 +RUN mkdir /app +WORKDIR /app +ADD requirements.txt /app/ +RUN pip install -r requirements.txt +ADD . /app/ \ No newline at end of file diff --git a/src/DoresA/db.py b/src/DoresA/db.py index fd4dfc9..bb8349b 100644 --- a/src/DoresA/db.py +++ b/src/DoresA/db.py @@ -1,19 +1,46 @@ import MySQLdb as mariadb import time +import os from pymongo import MongoClient -mongo_client = MongoClient('localhost', 27017) -mongo_db = mongo_client.doresa -pdns_logs_mongo = mongo_db.pdns_logs +mongodb_host = 'localhost' +mongodb_db_name = 'doresa' +# mongodb_collection_name = 'pdns_logs' +mongodb_collection_name = 'may' # tmp TODO remove + +sql_host = 'localhost' +sql_db_name = 'doresa' +sql_user_name = 'doresa' +sql_pw = '3qfACEZzbXY4b' +# sql_table_name = 'pdns_logs' +sql_table_name = 'pdns_logs_test' -sql_connection = mariadb.connect(user='doresa', passwd='3qfACEZzbXY4b', db='doresa') +if 'MYSQL_HOST' in os.environ: + sql_host = os.environ['MYSQL_HOST'] + +if 'MYSQL_DATABASE' in os.environ: + sql_db_name = os.environ['MYSQL_DATABASE'] + +if 'MYSQL_USER' in os.environ: + sql_user_name = os.environ['MYSQL_USER'] + +if 'MYSQL_PASSWORD' in os.environ: + sql_pw = os.environ['MYSQL_PASSWORD'] + + +mongo_client = MongoClient(mongodb_host, 27017) +mongo_db = mongo_client[mongodb_db_name] +pdns_logs_mongo = mongo_db[mongodb_collection_name] + + +sql_connection = mariadb.connect(host=sql_host, user=sql_user_name, passwd=sql_pw, db=sql_db_name) sql_cursor = sql_connection.cursor() def mariadb_insert_log(csv_entry): - insert_sql = 'INSERT INTO pdns_logs (timestamp, domain, type, record, ttl) VALUES (%s, %s, %s, %s, %s)' + insert_sql = 'INSERT INTO ' + sql_table_name + ' (timestamp, domain, type, record, ttl) VALUES (%s, %s, %s, %s, %s)' values = (convert_timestamp_to_sql_datetime(float(csv_entry[0])), csv_entry[1], csv_entry[2], csv_entry[3], csv_entry[4]) @@ -22,7 +49,7 @@ def mariadb_insert_log(csv_entry): def mariadb_insert_logs(csv_entries): - inserts_sql = 'INSERT INTO pdns_logs (timestamp, domain, type, record, ttl) VALUES ' + inserts_sql = 'INSERT INTO ' + sql_table_name + ' (timestamp, domain, type, record, ttl) VALUES ' for i in range(len(csv_entries) - 1): inserts_sql += '(%s, %s, %s, %s, %s), ' @@ -39,14 +66,13 @@ def mariadb_insert_logs(csv_entries): def mariadb_get_logs(from_time, to_time): - get_logs_from_to = 'SELECT * FROM pdns_logs WHERE timestamp BETWEEN \'{}\' and \'{}\';'.format(from_time, to_time) + get_logs_from_to = 'SELECT * FROM ' + sql_table_name + ' WHERE timestamp BETWEEN \'{}\' and \'{}\';'.format(from_time, to_time) sql_connection.query(get_logs_from_to) return sql_connection.use_result() def mariadb_create_table(): - create_table = """ - CREATE TABLE pdns_logs ( + create_table = 'CREATE TABLE IF NOT EXISTS ' + sql_table_name + """ ( id INTEGER AUTO_INCREMENT PRIMARY KEY, timestamp DATETIME, domain VARCHAR(255), @@ -84,5 +110,7 @@ def close(): mongo_client.close() +mariadb_create_table() + if __name__ == "__main__": exit() diff --git a/src/DoresA/docker-compose.yml b/src/DoresA/docker-compose.yml new file mode 100644 index 0000000..db6f28c --- /dev/null +++ b/src/DoresA/docker-compose.yml @@ -0,0 +1,31 @@ +version: '3' + +services: + mariadb: + container_name: mariadb + image: mariadb + restart: unless-stopped + ports: + - "3306:3306" + environment: + MYSQL_ROOT_PASSWORD: jDGb3CvbsmBSB + MYSQL_DATABASE: doresa + MYSQL_USER: doresa + MYSQL_PASSWORD: 3qfACEZzbXY4b + + app: + build: . + container_name: app + restart: unless-stopped + command: python3 serialize_logs_to_db.py + volumes: + - .:/app + - ./data:/data + environment: + MYSQL_HOST: mariadb + MYSQL_DATABASE: doresa + MYSQL_USER: doresa + MYSQL_PASSWORD: 3qfACEZzbXY4b + DATA_PATH: + depends_on: + - mariadb diff --git a/src/DoresA/iterate_db.py b/src/DoresA/iterate_db.py new file mode 100644 index 0000000..364eda9 --- /dev/null +++ b/src/DoresA/iterate_db.py @@ -0,0 +1,16 @@ +import db +import datetime + + +def test(): + f = '%Y-%m-%d %H:%M:%S' + results = db.mariadb_get_logs(datetime.date(2017, 4, 7).strftime(f), datetime.date(2017, 4, 8).strftime(f)) + + row = results.fetch_row(how=1) + while row: + print(row[0]['domain']) + row = results.fetch_row(how=1) + + +if __name__ == "__main__": + test() diff --git a/src/DoresA/serialize_logs_to_db.py b/src/DoresA/serialize_logs_to_db.py index 26cb22f..d23229a 100644 --- a/src/DoresA/serialize_logs_to_db.py +++ b/src/DoresA/serialize_logs_to_db.py @@ -4,25 +4,23 @@ import glob import time import datetime import pandas +import os from progress.bar import Bar import db -analysis_start_date = datetime.date(2017, 4, 7) -analysis_days_amount = 3 +analysis_start_date = datetime.date(2017, 5, 1) +analysis_days_amount = 31 +# pdns_logs_path = 'data/' +pdns_logs_path = '/run/media/felix/ext/2017.05/' # tmp TODO remove # e.g. analysis_days = ['2017-04-07', '2017-04-08', '2017-04-09'] analysis_days = [(analysis_start_date + datetime.timedelta(days=x)).strftime('%Y-%m-%d') for x in range(analysis_days_amount)] -# mongodb - -# mariadb - - def main(): - check_duplicates() + # check_duplicates() TODO readd start = time.time() distinct_ttl_count = {} @@ -51,7 +49,7 @@ def main(): # batch mode (batches of 1000 entries) for log_entries in batch(all_rows, 1000): db.mariadb_insert_logs(log_entries) - # db.mongodb_insert_logs(log_entries) + db.mongodb_insert_logs(log_entries) # single mode # for log_entry in reader: @@ -100,7 +98,7 @@ def get_log_files_for_hours_of_day(date): slots_amount = 24 for slot in range(slots_amount): - slot_files[slot] = glob.glob('data/*' + date + '_' + ('%02d' % slot) + '*.csv.gz') + slot_files[slot] = glob.glob(pdns_logs_path + '*' + date + '_' + ('%02d' % slot) + '*.csv.gz') return slot_files