Merge branch 'master' of github.com:felixsteghofer/masters_thesis
This commit is contained in:
@@ -1,7 +1,11 @@
|
|||||||
FROM python:3
|
FROM python:3.6-alpine
|
||||||
ENV PYTHONUNBUFFERED 1
|
|
||||||
|
# numpy scipy spellchecking etc
|
||||||
|
RUN apk add --no-cache mariadb-dev gcc gfortran musl-dev freetype-dev enchant
|
||||||
|
|
||||||
|
#ENV PYTHONUNBUFFERED 1
|
||||||
RUN mkdir /app
|
RUN mkdir /app
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
ADD requirements.txt /app/
|
ADD requirements.txt /app/
|
||||||
RUN pip install -r requirements.txt
|
RUN pip install -r requirements.txt
|
||||||
ADD . /app/
|
VOLUME /app
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ mongodb_db_name = 'doresa'
|
|||||||
mongodb_collection_name = 'may' # tmp TODO remove
|
mongodb_collection_name = 'may' # tmp TODO remove
|
||||||
|
|
||||||
sql_host = 'localhost'
|
sql_host = 'localhost'
|
||||||
|
sql_port = 3306
|
||||||
sql_db_name = 'doresa'
|
sql_db_name = 'doresa'
|
||||||
sql_user_name = 'doresa'
|
sql_user_name = 'doresa'
|
||||||
sql_pw = '3qfACEZzbXY4b'
|
sql_pw = '3qfACEZzbXY4b'
|
||||||
@@ -20,6 +21,9 @@ sql_table_name = 'pdns_logs_test'
|
|||||||
if 'MYSQL_HOST' in os.environ:
|
if 'MYSQL_HOST' in os.environ:
|
||||||
sql_host = os.environ['MYSQL_HOST']
|
sql_host = os.environ['MYSQL_HOST']
|
||||||
|
|
||||||
|
if 'MYSQL_PORT' in os.environ:
|
||||||
|
sql_port = int(os.environ['MYSQL_PORT'])
|
||||||
|
|
||||||
if 'MYSQL_DATABASE' in os.environ:
|
if 'MYSQL_DATABASE' in os.environ:
|
||||||
sql_db_name = os.environ['MYSQL_DATABASE']
|
sql_db_name = os.environ['MYSQL_DATABASE']
|
||||||
|
|
||||||
@@ -35,7 +39,7 @@ mongo_db = mongo_client[mongodb_db_name]
|
|||||||
pdns_logs_mongo = mongo_db[mongodb_collection_name]
|
pdns_logs_mongo = mongo_db[mongodb_collection_name]
|
||||||
|
|
||||||
|
|
||||||
sql_connection = mariadb.connect(host=sql_host, user=sql_user_name, passwd=sql_pw, db=sql_db_name)
|
sql_connection = mariadb.connect(host=sql_host, user=sql_user_name, passwd=sql_pw, db=sql_db_name, port=sql_port)
|
||||||
sql_cursor = sql_connection.cursor()
|
sql_cursor = sql_connection.cursor()
|
||||||
|
|
||||||
|
|
||||||
@@ -61,7 +65,11 @@ def mariadb_insert_logs(csv_entries):
|
|||||||
values += [convert_timestamp_to_sql_datetime(float(csv_entry[0])), csv_entry[1],
|
values += [convert_timestamp_to_sql_datetime(float(csv_entry[0])), csv_entry[1],
|
||||||
csv_entry[2], csv_entry[3], csv_entry[4]]
|
csv_entry[2], csv_entry[3], csv_entry[4]]
|
||||||
|
|
||||||
sql_cursor.execute(inserts_sql, values)
|
try:
|
||||||
|
sql_cursor.execute(inserts_sql, values)
|
||||||
|
except mariadb.OperationalError:
|
||||||
|
print('something happened') # TODO proper error handling
|
||||||
|
|
||||||
sql_connection.commit()
|
sql_connection.commit()
|
||||||
|
|
||||||
|
|
||||||
@@ -110,7 +118,7 @@ def close():
|
|||||||
mongo_client.close()
|
mongo_client.close()
|
||||||
|
|
||||||
|
|
||||||
mariadb_create_table()
|
#mariadb_create_table()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
exit()
|
exit()
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
version: '3'
|
version: '3'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
mariadb:
|
doresa_mariadb:
|
||||||
container_name: mariadb
|
container_name: doresa_mariadb
|
||||||
image: mariadb
|
image: mariadb
|
||||||
restart: unless-stopped
|
|
||||||
ports:
|
ports:
|
||||||
- "3306:3306"
|
- "3306:3306"
|
||||||
environment:
|
environment:
|
||||||
@@ -13,19 +12,19 @@ services:
|
|||||||
MYSQL_USER: doresa
|
MYSQL_USER: doresa
|
||||||
MYSQL_PASSWORD: 3qfACEZzbXY4b
|
MYSQL_PASSWORD: 3qfACEZzbXY4b
|
||||||
|
|
||||||
app:
|
doresa_app:
|
||||||
build: .
|
build: .
|
||||||
container_name: app
|
container_name: doresa_app
|
||||||
restart: unless-stopped
|
|
||||||
command: python3 serialize_logs_to_db.py
|
command: python3 serialize_logs_to_db.py
|
||||||
volumes:
|
volumes:
|
||||||
- .:/app
|
- .:/app
|
||||||
- ./data:/data
|
- /media/backup/felix/uni/Mastersthesis/pDNS/2017.05:/data # path to zipped pDNS logs
|
||||||
environment:
|
environment:
|
||||||
MYSQL_HOST: mariadb
|
MYSQL_HOST: doresa_mariadb
|
||||||
|
MYSQL_PORT: 3306
|
||||||
MYSQL_DATABASE: doresa
|
MYSQL_DATABASE: doresa
|
||||||
MYSQL_USER: doresa
|
MYSQL_USER: doresa
|
||||||
MYSQL_PASSWORD: 3qfACEZzbXY4b
|
MYSQL_PASSWORD: 3qfACEZzbXY4b
|
||||||
DATA_PATH:
|
|
||||||
depends_on:
|
depends_on:
|
||||||
- mariadb
|
- doresa_mariadb
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ maxminddb==1.3.0
|
|||||||
maxminddb-geolite2==2017.803
|
maxminddb-geolite2==2017.803
|
||||||
mysqlclient==1.3.12
|
mysqlclient==1.3.12
|
||||||
numpy==1.13.1
|
numpy==1.13.1
|
||||||
pandas==0.20.3
|
|
||||||
progress==1.3
|
progress==1.3
|
||||||
pyenchant==1.6.11
|
pyenchant==1.6.11
|
||||||
pymongo==3.5.1
|
pymongo==3.5.1
|
||||||
|
|||||||
@@ -3,16 +3,16 @@ import gzip
|
|||||||
import glob
|
import glob
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
import pandas
|
|
||||||
import os
|
import os
|
||||||
from progress.bar import Bar
|
from progress.bar import Bar
|
||||||
|
|
||||||
import db
|
import db
|
||||||
|
|
||||||
|
# TODO environment this
|
||||||
analysis_start_date = datetime.date(2017, 5, 1)
|
analysis_start_date = datetime.date(2017, 5, 1)
|
||||||
analysis_days_amount = 31
|
analysis_days_amount = 31
|
||||||
# pdns_logs_path = 'data/'
|
# pdns_logs_path = 'data/'
|
||||||
pdns_logs_path = '/run/media/felix/ext/2017.05/' # tmp TODO remove
|
pdns_logs_path = '/data/'
|
||||||
|
|
||||||
# e.g. analysis_days = ['2017-04-07', '2017-04-08', '2017-04-09']
|
# e.g. analysis_days = ['2017-04-07', '2017-04-08', '2017-04-09']
|
||||||
analysis_days = [(analysis_start_date + datetime.timedelta(days=x)).strftime('%Y-%m-%d') for x in
|
analysis_days = [(analysis_start_date + datetime.timedelta(days=x)).strftime('%Y-%m-%d') for x in
|
||||||
@@ -27,7 +27,7 @@ def main():
|
|||||||
# everything = {}
|
# everything = {}
|
||||||
|
|
||||||
# for log_file in ['data/pdns_capture.pcap-sgsgpdc0n9x-2017-04-07_00-00-02.csv.gz']:
|
# for log_file in ['data/pdns_capture.pcap-sgsgpdc0n9x-2017-04-07_00-00-02.csv.gz']:
|
||||||
|
|
||||||
for day in range(analysis_days_amount):
|
for day in range(analysis_days_amount):
|
||||||
log_files_hour = get_log_files_for_hours_of_day(analysis_days[day])
|
log_files_hour = get_log_files_for_hours_of_day(analysis_days[day])
|
||||||
# everything[day] = {}
|
# everything[day] = {}
|
||||||
@@ -38,8 +38,7 @@ def main():
|
|||||||
progress_bar.next()
|
progress_bar.next()
|
||||||
# everything[day][hour] = {}
|
# everything[day][hour] = {}
|
||||||
for hour_files in log_files_hour[hour]:
|
for hour_files in log_files_hour[hour]:
|
||||||
|
# a bit faster, 10-15% (but pandas overhead)
|
||||||
# a bit faster
|
|
||||||
# df = pandas.read_csv(log_file, compression='gzip', header=None)
|
# df = pandas.read_csv(log_file, compression='gzip', header=None)
|
||||||
# print(df.iloc[0])
|
# print(df.iloc[0])
|
||||||
with gzip.open(hour_files, 'rt', newline='') as file:
|
with gzip.open(hour_files, 'rt', newline='') as file:
|
||||||
@@ -49,7 +48,7 @@ def main():
|
|||||||
# batch mode (batches of 1000 entries)
|
# batch mode (batches of 1000 entries)
|
||||||
for log_entries in batch(all_rows, 1000):
|
for log_entries in batch(all_rows, 1000):
|
||||||
db.mariadb_insert_logs(log_entries)
|
db.mariadb_insert_logs(log_entries)
|
||||||
db.mongodb_insert_logs(log_entries)
|
#db.mongodb_insert_logs(log_entries)
|
||||||
|
|
||||||
# single mode
|
# single mode
|
||||||
# for log_entry in reader:
|
# for log_entry in reader:
|
||||||
|
|||||||
Reference in New Issue
Block a user