added ability to run in docker

2017-10-19 22:07:30 +02:00
parent 7dc393ae92
commit f62a5cd1fb
6 changed files with 99 additions and 19 deletions


@@ -4,25 +4,23 @@ import glob
 import time
 import datetime
 import pandas
 import os
 from progress.bar import Bar
 import db
-analysis_start_date = datetime.date(2017, 4, 7)
-analysis_days_amount = 3
+analysis_start_date = datetime.date(2017, 5, 1)
+analysis_days_amount = 31
 # pdns_logs_path = 'data/'
 pdns_logs_path = '/run/media/felix/ext/2017.05/' # tmp TODO remove
 # e.g. analysis_days = ['2017-04-07', '2017-04-08', '2017-04-09']
 analysis_days = [(analysis_start_date + datetime.timedelta(days=x)).strftime('%Y-%m-%d') for x in
                  range(analysis_days_amount)]
-# mongodb
-# mariadb
 def main():
-    check_duplicates()
+    # check_duplicates() TODO readd
     start = time.time()
     distinct_ttl_count = {}
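
As context for the configuration change above: the timedelta comprehension expands the start date and day count into one date string per analysed day. A minimal standalone sketch with the new May 2017 values (the final print is illustrative only):

import datetime

analysis_start_date = datetime.date(2017, 5, 1)
analysis_days_amount = 31

# Expands to ['2017-05-01', '2017-05-02', ..., '2017-05-31'], one string per analysed day.
analysis_days = [(analysis_start_date + datetime.timedelta(days=x)).strftime('%Y-%m-%d')
                 for x in range(analysis_days_amount)]

print(analysis_days[0], analysis_days[-1])  # 2017-05-01 2017-05-31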
@@ -51,7 +49,7 @@ def main():
     # batch mode (batches of 1000 entries)
     for log_entries in batch(all_rows, 1000):
         db.mariadb_insert_logs(log_entries)
-        # db.mongodb_insert_logs(log_entries)
+        db.mongodb_insert_logs(log_entries)
     # single mode
     # for log_entry in reader:
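
The batch() helper called in this loop is not shown in the hunk. A hypothetical sketch of what such a helper typically looks like, assuming all_rows is an in-memory list (the name and call signature are taken from the loop above, the body is an assumption):

def batch(rows, size):
    # Hypothetical sketch of the batch() helper: yield consecutive slices
    # of at most `size` rows so each db insert gets one bounded chunk.
    for start in range(0, len(rows), size):
        yield rows[start:start + size]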
@@ -100,7 +98,7 @@ def get_log_files_for_hours_of_day(date):
     slots_amount = 24
     for slot in range(slots_amount):
-        slot_files[slot] = glob.glob('data/*' + date + '_' + ('%02d' % slot) + '*.csv.gz')
+        slot_files[slot] = glob.glob(pdns_logs_path + '*' + date + '_' + ('%02d' % slot) + '*.csv.gz')
     return slot_files
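
The glob now anchors on pdns_logs_path instead of the hardcoded data/ directory, so the log location can be swapped in one place (for example a mounted volume when running the analysis in Docker, per the commit title). A small usage sketch, assuming per-hour files named like pdns_2017-05-01_07.csv.gz under that path (the file name and the loop are illustrative only):

slot_files = get_log_files_for_hours_of_day('2017-05-01')
for slot in range(24):
    # Each slot holds the .csv.gz files whose name contains '2017-05-01_<HH>'.
    print('%02d:00 -> %d file(s)' % (slot, len(slot_files[slot])))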