docker fixes

This commit is contained in:
2017-10-27 14:22:09 +02:00
parent f62a5cd1fb
commit 9ddc99bc37
5 changed files with 32 additions and 23 deletions

View File

@@ -3,16 +3,16 @@ import gzip
import glob
import time
import datetime
import pandas
import os
from progress.bar import Bar
import db
# TODO environment this (move these settings into environment variables)
# Analysis window: the first day (inclusive) and how many consecutive
# days of pDNS logs to process.
analysis_start_date = datetime.date(2017, 5, 1)
analysis_days_amount = 31
# pdns_logs_path = 'data/'
# NOTE(review): removed a dead interim assignment here — the previous line
# set pdns_logs_path to a local scratch path ('/run/media/felix/ext/2017.05/')
# that was immediately overwritten below and was marked "tmp TODO remove".
pdns_logs_path = '/data/'  # directory containing the gzipped pDNS log files
analysis_days = [(analysis_start_date + datetime.timedelta(days=x)).strftime('%Y-%m-%d') for x in
@@ -27,7 +27,7 @@ def main():
# everything = {}
# for log_file in ['data/pdns_capture.pcap-sgsgpdc0n9x-2017-04-07_00-00-02.csv.gz']:
for day in range(analysis_days_amount):
log_files_hour = get_log_files_for_hours_of_day(analysis_days[day])
# everything[day] = {}
@@ -38,8 +38,7 @@ def main():
progress_bar.next()
# everything[day][hour] = {}
for hour_files in log_files_hour[hour]:
# a bit faster
# a bit faster, 10-15% (but pandas overhead)
# df = pandas.read_csv(log_file, compression='gzip', header=None)
# print(df.iloc[0])
with gzip.open(hour_files, 'rt', newline='') as file:
@@ -49,7 +48,7 @@ def main():
# batch mode (batches of 1000 entries)
for log_entries in batch(all_rows, 1000):
db.mariadb_insert_logs(log_entries)
db.mongodb_insert_logs(log_entries)
#db.mongodb_insert_logs(log_entries)
# single mode
# for log_entry in reader: