docker fixes
This commit is contained in:
@@ -3,16 +3,16 @@ import gzip
|
||||
import glob
|
||||
import time
|
||||
import datetime
|
||||
import pandas
|
||||
import os
|
||||
from progress.bar import Bar
|
||||
|
||||
import db
|
||||
|
||||
# TODO: read these settings from environment variables instead of hard-coding them
|
||||
analysis_start_date = datetime.date(2017, 5, 1)
|
||||
analysis_days_amount = 31
|
||||
# pdns_logs_path = 'data/'
|
||||
pdns_logs_path = '/run/media/felix/ext/2017.05/' # tmp TODO remove
|
||||
pdns_logs_path = '/data/'
|
||||
|
||||
# e.g. analysis_days = ['2017-04-07', '2017-04-08', '2017-04-09']
|
||||
analysis_days = [(analysis_start_date + datetime.timedelta(days=x)).strftime('%Y-%m-%d') for x in
|
||||
@@ -27,7 +27,7 @@ def main():
|
||||
# everything = {}
|
||||
|
||||
# for log_file in ['data/pdns_capture.pcap-sgsgpdc0n9x-2017-04-07_00-00-02.csv.gz']:
|
||||
|
||||
|
||||
for day in range(analysis_days_amount):
|
||||
log_files_hour = get_log_files_for_hours_of_day(analysis_days[day])
|
||||
# everything[day] = {}
|
||||
@@ -38,8 +38,7 @@ def main():
|
||||
progress_bar.next()
|
||||
# everything[day][hour] = {}
|
||||
for hour_files in log_files_hour[hour]:
|
||||
|
||||
# a bit faster
|
||||
# a bit faster, 10-15% (but pandas overhead)
|
||||
# df = pandas.read_csv(log_file, compression='gzip', header=None)
|
||||
# print(df.iloc[0])
|
||||
with gzip.open(hour_files, 'rt', newline='') as file:
|
||||
@@ -49,7 +48,7 @@ def main():
|
||||
# batch mode (batches of 1000 entries)
|
||||
for log_entries in batch(all_rows, 1000):
|
||||
db.mariadb_insert_logs(log_entries)
|
||||
db.mongodb_insert_logs(log_entries)
|
||||
#db.mongodb_insert_logs(log_entries)
|
||||
|
||||
# single mode
|
||||
# for log_entry in reader:
|
||||
|
||||
Reference in New Issue
Block a user