updated benchmarks

This commit is contained in:
2017-12-01 12:31:42 +01:00
parent 673464137e
commit 8d10f9bf93
7 changed files with 107 additions and 86 deletions

View File

@@ -1,84 +0,0 @@
#!/usr/bin/env python3
import csv
import gzip
import time
import glob
def compare_load_file():
    """Run both single-file loaders and print how long each took."""
    plain_secs = load_file_plain()
    zipped_secs = load_file_zipped()
    for label, seconds in (('plain took: ', plain_secs),
                           ('zipped took: ', zipped_secs),
                           ('(plain - zipped): ', plain_secs - zipped_secs)):
        print(label + str(seconds) + ' s')
def load_file_plain():
    """Time a line-by-line pass over the uncompressed capture CSV.

    Each line is split (to pay the parse cost) and discarded.
    Returns the elapsed wall-clock seconds.
    """
    started = time.time()
    with open('pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv', 'rt') as plain_file:
        for record in plain_file:
            record.split()  # parse cost only; the row is thrown away
    return time.time() - started
def load_file_zipped():
    """Time a csv.reader pass over the gzipped capture; return seconds."""
    begin = time.time()
    path = 'pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv.gz'
    with gzip.open(path, 'rt', newline='') as zipped:
        for _row in csv.reader(zipped):
            pass
    return time.time() - begin
def load_day_zipped():
    """Decompress and csv-parse every capture file for 2017-09-01.

    Prints and returns the elapsed wall-clock seconds.
    """
    begin = time.time()
    for path in glob.glob('/home/felix/pdns/' + '*-2017-09-01*.csv.gz'):
        with gzip.open(path, 'rt', newline='') as handle:
            for _ in csv.reader(handle):
                pass
    elapsed = time.time() - begin
    print('iterating day took: ' + str(elapsed) + ' s')
    return elapsed
def benchmark_load_day():
    """Run the whole-day load ten times and report cleaned statistics.

    Prints all raw durations, the outlier-filtered list, and its mean.
    """
    durs = []
    for _ in range(10):
        durs.append(load_day_zipped())
    print('all results: ' + str(durs))
    cleaned = ignore_outliers(durs)
    # BUG FIX: the original print call was missing its closing parenthesis,
    # which made the whole script a SyntaxError.
    print('cleaned results: ' + str(cleaned))
    print('average: ' + str(mean(cleaned)))
def ignore_outliers(lst):
    """Drop entries at or above 110% of the median (slow, outlier runs)."""
    cutoff = median(lst) * 1.1
    return [value for value in lst if value < cutoff]
def median(lst):
    """Return the median of lst.

    For even lengths this is the mean of the two middle values.
    Raises IndexError on an empty list (unchanged from original).
    """
    ordered = sorted(lst)
    size = len(ordered)
    mid = (size - 1) // 2
    if size % 2 == 0:
        return (ordered[mid] + ordered[mid + 1]) / 2.0
    return ordered[mid]
def mean(lst):
    """Arithmetic mean; 0.0 for an empty list (guards division by zero)."""
    count = len(lst)
    return float(sum(lst)) / count if count else 0.0
# Script entry point: run the repeated full-day load benchmark.
if __name__ == '__main__':
benchmark_load_day()

View File

@@ -1,5 +1,14 @@
#!/bin/bash
# Per-day report for pDNS capture files: total size and file count.
# NOTE(review): this span is a rendered diff that shows the old one-liner
# (October, fixed path) and the new loop (configurable month, compgen guard)
# together; the +/- markers did not survive extraction.
cd /run/media/felix/AE7E01B77E01797B/pDNS;
for i in {01..31}; do echo -n -e "day $i \t size: "; echo -n -e $(du -ch *"2017-10-$i"* | tail -1) " \t #files: "; ls *"2017-10-$i"* | wc -l; done
#cd /run/media/felix/AE7E01B77E01797B/pDNS;
cd /home/felix/sources/MastersThesis/src/DoresA/data;
# Month under report; days with no matching files are skipped below.
month="04"
for i in {01..31}; do
# compgen -G succeeds iff the glob matches at least one file.
if compgen -G *"2017-$month-$i"* > /dev/null; then
echo -n -e "day $i \t size: ";
echo -n -e $(du -ch *"2017-$month-$i"* | tail -1) " \t #files: ";
ls *"2017-$month-$i"* | wc -l;
fi
done

View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python3
import gzip
import csv
import time
logs_dir = 'test-data/'
def compare_load_file():
    """Benchmark plain vs. gzipped loading of the same capture and report."""
    dur_plain = load_file_plain()
    dur_gzip = load_file_zipped()
    print('plain took: ' + str(dur_plain) + ' s')
    print('zipped took: ' + str(dur_gzip) + ' s')
    print('(plain - zipped): ' + str(dur_plain - dur_gzip) + ' s')
def load_file_plain():
    """Time a line-by-line pass over the uncompressed capture CSV.

    Splits each line (parse cost only) and discards it.
    Returns the elapsed wall-clock seconds.
    """
    begin = time.time()
    path = logs_dir + 'pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv'
    with open(path, 'rt') as handle:
        for record in handle:
            record.split()  # result intentionally discarded
    return time.time() - begin
def load_file_zipped():
    """Time a csv.reader pass over the gzipped capture; return seconds."""
    begin = time.time()
    name = logs_dir + 'pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv.gz'
    with gzip.open(name, 'rt', newline='') as handle:
        for _ in csv.reader(handle):
            pass
    return time.time() - begin
# Script entry point: compare plain vs. gzipped single-file load times.
if __name__ == '__main__':
compare_load_file()

56
src/benchmarks/load_day.py Executable file
View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
import csv
import gzip
import time
import glob
def benchmark_load_day():
    """Time ten full-day loads, drop outliers, and print the average."""
    samples = [load_day_zipped() for _ in range(10)]
    print('all results: ' + str(samples))
    kept = ignore_outliers(samples)
    print('cleaned results: ' + str(kept))
    print('average: ' + str(mean(kept)))
def load_day_zipped():
    """Decompress and csv-parse every gzipped capture for 2017-09-01.

    Prints and returns the elapsed wall-clock seconds.
    """
    begin = time.time()
    pattern = '/home/felix/pdns/' + '*-2017-09-01*.csv.gz'
    for match in glob.glob(pattern):
        with gzip.open(match, 'rt', newline='') as handle:
            for _ in csv.reader(handle):
                pass
    elapsed = time.time() - begin
    print('iterating day took: ' + str(elapsed) + ' s')
    return elapsed
def ignore_outliers(lst):
    """Keep only entries strictly below 1.1x the median (drops slow runs)."""
    threshold = 1.1 * median(lst)
    kept = []
    for duration in lst:
        if duration < threshold:
            kept.append(duration)
    return kept
def median(lst):
    """Median of lst: middle element, or average of the two middle ones.

    Raises IndexError on an empty list (unchanged from original).
    """
    ranked = sorted(lst)
    middle = (len(ranked) - 1) // 2
    if len(ranked) % 2 == 1:
        return ranked[middle]
    return (ranked[middle] + ranked[middle + 1]) / 2.0
def mean(lst):
    """Arithmetic mean of lst; defined as 0.0 for an empty list."""
    divisor = max(len(lst), 1)
    total = float(sum(lst))
    return total / divisor
# Script entry point: run the repeated full-day load benchmark.
if __name__ == '__main__':
benchmark_load_day()