updated benchmarks
This commit is contained in:
@@ -1,84 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import csv
|
||||
import gzip
|
||||
import time
|
||||
import glob
|
||||
|
||||
|
||||
def compare_load_file():
    """Benchmark plain vs. gzipped CSV reading and print the difference."""
    plain_duration = load_file_plain()
    zipped_duration = load_file_zipped()

    print(f'plain took: {plain_duration} s')
    print(f'zipped took: {zipped_duration} s')
    print(f'(plain - zipped): {plain_duration - zipped_duration} s')
||||
def load_file_plain(path='pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv'):
    """Time iterating a plain-text CSV file line by line.

    Args:
        path: file to read; defaults to the original benchmark capture.

    Returns:
        Elapsed wall-clock seconds as a float.
    """
    # perf_counter is monotonic and the recommended clock for benchmarks
    # (time.time can jump with system clock adjustments).
    start = time.perf_counter()
    with open(path, 'rt') as file_p:
        for line in file_p:
            # split() mirrors the per-line parse cost measured in the
            # zipped variant; the result is intentionally discarded.
            line.split()
    return time.perf_counter() - start
||||
def load_file_zipped(path='pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv.gz'):
    """Time iterating a gzipped CSV file through csv.reader.

    Args:
        path: gzip file to read; defaults to the original benchmark capture.

    Returns:
        Elapsed wall-clock seconds as a float.
    """
    start = time.perf_counter()  # monotonic clock, safe for benchmarking
    # newline='' is the documented setting when handing a text stream
    # to csv.reader.
    with gzip.open(path, 'rt', newline='') as file_z:
        for _row in csv.reader(file_z):
            pass  # measuring decode + parse cost only
    return time.perf_counter() - start
||||
def load_day_zipped(logs_dir='/home/felix/pdns/', pattern='*-2017-09-01*.csv.gz'):
    """Time iterating every gzipped CSV of one capture day.

    Args:
        logs_dir: directory (with trailing separator) holding the captures.
        pattern:  glob pattern selecting one day's files.

    Returns:
        Elapsed wall-clock seconds as a float (also printed).
    """
    start = time.perf_counter()

    for name in glob.glob(logs_dir + pattern):
        # 'file' renamed to 'fh': the original shadowed a builtin name.
        with gzip.open(name, 'rt', newline='') as fh:
            for _row in csv.reader(fh):
                pass  # iterate only; we measure decode + parse cost

    duration = time.perf_counter() - start
    print('iterating day took: ' + str(duration) + ' s')
    return duration
||||
def benchmark_load_day():
    """Run load_day_zipped ten times and print the outlier-cleaned average."""
    durations = [load_day_zipped() for _ in range(10)]
    print('all results: ' + str(durations))
    cleaned = ignore_outliers(durations)
    # Fix: the original line was missing its closing parenthesis,
    # which made the whole module a SyntaxError.
    print('cleaned results: ' + str(cleaned))
    print('average: ' + str(mean(cleaned)))
||||
def ignore_outliers(lst):
    """Return lst without entries at or above 110% of the median.

    Only high outliers (slow runs) are dropped; low values are kept.
    """
    threshold = median(lst) * 1.1
    return [value for value in lst if value < threshold]
||||
def median(lst):
    """Return the median of lst (mean of the two middle values when even)."""
    ordered = sorted(lst)
    n = len(ordered)
    mid = (n - 1) // 2
    if n % 2:
        return ordered[mid]
    return (ordered[mid] + ordered[mid + 1]) / 2.0
||||
def mean(lst):
    """Return the arithmetic mean of lst, or 0.0 for an empty list."""
    total = sum(lst)
    count = len(lst)
    return float(total) / (count if count else 1)
||||
# Script entry point: run the full-day loading benchmark.
if __name__ == '__main__':
    benchmark_load_day()
||||
@@ -1,5 +1,14 @@
|
||||
#!/bin/bash
# For each day of October 2017, print the combined size (du -ch summary
# line) and the number of matching pDNS capture files.
cd /run/media/felix/AE7E01B77E01797B/pDNS;
for i in {01..31}; do
    echo -n -e "day $i \t size: ";
    echo -n -e $(du -ch *"2017-10-$i"* | tail -1) " \t #files: ";
    ls *"2017-10-$i"* | wc -l;
done
||||
#cd /run/media/felix/AE7E01B77E01797B/pDNS;
cd /home/felix/sources/MastersThesis/src/DoresA/data;
month="04"

# Per-day summary: combined size and file count; days without any
# matching files are skipped entirely.
for i in {01..31}; do
    # Fix: the glob must be QUOTED so compgen -G receives the pattern
    # itself; unquoted, the shell expands it before compgen runs and
    # passes filenames (or the literal pattern) instead.
    if compgen -G "*2017-$month-$i*" > /dev/null; then
        echo -n -e "day $i \t size: ";
        echo -n -e $(du -ch *"2017-$month-$i"* | tail -1) " \t #files: ";
        ls *"2017-$month-$i"* | wc -l;
    fi
done
||||
40
src/benchmarks/compare_plain-zipped.py
Executable file
40
src/benchmarks/compare_plain-zipped.py
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python3
|
||||
import gzip
|
||||
import csv
|
||||
import time
|
||||
|
||||
|
||||
logs_dir = 'test-data/'
|
||||
|
||||
|
||||
def compare_load_file():
    """Measure plain vs. gzip CSV read time and report both plus the delta."""
    duration_plain = load_file_plain()
    duration_zipped = load_file_zipped()

    for label, value in (('plain took', duration_plain),
                         ('zipped took', duration_zipped),
                         ('(plain - zipped)', duration_plain - duration_zipped)):
        print(label + ': ' + str(value) + ' s')
||||
def load_file_plain(path=None):
    """Time iterating a plain-text CSV file line by line.

    Args:
        path: file to read; None selects the default benchmark capture
              inside logs_dir (resolved at call time, not def time).

    Returns:
        Elapsed wall-clock seconds as a float.
    """
    if path is None:
        path = logs_dir + 'pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv'
    # perf_counter: monotonic clock, the right tool for benchmarks.
    start = time.perf_counter()
    with open(path, 'rt') as file_p:
        for line in file_p:
            # split() mirrors the per-line parse cost of the zipped
            # variant; the result is intentionally discarded.
            line.split()
    return time.perf_counter() - start
||||
def load_file_zipped(path=None):
    """Time iterating a gzipped CSV file through csv.reader.

    Args:
        path: gzip file to read; None selects the default benchmark
              capture inside logs_dir (resolved at call time).

    Returns:
        Elapsed wall-clock seconds as a float.
    """
    if path is None:
        path = logs_dir + 'pdns_capture.pcap-demchdc902n-2017-09-01_00-20-02.csv.gz'
    start = time.perf_counter()  # monotonic benchmark clock
    # newline='' is the documented setting for streams fed to csv.reader.
    with gzip.open(path, 'rt', newline='') as file_z:
        for _row in csv.reader(file_z):
            pass  # iterate only; we measure decode + parse cost
    return time.perf_counter() - start
||||
# Script entry point: compare plain vs. gzipped read performance.
if __name__ == '__main__':
    compare_load_file()
||||
56
src/benchmarks/load_day.py
Executable file
56
src/benchmarks/load_day.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
import csv
|
||||
import gzip
|
||||
import time
|
||||
import glob
|
||||
|
||||
|
||||
def benchmark_load_day():
    """Run load_day_zipped ten times and print the outlier-cleaned average."""
    durations = [load_day_zipped() for _ in range(10)]
    print('all results: ' + str(durations))
    cleaned = ignore_outliers(durations)
    print('cleaned results: ' + str(cleaned))
    print('average: ' + str(mean(cleaned)))
||||
def load_day_zipped(logs_dir='/home/felix/pdns/', pattern='*-2017-09-01*.csv.gz'):
    """Time iterating every gzipped CSV of one capture day.

    Args:
        logs_dir: directory (with trailing separator) holding the captures.
        pattern:  glob pattern selecting one day's files.

    Returns:
        Elapsed wall-clock seconds as a float (also printed).
    """
    start = time.perf_counter()

    for name in glob.glob(logs_dir + pattern):
        # 'fh' instead of 'file': the original shadowed a builtin name.
        with gzip.open(name, 'rt', newline='') as fh:
            for _row in csv.reader(fh):
                pass  # iterate only; we measure decode + parse cost

    duration = time.perf_counter() - start
    print('iterating day took: ' + str(duration) + ' s')
    return duration
||||
def ignore_outliers(lst):
    """Return lst with entries at or above 110% of the median removed.

    Only high outliers (slow runs) are dropped; low values are kept.
    """
    cutoff = median(lst) * 1.1
    return [duration for duration in lst if duration < cutoff]
||||
def median(lst):
    """Return the median of lst (mean of the two middle values when even)."""
    values = sorted(lst)
    count = len(values)
    middle = (count - 1) // 2
    if count % 2 == 0:
        return (values[middle] + values[middle + 1]) / 2.0
    return values[middle]
||||
def mean(lst):
    """Return the arithmetic mean of lst, or 0.0 for an empty list."""
    divisor = len(lst) or 1
    return float(sum(lst)) / divisor
||||
# Script entry point: run the full-day loading benchmark.
if __name__ == '__main__':
    benchmark_load_day()
||||
|
Can't render this file because it is too large.
|
Reference in New Issue
Block a user