class PickleLoader:
    """Loads a pickled flow object from disk, going through the ``Loader`` cache."""

    def __init__(self):
        from flow import Flows

        self.flow = Flows()

    # Get the netflow
    def get_netflow(self):
        """Return the flow object currently held by this loader."""
        return self.flow

    @staticmethod
    def _read_pickle(file_name):
        """Unpickle and return the object stored in *file_name*.

        The file is opened read-only in binary mode, which every pickle
        protocol accepts, and is closed as soon as loading finishes.
        """
        import pickle

        # SECURITY: unpickling can execute arbitrary code; only point this
        # at files from a trusted source.
        with open(file_name, "rb") as handle:
            return pickle.load(handle)

    # Load a file
    def load(self, host, user, password, db, amount, good=False,
             classify=False, db_file=""):
        """Load the flows stored in *db_file* and keep a random subset.

        ``Loader.get_data`` is consulted first; only when it yields nothing
        is the pickle read from disk and handed to ``Loader.insert_data``.
        The result of ``random(amount)`` on the loaded object then replaces
        ``self.flow``.

        ``host``, ``user``, ``password``, ``db``, ``good`` and ``classify``
        are accepted but not used by this method.

        Returns:
            True on success, False if any step raised an exception.
        """
        file_name = db_file
        try:
            self.flow = Loader.get_data(file_name)
            if not self.flow:
                self.flow = self._read_pickle(file_name)
                Loader.insert_data(file_name, self.flow)
                print('Loaded data manually')
            else:
                print('Use stored data')
            self.flow = self.flow.random(amount)
            return True
        except Exception:
            # Best effort: callers only receive a success flag.
            return False
class PickleLoader:
    """Loads a pickled flow object from disk, going through the ``Loader`` cache."""

    def __init__(self):
        from flow import Flows

        self.flow = Flows()

    # Get the netflow
    def get_netflow(self):
        """Return the flow object currently held by this loader."""
        return self.flow

    @staticmethod
    def _read_pickle(file_name):
        """Unpickle and return the object stored in *file_name*.

        The file is opened read-only in binary mode, which every pickle
        protocol accepts, and is closed as soon as loading finishes.
        """
        import pickle

        # SECURITY: unpickling can execute arbitrary code; only point this
        # at files from a trusted source.
        with open(file_name, "rb") as handle:
            return pickle.load(handle)

    # Load a file
    def load(self, host, user, password, db, amount, good=False,
             classify=False, db_file=""):
        """Load the flows stored in *db_file* and keep a random subset.

        ``Loader.get_data`` is consulted first; only when it yields nothing
        is the pickle read from disk and handed to ``Loader.insert_data``.
        The result of ``random(amount)`` on the loaded object then replaces
        ``self.flow``.

        ``host``, ``user``, ``password``, ``db``, ``good`` and ``classify``
        are accepted but not used by this method.

        Returns:
            True on success, False if any step raised an exception.
        """
        file_name = db_file
        try:
            self.flow = Loader.get_data(file_name)
            if not self.flow:
                self.flow = self._read_pickle(file_name)
                Loader.insert_data(file_name, self.flow)
                print('Loaded data manually')
            else:
                print('Use stored data')
            self.flow = self.flow.random(amount)
            return True
        except Exception:
            # Best effort: callers only receive a success flag.
            return False
def __init__(self):
    """Attach a newly constructed ``Flows`` object as ``self.flow``."""
    from flow import Flows

    self.flow = Flows()
class CTULoader:
    """Reads flow records from a comma-separated file with a header line."""

    # Attribute names filled from the columns of one record, in column order.
    _FIELDS = (
        'start_time', 'duration', 'protocol', 'src_ip', 'src_port',
        'bidirectional', 'dest_ip', 'dest_port', 'state', 'sTos', 'dTos',
        'total_pckts', 'total_bytes', 'total_srcbytes', 'label',
    )

    def __init__(self):
        from flow import Flows

        self.flow = Flows()

    # Get the labels used
    def get_labels(self, file_name):
        """Return the distinct labels (15th column) found in *file_name*.

        The first line is skipped as a header and blank lines are ignored.

        Returns:
            A list of label strings in no particular order, or ``[]`` when
            the file cannot be read, is empty, or holds a line with fewer
            than 15 columns.
        """
        labels = {}
        try:
            with open(file_name) as f:
                next(f)
                for line in f:
                    if not line.strip():
                        # A blank (typically trailing) line carries no label.
                        continue
                    labels[line.split(',')[14].strip()] = True
        except Exception:
            return []
        return list(labels)

    # Load a file
    def load(self, file_name, fr, to, good_labels=None, good=True,
             amount=-1, amountGood=0):
        """Add records from *file_name* to ``self.flow``.

        Args:
            file_name: path of the file; an already open handle is reused
                when ``Loader.get_data`` returns one.
            fr: number of data lines to skip after the header.
            to: zero-based index (among data lines) of the last line to
                process.
            good_labels: collection of targets regarded as good; when
                empty or None every record is added.
            good: if true keep only records whose ``make_target()`` is in
                *good_labels*; otherwise keep the records that are not,
                plus up to *amountGood* records that are.
            amount: cap on the counted records, ``-1`` for no cap.
            amountGood: see *good*.

        Returns:
            True on success, False if anything raised (for example the file
            has fewer than ``fr + 1`` lines or a record is malformed).
            Records added before the failure stay in ``self.flow``.
        """
        f = Loader.get_data(file_name)
        if not f:
            # The handle is handed to Loader.insert_data for later calls,
            # so it is deliberately not closed here.
            f = open(file_name)
            Loader.insert_data(file_name, f)
            print('Loaded data manually')
        else:
            print('Use stored data')
            f.seek(0, 0)
        try:
            next(f)  # header line
            i = 0
            total = 0
            while i < fr and next(f):
                i += 1
            g = 0
            for line in f:
                if amount >= 0 and total >= amount and g >= amountGood:
                    break
                # Blank lines hold no record; they still count as a line
                # for the fr/to window.
                if line.strip():
                    item = self.load_single(line.split(','))
                    if not good_labels:
                        self.flow.add_record(item)
                    elif good:
                        if item.make_target() in good_labels:
                            total += 1
                            self.flow.add_record(item)
                    elif item.make_target() not in good_labels:
                        # NOTE(review): `<=` lets total reach amount + 1
                        # while good records are still being collected;
                        # confirm whether `<` was intended.
                        if total <= amount or amount == -1:
                            total += 1
                            self.flow.add_record(item)
                    elif g < amountGood:
                        self.flow.add_record(item)
                        g += 1
                if i == to:
                    break
                i += 1
            return True
        except Exception:
            return False

    # Load a single line
    # Format:
    # StartTime,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,label
    # 2011/08/10 09:46:59.607825,1.026539,tcp,94.44.127.113,1577, ->,147.32.84.59,6881,S_RA,0,0,4,276,156,flow=Background-Established-cmpgw-CVUT
    def load_single(self, items):
        """Build a ``FlowRecord`` from the split columns of one line.

        Each of the first 15 entries of *items* is stripped and stored as a
        string on the matching attribute. Raises ``IndexError`` when fewer
        than 15 columns are supplied.
        """
        from flow import FlowRecord

        rec = FlowRecord()
        for column, attribute in enumerate(self._FIELDS):
            setattr(rec, attribute, items[column].strip())
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow object currently held by this loader."""
        return self.flow
def __init__(self):
    """Attach a new ``Flows`` object and the three SELECT statements.

    Each statement ends in ``LIMIT %s, %s`` and therefore takes two
    parameters when executed.
    """
    from flow import Flows

    self.flow = Flows()
    # Every row of `flows`.
    self.cmd = "SELECT * FROM `flows` LIMIT %s, %s"
    # Rows of `flows` joined to their alerts and alert types.
    self.cmd_total = (
        "SELECT * FROM `flows` "
        "INNER JOIN `flow_alert` ON flows.id = flow_alert.flowid "
        "INNER JOIN `alerts` ON flow_alert.alertid = alerts.id "
        "INNER JOIN `alert_type` ON alerts.type = alert_type.id "
        "LIMIT %s, %s"
    )
    # Rows of `flows` whose id does not appear in `flow_alert`.
    self.cmd_good = (
        "SELECT * FROM `flows` WHERE `id` NOT IN "
        "(SELECT `flowid` FROM `flow_alert`) LIMIT %s, %s"
    )
class SQLLoader:
    """Loads flow records from a MySQL database through pymysql."""

    # Upper bound used when drawing the random LIMIT offset.
    # NOTE(review): presumably the row count of `flows` - confirm.
    FLOW_TABLE_SIZE = 14164163

    def __init__(self):
        from flow import Flows

        self.flow = Flows()
        # Every row of `flows`.
        self.cmd = "SELECT * FROM `flows` LIMIT %s, %s"
        # Rows of `flows` joined to their alerts and alert types.
        self.cmd_total = (
            "SELECT * FROM `flows` "
            "INNER JOIN `flow_alert` ON flows.id = flow_alert.flowid "
            "INNER JOIN `alerts` ON flow_alert.alertid = alerts.id "
            "INNER JOIN `alert_type` ON alerts.type = alert_type.id "
            "LIMIT %s, %s"
        )
        # Rows of `flows` whose id does not appear in `flow_alert`.
        self.cmd_good = (
            "SELECT * FROM `flows` WHERE `id` NOT IN "
            "(SELECT `flowid` FROM `flow_alert`) LIMIT %s, %s"
        )

    # Load a file
    def load(self, host, user, password, db, amount, good=False, classify=False):
        """Fetch *amount* rows from a random offset and add them to ``self.flow``.

        Args:
            host, user, password, db: connection settings passed to
                ``pymysql.connect``.
            amount: number of rows requested (second LIMIT argument).
            good: use the query for flows without alerts (ignored when
                *classify* is true).
            classify: use the query that joins flows with their alerts.

        Returns:
            True on success. On any exception after connecting, the error
            is printed and False is returned. A failure to connect
            propagates to the caller.
        """
        import random
        import pymysql.cursors

        # Connect to the database
        connection = pymysql.connect(host=host,
                                     user=user,
                                     password=password,
                                     db=db,
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)
        try:
            with connection.cursor() as cursor:
                ran = random.randint(0, self.FLOW_TABLE_SIZE - amount)
                if classify:
                    query = self.cmd_total
                elif good:
                    query = self.cmd_good
                else:
                    query = self.cmd
                cursor.execute(query, (ran, amount))
                for row in cursor:
                    self.flow.add_record(self.load_single(row, good, classify))
            ret = True
        except Exception as e:
            print("Error: " + str(e))
            ret = False
        finally:
            connection.close()
        return ret

    @staticmethod
    def _duration(items):
        """Return end timestamp minus start timestamp for one row.

        ``*_msec`` is divided by 1000 and added to the matching ``*_time``
        value before subtracting, so the result is non-negative whenever
        the flow ends at or after its start.
        """
        started = items['start_time'] + items['start_msec'] / 1000.0
        ended = items['end_time'] + items['end_msec'] / 1000.0
        return ended - started

    # Load a single row
    def load_single(self, items, good, classify):
        """Build a ``FlowRecord`` from one row dictionary.

        Keys read from *items*: ``start_time``, ``start_msec``,
        ``end_time``, ``end_msec``, ``prot``, ``src_ip``, ``src_port``,
        ``dst_ip``, ``dst_port``, ``packets``, ``octets``, ``tcp_flags``
        and, when *classify* is true, ``description``.

        The label is the row's description when *classify* is true,
        otherwise a fixed string selected by *good*.
        """
        import ipaddress
        from flow import FlowRecord

        rec = FlowRecord()
        rec.start_time = items['start_time']
        rec.duration = self._duration(items)
        rec.protocol = items['prot']
        # Addresses are rendered through ipaddress.ip_address and stored
        # as strings.
        rec.src_ip = str(ipaddress.ip_address(items['src_ip']))
        rec.src_port = items['src_port']
        rec.dest_ip = str(ipaddress.ip_address(items['dst_ip']))
        rec.dest_port = items['dst_port']
        rec.total_pckts = items['packets']
        rec.total_bytes = items['octets']
        if classify:
            rec.label = items['description']
        elif good:
            rec.label = 'non-malicous'
        else:
            rec.label = 'malicous'
        rec.tcp_flags = items['tcp_flags']
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow object currently held by this loader."""
        return self.flow
class CTULoader:
    """Reads flow records from a comma-separated file with a header line."""

    # Attribute names filled from the columns of one record, in column order.
    _FIELDS = (
        'start_time', 'duration', 'protocol', 'src_ip', 'src_port',
        'bidirectional', 'dest_ip', 'dest_port', 'state', 'sTos', 'dTos',
        'total_pckts', 'total_bytes', 'total_srcbytes', 'label',
    )

    def __init__(self):
        from flow import Flows

        self.flow = Flows()

    # Get the labels used
    def get_labels(self, file_name):
        """Return the distinct labels (15th column) found in *file_name*.

        The first line is skipped as a header and blank lines are ignored.

        Returns:
            A list of label strings in no particular order, or ``[]`` when
            the file cannot be read, is empty, or holds a line with fewer
            than 15 columns.
        """
        labels = {}
        try:
            with open(file_name) as f:
                next(f)
                for line in f:
                    if not line.strip():
                        # A blank (typically trailing) line carries no label.
                        continue
                    labels[line.split(',')[14].strip()] = True
        except Exception:
            return []
        return list(labels)

    # Load a file
    def load(self, file_name, fr, to, good_labels=None, good=True,
             amount=-1, amountGood=0):
        """Add records from *file_name* to ``self.flow``.

        Args:
            file_name: path of the file; an already open handle is reused
                when ``Loader.get_data`` returns one.
            fr: number of data lines to skip after the header.
            to: zero-based index (among data lines) of the last line to
                process.
            good_labels: collection of targets regarded as good; when
                empty or None every record is added.
            good: if true keep only records whose ``make_target()`` is in
                *good_labels*; otherwise keep the records that are not,
                plus up to *amountGood* records that are.
            amount: cap on the counted records, ``-1`` for no cap.
            amountGood: see *good*.

        Returns:
            True on success, False if anything raised (for example the file
            has fewer than ``fr + 1`` lines or a record is malformed).
            Records added before the failure stay in ``self.flow``.
        """
        f = Loader.get_data(file_name)
        if not f:
            # The handle is handed to Loader.insert_data for later calls,
            # so it is deliberately not closed here.
            f = open(file_name)
            Loader.insert_data(file_name, f)
            print('Loaded data manually')
        else:
            print('Use stored data')
            f.seek(0, 0)
        try:
            next(f)  # header line
            i = 0
            total = 0
            while i < fr and next(f):
                i += 1
            g = 0
            for line in f:
                if amount >= 0 and total >= amount and g >= amountGood:
                    break
                # Blank lines hold no record; they still count as a line
                # for the fr/to window.
                if line.strip():
                    item = self.load_single(line.split(','))
                    if not good_labels:
                        self.flow.add_record(item)
                    elif good:
                        if item.make_target() in good_labels:
                            total += 1
                            self.flow.add_record(item)
                    elif item.make_target() not in good_labels:
                        # NOTE(review): `<=` lets total reach amount + 1
                        # while good records are still being collected;
                        # confirm whether `<` was intended.
                        if total <= amount or amount == -1:
                            total += 1
                            self.flow.add_record(item)
                    elif g < amountGood:
                        self.flow.add_record(item)
                        g += 1
                if i == to:
                    break
                i += 1
            return True
        except Exception:
            return False

    # Load a single line
    # Format:
    # StartTime,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,label
    # 2011/08/10 09:46:59.607825,1.026539,tcp,94.44.127.113,1577, ->,147.32.84.59,6881,S_RA,0,0,4,276,156,flow=Background-Established-cmpgw-CVUT
    def load_single(self, items):
        """Build a ``FlowRecord`` from the split columns of one line.

        Each of the first 15 entries of *items* is stripped and stored as a
        string on the matching attribute. Raises ``IndexError`` when fewer
        than 15 columns are supplied.
        """
        from flow import FlowRecord

        rec = FlowRecord()
        for column, attribute in enumerate(self._FIELDS):
            setattr(rec, attribute, items[column].strip())
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow object currently held by this loader."""
        return self.flow
class SQLLoader:
    """Loads flow records from a MySQL database through pymysql."""

    # Upper bound used when drawing the random LIMIT offset.
    # NOTE(review): presumably the row count of `flows` - confirm.
    FLOW_TABLE_SIZE = 14164163

    def __init__(self):
        from flow import Flows

        self.flow = Flows()
        # Every row of `flows`.
        self.cmd = "SELECT * FROM `flows` LIMIT %s, %s"
        # Rows of `flows` joined to their alerts and alert types.
        self.cmd_total = (
            "SELECT * FROM `flows` "
            "INNER JOIN `flow_alert` ON flows.id = flow_alert.flowid "
            "INNER JOIN `alerts` ON flow_alert.alertid = alerts.id "
            "INNER JOIN `alert_type` ON alerts.type = alert_type.id "
            "LIMIT %s, %s"
        )
        # Rows of `flows` whose id does not appear in `flow_alert`.
        self.cmd_good = (
            "SELECT * FROM `flows` WHERE `id` NOT IN "
            "(SELECT `flowid` FROM `flow_alert`) LIMIT %s, %s"
        )

    # Load a file
    def load(self, host, user, password, db, amount, good=False, classify=False):
        """Fetch *amount* rows from a random offset and add them to ``self.flow``.

        Args:
            host, user, password, db: connection settings passed to
                ``pymysql.connect``.
            amount: number of rows requested (second LIMIT argument).
            good: use the query for flows without alerts (ignored when
                *classify* is true).
            classify: use the query that joins flows with their alerts.

        Returns:
            True on success. On any exception after connecting, the error
            is printed and False is returned. A failure to connect
            propagates to the caller.
        """
        import random
        import pymysql.cursors

        # Connect to the database
        connection = pymysql.connect(host=host,
                                     user=user,
                                     password=password,
                                     db=db,
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)
        try:
            with connection.cursor() as cursor:
                ran = random.randint(0, self.FLOW_TABLE_SIZE - amount)
                if classify:
                    query = self.cmd_total
                elif good:
                    query = self.cmd_good
                else:
                    query = self.cmd
                cursor.execute(query, (ran, amount))
                for row in cursor:
                    self.flow.add_record(self.load_single(row, good, classify))
            ret = True
        except Exception as e:
            print("Error: " + str(e))
            ret = False
        finally:
            connection.close()
        return ret

    @staticmethod
    def _duration(items):
        """Return end timestamp minus start timestamp for one row.

        ``*_msec`` is divided by 1000 and added to the matching ``*_time``
        value before subtracting, so the result is non-negative whenever
        the flow ends at or after its start.
        """
        started = items['start_time'] + items['start_msec'] / 1000.0
        ended = items['end_time'] + items['end_msec'] / 1000.0
        return ended - started

    # Load a single row
    def load_single(self, items, good, classify):
        """Build a ``FlowRecord`` from one row dictionary.

        Keys read from *items*: ``start_time``, ``start_msec``,
        ``end_time``, ``end_msec``, ``prot``, ``src_ip``, ``src_port``,
        ``dst_ip``, ``dst_port``, ``packets``, ``octets``, ``tcp_flags``
        and, when *classify* is true, ``description``.

        The label is the row's description when *classify* is true,
        otherwise a fixed string selected by *good*.
        """
        import ipaddress
        from flow import FlowRecord

        rec = FlowRecord()
        rec.start_time = items['start_time']
        rec.duration = self._duration(items)
        rec.protocol = items['prot']
        # Addresses are rendered through ipaddress.ip_address and stored
        # as strings.
        rec.src_ip = str(ipaddress.ip_address(items['src_ip']))
        rec.src_port = items['src_port']
        rec.dest_ip = str(ipaddress.ip_address(items['dst_ip']))
        rec.dest_port = items['dst_port']
        rec.total_pckts = items['packets']
        rec.total_bytes = items['octets']
        if classify:
            rec.label = items['description']
        elif good:
            rec.label = 'non-malicous'
        else:
            rec.label = 'malicous'
        rec.tcp_flags = items['tcp_flags']
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow object currently held by this loader."""
        return self.flow
class Trainer(object):
    """Dispatches training to a trainer class chosen by name and collects flows."""

    def __init__(self):
        from flow import Flows

        self.flow = Flows()

    # Train supervised with samples and targets
    def train(self, algorithm, data, feature, good_labels, config):
        """Run the trainer named by ``data["type"]`` and keep its flows.

        ``data["type"]`` is set to ``"DefaultTrainer"`` when missing. The
        selected trainer's ``train`` receives the same arguments; a truthy
        result is passed to ``self.flow.addFlows``.

        Returns:
            True when the trainer returned a truthy result, else False.
        """
        key = self.check_keys(data, "type", "DefaultTrainer")
        trainer = Trainer.get_trainer(key, DefaultTrainer())
        print("Loaded training algorithm: " + str(key) + ".")
        fl = trainer.train(algorithm, data, feature, good_labels, config)
        if not fl:
            return False
        self.flow.addFlows(fl)
        return True

    def check_keys(self, dic, key, val):
        """Return ``dic[key]``, first storing *val* there if the key is absent."""
        if key not in dic:
            dic[key] = val
        return dic[key]

    @staticmethod
    def get_trainer(name, default=None):
        """Instantiate the attribute of this module called *name*.

        Returns *default* (after printing a notice) when the lookup or the
        construction raises, including when *name* is not a string.
        """
        import sys

        try:
            return getattr(sys.modules[__name__], name)()
        except Exception:
            # str() keeps the notice itself from raising when name is not
            # a string (e.g. None).
            print("Trainer \"" + str(name) + "\"does not exist.")
            return default

    def trainAll(self, feature, algorithm, good_labels, manager, is_binary):
        """Train *algorithm* on the collected flows and report class counts.

        Training only happens when there are more than two samples.
        Targets found in *good_labels* are counted as negative, all others
        as positive, and both counts go to ``manager.add_new_result``.
        """
        samples = self.flow.get_sample_data_complete(feature)
        targets = self.flow.get_target_data(is_binary, good_labels)
        if len(samples) > 2:
            algorithm.train(samples, targets)
        # NOTE(review): the source's indentation was lost; the counting
        # below is assumed to run regardless of the size check - confirm.
        pos_train = 0
        neg_train = 0
        for target in targets:
            if target in good_labels:
                neg_train += 1
            else:
                pos_train += 1
        manager.add_new_result(pos_train, neg_train)

    def default(self, algorithm, loader, feature, file):
        """Return the loader's flows when more than two are available.

        Prints the training size. With two or fewer flows a notice is
        printed and None is returned. *algorithm* and *feature* are not
        used here.
        """
        size = loader.get_netflow().get_size()
        print("Training size is " + str(size) + ".")
        if loader.get_netflow().get_size() <= 2:
            print("Training set too small.")
            return None
        print("Training set \"" + file + "\" done.")
        return loader.get_netflow()