def load_data(self, idir):
    # Loop over all files in the source directory.
    files = os.listdir(idir)
    for f in files:
        try:
            dParse = DataParser(idir + "/" + f, analyse=0, compact=1)
            self.flow_cnt += dParse.lines_cnt
        except Exception:
            print("Error: failed to parse file", idir + "/" + f)
            self.errors += 1
            continue

        # Binary classification label:
        #   1 - correct traffic
        #   0 - anomaly traffic
        #label = 1  # int(f.split("-")[-1].split(".")[0])
        #tmpTLS = dParse.getTLSInfo()

        # Feature extraction
        #tmpBD, tmpBDL = dParse.getByteDistribution()
        tmpIPT = dParse.getIndividualFlowIPTs()
        tmpPL = dParse.getIndividualFlowPacketLengths()
        tmp = dParse.getIndividualFlowMetadata(PKTS=0, BYTES=0, FLOW_TIME=0,
                                               WHT=1, BYTE_DIST_M=0, BYTE_DIST_S=1,
                                               ENTROPY=0, IDP=1)

        if tmpPL is not None:  # and tmpIPT is not None:
            # Iterate over every flow in the file.
            for i in range(len(tmpPL)):
                tmp_data = []
                tmp_data.extend(tmp[i])
                tmp_data.extend(tmpPL[i])
                #tmp_data.extend(tmpIPT[i])
                #tmp_data.extend(tmpBD[i])
                #tmp_data.extend(tmpBDL[i])
                #tmp_data.extend(tmpTLS[i])
                if self.features_cnt == 0:
                    self.features_cnt = len(tmp_data)
                self.data.append(tmp_data)
                self.labels.append(self.label)
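A minimal driver sketch for the loader above, assuming it is available at module scope. The `FlowDataset` class is hypothetical; its attribute names simply match what `load_data` mutates, and the data directory is illustrative.

import numpy as np

class FlowDataset:
    """Hypothetical owner for load_data; attribute names match what it mutates."""
    def __init__(self, label):
        self.data, self.labels = [], []
        self.label = label       # 1 - correct traffic, 0 - anomaly traffic
        self.flow_cnt = 0        # total flows parsed
        self.errors = 0          # files that failed to parse
        self.features_cnt = 0    # feature-vector length, set on the first flow

FlowDataset.load_data = load_data  # bind the loader above as a method

ds = FlowDataset(label=1)
ds.load_data('data/benign')        # illustrative directory of capture JSON files
X, y = np.array(ds.data), np.array(ds.labels)
print('parsed %d flows, %d features, %d parse errors'
      % (ds.flow_cnt, ds.features_cnt, ds.errors))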
def load_data(self, idir, label, max_files):
    files = os.listdir(idir)
    num_files = 0
    for f in files:
        try:
            dParse = DataParser(idir + f, self.compact)
        except Exception:
            print('Error: failed to parse file %s' % (idir + f))
            continue
        num_files += 1

        tmpTLS = dParse.getTLSInfo()
        # Select the byte-distribution representation.
        if self.bd_compact == 1:
            tmpBD = dParse.getByteDistribution_compact()
        elif self.bd_compact == 2:
            tmpBD = dParse.getByteDistribution_mean_std()
        else:
            tmpBD = dParse.getByteDistribution()
        tmpIPT = dParse.getIndividualFlowIPTs()
        tmpPL = dParse.getIndividualFlowPacketLengths()
        tmp, ignore = dParse.getIndividualFlowMetadata()

        if tmp is not None and tmpPL is not None and tmpIPT is not None:
            for i in range(len(tmp)):
                # Skip flows flagged as ignorable for the benign class.
                if ignore[i] == 1 and label == 1.0:
                    continue
                tmp_data = []
                # self.types selects which feature groups are included.
                if 0 in self.types:
                    tmp_data.extend(tmp[i])
                if 1 in self.types:
                    tmp_data.extend(tmpPL[i])
                if 2 in self.types:
                    tmp_data.extend(tmpIPT[i])
                if 3 in self.types:
                    tmp_data.extend(tmpBD[i])
                if 4 in self.types:
                    tmp_data.extend(tmpTLS[i])
                if len(tmp_data) != self.num_params:
                    print('unexpected feature-vector length:', len(tmp_data))
                self.data.append(tmp_data)
            # Second pass mirrors the same skip logic to append labels.
            for i in range(len(tmp)):
                if ignore[i] == 1 and label == 1.0:
                    continue
                self.labels.append(label)
        if max_files is not None and num_files >= max_files:
            break
def load_data(self, idir, label, max_files):
    files = os.listdir(idir)
    num_files = 0
    for f in files:
        try:
            dParse = DataParser(idir + f, self.compact)
        except Exception:
            print('Error: failed to parse file %s' % (idir + f))
            continue
        num_files += 1

        tmpTLS = dParse.getTLSInfo()
        tmpBD = dParse.getByteDistribution()
        tmpIPT = dParse.getIndividualFlowIPTs()
        tmpPL = dParse.getIndividualFlowPacketLengths()
        tmp = dParse.getIndividualFlowMetadata()

        if tmp is not None and tmpPL is not None and tmpIPT is not None:
            for i in range(len(tmp)):
                tmp_data = []
                # self.types selects which feature groups are included.
                if 0 in self.types:
                    tmp_data.extend(tmp[i])
                if 1 in self.types:
                    tmp_data.extend(tmpPL[i])
                if 2 in self.types:
                    tmp_data.extend(tmpIPT[i])
                if 3 in self.types:
                    tmp_data.extend(tmpBD[i])
                if 4 in self.types:
                    tmp_data.extend(tmpTLS[i])
                # Drop feature vectors with an unexpected dimensionality.
                if len(tmp_data) != self.num_params:
                    continue
                self.data.append(tmp_data)
                self.labels.append(label)
        if max_files is not None and num_files >= max_files:
            break
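The labeled variants above are typically driven once per class directory. A sketch under the same module-scope assumption: the `LabeledLoader` owner class, the directory names, the `types` selection, and the `NUM_PARAMS` value are all illustrative, and scikit-learn's LogisticRegression merely stands in for whatever classifier is trained downstream (the UI config does name a 'logreg' type). Note the loader concatenates `idir + f`, so directory paths need a trailing slash.

import numpy as np
from sklearn.linear_model import LogisticRegression

NUM_PARAMS = 208  # illustrative; must equal the length produced by the chosen feature groups

class LabeledLoader:
    """Hypothetical owner for the labeled load_data variants above."""
    def __init__(self, types, num_params, compact=0):
        self.types = types            # 0=metadata, 1=packet lengths, 2=IPTs, 3=byte dist, 4=TLS
        self.num_params = num_params
        self.compact = compact
        self.data, self.labels = [], []

LabeledLoader.load_data = load_data   # bind the variant defined above

loader = LabeledLoader(types=[0, 1, 2], num_params=NUM_PARAMS)
loader.load_data('data/benign/', label=1.0, max_files=100)    # trailing slash required
loader.load_data('data/malware/', label=0.0, max_files=100)

X, y = np.array(loader.data), np.array(loader.labels)
clf = LogisticRegression().fit(X, y)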
def devices():
    global flows
    global data
    global metadata
    global count_flocap
    global classifiers_to_display
    global classifier_names

    classifiers_to_display = []
    classifier_names = []
    display_fields = OrderedDict({})

    # Parse the classifier and display-field configuration.
    config_file = 'laui.cfg'
    fp = open(config_file, 'r')
    for line in fp:
        if line.startswith('display_field'):
            tokens = line.split()
            display_fields[int(tokens[3])] = (tokens[1], tokens[2].replace('_', ' '))
            continue
        elif line.strip() == '' or line.startswith('#') or not line.startswith('classifier'):
            continue
        tokens = line.split()
        if tokens[2] == 'logreg':
            classifiers_to_display.append((tokens[1], tokens[2], tokens[3], tokens[4]))
            classifier_names.append(tokens[1])
        elif tokens[2] == 'mapping':
            tmp_map = {}
            with open(tokens[4], 'r') as fp2:
                for line2 in fp2:
                    tokens2 = line2.split()
                    tmp_map[tokens2[0]] = float(tokens2[1])
            classifiers_to_display.append((tokens[1], tokens[2], tmp_map, int(tokens[3])))
            classifier_names.append(tokens[1])
    fp.close()

    subnet = '10.0.2.'
    devices_ = {}

    # Collect the input files: either an uploaded JSON file or up to the
    # six most recent capture files from out_dir.
    file_names = []
    is_upload = False
    if request.files.get('upload') is not None:
        upload = request.files.get('upload')
        dir_name = tempfile.mkdtemp()
        upload.save(os.path.join(dir_name, 'temp.json'))
        file_names.append(os.path.join(dir_name, 'temp.json'))
        is_upload = True
    else:
        tmp_files = get_files_by_time(out_dir)
        tmp_files.reverse()
        for i in range(min(len(tmp_files), 6)):
            file_names.append(out_dir + tmp_files[i])

    data = []
    metadata = []
    total_flows = 0
    for f in file_names:
        try:  # just a robustness check
            parser = DataParser(f)
            tmpBD = parser.getByteDistribution()
            tmpIPT = parser.getIndividualFlowIPTs()
            tmpPL = parser.getIndividualFlowPacketLengths()
            tmp, tmp_m = parser.getIndividualFlowMetadata()
        except Exception:
            continue

        if parser.advancedInfo is None:
            continue
        for k in parser.advancedInfo:
            flows[k] = parser.advancedInfo[k]

        if tmp is not None and tmpPL is not None and tmpIPT is not None:
            # Walk the flows newest-first; keep only those involving the
            # monitored subnet.
            for i in range(len(tmp)):
                tmp_id = ''
                if tmp_m[len(tmp) - i - 1][0].startswith(subnet):
                    tmp_id = tmp_m[len(tmp) - i - 1][0]
                elif tmp_m[len(tmp) - i - 1][1].startswith(subnet):
                    tmp_id = tmp_m[len(tmp) - i - 1][1]
                else:
                    continue

                tmp_data = []
                tmp_data.extend(tmp[len(tmp) - i - 1])
                tmp_data.extend(tmpPL[len(tmp) - i - 1])
                tmp_data.extend(tmpIPT[len(tmp) - i - 1])
                tmp_data.extend(tmpBD[len(tmp) - i - 1])

                data.append(tmp_data)
                metadata.append(tmp_m[len(tmp) - i - 1])
                total_flows += 1
                if total_flows == count_flocap * 2 and not is_upload:
                    break
        if total_flows == count_flocap * 2 and not is_upload:
            break

    # Clean up the uploaded temp file and its directory.
    if is_upload:
        os.remove(os.path.join(dir_name, 'temp.json'))
        os.removedirs(dir_name)

    results = classify_samples(data, metadata)

    tmp = {}
    to_display_names = []
    for key in display_fields:
        to_display_names.append(display_fields[key])
    for i in range(len(results)):
        color = []
        for j in range(len(results[i])):
            color.append(get_color(results[i][j]))

        tmp_id = ''
        if metadata[i][0].startswith(subnet):
            tmp_id = metadata[i][0]
        elif metadata[i][1].startswith(subnet):
            tmp_id = metadata[i][1]
        else:
            continue

        tmp_to_display = []
        for key in display_fields:
            tmp_to_display.append(metadata[i][key])

        if tmp_id not in devices_:
            devices_[tmp_id] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            tmp[tmp_id] = []
        devices_[tmp_id][0] += 1  # total flows
        # Tally cipher-suite quality per device.
        if metadata[i][9] in ciphers:
            (name_, rec_) = ciphers[metadata[i][9]]
            if rec_ == 'RECOMMENDED':
                devices_[tmp_id][1] += 1
            elif rec_ == 'LEGACY':
                devices_[tmp_id][2] += 1
            elif rec_ == 'AVOID':
                devices_[tmp_id][3] += 1
        if metadata[i][10] != -1:
            devices_[tmp_id][metadata[i][12] + 4] += 1

        tmp[tmp_id].append((results[i], metadata[i][0], metadata[i][1], metadata[i][2],
                            metadata[i][3], metadata[i][4], metadata[i][5], metadata[i][6],
                            metadata[i][7], color, '', '', metadata[i][8], tmp_to_display))

    return template('devices', devices=devices_, subnet=subnet + '*', results=tmp,
                    num_flows=len(results), classifier_names=classifier_names,
                    to_display_names=to_display_names)
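Both views colorize each classifier score with a `get_color` helper that is not defined in this section. A plausible sketch consistent with how it is called (one score in, one display color out); the thresholds and color values are assumptions, not taken from the original:

def get_color(score):
    """Map a classifier score in [0, 1] to an HTML color (thresholds assumed)."""
    if score < 0.5:
        return '#00ff00'   # likely benign
    elif score < 0.8:
        return '#ffa500'   # suspicious
    return '#ff0000'       # likely malicious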
def results():
    global flows
    global data
    global metadata
    global count_flocap
    global classifiers_to_display
    global classifier_names

    classifiers_to_display = []
    classifier_names = []
    display_fields = OrderedDict({})

    # Parse the classifier and display-field configuration.
    config_file = 'laui.cfg'
    fp = open(config_file, 'r')
    for line in fp:
        if line.startswith('display_field'):
            tokens = line.split()
            display_fields[int(tokens[3])] = (tokens[1], tokens[2].replace('_', ' '))
            continue
        elif line.strip() == '' or line.startswith('#') or not line.startswith('classifier'):
            continue
        tokens = line.split()
        if tokens[2] == 'logreg':
            classifiers_to_display.append((tokens[1], tokens[2], tokens[3], tokens[4]))
            classifier_names.append(tokens[1])
        elif tokens[2] == 'mapping':
            tmp_map = {}
            with open(tokens[4], 'r') as fp2:
                for line2 in fp2:
                    tokens2 = line2.split()
                    tmp_map[tokens2[0]] = float(tokens2[1])
            classifiers_to_display.append((tokens[1], tokens[2], tmp_map, int(tokens[3])))
            classifier_names.append(tokens[1])
    fp.close()

    # Collect the input files: either an uploaded JSON file or up to the
    # six most recent capture files from out_dir.
    file_names = []
    is_upload = False
    if request.files.get('upload') is not None:
        upload = request.files.get('upload')
        dir_name = tempfile.mkdtemp()
        upload.save(os.path.join(dir_name, 'temp.json'))
        file_names.append(os.path.join(dir_name, 'temp.json'))
        is_upload = True
    else:
        tmp_files = get_files_by_time(out_dir)
        tmp_files.reverse()
        for i in range(min(len(tmp_files), 6)):
            file_names.append(out_dir + tmp_files[i])

    start_time = time.time()

    data = []
    metadata = []
    total_flows = 0
    for f in file_names:
        try:  # just a robustness check
            parser = DataParser(f)
            tmpBD = parser.getByteDistribution()
            tmpIPT = parser.getIndividualFlowIPTs()
            tmpPL = parser.getIndividualFlowPacketLengths()
            tmp, tmp_m = parser.getIndividualFlowMetadata()
        except Exception:
            continue

        if parser.advancedInfo is None:
            continue
        for k in parser.advancedInfo:
            flows[k] = parser.advancedInfo[k]

        if tmp is not None and tmpPL is not None and tmpIPT is not None:
            for i in range(len(tmp)):
                tmp_data = []
                tmp_data.extend(tmp[len(tmp) - i - 1])
                tmp_data.extend(tmpPL[len(tmp) - i - 1])
                tmp_data.extend(tmpIPT[len(tmp) - i - 1])
                tmp_data.extend(tmpBD[len(tmp) - i - 1])

                # nga issue, will fix when pcaps start flowing again
                if tmp_data[2] == 0 and tmp_data[4] > 0:
                    continue
                if tmp_data[3] == 0 and tmp_data[5] > 0:
                    continue

                data.append(tmp_data)
                metadata.append(tmp_m[len(tmp) - i - 1])
                total_flows += 1
                if total_flows == count_flocap * 2 and not is_upload:
                    break
        if total_flows == count_flocap * 2 and not is_upload:
            break

    # Clean up the uploaded temp file and its directory.
    if is_upload:
        os.remove(os.path.join(dir_name, 'temp.json'))
        os.removedirs(dir_name)

    results = classify_samples(data, metadata)

    # Treat the most frequently seen source address as localhost.
    lhost = {}
    for i in range(len(metadata)):
        if metadata[i][0] not in lhost:
            lhost[metadata[i][0]] = 1
        else:
            lhost[metadata[i][0]] += 1
    sorted_lhost = sorted(lhost.items(), key=operator.itemgetter(1))
    sorted_lhost.reverse()
    if len(sorted_lhost) > 0:
        (lh, _) = sorted_lhost[0]
    else:
        lh = None

    tmp = []
    to_display_names = []
    for key in display_fields:
        to_display_names.append(display_fields[key])
    for i in range(len(results)):
        color = []
        for j in range(len(results[i])):
            color.append(get_color(results[i][j]))

        s_orgName = ''
        d_orgName = ''
        if metadata[i][0] == lh:
            s_orgName = 'localhost'
        if metadata[i][1] == lh:
            d_orgName = 'localhost'

        tmp_to_display = []
        for key in display_fields:
            tmp_to_display.append(metadata[i][key])

        tmp.append((results[i], metadata[i][0], metadata[i][1], metadata[i][2],
                    metadata[i][3], metadata[i][4], metadata[i][5], metadata[i][6],
                    metadata[i][7], color, s_orgName, d_orgName, metadata[i][8],
                    tmp_to_display))

    end_time = time.time() - start_time

    # Sort flows by classifier score, highest first.
    tmp = sorted(tmp, key=lambda x: x[0])
    tmp.reverse()
    return template('results', results=tmp, num_flows=len(results), t=end_time,
                    classifier_names=classifier_names, to_display_names=to_display_names)
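Both views also pull their input from `get_files_by_time(out_dir)`, which is likewise not defined here. A minimal sketch consistent with how it is used, i.e. it returns bare file names sorted oldest-first (callers reverse() the list and prepend `out_dir`, so `out_dir` is assumed to end with a path separator):

import os

def get_files_by_time(directory):
    """Return file names in `directory` sorted by modification time, oldest first."""
    files = [f for f in os.listdir(directory)
             if os.path.isfile(os.path.join(directory, f))]
    files.sort(key=lambda f: os.path.getmtime(os.path.join(directory, f)))
    return files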