def results():
    """Render the 'results' page: parse classifier config, load the newest
    capture files (or an uploaded JSON), classify the flows, and return the
    rendered template.

    Reads/writes the module globals ``flows``, ``data``, ``metadata``,
    ``count_flocap``, ``classifiers_to_display`` and ``classifier_names``.

    Returns:
        The rendered 'results' template (bottle ``template`` call).
    """
    global flows
    global data
    global metadata
    global count_flocap
    global classifiers_to_display
    global classifier_names

    classifiers_to_display = []
    classifier_names = []
    display_fields = OrderedDict({})

    # Parse laui.cfg: 'display_field' lines define extra metadata columns,
    # 'classifier' lines define logreg- or mapping-based classifiers.
    config_file = 'laui.cfg'
    with open(config_file, 'r') as fp:
        for line in fp:
            if line.startswith('display_field'):
                tokens = line.split()
                display_fields[int(tokens[3])] = (tokens[1],
                                                  tokens[2].replace('_', ' '))
                continue
            elif (line.strip() == '' or line.startswith('#')
                    or not line.startswith('classifier')):
                continue
            tokens = line.split()
            if tokens[2] == 'logreg':
                classifiers_to_display.append((tokens[1], tokens[2],
                                               tokens[3], tokens[4]))
                classifier_names.append(tokens[1])
            elif tokens[2] == 'mapping':
                # mapping classifier: load "<key> <float>" pairs from a file.
                tmp_map = {}
                with open(tokens[4], 'r') as fp2:
                    for line2 in fp2:
                        tokens2 = line2.split()
                        tmp_map[tokens2[0]] = float(tokens2[1])
                classifiers_to_display.append((tokens[1], tokens[2],
                                               tmp_map, int(tokens[3])))
                classifier_names.append(tokens[1])

    file_names = []
    is_upload = False
    dir_name = None
    upload_path = None
    if request.files.get('upload') is not None:
        upload = request.files.get('upload')
        dir_name = tempfile.mkdtemp()
        # BUG FIX: the original did `dir_name + 'temp.json'`, which (no
        # trailing separator on mkdtemp()) saved a *sibling* of the temp dir
        # and leaked it on cleanup. Join so the file lives inside the dir.
        upload_path = os.path.join(dir_name, 'temp.json')
        upload.save(upload_path)
        file_names.append(upload_path)
        is_upload = True
    else:
        tmp_files = get_files_by_time(out_dir)
        tmp_files.reverse()
        # Take up to the six most recent capture files (was six copy-pasted
        # `if len(tmp_files) > k` branches).
        file_names.extend(out_dir + f for f in tmp_files[:6])

    start_time = time.time()

    data = []
    metadata = []
    total_flows = 0
    for f in file_names:
        try:  # robustness check: skip files that fail to parse
            parser = DataParser(f)
            tmpBD = parser.getByteDistribution()
            tmpIPT = parser.getIndividualFlowIPTs()
            tmpPL = parser.getIndividualFlowPacketLengths()
            tmp, tmp_m = parser.getIndividualFlowMetadata()
        except Exception:
            continue

        # BUG FIX: the original ran `flows += parser.advancedInfo` (a
        # TypeError on dicts) and only checked for None *afterwards*.
        # Guard first, then merge the advanced info into the global dict.
        if parser.advancedInfo is None:
            continue
        for k in parser.advancedInfo:
            flows[k] = parser.advancedInfo[k]

        if tmp is not None and tmpPL is not None and tmpIPT is not None:
            for i in range(len(tmp)):
                idx = len(tmp) - i - 1  # walk the flows newest-first
                tmp_data = []
                tmp_data.extend(tmp[idx])
                tmp_data.extend(tmpPL[idx])
                tmp_data.extend(tmpIPT[idx])
                tmp_data.extend(tmpBD[idx])

                # nga issue, will fix when pcaps start flowing again
                if tmp_data[2] == 0 and tmp_data[4] > 0:
                    continue
                if tmp_data[3] == 0 and tmp_data[5] > 0:
                    continue

                data.append(tmp_data)
                metadata.append(tmp_m[idx])
                total_flows += 1
                # Live view is capped at 2*count_flocap flows; uploads are
                # processed in full.
                if total_flows == count_flocap * 2 and not is_upload:
                    break
        if total_flows == count_flocap * 2 and not is_upload:
            break

    if is_upload:
        # Clean up the uploaded temp file and its (now empty) directory.
        os.remove(upload_path)
        os.removedirs(dir_name)

    results = classify_samples(data, metadata)

    # Heuristic: the most frequently seen address is assumed to be localhost.
    lhost = {}
    for m in metadata:
        lhost[m[0]] = lhost.get(m[0], 0) + 1
    sorted_lhost = sorted(lhost.items(), key=operator.itemgetter(1))
    sorted_lhost.reverse()
    if len(sorted_lhost) > 0:
        (lh, _) = sorted_lhost[0]
    else:
        lh = None

    tmp = []
    to_display_names = []
    for key in display_fields:
        to_display_names.append(display_fields[key])
    for i in range(len(results)):
        color = []
        for j in range(len(results[i])):
            color.append(get_color(results[i][j]))
        s_orgName = ''
        d_orgName = ''
        if metadata[i][0] == lh:
            s_orgName = 'localhost'
        if metadata[i][1] == lh:
            d_orgName = 'localhost'
        tmp_to_display = []
        for key in display_fields:
            tmp_to_display.append(metadata[i][key])
        # Row layout consumed by the 'results' template — keep the order.
        tmp.append((results[i], metadata[i][0], metadata[i][1],
                    metadata[i][2], metadata[i][3], metadata[i][4],
                    metadata[i][5], metadata[i][6], metadata[i][7],
                    color, s_orgName, d_orgName, metadata[i][8],
                    tmp_to_display))

    end_time = time.time() - start_time

    # Highest classifier scores first.
    tmp = sorted(tmp, key=lambda x: x[0])
    tmp.reverse()

    return template('results', results=tmp, num_flows=len(results),
                    t=end_time, classifier_names=classifier_names,
                    to_display_names=to_display_names)
import googlemaps

from data_parser import DataParser
from kmeans import KMeans

# SECURITY NOTE(review): hard-coded Google Maps API key committed to source —
# move it to an environment variable or config file and revoke this one.
gmaps = googlemaps.Client(key='AIzaSyC543HM-TAYHQW25N6QQ81RbcFTwv6gyUY')

# Load the test addresses and geocode them.
# NOTE(review): absolute local path — presumably should be made relative/configurable.
dp = DataParser(gmaps)
dp.open_file_path(
    "C:/Users/Jelle/Documents/GitHub/address-clustering/testadressen.csv")
dp.name_address_from_csv()

# Cluster the geocoded addresses into 3 groups.
kmeans = KMeans()
labeled_data, cluster_centers = kmeans.do_kmeans(dp.address_list, 3)
print(labeled_data)

# Print a human-readable "street house_no" label for each cluster center.
for c in cluster_centers:
    # PERF FIX: the original issued gmaps.reverse_geocode(c) twice per
    # center (two billed API round-trips); call once and reuse the result.
    components = gmaps.reverse_geocode(c)[0]['address_components']
    street_name = components[1]['long_name']
    house_no = components[0]['long_name']
    print(street_name + " " + house_no)