def build_feature_table(flow_path, ip_mapper, feature_table, get_flow_features=True):
    """Add graph-based and (optionally) flow-based features to feature_table.

    The steps run in a fixed order: triangle statistics, pagerank, k-core
    ranking, then median flow size and median flow duration. The column
    legend further down suggests this order is also the column order of the
    table, so do not reorder the calls.

    Args:
        flow_path: Path prefix of the flow data. The graph metrics read
            ``flow_path + '.tsv'``; ``flow_lib.get_median_flow`` receives
            ``flow_path`` unchanged.
        ip_mapper: Passed to ``map_dict`` to re-key each graph metric
            dictionary (presumably node id -> IP address; confirm against
            ``map_dict``).
        feature_table: Destination handed to ``add_feature`` and
            ``add_feature_list``; presumably updated in place, since
            nothing is returned here.
        get_flow_features: When equal to ``False`` the flow size and flow
            duration features are skipped.

    Returns:
        None.
    """
    graph_path = flow_path + '.tsv'

    print('Computing triangle based features ...')
    triangles = {}
    gmetrics_lib.get_triangles(graph_path, triangles)
    add_feature_list(map_dict(triangles, ip_mapper), feature_table, True, True)

    # Feature columns:
    #   in_t out_t through_t cycles out_deg in_deg prank krank
    #   inf_s out_s inf_d outf_d
    # role1: Modest out triangles
    # role2: High out degree, modest in degree
    # role3: High in triangles
    # role4: High out degree, modest out triangles
    # role5: High in triangles, low out triangle
    # role6: High in degree
    # role7: High out flow size

    print('Computing pagerank ...')
    centrality = {}
    gmetrics_lib.get_centrality(graph_path, centrality)
    add_feature(map_dict(centrality, ip_mapper), feature_table, True)
    print('# ips = ' + str(len(centrality)))

    print('Computing kcore ranking ...')
    # Unlike the other metrics, the k-core ranking is returned rather than
    # filled into a dictionary supplied by the caller.
    raw_kcore = gmetrics_lib.get_kcore_ranking(graph_path)
    mapped_kcore = map_dict(raw_kcore, ip_mapper)
    add_feature(mapped_kcore, feature_table, False, True)
    # Counts before [1] and after [2] the mapping through ip_mapper.
    print('# ips [1] = ' + str(len(raw_kcore)))
    print('# ips [2] = ' + str(len(mapped_kcore)))

    # Deliberately an equality test (not ``not get_flow_features``) so that
    # only values equal to False skip the flow features, as before.
    if get_flow_features == False:  # noqa: E712
        return

    # The size and duration passes differ only in the banner and in the
    # final flag given to get_median_flow (True for size, False for duration).
    flow_passes = (
        ('Computing flow size features ...', True),
        ('Computing flow duration features ...', False),
    )
    for banner, size_flag in flow_passes:
        print(banner)
        median_in = {}
        median_out = {}
        flow_lib.get_median_flow(flow_path, median_in, median_out, size_flag)
        add_feature(median_in, feature_table, False)
        add_feature(median_out, feature_table, False)
        print('# ips = ' + str(len(median_in)))
        print('# ips = ' + str(len(median_out)))
def test_histogram():
    """Manually exercise equi_height_histogram on real triangle statistics.

    Loads triangle statistics for a hard-coded flow graph, takes the last
    entry of every per-node statistics record, writes those values to
    ``out-deg.dat`` (one per line) and feeds their integer form to
    ``equi_height_histogram`` with a second argument of 2, 3 and 4.

    The histogram results are not checked or returned; this is a smoke
    run, not an assertion-based test.

    Returns:
        None.
    """
    # Site-specific input; point this at a small local file (for example
    # 'test.tsv') when running elsewhere.
    path = '/pic/projects/mnms4graphs/iscx/tsv/testbed-11jun-aggr.tsv'

    triangle_stats = dict()
    gmetrics_lib.get_triangles(path, triangle_stats)

    out_degrees = []
    # The context manager closes the file even if a record's last field
    # cannot be converted to int.
    with open('out-deg.dat', 'w') as fout:
        for stats in triangle_stats.values():
            # The original comment calls the last field the out degree --
            # TODO confirm against gmetrics_lib.get_triangles.
            out_degrees.append(int(stats[-1]))
            fout.write(str(stats[-1]) + '\n')

    # Second argument is presumably the number of buckets -- verify against
    # equi_height_histogram.
    for bucket_arg in (2, 3, 4):
        equi_height_histogram(out_degrees, bucket_arg)