def build_feature_table(flow_path,
                        ip_mapper,
                        feature_table,
                        get_flow_features=True):
    """Compute graph and flow features and add them to ``feature_table``.

    Graph metrics (triangle statistics, centrality/pagerank and k-core
    ranking) are computed by ``gmetrics_lib`` from ``flow_path + '.tsv'``,
    re-keyed with ``map_dict(..., ip_mapper)`` and handed to
    ``add_feature_list`` / ``add_feature``.  When requested, the median
    in/out flow dictionaries filled by ``flow_lib.get_median_flow`` (called
    with ``flow_path`` itself, without the ``.tsv`` suffix) are added as
    well; these are passed on without going through ``map_dict``.

    Args:
        flow_path: Path prefix of the flow data set.
        ip_mapper: Mapping object forwarded to ``map_dict`` (presumably
            graph node id -> IP; confirm against ``map_dict``).
        feature_table: Container forwarded to the ``add_feature*`` helpers
            (presumably updated in place, since nothing is returned here).
        get_flow_features: When falsy, stop after the graph features and
            skip the flow size / duration features.

    Returns:
        None.
    """
    path = flow_path + '.tsv'
    print('Computing triangle based features ...')
    triangle_stats = dict()
    gmetrics_lib.get_triangles(path, triangle_stats)
    ip_triangle_stats = map_dict(triangle_stats, ip_mapper)
    add_feature_list(ip_triangle_stats, feature_table, True, True)
    # Feature names as listed by the original author (appears to follow the
    # order in which features are added below):
    # in_t out_t through_t cycles out_deg in_deg prank krank inf_s out_s inf_d outf_d
    # role1: Modest out triangles
    # role2: High out degree, modest in degree
    # role3: High in triangles
    # role4: High out degree, modest out triangles
    # role5: High in triangles, low out triangle
    # role6: High in degree
    # role7: High out flow size

    print('Computing pagerank ...')
    pagerank = dict()
    gmetrics_lib.get_centrality(path, pagerank)
    ip_pagerank = map_dict(pagerank, ip_mapper)
    add_feature(ip_pagerank, feature_table, True)
    print('# ips = ' + str(len(pagerank)))

    print('Computing kcore ranking ...')
    # get_kcore_ranking returns its result instead of filling a dict that is
    # passed in, so no empty dict needs to be created beforehand.
    kcore_ranking = gmetrics_lib.get_kcore_ranking(path)
    ip_kcore_ranking = map_dict(kcore_ranking, ip_mapper)
    add_feature(ip_kcore_ranking, feature_table, False, True)
    # Sizes before [1] and after [2] the ip_mapper re-keying.
    print('# ips [1] = ' + str(len(kcore_ranking)))
    print('# ips [2] = ' + str(len(ip_kcore_ranking)))

    if not get_flow_features:
        return

    print('Computing flow size features ...')
    median_in_flow = dict()
    median_out_flow = dict()
    # Last argument True: used here for the flow *size* medians.
    flow_lib.get_median_flow(flow_path, median_in_flow, median_out_flow, True)
    add_feature(median_in_flow, feature_table, False)
    add_feature(median_out_flow, feature_table, False)
    print('# ips = ' + str(len(median_in_flow)))
    print('# ips = ' + str(len(median_out_flow)))

    print('Computing flow duration features ...')
    median_in_flow_duration = dict()
    median_out_flow_duration = dict()
    # Last argument False: used here for the flow *duration* medians.
    flow_lib.get_median_flow(flow_path,
                             median_in_flow_duration,
                             median_out_flow_duration,
                             False)
    add_feature(median_in_flow_duration, feature_table, False)
    add_feature(median_out_flow_duration, feature_table, False)
    print('# ips = ' + str(len(median_in_flow_duration)))
    print('# ips = ' + str(len(median_out_flow_duration)))
def build_feature_table(flow_path, ip_mapper, feature_table, get_flow_features=True):
    """Fill ``feature_table`` with graph-based and (optionally) flow-based features.

    The graph file ``flow_path + '.tsv'`` is given to ``gmetrics_lib`` to
    obtain triangle statistics, centrality (reported as pagerank) and a
    k-core ranking.  Each result is re-keyed through
    ``map_dict(result, ip_mapper)`` before being passed to
    ``add_feature_list`` / ``add_feature``.  Unless ``get_flow_features`` is
    falsy, median in/out flow dictionaries are then collected with
    ``flow_lib.get_median_flow(flow_path, ...)`` and added directly, without
    ``map_dict``.

    Args:
        flow_path: Flow data path without the ``.tsv`` extension.
        ip_mapper: Forwarded to ``map_dict``; its exact shape is defined by
            that helper (TODO confirm: node id -> IP lookup).
        feature_table: Forwarded to the ``add_feature*`` helpers, which are
            expected to record the features in it.
        get_flow_features: Set to a falsy value to compute only the graph
            features.

    Returns:
        None.
    """
    path = flow_path + '.tsv'
    print('Computing triangle based features ...')
    triangle_stats = dict()
    gmetrics_lib.get_triangles(path, triangle_stats)
    ip_triangle_stats = map_dict(triangle_stats, ip_mapper)
    add_feature_list(ip_triangle_stats, feature_table, True, True)
    # Author's feature legend (seems to mirror the insertion order below):
    # in_t out_t through_t cycles out_deg in_deg prank krank inf_s out_s inf_d outf_d
    # role1: Modest out triangles
    # role2: High out degree, modest in degree
    # role3: High in triangles
    # role4: High out degree, modest out triangles
    # role5: High in triangles, low out triangle
    # role6: High in degree
    # role7: High out flow size

    print('Computing pagerank ...')
    pagerank = dict()
    gmetrics_lib.get_centrality(path, pagerank)
    ip_pagerank = map_dict(pagerank, ip_mapper)
    add_feature(ip_pagerank, feature_table, True)
    print('# ips = ' + str(len(pagerank)))

    print('Computing kcore ranking ...')
    # The ranking is returned by the library call; a pre-created empty dict
    # would simply be discarded.
    kcore_ranking = gmetrics_lib.get_kcore_ranking(path)
    ip_kcore_ranking = map_dict(kcore_ranking, ip_mapper)
    add_feature(ip_kcore_ranking, feature_table, False, True)
    # [1] is the raw ranking size, [2] the size after re-keying via ip_mapper.
    print('# ips [1] = ' + str(len(kcore_ranking)))
    print('# ips [2] = ' + str(len(ip_kcore_ranking)))

    # Graph-only mode: nothing below is needed.
    if not get_flow_features:
        return

    print('Computing flow size features ...')
    median_in_flow = dict()
    median_out_flow = dict()
    # Trailing True selects the call used for the flow size medians.
    flow_lib.get_median_flow(flow_path, median_in_flow, median_out_flow, True)
    add_feature(median_in_flow, feature_table, False)
    add_feature(median_out_flow, feature_table, False)
    print('# ips = ' + str(len(median_in_flow)))
    print('# ips = ' + str(len(median_out_flow)))

    print('Computing flow duration features ...')
    median_in_flow_duration = dict()
    median_out_flow_duration = dict()
    # Trailing False selects the call used for the flow duration medians.
    flow_lib.get_median_flow(
        flow_path, median_in_flow_duration, median_out_flow_duration, False)
    add_feature(median_in_flow_duration, feature_table, False)
    add_feature(median_out_flow_duration, feature_table, False)
    print('# ips = ' + str(len(median_in_flow_duration)))
    print('# ips = ' + str(len(median_out_flow_duration)))
# Example #3
0
def test_histogram():
    """Run ``equi_height_histogram`` on values taken from a real graph.

    Triangle statistics for a hard-coded TSV file are loaded with
    ``gmetrics_lib.get_triangles``.  The last entry of every node's
    statistics is converted to ``int``, collected in a list and also written,
    one value per line, to ``out-deg.dat`` in the current directory.  The
    list is then passed to ``equi_height_histogram`` with second arguments
    2, 3 and 4 (presumably the number of bins; confirm against that helper).

    Returns:
        None.
    """
    triangle_stats = dict()
    path = '/pic/projects/mnms4graphs/iscx/tsv/testbed-11jun-aggr.tsv'
    gmetrics_lib.get_triangles(path, triangle_stats)
    array = []
    # ``with`` closes the dump file even if a value cannot be converted or
    # written; the manual open()/close() pair leaked the handle on error.
    with open('out-deg.dat', 'w') as fout:
        for v in triangle_stats.values():
            # The original author labels the last entry as the out degree.
            array.append(int(v[-1]))
            fout.write(str(v[-1]) + '\n')
    equi_height_histogram(array, 2)
    equi_height_histogram(array, 3)
    equi_height_histogram(array, 4)
# Example #4
0
def test_histogram():
    """Exercise ``equi_height_histogram`` with data from a fixed graph file.

    Steps:
      1. Fill a dict with triangle statistics for the hard-coded TSV path
         via ``gmetrics_lib.get_triangles``.
      2. For each node, take the last entry of its statistics, store it as
         ``int`` and write its string form on its own line to
         ``out-deg.dat``.
      3. Call ``equi_height_histogram`` on the collected values with 2, 3
         and 4 as the second argument (NOTE(review): looks like a bin
         count; verify against the helper).

    Returns:
        None.
    """
    path = '/pic/projects/mnms4graphs/iscx/tsv/testbed-11jun-aggr.tsv'
    triangle_stats = dict()
    gmetrics_lib.get_triangles(path, triangle_stats)

    array = []
    # Context manager instead of open()/close(): the file handle is released
    # even when int() or write() raises part-way through the loop.
    with open('out-deg.dat', 'w') as fout:
        for v in triangle_stats.values():
            # Last entry per node; described as the out degree by the
            # original author.
            array.append(int(v[-1]))
            fout.write(str(v[-1]) + '\n')

    for second_arg in (2, 3, 4):
        equi_height_histogram(array, second_arg)