def node2vec_embedding(graph):
    """Learn node2vec embeddings for every node of ``graph``.

    Runs biased random walks over the graph and trains a skip-gram Word2Vec
    model on the resulting walks.

    Args:
        graph: Graph data accepted by the ``StellarGraph`` constructor.

    Returns:
        A ``pandas.DataFrame`` with one row per node, in ``graph.nodes()``
        order, indexed by the node id converted to ``str``. The columns are
        the ``dimensions`` embedding components.
    """
    # Walk bias parameters forwarded to BiasedRandomWalk.run.
    p = 1.0
    q = 1.0
    dimensions = 128
    num_walks = 10
    walk_length = 80
    window_size = 10
    num_iter = 1
    workers = multiprocessing.cpu_count()

    graph = StellarGraph(graph)
    rw = BiasedRandomWalk(graph)
    walks = rw.run(graph.nodes(), n=num_walks, length=walk_length, p=p, q=q)

    print(f"Number of random walks: {len(walks)}")

    model = Word2Vec(walks,
                     size=dimensions,
                     window=window_size,
                     min_count=0,
                     sg=1,
                     workers=workers,
                     iter=num_iter)

    # Rows of ``model.wv.vectors`` follow the Word2Vec vocabulary order
    # (``model.wv.index2word``), NOT ``graph.nodes()`` order. Label each row
    # with its own vocabulary token first, then reorder to graph order, so
    # every node gets its own embedding.
    features = pd.DataFrame(
        data=model.wv.vectors,
        index=[str(token) for token in model.wv.index2word],
    )
    # Every node is a walk root and min_count=0, so each node is expected to
    # be in the vocabulary; a node that is not would yield a NaN row here.
    features = features.reindex([str(node) for node in graph.nodes()])

    return features
Пример #2
0
    def prepare_data_for_stellargraph(self):
        """Build a StellarGraph and a label series from the raw input files.

        Reads ``adj.pkl``, ``features.pkl`` and ``train.pkl`` from
        ``self.data_path``.

        Returns:
            Tuple ``(my_graph, label_series)``: a ``StellarGraph`` with node
            type ``"paper"`` and edge type ``"cites"``, and a pandas Series
            named ``"label"`` built from the contents of ``train.pkl``.
        """
        def load_raw_input():
            # NOTE(review): allow_pickle=True unpickles arbitrary objects;
            # only point data_path at trusted files.
            adj = np.load(os.path.join(self.data_path, 'adj.pkl'),
                          allow_pickle=True)
            features = np.load(os.path.join(self.data_path, 'features.pkl'),
                               allow_pickle=True)
            labels = np.load(os.path.join(self.data_path, 'train.pkl'),
                             allow_pickle=True)
            return adj, features, labels

        print("Reading raw inputs...")
        adj, features, labels = load_raw_input()

        # One (source, target, weight) row per non-zero adjacency entry.
        print("creating edges...")
        adj_list = [[i, j, adj[i, j]] for i, j in zip(*adj.nonzero())]
        tmp_df = pd.DataFrame(adj_list)
        tmp_df.columns = ["source", "target", "weight"]

        # Node features become columns w0, w1, ...
        print("creating nodes...")
        feature_df = pd.DataFrame(features)
        feature_df.columns = [f"w{i}" for i in range(feature_df.shape[1])]

        print("creating labels...")
        label_series = pd.DataFrame({"label": labels})["label"]

        my_graph = StellarGraph({"paper": feature_df}, {"cites": tmp_df})

        print(my_graph.info())
        return my_graph, label_series
def load_from_file(filePrefix):
    """Load a graph from ``<filePrefix>_nodes.txt`` and ``<filePrefix>_edges.txt``.

    Both files are tab-separated. The first line of each holds the row count
    and the per-row feature count. The last tab-separated field of every line
    is ignored. Node lines carry their features after the first field; edge
    lines carry sender, receiver and then their features.

    Only the first node feature is attached to the graph, as column ``"x"``.
    Edge features are parsed but not attached. When the nodes file declares
    zero features, the graph gets feature-less nodes ``0 .. num_nodes - 1``.

    Args:
        filePrefix: Common path prefix of the two input files.

    Returns:
        The resulting ``StellarGraph``.
    """
    nodes_filename = filePrefix + "_nodes.txt"
    edges_filename = filePrefix + "_edges.txt"

    node_features = None
    edge_features = None

    #https://stellargraph.readthedocs.io/en/stable/demos/basics/loading-numpy.html
    with open(nodes_filename) as f:
        num_nodes, num_node_features = map(int, f.readline().split('\t')[:-1])
        if num_node_features > 0:
            # Feature-major layout: one row per feature, one column per node.
            node_features = np.zeros((num_node_features, num_nodes))
            for i, line in enumerate(f):
                features = np.array(list(map(float, line.split('\t')[1:-1])))
                for fIndex in range(num_node_features):
                    node_features[fIndex][i] = features[fIndex]

    # read edge features
    with open(edges_filename) as f:
        num_edges, num_edge_features = map(int, f.readline().split('\t')[:-1])
        senders = np.zeros(num_edges, dtype=int)
        receivers = np.zeros(num_edges, dtype=int)
        if num_edge_features > 0:
            edge_features = np.zeros((num_edge_features, num_edges))

        for i, line in enumerate(f):
            elements = line.split('\t')
            senders[i] = int(elements[0])
            receivers[i] = int(elements[1])
            if edge_features is not None:
                features = np.array(list(map(float, elements[2:-1])))
                for fIndex in range(num_edge_features):
                    edge_features[fIndex][i] = features[fIndex]

    square_numeric_edges = pd.DataFrame({
        "source": senders,
        "target": receivers
    })
    if node_features is not None:
        square_node_data = pd.DataFrame({"x": node_features[0].tolist()})
    else:
        # No node features declared: indexing None used to raise TypeError.
        # Keep the same 0..num_nodes-1 node ids, just without columns.
        square_node_data = pd.DataFrame(index=range(num_nodes))

    square_numeric = StellarGraph(square_node_data, edges=square_numeric_edges)

    print("GRAPH INFO")
    print("....................................")
    print(square_numeric.info())
    print("....................................")

    return square_numeric
Пример #4
0
def example_graph_1(feature_size=None):
    """Return a small six-node example graph as a ``StellarGraph``.

    Args:
        feature_size: When given, every node gets an all-ones feature vector
            built with ``np.ones(feature_size)``; when ``None`` the graph has
            no node features.
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(1, 7), label="default")
    graph.add_edges_from(
        [(1, 2), (2, 3), (1, 4), (3, 2), (5, 6), (1, 5)],
        label="default",
    )

    if feature_size is None:
        return StellarGraph(graph)

    # Add example features
    for node in graph.nodes():
        graph.nodes[node]["feature"] = np.ones(feature_size)
    return StellarGraph(graph, node_features="feature")
Пример #5
0
def arange_graph(request):
    """Build a three-node path graph whose features are a scaled ``arange``.

    Args:
        request: Object whose ``param`` attribute selects the feature shape:
            ``"multivariate"`` gives ``(3, 7, 11)``, anything else ``(3, 7)``.
            Presumably a pytest fixture request; confirm against the caller.

    Returns:
        A ``StellarGraph`` with nodes ``"a"``, ``"b"``, ``"c"`` and edges
        ``a-b`` and ``b-c``.
    """
    shape = (3, 7, 11) if request.param == "multivariate" else (3, 7)
    # np.product was a deprecated alias removed in NumPy 2.0; np.prod is the
    # supported spelling and returns the same value.
    total_elems = np.prod(shape)
    nodes = IndexedArray(np.arange(total_elems).reshape(shape) / total_elems,
                         index=["a", "b", "c"])
    edges = pd.DataFrame({"source": ["a", "b"], "target": ["b", "c"]})
    return StellarGraph(nodes, edges)
Пример #6
0
def graphCreationForSingleStudent(transitionRow,
                                  activityCodeList,
                                  mode='networkx'):
    """Build the activity-transition graph for one student.

    Each entry produced by ``generateTransition(activityCodeList)`` is read
    as ``entry[0]`` = pair of node ids, ``entry[1]`` = label of the
    transition in ``transitionRow``, ``entry[2]`` = pair of labels of the two
    activities in ``transitionRow``. A transition contributes an edge only
    when its label is present in ``transitionRow`` with a value greater
    than 0.

    Args:
        transitionRow: pandas Series with activity and transition values.
        activityCodeList: Passed through to ``generateTransition``.
        mode: ``'networkx'`` returns the ``nx.Graph``; any other value
            returns ``StellarGraph.from_networkx`` of it.

    Returns:
        The graph in the representation selected by ``mode``.
    """
    # A set gives O(1) membership checks; node ids must be hashable anyway
    # to be used as networkx nodes.
    seen_nodes = set()
    G = nx.Graph()
    for entry in generateTransition(activityCodeList):
        transition = entry[1]
        if transition not in transitionRow.index:
            continue
        if not transitionRow[transition] > 0:
            continue

        # Add each endpoint once, keeping the attributes from its first use.
        for pos in (0, 1):
            node = entry[0][pos]
            if node not in seen_nodes:
                G.add_node(node,
                           weight=transitionRow[entry[2][pos]],
                           name=entry[2][pos])
                seen_nodes.add(node)
        G.add_edge(entry[0][0], entry[0][1], weight=transitionRow[transition])

    if mode == 'networkx':
        return G
    return StellarGraph.from_networkx(G)
Пример #7
0
def test_APPNP_linkmodel_apply_dense():
    """Dense APPNP link model: direct predictions match generator predictions."""
    graph, features = create_graph_features()
    # Dense adjacency with a leading axis of size 1.
    adj = nx.to_numpy_array(graph)[None, :, :]

    feature_frame = pd.DataFrame.from_dict(
        dict(zip(graph.nodes(), features)), orient="index"
    )
    sg = StellarGraph(graph, node_features=feature_frame)

    generator = FullBatchLinkGenerator(sg, sparse=False, method="none")
    appnp = APPNP([3], generator, activations=["relu"], dropout=0.5)

    inputs, outputs = appnp.build()
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Predict from explicitly assembled input arrays
    link_indices = np.array([[[0, 1], [1, 2]]], dtype="int32")
    direct_preds = model.predict([features[None, :, :], link_indices, adj])
    assert direct_preds.shape == (1, 2, 2, 3)

    # Predict through the generator's flow
    flow_preds = model.predict_generator(
        generator.flow([("a", "b"), ("b", "c")])
    )
    assert flow_preds.shape == (1, 2, 2, 3)

    assert direct_preds == pytest.approx(flow_preds)
Пример #8
0
def test_PPNP_edge_cases():
    """PPNP rejects sparse generators, mismatched activations and bad generators."""
    graph, features = create_graph_features()
    adj = nx.to_scipy_sparse_matrix(graph)
    features, adj = PPNP_Aadj_feats_op(features, adj)

    feature_frame = pd.DataFrame.from_dict(
        dict(zip(graph.nodes(), features)), orient="index"
    )
    sg = StellarGraph(graph, node_features=feature_frame)

    # A sparse "ppnp" generator must raise ValueError.
    sparse_rejected = False
    try:
        FullBatchNodeGenerator(sg, sparse=True, method="ppnp")
    except ValueError:
        sparse_rejected = True
    assert sparse_rejected

    generator = FullBatchNodeGenerator(sg, sparse=False, method="ppnp")

    # Two layer sizes but only one activation.
    try:
        PPNP([2, 2], generator=generator, activations=["relu"], dropout=0.5)
    except ValueError as exc:
        caught = exc
    assert str(caught) == "The number of layers should equal the number of activations"

    # A plain list is not an accepted generator.
    try:
        PPNP([2], generator=[0, 1], activations=["relu"], dropout=0.5)
    except TypeError as exc:
        caught = exc
    assert str(caught) == "Generator should be a instance of FullBatchNodeGenerator"
Пример #9
0
def example_graph_1_saliency_maps(feature_size=None):
    """Return a five-node example graph, with a self-loop on every node.

    Used by: saliency gcn, saliency gat.

    Args:
        feature_size: When given, every node gets an all-ones feature vector
            built with ``np.ones(feature_size)``; when ``None`` the graph has
            no node features.
    """
    connections = [(0, 1), (0, 2), (2, 3), (3, 4)]
    self_loops = [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]

    nx_graph = nx.Graph()
    nx_graph.add_nodes_from(range(5), label="default")
    nx_graph.add_edges_from(connections + self_loops, label="default")

    if feature_size is None:
        return StellarGraph(nx_graph)

    # Add example features
    for node in nx_graph.nodes():
        nx_graph.nodes[node]["feature"] = np.ones(feature_size)
    return StellarGraph(nx_graph, node_features="feature")
Пример #10
0
def test_PPNP_apply_dense():
    """Dense PPNP node model: direct predictions match generator predictions."""
    graph, features = create_graph_features()
    features, adj = PPNP_Aadj_feats_op(features, nx.to_scipy_sparse_matrix(graph))
    # Leading axis of size 1 for the model input.
    adj = adj[None, :, :]

    feature_frame = pd.DataFrame.from_dict(
        dict(zip(graph.nodes(), features)), orient="index"
    )
    sg = StellarGraph(graph, node_features=feature_frame)

    generator = FullBatchNodeGenerator(sg, sparse=False, method="ppnp")
    ppnp = PPNP([2], generator=generator, activations=["relu"], dropout=0.5)

    inputs, outputs = ppnp.build()
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Predict from explicitly assembled input arrays
    node_indices = np.array([[0, 1]], dtype="int32")
    direct_preds = model.predict([features[None, :, :], node_indices, adj])
    assert direct_preds.shape == (1, 2, 2)

    # Predict through the generator's flow
    flow_preds = model.predict_generator(generator.flow(["a", "b"]))
    assert flow_preds.shape == (1, 2, 2)

    assert direct_preds == pytest.approx(flow_preds)
Пример #11
0
def weighted_hin():
    """Return a small heterogeneous graph mixing weighted and unweighted edges.

    Node types ``"A"`` (ids 0-3) and ``"B"`` (ids 4-6) carry no features.
    Edge types ``"R"``, ``"S"``, ``"T"`` and ``"U"`` are described inline.
    """
    a_ids = [0, 1, 2, 3]
    a = pd.DataFrame(index=a_ids)

    b_ids = [4, 5, 6]
    b = pd.DataFrame(index=b_ids)

    # no weights A-R->A
    r_ids = [7, 8]
    r = pd.DataFrame([(0, 1), (0, 2)], columns=["source", "target"], index=r_ids)

    # single weighted edge A-S->A
    # One data row needs exactly one index label; the previous two-label
    # index ([9, 10]) made the DataFrame constructor raise ValueError.
    s_ids = [9]
    s = pd.DataFrame([(0, 3, 2)], columns=["source", "target", "weight"], index=s_ids)

    # 3 edges with same weight A-T->B
    t_ids = [11, 12, 13]
    t = pd.DataFrame(
        [(0, 4, 2), (0, 5, 2), (0, 6, 2)],
        columns=["source", "target", "weight"],
        index=t_ids,
    )

    # weights [2, 3] between A nodes; weights [4, 5, 5] between an A node and
    # a B node (the last row is listed B -> A)
    u_ids = [14, 15, 16, 17, 18]
    u = pd.DataFrame(
        [(1, 2, 2), (1, 3, 3), (1, 4, 4), (1, 4, 5), (6, 1, 5)],
        columns=["source", "target", "weight"],
        index=u_ids,
    )

    return StellarGraph(nodes={"A": a, "B": b}, edges={"R": r, "S": s, "T": t, "U": u})
Пример #12
0
def test_APPNP_apply_propagate_model_dense():
    """Dense APPNP propagation model: direct predictions match generator predictions."""
    graph, features = create_graph_features()
    features, adj = GCN_Aadj_feats_op(features, nx.to_scipy_sparse_matrix(graph))
    # Dense adjacency with a leading axis of size 1.
    adj = np.array(adj.todense()[None, :, :])

    feature_frame = pd.DataFrame.from_dict(
        dict(zip(graph.nodes(), features)), orient="index"
    )
    sg = StellarGraph(graph, node_features=feature_frame)

    generator = FullBatchNodeGenerator(sg, sparse=False, method="gcn")
    appnp = APPNP(
        [2], generator=generator, activations=["relu"], dropout=0.5
    )

    # Base model whose outputs APPNP propagates.
    base_model = keras.Sequential()
    base_model.add(Dense(2))

    inputs, outputs = appnp.propagate_model(base_model)
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Predict from explicitly assembled input arrays
    node_indices = np.array([[0, 1]], dtype="int32")
    direct_preds = model.predict([features[None, :, :], node_indices, adj])
    assert direct_preds.shape == (1, 2, 2)

    # Predict through the generator's flow
    flow_preds = model.predict_generator(generator.flow(["a", "b"]))
    assert flow_preds.shape == (1, 2, 2)

    assert direct_preds == pytest.approx(flow_preds)
Пример #13
0
def create_graph_features():
    """Return ``(graph, features)`` for a three-node triangle graph.

    Used by: APPNP, ClusterGCN, GCN, PPNP, node_mappers,
    full_batch_generators.

    ``features`` is the 3x2 integer array that also supplies the node
    features of nodes ``"a"``, ``"b"`` and ``"c"``.
    """
    features = np.array([[1, 1], [1, 0], [0, 1]])
    node_frame = pd.DataFrame(features, index=["a", "b", "c"])
    edge_frame = pd.DataFrame(
        {"source": ["a", "b", "a"], "target": ["b", "c", "c"]}
    )
    graph = StellarGraph(node_frame, edge_frame)
    return graph, features
    def learn_embeddings(self,
                         embedding_dim=100,
                         window_size=5,
                         max_rw_len=50,
                         walks_per_node=20,
                         p=0.5,
                         q=2.0):
        """Train node2vec embeddings on ``self.graph`` and return the word vectors.

        Args:
            embedding_dim: Word2Vec ``size``.
            window_size: Word2Vec ``window``.
            max_rw_len: ``length`` passed to the random walker.
            walks_per_node: ``n`` passed to the random walker.
            p: ``p`` passed to the random walker.
            q: ``q`` passed to the random walker.

        Returns:
            ``model.wv`` of the trained Word2Vec model, after
            ``init_sims(replace=True)``.
        """
        print('Running node2vec...')
        walker = BiasedRandomWalk(StellarGraph(self.graph))
        walks = walker.run(
            nodes=list(self.graph),
            length=max_rw_len,
            n=walks_per_node,
            p=p,
            q=q,
        )
        print(f'Number of random walks: {len(walks)}')

        print('Running word2vec...')
        w2v_options = dict(
            size=embedding_dim,
            window=window_size,
            min_count=0,
            sg=1,
            workers=2,
            iter=1,
        )
        model = Word2Vec(walks, **w2v_options)
        model.init_sims(replace=True)

        return model.wv
Пример #15
0
 def intra_and_inter(pep, hla, after_pca):
     """Build a StellarGraph joining the ``pep`` nodes and the ``hla`` nodes.

     Every (source, target) pair across the two node groups gets an edge of
     weight 2; every pair within the same group gets an edge of weight 1.
     Node features come from ``Graph_Constructor.numerical``.
     """
     source, target = Graph_Constructor.combinator(pep, hla)

     # Edges across the two groups, weight 2.
     cross_pairs = list(itertools.product(source, target))
     cross_weights = itertools.repeat(2, len(source) * len(target))
     edges_inter = pd.DataFrame({
         'source': [src for src, _ in cross_pairs],
         'target': [dst for _, dst in cross_pairs],
         'weight': cross_weights
     })

     # Edges within each group, weight 1.
     within_pairs = (list(itertools.combinations(source, 2))
                     + list(itertools.combinations(target, 2)))
     within_weights = itertools.repeat(1, len(within_pairs))
     edges_intra = pd.DataFrame({
         'source': [src for src, _ in within_pairs],
         'target': [dst for _, dst in within_pairs],
         'weight': within_weights
     })

     # Stack both edge tables and renumber the rows 0..n-1.
     edges = pd.concat([edges_inter, edges_intra])
     edges = edges.set_index(pd.Index(np.arange(edges.shape[0])))

     feature_array = Graph_Constructor.numerical(pep, hla, after_pca)
     nodes = IndexedArray(feature_array, index=source + target)
     return StellarGraph(nodes,
                         edges,
                         node_type_default='corner',
                         edge_type_default='line')
Пример #16
0
    def walks(self, walklen, n1):
        """Run weighted biased random walks and drop the answer nodes.

        The graph is read from ``<self.dataset>/krnmdata1/CQAG1.txt``. In each
        walk, nodes with id below ``self.qnum`` are kept unchanged, nodes with
        id above ``self.qnum + self.anum`` are kept with ``self.anum``
        subtracted from their id (as ``str``), and all other nodes are dropped.

        Args:
            walklen: Maximum length of a random walk.
            n1: Number of random walks per root node.

        Returns:
            List of filtered walks (lists of node ids).
        """
        G = nx.read_weighted_edgelist(self.dataset + "/krnmdata1/CQAG1.txt")

        walker = BiasedRandomWalk(StellarGraph(G))
        weighted_walks = walker.run(
            nodes=G.nodes(),  # root nodes
            length=walklen,
            n=n1,
            p=0.5,  # (unnormalised) probability 1/p of returning to source node
            q=2.0,  # (unnormalised) probability 1/q of moving away from source node
            weighted=True,
            seed=42,  # fixed for reproducibility
        )
        print("Number of random walks: {}".format(len(weighted_walks)))

        # remove answer nodes
        walks = []
        for walk in weighted_walks:
            kept = []
            for node in walk:
                if int(node) < self.qnum:
                    kept.append(node)
                elif int(node) > (self.qnum + self.anum):
                    kept.append(str(int(node) - self.anum))
            walks.append(kept)
        print(walks[0:10])
        return walks
Пример #17
0
def graph_embed():
    """Learn 5-dimensional graph embeddings of bank/<column> co-occurrence.

    For each column other than ``'bank'``, builds one graph per day (dates 1
    to 120) with ``construct_graph``, samples one random walk per node per
    day, trains a CBOW Word2Vec model on all walks and maps every row's
    column value to its embedding.

    Returns:
        A DataFrame with columns ``embed_bank_<column><k>`` (k = 1..5) for
        every processed column, concatenated side by side.
    """
    combine = get_combine()
    li = ['bank', 'acquirer', 'coin', 'mcc', 'shop', 'nation', 'city']
    d = {
        'bank': 'b',
        'mcc': 'm',
        'acquirer': 'a',
        'coin': 'c',
        'shop': 's',
        'nation': 'n',
        'city': 'z'
    }
    df_all = None

    # Suffix each value with a per-column letter so that equal raw values in
    # different columns become distinct node names. A scalar suffix applies
    # to every row without depending on list broadcasting.
    for col_a in li:
        combine[col_a] = combine[col_a].astype(str) + d[col_a]

    for col_a in li[1:]:
        print(f'{col_a} started..')
        walk_all = []
        for day in np.linspace(1, 120, 120):
            print(day, end=',', flush=True)
            df = combine[combine['date'] == day]
            G = construct_graph('bank', col_a, df)
            rw = BiasedRandomWalk(StellarGraph(G))
            walk = rw.run(
                nodes=list(G.nodes()),  # root nodes
                length=80,  # maximum length of a random walk
                n=1,  # number of random walks per root node
                p=1,  # (unnormalised) probability 1/p of returning to source node
                q=1,  # (unnormalised) probability 1/q of moving away from source node
            )
            walk_all.extend(walk)
            # Free the per-day objects before building the next graph.
            del df, G, rw, walk
            gc.collect()

        model = Word2Vec(walk_all,
                         size=5,
                         window=3,
                         min_count=1,
                         sg=0,
                         workers=16,
                         iter=10)
        # Look vectors up through model.wv; indexing the model object itself
        # is deprecated.
        temp_d = {w: model.wv[w] for w in model.wv.vocab}
        temp_df = pd.DataFrame(
            data=combine[col_a].map(temp_d).tolist(),
            columns=['embed_bank_' + col_a + str(x + 1) for x in range(5)])
        if df_all is None:
            df_all = temp_df
        else:
            df_all = pd.concat([df_all, temp_df], axis=1)
        del temp_d, model
        gc.collect()
    return df_all
Пример #18
0
def example_graph_random(feature_size=4, n_edges=20, n_nodes=6, n_isolates=1):
    """Return a random example graph whose last ``n_isolates`` nodes have no edges.

    Used by: core/utils, link mapper, node mapper graph 3.

    Args:
        feature_size: Length of the per-node feature vector (every entry
            equals the node id); ``None`` gives a graph without features.
        n_edges: Number of random edges drawn (repeats and self-loops are
            possible).
        n_nodes: Total number of nodes.
        n_isolates: Number of trailing node ids excluded from edge sampling.
    """
    last_connected = n_nodes - n_isolates - 1

    random_edges = []
    for _ in range(n_edges):
        u = random.randint(0, last_connected)
        v = random.randint(0, last_connected)
        random_edges.append((u, v))

    nx_graph = nx.Graph()
    nx_graph.add_nodes_from(range(n_nodes))
    nx_graph.add_edges_from(random_edges, label="default")

    if feature_size is None:
        return StellarGraph(nx_graph)

    # Add example features
    for node in nx_graph.nodes():
        nx_graph.nodes[node]["feature"] = int(node) * np.ones(
            feature_size, dtype="int"
        )
    return StellarGraph(nx_graph, node_features="feature")
Пример #19
0
def create_stellargraph():
    """Return the ``create_graph_features`` graph wrapped with its features.

    Used by: cluster gcn, cluster gcn node mapper.
    """
    Gnx, features = create_graph_features()
    # One feature row per node, keyed by node id.
    feature_by_node = dict(zip(Gnx.nodes(), features))
    node_features = pd.DataFrame.from_dict(feature_by_node, orient="index")
    return StellarGraph(Gnx, node_features=node_features)
Пример #20
0
 def load4graph(self):
     """
     Load all edge and vertex information into a graph and create the
     random walker ``rw`` for it.

     :return: tuple ``(g_nx, rw)`` of the loaded graph and a
         ``UniformRandomMetaPathWalk`` over it
     """
     g_nx = self.load_dataset_SMDB(self.location, self.graph_infos)
     node_count = g_nx.number_of_nodes()
     edge_count = g_nx.number_of_edges()
     print(f"Number of nodes {node_count} and number  of edges {edge_count} in graph.")
     from stellargraph.data import UniformRandomMetaPathWalk
     walker = UniformRandomMetaPathWalk(StellarGraph(g_nx))
     return g_nx, walker
Пример #21
0
    def __init__(self, edges_path, lables_path):
        """
        Initialise with hard-coded settings and data read from the given paths.

        ``edges_path`` and ``lables_path`` are forwarded to ``self.read_data``,
        which supplies the graph and the labels.
        """
        graph, labels = self.read_data(edges_path, lables_path)
        walker = BiasedRandomWalk(StellarGraph.from_networkx(graph))

        fstar = 1
        pq_left_bound = 0.125   # p, q left bound
        pq_right_bound = 4.125  # p, q right bound
        super().__init__(fstar, pq_left_bound, pq_right_bound,
                         graph, labels, walker)
Пример #22
0
def eda(graph):
    '''
    Summarise one app graph (a GML file) and write the summary to a CSV file.

    graph --> filepath to a graph

    The summary holds the graph path, the number of node types, the node
    count per type, the total node and edge counts, and a label (0 when the
    directory path contains "benign", 1 otherwise).

    Returns a short status string when an output CSV already exists or the
    graph cannot be read; otherwise returns the result of ``DataFrame.to_csv``.
    '''

    app_dir, app_filename = os.path.split(graph)

    #building output
    target = "/teams/DSC180A_FA20_A00/a04malware/personal-group03/eda_sab/features1/"
    out_csv = os.path.join(target, (app_filename + ".csv"))
    target1 = "/teams/DSC180A_FA20_A00/a04malware/personal-group03/eda/features/"
    others = os.path.join(target1, (app_filename + ".csv"))

    if os.path.exists(out_csv):
        print("csv exists already")
        return "csv exists already"
    if os.path.exists(others):
        print("csv others exists already")
        return "csv others exists already"

    try:
        gml_graph = nx.read_gml(graph)
    except Exception:
        # Best effort: any read/parse failure is reported as a broken graph,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return graph + " might be broken!"

    stellar = StellarGraph.from_networkx(gml_graph, node_type_attr="type")

    # Number of nodes of each node type.
    node_types = {
        node_type: len(stellar.nodes_of_type(node_type=node_type))
        for node_type in stellar.node_types
    }

    data = {
        "app": graph,
        "node_types_counts": len(stellar.node_types),
        "node_types": node_types,
        "number_nodes": len(stellar.nodes()),
        "number_edges": len(stellar.edges()),
        "label": 0 if "benign" in app_dir else 1,
    }

    df = pd.DataFrame([data])

    return df.to_csv(out_csv)
Пример #23
0
def example_graph_2(feature_size=None, label="default") -> StellarGraph:
    """Return a four-node example graph with a single node and edge type.

    Used by: unsupervised sampler, link mapper.

    Args:
        feature_size: When given, node features come from
            ``_repeated_features(nodes, feature_size)``; when ``None`` the
            nodes have no features.
        label: Name used for both the node type and the edge type.
    """
    node_ids = [1, 2, 3, 4]
    edge_frame = pd.DataFrame(
        [(1, 2), (2, 3), (1, 4), (4, 2)], columns=["source", "target"]
    )

    features = (
        [] if feature_size is None
        else _repeated_features(node_ids, feature_size)
    )
    node_frame = pd.DataFrame(features, index=node_ids)

    return StellarGraph(nodes={label: node_frame}, edges={label: edge_frame})
Пример #24
0
def example_graph_1(feature_size=None):
    """Return a small six-node example graph built from DataFrames.

    Args:
        feature_size: When given, every node gets ``feature_size`` features
            equal to 1; when ``None`` the nodes have no features.
    """
    node_ids = [1, 2, 3, 4, 5, 6]
    edge_pairs = [(1, 2), (2, 3), (1, 4), (3, 2), (5, 6), (1, 5)]

    features = (
        [] if feature_size is None
        else np.ones((len(node_ids), feature_size))
    )

    node_frame = pd.DataFrame(features, index=node_ids)
    edge_frame = pd.DataFrame(edge_pairs, columns=["source", "target"])
    return StellarGraph(node_frame, edge_frame)
Пример #25
0
def example_graph_1_saliency_maps(feature_size=None):
    """Return a five-node example graph, with a self-loop on every node.

    Used by: saliency gcn, saliency gat.

    Args:
        feature_size: When given, every node gets ``feature_size`` features
            equal to 1; when ``None`` the nodes have no features.
    """
    node_ids = [0, 1, 2, 3, 4]

    if feature_size is not None:
        # Example features
        node_frame = pd.DataFrame(
            np.ones((len(node_ids), feature_size)), index=node_ids
        )
    else:
        node_frame = pd.DataFrame(index=node_ids)

    edge_frame = pd.DataFrame(
        [(0, 1), (0, 2), (2, 3), (3, 4),
         (0, 0), (1, 1), (2, 2), (3, 3), (4, 4)],
        columns=["source", "target"],
    )

    return StellarGraph(node_frame, edge_frame)
Пример #26
0
def _metapath_randomwalk(graph):
    """Run one uniform metapath random walk from every node of ``graph``.

    Walk length and metapath schemas come from the module-level
    ``WALK_DISTANCE`` and ``METAPATHS``.

    Returns:
        The walks produced by ``UniformRandomMetaPathWalk.run``.
    """
    walker = UniformRandomMetaPathWalk(StellarGraph(graph))

    root_nodes = list(graph.nodes())
    walks = walker.run(
        nodes=root_nodes,
        length=WALK_DISTANCE,  # maximum length of a random walk
        n=1,                   # number of random walks per root node
        metapaths=METAPATHS,   # metapath schemas
    )

    print("Number of random walks: {}".format(len(walks)))

    return walks
    def preprocessing(self, g, train_node, file_emb_output="./emb/100_900_nede2vec.emb"):
        """Attach saved embeddings to ``g`` and build train/val/test generators.

        Args:
            g: networkx graph; a copy of it is converted to a StellarGraph.
            train_node: Table whose ``'values'`` column holds the node labels.
            file_emb_output: Path of embeddings in word2vec text format, keyed
                by integer node id.

        Returns:
            Tuple ``(G, train_gen, train_targets, val_gen, val_targets,
            test_targets, test_gen, all_gen, generator)``. Note that
            ``test_targets`` comes before ``test_gen``.
        """
        node_subjects = train_node['values'].astype(str)
        print(Counter(node_subjects))

        # load_word2vec_format already returns the keyed vectors, so read the
        # ids and vectors from it directly instead of through the deprecated
        # ``.wv`` alias.
        embeddings = KeyedVectors.load_word2vec_format(file_emb_output)
        node_ids = embeddings.index2word
        node_embeddings = embeddings.vectors
        print("Embedding load success.")

        reinex_node_embedding = pd.DataFrame(
            node_embeddings, index=[int(node_id) for node_id in node_ids]
        )
        g_feature_attr = g.copy()

        G = StellarGraph.from_networkx(
            g_feature_attr, node_features=reinex_node_embedding, node_type_default="n", edge_type_default="e"
        )
        print(G.info())

        # 160 labelled nodes for training, 20 of the remainder for validation,
        # everything else for testing; both splits are stratified by label.
        train_subjects, test_subjects = model_selection.train_test_split(
            node_subjects, train_size=160, test_size=None, stratify=node_subjects
        )
        val_subjects, test_subjects = model_selection.train_test_split(
            test_subjects, train_size=20, test_size=None, stratify=test_subjects
        )

        target_encoding = preprocessing.LabelBinarizer()

        train_targets = target_encoding.fit_transform(train_subjects)
        val_targets = target_encoding.transform(val_subjects)
        test_targets = target_encoding.transform(test_subjects)

        generator = FullBatchNodeGenerator(G, method="gcn")
        train_gen = generator.flow(train_subjects.index, train_targets)
        val_gen = generator.flow(val_subjects.index, val_targets)
        test_gen = generator.flow(test_subjects.index, test_targets)

        all_nodes = node_subjects.index
        all_gen = generator.flow(all_nodes)

        return G, train_gen, train_targets, val_gen, val_targets, test_targets, test_gen, all_gen, generator
Пример #28
0
def build_graph(outfolder, app_data_list, nodes_path, edge_path):
    """Build a StellarGraph linking apps, methods and packages to the APIs they use.

    Reads every CSV in ``app_data_list`` (all columns as strings), assigns each
    distinct value of the ``api``, ``app``, ``method`` and ``package`` columns a
    uid of the form ``<label><n>``, and writes one ``<label>_map.csv`` per
    column into ``outfolder``. For every non-api column the distinct
    (value, api) pairs are appended to ``edge_path`` as uid pairs.

    Args:
        outfolder: Directory that receives the ``<label>_map.csv`` files.
        app_data_list: Iterable of CSV paths to read.
        nodes_path: File the ``nodes`` dict is pickled to.
        edge_path: CSV file the edges are written to (overwritten first).

    Returns:
        A ``StellarGraph`` built from the node dict and the edges read back
        from ``edge_path``.
    """
#     with Client() as client, performance_report(os.path.join(outfolder, "performance_report.html")):
#     print(f"Dask Cluster: {client.cluster}")
#     print(f"Dashboard port: {client.scheduler_info()['services']['dashboard']}")

    # Materialise the whole dataset in memory as a pandas DataFrame.
    data = dd.read_csv(list(app_data_list), dtype=str).compute()

    nodes = {}
    api_map = None

    # setup edges.csv
    # Writes only the header row; edge rows are appended below.
    pd.DataFrame(columns=['source', 'target']).to_csv(edge_path, index=False)

    # 'api' must come first: the other labels need api_map to encode their edges.
    for label in ['api', 'app', 'method', 'package']:
        print(f'Indexing {label}s')
#         uid_map = data[label].unique()
        uid_map = pd.DataFrame()
        uid_map[label] = data[label].unique()

#             if base_data is not None: # load base items
#                 base_items = pd.read_csv(
#                     os.path.join(base_data, label+'_map.csv'),
#                     usecols=[label]
#                 )
#                 uid_map = pd.concat([base_items, uid_map], ignore_index=True).drop_duplicates().reset_index(drop=True)

        # uid = label name followed by the row number, e.g. "api0", "api1", ...
        uid_map['uid'] = label + pd.Series(uid_map.index).astype(str)
        uid_map = uid_map.set_index(label)
        uid_map.to_csv(os.path.join(outfolder, label+'_map.csv'))
        # Nodes of this type are identified by uid; no feature data is passed.
        nodes[label] = IndexedArray(index=uid_map.uid.values)

        # get edges if not api
        if label == 'api':
            api_map = uid_map.uid  # create api map
        else:
            print(f'Finding {label}-api edges')
            edges = data[[label, 'api']].drop_duplicates()
            # Replace raw values by uids: source = <label> uid, target = api uid.
            edges[label] = edges[label].map(uid_map.uid)
            edges['api'] = edges['api'].map(api_map)
            # Append without header so the file keeps the single header row.
            edges.to_csv(edge_path, mode='a', index=False, header=False)

    del data
    
    # save nodes to file
    with open(nodes_path, 'wb') as file:
        pickle.dump(nodes, file)

    return StellarGraph(nodes = nodes, edges = pd.read_csv(edge_path))
Пример #29
0
 def walks(self, walklen):
     """Run weighted biased random walks over the teams graph.

     The graph is read from ``<self.dataset>/krnmdata1/teamsG.txt``.

     Args:
         walklen: Maximum length of a random walk.

     Returns:
         The walks produced by ``BiasedRandomWalk.run`` (5 per root node).
     """
     G = nx.read_weighted_edgelist(self.dataset + "/krnmdata1/teamsG.txt")
     walker = BiasedRandomWalk(StellarGraph(G))
     weighted_walks = walker.run(
         nodes=G.nodes(),  # root nodes
         length=walklen,
         n=5,              # number of random walks per root node
         p=0.1,            # (unnormalised) probability 1/p of returning to source node
         q=2.0,            # (unnormalised) probability 1/q of moving away from source node
         weighted=True,
         seed=42,          # fixed for reproducibility
     )
     print("Number of random walks: {}".format(len(weighted_walks)))
     print(weighted_walks[0:10])
     return weighted_walks
Пример #30
0
def createGraphFromCounter(dfg, mode='networkx'):
    """Build a directed graph from an iterable of transitions.

    Each item of ``dfg`` is indexed as ``item[0]`` (source) and ``item[1]``
    (target). Every node carries a ``name`` attribute equal to its id and
    every edge has ``weight=1``.

    Args:
        dfg: Iterable of transitions (iterating a mapping yields its keys).
        mode: ``'networkx'`` returns the ``nx.DiGraph``; any other value
            returns ``StellarGraph.from_networkx`` of it.

    Returns:
        The graph in the representation selected by ``mode``.
    """
    # A set gives O(1) membership checks; node ids must be hashable anyway
    # to be used as networkx nodes.
    seen_nodes = set()
    G = nx.DiGraph()
    for transition in list(dfg):
        source, target = transition[0], transition[1]
        for node in (source, target):
            if node not in seen_nodes:
                G.add_node(node, name=node)
                seen_nodes.add(node)
        G.add_edge(source, target, weight=1)

    if mode == 'networkx':
        return G
    return StellarGraph.from_networkx(G)