Exemplo n.º 1
0
def init(params, metric, output_path, draw):
    """Train the initial node embedding, then persist, evaluate and plot it.

    The graph is built by the ``dh`` loader named in
    ``params["load_data"]["func"]``; the embedding class ``NodeEmbedding`` is
    imported from ``init_embedding.<params["init_train"]["func"]>``.  The
    elapsed training time is appended to ``output_path + "_time"`` and the
    trained embeddings/weights are written as JSON to
    ``output_path + "_init"`` before ``metric`` and ``draw`` are invoked on
    the embeddings.

    Side effect: ``params["load_data"]["network_file"]`` is overwritten with
    its ``DATA_PATH``-joined form.

    Returns:
        The tuple ``(G, embeddings, weights)``.
    """
    timing_file = output_path + "_time"

    # Resolve the network file against DATA_PATH, then load the graph.
    data_cfg = params["load_data"]
    data_cfg["network_file"] = os.path.join(DATA_PATH,
                                            data_cfg["network_file"])
    graph_loader = getattr(dh, data_cfg["func"])
    G = graph_loader(data_cfg)

    # The embedding implementation is selected by name at run time.
    embedding_module = __import__(
        "init_embedding." + params["init_train"]["func"],
        fromlist=["init_embedding"])
    trainer = embedding_module.NodeEmbedding(params["init_train"], G)
    print("after module_embedding")

    began = datetime.datetime.now()
    embeddings, weights = trainer.train()
    finished = datetime.datetime.now()
    dh.append_to_file(timing_file, str(finished - began) + "\n")

    with open(output_path + "_init", "w") as out:
        snapshot = {
            "embeddings": embeddings.tolist(),
            "weights": weights.tolist()
        }
        out.write(json.dumps(snapshot))

    metric(embeddings)
    draw(embeddings)
    return G, embeddings, weights
Exemplo n.º 2
0
def train_model(params):
    """Run the tree-driven embedding training and record its result.

    ``extract_tree(params)`` supplies the graph matrix and the tree.  Three
    handlers are prepared (network getter, embedding model class, transfer
    class), the last tree entry is seeded with a zero coordinate vector and
    ``params["init_radius"]``, and ``dfs`` is started from that entry to fill
    the remaining slots.  The radii and coordinates are appended as JSON to
    ``params["train_output"]``, which is also symlinked as
    ``RES_PATH/new_train_res``.

    Returns:
        The tuple ``(res_coordinates, res_radius)``.
    """
    g_mat, tree = extract_tree(params)

    # Implementations are chosen by name from the configuration.
    handlers = {
        "get_network": gn(g_mat, params["get_network_hierarchy"]),
        "embedding_model": __import__(
            'node_embedding.' + params["embedding_model"]["func"],
            fromlist=["node_embedding"]).NodeEmbedding,
        "transfer_embeddings": __import__(
            'transfer_embeddings.' + params["transfer_embeddings"]["func"],
            fromlist=["transfer_embeddings"]).TransferEmbedding,
    }

    slot_count = len(tree)
    start_idx = slot_count - 1  # dfs begins at the last tree entry
    res_coordinates = [None] * slot_count
    res_radius = [None] * slot_count
    res_coordinates[start_idx] = np.zeros(
        params["embedding_model"]["embedding_size"], dtype=np.float32)
    res_radius[start_idx] = float(params["init_radius"])
    dfs(start_idx, tree, handlers, params, res_radius, res_coordinates)

    res_path = params["train_output"]
    dh.symlink(res_path, os.path.join(RES_PATH, "new_train_res"))
    serialized = json.dumps({
        "radius": np.array(res_radius).tolist(),
        "coordinates": np.array(res_coordinates).tolist()
    })
    dh.append_to_file(res_path, serialized)

    return res_coordinates, res_radius
Exemplo n.º 3
0
 def metric(embeddings):
     """Evaluate ``embeddings`` with every metric configured in ``params``.

     ``params`` and ``metric_path`` come from the enclosing scope.  For each
     entry of ``params["metrics"]`` the ``Metric`` function named by
     ``entry["func"]`` is called with ``(embeddings, entry)``; its result is
     appended to ``metric_path`` (one line per metric) and printed.  Nothing
     happens when ``params`` has no ``"metrics"`` key.
     """
     if "metrics" not in params:
         return
     # The loop variable is deliberately not called ``metric`` so it does not
     # shadow this function's own name.
     for metric_cfg in params["metrics"]:
         res = getattr(Metric, metric_cfg["func"])(embeddings, metric_cfg)
         dh.append_to_file(metric_path, str(res) + "\n")
         # Call form: valid on Python 3 and prints the same single value
         # under the Python 2 print statement.
         print(res)
Exemplo n.º 4
0
def loop(params, G, embeddings, weights, metric, output_path, draw):
    """Retrain the embedding from scratch after every graph update.

    A ``GetNext`` object (from ``get_next.<params["get_next"]["func"]>``) is
    polled with ``get_next(G)`` until it reports ``0``.  After each non-zero
    report a fresh ``NodeEmbedding`` (from
    ``init_embedding.<params["new_embedding"]["func"]>``) is built on ``G``
    and trained; the incoming ``embeddings``/``weights`` are not fed into it.
    Training time is appended to ``output_path + "_time"``, ``metric`` and
    ``draw`` are called on the new embeddings, and a snapshot is collected.
    All snapshots are finally written as JSON to
    ``output_path + "_dynamic"``.

    Side effect: ``params["get_next"]["input_file"]`` is overwritten with its
    ``DATA_PATH``-joined form.
    """
    next_cfg = params["get_next"]
    next_cfg["input_file"] = os.path.join(DATA_PATH, next_cfg["input_file"])
    gn = __import__("get_next." + next_cfg["func"],
                    fromlist=["get_next"]).GetNext(next_cfg)

    params_new = params["new_embedding"]
    embedding_cls = __import__("init_embedding." + params_new["func"],
                               fromlist=["new_embedding"]).NodeEmbedding

    timing_file = output_path + "_time"
    snapshots = []
    # get_next() returning 0 signals that there is nothing left to process.
    while gn.get_next(G) != 0:
        began = datetime.datetime.now()
        embeddings, weights = embedding_cls(params_new, G).train()
        finished = datetime.datetime.now()
        dh.append_to_file(timing_file, str(finished - began) + "\n")

        metric(embeddings)
        draw(embeddings)
        snapshots.append({
            "embeddings": embeddings.tolist(),
            "weights": weights.tolist()
        })

    with open(output_path + "_dynamic", "w") as out:
        out.write(json.dumps(snapshots))
 def metric(embeddings):
     """Score ``embeddings`` with each metric listed in ``params["metrics"]``.

     ``params`` and ``metric_path`` are taken from the enclosing scope.  Each
     result is appended to ``metric_path`` on its own line and echoed to
     stdout; without a ``"metrics"`` key the function does nothing.
     """
     if "metrics" in params:
         for spec in params["metrics"]:
             print("[] Start node classification...")
             scorer = getattr(Metric, spec["func"])
             score = scorer(embeddings, spec)
             dh.append_to_file(metric_path, str(score) + "\n")
             print("[+] Metric: " + str(score))
Exemplo n.º 6
0
def init(params, metric, output_path, draw):
    """Run ``ut.sage_main`` once and record how long it took.

    ``output_path`` is stored in ``params['output_path']`` before the call.
    The elapsed time is printed and appended to ``output_path + "_time"``.
    ``metric`` and ``draw`` are accepted but not used here.

    Returns:
        ``(None, None, None)`` in place of graph, embedding and weight.
    """
    params['output_path'] = output_path
    timing_file = output_path + "_time"

    began = datetime.datetime.now()
    ut.sage_main(params)
    elapsed = datetime.datetime.now() - began

    print("the train_time is" + str(elapsed))
    dh.append_to_file(timing_file, str(elapsed) + "\n")

    # This variant produces no in-memory graph, embedding or weight.
    return None, None, None
Exemplo n.º 7
0
Arquivo: main.py Projeto: lundu28/GNE
def metric(params):
    """Evaluate stored training results with the configured metric functions.

    Reads the JSON file ``params["metric_input"]`` (keys ``"coordinates"``
    and ``"radius"``), then walks ``params["metric_function"]``:

    * ``"draw_circle_2D"`` entries render a timestamped PDF under
      ``PIC_PATH`` (symlinked as ``PIC_PATH/new_draw_circle``) and contribute
      nothing to the returned list;
    * every other entry is called on the first ``params["num_nodes"]``
      coordinate rows and its result is collected.

    The collected results are appended as JSON to ``params["metric_output"]``
    (symlinked as ``RES_PATH/new_metric_res``).

    Returns:
        A list of ``(metric_func_name, result)`` tuples.
    """
    # Context manager so the input file is closed deterministically.
    with open(params["metric_input"]) as infile:
        js = json.loads(infile.read())
    coordinates = np.array(js["coordinates"])
    radius = np.array(js["radius"])
    res_path = params["metric_output"]
    dh.symlink(res_path, os.path.join(RES_PATH, "new_metric_res"))
    ret = []
    # The loop variable is not named ``metric`` to avoid shadowing this
    # function's own name.
    for metric_cfg in params["metric_function"]:
        func_name = metric_cfg["metric_func"]
        if func_name == "draw_circle_2D":
            # Millisecond timestamp gives every picture its own file name.
            pic_path = os.path.join(
                PIC_PATH,
                "draw_circle_" + str(int(time.time() * 1000.0)) + ".pdf")
            dh.symlink(pic_path, os.path.join(PIC_PATH, "new_draw_circle"))
            getattr(Metric, func_name)(coordinates, radius, metric_cfg,
                                       params["num_nodes"], pic_path)
        else:
            origin_coordinates = coordinates[: params["num_nodes"]]
            res = getattr(Metric, func_name)(origin_coordinates, metric_cfg)
            ret.append((func_name, res))
    dh.append_to_file(res_path, json.dumps(ret))

    return ret
Exemplo n.º 8
0
def loop(params, G, embeddings, weights, metric, output_path, draw):
    """Embed newly arriving nodes one at a time against the initial model.

    A ``GetNext`` object (from ``get_next.<params["get_next"]["func"]>``) is
    polled with ``get_next(G, init_n)`` until it reports ``0`` new nodes.
    For every new node, a node whose ``out_degree`` attribute is 0 receives a
    uniform random vector in ``[-1, 1)``; any other node is trained by a
    ``NodeEmbedding`` from
    ``dynamic_embedding.<params["dynamic_embedding"]["func"]>`` using the
    initial embeddings and ``weights``.  The new row is appended to
    ``embeddings``.  After each batch the elapsed time is appended to
    ``output_path + "_time"`` and ``metric``/``draw`` are called on the
    grown embedding matrix.

    Side effect: ``params["get_next"]["input_file"]`` is overwritten with its
    ``DATA_PATH``-joined form.
    """
    params["get_next"]["input_file"] = os.path.join(
        DATA_PATH, params["get_next"]["input_file"])
    module_next = __import__("get_next." + params["get_next"]["func"],
                             fromlist=["get_next"]).GetNext
    gn = module_next(params["get_next"])

    params_dynamic = params["dynamic_embedding"]
    module_dynamic_embedding = __import__(
        "dynamic_embedding." + params_dynamic["func"],
        fromlist=["dynamic_embedding"]).NodeEmbedding

    time_path = output_path + "_time"
    init_n = G.number_of_nodes()
    cnt = init_n  # id of the next new node to embed
    init_embeddings = np.array(embeddings)
    embedding_size = init_embeddings.shape[1]
    while True:
        num_new = gn.get_next(G, init_n)
        if num_new == 0:
            break
        st = datetime.datetime.now()
        # ``range`` instead of the Python 2-only ``xrange``.
        for _ in range(num_new):
            if G.node[cnt]['out_degree'] == 0:
                # Nothing to train from: draw uniformly from [-1, 1).
                embed = np.random.rand(1, embedding_size) * 2.0 - 1.0
            else:
                ne = module_dynamic_embedding(params_dynamic, init_embeddings,
                                              weights, G, cnt)
                embed = ne.train()
            # np.append returns a new array and never modifies its input, so
            # the result has to be kept; otherwise the new rows are lost.
            embeddings = np.append(embeddings, embed, axis=0)
            cnt += 1

        ed = datetime.datetime.now()
        dh.append_to_file(time_path, str(ed - st) + "\n")

        metric(embeddings)
        draw(embeddings)
Exemplo n.º 9
0
def dynamic_test(init_file, dynamic_file, feats, id_map, flag_file, params,
                 metric, draw):
    """Train on the initial graph, then retrain as batches of edges arrive.

    ``dynamic_file`` is read as a sequence of records: a two-field header
    line whose second field is the number of edge lines that follow, then
    that many lines of whitespace-separated integers.  Lines that do not
    have exactly two fields where a header is expected are skipped.

    Records are accumulated; every ``FLAGS.test_batch_size`` records the
    graph is updated with the accumulated edges, random walks are rebuilt on
    the nodes flagged neither ``val`` nor ``test``, the model is retrained,
    and the elapsed time is appended to ``params['output_path'] + "_time"``.
    Any leftover records are processed the same way at the end.
    """
    G, walks, class_map, node_flag, flag_no = init_G(init_file, flag_file,
                                                     id_map, feats)

    def retrain(new_edges):
        # Apply the edges, rebuild walks on the non-val/non-test nodes, fit.
        update_G(G, feats, id_map, class_map, new_edges, node_flag, flag_no)
        trainable = [
            n for n in G.nodes()
            if not (G.node[n]['val'] or G.node[n]['test'])
        ]
        fresh_walks = run_random_walks(G.subgraph(trainable), trainable)
        train([G, feats, id_map, fresh_walks, class_map], params,
              G.number_of_nodes(), metric, draw)
        change_G_status(G)

    def report_status():
        print("update status: " + str(G.number_of_nodes()) +
              "nodes, enter to continue")

    def log_elapsed(since):
        # Append the time elapsed since `since`; return the new timestamp.
        now = datetime.datetime.now()
        dh.append_to_file(params['output_path'] + "_time",
                          str(now - since) + "\n")
        return now

    train([G, feats, id_map, walks, class_map], params, G.number_of_nodes(),
          metric, draw)
    change_G_status(G)
    print("first test finished!, enter to continue")

    # Parse the update file into one edge list per record.
    node_edges_lst = []
    with open(dynamic_file, 'r') as infile:
        for header in iter(infile.readline, ''):
            fields = header.strip().split()
            if len(fields) != 2:
                continue
            record = [
                [int(tok) for tok in infile.readline().strip().split()]
                for _ in range(int(fields[1]))
            ]
            node_edges_lst.append(record)

    pending = []
    records_in_batch = 0
    start_time = datetime.datetime.now()
    for record in node_edges_lst:
        records_in_batch += 1
        pending.extend(record)

        if records_in_batch != FLAGS.test_batch_size:
            continue

        retrain(pending)
        report_status()
        start_time = log_elapsed(start_time)
        records_in_batch = 0
        pending = []

    # Flush a final, partially filled batch.
    if pending:
        retrain(pending)
        log_elapsed(start_time)
        report_status()
Exemplo n.º 10
0
def loop(params, G, embeddings, weights, metric, output_path, draw):
    """Incrementally update the embedding as new nodes are added to ``G``.

    A ``GetNext`` object (from ``get_next.<params["get_next"]["func"]>``) is
    polled with ``get_next(G)`` until it reports ``0`` new nodes.  For each
    non-zero report the function:

    1. stores a ``'delta'`` attribute on every edge between old nodes
       (``cal_delta``),
    2. selects up to ``params["dynamic_embedding"]["num_modify"]`` old nodes
       by averaged weighted ``|delta|`` and swaps embedding/weight rows
       (``rank_nodes``),
    3. trains a ``NodeEmbedding`` from
       ``dynamic_embedding.<params["dynamic_embedding"]["func"]>`` and
       appends the training time to ``output_path + "_time"``,
    4. re-gathers the old rows through ``rmapp`` (``reset``),
    5. calls ``metric`` and ``draw`` and records a snapshot.

    All snapshots are written as JSON to ``output_path + "_dynamic"``.

    Side effect: ``params["get_next"]["input_file"]`` is overwritten with its
    ``DATA_PATH``-joined form.
    """
    params["get_next"]["input_file"] = os.path.join(DATA_PATH, params["get_next"]["input_file"])
    module_next = __import__(
            "get_next." + params["get_next"]["func"], fromlist = ["get_next"]).GetNext
    gn = module_next(params["get_next"])

    # mapp / rmapp start as identity index lists and are rebound at the top
    # of every iteration of the main loop below; the nested helpers read the
    # current binding through the closure.
    # Presumably mapp maps node id -> row and rmapp maps row -> node id --
    # TODO confirm.
    # NOTE(review): rank_nodes assigns into these by index, which requires a
    # list (Python 2 ``range``); a Python 3 ``range`` object does not support
    # item assignment.
    mapp = range(G.number_of_nodes())
    rmapp = range(G.number_of_nodes())

    params_dynamic = params["dynamic_embedding"]
    K = params_dynamic["num_sampled"]
    def cal_delta(num_new):
        """Store a 'delta' attribute on every edge joining two old nodes."""
        # Node ids >= num_pre are treated as new and skipped.
        num_pre = G.number_of_nodes() - num_new
        for u, v in G.edges():
            if u >= num_pre or v >= num_pre:
                continue
            um, vm = mapp[u], mapp[v]
            # Inner product of u's embedding row and v's weight row.
            delta_real = np.matmul(embeddings[[um]], weights[[vm]].T)[0, 0]
            # delta = <emb_u, w_v>
            #         - log(weight * graph_degree / (in_deg(u) * out_deg(v)))
            #         + log(K)
            G[u][v]['delta'] = delta_real - np.log(
                    float(G[u][v]['weight'] * G.graph['degree']) / float(G.node[u]['in_degree'] * G.node[v]['out_degree'])) + np.log(K)


    def rank_nodes(num_new):
        """Pick up to num_modify old nodes by delta score and swap their rows."""
        num_pre = G.number_of_nodes() - num_new
        num_modify = params_dynamic['num_modify']
        if num_modify == 0:
            return
        # Per-node sum of weight * |delta| over edges between old nodes; both
        # endpoints of an edge receive the contribution.
        delta_list = [0.0] * num_pre
        for u, v in G.edges():
            if u >= num_pre or v >= num_pre:
                continue
            delta_list[u] += float(G[u][v]['weight']) * abs(G[u][v]['delta'])
            delta_list[v] += float(G[u][v]['weight']) * abs(G[u][v]['delta'])

        # Normalise each score by the node's in_degree + out_degree.
        for u in G:
            if u >= num_pre:
                continue
            delta_list[u] /= (G.node[u]['in_degree'] + G.node[u]['out_degree'])

        # Bounded selection: once q holds num_modify entries, one entry is
        # popped and whichever of (popped, current) has the larger score is
        # put back.
        # Assumes pq pops its smallest entry first, so that q ends up holding
        # the highest-scoring nodes -- TODO confirm what pq is.
        q = pq()
        for u in G:
            if u >= num_pre:
                continue
            if q.qsize() < num_modify:
                q.put_nowait((delta_list[u], u))
                continue
            items = q.get_nowait()
            if items[0] < delta_list[u]:
                q.put_nowait((delta_list[u], u))
            else:
                q.put_nowait(items)

        # Swap each selected node's row with row idx, updating both maps and
        # the embedding/weight matrices in place.
        # NOTE(review): idx is never changed inside this loop, so every
        # selected node is swapped with the same row (num_pre - 1); confirm
        # whether an ``idx -= 1`` per iteration was intended.
        idx = num_pre - 1
        while not q.empty():
            u = q.get_nowait()[1]
            um = mapp[u]
            v = rmapp[idx]
            mapp[u] = idx
            rmapp[idx] = u
            mapp[v] = um
            rmapp[um] = v
            embeddings[[um, idx], :] = embeddings[[idx, um], :]
            weights[[um, idx], :] = weights[[idx, um], :]

    def reset(num_new):
        """Overwrite the first num_pre rows with the rows gathered via rmapp."""
        # Presumably this undoes the row permutation made by rank_nodes --
        # TODO confirm.
        # NOTE(review): num_modify is read here but never used.
        num_modify = params_dynamic['num_modify']
        num_pre = G.number_of_nodes() - num_new
        embeddings[[range(num_pre)], :] = embeddings[rmapp, :]
        weights[[range(num_pre)], :] = weights[rmapp, :]


    module_dynamic_embedding = __import__(
            "dynamic_embedding." + params_dynamic["func"],
            fromlist = ["dynamic_embedding"]).NodeEmbedding

    time_path = output_path + "_time"
    dynamic_embeddings = []
    while True:
        # Fresh identity maps, sized to the node count before get_next runs.
        mapp = range(G.number_of_nodes())
        rmapp = range(G.number_of_nodes())

        # Presumably get_next adds the new nodes to G and returns how many
        # were added (the helpers subtract num_new from the node count) --
        # TODO confirm.
        num_new = gn.get_next(G)
        if num_new == 0:
            break
        cal_delta(num_new)
        rank_nodes(num_new)
        ne = module_dynamic_embedding(params_dynamic, embeddings, weights, G, mapp, rmapp, num_new)
        
        # Only the train() call itself is timed.
        st = datetime.datetime.now()
        embeddings, weights = ne.train()
        ed = datetime.datetime.now()
        dh.append_to_file(time_path, str(ed - st) + "\n")

        # reset() reads the rebound embeddings/weights through the closure.
        reset(num_new)
        res = metric(embeddings)
        draw(embeddings)
        dynamic_embeddings.append({"embeddings": embeddings.tolist(), "weights": weights.tolist()})

    with open(output_path + "_dynamic", "w") as f:
        f.write(json.dumps(dynamic_embeddings))