Exemplo n.º 1
0
def run(args):
    """Train a GCN on a graphlearn graph and report test accuracy per epoch.

    The graph comes from ``load_graph(args)``. When ``args.use_mp`` is set,
    a server is launched through ``thg`` and the datasets receive the graph;
    otherwise the graph is initialised in-process with ``args.rank`` /
    ``args.world_size`` and closed at the end of the run.

    Args:
        args: parsed options. Reads ``use_mp``, ``client_num``, ``rank``,
            ``world_size``, ``train_length``, ``train_batch_size``,
            ``features_num``, ``hidden_dim``, ``class_num``, ``depth``,
            ``drop_rate``, ``learning_rate`` and ``epoch``.
    """
    gl.set_tape_capacity(1)
    g = load_graph(args)
    multi_process = args.use_mp
    if multi_process:
        gl.set_tracker_mode(0)
        thg.set_client_num(args.client_num)
        thg.launch_server(g)
    else:
        g.init(task_index=args.rank, task_count=args.world_size)

    # TODO(baole): This is an estimate and an accurate value will be needed from graphlearn.
    batches_total = args.train_length // args.train_batch_size
    length_per_worker = batches_total // args.world_size
    print('length_per_worker being set to: ' + str(length_per_worker))

    # Both datasets share the same settings; only the multi-process mode
    # hands the graph itself to the dataset.
    dataset_kwargs = {'window': 5, 'induce_func': induce_func}
    if multi_process:
        dataset_kwargs['graph'] = g

    def build_dataset(mask):
        # Build the query for this split first, then wrap it in a dataset.
        split_query = query(g, args, mask=mask)
        return thg.Dataset(split_query, **dataset_kwargs)

    # data loaders
    train_loader = thg.PyGDataLoader(build_dataset(gl.Mask.TRAIN),
                                     multi_process=multi_process,
                                     length=length_per_worker)
    test_loader = thg.PyGDataLoader(build_dataset(gl.Mask.TEST),
                                    multi_process=multi_process)

    # model and optimizer
    net = GCN(input_dim=args.features_num,
              hidden_dim=args.hidden_dim,
              output_dim=args.class_num,
              depth=args.depth,
              drop_rate=args.drop_rate)
    model = net.to(device)
    if dist.is_initialized():
        model = torch.nn.parallel.DistributedDataParallel(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # one training pass followed by one evaluation pass per epoch
    report = 'Epoch: {:03d}, Test: {:.4f}'
    for epoch in range(args.epoch):
        train(model, train_loader, optimizer, args)
        accuracy = test(model, test_loader, args)
        print(report.format(epoch, accuracy))

    if not multi_process:
        g.close()
Exemplo n.º 2
0
def main(argv):
    """Start a graphlearn server or client and, as a client, iterate the graph.

    Recognised options (each takes a value):
        -c / --cluster      cluster specification passed to ``g.init``
        -j / --job_name     ``"server"`` or ``"client"``
        -t / --task_index   integer index of this task
        -m / --mode         integer passed to ``gl.set_tracker_mode``

    A server calls ``g.wait_for_close()``. A client drains the "user" node
    query and then the "buy" edge query three times each, printing every
    batch until ``gl.OutOfRangeError`` is raised, and finally closes the graph.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        getopt.GetoptError: on an unknown option or a missing option value.
        ValueError: if ``task_index`` or ``mode`` is not an integer.
    """
    cur_path = sys.path[0]

    cluster = ""
    job_name = ""
    task_index = 0
    mode = 0

    # 'm:' must be declared here; without it getopt rejects "-m" even
    # though the loop below handles it.
    opts, _ = getopt.getopt(
        argv, 'c:j:t:m:', ['cluster=', 'job_name=', 'task_index=', 'mode='])
    for opt, arg in opts:
        if opt in ('-c', '--cluster'):
            cluster = arg
        elif opt in ('-j', '--job_name'):
            job_name = arg
        elif opt in ('-t', '--task_index'):
            task_index = int(arg)
        elif opt in ('-m', '--mode'):
            mode = int(arg)

    gl.set_tracker_mode(mode)

    g = gl.Graph()

    g.node(os.path.join(cur_path, "data/user"),
           node_type="user", decoder=gl.Decoder(weighted=True)) \
      .node(os.path.join(cur_path, "data/item"),
            node_type="item", decoder=gl.Decoder(attr_types=['string', 'int', 'float', 'float', 'string'])) \
      .edge(os.path.join(cur_path, "data/u-i"),
            edge_type=("user", "item", "buy"), decoder=gl.Decoder(weighted=True))

    g.init(cluster=cluster, job_name=job_name, task_index=task_index)

    if job_name == "server":
        print("Server {} started.".format(task_index))
        g.wait_for_close()

    if job_name == "client":
        print("Client {} started.".format(task_index))

        # Run the same node query for three passes; each pass ends when the
        # query reports OutOfRangeError.
        q = g.V("user").batch(10).values()
        for _ in range(3):
            while True:
                try:
                    print(g.run(q).ids)
                except gl.OutOfRangeError:
                    print("Out of range......")
                    break

        # Same three-pass pattern for the edge query.
        q = g.E("buy").batch(10).values()
        for _ in range(3):
            while True:
                try:
                    print(g.run(q).dst_ids)
                except gl.OutOfRangeError:
                    print("Out of range......")
                    break
        g.close()
def main(argv):
  """Initialise a graphlearn worker from a host list and run the graph tests.

  Recognised options (each takes a value):
      -ti / --task_index   integer index of this worker
      -h  / --hosts        host list passed to ``g.init``

  After initialisation the node/edge iteration, truncated full edge sampling
  and conditional negative sampling tests are run, then the graph is closed.

  Args:
      argv: command-line arguments without the program name.

  Raises:
      getopt.GetoptError: on an unknown option or a missing option value.
      ValueError: if the task index is not an integer.
  """
  cur_path = sys.path[0]

  task_index = -1
  hosts = ""

  # getopt short options are single characters, so "-ti N" is parsed as the
  # flag "-t" followed by "-i N": the value arrives under "-i", never under
  # "-ti". "-h" needs the trailing colon to accept the host list at all.
  opts, _ = getopt.getopt(argv,
                          'ti:h:',
                          ['task_index=', 'hosts='])
  for opt, arg in opts:
    if opt in ('-i', '--task_index'):
      task_index = int(arg)
    elif opt in ('-h', '--hosts'):
      hosts = arg

  # Set rpc as tracker
  gl.set_tracker_mode(0)

  g = gl.Graph()

  g.node(os.path.join(cur_path, "data/user"),
         node_type="user", decoder=gl.Decoder(weighted=True)) \
    .node(os.path.join(cur_path, "data/item"),
          node_type="item", decoder=gl.Decoder(attr_types=['string', 'int', 'float', 'float', 'string'])) \
    .edge(os.path.join(cur_path, "data/u-i"),
          edge_type=("user", "item", "buy"), decoder=gl.Decoder(weighted=True), directed=False) \
    .node(os.path.join(cur_path, "data/entity"),
          node_type="entity", decoder=gl.Decoder(attr_types=['float', 'float', 'float', 'float'], labeled=True)) \
    .edge(os.path.join(cur_path, "data/relation"),
          edge_type=("entity", "entity", "relation"), decoder=gl.Decoder(weighted=True), directed=False) \
    .edge(os.path.join(cur_path, "data/relation"),
          edge_type=("cond_node", "cond_node", "cond_edge"), decoder=gl.Decoder(weighted=True), directed=True) \
    .node(os.path.join(cur_path, "data/cond_node"),
          node_type="cond_node", decoder=gl.Decoder(attr_types=['int','int','float','string'], weighted=True))

  g.init(task_index=task_index, hosts=hosts)

  test_node_iterate(g)
  test_edge_iterate(g)
  test_truncated_full_edge_sample(g)
  test_conditional_negtaive_sample(g)

  g.close()
  print("Worker {} stopped.".format(task_index))
Exemplo n.º 4
0
def main():
  """Run one task of a distributed EgoGraphSAGE job.

  The cluster layout comes from ``gl.get_cluster()``. A 'ps' task initialises
  the graph as a graphlearn server, joins the trainer and waits for close.
  Any other task initialises the graph as a client, builds the model, trains
  it and saves node embeddings before closing the graph.
  """
  gl.set_tracker_mode(0)
  gl_cluster, tf_cluster, job_name, task_index = gl.get_cluster()
  ps_hosts = tf_cluster.get("ps")
  # The graphlearn servers reuse the ps host names with port 8889.
  server_addrs = [host.split(":")[0] + ":8889" for host in ps_hosts]
  gl_cluster["server"] = ",".join(server_addrs)
  worker_count = len(tf_cluster["worker"])

  # global settings: embedding variable partition num.
  tfg.conf.emb_max_partitions = len(ps_hosts)

  g = load_graph(task_index)

  tf_cluster = tf.train.ClusterSpec(tf_cluster)
  trainer = DistTrainer(tf_cluster, job_name, task_index, worker_count)

  if job_name == 'ps':
    g.init(cluster=gl_cluster, job_name='server', task_index=task_index)
    trainer.join()
    g.wait_for_close()
    return

  g.init(cluster=gl_cluster, job_name='client', task_index=task_index)

  # Layer sizes: a falsy entry in attr_dims counts as a single input column.
  attr_dims = json.loads(FLAGS.attr_dims)
  input_dim = sum(dim if dim else 1 for dim in attr_dims)
  depth = len(json.loads(FLAGS.nbrs_num))
  hidden_dims = [FLAGS.hidden_dim] * (depth - 1)
  dims = [input_dim] + hidden_dims + [FLAGS.output_dim]

  with trainer.context():  # model must under trainer.context.
    model = EgoGraphSAGE(dims,
                         agg_type=FLAGS.agg_type,
                         dropout=FLAGS.drop_out)
    train_iter, loss = train(g, model)
    save_iter, ids, emb = save_node_embedding(g, model)

  # training
  trainer.train(train_iter, loss, FLAGS.learning_rate, epochs=FLAGS.epochs)

  # saving node embedding: first output path suffixed with the task index.
  print('Start saving node embedding...')
  output_path = FLAGS.outputs.split(',')[0] + str(task_index)
  trainer.save(output_path, save_iter, ids, emb, FLAGS.batch_size)

  g.close()
  print('Finished!')
Exemplo n.º 5
0
def main(argv):
    """Initialise a graphlearn worker and print sample query/sampler results.

    Recognised options (each takes a value):
        -c / --task_index   integer index of this task
        -j / --task_count   integer number of tasks
        -t / --tracker      tracker passed to ``g.init``
        -h / --hosts        host list passed to ``g.init``

    The short letters ``-c`` and ``-j`` are kept as in the original script
    for backward compatibility. Tracker mode 0 is selected when hosts are
    given, mode 1 otherwise.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        getopt.GetoptError: on an unknown option or a missing option value.
        ValueError: if ``task_index`` or ``task_count`` is not an integer.
    """
    cur_path = sys.path[0]

    task_index = 0
    task_count = 1
    tracker = ""
    hosts = ""

    # 'h:' must be declared here; without it getopt rejects "-h" even
    # though the loop below handles it.
    opts, _ = getopt.getopt(
        argv, 'c:j:t:h:', ['task_index=', 'task_count=', 'tracker=', 'hosts='])
    for opt, arg in opts:
        if opt in ('-c', '--task_index'):
            task_index = int(arg)
        elif opt in ('-j', '--task_count'):
            task_count = int(arg)
        elif opt in ('-t', '--tracker'):
            tracker = arg
        elif opt in ('-h', '--hosts'):
            hosts = arg

    mode = 0 if hosts else 1
    gl.set_tracker_mode(mode)

    g = gl.Graph()

    g.node(os.path.join(cur_path, "data/user"),
           node_type="user", decoder=gl.Decoder(weighted=True)) \
      .node(os.path.join(cur_path, "data/item"),
            node_type="item", decoder=gl.Decoder(attr_types=['string', 'int', 'float', 'float', 'string'])) \
      .edge(os.path.join(cur_path, "data/u-i"),
            edge_type=("user", "item", "buy"), decoder=gl.Decoder(weighted=True))

    # The init arguments depend on which tracker mode was selected above.
    if mode == 0:
        g.init(task_index, hosts=hosts)
    else:
        g.init(task_index, task_count, tracker=tracker)

    print("Get Edges...")
    edges = g.E("buy").batch(2).shuffle().emit()
    print(edges.src_ids)
    print(edges.dst_ids)
    print(edges.weights)
    print("Get Edges Done...")

    print("Get Nodes...")
    print("Get user Nodes...")
    nodes = g.V("user", np.array([0, 1, 2, 3, 4])).emit()
    print(nodes.ids)
    print(nodes.weights)
    print("Get item Nodes...")
    nodes = g.V("item").batch(4).shuffle().emit()
    print(nodes.ids)
    print(nodes.int_attrs)
    print(nodes.float_attrs)
    print(nodes.string_attrs)
    print("Query item Nodes...")
    nodes = g.V("item", np.array([101, 102, 103])).emit()
    print(nodes.ids)
    print(nodes.int_attrs)
    print("Get Nodes Done...")

    print("Random sample...")
    s = g.neighbor_sampler("buy", expand_factor=2, strategy="random")
    nodes = s.get(np.array([0, 1, 2])).layer_nodes(1)
    print(nodes.ids)
    print(nodes.float_attrs)
    print(nodes.embedding_agg(func="mean"))
    print("Random Sample Done...")

    print("Full sample...")
    s = g.neighbor_sampler("buy", expand_factor=2, strategy="full")
    nodes = s.get(np.array([0, 1, 2])).layer_nodes(1)
    print(nodes.ids)
    print(nodes.offsets)
    print(nodes.embedding_agg())
    print("Full Sample Done...")

    print("InDegree neg sample...")
    s = g.negative_sampler("buy", expand_factor=3, strategy="in_degree")
    print(s.get(np.array([0, 1, 2])).ids)
    print("InDegree Negative Sample Done...")

    print("NodeWeight neg sample...")
    s = g.negative_sampler("user", expand_factor=3, strategy="node_weight")
    print(s.get(np.array([0, 1, 2])).ids)
    print("NodeWeight Negative Sample Done...")
    g.close()
Exemplo n.º 6
0
def main(argv):
  """Start a graphlearn server or client and, as a client, run the graph tests.

  Recognised options (each takes a value):
      -s  / --server         server address(es) placed in the cluster dict
      -c  / --client_count   integer number of clients
      -j  / --job_name       ``"server"`` or ``"client"``
      -ti / --task_index     integer index of this task

  A server calls ``g.wait_for_close()``. A client runs the node/edge
  iteration, truncated full edge sampling and conditional negative sampling
  tests, then closes the graph.

  Args:
      argv: command-line arguments without the program name.

  Raises:
      getopt.GetoptError: on an unknown option or a missing option value.
      ValueError: if ``client_count`` or the task index is not an integer.
  """
  cur_path = sys.path[0]

  # Must be bound even when --server is omitted: the cluster dict below
  # reads it unconditionally.
  server = ""
  client_count = -1
  job_name = ""
  task_index = -1

  # getopt short options are single characters, so "-ti N" is parsed as the
  # flag "-t" followed by "-i N": the value arrives under "-i", never under
  # "-ti".
  opts, _ = getopt.getopt(argv,
                          's:c:j:ti:',
                          ['server=', 'client_count=',
                           'job_name=', 'task_index='])
  for opt, arg in opts:
    if opt in ('-s', '--server'):
      server = arg
    elif opt in ('-c', '--client_count'):
      client_count = int(arg)
    elif opt in ('-j', '--job_name'):
      job_name = arg
    elif opt in ('-i', '--task_index'):
      task_index = int(arg)

  # Set rpc as tracker
  gl.set_tracker_mode(0)

  g = gl.Graph()

  g.node(os.path.join(cur_path, "data/user"),
         node_type="user", decoder=gl.Decoder(weighted=True)) \
    .node(os.path.join(cur_path, "data/item"),
          node_type="item", decoder=gl.Decoder(attr_types=['string', 'int', 'float', 'float', 'string'])) \
    .edge(os.path.join(cur_path, "data/u-i"),
          edge_type=("user", "item", "buy"), decoder=gl.Decoder(weighted=True), directed=False) \
    .node(os.path.join(cur_path, "data/entity"),
          node_type="entity", decoder=gl.Decoder(attr_types=['float', 'float', 'float', 'float'], labeled=True)) \
    .edge(os.path.join(cur_path, "data/relation"),
          edge_type=("entity", "entity", "relation"), decoder=gl.Decoder(weighted=True), directed=False) \
    .edge(os.path.join(cur_path, "data/relation"),
          edge_type=("cond_node", "cond_node", "cond_edge"), decoder=gl.Decoder(weighted=True), directed=True) \
    .node(os.path.join(cur_path, "data/cond_node"),
          node_type="cond_node", decoder=gl.Decoder(attr_types=['int','int','float','string'], weighted=True))

  cluster = {"server": server, "client_count": client_count}
  g.init(cluster=cluster, job_name=job_name, task_index=task_index)

  if job_name == "server":
    print("Server {} started.".format(task_index))
    g.wait_for_close()

  if job_name == "client":
    print("Client {} started.".format(task_index))

    test_node_iterate(g)
    test_edge_iterate(g)
    test_truncated_full_edge_sample(g)
    test_conditional_negtaive_sample(g)

    g.close()
    print("Client {} stopped.".format(task_index))
Exemplo n.º 7
0
def main(argv):
    """Start a graphlearn server or client and, as a client, print sample results.

    Recognised options (each takes a value):
        -c / --cluster      cluster specification passed to ``g.init``
        -j / --job_name     ``"server"`` or ``"client"``
        -t / --task_index   integer index of this task
        -m / --mode         integer passed to ``gl.set_tracker_mode``

    A server calls ``g.wait_for_close()``. A client prints the results of
    edge and node queries, random and full neighbor sampling, and in-degree
    and node-weight negative sampling, then closes the graph.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        getopt.GetoptError: on an unknown option or a missing option value.
        ValueError: if ``task_index`` or ``mode`` is not an integer.
    """
    cur_path = sys.path[0]

    cluster = ""
    job_name = ""
    task_index = 0
    mode = 0

    # 'm:' must be declared here; without it getopt rejects "-m" even
    # though the loop below handles it.
    opts, _ = getopt.getopt(
        argv, 'c:j:t:m:', ['cluster=', 'job_name=', 'task_index=', 'mode='])
    for opt, arg in opts:
        if opt in ('-c', '--cluster'):
            cluster = arg
        elif opt in ('-j', '--job_name'):
            job_name = arg
        elif opt in ('-t', '--task_index'):
            task_index = int(arg)
        elif opt in ('-m', '--mode'):
            mode = int(arg)

    gl.set_tracker_mode(mode)

    g = gl.Graph()

    g.node(os.path.join(cur_path, "data/user"),
           node_type="user", decoder=gl.Decoder(weighted=True)) \
      .node(os.path.join(cur_path, "data/item"),
            node_type="item", decoder=gl.Decoder(attr_types=['string', 'int', 'float', 'float', 'string'])) \
      .edge(os.path.join(cur_path, "data/u-i"),
            edge_type=("user", "item", "buy"), decoder=gl.Decoder(weighted=True))

    g.init(cluster=cluster, job_name=job_name, task_index=task_index)

    if job_name == "server":
        print("Server {} started.".format(task_index))
        g.wait_for_close()

    if job_name == "client":
        print("Client {} started.".format(task_index))

        print("Get Edges...")
        edges = g.E("buy").batch(2).shuffle().emit()
        print(edges.src_ids)
        print(edges.dst_ids)
        print(edges.weights)
        print("Get Edges Done...")

        print("Get Nodes...")
        print("Get user Nodes...")
        nodes = g.V("user", np.array([0, 1, 2, 3, 4])).emit()
        print(nodes.ids)
        print(nodes.weights)
        print("Get item Nodes...")
        nodes = g.V("item").batch(4).shuffle().emit()
        print(nodes.ids)
        print(nodes.int_attrs)
        print(nodes.float_attrs)
        print(nodes.string_attrs)
        print("Query item Nodes...")
        nodes = g.V("item", np.array([101, 102, 103])).emit()
        print(nodes.ids)
        print(nodes.int_attrs)
        print("Get Nodes Done...")

        print("Random sample...")
        s = g.neighbor_sampler("buy", expand_factor=2, strategy="random")
        nodes = s.get(np.array([0, 1, 2])).layer_nodes(1)
        print(nodes.ids)
        print(nodes.float_attrs)
        print(nodes.embedding_agg(func="mean"))
        print("Random Sample Done...")

        print("Full sample...")
        s = g.neighbor_sampler("buy", expand_factor=2, strategy="full")
        nodes = s.get(np.array([0, 1, 2])).layer_nodes(1)
        print(nodes.ids)
        print(nodes.offsets)
        print(nodes.embedding_agg())
        print("Full Sample Done...")

        print("InDegree neg sample...")
        s = g.negative_sampler("buy", expand_factor=3, strategy="in_degree")
        print(s.get(np.array([0, 1, 2])).ids)
        print("InDegree Negative Sample Done...")

        print("NodeWeight neg sample...")
        s = g.negative_sampler("user", expand_factor=3, strategy="node_weight")
        print(s.get(np.array([0, 1, 2])).ids)
        print("NodeWeight Negative Sample Done...")
        g.close()
        print("Client {} stopped.".format(task_index))