def main(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    num_devices = len(F.cuda_places())
    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          False, False, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # One epoch visits every node once, so the step count scales inversely
    # with batch size and device count.
    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      num_devices)
    optimization(args.lr * num_devices, loss, train_steps, args.optimizer)

    place = F.CUDAPlace(0)
    exe = F.Executor(place)
    exe.run(F.default_startup_program())

    graph = build_graph(args.num_nodes, args.edge_path)
    gen_func = build_gen_func(args, graph)

    pyreader.decorate_tensor_provider(gen_func)
    pyreader.start()
    train_prog = F.default_main_program()

    if args.warm_start_from_dir is not None:
        F.io.load_params(exe, args.warm_start_from_dir, train_prog)

    train_exe = get_parallel_exe(train_prog, loss)
    train(train_exe, exe, train_prog, loss, pyreader, args, train_steps)
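# Rough step arithmetic for the GPU run above (numbers illustrative, not
# from the source): with num_nodes=10312 (BlogCatalog), epoch=100,
# batch_size=512 and 4 GPUs, train_steps = int(10312 * 100 / 512 / 4) = 503.
# The learning rate passed to optimization() is scaled by the device count
# in the same way, the usual linear-scaling rule for data-parallel SGD.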
def train(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
    num_devices = int(os.getenv("CPU_NUM", 10))

    # Load edges eagerly so every worker builds an identical graph.
    data = load_raw_edges_fn(args.edge_path, args.undirected)
    edges = data[0]
    weights = data[1]
    node2idx = data[2]
    num_nodes = len(node2idx)

    model = DeepwalkModel(num_nodes, args.hidden_size, args.neg_num,
                          args.is_sparse, args.is_distributed, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # init fleet
    log.info("init_role")
    init_role()

    train_steps = math.ceil(1. * num_nodes * args.epoch / args.batch_size /
                            num_devices / worker_num)
    log.info("Train step: %s" % train_steps)

    if args.optimizer == "sgd":
        args.lr *= args.batch_size * args.walk_len * args.win_size

    optimization(args.lr, loss, train_steps, args.optimizer)

    # init and run server or worker
    if fleet.is_server():
        log.info("PS server mode")
        fleet.init_server()
        fleet.run_server()

    if fleet.is_worker():
        log.info("start init worker")
        exe = F.Executor(F.CPUPlace())
        exe.run(F.default_startup_program())
        log.info("Startup done")
        fleet.init_worker()  # just the worker, load the sample
        log.info("init worker done")
        log.info("num_nodes: %s, num_edges: %s" % (num_nodes, len(edges)))

        edges_feat = {}
        edges_feat["weight"] = np.array(weights)
        graph = pgl.graph.Graph(num_nodes, edges, edge_feat=edges_feat)

        # bind gen
        gen_func = build_gen_func(args, graph)
        pyreader.decorate_tensor_provider(gen_func)

        # Note: train_prog here is the training-loop helper defined elsewhere
        # in this file, not the fluid Program of the same name used in main().
        train_prog(exe, F.default_main_program(), loss, pyreader, args,
                   train_steps)

        log.info("fleet try to stop worker")
        fleet.stop_worker()
        log.info("Game over")
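# init_role() is defined elsewhere in this example. A minimal sketch of what
# it plausibly does, assuming the Paddle 1.x transpiler-mode fleet API (the
# role maker reads TRAINING_ROLE and the PADDLE_* env vars to decide whether
# this process is a parameter server or a trainer):
#
#     from paddle.fluid.incubate.fleet.base import role_maker
#     from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler \
#         import fleet
#
#     def init_role():
#         role = role_maker.PaddleCloudRoleMaker()
#         fleet.init(role)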
def main(args):
    """ main """
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    if args.dataset is not None:
        if args.dataset == "BlogCatalog":
            graph = data_loader.BlogCatalogDataset().graph
        else:
            raise ValueError(args.dataset + " dataset doesn't exist")
        log.info("Load built-in BlogCatalog dataset done.")
        # Shift group ids past the node-id range so nodes and groups share
        # one embedding table of size num_nodes + num_groups.
        node_feat = np.expand_dims(graph.node_feat["group_id"].argmax(-1),
                                   -1) + graph.num_nodes
        args.num_nodes = graph.num_nodes
        args.num_embedding = graph.num_nodes + graph.node_feat[
            "group_id"].shape[-1]
    else:
        graph = build_graph(args.num_nodes, args.edge_path, args.output_path)
        node_feat = np.load(args.node_feat_npy)

    model = GESModel(args.num_embedding, node_feat.shape[1] + 1,
                     args.hidden_size, args.neg_num, False, 2)
    pyreader = model.pyreader
    loss = model.forward()

    num_devices = len(F.cuda_places())
    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      num_devices)
    log.info("Train steps: %s" % train_steps)
    optimization(args.lr * num_devices, loss, train_steps, args.optimizer)

    place = F.CUDAPlace(0)
    exe = F.Executor(place)
    exe.run(F.default_startup_program())

    gen_func = build_gen_func(args, graph, node_feat)
    pyreader.decorate_tensor_provider(gen_func)
    pyreader.start()

    train_prog = F.default_main_program()
    train_exe = get_parallel_exe(train_prog, loss)
    train(train_exe, exe, train_prog, loss, pyreader, args, train_steps)
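# Illustrative only (toy values, not from the source): GES keeps node ids
# and side-information (group) ids in one shared embedding table of size
# args.num_embedding = num_nodes + num_groups, which is why node_feat above
# shifts the group index past the node-id range [0, num_nodes).
def _ges_id_shift_demo():
    import numpy as np
    num_nodes = 4
    group_onehot = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])  # 2 groups
    side_ids = np.expand_dims(group_onehot.argmax(-1), -1) + num_nodes
    assert side_ids.ravel().tolist() == [4, 5, 5, 4]  # rows 4..5 = groups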
def train(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
    num_devices = int(os.getenv("CPU_NUM", 10))

    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          args.is_sparse, args.is_distributed, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # init fleet
    init_role()

    train_steps = math.ceil(1. * args.num_nodes * args.epoch /
                            args.batch_size / num_devices / worker_num)
    log.info("Train step: %s" % train_steps)

    if args.optimizer == "sgd":
        args.lr *= args.batch_size * args.walk_len * args.win_size

    optimization(args.lr, loss, train_steps, args.optimizer)

    # init and run server or worker
    if fleet.is_server():
        fleet.init_server(args.warm_start_from_dir)
        fleet.run_server()

    if fleet.is_worker():
        log.info("start init worker")
        fleet.init_worker()  # just the worker, load the sample
        log.info("init worker done")
        exe = F.Executor(F.CPUPlace())
        exe.run(fleet.startup_program)
        log.info("Startup done")

        if args.dataset is not None:
            if args.dataset == "BlogCatalog":
                graph = data_loader.BlogCatalogDataset().graph
            elif args.dataset == "ArXiv":
                graph = data_loader.ArXivDataset().graph
            else:
                raise ValueError(args.dataset + " dataset doesn't exist")
            log.info("Load built-in %s dataset done." % args.dataset)
        elif args.walkpath_files is None or args.walkpath_files == "None":
            graph = build_graph(args.num_nodes, args.edge_path)
            log.info("Load graph from '%s' done." % args.edge_path)
        else:
            graph = build_fake_graph(args.num_nodes)
            log.info("Load fake graph done.")

        # bind gen
        gen_func = build_gen_func(args, graph)
        pyreader.decorate_tensor_provider(gen_func)
        pyreader.start()

        compiled_prog = build_complied_prog(fleet.main_program, loss)
        train_prog(exe, compiled_prog, loss, pyreader, args, train_steps)
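# The PS branches above are selected purely by environment variables. A
# hedged sketch of a single-machine launch (values illustrative; the names
# are the ones PaddleCloudRoleMaker conventionally reads, plus the CPU_NUM
# and PADDLE_TRAINERS_NUM variables this function reads directly):
#
#   # parameter-server process
#   TRAINING_ROLE=PSERVER PADDLE_PORT=6174 PADDLE_TRAINERS_NUM=1 \
#       CPU_NUM=10 python train.py ...
#   # trainer (worker) process
#   TRAINING_ROLE=TRAINER PADDLE_TRAINER_ID=0 PADDLE_TRAINERS_NUM=1 \
#       CPU_NUM=10 python train.py ...
#
# fleet.is_server() / fleet.is_worker() then route each process into the
# matching branch of train().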
import io
import logging

import numpy as np
import tqdm
import pgl
from pgl.graph_kernel import alias_sample_build_table
from pgl.utils.logger import log
import paddle.fluid.dygraph as D
import paddle.fluid as F
from easydict import EasyDict as edict
import yaml

from ernie.tokenizing_ernie import ErnieTokenizer
from ernie.tokenizing_ernie import ErnieTinyTokenizer
from ernie.modeling_ernie import ErnieModel

log.setLevel(logging.DEBUG)


def term2id(string, tokenizer, max_seqlen):
    # Tokenize, truncate to leave room for [sep], then right-pad so every
    # term id sequence has the fixed shape [max_seqlen].
    tokens = tokenizer.tokenize(string)
    ids = tokenizer.convert_tokens_to_ids(tokens)
    ids = ids[:max_seqlen - 1]
    ids = ids + [tokenizer.sep_id]  # ids + [sep]
    ids = ids + [tokenizer.pad_id] * (max_seqlen - len(ids))
    return ids


def load_graph(config, str2id, term_file, terms, item_distribution):
    edges = []
    with io.open(config.graph_data, encoding=config.encoding) as f:
        for idx, line in enumerate(f):