def generate_samplers(self):
    """Build one bidirectional training sampler per training process.

    For every process index ``i`` in ``range(self.num_proc)`` a head-corrupting
    and a tail-corrupting ``EdgeSampler`` are created over
    ``self.train_data.edge_parts[i]`` and wrapped in a
    ``NewBidirectionalOneShotIterator``.

    Returns:
        list: the iterators, one per process, in process order. The list is
        empty when ``self.num_proc`` is 0. (Previously the list was built and
        then dropped, so callers could not reach the samplers.)
    """

    def build_edge_sampler(proc_id, negative_mode):
        # The head and tail samplers differ only in which side is corrupted.
        return dgl.contrib.sampling.EdgeSampler(
            self.train_data,
            seed_edges=F.tensor(self.train_data.edge_parts[proc_id]),
            batch_size=self.batch_size,
            neg_sample_size=self.neg_sample_size,
            chunk_size=self.neg_sample_size,
            negative_mode=negative_mode,
            num_workers=self.num_workers,
            shuffle=True,
            exclude_positive=False,
            return_false_neg=False,
        )

    # Generate train samplers
    train_samplers = []
    for i in range(self.num_proc):
        print("Building training sampler for proc %d" % i)
        t1 = time.time()
        # for each GPU, allocate num_proc // num_GPU processes
        train_sampler_head = build_edge_sampler(i, 'head')
        train_sampler_tail = build_edge_sampler(i, 'tail')
        print(train_sampler_head)
        print(train_sampler_tail)
        train_samplers.append(
            NewBidirectionalOneShotIterator(
                dataloader_head=train_sampler_head,
                dataloader_tail=train_sampler_tail,
                neg_chunk_size=self.neg_sample_size,
                neg_sample_size=self.neg_sample_size,
                is_chunked=True,
                num_nodes=self.n_entities,
                has_edge_importance=self.has_edge_importance,
            ))
        print("Training sampler for proc {} created, it takes {} seconds".
              format(i, time.time() - t1))
    return train_samplers
def run(args, logger):
    """Run single-machine training, then optional embedding export and test.

    Loads the dataset, builds the training samplers (and, when ``args.valid``
    or ``args.test`` is set, the evaluation samplers), loads the model, trains
    it in ``args.num_proc`` processes (inline when ``args.num_proc <= 1``),
    writes the embeddings plus ``config.json`` into ``args.save_emb`` when that
    is not ``None``, and finally evaluates on the test split.

    Args:
        args: parsed option namespace. Several fields are overwritten here:
            ``neg_sample_size_eval``, ``batch_size``, ``batch_size_eval``,
            ``eval_filter``, ``strict_rel_part``, ``soft_rel_part``,
            ``num_workers`` and ``num_test_proc``.
        logger: forwarded to ``load_model``.
    """
    init_time_start = time.time()
    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format,
                          args.data_files)

    if args.neg_sample_size_eval < 0:
        args.neg_sample_size_eval = dataset.n_entities
    args.batch_size = get_compatible_batch_size(args.batch_size,
                                                args.neg_sample_size)
    args.batch_size_eval = get_compatible_batch_size(args.batch_size_eval,
                                                     args.neg_sample_size_eval)

    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample_eval:
        assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

    train_data = TrainDataset(dataset, args, ranks=args.num_proc)
    # if there is no cross partition relation, we fall back to strict_rel_part
    args.strict_rel_part = args.mix_cpu_gpu and (train_data.cross_part == False)
    args.soft_rel_part = args.mix_cpu_gpu and args.soft_rel_part and train_data.cross_part
    args.num_workers = 8  # fix num_worker to 8

    n_entities = dataset.n_entities

    def make_train_iterator(**rank_kwargs):
        # Head sampler first, then tail, exactly as the inline code did.
        head = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         args.neg_sample_size,
                                         mode='head',
                                         num_workers=args.num_workers,
                                         shuffle=True,
                                         exclude_positive=False,
                                         **rank_kwargs)
        tail = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         args.neg_sample_size,
                                         mode='tail',
                                         num_workers=args.num_workers,
                                         shuffle=True,
                                         exclude_positive=False,
                                         **rank_kwargs)
        return NewBidirectionalOneShotIterator(head, tail,
                                               args.neg_sample_size,
                                               args.neg_sample_size,
                                               True, n_entities)

    if args.num_proc > 1:
        # One iterator per training process. The extra stand-alone
        # ``train_sampler`` the old code also built here was never read on
        # the multi-process path, so it is no longer created.
        train_samplers = [make_train_iterator(rank=i)
                          for i in range(args.num_proc)]
    else:  # This is used for debug
        train_sampler = make_train_iterator()

    if args.valid or args.test:
        if len(args.gpu) > 1:
            args.num_test_proc = args.num_proc if args.num_proc < len(
                args.gpu) else len(args.gpu)
        else:
            args.num_test_proc = args.num_proc
        eval_dataset = EvalDataset(dataset, args)

        def make_eval_pair(split, rank, ranks):
            # Returns (head_sampler, tail_sampler) for one evaluation shard.
            head = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               args.neg_sample_size_eval,
                                               args.neg_sample_size_eval,
                                               args.eval_filter,
                                               mode='chunk-head',
                                               num_workers=args.num_workers,
                                               rank=rank, ranks=ranks)
            tail = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               args.neg_sample_size_eval,
                                               args.neg_sample_size_eval,
                                               args.eval_filter,
                                               mode='chunk-tail',
                                               num_workers=args.num_workers,
                                               rank=rank, ranks=ranks)
            return head, tail

    if args.valid:
        if args.num_proc > 1:
            valid_sampler_heads = []
            valid_sampler_tails = []
            for i in range(args.num_proc):
                head, tail = make_eval_pair('valid', i, args.num_proc)
                valid_sampler_heads.append(head)
                valid_sampler_tails.append(tail)
        else:  # This is used for debug
            valid_sampler_head, valid_sampler_tail = make_eval_pair(
                'valid', 0, 1)

    if args.test:
        if args.num_test_proc > 1:
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_test_proc):
                head, tail = make_eval_pair('test', i, args.num_test_proc)
                test_sampler_heads.append(head)
                test_sampler_tails.append(tail)
        else:
            test_sampler_head, test_sampler_tail = make_eval_pair(
                'test', 0, 1)

    # load model
    model = load_model(logger, args, dataset.n_entities, dataset.n_relations)
    if args.num_proc > 1 or args.async_update:
        model.share_memory()

    # We need to free all memory referenced by dataset.
    eval_dataset = None
    dataset = None

    print('Total initialize time {:.3f} seconds'.format(time.time() -
                                                        init_time_start))

    # train
    start = time.time()
    rel_parts = train_data.rel_parts if args.strict_rel_part or args.soft_rel_part else None
    cross_rels = train_data.cross_rels if args.soft_rel_part else None
    if args.num_proc > 1:
        procs = []
        barrier = mp.Barrier(args.num_proc)
        for i in range(args.num_proc):
            valid_sampler = [valid_sampler_heads[i], valid_sampler_tails[i]
                             ] if args.valid else None
            proc = mp.Process(target=train_mp,
                              args=(args, model, train_samplers[i],
                                    valid_sampler, i, rel_parts, cross_rels,
                                    barrier))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
    else:
        valid_samplers = [valid_sampler_head, valid_sampler_tail
                          ] if args.valid else None
        train(args, model, train_sampler, valid_samplers, rel_parts=rel_parts)

    print('training takes {} seconds'.format(time.time() - start))

    if args.save_emb is not None:
        # makedirs(exist_ok=True) creates missing parent directories and does
        # not fail if the directory appears between a check and the creation.
        os.makedirs(args.save_emb, exist_ok=True)
        model.save_emb(args.save_emb, args.dataset)

        # We need to save the model configurations as well.
        conf_file = os.path.join(args.save_emb, 'config.json')
        with open(conf_file, 'w') as outfile:
            json.dump(
                {
                    'dataset': args.dataset,
                    'model': args.model_name,
                    'emb_size': args.hidden_dim,
                    'max_train_step': args.max_step,
                    'batch_size': args.batch_size,
                    'neg_sample_size': args.neg_sample_size,
                    'lr': args.lr,
                    'gamma': args.gamma,
                    'double_ent': args.double_ent,
                    'double_rel': args.double_rel,
                    'neg_adversarial_sampling': args.neg_adversarial_sampling,
                    'adversarial_temperature': args.adversarial_temperature,
                    'regularization_coef': args.regularization_coef,
                    'regularization_norm': args.regularization_norm
                },
                outfile,
                indent=4)

    # test
    if args.test:
        start = time.time()
        if args.num_test_proc > 1:
            queue = mp.Queue(args.num_test_proc)
            procs = []
            for i in range(args.num_test_proc):
                proc = mp.Process(target=test_mp,
                                  args=(args, model, [
                                      test_sampler_heads[i],
                                      test_sampler_tails[i]
                                  ], i, 'Test', queue))
                procs.append(proc)
                proc.start()

            # Each worker puts a list of per-sample metric dicts on the queue.
            logs = []
            for _ in range(args.num_test_proc):
                logs.extend(queue.get())

            metrics = {}
            if logs:  # avoid IndexError on logs[0] when nothing was reported
                for metric in logs[0].keys():
                    metrics[metric] = sum(log[metric]
                                          for log in logs) / len(logs)
            for k, v in metrics.items():
                print('Test average {} : {}'.format(k, v))

            for proc in procs:
                proc.join()
        else:
            test(args, model, [test_sampler_head, test_sampler_tail])
        print('testing takes {:.3f} seconds'.format(time.time() - start))
def run(args, logger):
    """Train with PBG-style negative sampling, then optionally save and test.

    Builds ``PBG-head`` / ``PBG-tail`` training samplers (one pair per process
    when ``args.num_proc > 1``), optional validation and test samplers, loads
    the model, runs ``train`` (in ``args.num_proc`` processes or inline),
    saves embeddings to ``args.save_emb`` when it is not ``None`` and runs
    ``test`` when ``args.test`` is set.

    Args:
        args: parsed option namespace; ``neg_sample_size_test`` is replaced by
            the entity count when it is negative.
        logger: forwarded to ``load_model``.
    """
    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format)
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    if args.neg_sample_size_test < 0:
        args.neg_sample_size_test = n_entities

    train_data = TrainDataset(dataset, args, ranks=args.num_proc)

    def make_train_iterator(**rank_kwargs):
        # Head sampler first, then tail, exactly as the inline code did.
        head = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         mode='PBG-head',
                                         num_workers=args.num_worker,
                                         shuffle=True,
                                         exclude_positive=True,
                                         **rank_kwargs)
        tail = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         mode='PBG-tail',
                                         num_workers=args.num_worker,
                                         shuffle=True,
                                         exclude_positive=True,
                                         **rank_kwargs)
        return NewBidirectionalOneShotIterator(head, tail, True, n_entities)

    if args.num_proc > 1:
        train_samplers = [make_train_iterator(rank=i)
                          for i in range(args.num_proc)]
    else:
        train_sampler = make_train_iterator()

    if args.valid or args.test:
        eval_dataset = EvalDataset(dataset, args)

        def make_eval_pair(split, neg_sample_size, rank, ranks):
            # Returns (head_sampler, tail_sampler) for one evaluation shard.
            head = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               neg_sample_size,
                                               mode='PBG-head',
                                               num_workers=args.num_worker,
                                               rank=rank, ranks=ranks)
            tail = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               neg_sample_size,
                                               mode='PBG-tail',
                                               num_workers=args.num_worker,
                                               rank=rank, ranks=ranks)
            return head, tail

    if args.valid:
        # Here we want to use the regular negative sampler because we need to
        # ensure that all positive edges are excluded.
        if args.num_proc > 1:
            valid_sampler_heads = []
            valid_sampler_tails = []
            for i in range(args.num_proc):
                head, tail = make_eval_pair('valid',
                                            args.neg_sample_size_valid,
                                            i, args.num_proc)
                valid_sampler_heads.append(head)
                valid_sampler_tails.append(tail)
        else:
            valid_sampler_head, valid_sampler_tail = make_eval_pair(
                'valid', args.neg_sample_size_valid, 0, 1)

    if args.test:
        # Here we want to use the regular negative sampler because we need to
        # ensure that all positive edges are excluded.
        if args.num_proc > 1:
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_proc):
                head, tail = make_eval_pair('test',
                                            args.neg_sample_size_test,
                                            i, args.num_proc)
                test_sampler_heads.append(head)
                test_sampler_tails.append(tail)
        else:
            test_sampler_head, test_sampler_tail = make_eval_pair(
                'test', args.neg_sample_size_test, 0, 1)

    # We need to free all memory referenced by dataset.
    eval_dataset = None
    dataset = None

    # load model
    model = load_model(logger, args, n_entities, n_relations)
    if args.num_proc > 1:
        model.share_memory()

    # train
    start = time.time()
    if args.num_proc > 1:
        procs = []
        for i in range(args.num_proc):
            valid_samplers = [valid_sampler_heads[i], valid_sampler_tails[i]
                              ] if args.valid else None
            proc = mp.Process(target=train,
                              args=(args, model, train_samplers[i],
                                    valid_samplers))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
    else:
        valid_samplers = [valid_sampler_head, valid_sampler_tail
                          ] if args.valid else None
        train(args, model, train_sampler, valid_samplers)
    print('training takes {} seconds'.format(time.time() - start))

    if args.save_emb is not None:
        # makedirs(exist_ok=True) creates missing parent directories and does
        # not fail if the directory appears between a check and the creation.
        os.makedirs(args.save_emb, exist_ok=True)
        model.save_emb(args.save_emb, args.dataset)

    # test
    if args.test:
        if args.num_proc > 1:
            procs = []
            for i in range(args.num_proc):
                proc = mp.Process(target=test,
                                  args=(args, model, [
                                      test_sampler_heads[i],
                                      test_sampler_tails[i]
                                  ]))
                procs.append(proc)
                proc.start()
            for proc in procs:
                proc.join()
        else:
            test(args, model, [test_sampler_head, test_sampler_tail])
def run(args, logger):
    """Train with chunked negative sampling, then optionally save and test.

    Resolves the default chunk sizes, builds ``chunk-head`` / ``chunk-tail``
    training samplers (one pair per process when ``args.num_proc > 1``),
    optional validation and test samplers, loads the model, runs ``train``,
    saves embeddings to ``args.save_emb`` when it is not ``None`` and, when
    ``args.test`` is set, runs ``test`` and prints the metrics averaged over
    the test processes.

    Args:
        args: parsed option namespace. ``neg_sample_size_test``,
            ``eval_filter`` and the three ``neg_chunk_size*`` fields may be
            overwritten here.
        logger: forwarded to ``load_model``.
    """
    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format)
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    if args.neg_sample_size_test < 0:
        args.neg_sample_size_test = n_entities
    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample_eval:
        assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

    # When we generate a batch of negative edges from a set of positive edges,
    # we first divide the positive edges into chunks and corrupt the edges in a
    # chunk together. By default, the chunk size is equal to the negative
    # sample size. Usually, this works well. But we also allow users to specify
    # the chunk size themselves.
    if args.neg_chunk_size < 0:
        args.neg_chunk_size = args.neg_sample_size
    if args.neg_chunk_size_valid < 0:
        args.neg_chunk_size_valid = args.neg_sample_size_valid
    if args.neg_chunk_size_test < 0:
        args.neg_chunk_size_test = args.neg_sample_size_test

    train_data = TrainDataset(dataset, args, ranks=args.num_proc)

    def make_train_iterator(**rank_kwargs):
        # Head sampler first, then tail, exactly as the inline code did.
        head = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         args.neg_chunk_size,
                                         mode='chunk-head',
                                         num_workers=args.num_worker,
                                         shuffle=True,
                                         exclude_positive=True,
                                         **rank_kwargs)
        tail = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         args.neg_chunk_size,
                                         mode='chunk-tail',
                                         num_workers=args.num_worker,
                                         shuffle=True,
                                         exclude_positive=True,
                                         **rank_kwargs)
        return NewBidirectionalOneShotIterator(head, tail,
                                               args.neg_chunk_size,
                                               True, n_entities)

    if args.num_proc > 1:
        train_samplers = [make_train_iterator(rank=i)
                          for i in range(args.num_proc)]
    else:
        train_sampler = make_train_iterator()

    # for multiprocessing evaluation, we don't need to sample multiple batches
    # at a time in each process.
    num_workers = args.num_worker
    if args.num_proc > 1:
        num_workers = 1

    if args.valid or args.test:
        eval_dataset = EvalDataset(dataset, args)

        def make_eval_pair(split, neg_sample_size, neg_chunk_size, rank,
                           ranks):
            # Returns (head_sampler, tail_sampler) for one evaluation shard.
            head = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               neg_sample_size,
                                               neg_chunk_size,
                                               args.eval_filter,
                                               mode='chunk-head',
                                               num_workers=num_workers,
                                               rank=rank, ranks=ranks)
            tail = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               neg_sample_size,
                                               neg_chunk_size,
                                               args.eval_filter,
                                               mode='chunk-tail',
                                               num_workers=num_workers,
                                               rank=rank, ranks=ranks)
            return head, tail

    if args.valid:
        # Here we want to use the regular negative sampler because we need to
        # ensure that all positive edges are excluded.
        if args.num_proc > 1:
            valid_sampler_heads = []
            valid_sampler_tails = []
            for i in range(args.num_proc):
                head, tail = make_eval_pair('valid',
                                            args.neg_sample_size_valid,
                                            args.neg_chunk_size_valid,
                                            i, args.num_proc)
                valid_sampler_heads.append(head)
                valid_sampler_tails.append(tail)
        else:
            valid_sampler_head, valid_sampler_tail = make_eval_pair(
                'valid', args.neg_sample_size_valid,
                args.neg_chunk_size_valid, 0, 1)

    if args.test:
        # Here we want to use the regular negative sampler because we need to
        # ensure that all positive edges are excluded.
        if args.num_proc > 1:
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_proc):
                head, tail = make_eval_pair('test',
                                            args.neg_sample_size_test,
                                            args.neg_chunk_size_test,
                                            i, args.num_proc)
                test_sampler_heads.append(head)
                test_sampler_tails.append(tail)
        else:
            test_sampler_head, test_sampler_tail = make_eval_pair(
                'test', args.neg_sample_size_test, args.neg_chunk_size_test,
                0, 1)

    # We need to free all memory referenced by dataset.
    eval_dataset = None
    dataset = None

    # load model
    model = load_model(logger, args, n_entities, n_relations)
    if args.num_proc > 1:
        model.share_memory()

    # train
    start = time.time()
    if args.num_proc > 1:
        # Loop-invariant, so computed once instead of once per process.
        rel_parts = train_data.rel_parts if args.rel_part else None
        procs = []
        for i in range(args.num_proc):
            valid_samplers = [valid_sampler_heads[i], valid_sampler_tails[i]
                              ] if args.valid else None
            proc = mp.Process(target=train,
                              args=(args, model, train_samplers[i], i,
                                    rel_parts, valid_samplers))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
    else:
        valid_samplers = [valid_sampler_head, valid_sampler_tail
                          ] if args.valid else None
        # Use the same positional layout as the worker call above
        # (rank, rel_parts, valid_samplers). The old call passed
        # valid_samplers in the slot the worker call uses for the rank.
        # NOTE(review): rank 0 and rel_parts=None are assumed to be the
        # intended single-process values - confirm against train().
        train(args, model, train_sampler, 0, None, valid_samplers)
    print('training takes {} seconds'.format(time.time() - start))

    if args.save_emb is not None:
        # makedirs(exist_ok=True) creates missing parent directories and does
        # not fail if the directory appears between a check and the creation.
        os.makedirs(args.save_emb, exist_ok=True)
        model.save_emb(args.save_emb, args.dataset)

    # test
    if args.test:
        start = time.time()
        if args.num_proc > 1:
            queue = mp.Queue(args.num_proc)
            procs = []
            for i in range(args.num_proc):
                proc = mp.Process(target=test,
                                  args=(args, model, [
                                      test_sampler_heads[i],
                                      test_sampler_tails[i]
                                  ], i, 'Test', queue))
                procs.append(proc)
                proc.start()

            # Average every metric over the test processes.
            total_metrics = {}
            for _ in range(args.num_proc):
                metrics = queue.get()
                for k, v in metrics.items():
                    total_metrics[k] = total_metrics.get(k, 0) + v / args.num_proc
            # Report the averages; the old loop iterated ``metrics`` and so
            # printed only the last worker's values under "Test average".
            for k, v in total_metrics.items():
                print('Test average {} at [{}/{}]: {}'.format(
                    k, args.step, args.max_step, v))

            for proc in procs:
                proc.join()
        else:
            test(args, model, [test_sampler_head, test_sampler_tail])
        print('test:', time.time() - start)
def start_worker(args, logger):
    """Start kvclient for training.

    Waits ``WAIT_TIME`` seconds, loads this machine's dataset partition,
    places the partition books in shared memory, builds one training sampler
    per client and runs ``dist_train_test`` in ``args.num_client`` processes,
    returning once all of them have exited.

    Args:
        args: parsed option namespace; ``machine_id``, ``strict_rel_part``,
            ``soft_rel_part``, ``neg_sample_size_eval``, ``batch_size``,
            ``batch_size_eval`` and ``num_workers`` are written here.
        logger: forwarded to ``load_model``.
    """
    init_time_start = time.time()
    time.sleep(WAIT_TIME)  # wait for launch script

    namebook = dgl.contrib.read_ip_config(filename=args.ip_config)
    args.machine_id = get_local_machine_id(namebook)

    dataset, entity_partition_book, local2global = get_partition_dataset(
        args.data_path, args.dataset, args.format, args.machine_id)

    n_entities, n_relations = dataset.n_entities, dataset.n_relations

    print('Partition %d n_entities: %d' % (args.machine_id, n_entities))
    print("Partition %d n_relations: %d" % (args.machine_id, n_relations))

    entity_partition_book = F.tensor(entity_partition_book)
    relation_partition_book = F.tensor(
        get_long_tail_partition(dataset.n_relations, args.total_machine))
    local2global = F.tensor(local2global)

    # Same order as before: relation book, entity book, id mapping.
    for shared_tensor in (relation_partition_book, entity_partition_book,
                          local2global):
        shared_tensor.share_memory_()

    train_data = TrainDataset(dataset, args, ranks=args.num_client)
    # if there is no cross partition relation, we fall back to strict_rel_part
    args.strict_rel_part = args.mix_cpu_gpu and (train_data.cross_part == False)
    args.soft_rel_part = args.mix_cpu_gpu and args.soft_rel_part and train_data.cross_part

    if args.neg_sample_size_eval < 0:
        args.neg_sample_size_eval = dataset.n_entities
    args.batch_size = get_compatible_batch_size(args.batch_size,
                                                args.neg_sample_size)
    args.batch_size_eval = get_compatible_batch_size(
        args.batch_size_eval, args.neg_sample_size_eval)

    args.num_workers = 8  # fix num_workers to 8

    def sampler_for(mode, client_rank):
        return train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         args.neg_sample_size,
                                         mode=mode,
                                         num_workers=args.num_workers,
                                         shuffle=True,
                                         exclude_positive=False,
                                         rank=client_rank)

    train_samplers = []
    for client_rank in range(args.num_client):
        head_sampler = sampler_for('head', client_rank)
        tail_sampler = sampler_for('tail', client_rank)
        train_samplers.append(
            NewBidirectionalOneShotIterator(head_sampler, tail_sampler,
                                            args.neg_sample_size,
                                            args.neg_sample_size,
                                            True, n_entities))

    dataset = None

    model = load_model(logger, args, n_entities, n_relations)
    model.share_memory()

    print('Total initialize time {:.3f} seconds'.format(time.time() -
                                                        init_time_start))

    use_rel_parts = args.strict_rel_part or args.soft_rel_part
    rel_parts = train_data.rel_parts if use_rel_parts else None
    cross_rels = train_data.cross_rels if args.soft_rel_part else None

    barrier = mp.Barrier(args.num_client)
    workers = []
    for client_rank in range(args.num_client):
        worker = mp.Process(target=dist_train_test,
                            args=(args, model, train_samplers[client_rank],
                                  entity_partition_book,
                                  relation_partition_book, local2global,
                                  client_rank, rel_parts, cross_rels,
                                  barrier))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()
def main():
    """Parse options, build samplers and a ``BaseKEModel``, and train it.

    The loop runs ``args.max_step`` steps. Every ``args.log_interval`` steps it
    logs loss, regularization and speed; every ``args.eval_interval`` steps
    (when ``args.valid`` is set) it runs ``test`` in ``'Valid'`` mode and saves
    the result with ``paddle.save``; every ``args.save_step`` steps it calls
    ``model.save_model()``.
    """
    args = ArgParser().parse_args()
    prepare_save_path(args)
    args.neg_sample_size_eval = 1000
    set_global_seed(args.seed)

    init_time_start = time.time()
    dataset = get_dataset(args, args.data_path, args.dataset, args.format,
                          args.delimiter, args.data_files,
                          args.has_edge_importance)
    args.batch_size = get_compatible_batch_size(args.batch_size,
                                                args.neg_sample_size)
    args.batch_size_eval = get_compatible_batch_size(
        args.batch_size_eval, args.neg_sample_size_eval)

    #print(args)
    set_logger(args)

    print("To build training dataset")
    tic = time.time()
    train_data = TrainDataset(dataset,
                              args,
                              has_importance=args.has_edge_importance)
    print("Training dataset built, it takes %s seconds" % (time.time() - tic))

    args.num_workers = 8  # fix num_worker to 8

    print("Building training sampler")
    tic = time.time()
    head_sampler = train_data.create_sampler(
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        neg_sample_size=args.neg_sample_size,
        neg_mode='head')
    tail_sampler = train_data.create_sampler(
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        neg_sample_size=args.neg_sample_size,
        neg_mode='tail')
    train_sampler = NewBidirectionalOneShotIterator(head_sampler,
                                                    tail_sampler)
    print("Training sampler created, it takes %s seconds" %
          (time.time() - tic))

    if args.valid or args.test:
        if len(args.gpu) > 1:
            # At most one test process per GPU.
            args.num_test_proc = min(args.num_proc, len(args.gpu))
        else:
            args.num_test_proc = args.num_proc
        print("To create eval_dataset")
        tic = time.time()
        eval_dataset = EvalDataset(dataset, args)
        print("eval_dataset created, it takes %d seconds" %
              (time.time() - tic))

    if args.valid:
        if args.num_proc > 1:
            valid_samplers = []
            for proc_rank in range(args.num_proc):
                print("creating valid sampler for proc %d" % proc_rank)
                tic = time.time()
                valid_samplers.append(
                    eval_dataset.create_sampler('valid',
                                                args.batch_size_eval,
                                                mode='tail',
                                                num_workers=args.num_workers,
                                                rank=proc_rank,
                                                ranks=args.num_proc))
                print("Valid sampler for proc %d created, it takes %s seconds" %
                      (proc_rank, time.time() - tic))
        else:
            valid_samplers = [
                eval_dataset.create_sampler('valid',
                                            args.batch_size_eval,
                                            mode='tail',
                                            num_workers=args.num_workers,
                                            rank=0,
                                            ranks=1)
            ]

    # Record every option in the log.
    for arg in vars(args):
        logging.info('{:20}:{}'.format(arg, getattr(args, arg)))

    print("To create model")
    tic = time.time()
    model = BaseKEModel(args=args,
                        n_entities=dataset.n_entities,
                        n_relations=dataset.n_relations,
                        model_name=args.model_name,
                        hidden_size=args.hidden_dim,
                        entity_feat_dim=dataset.entity_feat.shape[1],
                        relation_feat_dim=dataset.relation_feat.shape[1],
                        gamma=args.gamma,
                        double_entity_emb=args.double_ent,
                        relation_times=args.ote_size,
                        scale_type=args.scale_type)

    model.entity_feat = dataset.entity_feat
    model.relation_feat = dataset.relation_feat
    print(len(model.parameters()))

    emb_banner = "using cpu emb\n" if args.cpu_emb else "using gpu emb\n"
    print(emb_banner * 5)

    optimizer = paddle.optimizer.Adam(learning_rate=args.mlp_lr,
                                      parameters=model.parameters())
    lr_tensor = paddle.to_tensor(args.lr)

    global_step = 0
    tic_train = time.time()
    log = {"loss": 0.0, "regularization": 0.0}

    for step in range(0, args.max_step):
        steps_done = step + 1
        pos_triples, neg_triples, ids, neg_head = next(train_sampler)
        loss = model.forward(pos_triples, neg_triples, ids, neg_head)
        log["loss"] = loss.numpy()[0]

        if args.regularization_coef > 0.0 and args.regularization_norm > 0:
            coef, nm = args.regularization_coef, args.regularization_norm
            reg = coef * norm(model.entity_embedding.curr_emb, nm)
            log['regularization'] = reg.numpy()[0]
            loss = loss + reg

        loss.backward()
        optimizer.step()
        if args.cpu_emb:
            model.entity_embedding.step(lr_tensor)
        optimizer.clear_grad()

        if steps_done % args.log_interval == 0:
            speed = args.log_interval / (time.time() - tic_train)
            logging.info(
                "step: %d, train loss: %.5f, regularization: %.4e, speed: %.2f steps/s"
                % (step, log["loss"], log["regularization"], speed))
            log["loss"] = 0.0
            tic_train = time.time()

        if args.valid and steps_done % args.eval_interval == 0 \
                and step > 1 and valid_samplers is not None:
            print("Valid begin")
            valid_start = time.time()
            valid_input_dict = test(args,
                                    model,
                                    valid_samplers,
                                    step,
                                    rank=0,
                                    mode='Valid')
            valid_path = os.path.join(args.save_path,
                                      "valid_{}.pkl".format(step))
            paddle.save(valid_input_dict, valid_path)

        # Save the model for the inference
        if steps_done % args.save_step == 0:
            print("The step:{}, save model path:{}".format(
                step + 1, args.save_path))
            model.save_model()
            print("Save model done.")
def run(args, logger):
    """Run single-machine training, then optional embedding export and test.

    Resolves default chunk sizes and the per-process thread count, builds the
    training samplers (and, when ``args.valid`` or ``args.test`` is set, the
    evaluation samplers), loads the model, trains it in ``args.num_proc``
    processes (inline when ``args.num_proc <= 1``), writes the embeddings plus
    ``config.json`` into ``args.save_emb`` when that is not ``None``, and
    finally evaluates on the test split.

    Args:
        args: parsed option namespace. Several fields are overwritten here:
            ``neg_sample_size_test``, ``eval_filter``, the ``neg_chunk_size*``
            fields, ``strict_rel_part``, ``soft_rel_part``, ``num_thread`` and
            ``num_test_proc``.
        logger: forwarded to ``load_model``.
    """
    train_time_start = time.time()
    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format,
                          args.data_files)
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    if args.neg_sample_size_test < 0:
        args.neg_sample_size_test = n_entities
    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample_eval:
        assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

    # When we generate a batch of negative edges from a set of positive edges,
    # we first divide the positive edges into chunks and corrupt the edges in a
    # chunk together. By default, the chunk size is equal to the negative
    # sample size. Usually, this works well. But we also allow users to specify
    # the chunk size themselves.
    if args.neg_chunk_size < 0:
        args.neg_chunk_size = args.neg_sample_size
    if args.neg_chunk_size_valid < 0:
        args.neg_chunk_size_valid = args.neg_sample_size_valid
    if args.neg_chunk_size_test < 0:
        args.neg_chunk_size_test = args.neg_sample_size_test

    train_workers = args.num_worker
    train_data = TrainDataset(dataset, args, ranks=args.num_proc)
    # if there is no cross partition relation, we fall back to strict_rel_part
    args.strict_rel_part = args.mix_cpu_gpu and (train_data.cross_part == False)
    args.soft_rel_part = args.mix_cpu_gpu and args.soft_rel_part and train_data.cross_part

    # Automatically set number of OMP threads for each process if it is not
    # provided.
    # The value for GPU is evaluated in AWS p3.16xlarge
    # The value for CPU is evaluated in AWS x1.32xlarge
    if args.nomp_thread_per_process == -1:
        if len(args.gpu) > 0:
            # GPU training
            args.num_thread = 4
        else:
            # CPU training
            args.num_thread = 1
    else:
        args.num_thread = args.nomp_thread_per_process

    def make_train_iterator(**rank_kwargs):
        # Head sampler first, then tail, exactly as the inline code did.
        head = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         args.neg_chunk_size,
                                         mode='head',
                                         num_workers=train_workers,
                                         shuffle=True,
                                         exclude_positive=False,
                                         **rank_kwargs)
        tail = train_data.create_sampler(args.batch_size,
                                         args.neg_sample_size,
                                         args.neg_chunk_size,
                                         mode='tail',
                                         num_workers=train_workers,
                                         shuffle=True,
                                         exclude_positive=False,
                                         **rank_kwargs)
        return NewBidirectionalOneShotIterator(head, tail,
                                               args.neg_chunk_size,
                                               args.neg_sample_size,
                                               True, n_entities)

    if args.num_proc > 1:
        train_samplers = [make_train_iterator(rank=i)
                          for i in range(args.num_proc)]
    else:
        train_sampler = make_train_iterator()

    # for multiprocessing evaluation, we don't need to sample multiple batches
    # at a time in each process.
    eval_workers = 1 if args.num_proc > 1 else train_workers

    if args.valid or args.test:
        if len(args.gpu) > 1:
            args.num_test_proc = args.num_proc if args.num_proc < len(
                args.gpu) else len(args.gpu)
        else:
            args.num_test_proc = args.num_proc
        eval_dataset = EvalDataset(dataset, args)

        def make_eval_pair(split, neg_sample_size, neg_chunk_size, rank,
                           ranks):
            # Returns (head_sampler, tail_sampler) for one evaluation shard.
            head = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               neg_sample_size,
                                               neg_chunk_size,
                                               args.eval_filter,
                                               mode='chunk-head',
                                               num_workers=eval_workers,
                                               rank=rank, ranks=ranks)
            tail = eval_dataset.create_sampler(split, args.batch_size_eval,
                                               neg_sample_size,
                                               neg_chunk_size,
                                               args.eval_filter,
                                               mode='chunk-tail',
                                               num_workers=eval_workers,
                                               rank=rank, ranks=ranks)
            return head, tail

    if args.valid:
        # Here we want to use the regular negative sampler because we need to
        # ensure that all positive edges are excluded.
        if args.num_proc > 1:
            valid_sampler_heads = []
            valid_sampler_tails = []
            for i in range(args.num_proc):
                head, tail = make_eval_pair('valid',
                                            args.neg_sample_size_valid,
                                            args.neg_chunk_size_valid,
                                            i, args.num_proc)
                valid_sampler_heads.append(head)
                valid_sampler_tails.append(tail)
        else:
            valid_sampler_head, valid_sampler_tail = make_eval_pair(
                'valid', args.neg_sample_size_valid,
                args.neg_chunk_size_valid, 0, 1)

    if args.test:
        # Here we want to use the regular negative sampler because we need to
        # ensure that all positive edges are excluded.
        # We use a maximum of num_gpu in test stage to save GPU memory.
        if args.num_test_proc > 1:
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_test_proc):
                head, tail = make_eval_pair('test',
                                            args.neg_sample_size_test,
                                            args.neg_chunk_size_test,
                                            i, args.num_test_proc)
                test_sampler_heads.append(head)
                test_sampler_tails.append(tail)
        else:
            test_sampler_head, test_sampler_tail = make_eval_pair(
                'test', args.neg_sample_size_test, args.neg_chunk_size_test,
                0, 1)

    # We need to free all memory referenced by dataset.
    eval_dataset = None
    dataset = None

    # load model
    model = load_model(logger, args, n_entities, n_relations)
    if args.num_proc > 1 or args.async_update:
        model.share_memory()

    print('Total data loading time {:.3f} seconds'.format(time.time() -
                                                          train_time_start))

    # train
    start = time.time()
    rel_parts = train_data.rel_parts if args.strict_rel_part or args.soft_rel_part else None
    cross_rels = train_data.cross_rels if args.soft_rel_part else None
    if args.num_proc > 1:
        procs = []
        barrier = mp.Barrier(args.num_proc)
        for i in range(args.num_proc):
            valid_sampler = [valid_sampler_heads[i], valid_sampler_tails[i]
                             ] if args.valid else None
            proc = mp.Process(target=train_mp,
                              args=(args, model, train_samplers[i],
                                    valid_sampler, i, rel_parts, cross_rels,
                                    barrier))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
    else:
        valid_samplers = [valid_sampler_head, valid_sampler_tail
                          ] if args.valid else None
        train(args, model, train_sampler, valid_samplers, rel_parts=rel_parts)

    print('training takes {} seconds'.format(time.time() - start))

    if args.save_emb is not None:
        # makedirs(exist_ok=True) creates missing parent directories and does
        # not fail if the directory appears between a check and the creation.
        os.makedirs(args.save_emb, exist_ok=True)
        model.save_emb(args.save_emb, args.dataset)

        # We need to save the model configurations as well.
        conf_file = os.path.join(args.save_emb, 'config.json')
        with open(conf_file, 'w') as outfile:
            json.dump(
                {
                    'dataset': args.dataset,
                    'model': args.model_name,
                    'emb_size': args.hidden_dim,
                    'max_train_step': args.max_step,
                    'batch_size': args.batch_size,
                    'neg_sample_size': args.neg_sample_size,
                    'lr': args.lr,
                    'gamma': args.gamma,
                    'double_ent': args.double_ent,
                    'double_rel': args.double_rel,
                    'neg_adversarial_sampling': args.neg_adversarial_sampling,
                    'adversarial_temperature': args.adversarial_temperature,
                    'regularization_coef': args.regularization_coef,
                    'regularization_norm': args.regularization_norm
                },
                outfile,
                indent=4)

    # test
    if args.test:
        start = time.time()
        if args.num_test_proc > 1:
            queue = mp.Queue(args.num_test_proc)
            procs = []
            for i in range(args.num_test_proc):
                proc = mp.Process(target=test_mp,
                                  args=(args, model, [
                                      test_sampler_heads[i],
                                      test_sampler_tails[i]
                                  ], i, 'Test', queue))
                procs.append(proc)
                proc.start()

            # Each worker puts a list of per-sample metric dicts on the queue.
            logs = []
            for _ in range(args.num_test_proc):
                logs.extend(queue.get())

            metrics = {}
            if logs:  # avoid IndexError on logs[0] when nothing was reported
                for metric in logs[0].keys():
                    metrics[metric] = sum(log[metric]
                                          for log in logs) / len(logs)
            for k, v in metrics.items():
                print('Test average {} at [{}/{}]: {}'.format(
                    k, args.step, args.max_step, v))

            for proc in procs:
                proc.join()
        else:
            test(args, model, [test_sampler_head, test_sampler_tail])
        print('test:', time.time() - start)
def start_worker(args, logger):
    """Spawn the local client processes for distributed training.

    The function resolves the id of the local machine from the IP config,
    loads that machine's dataset partition, places the partition books and
    the local-to-global mapping in shared memory, loads the model, builds
    one bidirectional training iterator per client, and finally starts
    ``args.num_client`` processes running ``dist_train_test`` and waits for
    all of them to finish.

    Parameters
    ----------
    args :
        Run configuration. It is modified in place: ``machine_id`` and
        ``num_thread`` are always set, and ``neg_chunk_size`` is replaced by
        ``neg_sample_size`` when it is negative.
    logger :
        Forwarded unchanged to ``load_model``.

    Returns
    -------
    None
    """
    train_time_start = time.time()

    # Find out which machine of the cluster this process runs on.
    namebook = dgl.contrib.read_ip_config(filename=args.ip_config)
    args.machine_id = get_local_machine_id(namebook)

    dataset, entity_book, local_to_global = get_partition_dataset(
        args.data_path, args.dataset, args.format, args.machine_id)
    n_entities, n_relations = dataset.n_entities, dataset.n_relations

    print('Partition %d n_entities: %d' % (args.machine_id, n_entities))
    print("Partition %d n_relations: %d" % (args.machine_id, n_relations))

    # Turn the partition metadata into tensors ...
    entity_book = F.tensor(entity_book)
    relation_book = F.tensor(
        get_long_tail_partition(dataset.n_relations, args.total_machine))
    local_to_global = F.tensor(local_to_global)

    # ... and move them to shared memory before the client processes start.
    for shared_tensor in (relation_book, entity_book, local_to_global):
        shared_tensor.share_memory_()

    model = load_model(logger, args, n_entities, n_relations)
    model.share_memory()

    # Negative edges are produced by splitting the positive edges of a batch
    # into chunks and corrupting each chunk as a whole. Unless the user
    # supplied an explicit chunk size, fall back to the negative sample size.
    if args.neg_chunk_size < 0:
        args.neg_chunk_size = args.neg_sample_size

    num_workers = NUM_WORKER
    train_data = TrainDataset(dataset, args, ranks=args.num_client)

    def make_sampler(mode, rank):
        # One edge sampler that corrupts either the head or the tail side.
        return train_data.create_sampler(
            args.batch_size,
            args.neg_sample_size,
            args.neg_chunk_size,
            mode=mode,
            num_workers=num_workers,
            shuffle=True,
            exclude_positive=False,
            rank=rank)

    # One head/tail iterator per client; the head sampler is created first.
    train_samplers = [
        NewBidirectionalOneShotIterator(
            make_sampler('head', rank),
            make_sampler('tail', rank),
            args.neg_chunk_size,
            args.neg_sample_size,
            True,
            n_entities)
        for rank in range(args.num_client)
    ]

    # Drop this function's reference to the raw dataset.
    dataset = None

    print('Total data loading time {:.3f} seconds'.format(time.time() - train_time_start))

    uses_rel_part = args.strict_rel_part or args.soft_rel_part
    rel_parts = train_data.rel_parts if uses_rel_part else None
    cross_rels = train_data.cross_rels if args.soft_rel_part else None

    args.num_thread = NUM_THREAD

    # Start one process per client, then block until every one has exited.
    barrier = mp.Barrier(args.num_client)
    workers = []
    for rank in range(args.num_client):
        worker_args = (
            args,
            model,
            train_samplers[rank],
            entity_book,
            relation_book,
            local_to_global,
            rank,
            rel_parts,
            cross_rels,
            barrier,
        )
        worker = mp.Process(target=dist_train_test, args=worker_args)
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()