def main():
    """Debug entry point: run one TSAGEConv layer over a toy temporal graph
    and train it briefly to verify that the node embeddings actually receive
    and apply gradients.

    Returns:
        (src_feat, dst_feat): per-edge source/destination embeddings from the
        last training step.
    """
    import copy
    import itertools

    args = parse_args()
    logger = set_logger()
    logger.info(args)
    if args.gpu:
        device = torch.device("cuda:{}".format(get_free_gpu()))
    else:
        device = torch.device("cpu")
    edges, nodes = test_graph()
    g = construct_dglgraph(edges, nodes, device)
    logger.info(f"Begin Conv. Device {device}")
    dim = g.ndata["nfeat"].shape[-1]
    dims = [dim, 108, 4]
    model = TSAGEConv(in_feats=dims[0], out_feats=dims[1],
                      aggregator_type="mean")
    model = model.to(device)
    # Snapshot the initial embeddings so we can check they change in-place.
    nfeat_copy = copy.deepcopy(g.ndata["nfeat"])
    loss_fn = nn.CosineEmbeddingLoss(margin=0.5)
    # Optimize both the node-embedding tensor and the conv parameters.
    optimizer = torch.optim.Adam(
        itertools.chain([g.ndata["nfeat"]], model.parameters()), lr=0.01)
    for i in range(10):
        logger.info("Epoch %3d", i)
        model.train()
        optimizer.zero_grad()
        src_feat, dst_feat = model(g, current_layer=1)
        # All edges are positive pairs: pull src/dst embeddings together.
        labels = torch.ones((g.number_of_edges()), device=device)
        loss = loss_fn(src_feat, dst_feat, labels)
        loss.backward()
        optimizer.step()
        print("nfeat")
        # Tensor.data_ptr() replaces the deprecated
        # Tensor.storage().data_ptr() chain and proves the update happened
        # in-place (same buffer) rather than by rebinding.
        print(g.ndata["nfeat"].data_ptr())
        print("nfeat copy")
        print(nfeat_copy.data_ptr())
        # Embeddings must have moved away from the initial snapshot.
        assert not torch.all(torch.eq(nfeat_copy, g.ndata["nfeat"]))
        print(src_feat.shape, dst_feat.shape)
        print(g.ndata["nfeat"].grad)
    return src_feat, dst_feat
def main(args, logger):
    """Full-batch training of the GTC temporal link-prediction model.

    Builds the aggregation/propagation graph pair from the temporal edge
    list, trains `GTCTrainer` with early stopping on validation AUC, then
    evaluates on the test split and persists both results and weights.

    Args:
        args: parsed command-line namespace (dataset, lr, epochs, ...).
        logger: configured logging.Logger instance.
    """
    set_random_seed()
    logger.info("Set random seeds.")
    logger.info(args)
    # Silence numba's verbose DEBUG output.
    numba_logger = logging.getLogger('numba')
    numba_logger.setLevel(logging.WARNING)
    # Set device utility.
    if args.gpu:
        if args.gid >= 0:
            device = torch.device("cuda:{}".format(args.gid))
        else:
            device = torch.device("cuda:{}".format(get_free_gpu()))
        logger.info(
            "Begin Conv on Device %s, GPU Memory %d GB", device,
            torch.cuda.get_device_properties(device).total_memory // 2**30)
    else:
        device = torch.device("cpu")
        logger.info("Begin COnv on Device CPU.")
    # Load nodes, edges, and labeled dataset for training, validation and test.
    nodes, edges, train_labels, val_labels, test_labels = prepare_dataset(
        args.dataset)
    # Edges must arrive sorted by timestamp for the adjacency construction.
    delta = edges["timestamp"].shift(-1) - edges["timestamp"]
    # Pandas loc[low:high] includes high, so we use slice operations here instead.
    assert np.all(delta[:len(delta) - 1] >= 0)
    # Set DGLGraph, node_features, edge_features, and edge timestamps.
    g = construct_dglgraph(edges, nodes, device, node_dim=args.n_hidden)
    nfeat = g.ndata["nfeat"]
    efeat = g.edata["efeat"]
    tfeat = g.edata["timestamp"]
    if nfeat.requires_grad and not args.trainable:
        g.ndata["nfeat"] = torch.zeros_like(g.ndata["nfeat"])
    # Prepare the agg_graph, prop_graph and their required features.
    src = edges["from_node_id"].to_numpy()
    dst = edges["to_node_id"].to_numpy()
    t = edges["timestamp"].to_numpy().astype('float32')
    adj_eid_l, adj_ngh_l, adj_ts_l = construct_adj(src, dst, t, len(nodes))
    new_node_ids, new_node_tss, old_id_map, agg_graph, prop_graph = \
        GTCUtility.split_adj(adj_ngh_l, adj_ts_l)
    # old_id_map entries are (original_node_id, timestamp) pairs.
    old_ids = [m[0] for m in old_id_map]
    old_ids = torch.tensor(old_ids, device=device)
    old_tss = [m[1] for m in old_id_map]
    agg_graph = agg_graph.to(device)
    prop_graph = prop_graph.to(device)
    agg_graph.ndata["timestamp"] = torch.tensor(old_tss).to(nfeat)
    old_eids = torch.tensor(np.concatenate(adj_eid_l)).to(device)
    # Detach: edge features/timestamps are inputs, not trained through here.
    agg_graph.edata["efeat"] = efeat[old_eids].detach()
    agg_graph.edata["timestamp"] = tfeat[old_eids].detach()
    degs = compute_degrees(new_node_ids, len(old_id_map))
    agg_graph.ndata["degree"] = torch.tensor(degs).to(efeat)
    prop_graph.ndata["degree"] = agg_graph.ndata["degree"]
    # Set model configuration.
    in_feats = g.ndata["nfeat"].shape[-1]
    edge_feats = g.edata["efeat"].shape[-1]
    model = GTCTrainer(nfeat, in_feats, edge_feats, args.n_hidden,
                       args.n_hidden, args)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # clip gradients by value: https://stackoverflow.com/questions/54716377/how-to-do-gradient-clipping-in-pytorch
    for p in model.parameters():
        p.register_hook(lambda grad: torch.clamp(grad, -args.clip, args.clip))
    if nfeat.requires_grad:
        logger.info("Trainable node embeddings.")
    else:
        logger.info("Freeze node embeddings.")
    neg_sampler = NegativeSampler(np.unique(dst), args.n_neg)
    # FIX: np.int was removed in NumPy 1.24 — use the builtin int instead.
    num_batch = int(np.ceil(len(train_labels) / args.batch_size))
    epoch_bar = trange(args.epochs, disable=(not args.display))
    early_stopper = EarlyStopMonitor(max_round=5)
    # String tags baked into checkpoint/model file names.
    lr = "%.4f" % args.lr
    trainable = "train" if args.trainable else "no-train"
    norm = "norm" if hasattr(model, "norm") else "no-norm"
    pos = "pos" if model.pos_contra else "no-pos"
    neg = "neg" if model.neg_contra else "no-neg"
    lam = '%.1f' % args.lam
    margin = '%.1f' % (args.margin)
    for epoch in epoch_bar:
        # Full-batch training: every epoch uses all edges at once.
        batch_samples = (src, dst, t)
        neg_dst = neg_sampler(len(src))
        if args.pos_contra or args.neg_contra:
            contra_samples = history_sampler(src, t, adj_ngh_l, adj_ts_l,
                                             args.n_hist)
        else:
            contra_samples = None
        start = time.time()
        # These features are not leaf-tensors.
        agg_graph.ndata["nfeat"] = nfeat[old_ids]
        model.train()
        optimizer.zero_grad()
        loss = model(agg_graph, prop_graph, new_node_ids, new_node_tss,
                     batch_samples, neg_dst, contra_samples)
        loss.backward()
        optimizer.step()
        acc, f1, auc = eval_linkpred(model, val_labels, agg_graph, prop_graph,
                                     new_node_ids, new_node_tss)
        epoch_bar.update()
        epoch_bar.set_postfix(loss=loss.item(), acc=acc, f1=f1, auc=auc)

        def ckpt_path(epoch):
            return f'./ckpt/{args.dataset}-{args.agg_type}-{trainable}-{norm}-{pos}-{neg}-{lr}-{epoch}-{args.hostname}-{device.type}-{device.index}.pth'

        # Early stopping on validation AUC; reload the best checkpoint.
        if early_stopper.early_stop_check(auc):
            logger.info(
                f"No improvement over {early_stopper.max_round} epochs.")
            logger.info(
                f'Loading the best model at epoch {early_stopper.best_epoch}')
            model.load_state_dict(
                torch.load(ckpt_path(early_stopper.best_epoch)))
            logger.info(
                f'Loaded the best model at epoch {early_stopper.best_epoch} for inference'
            )
            break
        else:
            torch.save(model.state_dict(), ckpt_path(epoch))
    model.eval()
    _, _, val_auc = eval_linkpred(model, val_labels, agg_graph, prop_graph,
                                  new_node_ids, new_node_tss)
    acc, f1, auc = eval_linkpred(model, test_labels, agg_graph, prop_graph,
                                 new_node_ids, new_node_tss)
    params = {
        "best_epoch": early_stopper.best_epoch,
        "trainable": args.trainable,
        "lr": "%.4f" % (args.lr),
        "agg_type": args.agg_type,
        "no-ce": args.no_ce,
        "norm": norm,
        "pos_contra": args.pos_contra,
        "neg_contra": args.neg_contra,
        "n_hist": args.n_hist,
        "n_neg": args.n_neg,
        "n_layers": args.n_layers,
        "time_encoding": args.time_encoding,
        "lambda": args.lam,
        "margin": args.margin
    }
    write_result(val_auc, (acc, f1, auc), args.dataset, params,
                 postfix="FullBatch")
    MODEL_SAVE_PATH = f'./saved_models/{args.dataset}-{args.agg_type}-{lr}-{lam}-{margin}.pth'
    # Move to CPU before saving so the checkpoint loads on CPU-only hosts.
    model = model.cpu()
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
args = config_parser().parse_args() # Arguments if True: BATCH_SIZE = args.bs NUM_NEIGHBORS = args.n_degree NUM_NEG = 1 NUM_EPOCH = args.n_epoch NUM_HEADS = args.n_head DROP_OUT = args.drop_out # GPU = get_free_gpu() # GPU = str(args.gpu) if args.gpu >= 0: GPU = str(args.gpu) else: GPU = get_free_gpu() # UNIFORM = args.uniform USE_TIME = args.time AGG_METHOD = args.agg_method ATTN_MODE = args.attn_mode SEQ_LEN = NUM_NEIGHBORS DATA = args.data NUM_LAYER = args.n_layer LEARNING_RATE = args.lr NODE_DIM = args.node_dim TIME_DIM = args.time_dim MODEL_SAVE_PATH = f'./saved_models/{args.model}-{args.agg_method}-{args.attn_mode}-{args.data}.pth' def get_checkpoint_path(epoch):
def main(args, logger):
    """Mini-batch training of `TemporalLinkTrainer` for temporal link
    prediction, with per-value gradient clipping, early stopping on
    validation AUC, and final test-set evaluation/result persistence.

    Args:
        args: parsed command-line namespace (dataset, opt, lr, epochs, ...).
        logger: configured logging.Logger instance.
    """
    set_random_seed()
    logger.info("Set random seeds.")
    logger.info(args)
    # Set device utility.
    if args.gpu:
        if args.gid >= 0:
            device = torch.device("cuda:{}".format(args.gid))
        else:
            device = torch.device("cuda:{}".format(get_free_gpu()))
        logger.info(
            "Begin Conv on Device %s, GPU Memory %d GB", device,
            torch.cuda.get_device_properties(device).total_memory // 2**30)
    else:
        device = torch.device("cpu")
    # Load nodes, edges, and labeled dataset for training, validation and test.
    nodes, edges, train_labels, val_labels, test_labels = prepare_dataset(
        args.dataset)
    # Edges must arrive sorted by timestamp.
    delta = edges["timestamp"].shift(-1) - edges["timestamp"]
    # Pandas loc[low:high] includes high, so we use slice operations here instead.
    assert np.all(delta[:len(delta) - 1] >= 0)
    # Set DGLGraph, node_features, edge_features, and edge timestamps.
    g = construct_dglgraph(edges, nodes, device, bidirected=args.bidirected)
    if not args.trainable:
        g.ndata["nfeat"] = torch.zeros_like(g.ndata["nfeat"])
    # For each entry in the adjacency list `u: (v, t)` of node `u`, compute
    # the related upper_bound with respect to `t`. So that we can use `cumsum`
    # or `cummax` to accelerate the computation speed. Otherwise, we have to
    # compute a mask matrix multiplication for each `(v, t)`, which costs even
    # 12GB memory for 58K interactions.
    if args.dataset == "ia-slashdot-reply-dir":
        deg_indices = _deg_indices_full(g)
    else:
        deg_indices = _par_deg_indices_full(g)
    for k, v in deg_indices.items():
        g.edata[k] = v.to(device).unsqueeze(-1).detach()
    # Set model configuration.
    # Input features: node_featurs + edge_features + time_encoding
    in_feats = (g.ndata["nfeat"].shape[-1] + g.edata["efeat"].shape[-1])
    model = TemporalLinkTrainer(g, in_feats, args.n_hidden, args.n_hidden,
                                args)
    model = model.to(device)
    if args.opt == "Adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.opt == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay)
    else:
        raise NotImplementedError(args.opt)
    # clip gradients by value: https://stackoverflow.com/questions/54716377/how-to-do-gradient-clipping-in-pytorch
    for p in model.parameters():
        p.register_hook(lambda grad: torch.clamp(grad, -args.clip, args.clip))
    # Only use positive edges, so we have to divide eids by 2.
    train_eids = np.arange(train_labels.shape[0] // 2)
    # FIX: np.int was removed in NumPy 1.24 — use the builtin int instead.
    num_batch = int(np.ceil(len(train_eids) / args.batch_size))
    epoch_bar = trange(args.epochs, disable=(not args.display))
    early_stopper = EarlyStopMonitor(max_round=5)
    # Checkpoint-name tags; loop-invariant, so compute them once up front.
    lr = "%.4f" % args.lr
    trainable = "train" if args.trainable else "no-train"
    norm = "norm" if hasattr(model, "norm") else "no-norm"
    pos = "pos" if model.pos_contra else "no-pos"
    neg = "neg" if model.neg_contra else "no-neg"
    for epoch in epoch_bar:
        np.random.shuffle(train_eids)
        batch_bar = trange(num_batch, disable=(not args.display))
        for idx in batch_bar:
            model.train()
            optimizer.zero_grad()
            batch_eids = train_eids[idx * args.batch_size:(idx + 1) *
                                    args.batch_size]
            # In the bidirected graph, each undirected edge occupies two
            # consecutive eids; scale to address the forward copy.
            mul = 2 if args.bidirected else 1
            loss = model(g, batch_eids * mul)
            loss.backward()
            optimizer.step()
            acc, f1, auc = eval_linkpred(model, g, val_labels,
                                         batch_size=args.batch_size)
            batch_bar.set_postfix(loss=loss.item(), acc=acc, f1=f1, auc=auc)
        acc, f1, auc = eval_linkpred(model, g, val_labels)
        epoch_bar.update()
        epoch_bar.set_postfix(loss=loss.item(), acc=acc, f1=f1, auc=auc)

        def ckpt_path(epoch):
            return f'./ckpt/{args.dataset}-{args.agg_type}-{trainable}-{norm}-{pos}-{neg}-{lr}-{epoch}-{args.hostname}-{device.type}-{device.index}.pth'

        # Early stopping on validation AUC; reload the best checkpoint.
        if early_stopper.early_stop_check(auc):
            logger.info(
                f"No improvement over {early_stopper.max_round} epochs.")
            logger.info(
                f'Loading the best model at epoch {early_stopper.best_epoch}')
            model.load_state_dict(
                torch.load(ckpt_path(early_stopper.best_epoch)))
            logger.info(
                f'Loaded the best model at epoch {early_stopper.best_epoch} for inference'
            )
            break
        else:
            torch.save(model.state_dict(), ckpt_path(epoch))
    model.eval()
    _, _, val_auc = eval_linkpred(model, g, val_labels)
    acc, f1, auc = eval_linkpred(model, g, test_labels)
    params = {
        "best_epoch": early_stopper.best_epoch,
        "bidirected": args.bidirected,
        "trainable": args.trainable,
        "opt": args.opt,
        "lr": "%.4f" % (args.lr),
        "agg_type": args.agg_type,
        "no-ce": args.no_ce,
        "norm": norm,
        "pos_contra": args.pos_contra,
        "neg_contra": args.neg_contra,
        "n_hist": args.n_hist,
        "n_neg": args.n_neg,
        "n_layers": args.n_layers,
        "time_encoding": args.time_encoding,
        "lambda": args.lam,
        "margin": args.margin
    }
    write_result(val_auc, (acc, f1, auc), args.dataset, params)
    lr = '%.4f' % (args.lr)
    lam = '%.1f' % args.lam
    margin = '%.1f' % (args.margin)
    MODEL_SAVE_PATH = f'./saved_models/{args.dataset}-{args.agg_type}-{lr}-{lam}-{margin}.pth'
    # Move to CPU before saving so the checkpoint loads on CPU-only hosts.
    model = model.cpu()
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
def main(args, logger):
    """Downstream node-classification task on top of a frozen, pre-trained
    TGCL model: compute node embeddings once, then train a logistic-regression
    head with early stopping and report test metrics.

    Args:
        args: parsed command-line namespace (dataset, sampling, lr, ...).
        logger: configured logging.Logger instance.

    Raises:
        ValueError: if args.sampling is not one of
            {"normal", "resample", "balance"}.
    """
    set_random_seed()
    logger.info("Set random seeds.")
    logger.info(args)
    # Set device utility.
    if args.gpu:
        if args.gid >= 0:
            device = torch.device("cuda:{}".format(args.gid))
        else:
            device = torch.device("cuda:{}".format(get_free_gpu()))
        logger.info(
            "Begin Conv on Device %s, GPU Memory %d GB", device,
            torch.cuda.get_device_properties(device).total_memory // 2**30)
    else:
        device = torch.device("cpu")
    # Load nodes, edges, and labeled dataset for training, validation and test.
    nodes, edges, train_data, val_data, test_data = prepare_node_dataset(
        args.dataset)
    # Report positive-label counts per split (labels are heavily imbalanced).
    logger.info("Train, valid, test: %d, %d, %d",
                (train_data["state_label"] == 1).sum(),
                (val_data["state_label"] == 1).sum(),
                (test_data["state_label"] == 1).sum())
    delta = edges["timestamp"].shift(-1) - edges["timestamp"]
    # Pandas loc[low:high] includes high, so we use slice operations here instead.
    assert np.all(delta[:len(delta) - 1] >= 0)
    # Set DGLGraph, node_features, edge_features, and edge timestamps.
    g = construct_dglgraph(edges, nodes, device, bidirected=args.bidirected)
    if not args.trainable:
        g.ndata["nfeat"] = torch.zeros_like(g.ndata["nfeat"])
    deg_indices = _par_deg_indices_full(g)
    for k, v in deg_indices.items():
        g.edata[k] = v.to(device).unsqueeze(-1).detach()
    # Set model configuration.
    # Input features: node_featurs + edge_features + time_encoding
    in_feats = (g.ndata["nfeat"].shape[-1] + g.edata["efeat"].shape[-1])
    tgcl = TemporalLinkTrainer(g, in_feats, args.n_hidden, args.n_hidden,
                               args)
    tgcl = tgcl.to(device)
    logger.info("loading saved TGCL model")
    # The checkpoint name encodes the GCN lr / lambda / margin it was
    # trained with, so format them identically to the training script.
    gcn_lr = '%.4f' % args.gcn_lr
    lam = '%.1f' % args.lam
    margin = '%.1f' % args.margin
    model_path = f"./saved_models/{args.dataset}-{args.agg_type}-{gcn_lr}-{lam}-{margin}.pth"
    tgcl.load_state_dict(torch.load(model_path))
    tgcl.eval()
    logger.info("TGCL models loaded")
    logger.info("Start training node classification task")
    # LR head consumes concatenated (src, dst) embeddings, hence 2x hidden.
    lr_model = LR(args.n_hidden * 2).to(device)
    optimizer = torch.optim.Adam(lr_model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # Splits are laid out consecutively: [train | val | test].
    train_ids = np.arange(len(train_data))
    val_eids = len(train_data) + np.arange(len(val_data))
    test_eids = len(train_data) + len(val_data) + np.arange(len(test_data))
    batch_size = args.batch_size
    # FIX: np.int was removed in NumPy 1.24 — use the builtin int instead.
    num_batch = int(np.ceil(len(train_data) / batch_size))
    epoch_bar = trange(args.epochs, disable=(not args.display))
    early_stopper = EarlyStopMonitor(max_round=10)
    if args.pos_weight:
        if args.sampling == "balance":
            pos_weight = torch.tensor(args.neg_ratio)
        else:
            pos_num = (train_data["state_label"] == 1).sum()
            neg_num = (train_data["state_label"] == 0).sum()
            # NOTE(review): the /10 damping of the class-imbalance ratio is
            # empirical — confirm against the experiment configuration.
            pos_weight = torch.tensor(neg_num / pos_num / 10)
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    else:
        loss_fn = nn.BCEWithLogitsLoss()
    # The TGCL encoder is frozen: run the convolution once and reuse the
    # embeddings for every epoch/batch below.
    start = time.time()
    with torch.no_grad():
        g.ndata["deg"] = (g.in_degrees() + g.out_degrees()).to(
            g.ndata["nfeat"])
        src_feat, dst_feat = tgcl.conv(g)
        embeds = torch.cat((src_feat, dst_feat), dim=1)
    logger.info("Convolution takes %.2f secs.", time.time() - start)
    for epoch in epoch_bar:
        np.random.shuffle(train_ids)
        batch_bar = trange(num_batch, disable=(not args.display))
        batch_sampler = balance_batch(train_ids,
                                      train_data.loc[train_ids, "state_label"],
                                      num_batch)
        for idx in batch_bar:
            tgcl.eval()
            lr_model.train()
            if args.sampling == "normal":
                batch_ids = train_ids[idx * batch_size:(idx + 1) * batch_size]
            elif args.sampling == "resample":
                batch_ids = resample(train_ids,
                                     n_samples=batch_size,
                                     stratify=train_data["state_label"])
            elif args.sampling == "balance":
                batch_ids = next(batch_sampler)
            else:
                # FIX: previously an unknown strategy left batch_ids unbound
                # and crashed with a confusing NameError below.
                raise ValueError(
                    "Unknown sampling strategy: {}".format(args.sampling))
            labels = train_data.loc[batch_ids, "state_label"].to_numpy()
            optimizer.zero_grad()
            batch_embeds = embeds[batch_ids]
            lr_prob = lr_model(batch_embeds)
            loss = loss_fn(lr_prob, torch.tensor(labels).to(lr_prob))
            loss.backward()
            optimizer.step()
            acc, f1, auc = eval_nodeclass(embeds, lr_model, val_eids,
                                          val_data)
            batch_bar.set_postfix(loss=loss.item(), acc=acc, f1=f1, auc=auc)
        acc, f1, auc = eval_nodeclass(embeds, lr_model, val_eids, val_data)
        epoch_bar.update()
        epoch_bar.set_postfix(loss=loss.item(), acc=acc, f1=f1, auc=auc)

        def ckpt_path(epoch):
            return f'./nc-ckpt/{args.dataset}-{args.lr}-{args.batch_size}-{args.sampling}-{args.pos_weight}-{epoch}-{args.hostname}-{device.type}-{device.index}.pth'

        # Early stopping on validation AUC; reload the best LR head.
        if early_stopper.early_stop_check(auc):
            logger.info('No improvment over {} epochs, stop training'.format(
                early_stopper.max_round))
            logger.info(
                f'Loading the best model at epoch {early_stopper.best_epoch}')
            best_model_path = ckpt_path(early_stopper.best_epoch)
            lr_model.load_state_dict(torch.load(best_model_path))
            logger.info(
                f'Loaded the best model at epoch {early_stopper.best_epoch} for inference'
            )
            break
        else:
            torch.save(lr_model.state_dict(), ckpt_path(epoch))
    lr_model.eval()
    _, _, val_auc = eval_nodeclass(embeds, lr_model, val_eids, val_data)
    acc, f1, auc = eval_nodeclass(embeds, lr_model, test_eids, test_data)
    params = {
        "best_epoch": early_stopper.best_epoch,
        "batch_size": args.batch_size,
        "lr": args.lr,
        "sampling": args.sampling,
        "pos_weight": args.pos_weight,
        "neg_ratio": args.neg_ratio
    }
    write_result(val_auc, (acc, f1, auc), args.dataset, params,
                 postfix="NC-GTC", results="nc-results")