# "val_loss": loss, # "val_metric": val_metric, # "test_metric": test_metric, # }, double_precision=5) # else: # tolerance+=1 # print('-- done --', file=sys.stderr) # print(best_result) # sys.stdout.flush() # print('-- sklearn --', file=sys.stderr) features = features.numpy() for ids, targets, epoch_progress in problem.iterate(mode='train', shuffle=True, batch_size=999999): x_train = features[ids] y_train = targets for ids, targets, epoch_progress in problem.iterate(mode='val', shuffle=True, batch_size=999999): x_val = features[ids] y_val = targets for ids, targets, epoch_progress in problem.iterate(mode='test', shuffle=True, batch_size=999999): x_test = features[ids] y_test = targets from sklearn.linear_model import LogisticRegression
if args.cuda:
    model = model.cuda()

print(model, file=sys.stderr)

# --
# Train

set_seeds(args.seed ** 2)

for epoch in range(args.epochs):
    # Train
    _ = model.train()
    for ids, targets, progress in problem.iterate(mode='train', shuffle=True,
                                                  batch_size=args.batch_size):
        preds = model.train_step(
            ids=ids,
            adj=problem.adj,
            targets=targets,
            loss_fn=problem.loss_fn,
        )
        sys.stderr.write("\repoch=%d | progress=%f" % (epoch, progress))
        sys.stderr.flush()

    # Evaluate
    _ = model.eval()
    print()
    print({
        "epoch": epoch,
    })
def main(args):
    # Load problem
    mp_index = {
        'dblp': ['APA', 'APAPA', 'APCPA'],
        'yelp': ['BRURB', 'BRKRB'],
        'yago': ['MAM', 'MDM', 'MWM'],
        'dblp2': ['APA', 'APAPA', 'APCPA'],
    }
    schemes = mp_index[args.problem]
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.cuda else "cpu")
    problem = NodeProblem(problem_path=args.problem_path,
                          problem=args.problem,
                          device=device,
                          schemes=schemes,
                          train_per=args.train_per,
                          K=args.K,
                          input_edge_dims=args.in_edge_len,
                          emb_len=args.in_node_len)

    # --
    # Define model

    n_train_samples = list(map(int, args.n_train_samples.split(',')))
    n_val_samples = list(map(int, args.n_val_samples.split(',')))
    output_dims = list(map(int, args.output_dims.split(',')))
    model = CLING(**{
        "problem": problem,
        "n_mp": len(schemes),
        "sampler_class": sampler_lookup[args.sampler_class],
        "prep_class": prep_lookup[args.prep_class],
        "prep_len": args.prep_len,
        "aggregator_class": aggregator_lookup[args.aggregator_class],
        "mpaggr_class": metapath_aggregator_lookup[args.mpaggr_class],
        "edgeupt_class": edge_aggregator_lookup[args.edgeupt_class],
        "n_head": args.n_head,
        "layer_specs": [
            {
                "n_train_samples": n_train_samples[0],
                "n_val_samples": n_val_samples[0],
                "output_dim": output_dims[0],
                "activation": F.relu,
                "concat_node": args.concat_node,
                "concat_edge": args.concat_edge,
                'n_hid': args.n_hid,
            },
            {
                "n_train_samples": n_train_samples[1],
                "n_val_samples": n_val_samples[1],
                "output_dim": output_dims[1],
                "activation": F.relu,  # lambda x: x
                "concat_node": args.concat_node,
                "concat_edge": args.concat_edge,
                'n_hid': args.n_hid,
            },
            # {
            #     "n_train_samples": n_train_samples[2],
            #     "n_val_samples": n_val_samples[2],
            #     "output_dim": output_dims[2],
            #     "activation": lambda x: x,  # lambda x: x
            #     "concat_node": args.concat_node,
            #     "concat_edge": args.concat_edge,
            # },
        ][:args.n_layer],
        # "lr_init": args.lr_init,
        # "lr_schedule": args.lr_schedule,
        # "weight_decay": args.weight_decay,
        "dropout": args.dropout,
        "input_dropout": args.input_dropout,
        "batchnorm": args.batchnorm,
        "attn_dropout": args.attn_dropout,
        "concat_node": True,
    })

    if args.cuda:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = torch.nn.DataParallel(model)
        model = model.to(device)

    # --
    # Define optimizer

    lr = args.lr_init
    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=args.weight_decay, amsgrad=False)
    elif args.optimizer == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'asgd':
        optimizer = torch.optim.ASGD(model.parameters(), lr=lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    elif args.optimizer == 'rms':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, alpha=0.99, eps=1e-08,
                                        weight_decay=args.weight_decay, momentum=0.9,
                                        centered=False)

    if args.lr_schedule == 'cosinew':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=args.lr_patience, T_mult=2, eta_min=1e-5, last_epoch=-1)
    elif args.lr_schedule == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=args.lr_patience, eta_min=1e-5, last_epoch=-1)
    elif args.lr_schedule == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=args.factor, patience=args.lr_patience,
            verbose=False, threshold=0.000001, threshold_mode='rel', cooldown=0,
            min_lr=0, eps=1e-08)
    elif args.lr_schedule == 'onecycle':
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer, max_lr=lr, total_steps=args.epochs * 26, pct_start=0.3,
            anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85,
            max_momentum=0.95, div_factor=25.0, final_div_factor=10000.0, last_epoch=-1)
    elif args.lr_schedule == 'step':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    else:
        # Constant learning rate: a StepLR with gamma=1 never changes the lr.
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=1)

    # optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=args.weight_decay, momentum=0.9)
    # print(model, file=sys.stdout)

    # --
    # Train

    set_seeds(args.seed)
    start_time = time()
    val_metric = None
    tolerance = 0
    best_val_loss = 100000
    best_val_acc = 0
    best_result = None
    best_model = None
    for epoch in range(args.epochs):
        # Early stopping
        if tolerance > args.tolerance:
            break

        train_loss = 0

        # Train
        _ = model.train()
        id_count = 0
        for ids, targets, epoch_progress in problem.iterate(mode='train', shuffle=True,
                                                            batch_size=args.batch_size):
            loss, preds = train_step(
                model=model,
                optimizer=optimizer,
                ids=ids,
                targets=targets,
                loss_fn=problem.loss_fn,
            )
            train_loss += loss.item() * ids.shape[0]
            id_count += ids.shape[0]
            # train_metric = problem.metric_fn(to_numpy(targets), to_numpy(preds))
            # print(json.dumps({
            #     "epoch": epoch,
            #     "epoch_progress": epoch_progress,
            #     "train_metric": train_metric,
            #     "time": time() - start_time,
            # }, double_precision=5))
            # sys.stdout.flush()
            if args.lr_schedule == 'onecycle':
                scheduler.step()
            if args.lr_schedule in ['cosine', 'cosinew']:
                scheduler.step(epoch + epoch_progress)

        print(json.dumps({
            "epoch": epoch,
            'lr': [optimizer.param_groups[0]['lr']],
            "time": time() - start_time,
            "train_loss": train_loss / id_count,
        }, double_precision=5))
        sys.stdout.flush()

        # Evaluate
        if epoch >= -1:
            _ = model.eval()
            val_loss, val_metric = evaluate(
                model,
                problem,
                batch_size=args.val_batch_size,
                mode='val',
                loss_fn=problem.loss_fn,
            )
            # _, test_metric = evaluate(model, problem, batch_size=8, mode='test', loss_fn=problem.loss_fn)

            if val_metric['accuracy'] > best_val_acc or (
                    val_metric['accuracy'] == best_val_acc and val_loss < best_val_loss):
                tolerance = 0
                best_val_loss = val_loss
                best_val_acc = val_metric['accuracy']
                best_result = json.dumps({
                    "epoch": epoch,
                    "val_loss": val_loss,
                    "val_metric": val_metric,
                    # "test_metric": test_metric,
                }, double_precision=5)
                best_model = model
            else:
                tolerance += 1

            print(json.dumps({
                "epoch": epoch,
                "val_loss": val_loss,
                "val_metric": val_metric,
                # "test_metric": test_metric,
                "tolerance": tolerance,
            }, double_precision=5))
            sys.stdout.flush()

            if args.lr_schedule == 'plateau':
                scheduler.step(val_loss)
            if args.lr_schedule in ['step']:
                scheduler.step()

    print('-- done --')

    _, test_metric = evaluate(
        best_model,
        problem,
        batch_size=args.val_batch_size,
        mode='test',
        loss_fn=problem.loss_fn,
    )
    print(json.dumps({
        # "epoch": epoch,
        # "val_loss": loss,
        # "val_metric": val_metric,
        "test_metric": test_metric,
        # "tolerance": tolerance,
    }, double_precision=5), file=sys.stderr)
    # print(best_result, file=sys.stderr)
    sys.stdout.flush()
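# Entry point: main() presumably receives `args` from an argparse-based CLI.  The
# parser is not shown in this fragment, so `parse_args()` below is a hypothetical
# helper name, sketched only to illustrate how main() would be invoked.
# if __name__ == "__main__":
#     args = parse_args()
#     main(args)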