def test_edgemask_pt_ft_freeze():
    """Edgemask auxiliary task: self-supervised pretrain, then finetune with a frozen encoder."""
    cfg = get_default_args()
    cfg.auxiliary_task = "edgemask"
    cfg.trainer = "self_auxiliary_task_pretrain"
    cfg.alpha = 1
    cfg.freeze = True
    ds = build_dataset(cfg)
    mdl = build_model(cfg)
    result = build_task(cfg, dataset=ds, model=mdl).train()
    # Accuracy is a proportion, so it must lie in [0, 1].
    assert 0 <= result["Acc"] <= 1
def test_grarep_ppi():
    """GraRep link prediction on PPI; ROC AUC must be a valid probability."""
    cfg = get_default_args()
    cfg.task = 'link_prediction'
    cfg.dataset = 'ppi'
    cfg.model = 'grarep'
    build_dataset(cfg)
    cfg.step = 2
    build_model(cfg)
    scores = build_task(cfg).train()
    assert 0 <= scores['ROC_AUC'] <= 1
def __init__(self, args):
    """Build the KG dataset and scoring model, then log the configuration."""
    super(TripleLinkPrediction, self).__init__()
    self.dataset = build_dataset(args)
    # The model constructor reads entity/relation counts from args.
    args.nentity = self.dataset.num_entities
    args.nrelation = self.dataset.num_relations
    self.model = build_model(args)
    self.args = args
    set_logger(args)
    for line in ('Model: %s' % args.model,
                 '#entity: %d' % args.nentity,
                 '#relation: %d' % args.nrelation):
        logging.info(line)
def test_hope_ppi():
    """HOPE link prediction on PPI; ROC AUC must be a valid probability."""
    cfg = get_default_args()
    cfg.task = 'link_prediction'
    cfg.dataset = 'ppi'
    cfg.model = 'hope'
    build_dataset(cfg)
    cfg.beta = 0.001
    build_model(cfg)
    scores = build_task(cfg).train()
    assert 0 <= scores['ROC_AUC'] <= 1
def __init__(self, args, dataset=None, model=None):
    """Multiplex link prediction: resolve dataset/model and remember the eval type."""
    super(MultiplexLinkPrediction, self).__init__(args)
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    # Not every dataset exposes a feature matrix.
    if hasattr(dataset, "num_features"):
        args.num_features = dataset.num_features
    if model is None:
        model = build_model(args)
    self.model = model
    self.eval_type = args.eval_type
def test_hope_ppi():
    """HOPE unsupervised node classification on PPI."""
    cfg = get_default_args()
    cfg.task = 'unsupervised_node_classification'
    cfg.dataset = 'ppi'
    cfg.model = 'hope'
    build_dataset(cfg)
    cfg.beta = 0.001
    build_model(cfg)
    scores = build_task(cfg).train()
    assert scores['Micro-F1 0.9'] > 0
def run(dataset_name):
    """Train multiplex node classification once per configured seed; return all results."""
    args = build_default_args_for_multiplex_node_classification(dataset_name)
    args = DATASET_REGISTRY[dataset_name](args)
    dataset = build_dataset(args)
    results = []
    for seed in args.seed:
        set_random_seed(seed)
        results.append(build_task(args, dataset=dataset).train())
    return results
def test_grarep_ppi():
    """GraRep unsupervised node classification on PPI."""
    cfg = get_default_args()
    cfg.task = 'unsupervised_node_classification'
    cfg.dataset = 'ppi'
    cfg.model = 'grarep'
    build_dataset(cfg)
    cfg.step = 1
    build_model(cfg)
    scores = build_task(cfg).train()
    assert scores['Micro-F1 0.9'] > 0
def __init__(self, args, dataset=None, model=None):
    """Multiplex node classification: cache labels/sizes and move the model to its device."""
    super(MultiplexNodeClassification, self).__init__(args)
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    self.label_matrix = self.data.y
    self.num_nodes = dataset.num_nodes
    self.num_classes = dataset.num_classes
    self.hidden_size = args.hidden_size
    if model is None:
        model = build_model(args)
    self.model = model
    self.args = args
    # args.cpu forces CPU; otherwise use the default CUDA device.
    self.device = torch.device('cpu' if args.cpu else 'cuda')
    self.model = self.model.to(self.device)
def test_mvgrl():
    """MVGRL unsupervised training on Cora for two epochs."""
    cfg = get_unsupervised_nn_args()
    cfg.task = "unsupervised_node_classification"
    cfg.dataset = "cora"
    cfg.max_epochs = 2
    cfg.model = "mvgrl"
    ds = build_dataset(cfg)
    cfg.num_features = ds.num_features
    cfg.num_classes = ds.num_classes
    result = build_task(cfg).train()
    assert result['Acc'] > 0
def test_gcn_cora():
    """GCN node classification on Cora; accuracy must be a valid proportion."""
    cfg = get_default_args()
    cfg.task = "node_classification"
    cfg.dataset = "cora"
    cfg.model = "gcn"
    ds = build_dataset(cfg)
    cfg.num_features = ds.num_features
    cfg.num_classes = ds.num_classes
    build_model(cfg)
    result = build_task(cfg).train()
    assert 0 <= result["Acc"] <= 1
def plot_graph(args):
    """Print dataset statistics and save a colored ego-network sample as a PNG.

    Loads the dataset named by ``args.dataset``, tabulates basic counts,
    then BFS-expands ``args.depth`` hops from a random start node and draws
    the induced subgraph (color/size encode BFS depth) to
    ``args.save_dir/display.png``.
    """
    # path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', name)
    # dataset = Planetoid(path, name)
    dataset = build_dataset(args)
    data = dataset[0]
    name = args.dataset
    depth = args.depth
    pic_file = osp.join(args.save_dir, 'display.png')
    # Summary table: one row of dataset-level counts.
    col_names = [
        'Dataset', '#nodes', '#edges', '#features', '#classes', '#labeled data'
    ]
    tab_data = [[
        name, data.x.shape[0], data.edge_index.shape[1], data.x.shape[1],
        len(set(data.y.numpy())),
        sum(data.train_mask.numpy())
    ]]
    print(tabulate(tab_data, headers=col_names, tablefmt='psql'))
    # Rebuild an undirected NetworkX graph from the edge_index columns.
    G = nx.Graph()
    G.add_edges_from([
        tuple(data.edge_index[:, i].numpy())
        for i in range(data.edge_index.shape[1])
    ])
    # BFS from a random seed node, recording each node's hop distance.
    s = random.choice(list(G.nodes()))
    q = [s]
    node_set = set([s])
    node_index = {s: 0}
    max_index = 1
    for _ in range(depth):
        nq = []
        for x in q:
            for key in G[x].keys():
                if key not in node_set:
                    nq.append(key)
                    node_set.add(key)
                    node_index[key] = node_index[x] + 1
        # Only count a new BFS level if it actually added nodes.
        if len(nq) > 0:
            max_index += 1
        q = nq
    # Map hop distance to a rainbow color; closer nodes are drawn larger.
    cmap = cm.rainbow(np.linspace(0.0, 1.0, max_index))
    for node, index in node_index.items():
        G.nodes[node]['color'] = cmap[index]
        G.nodes[node]['size'] = (max_index - index) * 50
    fig, ax = plt.subplots()
    plot_network(G.subgraph(list(node_set)), node_style=use_attributes())
    plt.savefig(pic_file)
    print(f'Sampled ego network saved to {pic_file} .')
def test_gcn_cora():
    """GCN node classification on Cora; accuracy must lie in [0, 1]."""
    conf = get_default_args()
    conf.task = 'node_classification'
    conf.dataset = 'cora'
    conf.model = 'gcn'
    data = build_dataset(conf)
    conf.num_features = data.num_features
    conf.num_classes = data.num_classes
    build_model(conf)
    metrics = build_task(conf).train()
    assert 0 <= metrics['Acc'] <= 1
def __init__(self, args, dataset=None, model=None):
    """Multiplex node classification: resolve dataset/model and place the model on a device."""
    super(MultiplexNodeClassification, self).__init__(args)
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    self.label_matrix = self.data.y
    self.num_nodes = dataset.num_nodes
    self.num_classes = dataset.num_classes
    self.hidden_size = args.hidden_size
    if model is None:
        model = build_model(args)
    self.model = model
    self.args = args
    # Fall back to CPU when CUDA is unavailable or explicitly disabled.
    if args.cpu or not torch.cuda.is_available():
        self.device = "cpu"
    else:
        self.device = args.device_id[0]
    self.model = self.model.to(self.device)
def test_prone_blogcatalog():
    """ProNE unsupervised node classification on BlogCatalog."""
    cfg = get_default_args()
    cfg.task = 'unsupervised_node_classification'
    cfg.dataset = 'blogcatalog'
    cfg.model = 'prone'
    build_dataset(cfg)
    cfg.step = 5
    cfg.theta = 0.5
    cfg.mu = 0.2
    build_model(cfg)
    scores = build_task(cfg).train()
    assert scores['Micro-F1 0.9'] > 0
def test_mixhop_citeseer():
    """MixHop node classification on Citeseer with two layers."""
    cfg = get_default_args()
    cfg.task = "node_classification"
    cfg.dataset = "citeseer"
    cfg.model = "mixhop"
    ds = build_dataset(cfg)
    cfg.num_features = ds.num_features
    cfg.num_classes = ds.num_classes
    cfg.num_layers = 2
    build_model(cfg)
    result = build_task(cfg).train()
    assert 0 <= result["Acc"] <= 1
def __init__(self, args):
    """Multiplex link prediction task: build dataset and model, keep training knobs."""
    super(MultiplexLinkPrediction, self).__init__(args)
    ds = build_dataset(args)
    self.data = ds[0]
    # Not every dataset exposes a feature matrix.
    if hasattr(ds, "num_features"):
        args.num_features = ds.num_features
    self.model = build_model(args)
    self.patience = args.patience
    self.max_epoch = args.max_epoch
    self.eval_type = args.eval_type
def test_compgcn_wn18rr():
    """CompGCN with DistMult scoring on WN18RR; MRR must lie in [0, 1]."""
    cfg = get_kg_default_args()
    cfg.lbl_smooth = 0.1
    cfg.score_func = "distmult"
    cfg.dataset = "wn18rr"
    cfg.model = "compgcn"
    cfg.task = "link_prediction"
    cfg.regularizer = "basis"
    ds = build_dataset(cfg)
    cfg = get_nums(ds, cfg)
    result = build_task(cfg).train()
    assert 0 <= result["MRR"] <= 1
def test_mixhop_citeseer():
    """MixHop node classification on Citeseer with two layers."""
    conf = get_default_args()
    conf.task = 'node_classification'
    conf.dataset = 'citeseer'
    conf.model = 'mixhop'
    data = build_dataset(conf)
    conf.num_features = data.num_features
    conf.num_classes = data.num_classes
    conf.num_layers = 2
    build_model(conf)
    metrics = build_task(conf).train()
    assert 0 <= metrics['Acc'] <= 1
def test_rgcn_wn18():
    """R-GCN with self-loops on WN18; MRR must lie in [0, 1]."""
    cfg = get_kg_default_args()
    cfg.self_dropout = 0.2
    cfg.self_loop = True
    cfg.dataset = "wn18"
    cfg.model = "rgcn"
    cfg.task = "link_prediction"
    cfg.regularizer = "basis"
    ds = build_dataset(cfg)
    cfg = get_nums(ds, cfg)
    result = build_task(cfg).train()
    assert 0 <= result["MRR"] <= 1
def test_prone_amazon():
    """ProNE multiplex link prediction on Amazon; AUC must be a valid probability."""
    cfg = get_default_args()
    cfg.task = 'multiplex_link_prediction'
    cfg.dataset = 'amazon'
    cfg.model = 'prone'
    build_dataset(cfg)
    cfg.step = 5
    cfg.theta = 0.5
    cfg.mu = 0.2
    build_model(cfg)
    scores = build_task(cfg).train()
    assert 0 <= scores['ROC_AUC'] <= 1
def __init__(self, args, dataset=None, model=None):
    """Triple link prediction: resolve dataset/model, move model to device, log setup."""
    super(TripleLinkPrediction, self).__init__()
    if dataset is None:
        dataset = build_dataset(args)
    self.dataset = dataset
    # The model constructor reads entity/relation counts from args.
    args.nentity = self.dataset.num_entities
    args.nrelation = self.dataset.num_relations
    if model is None:
        model = build_model(args)
    self.model = model
    self.args = args
    self.device = "cpu" if args.cpu else args.device_id[0]
    self.model = self.model.to(self.device)
    set_logger(args)
    for line in ("Model: %s" % args.model,
                 "#entity: %d" % args.nentity,
                 "#relation: %d" % args.nrelation):
        logging.info(line)
def __init__(self, args, dataset=None, model=None):
    """Multiplex link prediction: pick a device, resolve dataset/model, keep eval type."""
    super(MultiplexLinkPrediction, self).__init__(args)
    # Fall back to CPU when CUDA is unavailable or explicitly disabled.
    if args.cpu or not torch.cuda.is_available():
        self.device = "cpu"
    else:
        self.device = args.device_id[0]
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    # Not every dataset exposes a feature matrix.
    if hasattr(dataset, "num_features"):
        args.num_features = dataset.num_features
    if model is None:
        model = build_model(args)
    self.model = model
    self.eval_type = args.eval_type
def __init__(
    self,
    args,
    dataset=None,
    model: Optional[SupervisedHomogeneousNodeClassificationModel] = None,
):
    """Supervised homogeneous node classification task.

    Resolves the dataset and model (building them from ``args`` when not
    supplied), optionally preprocesses for the "sgcpn" model, and either
    delegates training to a model-provided trainer or sets up its own
    Adam optimizer and moves data/model to the chosen device.
    """
    super(NodeClassification, self).__init__(args)
    self.args = args
    self.model_name = args.model
    # Fall back to CPU when CUDA is unavailable or explicitly disabled;
    # otherwise use the first configured device id.
    self.device = "cpu" if not torch.cuda.is_available(
    ) or args.cpu else args.device_id[0]
    dataset = build_dataset(args) if dataset is None else dataset
    if args.model == "sgcpn" and args.missing_rate > 0:
        # sgcpn preprocessing is only defined for these citation datasets.
        assert args.dataset in ["cora", "citeseer", "pubmed"]
        dataset.data = preprocess_data_sgcpn(dataset.data,
                                             normalize_feature=True,
                                             missing_rate=0)
        # adj_slice = torch.tensor(dataset.data.adj.size())
        # adj_slice[0] = 0
        # dataset.slices["adj"] = adj_slice
    self.dataset = dataset
    self.data = dataset[0]
    # Expose dataset dimensions to the model constructor via args.
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.num_nodes = dataset.data.x.shape[0]
    self.model: SupervisedHomogeneousNodeClassificationModel = build_model(
        args) if model is None else model
    self.model.set_device(self.device)
    # A model may supply its own trainer class; if so, instantiate it and
    # let it own the whole training loop.
    self.trainer: Optional[
        SupervisedHomogeneousNodeClassificationTrainer] = (
            self.model.get_trainer(NodeClassification, self.args)(
                self.args) if self.model.get_trainer(
                    NodeClassification, self.args) else None)
    if not self.trainer:
        # No custom trainer: this task trains the model itself. The model
        # may still provide its own optimizer via get_optimizer(args).
        self.optimizer = (torch.optim.Adam(self.model.parameters(),
                                           lr=args.lr,
                                           weight_decay=args.weight_decay)
                          if not hasattr(self.model, "get_optimizer") else
                          self.model.get_optimizer(args))
        self.data.apply(lambda x: x.to(self.device))
        self.model: SupervisedHomogeneousNodeClassificationModel = self.model.to(
            self.device)
        self.patience = args.patience
        self.max_epoch = args.max_epoch
def raw_experiment(args):
    """Run every (dataset, model, seed, split) variant and tabulate the results.

    Runs variants sequentially on a single device (or CPU/distributed),
    otherwise fans them out over a multiprocessing pool with one worker per
    configured device. Returns a dict mapping (dataset, model) to the list
    of per-seed/per-split results.
    """
    variants = list(
        gen_variants(dataset=args.dataset,
                     model=args.model,
                     seed=args.seed,
                     split=args.split))
    results_dict = defaultdict(list)
    if len(args.devices) == 1 or args.cpu or args.distributed:
        # Sequential path: one training run per variant-specialized args.
        results = [
            train(args) for args in variant_args_generator(args, variants)
        ]
        for variant, result in zip(variants, results):
            # variant[:-2] drops seed and split, grouping runs by (dataset, model).
            results_dict[variant[:-2]].append(result)
    else:
        mp.set_start_method("spawn", force=True)
        # Make sure datasets are downloaded first
        # (workers would otherwise race to download the same files).
        datasets = args.dataset
        for dataset in datasets:
            args.dataset = dataset
            build_dataset(args)
        args.dataset = datasets  # restore the full dataset list on args
        num_workers = len(args.devices)
        with mp.Pool(processes=num_workers) as pool:
            # Map each worker pid to a device so train_parallel can pick its GPU.
            pids = pool.map(getpid, range(num_workers))
            args.pid_to_cuda = dict(zip(pids, args.devices))
            results = pool.map(train_parallel,
                               variant_args_generator(args, variants))
        for variant, result in zip(variants, results):
            results_dict[variant[:-2]].append(result)
    tablefmt = args.tablefmt if hasattr(args, "tablefmt") else "github"
    output_results(results_dict, tablefmt)
    return results_dict
def test_netmf_ppi():
    """NetMF unsupervised node classification on PPI."""
    cfg = get_default_args()
    cfg.task = 'unsupervised_node_classification'
    cfg.dataset = 'ppi'
    cfg.model = 'netmf'
    build_dataset(cfg)
    cfg.window_size = 2
    cfg.rank = 32
    cfg.negative = 3
    cfg.is_large = False
    build_model(cfg)
    scores = build_task(cfg).train()
    assert scores['Micro-F1 0.9'] > 0
def test_dgi():
    """DGI unsupervised training on Cora for two epochs with sparse inputs."""
    cfg = get_unsupervised_nn_args()
    cfg.task = "unsupervised_node_classification"
    cfg.dataset = "cora"
    cfg.activation = "relu"
    cfg.sparse = True
    cfg.max_epochs = 2
    cfg.model = "dgi"
    ds = build_dataset(cfg)
    cfg.num_features = ds.num_features
    cfg.num_classes = ds.num_classes
    result = build_task(cfg).train()
    assert result["Acc"] > 0
def test_pyg_cheb_cora():
    """ChebNet (PyG) node classification on Cora."""
    cfg = get_default_args()
    cfg.task = 'node_classification'
    cfg.dataset = 'cora'
    cfg.model = 'pyg_cheb'
    ds = build_dataset(cfg)
    cfg.num_features = ds.num_features
    cfg.num_classes = ds.num_classes
    cfg.num_layers = 2
    cfg.filter_size = 5
    build_model(cfg)
    result = build_task(cfg).train()
    assert 0 <= result['Acc'] <= 1
def test_asgcn_cora():
    """AS-GCN sampled node classification on Cora with per-layer sample sizes."""
    cfg = get_default_args()
    cfg.task = 'node_classification_sampling'
    cfg.dataset = 'cora'
    cfg.model = 'asgcn'
    ds = build_dataset(cfg)
    cfg.num_features = ds.num_features
    cfg.num_classes = ds.num_classes
    cfg.num_layers = 3
    cfg.sample_size = [64, 64, 32]
    build_model(cfg)
    result = build_task(cfg).train()
    assert 0 <= result['Acc'] <= 1
def run(dataset_name, missing_rate=0, num_layers=40):
    """Train node classification once per configured seed; return all results."""
    args = build_default_args_for_node_classification(
        dataset_name, missing_rate=missing_rate, num_layers=num_layers)
    args = DATASET_REGISTRY[dataset_name](args)
    # NOTE(review): this build_dataset variant returns (dataset, args) —
    # unlike the single-return variant used elsewhere; confirm against its definition.
    dataset, args = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    results = []
    for seed in args.seed:
        set_random_seed(seed)
        results.append(build_task(args, dataset=dataset).train())
    return results