def import_dataset(name='CORA'):
    """Load (downloading if necessary) a benchmark graph dataset by name.

    Parameters
    ----------
    name : str
        One of 'CORA', 'CORA-F', 'CITESEER', 'PUBMED', 'COAUTHOR-P',
        'COAUTHOR-C', 'AMAZON-C', 'AMAZON-P' (case-insensitive), or
        'all' to pre-download every dataset and terminate the process.

    Returns
    -------
    The torch_geometric dataset object for the requested name.

    Raises
    ------
    ValueError
        If `name` matches no known dataset.  (The original code fell
        through to `return dataset` and raised UnboundLocalError here.)
    """
    key = name.upper()
    root = f'BENCHMARK/{key}/'
    # Dataset key -> (loader class, constructor name argument).
    registry = {
        'CORA': (Planetoid, 'CORA'),
        'CORA-F': (CitationFull, 'cora'),
        'CITESEER': (Planetoid, 'citeseer'),
        'PUBMED': (Planetoid, 'PubMed'),
        'COAUTHOR-P': (Coauthor, 'Physics'),
        'COAUTHOR-C': (Coauthor, 'CS'),
        'AMAZON-C': (Amazon, 'Computers'),
        'AMAZON-P': (Amazon, 'Photo'),
    }
    if name.lower() == 'all':
        # Download-only mode: materialize every dataset, then stop the process.
        Planetoid(root=root, name='CORA')
        Planetoid(root=root, name='citeseer')
        CitationFull(root=root, name='cora')
        Planetoid(root=root, name='PubMed')
        Coauthor(root=root, name='Physics')
        Coauthor(root=root, name='CS')
        Amazon(root=root, name='Computers')
        Amazon(root=root, name='Photo')
        exit()
    if key not in registry:
        raise ValueError(f'Unknown dataset name: {name!r}')
    loader_cls, ds_name = registry[key]
    return loader_cls(root=root, name=ds_name)
def load_citation(dataset):
    """Load a CitationFull graph and build a random per-class train/test split.

    Draws `num_per_class` (20) random training nodes per class; all remaining
    nodes of that class go to the test set.  Classes with at most
    `num_per_class + 30` nodes are excluded from both splits.

    Parameters
    ----------
    dataset : str
        CitationFull dataset name; 'PubMedFull' is aliased to 'PubMed'.

    Returns
    -------
    (dataset, data) : the CitationFull dataset object and a Data object
        carrying x, edge_index, y plus the new train_mask / test_mask.

    Raises
    ------
    ValueError
        If every class is too small to split.  (The original code crashed
        inside torch.cat with an opaque error in this case.)
    """
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'Datasets',
                    'NodeData', 'Citation')
    # transforms = T.Compose([T.NormalizeFeatures()])
    if dataset == 'PubMedFull':
        dataset = 'PubMed'
    dataset = CitationFull(path, dataset)
    graph = dataset[0]  # cache the single graph instead of re-indexing it
    num_per_class = 20
    train_index = []
    test_index = []
    for i in range(dataset.num_classes):
        index = (graph.y.long() == i).nonzero().view(-1)
        # Shuffle so the 20 training nodes are a random sample of the class.
        index = index[torch.randperm(index.size(0))]
        # Skip classes too small to give 20 train + at least 30 test nodes.
        if len(index) > num_per_class + 30:
            train_index.append(index[:num_per_class])
            test_index.append(index[num_per_class:])
    if not train_index:
        raise ValueError(
            'No class has more than num_per_class + 30 nodes; '
            'cannot build a train/test split.')
    train_index = torch.cat(train_index)
    test_index = torch.cat(test_index)
    train_mask = index_to_mask(train_index, size=graph.num_nodes)
    test_mask = index_to_mask(test_index, size=graph.num_nodes)
    data = Data(x=graph.x, edge_index=graph.edge_index,
                train_mask=train_mask, test_mask=test_mask, y=graph.y)
    return dataset, data
def load_non_overlapping_dataset(
    dataset_name: "PlanetoidDataset | CitationFullDataset",
    transform=T.NormalizeFeatures()
) -> Data:
    """Load the single graph of a citation dataset selected by enum.

    Parameters
    ----------
    dataset_name : PlanetoidDataset | CitationFullDataset
        Enum member whose .value is the dataset's on-disk name.
        (The original annotation `PlanetoidDataset or CitationFullDataset`
        evaluated to just PlanetoidDataset; a string annotation states the
        union without runtime evaluation.)
    transform : torch_geometric transform applied on access; defaults to
        feature normalization.

    Returns
    -------
    Data : the first (and only) graph of the dataset.

    Raises
    ------
    ValueError
        If dataset_name is neither enum type (previously a bare Exception;
        ValueError is still caught by any `except Exception` caller).
    """
    path = osp.join(DATASETS_DIR, dataset_name.value)
    # isinstance is the idiomatic (and subclass-safe) alternative to
    # comparing type() objects with ==.
    if isinstance(dataset_name, PlanetoidDataset):
        data = Planetoid(path, dataset_name.value, transform=transform)[0]
    elif isinstance(dataset_name, CitationFullDataset):
        data = CitationFull(path, dataset_name.value, transform=transform)[0]
    else:
        raise ValueError("Unknown dataset name")
    return data
def run(file, data_name, model_name, lr):
    """Train link-prediction graph autoencoders on a CitationFull dataset.

    Parses an OGBL-DDI-style argument set (optionally overridden by the
    data_name / model_name / lr arguments when ALL three are non-None),
    builds the model named by args.model, performs args.runs independent
    trainings — each with a fresh random edge split — logs Hits@{20,50,100},
    and appends the aggregate statistics to the already-open writable `file`.

    NOTE(review): relies on module-level imports of argparse, os, torch,
    torch_geometric transforms (T), utils, the model classes, NeuralDecoder,
    Evaluator, Logger and the train/test helpers — confirm against the full
    file.
    """
    parser = argparse.ArgumentParser(description='OGBL-DDI (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=64 * 1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--use_nd', action='store_true')
    parser.add_argument('--use_lgae', action='store_true')
    parser.add_argument('--use_vgae', action='store_true')
    parser.add_argument('--model', type=str, default='')
    parser.add_argument('--dataset', type=str, default='Citeseer')
    args = parser.parse_args()
    # Function arguments override CLI values, but only when all three are given.
    if data_name != None and model_name != None and lr != None:
        args.dataset = data_name
        args.model = model_name
        args.lr = lr
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'
    device = torch.device(device)

    dataset = CitationFull(os.path.join('citation_data', args.dataset),
                           name=args.dataset, transform=T.ToSparseTensor())
    # NOTE(review): CitationFull holds a single graph, so these three counts
    # derive from __len__() == 1 and are never used below — confirm intent.
    num_training = int(dataset.__len__() * 0.8)
    num_val = int(dataset.__len__() * 0.1)
    num_test = dataset.__len__() - (num_training + num_val)
    data = dataset[0]
    adj_t = data.adj_t.to(device)
    # Recover an edge_index from the sparse adjacency for the edge splitter.
    edge_index, edge_type = utils.dense_to_sparse(adj_t.to_dense())
    data.edge_index = edge_index
    data.x = data.x.to(device)
    num_nodes = data.x.shape[0]
    num_edges = data.edge_index.shape[1]
    print(data)
    # nx_data = to_networkx(data, to_undirected=True)
    # print('graph density='+str(2*num_edges/(num_nodes*(num_nodes-1))))
    # print('clustering coefficient='+str(nx.average_clustering(nx_data)))

    # A trailing '-nd' on the model name selects the neural-decoder variant;
    # the base model name is what remains after stripping it.
    decoder_enable = args.model[-3:]
    if args.model[-3:] == '-nd':
        model_name = args.model[:-3]
    if model_name == 'lgae':
        model = LGAE(data.num_features, args.hidden_channels,
                     args.hidden_channels, args.num_layers, args.dropout)
    elif model_name == 'vgae':
        model = DeepVGAE(data.num_features, args.hidden_channels,
                         args.hidden_channels, args.num_layers, args.dropout)
    elif model_name == 'gae':
        model = GraphAutoEncoder(data.num_features, args.hidden_channels,
                                 args.hidden_channels, args.num_layers,
                                 args.dropout)
    elif model_name == 'arga':
        model = AdversarialGAE(data.num_features, args.hidden_channels,
                               args.hidden_channels, args.num_layers,
                               args.dropout)
    elif model_name == 'arvga':
        model = AdversarialVGAE(data.num_features, args.hidden_channels,
                                args.hidden_channels, args.num_layers,
                                args.dropout)
    elif model_name == 'lrga':
        model = LRGA(data.num_features, args.hidden_channels,
                     args.hidden_channels, args.num_layers, args.dropout)
    elif model_name == 'sage':
        model = SAGEAutoEncoder(data.num_features, args.hidden_channels,
                                args.hidden_channels, args.num_layers,
                                args.dropout)
    if decoder_enable == '-nd':
        # Replace the default decoder with the learned (MLP) decoder.
        model.decoder = NeuralDecoder(
            args.hidden_channels, args.hidden_channels,
            1, args.num_layers, args.dropout)

    evaluator = Evaluator(name='ogbl-ddi')
    model = model.to(device)
    loggers = {}
    K_list = ['20', '50', '100']
    for k in K_list:
        loggers['Hits@' + k] = Logger(args.runs, args)

    for run in range(args.runs):
        torch.manual_seed(run)  # per-run seed: reproducible split and init
        # Fresh random train/val/test edge split for every run.
        split_edge = utils.train_test_split_edges(data)
        # print(split_edge.train_pos_edge_index.shape)
        # print(split_edge.val_pos_edge_index.shape)
        # exit()
        split_edge.edge_index = edge_index
        # emb.weight.data = features
        model.reset_parameters()
        # NOTE(review): this mutates args.lr permanently, so it also applies
        # to every subsequent run — confirm this is intended.
        if args.model in ['arga', 'arga-nd', 'arvga', 'arvga-nd']:
            args.lr = 0.005
        optimizer = torch.optim.Adam(
            list(model.parameters()), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data.x, adj_t, split_edge, optimizer,
                         args.batch_size)
            # Evaluate (and optionally log) only every eval_steps epochs.
            if epoch % args.eval_steps == 0:
                results = test(model, data.x, adj_t, split_edge, evaluator,
                               args.batch_size)
                for key, result in results.items():
                    loggers[key].add_result(run, result)
                if epoch % args.log_steps == 0:
                    for key, result in results.items():
                        train_hits, valid_hits, test_hits, test_auc, test_ap, val_auc, val_ap = result
                        print(key)
                        print(f'Run: {run + 1:02d}, '
                              f'Epoch: {epoch:02d}, '
                              f'Loss: {loss:.4f}, '
                              f'auc: {100 * test_auc:.2f}%, '
                              f'ap: {100 * test_ap:.2f}%, '
                              f'Train: {100 * train_hits:.2f}%, '
                              f'Valid: {100 * valid_hits:.2f}%, '
                              f'Test: {100 * test_hits:.2f}%',
                              )
                    print('---')
        # Per-run summary for each Hits@K logger.
        for key in loggers.keys():
            print(key)
            loggers[key].print_statistics(run)

    # Final summary across all runs, appended to the output file.
    for key in loggers.keys():
        print(key)
        toWrite = loggers[key].print_statistics()
        file.write(str(args.lr) + ' ' + key + ' ' + args.model + "'"
                   + str(toWrite) + '\n')
        file.flush()
def run(file, data_name, model_name, lr):
    """Training driver variant with a single, fixed edge split.

    Unlike the multi-split sibling, the train/val/test edge split is drawn
    once before the run loop (runs differ only in seed/initialization),
    every epoch is evaluated, and all results go into a single 'metrics'
    Logger whose statistics are written tab-separated into `file`.

    NOTE(review): relies on module-level imports of argparse, os, torch,
    torch_geometric transforms (T), utils, the model classes, NeuralDecoder,
    Evaluator, Logger and the train/test helpers — confirm against the full
    file.
    """
    parser = argparse.ArgumentParser(description='OGBL-DDI (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=64 * 1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--use_nd', action='store_true')
    parser.add_argument('--use_lgae', action='store_true')
    parser.add_argument('--use_vgae', action='store_true')
    parser.add_argument('--model', type=str, default='')
    parser.add_argument('--dataset', type=str, default='Citeseer')
    args = parser.parse_args()
    # Function arguments override CLI values, but only when all three are given.
    if data_name != None and model_name != None and lr != None:
        args.dataset = data_name
        args.model = model_name
        args.lr = lr
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'
    device = torch.device(device)

    dataset = CitationFull(os.path.join('citation_data', args.dataset),
                           name=args.dataset, transform=T.ToSparseTensor())
    # NOTE(review): CitationFull holds a single graph, so these three counts
    # derive from __len__() == 1 and are never used below — confirm intent.
    num_training = int(dataset.__len__() * 0.8)
    num_val = int(dataset.__len__() * 0.1)
    num_test = dataset.__len__() - (num_training + num_val)
    data = dataset[0]
    print('data:', vars(data))
    adj_t = data.adj_t.to(device)
    # Recover an edge_index from the sparse adjacency for the edge splitter.
    edge_index, edge_type = utils.dense_to_sparse(adj_t.to_dense())
    data.edge_index = edge_index
    data.x = data.x.to(device)
    # Single split, shared by every run.
    split_edge = utils.train_test_split_edges(data)
    split_edge.edge_index = edge_index
    print(data)
    print(edge_index.shape)

    # A trailing '-nd' on the model name selects the neural-decoder variant;
    # the base model name is what remains after stripping it.
    decoder_enable = args.model[-3:]
    if args.model[-3:] == '-nd':
        model_name = args.model[:-3]
    if model_name == 'lgae':
        model = LGAE(data.num_features, args.hidden_channels,
                     args.hidden_channels, args.num_layers, args.dropout)
    elif model_name == 'vgae':
        model = DeepVGAE(data.num_features, args.hidden_channels,
                         args.hidden_channels, args.num_layers, args.dropout)
    elif model_name == 'gae':
        model = GraphAutoEncoder(data.num_features, args.hidden_channels,
                                 args.hidden_channels, args.num_layers,
                                 args.dropout)
    elif model_name == 'arga':
        model = AdversarialGAE(data.num_features, args.hidden_channels,
                               args.hidden_channels, args.num_layers,
                               args.dropout)
    elif model_name == 'arvga':
        model = AdversarialVGAE(data.num_features, args.hidden_channels,
                                args.hidden_channels, args.num_layers,
                                args.dropout)
    elif model_name == 'lrga':
        model = LRGA(data.num_features, args.hidden_channels,
                     args.hidden_channels, args.num_layers, args.dropout)
    elif model_name == 'sage':
        model = SAGEAutoEncoder(data.num_features, args.hidden_channels,
                                args.hidden_channels, args.num_layers,
                                args.dropout)
    if decoder_enable == '-nd':
        # Replace the default decoder with the learned (MLP) decoder.
        model.decoder = NeuralDecoder(
            args.hidden_channels, args.hidden_channels,
            1, args.num_layers, args.dropout)

    evaluator = Evaluator(name='ogbl-ddi')
    model = model.to(device)
    loggers = {
        'metrics': Logger(args.runs, args)
    }

    for run in range(args.runs):
        torch.manual_seed(run)  # per-run seed: reproducible initialization
        model.reset_parameters()
        # NOTE(review): this mutates args.lr permanently, so it also applies
        # to every subsequent run — confirm this is intended.
        if args.model in ['arga', 'arga-nd', 'arvga', 'arvga-nd']:
            args.lr = 0.005
        optimizer = torch.optim.Adam(
            list(model.parameters()), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data.x, adj_t, split_edge, optimizer,
                         args.batch_size)
            # NOTE(review): test() receives `data` here where the sibling
            # driver passes `adj_t` — confirm test()'s expected argument.
            result = test(model, data.x, data, split_edge, evaluator,
                          args.batch_size)
            loggers['metrics'].add_result(run, result)

    # Final summary across all runs, appended (and synced) to the output file.
    for key in loggers.keys():
        print(key)
        toWrite = loggers[key].print_statistics()
        file.write(args.model + '\t' + '\t'.join(toWrite) + '\n')
        file.flush()
        os.fsync(file)
# Script fragment: finish CLI setup, load the chosen dataset, build the model
# and optimizer, then train.  `parser` (and possibly `device`) are defined
# earlier in the file, outside this excerpt.
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--use_gdc', type=bool, default=False)
parser.add_argument("--dataset", type=str, default="CiteSeer")
args = parser.parse_args()
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): `device` is used below (data.to(device), model.to(device))
# but its only visible definition is the commented line above — confirm it is
# defined earlier in the file, otherwise this raises NameError.
dataset = args.dataset
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
# Default learning rate; Flickr ("F") overrides it below.
lrate = 0.01
if dataset == "F":
    dataset = Flickr(path, transform=T.NormalizeFeatures())
    print(len(dataset))
    lrate = 0.1
elif dataset == "C":
    # "C" selects the full DBLP citation graph.
    dataset = CitationFull(path, "DBLP", transform=T.NormalizeFeatures())
    print(len(dataset))
else:
    # Any other value is treated as a Planetoid dataset name.
    dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
    print(len(dataset))
data = dataset[0]
print(data)
model, data = Net(dataset, data, args), data.to(device)
model = model.to(device)
optimizer = torch.optim.Adam([
    dict(params=model.conv1.parameters(), weight_decay=5e-4),
    dict(params=model.conv2.parameters(), weight_decay=0)
], lr=lrate)  # Only perform weight-decay on first convolution.
best_val_acc = test_acc = 0
# NOTE(review): range(1, args.epochs) runs epochs-1 iterations — possible
# off-by-one against the '--epochs' default of 100; confirm intent.
for epoch in range(1, args.epochs):
    train(model, data)
# Script fragment: finish CLI setup, load the dataset, and rebuild the
# train/test node masks from pickled index splits.  `parser` and the model
# classes come from earlier in the file, outside this excerpt.
parser.add_argument('--layers', type=int, default=30)
parser.add_argument('--epochs', type=int, default=800)
parser.add_argument('--early', type=int, default=80)
args = parser.parse_args()

gnn = args.gnn
gnndict = {'GAT': GAT, 'SAGE': SAGE, 'GCN': GCN, 'GEN': AdaGNN_v, 'MLP': MLP}
reset = args.reset
ratio = args.ratio
dataset_n = args.dataset
t_layers = args.layers
log_name = f'./result/Greedy_SRM_GNN_{gnn}_reset_{reset}_dataset_{dataset_n}'

path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset_n)
# DBLP is only available through CitationFull; Planetoid covers the rest.
if dataset_n == 'dblp':
    dataset = CitationFull(path, dataset_n)
else:
    dataset = Planetoid(path, dataset_n)
data = dataset[0]

# BUG FIX: the test indices were previously loaded from the *_train pickle
# (copy-paste error), so `rand` contained the training indices twice and the
# resulting train/test masks could overlap.  Load the *_test split instead,
# and close the files deterministically with `with`.
with open(f'./datasetsplit/{dataset_n.lower()}_train', "rb") as split_file:
    train_split = pickle.load(split_file)
with open(f'./datasetsplit/{dataset_n.lower()}_test', "rb") as split_file:
    test_split = pickle.load(split_file)

# Re-partition the concatenated node permutation at the requested ratio.
rand = torch.cat([train_split, test_split])
thold = int(data.num_nodes * ratio)
train_split = rand[:thold]
test_split = rand[thold:]

data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[train_split] = 1
data.val_mask = None  # no validation split in this setup
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[test_split] = 1

criteria = CrossEntropyLoss()
import torch.optim as optim
from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader
import torch_geometric.transforms as T
from tensorboardX import SummaryWriter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from torch_geometric.datasets import CitationFull
from tqdm import tqdm
import pdb

# Eagerly load (downloading on first use) every CitationFull citation graph
# at import time.
data_cora = CitationFull('./CitationFull', 'cora')
data_cora_ml = CitationFull('./CitationFull', 'cora_ml')
data_citeseer = CitationFull('./CitationFull', 'citeseer')
data_dblp = CitationFull('./CitationFull', 'dblp')
data_pubmed = CitationFull('./CitationFull', 'pubmed')


class GNNStack(nn.Module):
    """Stack of graph convolutions with LayerNorms between layers.

    NOTE(review): only the start of this class is visible in this excerpt;
    it calls self.build_conv_model, which must be defined elsewhere in the
    class.  `output_dim` is stored implicitly by later code not shown here —
    confirm against the full class body.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, task='node'):
        # task: 'node' selects node-level prediction; other values are
        # presumably graph-level — confirm against the rest of the class.
        super(GNNStack, self).__init__()
        self.task = task
        self.convs = nn.ModuleList()
        self.convs.append(self.build_conv_model(input_dim, hidden_dim))
        self.lns = nn.ModuleList()
        # Two LayerNorms: one per hidden layer boundary in the visible setup.
        self.lns.append(nn.LayerNorm(hidden_dim))
        self.lns.append(nn.LayerNorm(hidden_dim))
from torch_geometric.datasets import CitationFull
from torch_geometric.utils import to_scipy_sparse_matrix
import torch_geometric.transforms as T

# Path to the DBLP dataset directory relative to this file.
# NOTE(review): `osp`, `sp` (scipy.sparse) and `torch` are used below but
# imported earlier in the file, outside this excerpt.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'DBLP')

# NOTE(review): to_scipy_sparse_matrix and normalize_adjacency_matrix are
# each imported twice in this block — harmless but redundant; kept as-is
# since only part of the file is visible.
from torch_geometric.utils import to_scipy_sparse_matrix
from utils import normalize_adjacency_matrix, normalizemx
from DBLP_utils import SCAT_Red
from utils import normalize_adjacency_matrix, sparse_mx_to_torch_sparse_tensor
from layers import GC_withres, GraphConvolution
#from torch_geometric.nn import GATConv
from torch.optim.lr_scheduler import MultiStepLR, StepLR

#dataset = TUDataset(root= path,name='REDDIT-BINARY')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset = CitationFull(path, name='dblp', transform=T.TargetIndegree())
data = dataset[0]
# Num of feat:1639

# Symmetrize the adjacency: add A^T entries wherever they exceed A's,
# removing the corresponding A entries so each pair keeps the larger value.
adj = to_scipy_sparse_matrix(edge_index=data.edge_index)
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
# Normalized adjacency with self-loops (A + I) as a torch sparse tensor.
A_tilde = sparse_mx_to_torch_sparse_tensor(
    normalize_adjacency_matrix(adj, sp.eye(adj.shape[0]))).to(device)
adj = sparse_mx_to_torch_sparse_tensor(adj).to(device)
#print(dataset)
#print(data.x.shape)
#print(data.y.shape)
#tp = SCAT_Red(in_features=1639,med_f0=10,med_f1=10,med_f2=10,med_f3=10,med_f4=10).to(device)
#tp2 = SCAT_Red(in_features=40,med_f0=30,med_f1=10,med_f2=10,med_f3=10,med_f4=10).to(device)

# Fixed-position boolean train mask: the first 10000 nodes are training
# (10000 + 2000 + 2000 + 3716 = 17716 nodes total).
train_mask = torch.cat((torch.ones(10000), torch.zeros(2000),
                        torch.zeros(2000), torch.zeros(3716)), 0) > 0