def deepsnap_pagerank(args, pyg_dataset):
    """Benchmark DeepSNAP's PageRank transform.

    Repeats ``args.num_runs`` times: converts the PyG dataset to DeepSNAP
    graphs, wraps them in a graph-level ``GraphDataset``, and applies the
    ``page_fun`` transform. Only the transform call itself is timed; the
    mean wall-clock time per run is printed at the end.

    Args:
        args: parsed CLI namespace; uses ``num_runs``, ``print_run`` and
            ``netlib``.
        pyg_dataset: a PyTorch Geometric dataset to convert.
    """
    total_elapsed = 0.0
    for run_idx in range(args.num_runs):
        if args.print_run:
            print("Run {}".format(run_idx + 1))
        # NOTE(review): `netlib` here is a free (module-level) name, while the
        # transform below uses `args.netlib` — confirm the two agree.
        graphs = GraphDataset.pyg_to_graphs(
            pyg_dataset, verbose=True, fixed_split=False, netlib=netlib)
        dataset = GraphDataset(graphs, task='graph')
        # Time only the PageRank transform, not the dataset conversion above.
        started = time.time()
        dataset.apply_transform(page_fun, update_tensor=False, lib=args.netlib)
        total_elapsed += time.time() - started
    print("DeepSNAP has average time: {}".format(total_elapsed / args.num_runs))
def main():
    """Train a link-prediction model on the WordNet relation graph.

    Loads the pickled WordNet graph, counts the distinct edge relation
    types, applies ``WN_transform``, splits transductively into
    train/val/test, builds per-split dataloaders, and runs ``train``.
    Relation type serves both as the edge feature and the edge label.
    """
    args = arg_parse()
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))
    WN_graph = nx.read_gpickle(args.data_path)
    print('Each node has node ID (n_id). Example: ', WN_graph.nodes[0])
    print(
        'Each edge has edge ID (id) and categorical label (e_label). Example: ',
        WN_graph[0][5871])
    # Since both feature and label are relation types,
    # only the disjoint mode would make sense.
    dataset = GraphDataset(
        [WN_graph],
        task='link_pred',
        edge_train_mode=edge_train_mode,
        edge_message_ratio=args.edge_message_ratio,
        edge_negative_sampling_ratio=args.neg_sampling_ratio)
    # Collect the distinct relation labels; list membership (not a set) is
    # used deliberately in case labels are not hashable.
    # Labels are consecutive (0-17).
    labels = []
    for u, v, edge_key in WN_graph.edges:
        label = WN_graph[u][v][edge_key]['e_label']
        if label not in labels:
            labels.append(label)
    num_edge_types = len(labels)
    print('Pre-transform: ', dataset[0])
    dataset = dataset.apply_transform(
        WN_transform, num_edge_types=num_edge_types, deep_copy=False)
    print('Post-transform: ', dataset[0])
    print('Initial data: {} nodes; {} edges.'.format(
        dataset[0].G.number_of_nodes(), dataset[0].G.number_of_edges()))
    print('Number of node features: {}'.format(dataset.num_node_features))
    # Split the dataset transductively.
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=True, split_ratio=[0.8, 0.1, 0.1])
    print('After split:')
    for split in ('train', 'val', 'test'):
        # capitalize() yields 'Train'/'Val'/'Test', matching the original output.
        print('{} message-passing graph: {} nodes; {} edges.'.format(
            split.capitalize(),
            datasets[split][0].G.number_of_nodes(),
            datasets[split][0].G.number_of_edges()))
    # Node feature dimension and label/feature sizes.
    input_dim = datasets['train'].num_node_features
    edge_feat_dim = datasets['train'].num_edge_features
    num_classes = datasets['train'].num_edge_labels
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes))
    # Relation type is used both for edge features and edge labels.
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=0.001, weight_decay=5e-3)
    follow_batch = []  # e.g., follow_batch = ['edge_index']
    dataloaders = {
        split: DataLoader(
            ds,
            collate_fn=Batch.collate(follow_batch),
            batch_size=1,
            shuffle=(split == 'train'))
        for split, ds in datasets.items()
    }
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)
    train(model, dataloaders, optimizer, args)
# NOTE(review): incomplete fragment — tail of a backend-library if/elif chain
# whose function header is outside this view (SnapX branch, then dataset
# conversion, 80/10/10 inductive split, optional transform, dataloaders,
# and training). Code left byte-identical; newlines appear to have been
# stripped by the extraction.
print("Use SnapX as the backend network library.") else: raise ValueError("{} network library is not supported.".format(args.netlib)) args.netlib = netlib graphs = GraphDataset.pyg_to_graphs(pyg_dataset, netlib=args.netlib) dataset = GraphDataset(graphs, task="graph") datasets = {} datasets['train'], datasets['val'], datasets['test'] = dataset.split( transductive=False, split_ratio = [0.8, 0.1, 0.1]) if args.transform_dataset is not None: trans_func = get_transform(args.transform_dataset) for _, dataset in datasets.items(): dataset.apply_transform(trans_func, radius=args.radius, netlib=args.netlib) dataloaders = { split: DataLoader( dataset, collate_fn=Batch.collate(), batch_size=args.batch_size, shuffle=True ) for split, dataset in datasets.items() } num_classes = datasets['train'].num_graph_labels num_node_features = datasets['train'].num_node_features train(dataloaders['train'], dataloaders['val'], dataloaders['test'], args, num_node_features, num_classes, args.device)
def _cmap_out_edges(graph, u):
    """Return all outgoing CMap edges of node ``u`` as (u, v, key) triples.

    CMap edges are identified by ``edge_type == 1``; ``graph`` is assumed to
    be a directed multigraph (it is indexed as ``graph[u][v][edge_key]``).
    """
    return [
        (u, v, edge_key)
        for v in graph.successors(u)
        for edge_key in graph[u][v]
        if graph[u][v][edge_key]['edge_type'] == 1
    ]


def main():
    """Train a disjoint-mode link-prediction model on CMap (+ optional PPI).

    Builds the message-passing graph, randomly assigns each knockout node's
    CMap edges to the disjoint-label / val / train splits, wraps everything
    in a custom-split ``HeteroGraph``, applies ``cmap_transform``, and (after
    sanity-check printing) would train the model.
    """
    writer = SummaryWriter()
    args = arg_parse()
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))
    ppi_graph = read_ppi_data(args.ppi_path)
    mode = 'mixed'
    if mode == 'ppi':
        message_passing_graph = ppi_graph
        cmap_graph, knockout_nodes = read_cmap_data(args.data_path)
    elif mode == 'mixed':
        message_passing_graph, knockout_nodes = (
            read_cmap_data(args.data_path, ppi_graph)
        )
    print('Each node has gene ID. Example: ',
          message_passing_graph.nodes['ADPGK'])
    print('Each edge has de direction. Example',
          message_passing_graph['ADPGK']['IL1B'])
    print('Total num edges: ', message_passing_graph.number_of_edges())
    # Disjoint edge-label split: per knockout node, all of its CMap edges go
    # together into one of the three buckets.
    disjoint_split_ratio = 0.1
    val_ratio = 0.1
    disjoint_edge_label_index = []
    val_edges = []
    train_edges = []
    for u in knockout_nodes:
        rand_num = np.random.rand()
        if rand_num < disjoint_split_ratio:
            # Add all edges (cmap only) into edge label index;
            # cmap is not a multigraph. These edges also stay in train.
            out_edges = _cmap_out_edges(message_passing_graph, u)
            disjoint_edge_label_index.extend(out_edges)
            train_edges.extend(out_edges)
        elif rand_num < disjoint_split_ratio + val_ratio:
            val_edges.extend(_cmap_out_edges(message_passing_graph, u))
        else:
            train_edges.extend(_cmap_out_edges(message_passing_graph, u))
    # Add a default node type for every node in the message-passing graph.
    for node in message_passing_graph.nodes:
        message_passing_graph.nodes[node]['node_type'] = 0
    print('Num edges to predict: ', len(disjoint_edge_label_index))
    print('Num edges in val: ', len(val_edges))
    print('Num edges in train: ', len(train_edges))
    graph = HeteroGraph(
        message_passing_graph,
        custom={
            "general_splits": [
                train_edges,
                val_edges
            ],
            "disjoint_split": disjoint_edge_label_index,
            "task": "link_pred"
        }
    )
    graphs = [graph]
    graphDataset = GraphDataset(
        graphs,
        task="link_pred",
        edge_train_mode="disjoint"
    )
    # Transform dataset: de direction (currently using homogeneous graph).
    num_edge_types = 2
    graphDataset = graphDataset.apply_transform(
        cmap_transform, num_edge_types=num_edge_types, deep_copy=False
    )
    print('Number of node features: ', graphDataset.num_node_features())
    # Split dataset (custom splits supplied above).
    dataset = {}
    dataset['train'], dataset['val'] = graphDataset.split(transductive=True)
    # Sanity check.
    print(f"dataset['train'][0].edge_label_index.keys(): {dataset['train'][0].edge_label_index.keys()}")
    print(f"dataset['train'][0].edge_label_index[(0, 1, 0)].shape[1]: {dataset['train'][0].edge_label_index[(0, 1, 0)].shape[1]}")
    print(f"dataset['val'][0].edge_label_index.keys(): {dataset['val'][0].edge_label_index.keys()}")
    print(f"dataset['val'][0].edge_label_index[(0, 1, 0)].shape[1]: {dataset['val'][0].edge_label_index[(0, 1, 0)].shape[1]}")
    print(f"len(list(dataset['train'][0].G.edges)): {len(list(dataset['train'][0].G.edges))}")
    print(f"len(list(dataset['val'][0].G.edges)): {len(list(dataset['val'][0].G.edges))}")
    print(f"list(dataset['train'][0].G.edges)[:10]: {list(dataset['train'][0].G.edges)[:10]}")
    print(f"list(dataset['val'][0].G.edges)[:10]: {list(dataset['val'][0].G.edges)[:10]}")
    # Node feature dimension and label/feature sizes.
    input_dim = dataset['train'].num_node_features()
    edge_feat_dim = dataset['train'].num_edge_features()
    num_classes = dataset['train'].num_edge_labels()
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes
        )
    )
    # TODO(review): debug early-exit left in — nothing below ever runs.
    # Remove once the sanity checks above have been validated.
    exit()
    # Relation type is both used for edge features and edge labels.
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=0.001, weight_decay=5e-3
    )
    follow_batch = []  # e.g., follow_batch = ['edge_index']
    dataloaders = {
        split: DataLoader(
            ds,
            collate_fn=Batch.collate(follow_batch),
            batch_size=1,
            shuffle=(split == 'train')
        )
        for split, ds in dataset.items()
    }
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)
    train(model, dataloaders, optimizer, args, writer=writer)
# NOTE(review): incomplete fragment — tail of a TUDataset-selection if/elif
# chain whose function header is outside this view (DD branch, then dataset
# conversion, 80/10/10 inductive split, optional transform, dataloaders, and
# training). Code left byte-identical; newlines appear to have been stripped
# by the extraction.
elif args.dataset == 'dd': pyg_dataset = TUDataset('./dd', 'DD') else: raise ValueError("Unsupported dataset.") graphs = GraphDataset.pyg_to_graphs(pyg_dataset) dataset = GraphDataset(graphs, task="graph") datasets = {} datasets['train'], datasets['val'], datasets['test'] = dataset.split( transductive=False, split_ratio=[0.8, 0.1, 0.1]) if args.transform_dataset is not None: trans_func = get_transform(args.transform_dataset) for _, dataset in datasets.items(): dataset.apply_transform(trans_func, radius=args.radius) dataloaders = { split: DataLoader(dataset, collate_fn=Batch.collate(), batch_size=args.batch_size, shuffle=True) for split, dataset in datasets.items() } num_classes = datasets['train'].num_graph_labels num_node_features = datasets['train'].num_node_features train(dataloaders['train'], dataloaders['val'], dataloaders['test'], args, num_node_features, num_classes, args.device)