import random

import numpy as np
import torch
import torch.utils.data

from graph_sampler import GraphSampler  # assumed module path; adjust to the repository layout


def prepare_data(graphs, args, test_graphs=None, max_nodes=0):
    random.shuffle(graphs)
    if test_graphs is None:
        # Split into train/val/test according to the configured ratios.
        train_idx = int(len(graphs) * args.train_ratio)
        test_idx = int(len(graphs) * (1 - args.test_ratio))
        train_graphs = graphs[:train_idx]
        val_graphs = graphs[train_idx:test_idx]
        test_graphs = graphs[test_idx:]
    else:
        # Test graphs are supplied by the caller; split the rest into train/val.
        train_idx = int(len(graphs) * args.train_ratio)
        train_graphs = graphs[:train_idx]
        val_graphs = graphs[train_idx:]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs),
          '; Num testing graphs: ', len(test_graphs))
    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    print('Max, avg, std of graph size: ',
          max([G.number_of_nodes() for G in graphs]), ', '
          "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', '
          "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))

    # minibatch
    dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type)
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type)
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(test_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type)
    test_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, test_dataset_loader, \
        dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
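# Hedged usage sketch (not from the original source): `args` is assumed to be an
# argparse.Namespace carrying only the fields read above, and a toy networkx dataset
# stands in for a real benchmark. The per-node 'feat' and graph-level 'label'
# attributes are an assumption about what GraphSampler expects.
import argparse

import networkx as nx

args = argparse.Namespace(
    train_ratio=0.8,      # fraction of graphs used for training
    test_ratio=0.1,       # fraction of graphs held out for testing
    feature_type='default',
    batch_size=20,
    num_workers=1,
)

graphs = []
for i in range(100):
    G = nx.gnp_random_graph(20, 0.2)
    G.graph['label'] = i % 2
    for n in G.nodes():
        G.nodes[n]['feat'] = np.ones(10, dtype=np.float32)
    graphs.append(G)

(train_loader, val_loader, test_loader,
 max_num_nodes, feat_dim, assign_feat_dim) = prepare_data(graphs, args, max_nodes=0)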
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    random.shuffle(graphs)
    val_size = len(graphs) // 10
    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:
        train_graphs = train_graphs + graphs[(val_idx + 1) * val_size:]
    val_graphs = graphs[val_idx * val_size:(val_idx + 1) * val_size]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs))
    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    print('Max, avg, std of graph size: ',
          max([G.number_of_nodes() for G in graphs]), ', '
          "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', '
          "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))

    # minibatch
    dataset_sampler = GraphSampler(train_graphs, args=args, normalize=False,
                                   max_num_nodes=max_nodes, features=args.feature_type)
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs, args=args, normalize=False,
                                   max_num_nodes=max_nodes, features=args.feature_type)
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, \
        dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
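# Sketch of a 10-fold cross-validation driver built on prepare_val_data (illustrative,
# not from the original source; the training routine is repository-specific and omitted).
# Note that random.shuffle runs inside the function, so successive calls draw their folds
# from different shuffles unless the random seed is fixed or the shuffle is hoisted out.
for val_idx in range(10):
    train_loader, val_loader, max_num_nodes, feat_dim, assign_feat_dim = \
        prepare_val_data(graphs, args, val_idx, max_nodes=0)
    # ... train on train_loader, evaluate on val_loader ...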
def prepare_data(graphs, graphs_list, args, test_graphs=None, max_nodes=0, seed=0):
    zip_list = list(zip(graphs, graphs_list))
    random.Random(seed).shuffle(zip_list)
    graphs, graphs_list = zip(*zip_list)
    print('Test ratio: ', args.test_ratio)
    print('Train ratio: ', args.train_ratio)
    test_graphs_list = []
    if test_graphs is None:
        train_idx = int(len(graphs) * args.train_ratio)
        test_idx = int(len(graphs) * (1 - args.test_ratio))
        train_graphs = graphs[:train_idx]
        val_graphs = graphs[train_idx:test_idx]
        test_graphs = graphs[test_idx:]
        train_graphs_list = graphs_list[:train_idx]
        val_graphs_list = graphs_list[train_idx:test_idx]
        test_graphs_list = graphs_list[test_idx:]
    else:
        train_idx = int(len(graphs) * args.train_ratio)
        train_graphs = graphs[:train_idx]
        train_graphs_list = graphs_list[:train_idx]
        val_graphs = graphs[train_idx:]
        val_graphs_list = graphs_list[train_idx:]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs),
          '; Num testing graphs: ', len(test_graphs))
    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    print('Max, avg, std of graph size: ',
          max([G.number_of_nodes() for G in graphs]), ', '
          "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', '
          "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))

    test_dataset_loader = []
    dataset_sampler = GraphSampler(train_graphs, train_graphs_list,
                                   args.num_pool_matrix, args.num_pool_final_matrix,
                                   normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type, norm=args.norm)
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs, val_graphs_list,
                                   args.num_pool_matrix, args.num_pool_final_matrix,
                                   normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type, norm=args.norm)
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    if len(test_graphs) > 0:
        dataset_sampler = GraphSampler(test_graphs, test_graphs_list,
                                       args.num_pool_matrix, args.num_pool_final_matrix,
                                       normalize=False, max_num_nodes=max_nodes,
                                       features=args.feature_type, norm=args.norm)
        test_dataset_loader = torch.utils.data.DataLoader(
            dataset_sampler,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, test_dataset_loader, \
        dataset_sampler.max_num_nodes, dataset_sampler.feat_dim
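# Hedged usage sketch for the paired-list variant above (not from the original source):
# graphs_list is assumed to carry per-graph precomputed pooling data aligned element-wise
# with graphs, and args must additionally provide num_pool_matrix, num_pool_final_matrix
# and norm (valid values are repository-specific).
assert len(graphs) == len(graphs_list)  # the seeded paired shuffle preserves this alignment
train_loader, val_loader, test_loader, max_num_nodes, feat_dim = \
    prepare_data(graphs, graphs_list, args, max_nodes=0, seed=0)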
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    random.shuffle(graphs)
    val_size = len(graphs) // 10
    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:
        train_graphs = train_graphs + graphs[(val_idx + 1) * val_size:]
    val_graphs = graphs[val_idx * val_size:(val_idx + 1) * val_size]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs))
    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    print('Max, avg, std of graph size: ',
          max([G.number_of_nodes() for G in graphs]), ', '
          "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', '
          "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))

    dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type)

    print("performing restrictions...")
    # Precompute the coarsened graphs. computeLaplacian, computePrologator and
    # computeAdjancency are project-local AMG helpers defined elsewhere.
    # Only ~460 training samples are precomputed: Colab's RAM cannot hold more.
    dataset_sampler_new = []
    i = 0
    maxDATA = 460
    for data in dataset_sampler:
        # For each graph, compute the prolongation operator, the coarsened adjacency
        # matrix and the coarsened feature matrices. Keys ending in '2' hold the
        # original (fine) graph, keys ending in '1' the coarsened one.
        temp = {}
        temp['adj2'] = data['adj']
        temp['feats2'] = data['feats']
        temp['assign_feats2'] = data['assign_feats']
        temp['num_nodes2'] = data['num_nodes']
        # Compute Laplacian
        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))
        real_features = torch.from_numpy(data['feats'][real_index, :])
        real_assign_feats = torch.from_numpy(data['assign_feats'][real_index, :])
        real_features = real_features.cuda()
        real_assign_feats = real_assign_feats.cuda()
        # Prolongation
        P = (computePrologator(A, 1)).cuda()
        # Coarsen A
        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()
        # Coarsen X
        x_h = torch.matmul(P.t(), real_features)
        x_h = x_h.cpu()
        # Coarsen x_ass
        x_ass_h = torch.matmul(P.t(), real_assign_feats)
        x_ass_h = x_ass_h.cpu()
        # Zero-pad the coarsened matrices back to the original shapes
        A_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        A_padded[:A.shape[0], :A.shape[1]] = A.numpy()
        x_h_padded = np.zeros(shape=(data['feats'].shape[0], data['feats'].shape[1]))
        x_h_padded[:x_h.shape[0], :x_h.shape[1]] = x_h.numpy()
        x_assigned_padded = np.zeros(shape=(data['assign_feats'].shape[0],
                                            data['assign_feats'].shape[1]))
        x_assigned_padded[:x_ass_h.shape[0], :x_ass_h.shape[1]] = x_ass_h.numpy()
        temp['num_nodes1'] = A.shape[0]
        temp['adj1'] = A_padded
        temp['feats1'] = x_h_padded
        temp['assign_feats1'] = x_assigned_padded
        temp['label'] = data['label']
        dataset_sampler_new.append(temp)
        if i > maxDATA:
            break
        else:
            i = i + 1

    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_new,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type)
    # Same operations as for the training samples
    dataset_sampler_val = []
    for data in dataset_sampler:
        temp = {}
        temp['adj2'] = data['adj']
        temp['feats2'] = data['feats']
        temp['assign_feats2'] = data['assign_feats']
        temp['num_nodes2'] = data['num_nodes']
        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))
        real_features = torch.from_numpy(data['feats'][real_index, :])
        real_assign_feats = torch.from_numpy(data['assign_feats'][real_index, :])
        real_features = real_features.cuda()
        real_assign_feats = real_assign_feats.cuda()
        P = (computePrologator(A, 1)).cuda()
        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()
        x_h = torch.matmul(P.t(), real_features)
        x_h = x_h.cpu()
        x_ass_h = torch.matmul(P.t(), real_assign_feats)
        x_ass_h = x_ass_h.cpu()
        A_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        A_padded[:A.shape[0], :A.shape[1]] = A.numpy()
        x_h_padded = np.zeros(shape=(data['feats'].shape[0], data['feats'].shape[1]))
        x_h_padded[:x_h.shape[0], :x_h.shape[1]] = x_h.numpy()
        x_assigned_padded = np.zeros(shape=(data['assign_feats'].shape[0],
                                            data['assign_feats'].shape[1]))
        x_assigned_padded[:x_ass_h.shape[0], :x_ass_h.shape[1]] = x_ass_h.numpy()
        temp['num_nodes1'] = A.shape[0]
        temp['adj1'] = A_padded
        temp['feats1'] = x_h_padded
        temp['assign_feats1'] = x_assigned_padded
        temp['label'] = data['label']
        dataset_sampler_val.append(temp)
    print("Restrictions Computed")

    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, \
        dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
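# The per-graph coarsening block above is duplicated verbatim for the training and
# validation loops. The following is a hedged refactoring sketch, not part of the
# original code: it assumes CUDA is available (as the original does) and relies on the
# project-local helpers computeLaplacian, computePrologator and computeAdjancency,
# whose behaviour is inferred only from their use above.
def coarsen_sample(adj, feats, assign_feats):
    """Coarsen one padded graph sample and pad the result back to the original shapes.

    Returns (coarse_num_nodes, adj_padded, feats_padded, assign_feats_padded).
    """
    # Laplacian of the graph plus the indices of its real (non-padding) nodes.
    A, real_index = computeLaplacian(torch.from_numpy(adj))
    real_feats = torch.from_numpy(feats[real_index, :]).cuda()
    real_assign = torch.from_numpy(assign_feats[real_index, :]).cuda()

    # One level of AMG prolongation, then Galerkin coarsening P^T A P.
    P = computePrologator(A, 1).cuda()
    A_c = computeAdjancency(torch.matmul(P.t(), torch.matmul(A, P))).cpu()
    x_c = torch.matmul(P.t(), real_feats).cpu()
    ass_c = torch.matmul(P.t(), real_assign).cpu()

    # Zero-pad back to the fixed max_num_nodes shapes expected by the model.
    adj_padded = np.zeros(adj.shape)
    adj_padded[:A_c.shape[0], :A_c.shape[1]] = A_c.numpy()
    feats_padded = np.zeros(feats.shape)
    feats_padded[:x_c.shape[0], :x_c.shape[1]] = x_c.numpy()
    assign_padded = np.zeros(assign_feats.shape)
    assign_padded[:ass_c.shape[0], :ass_c.shape[1]] = ass_c.numpy()
    return A_c.shape[0], adj_padded, feats_padded, assign_padded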
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs),
          '; Num testing graphs: ', len(test_graphs))
    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    print('Max, avg, std of graph size: ',
          max([G.number_of_nodes() for G in graphs]), ', '
          "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', '
          "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))

    # minibatch loaders with hard-coded hyperparameters
    feature_type = 'default'
    max_nodes = 0
    batch_size = 20
    num_workers = 1
    dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=feature_type)
    train_dataset_loader = torch.utils.data.DataLoader(dataset_sampler,
                                                       batch_size=batch_size,
                                                       shuffle=True,
                                                       num_workers=num_workers)
    dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=feature_type)
    val_dataset_loader = torch.utils.data.DataLoader(dataset_sampler,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    random.shuffle(graphs)
    val_size = len(graphs) // 10
    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:
        train_graphs = train_graphs + graphs[(val_idx + 1) * val_size:]
    val_graphs = graphs[val_idx * val_size:(val_idx + 1) * val_size]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs))
    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    print('Max, avg, std of graph size: ',
          max([G.number_of_nodes() for G in graphs]), ', '
          "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', '
          "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))

    # minibatch
    dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type)

    # Perform the restrictions using AMG.
    print("performing restrictions...")
    dataset_sampler_new = []
    # maxDATA caps the number of precomputed training samples: Colab's RAM cannot hold the full dataset.
    i = 0
    maxDATA = 490
    for data in dataset_sampler:
        # The data_sample objects cannot be edited in place, so new dicts are built
        # that also carry the (padded) prolongation operator.
        temp = {}
        temp['adj'] = data['adj']
        temp['feats'] = data['feats']
        temp['assign_feats'] = data['assign_feats']
        temp['num_nodes'] = data['num_nodes']
        # Compute Laplacian
        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))
        # Compute prolongator
        P = (computePrologator(A, 1)).cuda()
        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()
        P = P.cpu()
        # Pad the prolongation operator to the fixed max_num_nodes shape
        P_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        P_padded[:P.shape[0], :P.shape[1]] = P.numpy()
        temp['label'] = data['label']
        temp['prol'] = P_padded
        dataset_sampler_new.append(temp)
        if i > maxDATA:
            break
        else:
            i = i + 1

    # Create train dataset loader
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_new,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes,
                                   features=args.feature_type)
    # Same procedure for the validation set
    dataset_sampler_val = []
    for data in dataset_sampler:
        temp = {}
        temp['adj'] = data['adj']
        temp['feats'] = data['feats']
        temp['assign_feats'] = data['assign_feats']
        temp['num_nodes'] = data['num_nodes']
        # Compute Laplacian
        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))
        # Compute prolongator
        P = (computePrologator(A, 1)).cuda()
        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()
        P = P.cpu()
        # Pad the prolongation operator
        P_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        P_padded[:P.shape[0], :P.shape[1]] = P.numpy()
        temp['label'] = data['label']
        temp['prol'] = P_padded
        dataset_sampler_val.append(temp)
    print("restrictions computed\n")

    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, \
        dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
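# The dicts built above hold fixed-shape NumPy arrays, so PyTorch's default collate
# function can batch them directly. Hedged sketch of consuming the returned loaders
# (not from the original source; the downstream pooling model is omitted):
train_loader, val_loader, max_num_nodes, feat_dim, assign_feat_dim = \
    prepare_val_data(graphs, args, val_idx=0, max_nodes=0)

for batch in train_loader:
    adj = batch['adj'].float()      # (batch, max_num_nodes, max_num_nodes)
    feats = batch['feats'].float()  # (batch, max_num_nodes, feat_dim)
    prol = batch['prol'].float()    # padded AMG prolongation operator per graph
    labels = batch['label']
    # ... forward/backward pass of the pooling model would go here ...
    break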