Example No. 1
def prepare_data(graphs, args, test_graphs=None, max_nodes=0):
    """Shuffle graphs, split them into train/val/test according to
    args.train_ratio and args.test_ratio (unless an explicit test set is
    given), and wrap each split in a DataLoader."""
    random.shuffle(graphs)
    if test_graphs is None:
        train_idx = int(len(graphs) * args.train_ratio)
        test_idx = int(len(graphs) * (1 - args.test_ratio))
        train_graphs = graphs[:train_idx]
        val_graphs = graphs[train_idx:test_idx]
        test_graphs = graphs[test_idx:]
    else:
        train_idx = int(len(graphs) * args.train_ratio)
        train_graphs = graphs[:train_idx]
        val_graphs = graphs[train_idx:]

    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs),
          '; Num testing graphs: ', len(test_graphs))

    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    graph_sizes = [G.number_of_nodes() for G in graphs]
    print('Max, avg, std of graph size: {}, {:.2f}, {:.2f}'.format(
        max(graph_sizes), np.mean(graph_sizes), np.std(graph_sizes)))

    # minibatch
    dataset_sampler = GraphSampler(train_graphs,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(test_graphs,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)
    test_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, test_dataset_loader, \
            dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
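
A minimal usage sketch for the function above. The argparse.Namespace fields and the toy networkx graphs below are assumptions chosen to match the attributes the function reads; GraphSampler comes from the surrounding project.

import argparse

import networkx as nx
import numpy as np

# Hypothetical arguments mirroring the attributes prepare_data reads.
args = argparse.Namespace(train_ratio=0.8, test_ratio=0.1,
                          feature_type='default', batch_size=20,
                          num_workers=1)

# Toy dataset: 100 random graphs as a stand-in for a real benchmark.
graphs = [nx.gnp_random_graph(np.random.randint(10, 30), 0.2)
          for _ in range(100)]

(train_loader, val_loader, test_loader,
 max_num_nodes, feat_dim, assign_feat_dim) = prepare_data(graphs, args)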
Example No. 2
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    """Shuffle graphs and build train/val DataLoaders for fold val_idx of a
    10-fold cross-validation split."""
    random.shuffle(graphs)
    val_size = len(graphs) // 10
    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:
        train_graphs = train_graphs + graphs[(val_idx + 1) * val_size:]
    val_graphs = graphs[val_idx * val_size:(val_idx + 1) * val_size]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs))

    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    graph_sizes = [G.number_of_nodes() for G in graphs]
    print('Max, avg, std of graph size: {}, {:.2f}, {:.2f}'.format(
        max(graph_sizes), np.mean(graph_sizes), np.std(graph_sizes)))

    # minibatch
    dataset_sampler = GraphSampler(train_graphs,
                                   args=args,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs,
                                   args=args,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, \
            dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
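
Since val_idx selects one of ten folds, a full cross-validation run loops over all folds; a sketch, assuming the same args object as in the previous example. Note that the in-place random.shuffle at the top of the function re-orders graphs on every call, so the folds are not guaranteed to be disjoint across iterations; for a proper 10-fold split, shuffle once before the loop and drop the shuffle inside the function.

for val_idx in range(10):
    (train_loader, val_loader,
     max_num_nodes, feat_dim, assign_feat_dim) = prepare_val_data(
         graphs, args, val_idx, max_nodes=0)
    # ... train and evaluate one fold here ...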
Example No. 3
def prepare_data(graphs, graphs_list, args, test_graphs=None, max_nodes=0, seed=0):
    """Shuffle graphs and the companion graphs_list with the same seeded
    permutation, split them into train/val/test, and build a DataLoader for
    each split."""
    zip_list = list(zip(graphs, graphs_list))
    random.Random(seed).shuffle(zip_list)
    graphs, graphs_list = zip(*zip_list)
    print('Test ratio: ', args.test_ratio)
    print('Train ratio: ', args.train_ratio)
    test_graphs_list = []

    if test_graphs is None:
        train_idx = int(len(graphs) * args.train_ratio)
        test_idx = int(len(graphs) * (1-args.test_ratio))
        train_graphs = graphs[:train_idx]
        val_graphs = graphs[train_idx: test_idx]
        test_graphs = graphs[test_idx:]
        train_graphs_list = graphs_list[:train_idx]
        val_graphs_list = graphs_list[train_idx: test_idx]
        test_graphs_list = graphs_list[test_idx:]
    else:
        train_idx = int(len(graphs) * args.train_ratio)
        train_graphs = graphs[:train_idx]
        train_graphs_list = graphs_list[:train_idx]
        val_graphs = graphs[train_idx:]
        val_graphs_list = graphs_list[train_idx:]
        # Caveat: test_graphs_list stays empty in this branch, so the test
        # loader built below would receive an empty companion list if a
        # non-empty test set is passed in.
    print('Num training graphs: ', len(train_graphs), 
          '; Num validation graphs: ', len(val_graphs),
          '; Num testing graphs: ', len(test_graphs))

    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    graph_sizes = [G.number_of_nodes() for G in graphs]
    print('Max, avg, std of graph size: {}, {:.2f}, {:.2f}'.format(
        max(graph_sizes), np.mean(graph_sizes), np.std(graph_sizes)))

    test_dataset_loader = []
 
    dataset_sampler = GraphSampler(train_graphs, train_graphs_list,
                                   args.num_pool_matrix,
                                   args.num_pool_final_matrix,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type,
                                   norm=args.norm)
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs, val_graphs_list,
                                   args.num_pool_matrix,
                                   args.num_pool_final_matrix,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type,
                                   norm=args.norm)
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)
    if len(test_graphs) > 0:
        dataset_sampler = GraphSampler(test_graphs, test_graphs_list,
                                       args.num_pool_matrix,
                                       args.num_pool_final_matrix,
                                       normalize=False,
                                       max_num_nodes=max_nodes,
                                       features=args.feature_type,
                                       norm=args.norm)
        test_dataset_loader = torch.utils.data.DataLoader(
            dataset_sampler,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, test_dataset_loader, \
            dataset_sampler.max_num_nodes, dataset_sampler.feat_dim
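
The seeded zip-shuffle-unzip at the top of this variant is worth isolating: it permutes two parallel lists identically and reproducibly. A standalone sketch of just that step (shuffle_parallel is a hypothetical helper name):

import random

def shuffle_parallel(a, b, seed=0):
    # Shuffle two parallel lists with the same permutation.
    pairs = list(zip(a, b))
    random.Random(seed).shuffle(pairs)
    a_shuffled, b_shuffled = zip(*pairs)
    return list(a_shuffled), list(b_shuffled)

graphs, graphs_list = shuffle_parallel(graphs, graphs_list, seed=0)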
Example No. 4
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    """Build train/val DataLoaders for fold val_idx, precomputing for every
    sample a coarsened adjacency, feature, and assignment-feature matrix via
    a prolongation operator, padded back to the original shapes."""
    random.shuffle(graphs)
    val_size = len(graphs) // 10

    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:
        train_graphs = train_graphs + graphs[(val_idx + 1) * val_size:]

    val_graphs = graphs[val_idx * val_size:(val_idx + 1) * val_size]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs))

    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    graph_sizes = [G.number_of_nodes() for G in graphs]
    print('Max, avg, std of graph size: {}, {:.2f}, {:.2f}'.format(
        max(graph_sizes), np.mean(graph_sizes), np.std(graph_sizes)))

    dataset_sampler = GraphSampler(train_graphs,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)
    print("performing restrictions...")

    #Precomputation of Graphs.
    #We could only get 460 samples for training because the RAM of Colab does not permit us to do otherwise
    dataset_sampler_new = []
    i = 0
    maxDATA = 460
    for data in dataset_sampler:

        # For each graph, compute the prolongation operator, the coarsened
        # adjacency matrix, and the coarsened feature matrix.
        temp = {}
        temp['adj2'] = data['adj']
        temp['feats2'] = data['feats']
        temp['assign_feats2'] = data['assign_feats']
        temp['num_nodes2'] = data['num_nodes']

        # Compute the Laplacian and the indices of the real (non-padding) nodes
        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))
        real_features = torch.from_numpy(data['feats'][real_index, :])
        real_assign_feats = torch.from_numpy(
            data['assign_feats'][real_index, :])

        real_features = real_features.cuda()
        real_assign_feats = real_assign_feats.cuda()

        # Compute the prolongation operator P
        P = (computePrologator(A, 1)).cuda()

        # Coarsen A: A_coarse = P^T A P
        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()

        # Coarsen the node features: x_h = P^T x
        x_h = torch.matmul(P.t(), real_features)
        x_h = x_h.cpu()

        # Coarsen the assignment features the same way
        x_ass_h = torch.matmul(P.t(), real_assign_feats)
        x_ass_h = x_ass_h.cpu()

        # Zero-pad the coarsened matrices back to the original shapes
        A_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        A_padded[:A.shape[0], :A.shape[1]] = A.numpy()

        x_h_padded = np.zeros(shape=(data['feats'].shape[0],
                                     data['feats'].shape[1]))
        x_h_padded[:x_h.shape[0], :x_h.shape[1]] = x_h.numpy()

        x_assigned_padded = np.zeros(shape=(data['assign_feats'].shape[0],
                                            data['assign_feats'].shape[1]))
        x_assigned_padded[:x_ass_h.shape[0], :x_ass_h.shape[1]] = x_ass_h.numpy()
        temp['num_nodes1'] = A.shape[0]
        temp['adj1'] = A_padded
        temp['feats1'] = x_h_padded
        temp['assign_feats1'] = x_assigned_padded
        temp['label'] = data['label']

        dataset_sampler_new.append(temp)
        i += 1
        if i >= maxDATA:
            break

    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_new,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)

    # Repeat the same coarsening for the validation samples (no sample cap here)
    dataset_sampler_val = []
    for data in dataset_sampler:
        temp = {}
        temp['adj2'] = data['adj']
        temp['feats2'] = data['feats']
        temp['assign_feats2'] = data['assign_feats']
        temp['num_nodes2'] = data['num_nodes']

        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))
        real_features = torch.from_numpy(data['feats'][real_index, :])
        real_assign_feats = torch.from_numpy(
            data['assign_feats'][real_index, :])

        real_features = real_features.cuda()
        real_assign_feats = real_assign_feats.cuda()

        P = (computePrologator(A, 1)).cuda()
        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()
        x_h = torch.matmul(P.t(), real_features)
        x_h = x_h.cpu()
        x_ass_h = torch.matmul(P.t(), real_assign_feats)
        x_ass_h = x_ass_h.cpu()

        A_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        A_padded[:A.shape[0], :A.shape[1]] = A.numpy()

        x_h_padded = np.zeros(shape=(data['feats'].shape[0],
                                     data['feats'].shape[1]))
        x_h_padded[:x_h.shape[0], :x_h.shape[1]] = x_h.numpy()

        x_assigned_padded = np.zeros(shape=(data['assign_feats'].shape[0],
                                            data['assign_feats'].shape[1]))
        x_assigned_padded[:x_ass_h.shape[0], :x_ass_h.shape[1]] = x_ass_h.numpy()
        temp['num_nodes1'] = A.shape[0]
        temp['adj1'] = A_padded
        temp['feats1'] = x_h_padded
        temp['assign_feats1'] = x_assigned_padded
        temp['label'] = data['label']
        dataset_sampler_val.append(temp)

    print("Restrictions Computed")
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, \
            dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
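
The coarsened matrices above are zero-padded back to their original shapes so that every sample in a batch has identical dimensions and the default DataLoader collate can stack them. A minimal sketch of that padding step in isolation (pad_to is a hypothetical helper, not part of the code above):

import numpy as np

def pad_to(mat, shape):
    # Zero-pad a 2-D array up to the target shape.
    padded = np.zeros(shape, dtype=mat.dtype)
    padded[:mat.shape[0], :mat.shape[1]] = mat
    return padded

coarse = np.ones((3, 3))          # e.g. a coarsened adjacency matrix
padded = pad_to(coarse, (5, 5))   # back to the pre-coarsening size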
Example No. 5
print('Num training graphs: ', len(train_graphs),
      '; Num validation graphs: ', len(val_graphs),
      '; Num testing graphs: ', len(test_graphs))

print('Number of graphs: ', len(graphs))
print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
graph_sizes = [G.number_of_nodes() for G in graphs]
print('Max, avg, std of graph size: {}, {:.2f}, {:.2f}'.format(
    max(graph_sizes), np.mean(graph_sizes), np.std(graph_sizes)))

# minibatch
feature_type = 'default'
max_nodes = 0
batch_size = 20
num_workers = 1
dataset_sampler = GraphSampler(train_graphs,
                               normalize=False,
                               max_num_nodes=max_nodes,
                               features=feature_type)
train_dataset_loader = torch.utils.data.DataLoader(dataset_sampler,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=num_workers)

dataset_sampler = GraphSampler(val_graphs,
                               normalize=False,
                               max_num_nodes=max_nodes,
                               features=feature_type)
val_dataset_loader = torch.utils.data.DataLoader(dataset_sampler,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=num_workers)
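
To sanity-check a loader built this way, pull one batch; assuming GraphSampler yields dicts of fixed-size arrays keyed 'adj' and 'feats' (as the other examples suggest), the default collate stacks each field into a batched tensor:

batch = next(iter(train_dataset_loader))
# Each value gains a leading batch dimension, e.g.:
print(batch['adj'].shape)    # (batch_size, max_num_nodes, max_num_nodes)
print(batch['feats'].shape)  # (batch_size, max_num_nodes, feat_dim)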
Example No. 6
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    """Build train/val DataLoaders for fold val_idx, attaching to every
    sample a padded AMG prolongation operator computed from its graph."""
    random.shuffle(graphs)
    val_size = len(graphs) // 10

    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:
        train_graphs = train_graphs + graphs[(val_idx + 1) * val_size:]

    val_graphs = graphs[val_idx * val_size:(val_idx + 1) * val_size]
    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs))

    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    graph_sizes = [G.number_of_nodes() for G in graphs]
    print('Max, avg, std of graph size: {}, {:.2f}, {:.2f}'.format(
        max(graph_sizes), np.mean(graph_sizes), np.std(graph_sizes)))

    # minibatch
    dataset_sampler = GraphSampler(train_graphs,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)

    # Perform the restrictions using AMG (algebraic multigrid)
    print("performing restrictions...")
    dataset_sampler_new = []

    # maxDATA caps training at 490 samples: Colab's RAM cannot hold the
    # precomputed data for the full set.
    i = 0
    maxDATA = 490
    for data in dataset_sampler:
        # GraphSampler items cannot be edited in place, so each sample is
        # rebuilt as a plain dict with the prolongation operator added.
        temp = {}
        temp['adj'] = data['adj']
        temp['feats'] = data['feats']
        temp['assign_feats'] = data['assign_feats']
        temp['num_nodes'] = data['num_nodes']

        # Compute the Laplacian and the indices of the real (non-padding) nodes
        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))

        # Compute the prolongation operator P
        P = (computePrologator(A, 1)).cuda()
        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()
        P = P.cpu()

        # Zero-pad P to the full (pre-coarsening) graph size
        P_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        P_padded[:P.shape[0], :P.shape[1]] = P.numpy()

        temp['label'] = data['label']
        # Padded prolongation operator
        temp['prol'] = P_padded

        dataset_sampler_new.append(temp)
        i += 1
        if i >= maxDATA:
            break

    #Create Train Dataset Loader
    train_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_new,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)

    dataset_sampler = GraphSampler(val_graphs,
                                   normalize=False,
                                   max_num_nodes=max_nodes,
                                   features=args.feature_type)

    # The same procedure is applied to the validation samples (no sample cap)
    dataset_sampler_val = []
    for data in dataset_sampler:
        temp = {}
        temp['adj'] = data['adj']
        temp['feats'] = data['feats']
        temp['assign_feats'] = data['assign_feats']
        temp['num_nodes'] = data['num_nodes']

        # Compute the Laplacian and the indices of the real (non-padding) nodes
        A, real_index = computeLaplacian(torch.from_numpy(data['adj']))

        # Compute the prolongation operator P
        P = (computePrologator(A, 1)).cuda()

        A = torch.matmul(P.t(), torch.matmul(A, P))
        A = computeAdjancency(A)
        A = A.cpu()
        P = P.cpu()

        # Zero-pad P to the full (pre-coarsening) graph size
        P_padded = np.zeros(shape=(data['adj'].shape[0], data['adj'].shape[1]))
        P_padded[:P.shape[0], :P.shape[1]] = P.numpy()

        temp['label'] = data['label']
        temp['prol'] = P_padded
        dataset_sampler_val.append(temp)

    print("restriction Computed\n")
    val_dataset_loader = torch.utils.data.DataLoader(
        dataset_sampler_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    return train_dataset_loader, val_dataset_loader, \
            dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
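
Downstream, the padded operator stored under 'prol' is applied exactly as in the loops above: features are restricted to the coarse graph via P^T x. A sketch of the batched version a model would run, with shapes chosen only for illustration:

import torch

# Hypothetical batch shapes matching the loader: prol is (B, N, N),
# feats is (B, N, F).
prol = torch.randn(20, 100, 100)
feats = torch.randn(20, 100, 18)

# Restriction: coarse features are P^T x, computed batch-wise.
coarse_feats = torch.bmm(prol.transpose(1, 2), feats)   # (B, N, F)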