def load_data_del_edges(prob_del=0.4,
                        seed=0,
                        to_dense=True,
                        enforce_connected=True,
                        dataset_name='cora'):
    res = graph_delete_connections(prob_del,
                                   seed,
                                   *load_data(dataset_name),
                                   to_dense=to_dense,
                                   enforce_connected=enforce_connected)
    return res
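# Hypothetical usage sketch (not part of the original snippet; the exact
# return shape depends on graph_delete_connections): perturb Cora by
# deleting 40% of its edges while enforcing connectivity.
res = load_data_del_edges(prob_del=0.4, seed=0, dataset_name='cora')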
Example 2
def main(sample_name, epochs=200, get_probs=False):
    # Training settings
    valid = False
    no_cuda = False
    seed = 42
    lr = 1e-2
    weight_decay = 1e-5
    hidden = 32
    dropout = 0.5

    cuda = not no_cuda and torch.cuda.is_available()

    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed(seed)

    # Load data
    adj, features, labels, y_test, idx_train, idx_val, idx_test = load_data()

    # Model and optimizer
    model = GCN(nfeat=features.shape[1],
                nhid=hidden,
                nclass=1,
                dropout=dropout)
    optimizer = optim.Adam(model.parameters(),
                           lr=lr, weight_decay=weight_decay)

    if cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()
        y_test = y_test.cuda()

    # Train the model
    torch.set_grad_enabled(True)
    t_total = time.time()
    model.train()  # training mode: dropout enabled
    print("------- Training GCN")
    for epoch in range(epochs):
        if epoch == epochs - 1:
            valid = True  # run validation only on the final epoch
        train(model, optimizer, epoch, adj, features, labels, idx_train, idx_val, valid)
    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    # Testing
    info = gcn_inference(sample_name, model, adj, features, y_test, idx_test, get_probs=get_probs)
    return info
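# Hypothetical usage sketch ('my_sample' is an invented name, not from the
# snippet): train for 200 epochs and collect the inference info.
info = main('my_sample', epochs=200, get_probs=False)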
Example 3
def loadGcnData(dataset):
    nxgraph, y_mtx = utils.load_data(dataset)
    print(f'\t\t {len(nxgraph)} nodes')

    if DEBUG:
        print("Graph", nxgraph, len(nxgraph))
        print("y", np.shape(y_mtx))

    y = np.argmax(y_mtx, axis=1)
    if DEBUG:
        print(y_mtx, y)

    gtcount = y_mtx.shape[1]
    print(f'\t\t Ground Truth: {gtcount} communities')
    return nxgraph, toIGraph(nxgraph), y, gtcount
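# Hypothetical usage sketch (assumes 'cora' is a dataset name accepted by
# utils.load_data): load the graph and report the community count.
nxgraph, igraph_graph, y, gtcount = loadGcnData('cora')
print(f'{gtcount} ground-truth communities over {len(nxgraph)} nodes')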
Example 4
    def sample_pubmed(self):
        A, X, Y = load_data('pubmed')

        # Symmetrize and binarize the adjacency matrix
        A = A + A.T
        A[A > 1] = 1

        nb_node = 4000
        nb_graph = 5
        graphs = []
        nodes = np.random.permutation(A.shape[0])
        for g in range(nb_graph):
            sel_nodes = nodes[nb_node * g: nb_node * (g + 1)]
            _Y = Y[sel_nodes]
            _A = A[sel_nodes][:, sel_nodes]
            _X = X[sel_nodes]
            # Keep only the largest connected component of the subgraph
            sel = utils.largest_connected_components(_A)
            _A = _A[sel][:, sel]
            _X = _X[sel]
            _Y = _Y[sel]
            print('Pubmed: num node {}, num edge {}'.format(len(list(sel)), _A.sum()))
            graphs.append([_A.astype(np.float32), sp.csr_matrix(_X), _Y])
        return graphs
Example 5
def load_data_test():
    d = gut.load_data()
    return d
Example 6
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=16,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_val, idx_test = load_data()
print(adj)

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=labels.max().item() + 1,
            dropout=args.dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
Example 7
args = Parser()
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
kwargs = {}

# load data
remote_dataset = [[], []]
# for plotting
loss_train_list = [[], [], [], [], []]
accuracy_train_list = [[], [], [], [], []]
loss_val_list = [[], [], [], []]
accuracy_val_list = [[], [], [], []]
adj, features, labels, idx_bob, idx_alice, idx_test, idx_val, idx_train = load_data()

if args.cuda:
    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    idx_bob = idx_bob.cuda()
    idx_alice = idx_alice.cuda()
    idx_test = idx_test.cuda()
    idx_val = idx_val.cuda()
    idx_train = idx_train.cuda()

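# .send(bob) is the PySyft call that moves each tensor to the remote worker
# `bob`, leaving a pointer behind on the local worker.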
bob_adj = adj.send(bob)
bob_features = features.send(bob)
bob_labels = labels[idx_bob].send(bob)
bob_idx = idx_bob.send(bob)
Example 8
acc = []
acc_class = []
time_per_step = []
smoothing = []
total_time = []
for r in range(configuration['repeating']):
    # Set random seed
    seed = model_config['random_seed']
    np.random.seed(seed)
    model_config['random_seed'] = np.random.random_integers(1073741824)
    # Load data
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = \
        load_data(model_config['dataset'], train_size=model_config['train_size'],
                  validation_size=model_config['validation_size'],
                  model_config=model_config, shuffle=model_config['shuffle'], repeat_state=r)
    print('train size = ',
          model_config['train_size'], '\n',
          'lam = ', la, '\n',
          model_config['epochs'])
    # Initialize session
    with tf.Graph().as_default():
        tf.set_random_seed(seed)
        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(
                intra_op_parallelism_threads=model_config['threads'],
                inter_op_parallelism_threads=2,  # model_config['threads'],
Example 9
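        # pos_weight up-weights the sparse positive (edge) entries so the
        # reconstruction loss is balanced between edges and non-edges.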
        self.cost = norm * tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(
                logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)  # Adam Optimizer

        self.opt_op = self.optimizer.minimize(self.cost)
        self.grads_vars = self.optimizer.compute_gradients(self.cost)


# Given a training set of protein-protein interactions in yeast S. cerevisiae, our goal is to take these interactions
# and train a GCN model that can predict new protein-protein interactions. That is, we would like to predict new
# edges in the yeast protein interaction network.
print("Start")
# Check if regenerate_training_date is set to True: regenerate training/validation/test data
adj, adj_train, val_edges, val_edges_false, test_edges, test_edges_false = load_data()

num_nodes = adj.shape[0]
num_edges = adj.sum()

#
# Simple GCN: no node features (featureless). Substitute the identity matrix for the feature matrix: X = I
#
features = sparse_to_tuple(sp.identity(num_nodes))
num_features = features[2][1]            # shape[1] of the (coords, values, shape) tuple
features_nonzero = features[1].shape[0]  # number of stored (nonzero) values

#
# Store original adjacency matrix (without diagonal entries) for later
#
adj_orig = (adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape))
Example 10
flags.DEFINE_integer("op", 1, "Training or Test")
###############################
if_drop_edge = True
if_save_model = False
# whether to train the discriminator
if_train_dis = True
restore_trained_our = False
showed_target_idx = 0  # index of the target group to show
run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
###################################
### read and process the graph
model_str = FLAGS.model
dataset_str = FLAGS.dataset
# Load data
# _A_obs, _X_obs, _z_obs = utils.load_npz('data/citeseer.npz')
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    "citeseer")

# _A_obs = _A_obs + _A_obs.T  # turn GCN_ori into GCN
# _A_obs[_A_obs > 1] = 1
# adj = _A_obs

adj_norm, adj_norm_sparse = preprocess_graph(adj)

# _K = _z_obs.max() + 1  # number of classes
_K = y_train.shape[1]
features_normalized = normalize(features.tocsr(), axis=0, norm='max')
features = sp.csr_matrix(features_normalized)

# adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
# adj = adj_train
if FLAGS.features == 0:
Example 11
def train(model_config, sess, seed, repeat_state, data_split=None):
    # Print model_config
    very_begining = time.time()
    print('',
          'name           : {}'.format(model_config['name']),
          'logdir         : {}'.format(model_config['logdir']),
          'dataset        : {}'.format(model_config['dataset']),
          'train_size     : {}'.format(model_config['train_size']),
          'learning_rate  : {}'.format(model_config['learning_rate']),
          'feature        : {}'.format(model_config['feature']),
          'logging        : {}'.format(model_config['logging']),
          sep='\n')

    if data_split:
        adj = data_split['adj']
        features = data_split['features']
        y_train = data_split['y_train']
        y_val = data_split['y_val']
        y_test = data_split['y_test']
        train_mask = data_split['train_mask']
        val_mask = data_split['val_mask']
        test_mask = data_split['test_mask']
        triplet = data_split['triplet']
    else:
        # Load data
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, size_of_each_class, triplet = \
            load_data(model_config['dataset'],train_size=model_config['train_size'],
                      validation_size=model_config['validation_size'],
                      model_config=model_config, shuffle=model_config['shuffle'], repeat_state=repeat_state)
        stored_A = model_config['dataset']
        if model_config['drop_inter_class_edge']:
            adj = drop_inter_class_edge(adj)
            stored_A = model_config['dataset'] + '_drop'
        # preprocess_features
        begin = time.time()
        features = smooth(features,
                          adj,
                          model_config['smoothing'],
                          model_config,
                          stored_A=stored_A + '_A_I')
        print(time.time() - begin, 's')
        data_split = {
            'adj': adj,
            'features': features,
            'y_train': y_train,
            'y_val': y_val,
            'y_test': y_test,
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'triplet': triplet
        }
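    # Unnormalized graph Laplacian L = D - A (D = diagonal degree matrix)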
    laplacian = sparse.diags(adj.sum(1).flat, 0) - adj
    laplacian = laplacian.astype(np.float32).tocoo()
    if type(model_config['t']) == int and model_config['t'] < 0:
        eta = adj.shape[0] / (adj.sum() / adj.shape[0])**len(
            model_config['connection'])
        model_config['t'] = (y_train.sum(axis=0) * 3 * eta /
                             y_train.sum()).astype(np.int64)
        print('t=', model_config['t'])

    # origin_adj = adj
    if model_config['Model'] == 0:
        pass
    elif model_config['Model'] in [1, 2, 3, 4]:
        # absorption probability
        print(
            'Calculating Absorption Probability...',
            # 's        :{}'.format(model_config['s']),
            'alpha    :{}'.format(model_config['alpha']),
            'type     :{}'.format(model_config['absorption_type']),
            sep='\n')
        if model_config['Model'] == 1:
            adj = Model1(adj, model_config['t'], model_config['alpha'],
                         model_config['absorption_type'])
        elif model_config['Model'] == 2:
            adj = Model2(adj, model_config['s'], model_config['alpha'],
                         y_train)
        elif model_config['Model'] == 3:
            # original_y_train = y_train
            y_train, train_mask = Model3(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
        elif model_config['Model'] == 4:
            y_train, train_mask = Model4(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
    elif model_config['Model'] == 5:
        adj = Model5(features, adj, model_config['mu'])
    elif model_config['Model'] == 6:
        adj = Model6(adj)
    elif model_config['Model'] == 7:
        y_train, train_mask = Model7(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask, features)
    elif model_config['Model'] == 8:
        # original_y_train = y_train
        y_train, train_mask = Model8(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask)
    elif model_config['Model'] == 9:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 10:
        y_train, train_mask = Model10(adj,
                                      model_config['s'],
                                      model_config['t'],
                                      model_config['alpha'],
                                      y_train,
                                      train_mask,
                                      features,
                                      stored_A=stored_A + '_A_H')
    elif model_config['Model'] == 11:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 12:
        pass
    elif model_config['Model'] == 13:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 14:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 15:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 16:
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])
                            ) as sub_sess:
                tf.set_random_seed(seed)
                test_acc, test_acc_of_class, prediction = train(
                    model_config['Model_to_add_label'],
                    sub_sess,
                    seed,
                    data_split=data_split)
        y_train, train_mask = Model16(prediction, model_config['t'], y_train,
                                      train_mask)
        model_config = model_config['Model_to_predict']
        print('',
              'name           : {}'.format(model_config['name']),
              'logdir         : {}'.format(model_config['logdir']),
              'dataset        : {}'.format(model_config['dataset']),
              'train_size     : {}'.format(model_config['train_size']),
              'learning_rate  : {}'.format(model_config['learning_rate']),
              'feature        : {}'.format(model_config['feature']),
              'logging        : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 17:
        if model_config['smoothing'] is not None:
            stored_A = None
            adj = construct_knn_graph(features, model_config['k'])
        else:
            stored_A = stored_A + '_A_I'
        if model_config['drop_inter_class_edge']:
            stored_A = None
        test_acc, test_acc_of_class, prediction = Model17(
            adj,
            model_config['alpha'],
            y_train,
            train_mask,
            y_test,
            stored_A=stored_A)
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_begining))
        return test_acc, test_acc_of_class, prediction, size_of_each_class, \
            time.time() - very_begining
    elif model_config['Model'] == 18:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
        alpha = 1e-6
        test_acc, test_acc_of_class, prediction = Model17(adj,
                                                          alpha,
                                                          y_train,
                                                          train_mask,
                                                          y_test,
                                                          stored_A=stored_A +
                                                          '_A_I')
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        return test_acc, test_acc_of_class, prediction
    elif model_config['Model'] == 19:
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])
                            ) as sub_sess:
                tf.set_random_seed(seed)
                test_acc, test_acc_of_class, prediction = train(
                    model_config['Model_to_add_label'],
                    sub_sess,
                    seed,
                    data_split=data_split)
        stored_A = stored_A + '_A_I'
        # print(time.time()-very_begining)
        y_train, train_mask = Model19(prediction, model_config['t'], y_train,
                                      train_mask, adj, model_config['alpha'],
                                      stored_A, model_config['Model19'])
        # print(time.time()-very_begining)
        model_config = model_config['Model_to_predict']
        print('',
              'name           : {}'.format(model_config['name']),
              'logdir         : {}'.format(model_config['logdir']),
              'dataset        : {}'.format(model_config['dataset']),
              'train_size     : {}'.format(model_config['train_size']),
              'learning_rate  : {}'.format(model_config['learning_rate']),
              'feature        : {}'.format(model_config['feature']),
              'logging        : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 20:
        pass
    elif model_config['Model'] == 21:
        pass
    elif model_config['Model'] == 22:
        alpha = model_config['alpha']
        stored_A = stored_A + '_A_I'
        features = Model22(adj, features, alpha, stored_A)
    elif model_config['Model'] == 23:
        if model_config['classifier'] == 'tree':
            clf = tree.DecisionTreeClassifier(
                max_depth=model_config['tree_depth'])
            t = time.time()
            clf.fit(features[train_mask], np.argmax(y_train[train_mask],
                                                    axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'svm':
            # kernel='rbf', gamma=model_config['gamma'], class_weight='balanced', degree=model_config['svm_degree']
            clf = svm.SVC()
            t = time.time()
            clf.fit(features[train_mask], np.argmax(y_train[train_mask],
                                                    axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'cnn':
            prediction, t = cnn.train(model_config, features, train_mask,
                                      y_train, test_mask, y_test)
        else:
            raise ValueError(
                "model_config['classifier'] should be in ['svm', 'tree', 'cnn']")
        test_acc = np.sum(prediction == np.argmax(y_test[test_mask],
                                                  axis=1)) / np.sum(test_mask)
        # test_acc = test_acc[0]
        one_hot_prediction = np.zeros(y_test[test_mask].shape)
        one_hot_prediction[np.arange(one_hot_prediction.shape[0]),
                           prediction] = 1
        test_acc_of_class = np.sum(one_hot_prediction * y_test[test_mask],
                                   axis=0) / np.sum(y_test[test_mask],
                                                    axis=0)  #TODO
        print("Test set results: cost= {:.5f} accuracy= {:.5f} time= {:.5f}".
              format(0., test_acc, 0.))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_begining))
        return test_acc, test_acc_of_class, prediction, size_of_each_class, t
    elif model_config['Model'] == 26:
        adj = Model26(adj,
                      model_config['t'],
                      model_config['alpha'],
                      y_train,
                      train_mask,
                      stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 28:
        features = Model28(adj, features, stored_A, model_config['k'])
    else:
        raise ValueError(
            '''model_config['Model'] must be one of 0-23, 26 or 28,'''
            ''' but is {} now'''.format(model_config['Model']))

    # Some preprocessing
    if model_config['connection'] == [
            'f' for i in range(len(model_config['connection']))
    ]:
        train_features = features[train_mask]
        val_features = features[val_mask]
        test_features = features[test_mask]
    else:
        train_features = features
        val_features = features
        test_features = features
    if sparse.issparse(features):
        train_features = sparse_to_tuple(train_features)
        val_features = sparse_to_tuple(val_features)
        test_features = sparse_to_tuple(test_features)
        features = sparse_to_tuple(features)

    if model_config['Model'] == 12:
        if model_config['k'] < 0:
            if hasattr(model_config['train_size'], '__getitem__'):
                eta = 0
                for i in model_config['train_size']:
                    eta += i
                eta /= adj.shape[0]
            else:
                eta = model_config['train_size'] / 100
            k = (1 / eta)**(1 / len(model_config['connection']))
            k = int(k)
        else:
            k = model_config['k']
        model_config['name'] += '_k{}'.format(k)
        support = Model12(adj, k)
        num_supports = len(support)
    elif model_config['conv'] == 'taubin':
        support = [
            sparse_to_tuple(
                taubin_smoothor(adj, model_config['taubin_lambda'],
                                model_config['taubin_mu'],
                                model_config['taubin_repeat']))
        ]
        num_supports = 1
    elif model_config['conv'] == 'test21':
        support = [
            sparse_to_tuple(
                Test21(adj,
                       model_config['alpha'],
                       beta=model_config['beta'],
                       stored_A=stored_A + '_A_I'))
        ]
        num_supports = 1
    elif model_config['conv'] == 'gcn':
        support = [preprocess_adj(adj)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_unnorm':
        support = [sparse_to_tuple(adj.astype(np.float32))]
        num_supports = 1
    elif model_config['conv'] == 'gcn_noloop':
        support = [preprocess_adj(adj, loop=False)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_rw':
        support = [preprocess_adj(adj, type='rw')]
        num_supports = 1
    elif model_config['conv'] in ['cheby', 'chebytheta']:
        # origin_adj_support = chebyshev_polynomials(origin_adj, model_config['max_degree'])
        support = chebyshev_polynomials(adj, model_config['max_degree'])
        num_supports = 1 + model_config['max_degree']
    else:
        raise ValueError('Invalid argument for model_config["conv"]: ' +
                         str(model_config['conv']))

    # Define placeholders
    placeholders = {
        'support': [
            tf.sparse_placeholder(tf.float32, name='support' + str(i))
            for i in range(num_supports)
        ],
        'features':
        tf.sparse_placeholder(tf.float32, name='features') if isinstance(
            features, tf.SparseTensorValue) else tf.placeholder(
                tf.float32, shape=[None, features.shape[1]], name='features'),
        'labels':
        tf.placeholder(tf.int32, name='labels',
                       shape=(None, y_train.shape[1])),
        'labels_mask':
        tf.placeholder(tf.int32, name='labels_mask'),
        'dropout':
        tf.placeholder_with_default(0., name='dropout', shape=()),
        'num_features_nonzero':
        tf.placeholder(tf.int32, name='num_features_nonzero'),
        # helper variable for sparse dropout
        'laplacian':
        tf.SparseTensor(indices=np.vstack([laplacian.row,
                                           laplacian.col]).transpose(),
                        values=laplacian.data,
                        dense_shape=laplacian.shape),
        'triplet':
        tf.placeholder(tf.int32, name='triplet', shape=(None, None)),
        'noise_sigma':
        tf.placeholder(tf.float32, name='noise_sigma'),
        'noise':
        tf.sparse_placeholder(tf.float32, name='noise') if isinstance(
            features, tf.SparseTensorValue) else tf.placeholder(
                tf.float32, shape=[None, features.shape[1]], name='noise')
    }
    if model_config['Model'] in [11, 13, 14, 15]:
        placeholders['label_per_sample'] = tf.placeholder(
            tf.float32,
            name='label_per_sample',
            shape=(None, label_per_sample.shape[1]))
        placeholders['sample2label'] = tf.placeholder(
            tf.float32,
            name='sample2label',
            shape=(label_per_sample.shape[1], y_train.shape[1]))

    # Create model
    model = GCN_MLP(model_config, placeholders, input_dim=train_features[2][1])

    # Random initialize
    sess.run(tf.global_variables_initializer())

    # Initialize FileWriter, saver & variables in graph
    train_writer = None
    valid_writer = None
    saver = None

    # Construct feed dictionary
    if model_config['connection'] == [
            'f' for i in range(len(model_config['connection']))
    ]:
        train_feed_dict = construct_feed_dict(
            train_features, support, y_train[train_mask],
            np.ones(train_mask.sum(), dtype=np.bool), triplet,
            model_config['noise_sigma'], placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val[val_mask],
            np.ones(val_mask.sum(), dtype=np.bool), triplet, 0, placeholders)
        test_feed_dict = construct_feed_dict(
            test_features, support, y_test[test_mask],
            np.ones(test_mask.sum(), dtype=np.bool), triplet, 0, placeholders)
    else:
        train_feed_dict = construct_feed_dict(train_features, support, y_train,
                                              train_mask, triplet,
                                              model_config['noise_sigma'],
                                              placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(val_features, support, y_val,
                                              val_mask, triplet, 0,
                                              placeholders)
        test_feed_dict = construct_feed_dict(test_features, support, y_test,
                                             test_mask, triplet, 0,
                                             placeholders)

    if model_config['Model'] in [11, 13, 14, 15]:
        train_feed_dict.update(
            {placeholders['label_per_sample']: label_per_sample})
        train_feed_dict.update({placeholders['sample2label']: sample2label})
        valid_feed_dict.update(
            {placeholders['label_per_sample']: label_per_sample})
        valid_feed_dict.update({placeholders['sample2label']: sample2label})
        test_feed_dict.update(
            {placeholders['label_per_sample']: label_per_sample})
        test_feed_dict.update({placeholders['sample2label']: sample2label})

    # tmp = sess.run([model.prediction, model.sample2label], feed_dict=test_feed_dict)

    # Some support variables
    valid_loss_list = []
    max_valid_acc = 0
    max_train_acc = 0
    t_test = time.time()
    test_cost, test_acc, test_acc_of_class, prediction = sess.run(
        [
            model.loss, model.accuracy, model.accuracy_of_class,
            model.prediction
        ],
        feed_dict=test_feed_dict)
    test_duration = time.time() - t_test
    timer = 0
    begin = time.time()

    # print(time.time() - very_begining)
    if model_config['train']:
        # Train model
        print('training...')
        for step in range(model_config['epochs']):
            if model_config['Model'] in [
                    20, 21
            ] and step == model_config['epochs'] / 2:
                stored_A = stored_A + '_A_I'
                y_train, train_mask = Model20(prediction, model_config['t'],
                                              y_train, train_mask, adj,
                                              model_config['alpha'], stored_A)
                if model_config['Model'] == 21:
                    y_train, train_mask = Model16(prediction,
                                                  model_config['t2'], y_train,
                                                  train_mask)
                train_feed_dict = construct_feed_dict(
                    features, support, y_train, train_mask, triplet,
                    model_config['noise_sigma'], placeholders)
                train_feed_dict.update(
                    {placeholders['dropout']: model_config['dropout']})
                max_valid_acc = 0
                max_train_acc = 0

            # Training step
            if model_config['logdir'] and step % 100 == 0:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                t = time.time()
                sess.run(model.opt_op,
                         feed_dict=train_feed_dict,
                         options=run_options,
                         run_metadata=run_metadata)
                t = time.time() - t
                train_writer.add_run_metadata(run_metadata, 'step%d' % step)
                # Create the Timeline object, and write it to a json
                with open(path.join(model_config['logdir'], 'timeline.json'),
                          'w') as f:
                    f.write(
                        timeline.Timeline(run_metadata.step_stats).
                        generate_chrome_trace_format())
            else:
                t = time.time()
                if isinstance(train_features, tf.SparseTensorValue):
                    train_feed_dict.update({
                        placeholders['features']:
                        tf.SparseTensorValue(
                            train_features.indices, train_features.values +
                            np.random.normal(0, model_config['noise_sigma'],
                                             train_features.indices.shape[0]),
                            train_features.dense_shape)
                    })
                else:
                    train_feed_dict.update({
                        placeholders['features']:
                        train_features +
                        np.random.normal(0, model_config['noise_sigma'],
                                         train_features.shape)
                    })
                sess.run(model.opt_op, feed_dict=train_feed_dict)
                t = time.time() - t
            timer += t
            train_loss, train_acc, train_summary = sess.run(
                [model.loss, model.accuracy, model.summary],
                feed_dict=train_feed_dict)

            # Logging
            if model_config['logdir']:
                global_step = model.global_step.eval(session=sess)
                train_writer.add_summary(train_summary, global_step)
                valid_writer.add_summary(valid_summary, global_step)

            # If it's the best performance so far, evaluate on the test set
            if model_config['validate']:
                valid_loss, valid_acc, valid_summary = sess.run(
                    [model.loss, model.accuracy, model.summary],
                    feed_dict=valid_feed_dict)
                valid_loss_list.append(valid_loss)
                if valid_acc >= max_valid_acc:
                    max_valid_acc = valid_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy, model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    if args.verbose:
                        print('*', end='')
            else:
                if train_acc >= max_train_acc:
                    max_train_acc = train_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy, model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    if args.verbose:
                        print('*', end='')

            # Print results
            if args.verbose:
                print("Epoch: {:04d}".format(step),
                      "train_loss= {:.3f}".format(train_loss),
                      "train_acc= {:.3f}".format(train_acc),
                      end=' ')
                if model_config['validate']:
                    print("val_loss=",
                          "{:.3f}".format(valid_loss),
                          "val_acc= {:.3f}".format(valid_acc),
                          end=' ')
                print("time=", "{:.5f}".format(t))

            if 0 < model_config['early_stopping'] < step \
                    and valid_loss_list[-1] > np.mean(valid_loss_list[-(model_config['early_stopping'] + 1):-1]):
                print("Early stopping...")
                break
        else:
            print("Optimization Finished!")

        # Testing
        print("Test set results:", "cost=", "{:.5f}".format(test_cost),
              "accuracy=", "{:.5f}".format(test_acc), "time=",
              "{:.5f}".format(test_duration))
        print("accuracy of each class=", test_acc_of_class)

        # Saving
        if model_config['logdir']:
            print('Save model to "{:s}"'.format(
                saver.save(sess=sess,
                           save_path=path.join(model_config['logdir'],
                                               'model.ckpt'),
                           global_step=global_step)))
    print("Total time={}s".format(time.time() - very_begining))
    return test_acc, test_acc_of_class, prediction, size_of_each_class, \
        time.time() - begin
Example 12
# In[2]:

from gcn.utils import load_data
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import networkx as nx
from scipy.sparse import coo_matrix
from scipy.sparse import csgraph
from tqdm import tqdm

from sklearn.svm import SVC

# In[3]:

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data('cora')
features = features.A  # densify the scipy sparse feature matrix for sklearn

# In[4]:


def one_hot_to_cat(X):
    '''
    shape of X = (n_samples, n_classes)
    '''
    return np.apply_along_axis(arr=X, axis=1, func1d=lambda x: np.argmax(x))
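# Note: for a 2-D one-hot matrix this is equivalent to X.argmax(axis=1).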


# In[34]:

model = SVC(decision_function_shape='ovr', C=1, kernel='linear')
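# Hypothetical continuation (not in the original snippet): the one-hot label
# matrices from gcn.utils are disjoint, so their sum recovers all labels.
y = one_hot_to_cat(y_train + y_val + y_test)
model.fit(features[train_mask], y[train_mask])
print('test accuracy:', model.score(features[test_mask], y[test_mask]))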
Example 13
def train(model_config, sess, repeat_state):
    # Print model_name
    very_begining = time.time()
    print('',
          'name           : {}'.format(model_config['name']),
          'dataset        : {}'.format(model_config['dataset']),
          sep='\n')

    # Load data
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = \
        load_data(model_config['dataset'], train_size=model_config['train_size'],
                  validation_size=model_config['validation_size'],
                  model_config=model_config, shuffle=model_config['shuffle'], repeat_state=repeat_state)

    if model_config['Model'] == 'LP':
        train_time = time.time()
        test_acc, test_acc_of_class = Model17(adj, model_config['alpha'],
                                              y_train, y_test)
        train_time = time.time() - train_time
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("Total time={}s".format(time.time() - very_begining))
        return test_acc, test_acc_of_class, 0, train_time, train_time

    # preprocess_features
    if model_config['smooth_config']['type'] is not None:
        if model_config['connection'] == [
                'f' for i in range(len(model_config['connection']))
        ]:
            fetch = train_mask + val_mask + test_mask
            new_features = np.zeros(features.shape, dtype=features.dtype)
            new_features[fetch], smoothing_time = graphconv(
                features, adj, model_config['smooth_config'], fetch=fetch)
            features = new_features
        else:
            features, smoothing_time = graphconv(features, adj,
                                                 model_config['smooth_config'])
    else:
        smoothing_time = 0

    support = [preprocess_adj(adj)]
    num_supports = 1

    # Speed up for MLP
    is_mlp = model_config['connection'] == [
        'f' for _ in range(len(model_config['connection']))
    ]
    if is_mlp:
        train_features = features[train_mask]
        y_train = y_train[train_mask]
        y_train = y_train.astype(np.int32)

        val_features = features[val_mask]
        test_features = features[test_mask]
        labels_mask = np.ones(train_mask.sum(), dtype=np.int32)
    else:
        train_features = features
        val_features = features
        test_features = features
        labels_mask = train_mask.astype(np.int32)
        y_train = y_train.astype(np.int32)

    input_dim = features.shape[1]
    if sparse.issparse(features):
        train_features = sparse_to_tuple(train_features)
        val_features = sparse_to_tuple(val_features)
        test_features = sparse_to_tuple(test_features)
        features = sparse_to_tuple(features)

    # Define placeholders
    placeholders = {
        'labels':
        tf.placeholder_with_default(y_train,
                                    name='labels',
                                    shape=(None, y_train.shape[1])),
        'labels_mask':
        tf.placeholder_with_default(labels_mask,
                                    shape=(None),
                                    name='labels_mask'),
        'dropout':
        tf.placeholder_with_default(0., name='dropout', shape=()),
        'adj_nnz':
        tf.placeholder_with_default(support[0].values.shape,
                                    shape=(1),
                                    name='adj_nnz'),
    }
    if not is_mlp:
        placeholders['support'] = [
            tf.sparse_placeholder(tf.float32, name='support' + str(i))
            for i in range(num_supports)
        ]
    if isinstance(train_features, tf.SparseTensorValue):
        placeholders['num_features_nonzero'] = tf.placeholder_with_default(
            train_features[1].shape, shape=(1), name='num_features_nonzero')
        placeholders['features'] = tf.sparse_placeholder(tf.float32,
                                                         name='features')
    else:
        placeholders['num_features_nonzero'] = tf.placeholder_with_default(
            [0], shape=(1), name='num_features_nonzero')
        placeholders['features'] = tf.placeholder_with_default(
            train_features, shape=[None, features.shape[1]], name='features')

    # Create model
    model = IGCN(model_config, placeholders, input_dim=input_dim)

    # Random initialize
    sess.run(tf.global_variables_initializer())

    # Initialize FileWriter, saver & variables in graph
    train_writer = None
    valid_writer = None
    saver = tf.train.Saver()

    # Construct feed dictionary
    if is_mlp:
        if isinstance(features, tf.SparseTensorValue):
            train_feed_dict = {
                placeholders['features']: train_features,
                placeholders['dropout']: model_config['dropout'],
            }
        else:
            train_feed_dict = {
                placeholders['dropout']: model_config['dropout']
            }

        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val[val_mask],
            np.ones(val_mask.sum(), dtype=np.bool), 0, placeholders)

        test_feed_dict = construct_feed_dict(
            test_features, support, y_test[test_mask],
            np.ones(test_mask.sum(), dtype=np.bool), 0, placeholders)
    else:
        train_feed_dict = construct_feed_dict(train_features, support, y_train,
                                              train_mask,
                                              model_config['dropout'],
                                              placeholders)
        valid_feed_dict = construct_feed_dict(val_features, support, y_val,
                                              val_mask, 0, placeholders)
        test_feed_dict = construct_feed_dict(test_features, support, y_test,
                                             test_mask, 0, placeholders)

    # Some support variables
    acc_list = []
    max_valid_acc = 0
    min_train_loss = 1000000
    t_test = time.time()

    sess.run(model.assign_data, feed_dict=test_feed_dict)
    test_cost, test_acc, test_acc_of_class = sess.run(
        [model.cross_entropy_loss, model.accuracy, model.accuracy_of_class])
    sess.run(model.assign_data, feed_dict=train_feed_dict)

    valid_loss, valid_acc, valid_summary = sess.run(
        [model.cross_entropy_loss, model.accuracy, model.summary],
        feed_dict=valid_feed_dict)
    test_duration = time.time() - t_test
    train_time = 0

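    # Fallback so the train_time / step average in the return stays defined
    # even when the training loop below is skipped.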
    step = model_config['epochs']
    if model_config['train']:
        # Train model
        print('training...')
        for step in range(model_config['epochs']):

            # Training step
            t = time.time()
            sess.run(model.opt_op)
            t = time.time() - t
            train_time += t

            train_loss, train_acc = sess.run(
                [model.cross_entropy_loss, model.accuracy])

            # if True:
            if step > model_config['epochs'] * 0.9 or step % 20 == 0:
                # If it's the best performance so far, evaluate on the test set
                if model_config['validate']:
                    sess.run(model.assign_data, feed_dict=valid_feed_dict)
                    valid_loss, valid_acc = sess.run(
                        [model.cross_entropy_loss, model.accuracy])

                    acc_list.append(valid_acc)
                    if valid_acc >= max_valid_acc:
                        max_valid_acc = valid_acc

                        t_test = time.time()
                        sess.run(model.assign_data, feed_dict=test_feed_dict)
                        test_cost, test_acc, test_acc_of_class = \
                            sess.run([model.cross_entropy_loss, model.accuracy, model.accuracy_of_class])
                        test_duration = time.time() - t_test
                        if args.verbose:
                            print('*', end='')
                else:
                    acc_list.append(train_acc)
                    if train_loss < min_train_loss:
                        min_train_loss = train_loss
                        t_test = time.time()
                        sess.run(model.assign_data, feed_dict=test_feed_dict)
                        test_cost, test_acc, test_acc_of_class = \
                            sess.run([model.cross_entropy_loss, model.accuracy, model.accuracy_of_class])
                        test_duration = time.time() - t_test
                        if args.verbose:
                            print('*', end='')
                sess.run(model.assign_data, feed_dict=train_feed_dict)

            # Print results
            if args.verbose:
                print("Epoch: {:04d}".format(step),
                      "train_loss= {:.3f}".format(train_loss),
                      "train_acc= {:.3f}".format(train_acc),
                      end=' ')
                if model_config['validate']:
                    print("val_loss=",
                          "{:.3f}".format(valid_loss),
                          "val_acc= {:.3f}".format(valid_acc),
                          end=' ')
                else:
                    print("test_loss=",
                          "{:.3f}".format(test_cost),
                          "test_acc= {:.3f}".format(test_acc),
                          end=' ')
                print("time=", "{:.5f}".format(t))

        print("Test set results:", "cost=", "{:.5f}".format(test_cost),
              "accuracy=", "{:.5f}".format(test_acc), "time=",
              "{:.5f}".format(test_duration))

        # Saving
        if model_config['logdir']:
            print('Save model to "{:s}"'.format(
                saver.save(sess=sess,
                           save_path=path.join(model_config['logdir'],
                                               'model.ckpt'))))
    print("Total time={}s".format(time.time() - very_begining))

    return test_acc, test_acc_of_class, train_time / step * 1000, smoothing_time, train_time + smoothing_time
Example 14
flags.DEFINE_float('learning_rate', 0.1, 'Initial learning rate.')  # 0.01
flags.DEFINE_integer('epochs', 50, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')  # 16
flags.DEFINE_integer('hidden2', 16, 'Number of units in hidden layer 2.')  # 16
flags.DEFINE_integer('hidden3', 16, 'Number of units in hidden layer 3.')  # 16
flags.DEFINE_integer('hidden4', 16, 'Number of units in hidden layer 4.')  # 16
flags.DEFINE_float('dropout', 0.01,
                   'Dropout rate (1 - keep probability).')  # 0.5
flags.DEFINE_float('weight_decay', 5e-4,
                   'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 20,
                     'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = utils.load_data(
    FLAGS.dataset)

# Some preprocessing
features = preprocess_features(features)
if FLAGS.model == 'gcn':
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = models.GCN
elif FLAGS.model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
    model_func = models.GCN
elif FLAGS.model == 'dense':
    support = [preprocess_adj(adj)]  # Not used
    num_supports = 1
    model_func = models.MLP
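# The snippet is cut off here; in Kipf's reference gcn/train.py the dispatch
# closes with an else branch that rejects unknown model names, e.g.:
# else:
#     raise ValueError('Invalid argument for model: ' + str(FLAGS.model))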
Example 15
# parser.add_argument('--path', type=str, default='../data/cora/', help='data path')
parser.add_argument('--dataset', type=str, default='cora', help='dataset name')
parser.add_argument('--sub_dataset', type=str, default='', help='dataset name')
opt = parser.parse_args()
opt.cuda = not opt.no_cuda and torch.cuda.is_available()

np.random.seed(opt.seed)
torch.manual_seed(opt.seed)
if opt.cuda:
    torch.cuda.manual_seed(opt.seed)

# Download data
download_data(opt.dataset)

# Load data
adj, features, labels, idx_train, idx_val, idx_test = load_data(opt.dataset,opt.sub_dataset)

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            nhid=opt.hidden,
            nclass=labels.max().item() + 1,
            dropout=opt.dropout)
optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)

if opt.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()