Example #1
 def process_data(self):
     data = load_data('cora')
     adj, feas = data[:2]
     self.adj = adj.todense()
     self.normed_adj = preprocess_adj(adj)
     self.feas = preprocess_features(feas, False)
     self.y_train, self.y_val, self.y_test = data[2:5]
     self.train_mask, self.val_mask, self.test_mask = data[5:]
Example #2
def sgc_precompute(features, adj, K=2):
    adj = preprocess_adj(adj).tocoo()
    features = features.tocoo()
    t = perf_counter()
    for _ in range(K):
        features = adj.dot(features)
    precompute_time = perf_counter() - t

    return features, precompute_time
Example #3
def run(args):
    (
        adj,
        features,
        y_train,
        y_val,
        y_test,
        train_mask,
        val_mask,
        test_mask,
        train_size,
        test_size,
    ) = load_corpus(args.select_data)

    train_mask = train_mask + val_mask
    y_train = y_train + y_val

    adj_dense = preprocess_adj(adj).toarray().astype(np.float32)
    features_dense = preprocess_features(features).toarray().astype(np.float32)

    y_train = y_train.astype(np.float32)
    y_test = y_test.astype(np.float32)
    train_mask = train_mask.astype(np.float32)
    test_mask = test_mask.astype(np.float32)

    gcn_model = GCN(
        tf.convert_to_tensor(adj_dense),
        layers=args.layers,
        hidden_size=args.hidden_size,
        dropout=args.dropout,
    )

    loss_fn = masked_softmax_cross_entropy

    # acc_fn = masked_accuracy

    optimizer = Adam(learning_rate=args.lr)
    # print("Model Layers: ", gcn_model.trainable_variables)
    model_textGCN = TextGCN(model=gcn_model,
                            loss=loss_fn,
                            optimizer=optimizer,
                            args=args)

    model_textGCN.train(features_dense, y_train, train_mask)

    sns.distplot(model_textGCN.train_accuracy)
    plt.savefig("train_acc.png")

    plt.clf()

    sns.distplot(model_textGCN.train_losses)
    plt.savefig("train_losses.png")

    eval_result = model_textGCN.evaluate(features_dense, y_test, test_mask)

    print(f"Final Evaluation Result: {eval_result}")
Example #4
 def __init__(self, adj, x, y, W, b, K=2, normalize_grad=True):
     self.num_classes = y.max() + 1
     self.normalize_grad = normalize_grad
     self.num_nodes = adj.shape[0]
     self.K = K
     self.surrogate = Surrogate(x @ W, b, K=K)
     self.shape = (self.num_nodes, self.num_nodes)
     self.adj = adj
     self.adj_sparse = utils.sparse_to_tuple(utils.preprocess_adj(adj))
     self.y = y
     self.y_onehot = np.eye(int(self.num_classes))[y]
Example #5
    def __init__(self, args):
        print("prepare data")
        self.graph_path = "data/graph"
        self.args = args

        # graph
        graph = nx.read_weighted_edgelist(f"{self.graph_path}/{args.dataset}.txt",
                                          nodetype=int)
        print_graph_detail(graph)
        adj = nx.to_scipy_sparse_matrix(graph,
                                        nodelist=list(range(graph.number_of_nodes())),
                                        weight='weight',
                                        dtype=float)

        adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

        self.adj = preprocess_adj(adj, is_sparse=True)

        # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

        # features
        self.nfeat_dim = graph.number_of_nodes()
        row = list(range(self.nfeat_dim))
        col = list(range(self.nfeat_dim))
        value = [1.] * self.nfeat_dim
        shape = (self.nfeat_dim, self.nfeat_dim)
        indices = th.from_numpy(
                np.vstack((row, col)).astype(np.int64))
        values = th.FloatTensor(value)
        shape = th.Size(shape)

        self.features = th.sparse.FloatTensor(indices, values, shape)

        # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
        # target

        target_fn = f"data/text_dataset/{self.args.dataset}.txt"
        target = np.array(pd.read_csv(target_fn,
                                      sep="\t",
                                      header=None)[2])
        target2id = {label: indx for indx, label in enumerate(set(target))}
        self.target = [target2id[label] for label in target]
        self.nclass = len(target2id)

        # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
        # train val test split

        self.train_lst, self.test_lst = get_train_test(target_fn)
Example #6
def main(args):
    #
    save_dir = args.save_dir
    log_dir = args.log_dir
    train_dir = args.data_dir

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = utils.load_data(
        args.data_type)
    features = utils.preprocess_features(features)
    support = [utils.preprocess_adj(adj)]
    args.num_supports = 1
    args.input_size, args.features_size = features[2][1], features[2]
    args.output_size = y_train.shape[1]

    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    model = GCN(args, sess, name="gcn")
    summary_writer = tf.summary.FileWriter(log_dir)

    for epoch in range(1, args.nb_epoch + 1):
        epoch_start_time = time.time()

        feed_dict = utils.construct_feed_dict(model, features, support,
                                              y_train, train_mask)
        _, train_loss, train_acc, summaries = model.train(feed_dict)

        if epoch % args.summary_epoch == 0:
            summary_writer.add_summary(summaries, epoch)

        if epoch % args.print_epoch == 0:
            feed_dict_val = utils.construct_feed_dict(model, features, support,
                                                      y_val, val_mask)
            val_loss, val_acc = model.evaluate(feed_dict_val)
            print "epoch %d, train_loss %f, train_acc %f, val_loss %f, val_acc %f, time %.5fs" % \
              (epoch, train_loss, train_acc, val_loss, val_acc, time.time()-epoch_start_time)

        if args.anneal and epoch >= args.anneal_start:
            sess.run(model.lr_decay_op)

    model.saver.save(sess, os.path.join(save_dir, "model.ckpt"))
    print "Model stored...."
Example #7
def mics_graph_matrix(num_subject, graph_folder, GRAPH_ADJ, FILTER,
                      MAX_DEGREE):
    """Generate graph matrix for GCNN

    Args:
        num_subject (int): number of subjects in the data
        graph_folder (str): location of folder for graph
        GRAPH_ADJ (str): the filename of the graph
        FILTER (str): type of gcnn filter
        MAX_DEGREE (int): degree of Chebyshev polynomial

    Returns:
        Tuple: the graph_matrix and the number of supports used for the GCNN

    Raises:
        Exception: invalid FILTER type
    """
    SYM_NORM = True  # symmetric (True) vs. left-only (False) normalization

    # build the graph
    A = load_graph(dimension=num_subject, path=graph_folder, graph=GRAPH_ADJ)

    # estimate the laplacian
    if FILTER == 'localpool':
        """ Local pooling filters
        (see 'renormalization trick' in Kipf & Welling, arXiv 2016)
        """
        print('Using local pooling filters...')
        A_ = preprocess_adj(A, SYM_NORM)
        support = 1
        graph_matrix = [A_]
    elif FILTER == 'chebyshev':
        """ Chebyshev polynomial basis filters
        (Defferrard et al., NIPS 2016)
        """
        print('Using Chebyshev polynomial basis filters...')
        L = normalized_laplacian(A, SYM_NORM)
        L_scaled = rescale_laplacian(L)
        T_k = chebyshev_polynomial(L_scaled, MAX_DEGREE)
        support = MAX_DEGREE + 1
        graph_matrix = T_k
    else:
        raise Exception('Invalid filter type.')

    return graph_matrix, support
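
For reference: the 'localpool' branch above relies on preprocess_adj to apply the "renormalization trick" cited in the comment (Kipf & Welling, 2016). Below is a minimal SciPy sketch of that normalization, A_hat = D^(-1/2) (A + I) D^(-1/2), assuming a symmetric adjacency; the actual preprocess_adj used across these examples varies from repo to repo (several return a sparse tuple rather than a matrix), and the helper name normalize_adj_sketch is invented here purely for illustration.

import numpy as np
import scipy.sparse as sp

def normalize_adj_sketch(adj):
    """Sketch of the renormalization trick: D^-1/2 (A + I) D^-1/2."""
    adj = sp.coo_matrix(adj)
    adj_self = adj + sp.eye(adj.shape[0])            # add self-loops
    rowsum = np.asarray(adj_self.sum(1)).flatten()   # degree of each node
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.            # guard isolated nodes
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return d_mat_inv_sqrt.dot(adj_self).dot(d_mat_inv_sqrt).tocoo()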
Example #8
def get_feature(dataset):

    Features_decrease, adj_decrease, edge_decrease, full_feature_decrease = [], [], [], []
    Interactions, smiles = [], []

    for x, label, w, smile in dataset.itersamples():

        # The smile is used to extract molecular fingerprints
        smiles.append(smile)

        interaction = label
        Interactions.append(interaction)

        mol = Chem.MolFromSmiles(smile)

        if not mol:
            raise ValueError("Could not parse SMILES string:", smile)

        # increased order
        feature_increase = x.get_atom_features()
        iAdjTmp_increase = create_adjacency(mol)

        # decreased order
        # Turn the data upside down
        feature_decrease = flip(feature_increase, 0)
        iAdjTmp_decrease = flip(iAdjTmp_increase, 0)

        # Obtaining fixed-size molecular input data
        iFeature_decrease, adjacency_decrease = fix_input(
            feature_decrease, iAdjTmp_decrease)

        Features_decrease.append(np.array(iFeature_decrease))
        normed_adj_decrease = preprocess_adj(adjacency_decrease)
        adj_decrease.append(normed_adj_decrease)

        # Transform the data into PyTorch Geometric's specific data format.
        index = np.array(np.where(iAdjTmp_decrease == 1))
        edge_index = torch.from_numpy(index).long()
        edge_decrease.append(edge_index)

        feature = torch.from_numpy(feature_decrease.copy()).float()
        full_feature_decrease.append(feature)

    return Features_decrease, adj_decrease, edge_decrease, full_feature_decrease, Interactions, smiles
Example #9
def build_model(adj, features, n_classes):
    placeholders = {
        'features':
        tf.sparse_placeholder(tf.float32,
                              shape=tf.constant(features[2], dtype=tf.int64)),
        'labels':
        tf.placeholder(tf.float32, shape=(None, n_classes)),
        'labels_mask':
        tf.placeholder(tf.int32),
        'noise':
        tf.placeholder(tf.float32, shape=()),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
        'alfa':
        tf.placeholder(tf.float32, shape=()),
        'beta':
        tf.placeholder(tf.float32, shape=()),
    }

    if FLAGS.model == 'COOL':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = GCN

    elif FLAGS.model == 'COOLnorm':
        support = [
            sparse_to_tuple(
                preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold))
        ]
        model_func = GCN

    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    placeholders['support'] = [
        tf.sparse_placeholder(tf.float32) for _ in support
    ]

    model = model_func(placeholders)
    return model, support, placeholders
Example #10
    def __init__(self, smiles):
        featurizer = dc.feat.graph_features.ConvMolFeaturizer()
        self.Full_features, self.Full_normed_adj, self.Full_fringer, self.Full_interactions = [], [], [], []
        for i, smile in enumerate(smiles):
            mol = Chem.MolFromSmiles(str(smile))
            if not mol:
                raise ValueError("Could not parse SMILES string:", smile)

            x = featurizer.featurize([mol])[0]

            # increased order
            feature_increase = x.get_atom_features()
            iAdjTmp_increase = create_adjacency(mol)

            # decreased order
            # Turn the data upside down
            feature_decrease = flip(feature_increase, 0)
            iAdjTmp_decrease = flip(iAdjTmp_increase, 0)

            # Obtaining fixed-size molecular input data
            iFeature_decrease, adjacency_decrease = fix_input(feature_decrease, iAdjTmp_decrease)

            Features_decrease = np.array(iFeature_decrease)
            adj_decrease = preprocess_adj(adjacency_decrease)
            fingerprints = calc(mol)[:fingerprint_size]

            self.Full_features.append(Features_decrease)
            self.Full_normed_adj.append(adj_decrease)
            self.Full_fringer.append(fingerprints)
            self.Full_interactions.append([0])

        self.Full_features = tensoring(self.Full_features)
        self.Full_normed_adj = tensoring(self.Full_normed_adj)
        self.Full_fringer = tensoring(self.Full_fringer)
        self.Full_interactions = tensoring(self.Full_interactions)

        self.dataset = list(zip(np.array(self.Full_features), np.array(self.Full_normed_adj), np.array(self.Full_fringer), np.array(self.Full_interactions)))
Example #11
def get_data(dataset):
    # Load output_data
    (adj, y_train, y_val, y_test, train_mask, val_mask, test_mask, train_size,
     test_size) = utils.load_data(dataset)

    features = sparse.identity(adj.shape[1])

    # Some preprocessing
    features = utils.preprocess_features(features)
    support = [utils.preprocess_adj(adj)]

    # Define placeholders
    t_features = torch.from_numpy(features)
    t_y_train = torch.from_numpy(y_train)
    t_y_val = torch.from_numpy(y_val)
    t_y_test = torch.from_numpy(y_test)
    t_train_mask = torch.from_numpy(train_mask.astype(np.float32))

    t_support = []
    for i in range(len(support)):
        t_support.append(torch.Tensor(support[i]))

    return (t_features, t_y_train, t_y_val, t_y_test, t_train_mask, t_support,
            val_mask, test_mask, train_size, test_size)
Example #12
def get_model_and_support(model_string, adj, initial_train, train_mask, val_mask, test_mask, with_test):
    if model_string == 'gcn':
        support = [preprocess_adj(adj)]
        num_supports = 1
        model_func = GCN
        if with_test:
            sub_sampled_support = support
        else:  # cut the test and validation features
            initial_sample_list = get_list_from_mask(initial_train)
            sub_sampled_support = get_masked_adj(adj, initial_sample_list)
            sub_sampled_support = [preprocess_adj(sub_sampled_support)]
            # A = adj.toarray()[initial_sample_list]
            # one_hop_sample_list = np.argwhere(np.sum(A, axis=0))
            # sub_sampled_support_second = [
            #     get_sub_sampled_support(complete_support=support[0], node_to_keep=one_hop_sample_list)
            # ]

            return model_func, support, sub_sampled_support, num_supports

    elif model_string == 'gcn_cheby':
        support = chebyshev_polynomials(adj, FLAGS.max_degree)
        sub_sampled_support = support
        num_supports = 1 + FLAGS.max_degree
        model_func = GCN
    elif model_string == 'dense':
        support = [preprocess_adj(adj)]  # Not used
        sub_sampled_support = support  # Not used
        num_supports = 1
        model_func = MLP
    elif model_string == 'k-nn':
        support = [preprocess_adj(adj)]  # Not used
        sub_sampled_support = support  # Not used
        num_supports = 1
        model_func = None
    elif model_string == 'gcn_subsampled':  # FLOFLO's making
        support = [preprocess_adj(adj)]
        num_supports = 1
        model_func = GCN
        if with_test:
            initial_sample_list = get_list_from_mask(train_mask + val_mask + test_mask)
        else:
            initial_sample_list = get_list_from_mask(train_mask)
        sub_sampled_support = get_masked_adj(adj, initial_sample_list)
        sub_sampled_support = [preprocess_adj(sub_sampled_support)]
       
        # A = adj.toarray()[initial_sample_list]
        # one_hop_sample_list = np.argwhere(np.sum(A, axis=0))
       
        # sub_sampled_support_second = [
        #     get_sub_sampled_support(complete_support=support[0], node_to_keep=one_hop_sample_list)
        # ]

        return model_func, support, sub_sampled_support, num_supports

    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    return model_func, support, sub_sampled_support, num_supports
Example #13
# =============================================================================
# Data loading
# =============================================================================
train_df = utils.datafeeder2(np.load("trainX.npy"), np.load("trainY.npy"))
valid_df = utils.datafeeder2(np.load("validX.npy"), np.load("validY.npy"))
test_df = utils.datafeeder2(np.load("testX.npy"), np.load("testY.npy"))

# =============================================================================
# PPI network loading and D^(-1/2)AD^(-1/2) matrix calculation
# =============================================================================
#Real ppi
ppi_matrix = np.load("ppi2.npy")
nom_adj_matrix = utils.preprocess_adj(ppi_matrix)
nom_adj_matrix2 = utils.preprocess_adj2(ppi_matrix)

# Fake ppi
fake_ppi = utils.ransomize_ppi(ppi_matrix)
nom_fake = utils.preprocess_adj(fake_ppi)
nom_fake2 = utils.preprocess_adj2(fake_ppi)

# No interaction at all
nom_noitx = utils.preprocess_adj(np.zeros((16559, 16559)))
nom_noitx2 = utils.preprocess_adj2(np.zeros((16559, 16559)))

# =============================================================================
# Training models
# =============================================================================
model1 = PPiConv(nom_adj_matrix, gc.convolutionGraph)
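
The section comment above labels this step as the D^(-1/2) A D^(-1/2) calculation on a dense PPI matrix. As an illustration only, a dense NumPy sketch of that symmetric normalization could look like the following; it does not claim to reproduce utils.preprocess_adj exactly (the difference from preprocess_adj2, and whether self-loops are added first, is not visible from this snippet), and sym_normalize_dense is a made-up name.

import numpy as np

def sym_normalize_dense(adj):
    """Sketch of D^-1/2 A D^-1/2 for a dense adjacency matrix."""
    deg = np.asarray(adj, dtype=np.float64).sum(axis=1)   # node degrees
    d_inv_sqrt = np.zeros_like(deg)
    nz = deg > 0
    d_inv_sqrt[nz] = deg[nz] ** -0.5                      # leave isolated nodes at 0
    return adj * d_inv_sqrt[:, None] * d_inv_sqrt[None, :]

For the all-zero matrix used in the "no interaction" baseline above, this sketch simply returns a zero matrix.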
Example #14
    def __init__(self, adj, x, y, hidden=16, name="",
                 with_relu=True, params_dict={'dropout': 0.5}, gpu_id=None,
                 seed=None):
        adj = utils.preprocess_adj(adj)
        num_features = x.shape[1]
        num_classes = y.max() + 1

        self.graph = tf.Graph()
        with self.graph.as_default():
            if seed:
                tf.set_random_seed(seed)

            with tf.variable_scope(name) as scope:
                w_init = glorot_uniform
                self.name = name

                self.dropout = params_dict.get('dropout', 0.)
                if not with_relu:
                    self.dropout = 0

                self.learning_rate = params_dict.get('learning_rate', 0.01)

                self.weight_decay = params_dict.get('weight_decay', 5e-4)
                self.N, self.D = x.shape

                self.node_ids = tf.placeholder(tf.int32, [None], 'node_ids')
                self.node_labels = tf.placeholder(tf.int32, [None, num_classes], 'node_labels')

                # bool placeholder to turn on dropout during training
                self.training = tf.placeholder_with_default(False, shape=())

                self.labels = np.eye(num_classes)[y]
                self.adj = tf.SparseTensor(*utils.sparse_to_tuple(adj))
                self.adj = tf.cast(self.adj, tf.float32)
                self.X_sparse = tf.SparseTensor(*utils.sparse_to_tuple(x))
                self.X_sparse = tf.cast(self.X_sparse, tf.float32)
                self.X_dropout = sparse_dropout(self.X_sparse, 1 - self.dropout,
                                                (int(self.X_sparse.values.get_shape()[0]),))
                # only use drop-out during training
                self.X_comp = tf.cond(self.training,
                                      lambda: self.X_dropout,
                                      lambda: self.X_sparse) if self.dropout > 0. else self.X_sparse

                self.W1 = tf.get_variable('W1', [self.D, hidden], tf.float32, initializer=w_init())
                self.b1 = tf.get_variable('b1', dtype=tf.float32, initializer=tf.zeros(hidden))

                self.h1 = spdot(self.adj, spdot(self.X_comp, self.W1))

                if with_relu:
                    self.h1 = tf.nn.relu(self.h1 + self.b1)

                self.h1_dropout = tf.nn.dropout(self.h1, rate=self.dropout)

                self.h1_comp = tf.cond(self.training,
                                       lambda: self.h1_dropout,
                                       lambda: self.h1) if self.dropout > 0. else self.h1

                self.W2 = tf.get_variable('W2', [hidden, num_classes], tf.float32, initializer=w_init())
                self.b2 = tf.get_variable('b2', dtype=tf.float32, initializer=tf.zeros(num_classes))

                self.logits = spdot(self.adj, dot(self.h1_comp, self.W2))
                if with_relu:
                    self.logits += self.b2
                self.logits_gather = tf.gather(self.logits, self.node_ids)

                self.predictions = tf.nn.softmax(self.logits_gather)

                self.loss_per_node = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits_gather,
                                                                                labels=self.node_labels)
                self.loss = tf.reduce_mean(self.loss_per_node)

                # weight decay only on the first layer, to match the original implementation
                if with_relu:
                    self.loss += self.weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in [self.W1, self.b1]])

                var_l = [self.W1, self.W2]
                if with_relu:
                    var_l.extend([self.b1, self.b2])
                self.train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss,
                                                                                                  var_list=var_l)

                self.varlist = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
                self.local_init_op = tf.variables_initializer(self.varlist)

                if gpu_id is None:
                    config = tf.ConfigProto(
                        device_count={'GPU': 0}
                    )
                else:
                    gpu_options = tf.GPUOptions(visible_device_list='{}'.format(gpu_id), allow_growth=True)
                    config = tf.ConfigProto(gpu_options=gpu_options)

                self.session = tf.Session(config=config)
                self.init_op = tf.global_variables_initializer()
                self.session.run(self.init_op)
Example #15
def build_model(adj, features, n_classes, subgraphs):
    perturbation = None
    placeholders = {
        'features':
        tf.sparse_placeholder(tf.float32,
                              shape=tf.constant(features[2], dtype=tf.int64)),
        'labels':
        tf.placeholder(tf.float32, shape=(None, n_classes)),
        'labels_mask':
        tf.placeholder(tf.int32),
        'noise':
        tf.placeholder(tf.float32, shape=()),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
    }

    if FLAGS.model == 'gcn':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = GCN

    elif FLAGS.model == 'gcnR':
        support = [sparse_to_tuple(adj)]
        model_func = GCN

    elif FLAGS.model == 'gcnT':
        support = [
            sparse_to_tuple(
                preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold))
        ]
        model_func = GCN

    elif FLAGS.model == 'fishergcn' or FLAGS.model == 'fishergcnT':

        if FLAGS.model == 'fishergcn':
            A = preprocess_adj(adj)
        else:
            A = preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold)

        N = adj.shape[0]
        L = sp.eye(N) - A

        if FLAGS.fisher_freq == 0:
            #nsubgraphs = subgraphs.shape[1]
            #V = block_krylov( A, FLAGS.fisher_rank+nsubgraphs )
            #V = V[:,:FLAGS.fisher_rank]

            V = block_krylov(A, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)

        elif FLAGS.fisher_freq == 1:
            # if the graph contains one large component and small isolated components
            # only perturb the largest connected component
            subgraph_sizes = subgraphs.sum(0)
            largest_idx = np.argmax(subgraph_sizes)
            isolated = np.nonzero(1 - subgraphs[:, largest_idx])[0]
            L = L.tolil()
            L[:, isolated] = 0
            L[isolated, :] = 0
            L = L.tocsr()

            V = block_krylov(L, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)

        elif FLAGS.fisher_freq == 2:
            V, _ = np.linalg.qr(np.random.randn(N, FLAGS.fisher_rank))
            w = np.ones(FLAGS.fisher_rank)

        else:
            print('unknown frequency:', FLAGS.fisher_freq)
            sys.exit(0)

        perturbation = make_perturbation(V, w, placeholders['noise'],
                                         FLAGS.fisher_adversary)
        support = [sparse_to_tuple(A)]
        model_func = GCN

    elif FLAGS.model == 'chebynet':
        support = chebyshev_polynomials(adj, FLAGS.max_degree)
        model_func = GCN

    elif FLAGS.model == 'mlp':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = MLP

    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    try:
        _, _values, _shape = support[0]
        print("sparsity: {0:.2f}%".format(100 * (_values > 0).sum() /
                                          (_shape[0] * _shape[1])))
    except:
        pass
    placeholders['support'] = [
        tf.sparse_placeholder(tf.float32) for _ in support
    ]

    model = model_func(placeholders,
                       perturbation=perturbation,
                       subgraphs=subgraphs)
    return model, support, placeholders
Example #16
    val_mask = np.zeros(n, dtype=bool)
    test_mask = np.zeros(n, dtype=bool)

    train_mask[train_index[0:val_cut]] = True
    val_mask[train_index[val_cut:]] = True
    test_mask[test_index] = True

    y_train = np.zeros(labels.shape, dtype=int)
    y_val = np.zeros(labels.shape, dtype=int)
    y_test = np.zeros(labels.shape, dtype=int)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    masked_adjacency = get_masked_adj(adj, train_index[0:val_cut])
    masked_adjacency = preprocess_adj(masked_adjacency)
    adjacency = preprocess_adj(adj)

    # masked_adjacency = preprocess_adj(masked_adjacency)
    #Remove links for the first adjacency

    hyperparam_search = []

    # Define model evaluation function
    def evaluate(sess, features, adjacency, masked_adjacency, labels, mask,
                 placeholders):
        t_test = time.time()
        feed_dict_val = construct_feed_dict(features, adjacency, labels, mask,
                                            masked_adjacency, placeholders)
        outs_val = sess.run([model.loss, model.accuracy,
                             model.predict()],
Example #17
def train(G):
    print(G.graph)
    print('Extracting user graph...')
    userG = user_graph(G)

    # labels
    print('Obtaining labels...')
    future = pd.read_csv(
        constants.DATA_HOME +
        "user_scores/{}_2014_wf{:02d}.csv".format("politics", 1))
    future = future.set_index("user")
    future['label'] = np.sign(future["sum"] -
                              np.percentile(future["sum"], 90) - 1e-10)
    labels = []
    for userid in userG.nodes():
        if userid in future.index:
            labels.append(future.loc[userid]['label'])
        else:
            #print('%s does not have label.' % userid)
            userG.remove_node(userid)
    labels = np.array(labels, dtype=int)
    # convert to 0/1 labels
    labels = [0 if l < 0 else 1 for l in labels]

    print('Extracting user features...')
    max_deg = max(G.degree(userG.nodes()).values())
    print('Max user degree: ', max_deg)
    features = []
    neighbor_dict = {}
    feature_size = 0
    for node in userG.nodes():
        #feature_size, feature_idx = extract_user_features_simple(G, node, max_deg, neighbor_dict)
        feature_size, feature_idx = extract_user_features_simple(
            G, node, max_deg)
        features.append(feature_idx)

    # a list of all features corresponding to posts/comments
    #neighbor_features = np.zeros((len(neighbor_dict), feature_size))
    #for idx, vec in neighbor_dict.values():
    #  neighbor_features[idx] = vec
    features = np.stack(features, axis=0)
    #features = np.array(features)
    print('Feature dimensions: ', features.shape)

    # data split
    n = userG.number_of_nodes()
    n1 = int(math.ceil(n * 0.7))
    n2 = int(math.ceil(n * 0.8))
    train_mask = np.array([1 if i < n1 else 0 for i in range(n)])
    val_mask = np.array([1 if n1 <= i < n2 else 0 for i in range(n)])
    test_mask = np.array([1 if n2 <= i else 0 for i in range(n)])

    train_labels = np.zeros((n, 2))
    train_labels[np.arange(n1), labels[:n1]] = 1
    val_labels = np.zeros((n, 2))
    val_labels[np.arange(n1, n2), labels[n1:n2]] = 1
    test_labels = np.zeros((n, 2))
    test_labels[np.arange(n2, n), labels[n2:]] = 1

    adj = nx.adjacency_matrix(userG)

    # Define placeholders
    placeholders = {
        'support': [tf.sparse_placeholder(tf.float32)],
        'features':
        tf.placeholder(tf.float32,
                       shape=(None, features.shape[1], features.shape[2])),
        #'features': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        #'neighbor_features': tf.placeholder(tf.float32, shape=neighbor_features.shape),
        'labels':
        tf.placeholder(tf.float32, shape=(None, train_labels.shape[1])),
        'labels_mask':
        tf.placeholder(tf.int32),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
    }

    # neural network model
    layer_sizes = [features.shape[2], 10, 1]
    model = GCN_multipartite(placeholders, layer_sizes, logging=True)

    # Memory usage options
    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION

    init = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # Start running operations on the Graph.
    sess = tf.Session(config=config)
    sess.run(init)
    summary_writer = tf.summary.FileWriter(FLAGS.train_log_dir, sess.graph)

    cost_val = []

    # Train model
    print('Training...')
    for epoch in range(FLAGS.epochs):

        support = [utils.preprocess_adj(adj)]
        # Construct feed dictionary
        feed_dict = utils.construct_feed_dict(features,
                                              support,
                                              train_labels,
                                              train_mask,
                                              placeholders,
                                              sparse_inputs=False)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})

        # Training step
        start_time = time.time()
        outs = sess.run([model.opt_op, model.loss, model.accuracy],
                        feed_dict=feed_dict)
        duration = time.time() - start_time
        print(duration)

        # Validation
        cost, acc, y_pred, duration_val = evaluate(sess, model, features,
                                                   support, val_labels,
                                                   val_mask, placeholders)
        cost_val.append(cost)

        y_true = np.argmax(val_labels, 1)
        y_pred = y_pred[n1:n2]
        y_true = y_true[n1:n2]
        precision = sk.metrics.precision_score(y_true, y_pred)
        recall = sk.metrics.recall_score(y_true, y_pred)
        f1 = sk.metrics.f1_score(y_true, y_pred)

        # Print results
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(outs[1]), "train_acc=", "{:.5f}".format(outs[2]),
              "train_time=", "{:.5f}".format(duration), "val_acc=",
              "{:.5f}".format(acc), 'val_f1=', '{:.5f}'.format(f1),
              "val_time=", "{:.5f}".format(duration_val))

        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, epoch)
Example #18
if dataset == 'nell.0.001':
    features = load_nell(dataset)[1]
else:
    features = load_data(dataset)[1]

with open(changedadj_path, 'rb') as load_cha_adj:
    changed_adj = pickle.load(load_cha_adj)

# Some preprocessing
if FLAGS.features == 0:
    changed_features = preprocess_features(changed_adj +
                                           sp.eye(changed_adj.shape[0]))
else:
    changed_features = preprocess_features(features)

support = [preprocess_adj(changed_adj)]
num_supports = 1
model_func = GCN

# Define placeholders
placeholders = {
    'support':
    [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features':
    tf.sparse_placeholder(tf.float32,
                          shape=tf.constant(changed_features[2],
                                            dtype=tf.int64)),
    'dropout':
    tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero':
    tf.placeholder(tf.int32),
Example #19
        'adjacency':
        tf.sparse_placeholder(tf.float32),
        'features':
        tf.sparse_placeholder(tf.float32,
                              shape=tf.constant(features[2], dtype=tf.int64)),
        'labels':
        tf.placeholder(tf.float32, shape=(None, labels.shape[1])),
        'labels_mask':
        tf.placeholder(tf.int32),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero':
        tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    adjacency = preprocess_adj(adj)
    # Create model
    model = GCNN(placeholders, input_dim=features[2][1])

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function
    def evaluate(features, adjacency, labels, mask, placeholders):
        t_test = time.time()
        feed_dict_val = construct_feed_dict(features, adjacency, labels, mask,
                                            adjacency, placeholders)
        outs_val = sess.run([model.loss, model.accuracy,
                             model.predict()],
                            feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test), outs_val[2]
Example #20
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4,
                   'Weight for L2 loss on embedding matrix.')
flags.DEFINE_string('gpu', '1', 'GPU selection.')

os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    '../data', 'cora')

# Some preprocessing
features_dense, features = preprocess_features(features)
support = [preprocess_adj(adj)]
num_supports = 1
model_func = GCN

# Define placeholders
placeholders = {
    'support':
    [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.placeholder(tf.float32, shape=features[2]),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero':
    tf.placeholder(tf.int32)  # helper variable for sparse dropout
}
Example #21
                   'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10,
                     'Tolerance for early stopping (# of epochs).')

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    FLAGS.dataset)
total_edges = adj.data.shape[0]
n_node = adj.shape[0]
# Some preprocessing
features = preprocess_features(features)
# for non sparse
features = sp.coo_matrix((features[1], (features[0][:, 0], features[0][:, 1])),
                         shape=features[2]).toarray()

support = preprocess_adj(adj)
# for non sparse
support = [
    sp.coo_matrix((support[1], (support[0][:, 0], support[0][:, 1])),
                  shape=support[2]).toarray()
]
num_supports = 1
model_func = GCN

save_name = 'nat_' + FLAGS.dataset
if not os.path.exists(save_name):
    os.makedirs(save_name)
# Define placeholders
placeholders = {
    's': [
        tf.sparse_placeholder(tf.float32, shape=(n_node, n_node))
Example #22
import torch 
import numpy as np
import pickle
from utils import load_data, preprocess_features, preprocess_adj, tuple_to_torchSparseTensor
from gcn_model import GCN

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data("cora")

adj_hat = preprocess_adj(adj)
features = preprocess_features(features)
# features[0].shape == (49216, 2)
# features[1].shape == (49216,)
# features[2] == (2708, 1433)

# Convert to torch.Tensor
sparse_adj_hat = tuple_to_torchSparseTensor(adj_hat)
sparse_features = tuple_to_torchSparseTensor(features)

y_train = torch.FloatTensor(y_train)  # dtype = torch.float32
y_val = torch.FloatTensor(y_val)
y_test = torch.FloatTensor(y_test)

train_mask = torch.from_numpy(train_mask)  # dtype = torch.bool
val_mask = torch.from_numpy(val_mask)
test_mask = torch.from_numpy(test_mask)
model_file = 'training_dir/gcn_model.pkl'
model = torch.load(model_file)
output = model(sparse_adj_hat,sparse_features)
test_loss = model.loss(output,y_test,test_mask)
test_acc = model.accuracy(output,y_test,test_mask)
print("model_file={},test_loss={},test_acc={}".format(model_file,test_loss.item(),test_acc.item()))
Example #23
flags.DEFINE_integer('early_stopping', 10,
                     'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')
flags.DEFINE_string('gpu', '1', 'GPU selection.')
flags.DEFINE_string('method', args.method, 'Adversarial attack method')

os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    FLAGS.dataset_dir, FLAGS.dataset)

# Some preprocessing
features_dense, features = preprocess_features(features)
if FLAGS.model == 'gcn':
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = GCN
elif FLAGS.model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
    model_func = GCN
elif FLAGS.model == 'dense':
    support = [preprocess_adj(adj)]  # Not used
    num_supports = 1
    model_func = MLP
else:
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

# Define placeholders
placeholders = {
Example #24
import numpy as np
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.layers import Lambda
from tensorflow.python.keras.models import  Model
from gcn import GCN
from utils import preprocess_adj,plot_embeddings, load_data_v1

if __name__ == "__main__":

    FEATURE_LESS = False

    A, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_v1(
        'cora')

    A = preprocess_adj(A)
    features /= features.sum(axis=1, ).reshape(-1, 1)

    if FEATURE_LESS:
        X = np.arange(A.shape[-1])
        feature_dim = A.shape[-1]
    else:
        X = features
        feature_dim = X.shape[-1]
    model_input = [X, A]

    # Compile model
    model = GCN(A.shape[-1], feature_dim, 16, y_train.shape[1],  dropout_rate=0.5, l2_reg=2.5e-4,
                feature_less=FEATURE_LESS, )
    model.compile(optimizer=Adam(0.01), loss='categorical_crossentropy',
                  weighted_metrics=['categorical_crossentropy', 'acc'])
Example #25
    print("dropout:", FLAGS.dropout)
    sys.stdout.flush()


print_args(FLAGS)

# Load data
(adj, total_train_x, total_train_y,
 total_val_x, total_val_y, total_test_x, total_test_y,
 inputs_features) = load_data(FLAGS.dataset, FLAGS.filepath)

print(type(adj), adj.shape)
print("total number of samples in train,val and test:", len(total_train_x),
      len(total_val_x), len(total_test_x))
sys.stdout.flush()

support = preprocess_adj(adj, FLAGS.normalize)
(init_indices, init_values, shape) = support

model_func = CoupledGNN

placeholders = {
    'support_indices': tf.placeholder(tf.int64, shape=(None, 2)),
    'Xs': tf.placeholder(tf.float32, shape=(None, adj.shape[0], 1)),
    'y': tf.placeholder(tf.float32, shape=(None, adj.shape[0])),
    'dropout': tf.placeholder_with_default(0., shape=()),
}

# Create model
model = model_func(FLAGS,
                   init_values,
                   placeholders,