Example #1
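All of the snippets on this page use DeepChem's legacy TensorGraph API and omit their import blocks. A plausible shared preamble, assuming a DeepChem 2.x-era install (module paths and the exact set of layer classes shifted between releases, so treat this as a sketch rather than a verbatim header):

# Assumed preamble for the examples below (DeepChem 2.x, TensorGraph era).
import numpy as np
import tensorflow as tf
import deepchem as dc
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import (
    Input, Feature, Label, Weights, Dense, Flatten, LossLayer, SoftMax,
    SoftMaxCrossEntropy, ReduceMean, WeightedError, Concat, Stack,
    Dropout, BatchNorm, GraphConv, GraphPool, GraphGather)
from deepchem.feat.mol_graphs import ConvMol
from deepchem.metrics import to_one_hot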
    def test_graph_save(self):
        n_samples = 10
        n_features = 11
        n_tasks = 1
        batch_size = 10
        X = np.random.rand(batch_size, n_samples, n_features)
        y = np.ones(shape=(n_samples, n_tasks))  # batch_size == n_samples here, so X and y agree on the leading dim
        ids = np.arange(n_samples)

        dataset = dc.data.NumpyDataset(X, y, None, ids)
        g = TensorGraph(model_dir='/tmp/tmpss5_ki5_')

        inLayer = Input(shape=(None, n_samples, n_features))
        g.add_feature(inLayer)

        flatten = Flatten()
        g.add_layer(flatten, parents=[inLayer])

        dense = Dense(out_channels=1)
        g.add_layer(dense, parents=[flatten])
        g.add_output(dense)

        label_out = Input(shape=(None, 1))
        g.add_label(label_out)

        loss = LossLayer()
        g.add_layer(loss, parents=[dense, label_out])
        g.set_loss(loss)

        g.fit(dataset, nb_epoch=100)
        g.save()
        g1 = TensorGraph.load_from_dir('/tmp/tmpss5_ki5_')
        print(g1)
        print(g1.predict_on_batch(X))
Example #2
class MLP:
    def __init__(self, batch_size):
        # save parameters
        self.batch_size = batch_size

        # define tensorgraph
        self.tg = TensorGraph(use_queue=False)
        self.feature = Feature(shape=(None, 1024))

        # build graph
        self.build_graph()

    def build_graph(self):
        d1 = Dense(out_channels=256, activation_fn=tf.nn.relu, in_layers=[self.feature])
        d2 = Dense(out_channels=64, activation_fn=tf.nn.relu, in_layers=[d1])
        d3 = Dense(out_channels=16, activation_fn=None, in_layers=[d2])
        d4 = Dense(out_channels=2, activation_fn=None, in_layers=[d3])
        softmax = SoftMax(in_layers=[d4])
        self.tg.add_output(softmax)

        self.label = Label(shape=(None, 2))
        cost = SoftMaxCrossEntropy(in_layers=[self.label, d4])
        loss = ReduceMean(in_layers=[cost])
        self.tg.set_loss(loss)

    def fit(self, dataset, epochs):
        self.tg.fit_generator(self.data_generator(dataset, self.batch_size, epochs=epochs))

    def predict(self, dataset):
        pred = self.tg.predict_on_generator(self.data_generator(dataset, self.batch_size))
        return np.expand_dims(pred, axis=0)

    def data_generator(self, dataset, batch_size, epochs=1):
        for e in range(epochs):
            for X, y, w, idx in dataset.iterbatches(batch_size, pad_batches=True, deterministic=True):
                feed_dict = {self.label: to_one_hot(y[:, 0]), self.feature: X}  # one-hot labels and raw features for this batch

                yield feed_dict
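A minimal, hypothetical way to exercise the MLP class above (assumes the preamble at the top; the 1024-dim features stand in for e.g. ECFP fingerprints, and every name below is illustrative):

# Hypothetical usage sketch for MLP, not part of the original example.
X = np.random.rand(100, 1024)            # 100 samples, 1024-dim fingerprints
y = np.random.randint(2, size=(100, 1))  # one binary task
dataset = dc.data.NumpyDataset(X, y)

model = MLP(batch_size=50)
model.fit(dataset, epochs=10)
probs = model.predict(dataset)           # class probabilities, with a leading axis added by expand_dims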
Example #3
def graph_conv_net(batch_size, prior, num_task):
    """
    Build a tensorgraph for multilabel classification task

    Return: features and labels layers
    """
    tg = TensorGraph(use_queue=False)
    add_on = num_task if prior else 0
    atom_features = Feature(shape=(None, 75 + 2 * add_on))
    circular_features = Feature(shape=(batch_size, 256), dtype=tf.float32)

    degree_slice = Feature(shape=(None, 2), dtype=tf.int32)
    membership = Feature(shape=(None, ), dtype=tf.int32)
    deg_adjs = []
    for i in range(0, 10 + 1):
        deg_adj = Feature(shape=(None, i + 1), dtype=tf.int32)
        deg_adjs.append(deg_adj)

    gc1 = GraphConv(64 + add_on,
                    activation_fn=tf.nn.elu,
                    in_layers=[atom_features, degree_slice, membership] +
                    deg_adjs)
    batch_norm1 = BatchNorm(in_layers=[gc1])
    gp1 = GraphPool(in_layers=[batch_norm1, degree_slice, membership] +
                    deg_adjs)

    gc2 = GraphConv(64 + add_on,
                    activation_fn=tf.nn.elu,
                    in_layers=[gc1, degree_slice, membership] + deg_adjs)
    batch_norm2 = BatchNorm(in_layers=[gc2])
    gp2 = GraphPool(in_layers=[batch_norm2, degree_slice, membership] +
                    deg_adjs)

    add = Concat(in_layers=[gp1, gp2])
    add = Dropout(0.5, in_layers=[add])
    dense = Dense(out_channels=128, activation_fn=tf.nn.elu, in_layers=[add])
    batch_norm3 = BatchNorm(in_layers=[dense])
    readout = GraphGather(batch_size=batch_size,
                          activation_fn=tf.nn.tanh,
                          in_layers=[batch_norm3, degree_slice, membership] +
                          deg_adjs)
    batch_norm4 = BatchNorm(in_layers=[readout])

    # Two stacked dense blocks over the circular fingerprint features.
    dense1 = Dense(out_channels=128,
                   activation_fn=tf.nn.elu,
                   in_layers=[circular_features])
    dense1 = BatchNorm(in_layers=[dense1])
    dense1 = Dropout(0.5, in_layers=[dense1])
    dense1 = Dense(out_channels=128,
                   activation_fn=tf.nn.elu,
                   in_layers=[dense1])  # stack on the first block, not the raw features
    dense1 = BatchNorm(in_layers=[dense1])
    dense1 = Dropout(0.5, in_layers=[dense1])
    merge_feat = Concat(in_layers=[dense1, batch_norm4])
    merge = Dense(out_channels=256,
                  activation_fn=tf.nn.elu,
                  in_layers=[merge_feat])
    costs = []
    labels = []
    for task in range(num_task):
        classification = Dense(out_channels=2,
                               activation_fn=None,
                               in_layers=[merge])
        softmax = SoftMax(in_layers=[classification])
        tg.add_output(softmax)
        label = Label(shape=(None, 2))
        labels.append(label)
        cost = SoftMaxCrossEntropy(in_layers=[label, classification])
        costs.append(cost)
    all_cost = Stack(in_layers=costs, axis=1)
    weights = Weights(shape=(None, num_task))
    loss = WeightedError(in_layers=[all_cost, weights])
    tg.set_loss(loss)
    #if prior == True:
    #    return tg, atom_features,circular_features, degree_slice, membership, deg_adjs, labels, weights#, prior_layer
    return tg, atom_features, circular_features, degree_slice, membership, deg_adjs, labels, weights
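A hypothetical call to the builder above (the values are illustrative; the generator that feeds the returned Feature/Label/Weights layers batch by batch, in the style of the GCN example further down, is omitted):

# Hypothetical usage sketch for graph_conv_net, not part of the original example.
tg, atom_features, circular_features, degree_slice, membership, \
    deg_adjs, labels, weights = graph_conv_net(batch_size=50,
                                               prior=False,
                                               num_task=12)
# Training would then be driven by a generator of feed dicts, e.g.:
#   tg.fit_generator(my_feed_generator(dataset, batch_size=50))
# where my_feed_generator (hypothetical) maps each returned layer to batch data.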
Example #4
    # Per-task output head (this excerpt is the body of a loop over tasks)
    classification = Dense(out_channels=2,
                           activation_fn=None,
                           in_layers=[readout])

    softmax = SoftMax(in_layers=[classification])
    tg.add_output(softmax)

    label = Label(shape=(None, 2))
    labels.append(label)
    cost = SoftMaxCrossEntropy(in_layers=[label, classification])
    costs.append(cost)

all_cost = Stack(in_layers=costs, axis=1)
weights = Weights(shape=(None, len(current_tasks)))
loss = WeightedError(in_layers=[all_cost, weights])
tg.set_loss(loss)
# Data splits
# Tox21 is treated differently: we manually (randomly) split train, valid, and test directly from
# train_dataset.X rather than using the splits deepchem provides.
# Reason: early in development, valid_dataset and test_dataset came back empty for Tox21, and a comment
# in the deepchem source led us to believe this was intended, so we avoided accessing valid_dataset.X and
# test_dataset.X for tox_21. We only later found that the Tox21 validation and test sets were accessible,
# but the manual split is applied to all models, so the comparison remains fair.
if TASK != 'tox_21':
    new_train_data = generate_new_X(train_dataset.X, K, technique)
    new_train_dataset = dc.data.datasets.DiskDataset.from_numpy(
        new_train_data,
Example #5
class GCN:
    def __init__(self, batch_size=50):
        # save parameters
        self.batch_size = batch_size

        # define tensorgraph
        self.tg = TensorGraph(use_queue=False)

        # define features
        self.atom_features = Feature(shape=(None, 75))  # per-atom features, e.g. atom type / degree / aromaticity
        self.indexing = Feature(shape=(None, 2), dtype=tf.int32)  # slice indices of atoms grouped by degree
        self.membership = Feature(shape=(None,), dtype=tf.int32)  # molecule membership of each atom
        self.deg_adj_list = [Feature(shape=(None, i), dtype=tf.int32) for i in range(1, 12)]  # adjacency lists, one per degree 1..11

        # build graph
        self.build_graph()

    def build_graph(self):
        # Layer 1
        gc1_input = [self.atom_features, self.indexing, self.membership] + self.deg_adj_list
        gc1 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc1_input)
        bn1 = BatchNorm(in_layers=[gc1])
        gp1_input = [bn1, self.indexing, self.membership] + self.deg_adj_list
        gp1 = GraphPool(in_layers=gp1_input)

        # Layer 2
        gc2_input = [gp1, self.indexing, self.membership] + self.deg_adj_list
        gc2 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc2_input)
        bn2 = BatchNorm(in_layers=[gc2])
        gp2_input = [bn2, self.indexing, self.membership] + self.deg_adj_list
        gp2 = GraphPool(in_layers=gp2_input)

        # Dense layer 1
        d1 = Dense(out_channels=128, activation_fn=tf.nn.relu, in_layers=[gp2])
        bn3 = BatchNorm(in_layers=[d1])

        # Graph gather layer
        gg1_input = [bn3, self.indexing, self.membership] + self.deg_adj_list
        gg1 = GraphGather(batch_size=self.batch_size, activation_fn=tf.nn.tanh, in_layers=gg1_input)

        # Output dense layer
        d2 = Dense(out_channels=2, activation_fn=None, in_layers=[gg1])
        softmax = SoftMax(in_layers=[d2])
        self.tg.add_output(softmax)

        # Set loss function
        self.label = Label(shape=(None, 2))
        cost = SoftMaxCrossEntropy(in_layers=[self.label, d2])
        self.weight = Weights(shape=(None, 1))
        loss = WeightedError(in_layers=[cost, self.weight])
        self.tg.set_loss(loss)

    def fit(self, dataset, epochs: int):
        self.tg.fit_generator(self.data_generator(dataset, self.batch_size, epochs=epochs))

    def predict(self, dataset):
        pred = self.tg.predict_on_generator(self.data_generator(dataset, self.batch_size))
        return np.expand_dims(pred, axis=0)

    def data_generator(self, dataset, batch_size: int, epochs=1):
        for e in range(epochs):
            for X, y, w, idx in dataset.iterbatches(batch_size, pad_batches=True, deterministic=True):
                feed_dict = {self.label: to_one_hot(y[:, 0]), self.weight: w}  # one-hot labels and per-sample weights
                ConvMolList = ConvMol.agglomerate_mols(X)
                feed_dict[self.atom_features] = ConvMolList.get_atom_features()
                feed_dict[self.indexing] = ConvMolList.deg_slice
                feed_dict[self.membership] = ConvMolList.membership
                deg_adj_list = ConvMolList.get_deg_adjacency_lists()
                for i in range(1, len(deg_adj_list)):
                    feed_dict[self.deg_adj_list[i - 1]] = deg_adj_list[i]

                yield feed_dict
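End to end, the GCN class above could be driven roughly like this (a sketch assuming RDKit and DeepChem's ConvMolFeaturizer; the molecules and labels are illustrative):

# Hypothetical usage sketch for GCN, not part of the original example.
from rdkit import Chem

featurizer = dc.feat.ConvMolFeaturizer()
mols = [Chem.MolFromSmiles(s) for s in ['CCO', 'c1ccccc1', 'CC(=O)O']]
X = featurizer.featurize(mols)   # ConvMol graph objects
y = np.array([[0], [1], [0]])
dataset = dc.data.NumpyDataset(X, y)

model = GCN(batch_size=2)
model.fit(dataset, epochs=5)
probs = model.predict(dataset)   # per-molecule class probabilities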