Пример #1
0
def test_simple_numeric_predictor():
    """Predict with a single unnamed numeric input and one sigmoid output.

    Checks only that one prediction comes back per input row.
    """
    n_rows, n_features = 10, 30
    numeric_input = NumericInput(dim=n_features)
    sigmoid_output = Output(dim=1, activation="sigmoid")
    model = Predictor(
        inputs=[numeric_input],
        outputs=[sigmoid_output],
        dense_layer_sizes=[30],
        dense_activation="relu")
    predictions = model.predict(randn(n_rows, n_features))
    # The model is untrained; only the number of predictions is verified.
    eq_(len(predictions), n_rows)
Пример #2
0
def test_fixed_length_hotshot():
    """Predict on two fixed-length, one-hot encoded 9-character strings.

    Checks only that one prediction comes back per input string.
    """
    sequence_length = 9
    onehot_input = SequenceInput(
        length=sequence_length,
        variable_length=False,
        encoding="onehot")
    model = Predictor(
        inputs=onehot_input,
        outputs=Output(1, activation="sigmoid"))
    sequences = [letter * sequence_length for letter in ("A", "L")]
    predictions = model.predict(sequences)
    eq_(len(predictions), len(sequences))
Пример #3
0
def test_simple_numeric_predictor_named():
    """Predict through a named numeric input ("x") and a named output ("y").

    Inputs are passed as a dict keyed by input name and the prediction is
    read back from the result under the output name.
    """
    # NOTE(review): this example passes hidden_layer_sizes/hidden_activation
    # while other examples in this file pass dense_layer_sizes/
    # dense_activation -- confirm which keywords the Predictor in use accepts.
    n_rows = 10
    named_input = NumericInput(name="x", dim=30)
    named_output = Output(dim=1, name="y", activation="sigmoid")
    model = Predictor(
        inputs=[named_input],
        outputs=[named_output],
        hidden_layer_sizes=[30],
        hidden_activation="relu")
    predictions_by_name = model.predict({"x": randn(n_rows, 30)})
    eq_(len(predictions_by_name["y"]), n_rows)
Пример #4
0
def test_simple_sequence_predictor_named():
    """Predict through a named variable-length sequence input and output.

    Ten copies of the string "SFY-" are fed in under the input name "x";
    the test checks that ten predictions come back under the name "y".
    """
    # NOTE(review): this example passes hidden_layer_sizes/hidden_activation
    # while other examples in this file pass dense_layer_sizes/
    # dense_activation -- confirm which keywords the Predictor in use accepts.
    n_sequences = 10
    sequence_input = SequenceInput(length=4, name="x", variable_length=True)
    named_output = Output(dim=1, activation="sigmoid", name="y")
    model = Predictor(
        inputs=[sequence_input],
        outputs=[named_output],
        hidden_layer_sizes=[30],
        hidden_activation="relu")
    predictions_by_name = model.predict({"x": ["SFY-"] * n_sequences})
    eq_(len(predictions_by_name["y"]), n_sequences)
Пример #5
0
def test_simple_sequence_predictor():
    """Predict with one unnamed variable-length sequence input.

    Ten copies of the string "SFY-" are passed as a plain list; the test
    checks that ten predictions come back.
    """
    n_sequences = 10
    sequence_input = SequenceInput(length=4, variable_length=True)
    sigmoid_output = Output(dim=1, activation="sigmoid")
    model = Predictor(
        inputs=[sequence_input],
        outputs=[sigmoid_output],
        dense_layer_sizes=[30],
        dense_activation="relu")
    predictions = model.predict(["SFY-"] * n_sequences)
    eq_(len(predictions), n_sequences)
Пример #6
0
def test_embedding_conv_1_layer():
    """Fit a fixed-length sequence model configured with conv filters.

    ``conv_filter_sizes`` is given as the dict ``{2: 3}``.  There is no
    assertion: the test passes as long as ``fit`` does not raise.
    """
    conv_input = SequenceInput(
        length=3,
        variable_length=False,
        conv_filter_sizes={2: 3})
    model = Predictor(
        inputs=conv_input,
        outputs=Output(1, activation="sigmoid"))
    sequences = ["SAY", "FFQ"]
    labels = np.array([True, False])
    model.fit(sequences, labels)
Пример #7
0
def test_two_input_predictor():
    """Predict with a model that takes a sequence input and a numeric input.

    Both inputs are named ("x1", "x2") and supplied through one dict; the
    test checks that ten predictions come back under the output name "y".
    """
    n_rows = 10
    sequence_input = SequenceInput(length=4, name="x1", variable_length=True)
    numeric_input = NumericInput(dim=30, name="x2")
    named_output = Output(name="y", dim=1, activation="sigmoid")
    model = Predictor(
        inputs=[sequence_input, numeric_input],
        outputs=[named_output],
        dense_layer_sizes=[30],
        dense_activation="relu")
    features = {"x1": ["SFY-"] * n_rows, "x2": randn(n_rows, 30)}
    predictions = model.predict(features)["y"]
    eq_(len(predictions), n_rows)
Пример #8
0
def test_basic_rnn():
    """Predict with a bidirectional LSTM over embedded sequences.

    Checks that one prediction is returned per input sequence and that at
    least one layer of the underlying model has "bidirectional" in its name.
    """
    rnn_input = SequenceInput(
        name="x",
        length=4,
        variable_length=True,
        encoding="embedding",
        rnn_layer_sizes=[20],
        rnn_type="lstm",
        rnn_bidirectional=True)
    model = Predictor(
        inputs=rnn_input,
        outputs=Output(dim=1, activation="sigmoid", name="y"))

    # Sequences shorter than the declared length of 4 are included on purpose.
    sequences = ["SF", "Y", "AALL"]
    predictions = model.predict({"x": sequences})["y"]
    eq_(len(sequences), len(predictions))

    layer_names = [layer.name for layer in model.model.layers]
    assert any("bidirectional" in name for name in layer_names)
Пример #9
0
def test_predictor_output_transform():
    """Check that predictions of a transformed output stay within bounds.

    The output uses a sigmoid activation with ``transform=log`` and
    ``inverse_transform=exp``; the test asserts that the smallest and largest
    predictions both fall inside ``[exp(0), exp(1)]``.
    """
    numeric_input = NumericInput(dim=30, name="x")
    transformed_output = Output(
        name="y",
        dim=1,
        activation="sigmoid",
        transform=log,
        inverse_transform=exp)
    model = Predictor(
        inputs=[numeric_input],
        outputs=[transformed_output],
        dense_layer_sizes=[30],
        dense_activation="relu")
    y = model.predict({"x": randn(10, 30)})["y"]
    eq_(len(y), 10)

    # Presumably predict() applies inverse_transform to the sigmoid output
    # (range 0..1), which is what puts results between exp(0) and exp(1).
    lower_bound, upper_bound = exp(0.0), exp(1.0)
    for extreme in (y.min(), y.max()):
        assert lower_bound <= extreme <= upper_bound
def make_predictors(widths=(9,),
                    layer_sizes=(8,),
                    n_conv_layers=(2,),
                    conv_dropouts=(0.25,),
                    conv_activation="relu",
                    global_pooling_batch_normalization=True):
    """Build one convolutional peptide Predictor per hyperparameter combination.

    Parameters
    ----------
    widths : iterable of int
        Each value is inserted into the filter-size list
        ``[1, 3, 5, 7, width, 11]``.
    layer_sizes : iterable of int
        Values for ``conv_output_dim``.
    n_conv_layers : iterable of int
        Values for ``n_conv_layers``.
    conv_dropouts : iterable of float
        Values for ``conv_dropout``.
    conv_activation : str
        Passed to every SequenceInput as ``conv_activation``.
    global_pooling_batch_normalization : bool
        Passed to every SequenceInput under the same keyword.

    Returns
    -------
    dict
        Maps ``(width, layer_size, n_layers, dropout)`` tuples to Predictor
        instances, covering the full cross product of the four iterables.
    """
    # Defaults are tuples rather than lists so that no mutable object is
    # shared between calls; the arguments are only iterated over.
    return {
        (width, layer_size, n_layers, dropout): Predictor(
            inputs=SequenceInput(
                name="peptide",
                length=45,
                add_start_tokens=True,
                add_stop_tokens=True,
                variable_length=True,
                conv_filter_sizes=[1, 3, 5, 7, width, 11],
                n_conv_layers=n_layers,
                conv_output_dim=layer_size,
                conv_activation=conv_activation,
                conv_dropout=dropout,
                global_pooling=True,
                global_pooling_batch_normalization=(
                    global_pooling_batch_normalization)),
            outputs=Output(1, activation="sigmoid"))
        for width in widths
        for layer_size in layer_sizes
        for n_layers in n_conv_layers
        for dropout in conv_dropouts
    }
Пример #11
0
 def _make_nn_model(self,
                    n_features,
                    dense_layer_sizes=None,
                    batch_normalization=True):
     """Build a Predictor with one numeric input and one sigmoid output.

     Parameters
     ----------
     n_features
         Passed as the first positional argument of NumericInput.
     dense_layer_sizes : list of int, optional
         Forwarded to NumericInput as ``dense_layer_sizes``.  When omitted
         (None), a fresh ``[20]`` list is used.
     batch_normalization : bool
         Forwarded to NumericInput as ``dense_batch_normalization``.

     Returns
     -------
     Predictor
     """
     # A None sentinel replaces a list default: the list is handed to
     # NumericInput, so a shared default object could leak state between
     # models built by separate calls.
     if dense_layer_sizes is None:
         dense_layer_sizes = [20]
     numeric_input = NumericInput(
         n_features,
         dense_layer_sizes=dense_layer_sizes,
         dense_batch_normalization=batch_normalization)
     return Predictor(
         inputs=numeric_input,
         outputs=Output(1, activation="sigmoid"))
Пример #12
0
    def from_dict(cls, d):
        """Build an instance of ``cls`` from its dictionary representation.

        ``d["models"]`` must hold a sequence of dictionaries, each accepted by
        ``Predictor.from_dict``; ``d["model_weights"]`` is optional and
        defaults to None.
        """
        # Imported here rather than at module level so that importing this
        # module does not pay the cost of importing pepnet.
        from pepnet import Predictor

        serialized_models = d["models"]
        weights = d.get("model_weights")
        return cls(
            models=[
                Predictor.from_dict(serialized)
                for serialized in serialized_models
            ],
            model_weights=weights)
Пример #13
0
def test_predictor_on_more_data():
    """Train on synthetic peptides and check binders score higher.

    Two data frames of 1000 synthetic peptides each are generated, one for
    training (20 epochs) and one for evaluation.  The test asserts that the
    mean prediction for binders is more than twice that for non-binders.
    """
    sequence_input = SequenceInput(length=20, name="x", variable_length=True)
    named_output = Output(dim=1, activation="sigmoid", name="y")
    model = Predictor(
        inputs=[sequence_input],
        outputs=[named_output],
        dense_layer_sizes=[30],
        dense_activation="relu")

    train_df = synthetic_peptides_by_subsequence(1000)
    test_df = synthetic_peptides_by_subsequence(1000)

    # Peptide strings are read from the index, labels from the `binder` column.
    model.fit(
        {"x": train_df.index.values},
        train_df.binder.values,
        epochs=20)

    raw_predictions = model.predict({"x": test_df.index.values})['y']
    predictions = pandas.Series(raw_predictions, index=test_df.index)
    is_binder = test_df.binder
    binder_mean_pred = predictions[is_binder].mean()
    nonbinder_mean_pred = predictions[~is_binder].mean()
    print(binder_mean_pred, nonbinder_mean_pred)
    assert binder_mean_pred > nonbinder_mean_pred * 2, (
        binder_mean_pred, nonbinder_mean_pred)
Пример #14
0
def test_model_with_fixed_length_context():
    """Fit a three-input model and check it reproduces its training labels.

    The inputs are given as a dict of name -> SequenceInput: fixed-length
    single-character "upstream" and "downstream" inputs plus a
    variable-length "peptide" input.  After 20 epochs, thresholding the
    predictions at 0.5 must match every training label.
    """
    sequence_inputs = {
        "upstream": SequenceInput(length=1, variable_length=False),
        "downstream": SequenceInput(length=1, variable_length=False),
        "peptide": SequenceInput(length=3, variable_length=True),
    }
    model = Predictor(
        inputs=sequence_inputs,
        outputs=Output(1, activation="sigmoid"))

    labels = np.array([True, False, True, False])
    features = {
        "upstream": ["Q", "A", "L", "I"],
        "downstream": ["S"] * 4,
        "peptide": ["SYF", "QQ", "C", "GLL"],
    }
    model.fit(features, labels, epochs=20)
    predicted = model.predict(features)
    assert (labels == (predicted > 0.5)).all(), (labels, predicted)
Пример #15
0
def make_model(sufficiently_large_output_names):
    """Build a two-input (MHC + peptide) Predictor with one Output per name.

    Parameters
    ----------
    sufficiently_large_output_names : iterable of str
        Names of the outputs to create.  The name selects the output
        settings:

        * contains "IC50" or "EC50": ``from_ic50`` / ``to_ic50`` transforms
          with a sigmoid activation;
        * otherwise contains "half life": ``log10(x + 1)`` and its inverse
          with a relu activation;
        * anything else: no transform, sigmoid activation.

    Returns
    -------
    Predictor
        Combines both inputs with ``merge_mode="concat"``.  Each created
        Output is also printed.
    """
    mhc = SequenceInput(
        name="mhc",
        length=34,
        variable_length=True,
        encoding="index",
        embedding_dim=20,
        embedding_mask_zero=False,
        dense_layer_sizes=[64],
        dense_batch_normalization=True)

    peptide = SequenceInput(
        name="peptide",
        length=50,
        variable_length=True,
        encoding="index",
        embedding_dim=20,
        embedding_mask_zero=True,
        conv_filter_sizes=[1, 9, 10],
        n_conv_layers=2,
        conv_output_dim=32,
        conv_activation="relu",
        # conv_weight_source=mhc,
        global_pooling=True,
        global_pooling_batch_normalization=True)

    outputs = []
    for output_name in sufficiently_large_output_names:
        if "IC50" in output_name or "EC50" in output_name:
            settings = dict(
                transform=from_ic50,
                inverse_transform=to_ic50,
                activation="sigmoid")
        elif "half life" in output_name:
            settings = dict(
                transform=(lambda x: np.log10(x + 1)),
                inverse_transform=(lambda x: (10.0**x) - 1),
                activation="relu")
        else:
            settings = dict(
                transform=None,
                inverse_transform=None,
                activation="sigmoid")
        output = Output(name=output_name, **settings)
        print(output)
        outputs.append(output)

    return Predictor(
        inputs=[mhc, peptide],
        outputs=outputs,
        merge_mode="concat",
        dense_layer_sizes=[32],
        dense_activation="tanh",
        dense_batch_normalization=True)
Пример #16
0
def test_predictor_weights_all_ones():
    """Check that weights survive a to_json / from_json round trip.

    Every weight array of a small one-hot sequence model is filled with ones,
    the model is serialized and restored, and the restored weights must still
    be all ones.
    """
    onehot_input = SequenceInput(
        length=2, variable_length=False, encoding="onehot")
    original = Predictor(
        inputs=[onehot_input],
        outputs=[Output(dim=1, activation="sigmoid")])

    all_ones = original.get_weights()
    for weight_array in all_ones:
        weight_array.fill(1)
    original.set_weights(all_ones)

    restored = Predictor.from_json(original.to_json())
    for weight_array in restored.get_weights():
        is_all_ones = (weight_array == np.ones_like(weight_array)).all()
        assert is_all_ones, "Expected %s to be all 1s" % (weight_array, )
Пример #17
0
def make_predictors(widths=(9,),
                    layer_sizes=(16,),
                    n_conv_layers=(2,),
                    conv_dropouts=(0,)):
    """Build one convolutional peptide Predictor per hyperparameter combination.

    Parameters
    ----------
    widths : iterable of int
        Each value is appended to the filter-size list ``[1, 3, width]``.
    layer_sizes : iterable of int
        Values for ``conv_output_dim``.
    n_conv_layers : iterable of int
        Values for ``n_conv_layers``.
    conv_dropouts : iterable of float
        Values for ``conv_dropout``.

    Returns
    -------
    dict
        Maps ``(width, layer_size, n_layers, dropout)`` tuples to Predictor
        instances, covering the full cross product of the four iterables.
    """
    # Defaults are tuples rather than lists so that no mutable object is
    # shared between calls; the arguments are only iterated over.
    return {
        (width, layer_size, n_layers, dropout): Predictor(
            inputs=SequenceInput(
                name="peptide",
                length=22,
                variable_length=True,
                conv_filter_sizes=[1, 3, width],
                n_conv_layers=n_layers,
                conv_output_dim=layer_size,
                conv_dropout=dropout,
                global_pooling=True),
            outputs=Output(1, activation="sigmoid"))
        for width in widths
        for layer_size in layer_sizes
        for n_layers in n_conv_layers
        for dropout in conv_dropouts
    }
def make_predictors(
        widths=(8, 9, 10),
        layer_sizes=(4, 16, 32),
        n_conv_layers=(1, 2),
        conv_dropouts=(0, 0.25)):
    """Build one single-filter-width Predictor per hyperparameter combination.

    Parameters
    ----------
    widths : iterable of int
        Each value becomes the only entry of ``conv_filter_sizes``.
    layer_sizes : iterable of int
        Values for ``conv_output_dim``.
    n_conv_layers : iterable of int
        Values for ``n_conv_layers``.
    conv_dropouts : iterable of float
        Values for ``conv_dropout``.

    Returns
    -------
    dict
        Maps a descriptive string such as
        ``"width=9, layer_size=16, n_layers=2, conv=0.25"`` to a Predictor,
        covering the full cross product of the four iterables.
    """
    # Defaults are tuples rather than lists so that no mutable object is
    # shared between calls; the arguments are only iterated over.
    key_template = "width=%d, layer_size=%d, n_layers=%d, conv=%0.2f"
    return {
        key_template % (width, layer_size, n_layers, dropout): Predictor(
            inputs=SequenceInput(
                name="peptide",
                length=22,
                variable_length=True,
                conv_filter_sizes=[width],
                n_conv_layers=n_layers,
                conv_output_dim=layer_size,
                conv_dropout=dropout,
                global_pooling=True),
            outputs=Output(1, activation="sigmoid"))
        for width in widths
        for layer_size in layer_sizes
        for n_layers in n_conv_layers
        for dropout in conv_dropouts
    }
def make_predictor(conv_filter_sizes,
                   conv_dropout=0.25,
                   conv_activation="relu",
                   global_pooling_batch_normalization=True,
                   dense_layer_sizes=None,
                   dense_dropout=0.25,
                   dense_activation="relu"):
    """Build a convolutional peptide Predictor with a configurable dense head.

    Parameters
    ----------
    conv_filter_sizes
        Forwarded to SequenceInput as ``conv_filter_sizes``.
    conv_dropout : float
        Forwarded to SequenceInput as ``conv_dropout``.
    conv_activation : str
        Forwarded to SequenceInput as ``conv_activation``.
    global_pooling_batch_normalization : bool
        Forwarded to SequenceInput under the same keyword.
    dense_layer_sizes : list of int, optional
        Forwarded to Output as ``dense_layer_sizes``.  When omitted (None),
        a fresh empty list is used.
    dense_dropout : float
        Forwarded to Output as ``dense_dropout``.
    dense_activation : str
        Forwarded to Output as ``dense_activation``.

    Returns
    -------
    Predictor
        The peptide input length comes from the module-level ``MAX_LENGTH``.
    """
    # A None sentinel replaces a list default so that no mutable object is
    # shared between the Outputs created by separate calls.
    if dense_layer_sizes is None:
        dense_layer_sizes = []
    peptide_input = SequenceInput(
        name="peptide",
        length=MAX_LENGTH,
        add_start_tokens=True,
        add_stop_tokens=True,
        variable_length=True,
        embedding_dim=24,
        conv_filter_sizes=conv_filter_sizes,
        conv_activation=conv_activation,
        conv_dropout=conv_dropout,
        global_pooling=True,
        global_pooling_batch_normalization=global_pooling_batch_normalization)
    # Pass the caller's dense_activation through; a hard-coded "relu" here
    # would make the parameter have no effect.
    sigmoid_output = Output(
        1,
        activation="sigmoid",
        dense_layer_sizes=dense_layer_sizes,
        dense_activation=dense_activation,
        dense_dropout=dense_dropout)
    return Predictor(inputs=peptide_input, outputs=sigmoid_output)
def make_model(output_names):
    """Build a two-input (MHC + peptide) Predictor with one Output per name.

    Parameters
    ----------
    output_names : iterable of str
        Names of the outputs to create.  The name selects the output
        settings:

        * contains "IC50" or "EC50": ``from_ic50`` / ``to_ic50`` transforms
          with a sigmoid activation;
        * otherwise contains "half life": ``log10(x + 1)`` and its inverse
          with a relu activation;
        * anything else: no transform, sigmoid activation.

        Every output uses the module-level ``LOSS``.

    Returns
    -------
    Predictor
        Merges both inputs according to the module-level ``MERGE`` and
        tracks accuracy as a training metric.  Each created Output is also
        printed.
    """
    mhc = SequenceInput(
        name="mhc",
        length=34,
        variable_length=True,
        encoding="index",
        embedding_dim=32,
        embedding_mask_zero=False,
        dense_layer_sizes=[32],
        dense_activation="tanh",
        dense_batch_normalization=MERGE_BATCH_NORMALIZATION,
        dense_dropout=MERGE_DROPOUT)

    peptide = SequenceInput(
        name="peptide",
        length=45,
        variable_length=True,
        encoding="index",
        add_start_tokens=True,
        add_stop_tokens=True,
        embedding_dim=32,
        embedding_mask_zero=True,
        conv_filter_sizes=[9],
        n_conv_layers=2,
        conv_output_dim=32,
        conv_activation="relu",
        conv_dropout=CONV_DROPOUT,
        conv_batch_normalization=CONV_BATCH_NORMALIZATION,
        # conv_weight_source=mhc,
        global_pooling=True,
        global_pooling_batch_normalization=True,
        global_pooling_dropout=0.25,
        dense_layer_sizes=[32],
        dense_activation="sigmoid",
        dense_batch_normalization=MERGE_BATCH_NORMALIZATION,
        dense_dropout=MERGE_DROPOUT)

    outputs = []
    for output_name in output_names:
        if "IC50" in output_name or "EC50" in output_name:
            settings = dict(
                transform=from_ic50,
                inverse_transform=to_ic50,
                activation="sigmoid")
        elif "half life" in output_name:
            settings = dict(
                transform=(lambda x: np.log10(x + 1)),
                inverse_transform=(lambda x: (10.0 ** x) - 1),
                activation="relu")
        else:
            settings = dict(
                transform=None,
                inverse_transform=None,
                activation="sigmoid")
        output = Output(name=output_name, loss=LOSS, **settings)
        print(output)
        outputs.append(output)

    return Predictor(
        inputs=[mhc, peptide],
        outputs=outputs,
        merge_mode=MERGE,
        training_metrics=["accuracy"])
Пример #21
0
def test_discrete_input_with_str_tokens():
    """Fit a model on string tokens and check the learned ordering.

    Token "x" is trained towards 0 and "z" towards 1 (20 epochs), so the
    prediction for "x" must end up below the prediction for "z".
    """
    token_input = DiscreteInput(choices=["x", "y", "z"], embedding_dim=2)
    model = Predictor(inputs=token_input, outputs=Output(1, "sigmoid"))

    tokens = ["x", "x", "y", "z"]
    targets = [0, 0, 0.5, 1.0]
    model.fit(tokens, targets, epochs=20)

    low_prediction = model.predict(["x"])
    high_prediction = model.predict(["z"])
    assert low_prediction < high_prediction
Пример #22
0
def make_predictors():
    """Build a dict of peptide Predictors covering several architectures.

    Every model shares the same variable-length "peptide" SequenceInput
    (length 22) and a single sigmoid Output; the architectures differ only
    in the extra SequenceInput options listed below.

    Returns
    -------
    dict
        Maps an architecture label (e.g. "pool", "conv-rnn",
        "multiconv2-rnn2") to its Predictor, in the order listed.
    """
    # (label, extra SequenceInput keyword arguments) for each architecture.
    # Built inside the function so every call gets fresh list objects.
    architectures = (
        ("pool", {
            "global_pooling": True,
        }),
        ("rnn", {
            "rnn_layer_sizes": [32],
        }),
        ("rnn2", {
            "rnn_layer_sizes": [32, 32],
        }),
        ("conv-pool", {
            "conv_filter_sizes": [9],
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "global_pooling": True,
        }),
        ("conv2-pool", {
            "conv_filter_sizes": [9],
            "n_conv_layers": 2,
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "global_pooling": True,
        }),
        ("conv-rnn", {
            "conv_filter_sizes": [9],
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "rnn_layer_sizes": [32],
        }),
        ("multiconv-pool", {
            "conv_filter_sizes": [3, 9],
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "global_pooling": True,
        }),
        ("multiconv2-pool", {
            "conv_filter_sizes": [3, 9],
            "n_conv_layers": 2,
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "global_pooling": True,
        }),
        ("multiconv-rnn", {
            "conv_filter_sizes": [3, 9],
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "rnn_layer_sizes": [32],
        }),
        ("multiconv2-rnn", {
            "conv_filter_sizes": [3, 9],
            "n_conv_layers": 2,
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "rnn_layer_sizes": [32],
        }),
        ("multiconv2-rnn2", {
            "conv_filter_sizes": [3, 9],
            "n_conv_layers": 2,
            "conv_output_dim": 16,
            "conv_dropout": 0.1,
            "rnn_layer_sizes": [32, 32],
        }),
    )

    predictors = {}
    for label, extra_options in architectures:
        peptide_input = SequenceInput(
            name="peptide",
            length=22,
            variable_length=True,
            **extra_options)
        predictors[label] = Predictor(
            inputs=peptide_input,
            outputs=Output(1, activation="sigmoid"))
    return predictors
Пример #23
0
def test_predictor_json_identity():
    """A Predictor must compare equal to its own JSON round trip."""
    onehot_input = SequenceInput(
        length=2, variable_length=False, encoding="onehot")
    original = Predictor(
        inputs=[onehot_input],
        outputs=[Output(dim=1, activation="sigmoid")])
    round_tripped = Predictor.from_json(original.to_json())
    eq_(original, round_tripped)