def test_simple_numeric_predictor():
    """Predict from an unnamed numeric input and check the output count.

    No fit is performed; the test only verifies that predicting on 10 rows
    of 30 random values returns 10 results.
    """
    numeric_inputs = [NumericInput(dim=30)]
    sigmoid_outputs = [Output(dim=1, activation="sigmoid")]
    predictor = Predictor(
        inputs=numeric_inputs,
        outputs=sigmoid_outputs,
        dense_layer_sizes=[30],
        dense_activation="relu")
    features = randn(10, 30)
    predictions = predictor.predict(features)
    eq_(len(predictions), 10)
def test_simple_sequence_predictor_named():
    """Predict from a named sequence input and read the named output.

    Inputs are passed as a dict keyed by the input name "x" and the result
    is looked up under the output name "y". No fit is performed; the test
    only checks that 10 sequences produce 10 predictions.
    """
    predictor = Predictor(
        inputs=[SequenceInput(length=4, name="x", variable_length=True)],
        outputs=[Output(dim=1, activation="sigmoid", name="y")],
        # Use the same dense_* keyword spelling as the other Predictor tests
        # in this file rather than hidden_*.
        dense_layer_sizes=[30],
        dense_activation="relu")
    y = predictor.predict({"x": ["SFY-"] * 10})["y"]
    eq_(len(y), 10)
def test_simple_numeric_predictor_named():
    """Predict from a named numeric input and read the named output.

    Inputs are passed as a dict keyed by the input name "x" and the result
    is looked up under the output name "y". No fit is performed; the test
    only checks that 10 rows of 30 random values produce 10 predictions.
    """
    predictor = Predictor(
        inputs=[NumericInput(name="x", dim=30)],
        outputs=[Output(dim=1, name="y", activation="sigmoid")],
        # Use the same dense_* keyword spelling as the other Predictor tests
        # in this file rather than hidden_*.
        dense_layer_sizes=[30],
        dense_activation="relu")
    y = predictor.predict({"x": randn(10, 30)})["y"]
    eq_(len(y), 10)
def test_simple_sequence_predictor():
    """Predict from an unnamed sequence input and check the output count.

    No fit is performed; a plain list of 10 copies of "SFY-" is passed to
    predict and 10 results are expected back.
    """
    sequence_inputs = [SequenceInput(length=4, variable_length=True)]
    sigmoid_outputs = [Output(dim=1, activation="sigmoid")]
    predictor = Predictor(
        inputs=sequence_inputs,
        outputs=sigmoid_outputs,
        dense_layer_sizes=[30],
        dense_activation="relu")
    peptides = ["SFY-"] * 10
    predictions = predictor.predict(peptides)
    eq_(len(predictions), 10)
def test_fixed_length_hotshot():
    """Predict on two 9-mers using a fixed-length, onehot-encoded input.

    The input and output are passed to Predictor directly (not wrapped in
    lists). No fit is performed; only the number of predictions is checked.
    """
    onehot_input = SequenceInput(
        length=9,
        variable_length=False,
        encoding="onehot")
    sigmoid_output = Output(1, activation="sigmoid")
    model = Predictor(inputs=onehot_input, outputs=sigmoid_output)
    nine_mers = ["A" * 9, "L" * 9]
    predictions = model.predict(nine_mers)
    eq_(len(predictions), 2)
def test_two_input_predictor():
    """Predict from one sequence input and one numeric input together.

    Both inputs are named ("x1", "x2") and supplied via a dict; the result
    is read from the named output "y". No fit is performed; only the number
    of predictions is checked.
    """
    sequence_input = SequenceInput(length=4, name="x1", variable_length=True)
    numeric_input = NumericInput(dim=30, name="x2")
    sigmoid_output = Output(name="y", dim=1, activation="sigmoid")
    predictor = Predictor(
        inputs=[sequence_input, numeric_input],
        outputs=[sigmoid_output],
        dense_layer_sizes=[30],
        dense_activation="relu")
    batch = {
        "x1": ["SFY-"] * 10,
        "x2": randn(10, 30),
    }
    predictions = predictor.predict(batch)["y"]
    eq_(len(predictions), 10)
def test_basic_rnn():
    """Build a predictor with a bidirectional LSTM over an embedded sequence.

    Checks that predicting on three sequences of differing lengths returns
    one result per sequence, and that at least one layer of the underlying
    model has "bidirectional" in its name.
    """
    rnn_input = SequenceInput(
        name="x",
        length=4,
        variable_length=True,
        encoding="embedding",
        rnn_layer_sizes=[20],
        rnn_type="lstm",
        rnn_bidirectional=True)
    sigmoid_output = Output(dim=1, activation="sigmoid", name="y")
    pred = Predictor(inputs=rnn_input, outputs=sigmoid_output)

    sequences = ["SF", "Y", "AALL"]
    predictions = pred.predict({"x": sequences})["y"]
    eq_(len(sequences), len(predictions))

    layer_names = [layer.name for layer in pred.model.layers]
    assert any("bidirectional" in layer_name for layer_name in layer_names)
def test_predictor_output_transform():
    """Check predictions from an output configured with log/exp transforms.

    The output uses a sigmoid activation with transform=log and
    inverse_transform=exp. Presumably predict applies the inverse transform
    to the sigmoid value (confirm against Predictor), in which case every
    prediction should fall between exp(0.0) and exp(1.0).
    """
    transformed_output = Output(
        name="y",
        dim=1,
        activation="sigmoid",
        transform=log,
        inverse_transform=exp)
    predictor = Predictor(
        inputs=[NumericInput(dim=30, name="x")],
        outputs=[transformed_output],
        dense_layer_sizes=[30],
        dense_activation="relu")
    predictions = predictor.predict({"x": randn(10, 30)})["y"]
    eq_(len(predictions), 10)

    # Both extremes of the predictions must sit inside the expected bounds.
    smallest = predictions.min()
    assert exp(0.0) <= smallest <= exp(1.0)
    largest = predictions.max()
    assert exp(0.0) <= largest <= exp(1.0)
def test_predictor_on_more_data():
    """Fit on synthetic peptides and compare predictions by binder label.

    Two independent synthetic data sets of 1000 rows are generated, one for
    fitting and one for evaluation. After 20 epochs, the mean prediction for
    rows flagged as binders must be more than twice the mean prediction for
    the remaining rows.
    """
    predictor = Predictor(
        inputs=[SequenceInput(length=20, name="x", variable_length=True)],
        outputs=[Output(dim=1, activation="sigmoid", name="y")],
        dense_layer_sizes=[30],
        dense_activation="relu")

    train_df = synthetic_peptides_by_subsequence(1000)
    test_df = synthetic_peptides_by_subsequence(1000)

    # The data frame index holds the sequences; `binder` holds the labels.
    train_inputs = {"x": train_df.index.values}
    predictor.fit(train_inputs, train_df.binder.values, epochs=20)

    raw_predictions = predictor.predict({"x": test_df.index.values})['y']
    predictions = pandas.Series(raw_predictions, index=test_df.index)

    is_binder = test_df.binder
    binder_mean_pred = predictions[is_binder].mean()
    nonbinder_mean_pred = predictions[~test_df.binder].mean()
    print(binder_mean_pred, nonbinder_mean_pred)
    assert binder_mean_pred > nonbinder_mean_pred * 2, (
        binder_mean_pred, nonbinder_mean_pred)
def test_model_with_fixed_length_context():
    """Fit a three-input model and check it reproduces its training labels.

    The inputs are given as a dict: two fixed-length single-character
    context inputs ("upstream", "downstream") and one variable-length
    "peptide" input. After 20 epochs on four examples, thresholding the
    predictions at 0.5 must match the boolean labels exactly.
    """
    context_inputs = {
        "upstream": SequenceInput(length=1, variable_length=False),
        "downstream": SequenceInput(length=1, variable_length=False),
        "peptide": SequenceInput(length=3, variable_length=True)
    }
    model = Predictor(
        inputs=context_inputs,
        outputs=Output(1, activation="sigmoid"))

    labels = np.array([True, False, True, False])
    examples = {
        "upstream": ["Q", "A", "L", "I"],
        "downstream": ["S"] * 4,
        "peptide": ["SYF", "QQ", "C", "GLL"]
    }
    model.fit(examples, labels, epochs=20)

    predicted = model.predict(examples)
    predicted_labels = predicted > 0.5
    assert (labels == predicted_labels).all(), (labels, predicted)
def test_discrete_input_with_str_tokens():
    """Fit on string tokens and check the learned ordering of two of them.

    Token "x" is trained toward 0 and "z" toward 1.0, so after 20 epochs
    the prediction for "x" must be lower than the prediction for "z".
    """
    token_input = DiscreteInput(choices=["x", "y", "z"], embedding_dim=2)
    pred = Predictor(inputs=token_input, outputs=Output(1, "sigmoid"))

    tokens = ["x", "x", "y", "z"]
    targets = [0, 0, 0.5, 1.0]
    pred.fit(tokens, targets, epochs=20)

    prediction_for_x = pred.predict(["x"])
    prediction_for_z = pred.predict(["z"])
    assert prediction_for_x < prediction_for_z