Example No. 1
def test_sequential_model_loading():
    m2 = kipoi.get_model("example/models/extended_coda", source='dir')
    m1 = kipoi.get_model("example/models/kipoi_dataloader_decorator",
                         source='dir')

    with cd(m2.source_dir):
        next(m2.default_dataloader.init_example().batch_iter())
    with cd(m1.source_dir):
        next(m1.default_dataloader.init_example().batch_iter())
Example No. 2
def test_predict_pipeline():
    model = kipoi.get_model("Basset", source="kipoi")
    dl_kwargs = model.default_dataloader.example_kwargs
    with cd(model.source_dir):
        ret = model.pipeline.predict(dl_kwargs)
    assert isinstance(ret, np.ndarray)
    with cd(model.source_dir):
        ret = model.pipeline.predict(dl_kwargs, layer="11")
    assert isinstance(ret, list)
    # with a model that does not implement LayerActivationMixin it should fail:
    hal_model = kipoi.get_model("HAL", source="kipoi")
    hal_dl_kwargs = hal_model.default_dataloader.example_kwargs
    with pytest.raises(Exception):
        ret = hal_model.pipeline.predict(hal_dl_kwargs, layer="11")
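A minimal sketch of testing for layer-activation support explicitly rather than through the raised exception, assuming the mixin named in the comment above is importable as kipoi.model.LayerActivationMixin:

import kipoi
from kipoi.model import LayerActivationMixin  # import path is an assumption

hal_model = kipoi.get_model("HAL", source="kipoi")
# HAL exposes no intermediate layers, so this is expected to print False
print(isinstance(hal_model, LayerActivationMixin))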
Example No. 3
def test_load_models_kipoi():
    k = kipoi.config.get_source("kipoi")

    ls = k.list_models()  # all the available models

    assert "HAL" in list(ls.model)
    model = "HAL"
    k.pull_model(model)

    # load the model
    kipoi.get_model(os.path.join(k.local_path, "HAL"), source="dir")

    kipoi.get_model(model, source="kipoi")
    kipoi.get_dataloader_factory(model)
Example No. 4
def test_get_model(source):
    # model correctly instantiated
    assert kipoi.get_dataloader_factory("pyt", source).info.doc
    assert kipoi.get_model("pyt", source).info.doc

    assert kipoi.get_model("multiple_models/model1", source).dummy_add == 1
    assert kipoi.get_model("multiple_models/submodel/model2", source).dummy_add == 2

    # model example predictions give the expected values
    m = kipoi.get_model("multiple_models/model1", source)
    assert np.all(m.pipeline.predict_example() == 1)

    m = kipoi.get_model("multiple_models/submodel/model2", source)
    assert np.all(m.pipeline.predict_example() == 2)
Example No. 5
def test_load_models_kipoi():
    k = kipoi.config.get_source("kipoi")

    ls = k.list_models()  # all the available models

    assert "HAL" in list(ls.model)
    model = "HAL"
    mpath = k.pull_model(model)
    m_dir = os.path.dirname(mpath)

    # load the model
    kipoi.get_model(m_dir, source="dir")

    kipoi.get_model(model, source="kipoi")
    kipoi.get_dataloader_factory(model)
Example No. 6
def cli_info(command, raw_args):
    """CLI interface to predict
    """
    assert command == "info"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Prints dataloader" +
                                                 " keyword arguments.")
    add_model(parser)
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)

    # --------------------------------------------
    # load model & dataloader
    model = kipoi.get_model(args.model, args.source)

    if args.dataloader is not None:
        dl_info = "dataloader '{0}' from source '{1}'".format(str(args.dataloader), str(args.dataloader_source))
        Dl = kipoi.get_dataloader_factory(args.dataloader, args.dataloader_source)
    else:
        dl_info = "default dataloader for model '{0}' from source '{1}'".format(str(model.name), str(args.source))
        Dl = model.default_dataloader

    print("-" * 80)
    print("Displaying keyword arguments for {0}".format(dl_info))
    print(Dl.print_args())
    print("-" * 80)
Example No. 7
def cli_test(command, raw_args):
    """Runs test on the model
    """
    assert command == "test"
    # setup the arg-parsing
    parser = argparse.ArgumentParser(
        'kipoi {}'.format(command),
        description='script to test model zoo submissions. Example usage:\n'
        '`kipoi test model/directory`, where `model/directory` is the '
        'path to a directory containing a model.yaml file.')
    add_model(parser, source="dir")
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='Batch size to use in prediction')
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    mh = kipoi.get_model(args.model, args.source)

    if not mh._sufficient_deps(mh.dependencies):
        # model requirements should be installed
        logger.warning(
            "Required package '{0}' for model type: {1} is not listed in the dependencies"
            .format(mh.MODEL_PACKAGE, mh.type))

    # Load the test files from model source
    # with cd(mh.source_dir):
    mh.pipeline.predict_example(batch_size=args.batch_size)
    # if not match:
    #     # logger.error("Expected targets don't match model predictions")
    #     raise Exception("Expected targets don't match model predictions")

    logger.info('Successfully ran test_predict')
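Likewise, a hypothetical direct call of the test handler, assuming add_model(parser, source="dir") registers a positional model-directory argument:

cli_test("test", ["example/models/pyt", "--batch_size", "4"])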
Example No. 8
def get_example_data(example, layer, writer=None):
    example_dir = "examples/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(example_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(example_dir, source="dir")
    #
    with open(example_dir + "/example_files/test.json", "r") as ifh:
        dataloader_arguments = json.load(ifh)

    for k in dataloader_arguments:
        dataloader_arguments[k] = "example_files/" + dataloader_arguments[k]

    outputs = []
    with cd(model.source_dir):
        dl = Dataloader(**dataloader_arguments)
        it = dl.batch_iter(batch_size=32, num_workers=0)

        # Loop through the data, make predictions, save the output
        for i, batch in enumerate(tqdm(it)):

            # make the prediction
            pred_batch = model.input_grad(batch['inputs'], avg_func="sum", layer=layer,
                                          final_layer=False)
            # write out the predictions, metadata (, inputs, targets)
            # always keep the inputs so that input*grad can be generated!
            output_batch = prepare_batch(batch, pred_batch, keep_inputs=True)
            if writer is not None:
                writer.batch_write(output_batch)
            outputs.append(output_batch)
        if writer is not None:
            writer.close()
    return numpy_collate(outputs)
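A hypothetical call, assuming a local examples/<name> checkout; the example name and layer name below are placeholders only:

grads = get_example_data("tal1_model", layer="some_conv_layer", writer=None)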
Example No. 9
def test_extractor_model(example):
    """Test extractor
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")
    #
    example_dir = "examples/{0}".format(example)
    # install the dependencies
    # - TODO maybe put it implicitly in load_dataloader?
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)
    #
    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")
    #
    test_kwargs = get_test_kwargs(example_dir)
    #
    # install the dependencies
    # - TODO maybe put it implicitly in load_extractor?
    if INSTALL_REQ:
        install_model_requirements(example_dir, source="dir")
    #
    # get model
    model = kipoi.get_model(example_dir, source="dir")
    #
    with cd(example_dir + "/example_files"):
        # initialize the dataloader
        dataloader = Dl(**test_kwargs)
        #
        # sample a batch of data
        it = dataloader.batch_iter()
        batch = next(it)
        # predict with a model
        model.predict_on_batch(batch["inputs"])
        model.pred_grad(batch["inputs"], Slice_conv()[:, 0])
Example No. 10
def get_pwm_model(verbose=False):
    '''
    Get the subset of the DeepSEA model used to construct PWMs from input sequences

    Returns
    -------
    pwm_model : 1st convolutional layer + activation stage model
    rc_model : reverse complement data set model
    '''
    
    # load in model
    deep_sea = kipoi.get_model("DeepSEA/predict")
    
    # freeze all weights in pre-trained model
    for name, param in deep_sea.model.named_parameters():
        param.requires_grad = False
        if verbose:
            print('name: ', name)
            print(type(param))
            print('param.shape: ', param.shape)
            print('param.requires_grad: ', param.requires_grad)
            print('=====')
    
    pwm_model = nn.Sequential(*list(deep_sea.model[0:2]), 
                              nn.Sequential(*deep_sea.model[2][0:2]))
    
    rc_model = nn.Sequential(*list(deep_sea.model[0:2]))
    
    return pwm_model, rc_model
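A usage sketch for the two sub-models, assuming the DeepSEA/predict PyTorch port consumes one-hot sequences shaped (batch, 4, 1, 1000); the random tensor is only a shape stand-in:

import torch

pwm_model, rc_model = get_pwm_model()
dummy = torch.randn(2, 4, 1, 1000)  # stand-in for one-hot encoded DNA
acts = pwm_model(dummy)             # output of the first conv layer + activation
feats = rc_model(dummy)             # output of the first two top-level modules only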
Example No. 11
def test_dataloader_model(example):
    """Test dataloader
    """
    if example in {"rbp", "iris_model_template"} and sys.version_info[0] == 2:
        pytest.skip("example not supported on python 2 ")

    example_dir = "example/models/{0}".format(example)

    # install the dependencies
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")

    test_kwargs = Dl.example_kwargs

    # get model
    model = kipoi.get_model(example_dir, source="dir")

    with kipoi_utils.utils.cd(example_dir):
        # initialize the dataloader
        dataloader = Dl(**test_kwargs)

        # sample a batch of data
        it = dataloader.batch_iter()
        batch = next(it)
        # predict with a model
        model.predict_on_batch(batch["inputs"])
Example No. 12
def test_var_eff_pred_varseq(tmpdir):
    model_name = "DeepSEA/variantEffects"
    if INSTALL_REQ:
        install_model_requirements(model_name, "kipoi", and_dataloaders=True)
    #
    model = kipoi.get_model(model_name, source="kipoi")
    # The preprocessor
    Dataloader = SeqIntervalDl
    #
    dataloader_arguments = {"intervals_file": "example_files/intervals.bed",
                            "fasta_file": "example_files/hg38_chr22.fa",
                            "required_seq_len": 1000, "alphabet_axis": 1, "dummy_axis": 2, "label_dtype": str}
    dataloader_arguments = {k: model.source_dir + "/" + v if isinstance(v, str) else v for k, v in
                            dataloader_arguments.items()}

    vcf_path = "tests/data/variants.vcf"
    out_vcf_fpath = str(tmpdir.mkdir("variants_generated").join("out.vcf"))
    #
    vcf_path = kipoi_veff.ensure_tabixed_vcf(vcf_path)
    model_info = kipoi_veff.ModelInfoExtractor(model, Dataloader)
    writer = kipoi_veff.VcfWriter(
        model, vcf_path, out_vcf_fpath, standardise_var_id=True)
    vcf_to_region = kipoi_veff.SnvCenteredRg(model_info)
    res = sp.predict_snvs(model, Dataloader, vcf_path, dataloader_args=dataloader_arguments,
                          batch_size=32,
                          vcf_to_region=vcf_to_region,
                          sync_pred_writer=writer)
    writer.close()
    assert os.path.exists(out_vcf_fpath)
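A short sketch of inspecting the annotated output VCF, assuming pysam is available (any VCF reader works; the INFO field names written by kipoi_veff are model-specific):

from pysam import VariantFile

for rec in VariantFile(out_vcf_fpath):
    print(rec.id, dict(rec.info))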
Example No. 13
def test_loading():
    model_path = "example/models/pyt/model_files/"
    model_path_class_model = "example/models/pyt_class/"
    # load model and weights explicitly
    with pytest.raises(Exception):
        m1 = PyTorchModel(weights=model_path + "only_weights.pth")
    with pytest.raises(Exception):
        m1 = PyTorchModel(module_file=model_path + "pyt.py",
                          weights=model_path + "only_weights.pth")
    with cd(model_path):
        m1 = PyTorchModel(module_obj="pyt.simple_model",
                          weights="only_weights.pth")
    m1 = PyTorchModel(module_file=model_path + "pyt.py",
                      weights=model_path + "only_weights.pth",
                      module_obj="simple_model")
    m1 = PyTorchModel(module_file=THISFILE,
                      weights=PYT_NET_MODEL_WEIGHTS_FILE,
                      module_class="PyTNet")
    m1 = PyTorchModel(module_file=THISFILE,
                      weights=PYT_NET_MODEL_WEIGHTS_FILE,
                      module_class="PyTNet",
                      module_kwargs={})
    m1 = PyTorchModel(module_file=THISFILE,
                      weights=PYT_NET_MODEL_WEIGHTS_FILE,
                      module_class="PyTNet",
                      module_kwargs="{}")
    # test loading the class from a full yaml definition with module_kwargs:
    mh = kipoi.get_model(model_path_class_model, "dir")
    # Load the test files from model source
    mh.pipeline.predict_example(batch_size=3)
Example No. 14
def cli_info(command, raw_args):
    """CLI interface to predict
    """
    assert command == "info"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Prints dataloader" +
                                                 " keyword arguments.")
    parser.add_argument("-i", "--install_req", action='store_true',
                        help="Install required packages from requirements.txt")
    add_model(parser)
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)

    # --------------------------------------------
    # install args
    if args.install_req:
        kipoi.pipeline.install_model_requirements(args.model,
                                                  args.source,
                                                  and_dataloaders=True)
    # load model & dataloader
    model = kipoi.get_model(args.model, args.source)

    if args.dataloader is not None:
        dl_info = "dataloader '{0}' from source '{1}'".format(str(args.dataloader), str(args.dataloader_source))
        Dl = kipoi.get_dataloader_factory(args.dataloader, args.dataloader_source)
    else:
        dl_info = "default dataloader for model '{0}' from source '{1}'".format(str(model.name), str(args.source))
        Dl = model.default_dataloader

    print("-" * 80)
    print("Displaying keyword arguments for {0}".format(dl_info))
    print(kipoi.print_dl_kwargs(Dl))
    print("-" * 80)
Example No. 15
def cli_test(command, raw_args):
    """Runs test on the model
    """
    assert command == "test"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='script to test model zoo submissions')
    add_model(parser, source="dir")
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Batch size to use in prediction')
    parser.add_argument("-i", "--install_req", action='store_true',
                        help="Install required packages from requirements.txt")
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    if args.install_req:
        kipoi.pipeline.install_model_requirements(args.model,
                                                  args.source,
                                                  and_dataloaders=True)
    mh = kipoi.get_model(args.model, args.source)

    if not mh._sufficient_deps(mh.dependencies):
        # model requirements should be installed
        logger.warn("Required package '{0}' for model type: {1} is not listed in the dependencies".
                    format(mh.MODEL_PACKAGE, mh.type))

    # Load the test files from model source
    # with cd(mh.source_dir):
    mh.pipeline.predict_example(batch_size=args.batch_size)
    # if not match:
    #     # logger.error("Expected targets don't match model predictions")
    #     raise Exception("Expected targets don't match model predictions")

    logger.info('Successfully ran test_predict')
Example No. 16
def test_gradient_pipeline():
    model = kipoi.get_model("Basset", source="kipoi")
    dl_kwargs = model.default_dataloader.example_kwargs
    with cd(model.source_dir):
        ret = model.pipeline.input_grad(dl_kwargs,
                                        final_layer=True,
                                        avg_func="sum")
    assert all(k in ret for k in ['targets', 'metadata', 'inputs', 'grads'])
Example No. 17
def test_parameter_overriding(tmpdir):
    if sys.version_info[0] == 2:
        pytest.skip("example not supported on python 2 ")

    model = "example/models/kipoi_dataloader_decorator"
    m = kipoi.get_model(cp_tmpdir(model, tmpdir), source='dir')
    dl = m.default_dataloader.init_example()
    assert dl.dummy == 10
Example No. 18
def test_predict_to_file(tmpdir):
    h5_tmpfile = str(tmpdir.mkdir("example").join("out.h5"))
    model = kipoi.get_model("Basset", source="kipoi")
    dl_kwargs = model.default_dataloader.example_kwargs
    with cd(model.source_dir):
        model.pipeline.predict_to_file(h5_tmpfile, dl_kwargs)
    preds = kipoi.readers.HDF5Reader.load(h5_tmpfile)
    assert 'preds' in preds
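HDF5Reader.load returns a (possibly nested) dict of arrays, so the predictions can be inspected directly; a sketch, assuming 'preds' maps to a plain array here:

import numpy as np

pred_array = np.asarray(preds['preds'])
print(pred_array.shape, pred_array.dtype)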
Example No. 19
def test_deeplift():
    # return True
    example = "tal1_model"
    layer = predict_activation_layers[example]
    example_dir = "tests/models/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(example_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(example_dir, source="dir")
    #
    with open(example_dir + "/example_files/test.json", "r") as ifh:
        dataloader_arguments = json.load(ifh)

    for k in dataloader_arguments:
        dataloader_arguments[k] = "example_files/" + dataloader_arguments[k]

    d = DeepLift(model,
                 output_layer=-2,
                 task_idx=0,
                 preact=None,
                 mxts_mode='grad_times_inp')

    new_ofname = model.source_dir + "/example_files/deeplift_grads_pred.hdf5"
    if os.path.exists(new_ofname):
        os.unlink(new_ofname)

    writer = writers.HDF5BatchWriter(file_path=new_ofname)

    with kipoi.utils.cd(model.source_dir):
        dl = Dataloader(**dataloader_arguments)
        it = dl.batch_iter(batch_size=32, num_workers=0)
        # Loop through the data, make predictions, save the output
        for i, batch in enumerate(tqdm(it)):
            # make the prediction
            pred_batch = d.score(batch['inputs'], None)

            # Using Avanti's recommendation to check whether the model conversion has worked.
            pred_batch_fwd = d.predict_on_batch(batch['inputs'])
            orig_pred_batch_fwd = model.predict_on_batch(batch['inputs'])
            assert np.all(pred_batch_fwd == orig_pred_batch_fwd)

        output_batch = batch
        output_batch["input_grad"] = pred_batch
        writer.batch_write(output_batch)
    writer.close()

    new_res = readers.HDF5Reader.load(new_ofname)
    ref_res = readers.HDF5Reader.load(model.source_dir +
                                      "/example_files/grads.hdf5")
    assert np.all(
        np.isclose(new_res['input_grad'],
                   (ref_res['inputs'] * ref_res['grads'])))

    if os.path.exists(new_ofname):
        os.unlink(new_ofname)
Example No. 20
    def __init__(self,
                 model_name,
                 model_base_dir="DeepBind/Homo_sapiens/TF/",
                 snp_vcf_path='../data/snp_vcfs'):
        self.model_name = model_name
        self.model_path = model_base_dir + model_name
        print(self.model_path)
        self.model = kipoi.get_model(self.model_path)
        self.snp_vcf_path = snp_vcf_path
Example No. 21
    def __init__(self):
        self.model_names = read_txt("models.txt")
        # hard-code the source for these models:
        # if we used `source='dir'`, the models wouldn't be updated
        self.models = [
            kipoi.get_model("CpGenie/{0}".format(m),
                            source='kipoi',
                            with_dataloader=False) for m in self.model_names
        ]
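A hypothetical companion method for the class above, averaging predictions across the loaded CpGenie models (the method name and batch layout are assumptions):

    def predict_avg(self, batch):
        # mean prediction across all single-TF CpGenie models
        import numpy as np
        return np.mean([m.predict_on_batch(batch) for m in self.models], axis=0)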
Example No. 22
    def __init__(self):
        self.don_model = load_don_model()
        self.acc_model = load_acc_model()
        self.features_metadata = load_features_metadata()
        # acceptor and donor site indexes are unified across SOI
        # NB! These indexes are pos=1 of the region, and index -1 is already pos=-1, not 0!
        self.don_i = 3
        self.acc_i = -21
        self.labranchor = kipoi.get_model("labranchor", with_dataloader=False)
        # add the current dir to the python path for multiprocessing
        sys.path.append(this_dir)
Example No. 23
    def __init__(self, acc_model, don_model, features_path=None):
        self.don_model = joblib.load(don_model)
        self.acc_model = joblib.load(acc_model)
        if features_path is None:
            features_path = os.path.join(this_dir, "../features.json")
        self.features_metadata = read_json(features_path)
        # acceptor and donor site indexes are unified across SOI
        # NB! These indexes are pos=1 of the region, and index -1 is already pos=-1, not 0!
        self.don_i = 3
        self.acc_i = -21
        self.labranchor = kipoi.get_model("labranchor", with_dataloader=False)
        # add the current dir to the python path for multiprocessing
        sys.path.append(this_dir)
Example No. 24
    def __init__(self, model_name):
        self.model_description = kipoi.get_model_descr(model_name)

        if not is_sequence_model(self.model_description):
            raise ValueError(
                f'Model {model_name} is not a sequence model. '
                f'Its dataloader is: {self.model_description.default_dataloader()}'
            )

        self.model = kipoi.get_model(model_name, with_dataloader=False)
        self.transform = helpers.get_transform(
            **self.model_description.default_dataloader.default_args)
        self.sequence_length = helpers.get_sequence_length(model_name)
Example No. 25
def test_mutation_map():
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    # Take the rbp model
    model_dir = "examples/rbp/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    #
    dataloader_arguments = {
        "fasta_file": "example_files/hg38_chr22.fa",
        "preproc_transformer": "dataloader_files/encodeSplines.pkl",
        "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
    }
    dataloader_arguments = {
        k: model_dir + v
        for k, v in dataloader_arguments.items()
    }
    #
    # Run the actual predictions
    vcf_path = model_dir + "example_files/first_variant.vcf"
    #
    model_info = kipoi.postprocessing.variant_effects.ModelInfoExtractor(
        model, Dataloader)
    vcf_to_region = kipoi.postprocessing.variant_effects.SnvCenteredRg(
        model_info)
    mdmm = mm._generate_mutation_map(
        model,
        Dataloader,
        vcf_path,
        dataloader_args=dataloader_arguments,
        evaluation_function=analyse_model_preds,
        batch_size=32,
        vcf_to_region=vcf_to_region,
        evaluation_function_kwargs={'diff_types': {
            'diff': Diff("mean")
        }})
    with cd(model.source_dir):
        mdmm.save_to_file("example_files/first_variant_mm_totest.hdf5")
        from kipoi.postprocessing.variant_effects.utils.generic import read_hdf5
        reference = read_hdf5("example_files/first_variant_mm.hdf5")
        obs = read_hdf5("example_files/first_variant_mm_totest.hdf5")
        compare_rec(reference[0], obs[0])
        import matplotlib
        matplotlib.pyplot.switch_backend('agg')
        mdmm.plot_mutmap(0, "seq", "diff", "rbp_prb")
        os.unlink("example_files/first_variant_mm_totest.hdf5")
Example No. 26
def test_var_eff_pred_varseq():
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")
    model_dir = "examples/var_seqlen_model/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)
    #
    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    #
    dataloader_arguments = {
        "fasta_file": "example_files/hg38_chr22.fa",
        "preproc_transformer": "dataloader_files/encodeSplines.pkl",
        "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
        "intervals_file": "example_files/variant_centered_intervals.tsv"
    }
    vcf_path = "example_files/variants.vcf"
    out_vcf_fpath = "example_files/variants_generated.vcf"
    ref_out_vcf_fpath = "example_files/variants_ref_out.vcf"
    #
    with cd(model.source_dir):
        vcf_path = kipoi.postprocessing.variant_effects.ensure_tabixed_vcf(
            vcf_path)
        model_info = kipoi.postprocessing.variant_effects.ModelInfoExtractor(
            model, Dataloader)
        writer = kipoi.postprocessing.variant_effects.VcfWriter(
            model, vcf_path, out_vcf_fpath)
        vcf_to_region = None
        with pytest.raises(Exception):
            # This has to raise an exception as the sequence length is None.
            vcf_to_region = kipoi.postprocessing.variant_effects.SnvCenteredRg(
                model_info)
        res = sp.predict_snvs(
            model,
            Dataloader,
            vcf_path,
            dataloader_args=dataloader_arguments,
            evaluation_function=analyse_model_preds,
            batch_size=32,
            vcf_to_region=vcf_to_region,
            evaluation_function_kwargs={'diff_types': {
                'diff': Diff("mean")
            }},
            sync_pred_writer=writer)
        writer.close()
        # pass
        # assert filecmp.cmp(out_vcf_fpath, ref_out_vcf_fpath)
        compare_vcfs(out_vcf_fpath, ref_out_vcf_fpath)
        os.unlink(out_vcf_fpath)
Example No. 27
def test_var_eff_pred2():
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")
    # Take the rbp model
    model_dir = "examples/rbp/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)
    #
    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    #
    dataloader_arguments = {
        "fasta_file": "example_files/hg38_chr22.fa",
        "preproc_transformer": "dataloader_files/encodeSplines.pkl",
        "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
    }
    #
    # Run the actual predictions
    vcf_path = "example_files/variants.vcf"
    out_vcf_fpath = "example_files/variants_generated2.vcf"
    ref_out_vcf_fpath = "example_files/variants_ref_out2.vcf"
    restricted_regions_fpath = "example_files/restricted_regions.bed"
    #
    with cd(model.source_dir):
        pbd = pb.BedTool(restricted_regions_fpath)
        model_info = kipoi.postprocessing.variant_effects.ModelInfoExtractor(
            model, Dataloader)
        vcf_to_region = kipoi.postprocessing.variant_effects.SnvPosRestrictedRg(
            model_info, pbd)
        writer = kipoi.postprocessing.variant_effects.utils.io.VcfWriter(
            model, vcf_path, out_vcf_fpath)
        res = sp.predict_snvs(
            model,
            Dataloader,
            vcf_path,
            dataloader_args=dataloader_arguments,
            evaluation_function=analyse_model_preds,
            batch_size=32,
            vcf_to_region=vcf_to_region,
            evaluation_function_kwargs={'diff_types': {
                'diff': Diff("mean")
            }},
            sync_pred_writer=writer)
        writer.close()
        # pass
        #assert filecmp.cmp(out_vcf_fpath, ref_out_vcf_fpath)
        compare_vcfs(out_vcf_fpath, ref_out_vcf_fpath)
        os.unlink(out_vcf_fpath)
Example No. 28
def test_ref_seq():
    model_root = "./"
    model_dir = model_root+"./"
    model = kipoi.get_model(model_dir, source="dir")

    with h5py.File("test_files/encode_roadmap_short.h5", "r") as ifh:
        ref_in = ifh["test_in"][:]
    ref_in = np.swapaxes(ref_in, 2, 3)
    res_orig = pd.read_csv("/nfs/research1/stegle/users/rkreuzhu/deeplearning/Basset/data/encode_roadmap_short_pred.txt", sep="\t", header=None)

    for i in tqdm(range(int(np.ceil(ref_in.shape[0]/32)))):
        irange = [i*32, min((i+1)*32, ref_in.shape[0])]
        preds = model.predict_on_batch(ref_in[irange[0]:irange[1],...])
        assert np.isclose(preds, res_orig.values[irange[0]:irange[1],...], atol=1e-3).all()
Example No. 29
def test_ref_seq():
    ### Get pure fasta predictions
    model_dir = model_root + "./"
    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    dataloader_arguments = {
        "fasta_file": "/nfs/research1/stegle/users/rkreuzhu/opt/manuscript_code/data/raw/dataloader_files/shared/hg19.fa",
        "intervals_file": "test_files/encode_roadmap.bed"
    }
    # predict using results
    preds = model.pipeline.predict(dataloader_arguments)
    #
    res_orig = pd.read_csv("/nfs/research1/stegle/users/rkreuzhu/deeplearning/Basset/data/encode_roadmap_short_pred.txt", sep="\t", header=None)
    assert np.isclose(preds, res_orig.values, atol=1e-3).all()
Example No. 30
def test_gradient_function_model(example):
    """Test extractor
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    import keras
    backend = keras.backend._BACKEND
    if backend == 'theano' and example == "rbp":
        pytest.skip("extended_coda example not with theano ")
    #
    example_dir = "examples/{0}".format(example)
    # install the dependencies
    # - TODO maybe put it implicitly in load_dataloader?
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)
    #
    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")
    #
    test_kwargs = get_test_kwargs(example_dir)
    #
    # install the dependencies
    # - TODO maybe put it implicitly in load_extractor?
    if INSTALL_REQ:
        install_model_requirements(example_dir, source="dir")
    #
    # get model
    model = kipoi.get_model(example_dir, source="dir")
    #
    with cd(example_dir + "/example_files"):
        # initialize the dataloader
        dataloader = Dl(**test_kwargs)
        #
        # sample a batch of data
        it = dataloader.batch_iter()
        batch = next(it)
        # predict with a model
        model.predict_on_batch(batch["inputs"])
        if backend != 'theano':
            model.input_grad(batch["inputs"],
                             Slice_conv()[:, 0],
                             pre_nonlinearity=True)
        model.input_grad(batch["inputs"],
                         Slice_conv()[:, 0],
                         pre_nonlinearity=False)
        model.input_grad(batch["inputs"], 0,
                         pre_nonlinearity=False)  # same as Slice_conv()[:, 0]
        model.input_grad(batch["inputs"], avg_func="sum")