示例#1
0
def install_dataloader_requirements(dataloader, source="kipoi"):
    """Install dataloader dependencies

    # Arguments
        dataloader (str): dataloader name
        source (str): model source
    """
    # Resolve the *dataloader* description; the original called
    # get_model_descr, which looks the name up in the model namespace
    # and would miss or mis-resolve a dataloader name.
    kipoi.get_source(source).get_dataloader_descr(dataloader).dependencies.install()
示例#2
0
def test_generate_env_db_entry():
    # test in general and test whether the automatic generation of sub-models works, also in combination
    # with a clearly defined model
    import yaml
    import kipoi
    import time
    from kipoi.cli.parser_utils import parse_source_name
    kwargs = {
        "dataloader": [],
        "env": "test_env",
        "gpu": True,
        "model": None,
        "source": "dir",
        "tmpdir": "something",
        "vep": True
    }
    source_path = kipoi.get_source("dir").local_path
    kipoi_path = kipoi.get_source("kipoi").local_path
    for model in [["example/models/pyt"],
                  [
                      "example/models/shared/envs/kipoi-py3-keras1.2",
                      "example/models/pyt"
                  ]]:
        kwargs['model'] = model
        db_entry = generate_env_db_entry(get_args(kwargs)())
        # every CLI kwarg must round-trip into the entry's create_args
        assert all(
            [kwargs[k] == getattr(db_entry.create_args, k) for k in kwargs])

        # generate the reference output
        special_envs, only_models = split_models_special_envs(model)
        sub_models = []
        # use a distinct loop variable so the outer `model` (the test
        # case currently being checked) is not shadowed/clobbered
        for only_model in only_models:
            parsed_source, parsed_model = parse_source_name(
                kwargs["source"], only_model)
            sub_models.extend([
                os.path.join(source_path, e) for e in list_subcomponents(
                    parsed_model, parsed_source, "model")
            ])
        if len(special_envs) != 0:
            with open("example/models/shared/envs/models.yaml", "r") as fh:
                # safe_load: yaml.load without an explicit Loader is
                # deprecated and unsafe (PyYAML >= 5.1)
                special_env_models = yaml.safe_load(fh)
            for special_env in special_envs:
                for model_group_name in special_env_models[os.path.basename(
                        special_env)]:
                    sub_models.extend([
                        os.path.join(kipoi_path,
                                     e) for e in list_subcomponents(
                                         model_group_name, "kipoi", "model")
                    ])

        assert set(db_entry.compatible_models) == set(sub_models)
        assert db_entry.cli_path is None
        assert db_entry.successful == False
        assert db_entry.kipoi_version == kipoi.__version__
        assert db_entry.timestamp < time.time()
示例#3
0
def list_groups(group_name=None):
    """Group list view.

    Renders the model-group index page for ``group_name`` (or the root
    listing when ``group_name`` is None), attaching a rendered README
    if one exists in the group directory.
    """
    source = current_app.config['SOURCE']
    if group_name is None:
        group_name = ""
    group_name = group_name.rstrip('/')
    group_df = get_model_groups(source, group_name)
    group_list = group_df.to_dict(orient='records')
    # parse cite_as
    group_list = [update_cite_as_dict(x) for x in group_list]

    # update contributors
    group_list = [update_contributors_as_dict(x) for x in group_list]

    # update authors
    group_list = [update_authors_as_dict(x) for x in group_list]

    # get readme file
    readme_dir = os.path.join(
        kipoi.get_source(current_app.config['SOURCE']).local_path, group_name)
    try:
        # look the README up case-insensitively (it may be named
        # README.md, readme.md, ... on a case-sensitive filesystem)
        filelists = os.listdir(readme_dir)
        readmeindx = [x.lower() for x in filelists].index("readme.md")
        # context manager so the handle is closed; the original leaked
        # it via open(...).read()
        with open(os.path.join(readme_dir, filelists[readmeindx]),
                  "r") as fh:
            filecontent = fh.read()
        readmecontent = render_markdown(filecontent)
    except IOError:
        readmecontent = ""
    except ValueError:
        # .index() raises ValueError when no readme file is present
        readmecontent = ""

    return render_template("models/index_groups.html",
                           groups=group_list,
                           readmecontent=readmecontent)
示例#4
0
def cli_get_example(command, raw_args):
    """Downloads the example files to the desired directory
    """
    assert command == "get-example"
    # build the CLI argument parser
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Get example files')
    add_model(parser, source="kipoi")
    parser.add_argument("-o", "--output", default="example", required=False,
                        help="Output directory where to store the examples. Default: 'example'")
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    md = kipoi.get_model_descr(args.model, args.source)
    src = kipoi.get_source(args.source)

    # resolve the default dataloader description
    if not isinstance(md.default_dataloader, kipoi.specs.DataLoaderImport):
        # string spec: the dataloader path is relative to the model dir
        dl_descr = kipoi.get_dataloader_descr(
            os.path.join(args.model, md.default_dataloader),
            source=args.source)
    else:
        # import-style spec: instantiate from inside the model directory
        with cd(src.get_model_dir(args.model)):
            dl_descr = md.default_dataloader.get()

    kwargs = dl_descr.download_example(output_dir=args.output, dry_run=False)

    logger.info("Example files downloaded to: {}".format(args.output))
    logger.info("use the following dataloader kwargs:")
    print(json.dumps(kwargs))
示例#5
0
def get_environments(source):
    """Cache for kipoi's environments"""
    import os
    from kipoi.utils import read_yaml
    # environments live in a fixed yaml file inside the source checkout
    yaml_path = os.path.join(kipoi.get_source(source).local_path,
                             'shared/envs/models.yaml')
    return read_yaml(yaml_path)
示例#6
0
def test_list_models_group():
    # the per-group model summary from the main kipoi source
    grouped = kipoi.get_source("kipoi").list_models_by_group()
    expected_columns = ["group", "N_models", "N_subgroups", "is_group",
                        "authors", "contributors",
                        "veff_score_variants",
                        "type", "license", "cite_as", "tags"]
    assert list(grouped.columns) == expected_columns
    assert len(grouped) > 0
    # exactly one group should be named "CpGenie"
    assert grouped.group.str.contains("^CpGenie$").sum() == 1
示例#7
0
def get_dataloader_descr(model_name, source):
    """Resolve the default dataloader description for a model."""
    from kipoi.utils import cd
    src = kipoi.get_source(source)
    md = kipoi.get_model_descr(model_name, source=source)
    if not isinstance(md.default_dataloader, str):
        # import-style spec: instantiate inside the model directory
        with cd(src.get_model_dir(model_name)):
            return md.default_dataloader.get()
    # string spec: the dataloader path is relative to the model
    return kipoi.get_dataloader_descr(
        os.path.join(model_name, md.default_dataloader), source=source)
示例#8
0
def container_remote_url(model, source='kipoi'):
    """Return the singularity-container entry for a model ({} if none)."""
    src = get_source(source)
    json_path = os.path.join(src.local_path, CONTAINER_PREFIX,
                             "model-to-singularity.json")
    with open(json_path, 'r') as fh:
        containers = json.load(fh)
    # Exact match such as MMSplice/mtsplice and APARENT/veff, Basset
    if model in containers:
        return containers[model]
    # otherwise fall back to the top-level model group
    group = model.split('/')[0]
    if group in containers:
        return containers[group]
    return {}
示例#9
0
def generate_env_db_entry(args, args_env_overload=None):
    """Create an EnvDbEntry describing the environment requested by ``args``.

    # Arguments
        args: parsed CLI arguments; reads args.model, args.source and
            stores all kwargs (via args._get_kwargs()) in create_args
        args_env_overload (str): if given, overrides the env name
            stored in the entry's create_args

    # Returns
        EnvDbEntry with all compatible (sub-)models resolved to
        absolute paths
    """
    from collections import OrderedDict
    from kipoi.conda.env_db import EnvDbEntry
    from kipoi.conda import get_conda_version

    special_envs, only_models = split_models_special_envs(args.model)

    sub_models = []
    for model in only_models:
        parsed_source, parsed_model = parse_source_name(args.source, model)
        source_path = kipoi.get_source(parsed_source).local_path
        models = list_subcomponents(parsed_model, parsed_source, "model")
        sub_models.extend([os.path.join(source_path, m) for m in models])

    if len(special_envs) != 0:
        # for the special envs load the corresponding models;
        # hoist the loop-invariant source lookups out of the loops
        args_source_path = kipoi.get_source(args.source).local_path
        # models covered by the handcrafted environments **always**
        # refer to the kipoi source
        kipoi_source_path = kipoi.get_source("kipoi").local_path
        for special_env in special_envs:
            special_env_folder = "/".join(
                special_env.rstrip("/").split("/")[:-1])
            with open(
                    os.path.join(args_source_path, special_env_folder,
                                 "models.yaml"), "r") as fh:
                # safe_load: yaml.load without an explicit Loader is
                # deprecated and unsafe (PyYAML >= 5.1)
                special_env_models = yaml.safe_load(fh)
            # extend the sub_models by all the submodels covered by the
            # handcrafted environments (special_envs)
            for model_group_name in special_env_models[os.path.basename(
                    special_env)]:
                models = list_subcomponents(model_group_name, "kipoi", "model")
                sub_models.extend(
                    [os.path.join(kipoi_source_path, m) for m in models])

    entry = EnvDbEntry(conda_version=get_conda_version(),
                       kipoi_version=kipoi.__version__,
                       timestamp=time.time(),
                       compatible_models=sub_models,
                       create_args=OrderedDict(args._get_kwargs()))
    if args_env_overload is not None:
        entry.create_args.env = args_env_overload
    return entry
示例#10
0
def test_list_softlink_dependencies():
    """Test if finding model dependencies works
    """
    component_dir = kipoi.get_source("kipoi").local_path
    hal_deps = list_softlink_dependencies(os.path.join(component_dir, 'HAL'),
                                          component_dir)
    # one of these two, depending on the model source
    expected_small = {'MaxEntScan'}
    expected_full = {'MaxEntScan/template',
                     'MaxEntScan/template/example_files',
                     'labranchor/example_files'}
    assert hal_deps in (expected_small, expected_full)
    # deepTarget has no softlinked dependencies
    assert list_softlink_dependencies(
        os.path.join(component_dir, 'deepTarget'), component_dir) == set()
示例#11
0
def install_model_requirements(model, source="kipoi", and_dataloaders=True):
    """Install model dependencies

    # Arguments
        model (str): model name
        source (str): model source
        and_dataloaders (bool): if True, install also the dependencies
            for the default dataloader
    """
    md = kipoi.get_source(source).get_model_descr(model)
    md.dependencies.install()
    if and_dataloaders:
        # "source:path" selects the dataloader from an explicit source;
        # otherwise it comes from the model's own source
        if ":" in md.default_dataloader:
            dl_source, dl_path = md.default_dataloader.split(":")
        else:
            dl_source = source
            dl_path = md.default_dataloader

        # NOTE(review): the "/" prefix + [1:] appears to keep the join
        # well-formed when dl_path is absolute -- confirm against callers
        default_dataloader_path = os.path.join("/" + model, dl_path)[1:]
        dl = kipoi.config.get_source(dl_source).get_dataloader_descr(
            default_dataloader_path)
        dl.dependencies.install()
示例#12
0
def all_urls():
    """Collect every group/model URL served by the app."""
    df = kipoi.get_source("kipoi").list_models()
    model = df.model
    # every model plus each of its ancestor directories gets a URL
    urls = set()
    for name in model:
        current = name
        while current:
            urls.add(current)
            current = os.path.dirname(current)
    # keep only paths rendered with the group-list view ...
    groups = {x for x in urls if get_view(x, df)[0] == "group_list"}
    # ... excluding the final models themselves
    groups = groups - set(model)

    return (["/", "/groups/"]
            + ["/groups/{0}/".format(x) for x in groups]
            + ["/models/{0}/".format(x) for x in urls])
示例#13
0
def get_envs_by_model(models, source, only_most_recent=True, only_valid=False):
    """Look up environment-db entries for the given models."""
    source_path = kipoi.get_source(source).local_path
    db = get_model_env_db()
    entries = []
    for model in models:
        # entries are keyed by the model's absolute path
        found = db.get_entry_by_model(os.path.join(source_path, model),
                                      only_most_recent=only_most_recent,
                                      only_valid=only_valid)
        if only_most_recent:
            entries.append(found)
        else:
            entries.extend(found)
    # drop models without a db entry (lookup may return None)
    return [entry for entry in entries if entry is not None]
示例#14
0
File: sources.py  Project: yynst2/kipoi
def list_subcomponents(component, source, which="model"):
    """List all the available subcomponents

    Args:
      component: component name or a prefix of one: e.g. instead of
        Model1/CTCF we can give Model1 and then all the sub-models
        would be included
      source: model source
      which: component type to list (e.g. "model" or "dataloader")

    Returns:
      list of component names; "/template" entries are excluded from
      prefix matches
    """
    src = kipoi.get_source(source)
    if src._is_component(component, which):
        # exact component name -> return it as-is
        return [component]
    else:
        # prefix match over all components of the requested type
        return [x for x in src._list_components(which)
                if x.startswith(component) and "/template" not in x]
示例#15
0
def cli_ls(command, raw_args):
    """List all kipoi models
    """
    assert command == "ls"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Lists available models")
    parser.add_argument("group_filter", nargs='?', default='',
                        help="A relative path to the model group used to subset the model list. Use 'all' to show all models")
    parser.add_argument("--tsv", action='store_true',
                        help="Print the output in the tsv format.")
    add_source(parser)
    parsed = parser.parse_args(raw_args)
    # fetch the model table from the chosen source and print it
    source = kipoi.get_source(parsed.source)
    ls_helper(source.list_models(), parsed.group_filter, parsed.tsv)
示例#16
0
def test_list_softlink_dependencies():
    """Test if finding model dependencies works
    """
    component_dir = kipoi.get_source("kipoi").local_path

    def deps_of(model):
        # helper: softlink dependencies of one model directory
        return list_softlink_dependencies(
            os.path.join(component_dir, model), component_dir)

    assert deps_of('rbp_eclip/UPF1') == {'rbp_eclip/template'}
    assert deps_of('HAL') == {'MaxEntScan/template',
                              'MaxEntScan/template/example_files',
                              'labranchor/example_files'}
    assert deps_of('deepTarget') == set()
示例#17
0
File: env.py  Project: VolkerH/kipoi
def get_envs_by_model(models, source, only_most_recent=True, only_valid=False):
    """Look up environment-db entries for one model or a list of models.

    # Arguments
        models (str or list[str]): model name(s)
        source (str): model source name
        only_most_recent (bool): keep only the newest entry per model
        only_valid (bool): return only valid environments

    # Returns
        list of db entries (None results are filtered out)
    """
    if isinstance(models, str):
        models = [models]

    # NOTE: the original also computed kipoi.get_source(source).local_path
    # here but never used it; the dead lookup has been removed.
    entries = []
    db = env_db.get_model_env_db()
    for m in models:
        # _env_db_model_name prefixes non-kipoi models with their
        # source's local path
        res = db.get_entry_by_model(_env_db_model_name(source, m),
                                    only_most_recent=only_most_recent,
                                    only_valid=only_valid)
        if only_most_recent:
            entries.append(res)
        else:
            entries.extend(res)
    entries = [e for e in entries if e is not None]
    return entries
示例#18
0
def test_env_db_kipoi(tmpdir, monkeypatch):
    """End-to-end check that EnvDb lookups keep `kipoi` and `dir`
    source entries apart (full paths are only used for `dir` models)."""
    # Test the kipoi vs. dir path ambiguation
    # Test the DeepSEA model using the `kipoi` and the `dir` sources
    # Test the `shared/envs/kipoi-py3-keras1.2.yaml` model using the `kipoi` and the `dir` sources
    json_file = os.path.join(str(tmpdir), "db.json")
    sample_cli_path = os.path.join(str(tmpdir), "sample")
    with open(sample_cli_path, "w") as fh:
        fh.write("")

    db = EnvDb(json_file)
    kwargs = {"dataloader": [], "gpu": True, "model": None, "source": "kipoi",
              "tmpdir": "something", "vep": True}

    # generate the kipoi entries
    kipoi_entries = []
    for model in [["DeepSEA"], ["shared/envs/kipoi-py3-keras1.2"]]:
        kwargs['model'] = model
        db_entry = generate_env_db_entry(get_args(kwargs)())
        db.append(db_entry)
        kipoi_entries.append(db_entry)

    # generate the dir entries (models are given as absolute paths)
    dir_entries = []
    local_path = kipoi.get_source("dir").local_path
    kwargs["source"] = "dir"
    for model in [["example/models/pyt"], ["example/models/shared/envs/kipoi-py3-keras1.2"]]:
        kwargs['model'] = [os.path.join(local_path,model[0])]
        db_entry = generate_env_db_entry(get_args(kwargs)())
        db.append(db_entry)
        dir_entries.append(db_entry)

    # make sure there is no mixup between the kipoi and dir models and make sure the full path is only used
    # for dir models

    assert db.get_entry_by_model("DeepSEA", only_most_recent=False) == [kipoi_entries[0]]
    # CpGenie/merged is covered by the shared keras1.2 env in both sources
    assert db.get_entry_by_model("CpGenie/merged", only_most_recent=False) == [dir_entries[1], kipoi_entries[1]]
    assert db.get_entry_by_model(os.path.join(local_path, "example/models/pyt"),
                                 only_most_recent=False) == [dir_entries[0]]

    # monkeypatch the get_model_env_db()
    monkeypatch.setattr(kipoi.conda.env_db, 'get_model_env_db', lambda: db)

    # the high-level helper must resolve through the patched db too
    assert get_envs_by_model(['DeepSEA'], "kipoi", only_most_recent=False, only_valid=False) == [kipoi_entries[0]]
    assert get_envs_by_model(["CpGenie/merged"], "kipoi", only_most_recent=False,
                             only_valid=False) == [dir_entries[1],kipoi_entries[1]]
    assert get_envs_by_model(["example/models/pyt"], "dir", only_most_recent=False,
                             only_valid=False) == [dir_entries[0]]
示例#19
0
def get_dataloader_descr(model_name, source='kipoi'):
    """Not yet nicely integrated with Kipoi

    Args:
      model_name: model name as a string
      source: model source name (default 'kipoi')

    Returns:
      (model output schema, list of required files)
    """
    # arguments to hide for known dataloaders (internal to sequence
    # extraction, not user-facing)
    dl_skip_arguments = {
        "kipoiseq.dataloaders.SeqIntervalDl":
        ['alphabet_axis', 'dummy_axis', 'alphabet', 'dtype']
    }
    md = kipoi.get_model_descr(model_name)
    src = kipoi.get_source(source)

    # get dataloader
    if isinstance(md.default_dataloader, str):
        # string spec: the dataloader path is relative to the model
        dataloader = kipoi.get_dataloader_descr(os.path.join(
            model_name, md.default_dataloader),
                                                source=source)
        dataloader_name = md.default_dataloader
        dataloader_args = dataloader.args
    else:
        # import spec: instantiate from within the model directory
        with cd(src.get_model_dir(model_name)):
            dataloader = md.default_dataloader.get()
        dataloader_name = md.default_dataloader.defined_as
        # drop args already pinned by default_args plus known-internal ones
        dataloader_args = OrderedDict([
            (k, v) for k, v in dataloader.args.items()
            if k not in list(md.default_dataloader.default_args) +
            dl_skip_arguments.get(dataloader_name, [])
        ])

        if md.default_dataloader.defined_as == 'kipoiseq.dataloaders.SeqIntervalDl':
            # HACK - cleanup some values for SeqIntervalDl
            if md.default_dataloader.default_args.get("ignore_targets", False):
                dataloader_args.pop('label_dtype', None)

    # files the user must supply to run the dataloader
    required_files = []
    if 'fasta_file' in dataloader.args:
        required_files.append("fasta_file")
    if 'gtf_file' in dataloader.args:
        required_files.append("gtf_file")

    return get_output_schema(md.schema.targets), required_files
示例#20
0
def cli_info(command, raw_args):
    """CLI interface to predict
    """
    assert command == "info"
    parser = argparse.ArgumentParser(
        'kipoi {}'.format(command),
        description="Prints dataloader keyword arguments.")
    add_model(parser)
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)

    # --------------------------------------------
    # load model & dataloader
    md = kipoi.get_model_descr(args.model, args.source)
    src = kipoi.get_source(args.source)

    # load the default dataloader; if kipoiseq is not installed this
    # raises ImportError and the dataloader section is skipped below
    try:
        if isinstance(md.default_dataloader, kipoi.specs.DataLoaderImport):
            with cd(src.get_model_dir(args.model)):
                dl_descr = md.default_dataloader.get()
        else:
            # load from directory; attach the default dataloader
            # already to the model
            dl_path = os.path.join(args.model, md.default_dataloader)
            dl_descr = kipoi.get_dataloader_descr(dl_path, source=args.source)
    except ImportError:
        dl_descr = None

    print("-" * 80)
    print("'{0}' from source '{1}'".format(str(args.model), str(args.source)))
    print("")
    print("Model information")
    print("-----------")
    print(md.info.get_config_as_yaml())
    if dl_descr:
        print("Dataloader arguments")
        print("--------------------")
        dl_descr.print_args()
    print("--------------------\n")
    print("Run `kipoi get-example {} -o example` to download example files.\n".
          format(args.model))
示例#21
0
def install_model_requirements(model, source="kipoi", and_dataloaders=True):
    """Install model dependencies

    # Arguments
        model (str): model name
        source (str): model source
        and_dataloaders (bool): if True, install also the dependencies
            for the default dataloader
    """
    md = kipoi.get_source(source).get_model_descr(model)
    md.dependencies.install()
    if and_dataloaders:
        # "source:path" selects the dataloader from an explicit source;
        # otherwise it comes from the model's own source
        if ":" in md.default_dataloader:
            dl_source, dl_path = md.default_dataloader.split(":")
        else:
            dl_source = source
            dl_path = md.default_dataloader

        # NOTE(review): the "/" prefix + [1:] appears to keep the join
        # well-formed when dl_path is absolute -- confirm against callers
        default_dataloader_path = os.path.join("/" + model, dl_path)[1:]
        dl = kipoi.config.get_source(dl_source).get_dataloader_descr(
            default_dataloader_path)
        dl.dependencies.install()
示例#22
0
File: env.py  Project: k3nnywilliam/kipoi
def _env_db_model_name(source, model):
    ret = model
    if source != "kipoi":
        source_path = kipoi.get_source(source).local_path
        ret = os.path.join(source_path, model)
    return ret
示例#23
0
File: env.py  Project: k3nnywilliam/kipoi
def merge_deps(models,
               dataloaders=None,
               source="kipoi",
               vep=False,
               interpret=False,
               gpu=False):
    """Setup the dependencies

    Merges the dependencies of all requested models (and, when not
    explicitly given, their default dataloaders) plus optional extras
    into a single Dependencies object.

    # Arguments
        models (list[str]): model names; entries pointing at handcrafted
            environment yaml files are handled separately
        dataloaders (list[str] or None): explicit dataloader names
        source (str): model source name
        vep (bool): add variant-effect-prediction dependencies
        interpret (bool): add interpretation dependencies
        gpu (bool): switch dependencies to their gpu variants

    # Returns
        Dependencies: the merged dependency specification
    """

    special_envs, only_models = split_models_special_envs(models)
    deps = Dependencies()

    # Treat the handcrafted environments differently
    for special_env in special_envs:
        from related import from_yaml
        logger.info("Loading environment definition: {0}".format(special_env))

        # Load and merge the handcrafted deps.
        yaml_path = os.path.join(
            kipoi.get_source(source).local_path, special_env + ".yaml")

        if not os.path.exists(yaml_path):
            raise ValueError(
                "Environment definition file {0} not found in source {1}".
                format(yaml_path, source))

        with open(yaml_path, "r", encoding="utf-8") as fh:
            special_env_deps = Dependencies.from_env_dict(from_yaml(fh))
        deps = deps.merge(special_env_deps)

    for model in only_models:
        logger.info("Loading model: {0} description".format(model))

        parsed_source, parsed_model = parse_source_name(source, model)

        # a model name may expand to several sub-models
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get(
                                )
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError as e:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    # string spec: dataloader path is relative to the model
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)
    # NOTE(review): `or` here looks like it was meant to be `and`; the
    # behavior is the same (None short-circuits to falsy, and iterating
    # an empty list is a no-op) -- confirm intent
    if dataloaders is not None or dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))

            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if interpret:
        # add interpret dependencies
        logger.info("Adding the interpret dependencies")
        deps = INTERPRET_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps
示例#24
0
def cli_test(command, raw_args):
    """Runs test on the model

    Predicts on the model's bundled example files and, when expected
    predictions are available (test.expect in model.yaml or --expect),
    compares the observed predictions against them. Exits with status 1
    on a prediction mismatch.
    """
    assert command == "test"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='script to test model zoo submissions. Example usage:\n'
                                     '`kipoi test model/directory`, where `model/directory` is the '
                                     'path to a directory containing a model.yaml file.')
    add_model(parser, source="dir")
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Batch size to use in prediction')
    parser.add_argument("-o", "--output", default=None, required=False,
                        help="Output hdf5 file")
    parser.add_argument("-s", "--skip-expect", action='store_true',
                        help="Skip validating the expected predictions if test.expect field is specified under model.yaml")
    parser.add_argument("-e", "--expect", default=None,
                        help="File path to the hdf5 file of predictions produced by kipoi test -o file.h5 "
                        "or kipoi predict -o file.h5 --keep_inputs. Overrides test.expect in model.yaml")
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    mh = kipoi.get_model(args.model, args.source)

    if not mh._sufficient_deps(mh.dependencies):
        # model requirements should be installed
        logger.warning("Required package '{0}' for model type: {1} is not listed in the dependencies".
                    format(mh.MODEL_PACKAGE, mh.type))

    # Load the test files from model source
    mh.pipeline.predict_example(batch_size=args.batch_size, output_file=args.output)

    # only compare against expected predictions when an expectation
    # exists, validation isn't skipped, and we didn't just write output
    if (mh.test.expect is not None or args.expect is not None) \
            and not args.skip_expect and args.output is None:
        if args.expect is not None:
            # `expect` specified from the CLI
            expect = args.expect
        else:
            # `expect` taken from model.yaml
            if isinstance(mh.test.expect, kipoi.specs.RemoteFile):
                # download the file
                output_dir = kipoi.get_source(args.source).get_model_download_dir(args.model)
                makedir_exist_ok(output_dir)
                mh.test.expect = mh.test.expect.get_file(os.path.join(output_dir, 'test.expect.h5'))
            expect = mh.test.expect
        logger.info('Testing if the predictions match the expected ones in the file: {}'.format(expect))
        logger.info('Desired precision (number of matching decimal places): {}'.format(mh.test.precision_decimal))

        # iteratively load the expected file
        expected = kipoi.readers.HDF5Reader(expect)
        expected.open()
        it = expected.batch_iter(batch_size=args.batch_size)
        for i, batch in enumerate(tqdm(it, total=len(expected) // args.batch_size)):
            # validate the file layout on the first batch only
            if i == 0 and ('inputs' not in batch or 'preds' not in batch):
                raise ValueError("test.expect file requires 'inputs' and 'preds' "
                                 "to be specified. Available keys: {}".format(list(expected)))
            pred_batch = mh.predict_on_batch(batch['inputs'])
            # compare to the predictions
            # import ipdb
            # ipdb.set_trace()
            try:
                compare_numpy_dict(pred_batch, batch['preds'], exact=False, decimal=mh.test.precision_decimal)
            except Exception as e:
                logger.error("Model predictions don't match the expected predictions."
                             "expected: {}\nobserved: {}. Exception: {}".format(batch['preds'], pred_batch, e))
                expected.close()
                sys.exit(1)
        expected.close()
        logger.info('All predictions match')
    logger.info('Successfully ran test_predict')
示例#25
0
File: env.py  Project: rraadd88/kipoi
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Merge the dependencies of all given models (and optionally dataloaders).

    # Arguments
        models (list of str): model names within `source`. A name denoting a
            model group is expanded into all of its sub-models.
        dataloaders (list of str or None): optional dataloader names within
            `source`. If None or empty, each model's default dataloader is used.
        source (str): model source name (e.g. "kipoi" or "dir")
        vep (bool): if True, also add the variant-effect-prediction dependencies
        gpu (bool): if True, switch to the gpu-compatible dependency variants

    # Returns
        Dependencies: merged dependency specification. Always includes the core
        Kipoi dependencies and, on OSX, the osx-specific variants.

    # Raises
        ValueError: if a model or dataloader is not found in `source`
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))

        parsed_source, parsed_model = parse_source_name(source, model)

        # a single name may expand to a whole group of sub-models
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # no explicit dataloaders requested -> pull in the model's default one
            if not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get(
                                )
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                # logger.warn is deprecated -> logger.warning
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    # default dataloader given as a path relative to the model
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)
    # explicitly requested dataloaders (iterating an empty list is a no-op,
    # so a plain truthiness check replaces the old redundant condition)
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))

            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps
示例#26
0
def test_env_db(tmpdir):
    """End-to-end test of EnvDb: appending entries, querying by model,
    validity filtering, JSON persistence, and recovery from corrupt entries.
    """
    json_file = os.path.join(str(tmpdir), "db.json")
    # fake kipoi CLI binary; an existing cli_path makes an entry "valid"
    sample_cli_path = os.path.join(str(tmpdir), "sample")
    with open(sample_cli_path, "w") as fh:
        fh.write("")

    db = EnvDb(json_file)
    kwargs = {"dataloader": [], "env": "test_env", "gpu": True, "model": None, "source": "dir",
              "tmpdir": "something", "vep": True}

    # create two entries; the second one also covers the model of the first
    entries = []
    source_path = kipoi.get_source("dir").local_path
    for model in [["example/models/pyt"], ["example/models/shared/envs/kipoi-py3-keras1.2", "example/models/pyt"]]:
        kwargs['model'] = model
        db_entry = generate_env_db_entry(get_args(kwargs)())
        db.append(db_entry)
        entries.append(db_entry)

    pyt_query_name = os.path.join(source_path, "example/models/pyt")

    # most recent entry wins; prefix-similar names must not match
    assert db.get_entry_by_model(pyt_query_name) == entries[1]
    assert db.get_entry_by_model(pyt_query_name + "_class") is None
    assert db.get_entry_by_model(pyt_query_name, only_most_recent=False) == entries[::-1]

    # test if the viability check is ok:
    # NOTE: `entry` aliases the object stored in the db, so mutating its
    # attributes directly affects subsequent queries
    entry = db.get_entry_by_model(pyt_query_name)
    entry.successful = True
    entry.cli_path = sample_cli_path
    assert db.get_entry_by_model(pyt_query_name, only_most_recent=False, only_valid=True) == [entry]
    entry.successful = False
    assert len(db.get_entry_by_model(pyt_query_name, only_most_recent=False, only_valid=True)) == 0
    entry.successful = True
    entry.cli_path = None
    assert len(db.get_entry_by_model(pyt_query_name, only_most_recent=False, only_valid=True)) == 0

    db.save()
    del db

    # Test if loading is fine
    db2 = EnvDb(json_file)
    # test dict identity
    assert_rec(db2.get_entry_by_model(pyt_query_name).get_config(), entries[1].get_config())
    assert db2.get_entry_by_model(pyt_query_name + "_class") is None

    del db2

    # Test if bad entries are skipped
    with open(json_file, "r") as fh:
        db_dict = json.load(fh)

    # Add a bad entry (missing most of the required fields):
    new_key = max([int(k) for k in db_dict["_default"]]) + 1
    db_dict["_default"][str(new_key)] = {"conda_version": "conda 4.5.4", "kipoi_version": "0.5.6"}

    with open(json_file, "w") as fh:
        json.dump(db_dict, fh)

    # Check if there is a warning
    # with pytest.warns(UserWarning): # There seems to be a general problem with warnings...
    db_warns = EnvDb(json_file)

    # the malformed entry must be skipped on load
    assert len(db_warns.entries) == 2

    # Now save so that the bad entry is gone
    db_warns.save()
    del db_warns

    # Make sure the bad entry is not there anymore
    with open(json_file, "r") as fh:
        db_dict_recovered = json.load(fh)

    found = 0
    for val in db_dict_recovered['_default'].values():
        found += int(val == db_dict["_default"][str(new_key)])

    assert len(db_dict_recovered["_default"]) == new_key - 1
    assert found == 0

    os.unlink(json_file)
示例#27
0
def cli_test_source(command, raw_args):
    """Runs test on the model

    Tests every (modified) model of a model source, either in a dedicated
    conda environment, in a shared ("common") environment, or inside the
    model's singularity container. Optionally shards the model list for
    parallel CI execution. Exits non-zero if any model fails.

    # Arguments
        command (str): must be "test-source"
        raw_args (list of str): CLI arguments to parse
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi", help='Which source to test')
    parser.add_argument('--git-range',
                        nargs='+',
                        help='''Git range (e.g. commits or something like
                        "master HEAD" to check commits in HEAD vs master, or just "HEAD" to
                        include uncommitted changes). All models modified within this range will
                        be tested.''')
    parser.add_argument('-n',
                        '--dry_run',
                        action='store_true',
                        help='Dont run model testing')
    parser.add_argument('-b',
                        '--batch_size',
                        default=4,
                        type=int,
                        help='Batch size')
    parser.add_argument('-x',
                        '--exitfirst',
                        action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument(
        '-k',
        default=None,
        help='only run tests which match the given substring expression')
    parser.add_argument('-c',
                        '--clean_env',
                        action='store_true',
                        help='clean the environment after running.')
    parser.add_argument(
        "--vep",
        action=kipoi.cli.main.DeprecateAction,
        help=
        "This argument is deprecated. Please use https://github.com/kipoi/kipoi-veff2 directly"
    )
    parser.add_argument('--common_env',
                        action='store_true',
                        help='Test models in common environments.')
    parser.add_argument('--all',
                        action='store_true',
                        help="Test all models in the source")
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help=
        "Increase output verbosity. Show conda stdout during env installation."
    )
    parser.add_argument('--shard_id', type=int, default=-1, help="Shard id")
    parser.add_argument('--num_of_shards',
                        type=int,
                        default=-1,
                        help="Number of shards")
    parser.add_argument('--singularity',
                        action='store_true',
                        help='Test models within their singularity containers')

    args = parser.parse_args(raw_args)
    if args.singularity and args.source != "kipoi":
        raise IOError(
            "Singularity containers are available for kipoi models only")
    if args.singularity and args.common_env:
        raise IOError("Please use only one of --singularity and --common_env")

    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    if args.k is not None:
        # NOTE: re.match anchors at the start of the model name
        all_models = [x for x in all_models if re.match(args.k, x)]

    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)

    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.format(
            '\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models, source,
                                              args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.format(
            len(test_models), len(all_models), '\n- '.join(test_models)))
    # Sort the models alphabetically
    test_models = sorted(test_models)
    # optional sharding for parallel CI runs
    if args.num_of_shards > 0 and args.shard_id >= 0:
        if args.shard_id >= args.num_of_shards:
            logger.info(
                "Shard id is invalid. It should be a value between 0 and {0}.".
                format(args.num_of_shards - 1))
            sys.exit(1)
        else:
            all_test_models = test_models
            sublists = np.array_split(all_test_models, args.num_of_shards)
            list_of_shards = [list(split) for split in sublists]
            test_models = list_of_shards[args.shard_id]

    logger.info(test_models)
    # Parse the repo config
    cfg_path = get_file_path(source.local_path,
                             "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None

    if args.dry_run:
        logger.info(
            "-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)

    # TODO - make sure the modes are always tested in the same order?
    #        - make sure the keras config doesn't get cluttered

    # Test common environments
    if args.common_env:
        logger.info("Installing common environments")
        import yaml
        models_yaml_path = os.path.join(source.local_path, SPECIAL_ENV_PREFIX,
                                        "models.yaml")
        if not os.path.exists(models_yaml_path):
            logger.error(
                "{} doesn't exist when installing the common environment".
                format(models_yaml_path))
            sys.exit(1)
        # reuse the path computed above and close the file handle
        # (the original re-built the path and leaked the open file)
        with open(models_yaml_path, "r", encoding="utf-8") as fh:
            model_envs = yaml.safe_load(fh)

        test_envs = {
            get_common_env(m, model_envs)
            for m in test_models if get_common_env(m, model_envs) is not None
        }

        if len(test_envs) == 0:
            logger.info("No common environments to test")
            sys.exit(0)

        logger.info(
            "Installing environments covering the following models: \n{}".
            format(yaml.dump(model_envs)))
        for env in test_envs:
            if env_exists(env):
                logger.info(
                    "Common environment already exists: {}. Skipping the installation"
                    .format(env))
            else:
                logger.info("Installing environment: {}".format(env))
                create_model_env(os.path.join(SPECIAL_ENV_PREFIX, env),
                                 args.source, env)

    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i, m in enumerate(test_models):
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i + 1, len(test_models), m))
        print('-' * 20)
        # env_name stays None in singularity mode or on early failure --
        # guard the rm_env calls below to avoid a NameError in `finally`
        env_name = None
        try:
            if not args.common_env and not args.singularity:
                # Prepend "test-" to the standard kipoi env name
                env_name = conda_env_name(m, source=args.source)
                env_name = "test-" + env_name
                # Test
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=True,
                           verbose=args.verbose)
            elif args.singularity and not args.common_env:
                print("Testing within singularity container....")
                test_model_singularity(m,
                                       args.source,
                                       get_batch_size(cfg, m, args.batch_size),
                                       verbose=args.verbose)
            elif args.common_env and not args.singularity:
                # figure out the common environment name
                env_name = get_common_env(m, model_envs)
                if env_name is None:
                    # skip if none was found
                    logger.info(
                        "Common environment not found for {}".format(m))
                    continue
                # ---------------------------
                # Test
                print("test_model...")
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=False,
                           verbose=args.verbose)
            else:
                raise IOError(
                    "Please either choose --common_env or --singularity or none"
                )
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models += [m]
            if args.exitfirst:
                if args.clean_env and not args.common_env and env_name is not None:
                    rm_env(env_name)
                sys.exit(1)
        finally:
            if args.clean_env and not args.common_env and env_name is not None:
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".format(
            len(failed_models), len(test_models), "\n- ".join(failed_models)))
        sys.exit(1)

    logger.info('All tests ({0}) passed'.format(len(test_models)))
示例#28
0
文件: env.py 项目: k3nnywilliam/kipoi
def cli_create(cmd, raw_args):
    """Create a conda environment for a model

    With `--model all`, iterates over all model groups and recursively
    invokes itself once per group, skipping groups that already have a
    valid environment and preferring a common (shared) environment when
    one is declared in models.yaml.

    # Arguments
        cmd (str): command name (used only in the help text)
        raw_args (list of str): CLI arguments to parse
    """
    from kipoi_conda import get_kipoi_bin
    import uuid
    parser = argparse.ArgumentParser(
        'kipoi env {}'.format(cmd),
        description='Create a conda environment for a specific model.')
    add_env_args(parser)
    parser.add_argument(
        '-e',
        '--env',
        default=None,
        help="Special environment name. default: kipoi-<model>[-<dataloader>]")
    parser.add_argument('--dry-run',
                        action='store_true',
                        help="Don't actually create the environment")
    parser.add_argument(
        '-t',
        '--tmpdir',
        default=None,
        help=
        ("Temporary directory path where to create the conda environment file. "
         "Defaults to /tmp/kipoi/envfiles/<uuid>/"))
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help="Increase output verbosity. Show conda stdout.")
    args = parser.parse_args(raw_args)

    # create the tmp dir
    if args.tmpdir is None:
        tmpdir = "/tmp/kipoi/envfiles/" + str(uuid.uuid4())[:8]
    else:
        tmpdir = args.tmpdir
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)

    # write the env file
    logger.info("Writing environment file: {0}".format(tmpdir))

    if args.model == ['all']:
        from kipoi.cli.source_test import get_common_env
        src = kipoi.get_source(args.source)
        # close the file handle after parsing (the original leaked it)
        with open(os.path.join(src.local_path, SPECIAL_ENV_PREFIX,
                               "models.yaml")) as fh:
            model_envs = yaml.safe_load(fh)

        # TODO - test this by mocking up the CLI command execution

        # Replace the literal 'all' in the raw CLI args with a concrete
        # model group (pure helper; hoisted out of the loop below)
        def optional_replace(x, ref, alt):
            if x == ref:
                return alt
            else:
                return x

        # setup the args for all the models
        df = kipoi.list_models()
        dfg = list_models_by_group(df, "")
        for model_group in dfg.group.unique().tolist():
            existing_envs = get_envs_by_model(model_group,
                                              args.source,
                                              only_valid=True)
            if existing_envs or existing_envs is None:
                # No need to create the environment.
                # `existing_envs or []` guards the None case -- iterating
                # None here used to raise a TypeError
                existing_envs_str = "\n".join(
                    [e.create_args.env for e in existing_envs or []])
                logger.info(
                    "Environment for {} already exists ({}). Skipping installation"
                    .format(model_group, existing_envs_str))
                continue

            logger.info(
                "Environment doesn't exist for model group {}. Installing it".
                format(model_group))

            # Figure out which <model> to use for installation
            common_env = get_common_env(model_group, model_envs)
            if common_env is not None:
                # common environment exists for the model. Use it
                logger.info("Using common environment: {}".format(common_env))
                model_group = os.path.join(SPECIAL_ENV_PREFIX, common_env)

            # Run cli_create recursively for the single group
            new_raw_args = [
                optional_replace(x, 'all', model_group) for x in raw_args
                if x is not None
            ]
            cli_create(cmd, new_raw_args)
        logger.info("Done installing all environments!")
        return None

    env, env_file = export_env(args.model,
                               args.dataloader,
                               args.source,
                               env_file=None,
                               env_dir=tmpdir,
                               env=args.env,
                               vep=args.vep,
                               interpret=args.interpret,
                               gpu=args.gpu)

    if not args.dry_run:
        # record the attempt in the env db before installing so that a
        # failed installation remains visible (successful defaults to False)
        env_db_entry = generate_env_db_entry(args, args_env_overload=env)
        envdb = get_model_env_db()
        envdb.append(env_db_entry)
        envdb.save()

        # setup the conda env from file
        logger.info("Creating conda env from file: {0}".format(env_file))
        kipoi_conda.create_env_from_file(env_file, use_stdout=args.verbose)
        env_db_entry.successful = True

        # env is environment name
        env_db_entry.cli_path = get_kipoi_bin(env)
        get_model_env_db().save()
        logger.info("Done!")
        print("\nActivate the environment via:")
        print("conda activate {0}".format(env))
    else:
        print("Dry run. Conda file path: {}".format(env_file))
示例#29
0
def cli_test_source(command, raw_args):
    """Runs test on the model

    Tests every model of a model source that was modified in the given git
    range (or all models with --all), each in its own "test-" conda
    environment. Exits non-zero if any model fails.

    # Arguments
        command (str): must be "test-source"
        raw_args (list of str): CLI arguments to parse
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi",
                        help='Which source to test')
    parser.add_argument('--git-range', nargs='+',
                        help='''Git range (e.g. commits or something like
                        "master HEAD" to check commits in HEAD vs master, or just "HEAD" to
                        include uncommitted changes). All models modified within this range will
                        be tested.''')
    parser.add_argument('-n', '--dry_run', action='store_true',
                        help='Dont run model testing')
    parser.add_argument('-b', '--batch_size', default=4, type=int,
                        help='Batch size')
    parser.add_argument('-x', '--exitfirst', action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument('-k', default=None,
                        help='only run tests which match the given substring expression')
    parser.add_argument('-c', '--clean_env', action='store_true',
                        help='clean the environment after running.')
    parser.add_argument('--vep', action='store_true',
                        help='Install the vep dependency.')
    parser.add_argument('--all', action='store_true',
                        help="Test all models in the source")

    args = parser.parse_args(raw_args)
    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    if args.k is not None:
        # NOTE: re.match anchors at the start of the model name
        all_models = [x for x in all_models if re.match(args.k, x)]

    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)
    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.
                    format('\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models,
                                              source,
                                              args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.
                    format(len(test_models), len(all_models),
                           '\n- '.join(test_models)))
    # Sort the models alphabetically
    test_models = sorted(test_models)

    # Parse the repo config
    cfg_path = get_file_path(source.local_path, "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None

    if args.dry_run:
        logger.info("-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)

    # TODO - make sure the modes are always tested in the same order?
    #        - make sure the keras config doesn't get cluttered

    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i, m in enumerate(test_models):
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i + 1,
                                            len(test_models),
                                            m))
        print('-' * 20)
        # guard against env_name being unbound in `finally` when
        # conda_env_name itself raises
        env_name = None
        try:
            env_name = conda_env_name(m, source=args.source)
            env_name = "test-" + env_name  # prepend "test-"
            test_model(m, args.source, env_name,
                       get_batch_size(cfg, m, args.batch_size), args.vep)
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models += [m]
            if args.exitfirst:
                if args.clean_env and env_name is not None:
                    rm_env(env_name)
                sys.exit(1)
        finally:
            if args.clean_env and env_name is not None:
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".
                     format(len(failed_models),
                            len(test_models),
                            "\n- ".join(failed_models)))
        sys.exit(1)

    logger.info('All tests ({0}) passed'.format(len(test_models)))
示例#30
0
def get_model_list(source):
    """Return the dataframe listing all models of `source`.

    Thin cached wrapper around kipoi's model listing.
    """
    return kipoi.get_source(source).list_models()