def test_Dependencies():
    dep = Dependencies(conda=["conda_pkg1", "conda_pkg2"],
                       pip=["pip_pkg1>=1.1", "pip_pkg2"])
    res = dep.to_env_dict("asd")
    assert res["name"] == "asd"
    assert res["channels"] == ["defaults"]
    assert res["dependencies"][0] == "conda_pkg1"
    assert res["dependencies"][1] == "conda_pkg2"
    assert res["dependencies"][2]["pip"][1] == "pip_pkg2"
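# A hedged sketch of what the assertions above imply to_env_dict("asd")
# returns -- the usual conda environment.yml layout, with pip requirements
# nested under a trailing {"pip": [...]} entry:
#
#     {"name": "asd",
#      "channels": ["defaults"],
#      "dependencies": ["conda_pkg1",
#                       "conda_pkg2",
#                       {"pip": ["pip_pkg1>=1.1", "pip_pkg2"]}]}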
def test_Dependencies_merge():
    dep1 = Dependencies(conda=["conda_pkg1", "conda_pkg2"],
                        pip=["pip_pkg1>=1.1", "pip_pkg2"])
    dep2 = Dependencies(conda=["conda_pkg1", "conda_pkg3>=1.1"],
                        pip=["pip_pkg1>=1.0", "pip_pkg2==3.3"])
    dep_merged = dep1.merge(dep2)
    assert dep_merged.conda == ['conda_pkg1', 'conda_pkg2', 'conda_pkg3>=1.1']
    assert dep_merged.pip == ['pip_pkg1>=1.1,>=1.0', 'pip_pkg2==3.3']
    assert dep_merged.conda_channels == ["defaults"]
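# The assertions suggest merge() deduplicates packages by name (keeping
# first-seen order) and joins the pip version specifiers of both sides
# with a comma:
#
#     "pip_pkg1>=1.1" merged with "pip_pkg1>=1.0" -> "pip_pkg1>=1.1,>=1.0"
#     "pip_pkg2"      merged with "pip_pkg2==3.3" -> "pip_pkg2==3.3"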
def test_gpu():
    # tensorflow
    deps = Dependencies(pip=["tensorflow==1.4"])
    assert deps.gpu().pip == ["tensorflow-gpu==1.4"]

    # pytorch
    deps = Dependencies(conda=["pytorch::pytorch-cpu"])
    assert deps.gpu().conda == ["pytorch"]

    # nothing changed
    deps = Dependencies(pip=["foo"], conda=["bar"])
    assert deps.gpu() == deps.normalized()
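# gpu() evidently rewrites known CPU-only packages to their GPU
# counterparts (tensorflow -> tensorflow-gpu, pytorch::pytorch-cpu ->
# pytorch) while preserving version pins, and leaves unrelated packages
# untouched -- hence the final comparison against deps.normalized().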
def test_deps(dependency, Model):
    contains = [Dependencies(pip=["bar", dependency]),
                Dependencies(conda=[dependency, "foo"]),
                Dependencies(conda=["asd::" + dependency])]
    doesnt_contain = [Dependencies(pip=["bar"]),
                      Dependencies(conda=["bar"])]
    for deps in contains:
        assert Model._sufficient_deps(deps)
    if dependency != "no-dep":
        for deps in doesnt_contain:
            assert not Model._sufficient_deps(deps)
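# _sufficient_deps presumably just checks whether the package required by
# the model type appears anywhere in the pip or conda lists; the
# "asd::" + dependency case shows that the conda channel prefix is
# ignored when matching.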
def test_other_channels():
    dep1 = Dependencies(conda=["other::conda_pkg2", "conda_pkg1"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["other", "defaults"]

    dep1 = Dependencies(conda=["conda_pkg1", "other::conda_pkg2"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["defaults", "other"]
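# Channel order follows the first occurrence of each channel in the conda
# list, with un-prefixed packages contributing "defaults" -- the two cases
# above differ only in which channel is encountered first.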
def test_dependencies_all_installed():
    assert Dependencies(conda=["numpy"], pip=["kipoi"]).all_installed()
    assert Dependencies(conda=["numpy"], pip=["kipoi>=0.1"]).all_installed()
    assert Dependencies(conda=["numpy>0.1"],
                        pip=["kipoi>=0.1"]).all_installed()
    assert not Dependencies(conda=["numpy>0.1"],
                            pip=["kipoi>=10.1"]).all_installed()
    assert not Dependencies(conda=["numpy>0.1"],
                            pip=["kipoi>=10.1"]).all_installed(verbose=True)
    assert not Dependencies(conda=["package_doesnt_exist>0.1"],
                            pip=["kipoi>=10.1"]).all_installed(verbose=True)
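# all_installed() apparently resolves each requirement against the current
# environment: a spec fails if the package is missing entirely
# ("package_doesnt_exist") or if the installed version does not satisfy
# the constraint ("kipoi>=10.1"); verbose=True presumably only adds
# logging about which requirement failed.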
def test_handle_pysam():
    dep1 = Dependencies(conda=["conda_pkg1", "bioconda::pysam"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["bioconda", "conda-forge", "defaults"]

    dep1 = Dependencies(conda=["conda_pkg1", "bioconda::pybedtools"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["defaults", "bioconda", "conda-forge"]
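# pysam is evidently special-cased: when it is present, bioconda and
# conda-forge are promoted to the front of the channel list (bioconda
# pysam builds need conda-forge pinned first), whereas for other bioconda
# packages such as pybedtools, conda-forge is merely appended after
# bioconda in first-seen order.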
env_name += "-DL-{0}".format(",".join(dataloader_name)) # limit the env name to 110 characters if len(env_name) > 110: logger.info( "Environment name exceeds 110 characters. Limiting it to 110 characters" ) env_name = env_name[:110] return env_name # Website compatibility conda_env_name = get_env_name # constant dependencies KIPOI_DEPS = Dependencies(pip=["kipoi"]) # TODO - update once kipoi_veff will be on bioconda VEP_DEPS = Dependencies(conda=[ "bioconda::pyvcf", "bioconda::cyvcf2", "bioconda::pybedtools", "bioconda::pysam" ], pip=["kipoi_veff"]) INTERPRET_DEPS = Dependencies(pip=["kipoi_interpret"]) # Hard-code kipoi-seq dataloaders KIPOISEQ_DEPS = Dependencies( conda=['bioconda::pybedtools', 'bioconda::pyfaidx', 'numpy', 'pandas'], pip=['kipoiseq']) def split_models_special_envs(models):
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Set up the dependencies
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)

        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = \
                                    model_descr.default_dataloader.get()
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from the model."
                                .format(dataloader))
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)

    if dataloaders is not None or dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()
    return deps
import pytest
import numpy as np
from kipoi.data import Dataset
from kipoi.specs import DataLoaderArgument, DataLoaderSchema, DataLoaderDescription
from kipoi.specs import Author, Dependencies
from kipoi_utils.utils import inherits_from
from collections import OrderedDict
import related
from kipoi.data import kipoi_dataloader

deps = Dependencies(pip='kipoiseq')
package_authors = [Author(name='John')]


@kipoi_dataloader(override={
    "dependencies": deps,
    'info.authors': package_authors
})
class Dl(Dataset):
    """
    info:
        doc: short doc
    args:
        arg1:
            doc: this is arg1
            example: hey
        n:
            doc: length of the dataset
    output_schema:
        inputs:
            name: seq
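# The override mapping evidently replaces fields of the description parsed
# from the docstring YAML: "dependencies" is swapped for the Dependencies
# object defined above, and the dotted key 'info.authors' overwrites the
# nested authors entry -- letting a package inject its own metadata
# without editing each dataloader docstring.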
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Set up the dependencies
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)

        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                dataloader = os.path.normpath(
                    os.path.join(sub_model, model_descr.default_dataloader))
                logger.info("Inferred dataloader name: {0} from the model."
                            .format(dataloader))
                dataloader_descr = kipoi.get_dataloader_descr(
                    dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    if dataloaders is not None or dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()
    return deps
env_name += "-DL-{0}".format(",".join(dataloader_name)) # limit the env name to 110 characters if len(env_name) > 110: logger.info( "Environment name exceeds 110 characters. Limiting it to 110 characters" ) env_name = env_name[:110] return env_name # Website compatibility conda_env_name = get_env_name # constant dependencies KIPOI_DEPS = Dependencies(pip=["kipoi"]) INTERPRET_DEPS = Dependencies(pip=["kipoi_interpret"]) # Hard-code kipoi-seq dataloaders KIPOISEQ_DEPS = Dependencies( conda=['bioconda::pybedtools', 'bioconda::pyfaidx', 'numpy', 'pandas'], pip=['kipoiseq']) def split_models_special_envs(models): special_envs = [] # handcrafted environments only_models = [] # actual models excluding handcrafted environments for model in models: if SPECIAL_ENV_PREFIX in model: special_envs.append(model) else:
from kipoi.data import Dataset, kipoi_dataloader
from kipoi.metadata import GenomicRanges
from kipoi.specs import Author, Dependencies
from kipoi.data import SampleIterator
import gffutils
from pyfaidx import Fasta
import pickle

# general dependencies
# 'bioconda::genomelake', TODO - add genomelake again once it gets released with pyfaidx to bioconda
deps = Dependencies(conda=['bioconda::pyfaidx', 'numpy', 'pandas'],
                    pip=['kipoiseq', 'kipoi'])
package_authors = [Author(name='Jun Cheng', github='s6juncheng')]

__all__ = ['ExonInterval', 'generate_exons', 'MMSpliceDl']

# python 2.7 compatibility
try:
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

try:
    ModuleNotFoundError
except NameError:
    ModuleNotFoundError = ImportError

# ------------
def merge_deps(models,
               dataloaders=None,
               source="kipoi",
               vep=False,
               interpret=False,
               gpu=False):
    """Set up the dependencies
    """
    special_envs, only_models = split_models_special_envs(models)
    deps = Dependencies()

    # Treat the handcrafted environments differently
    for special_env in special_envs:
        from related import from_yaml
        logger.info("Loading environment definition: {0}".format(special_env))

        # Load and merge the handcrafted deps.
        yaml_path = os.path.join(
            kipoi.get_source(source).local_path, special_env + ".yaml")
        if not os.path.exists(yaml_path):
            raise ValueError(
                "Environment definition file {0} not found in source {1}".
                format(yaml_path, source))
        with open(yaml_path, "r", encoding="utf-8") as fh:
            special_env_deps = Dependencies.from_env_dict(from_yaml(fh))
        deps = deps.merge(special_env_deps)

    for model in only_models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)

        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = \
                                    model_descr.default_dataloader.get()
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from the model."
                                .format(dataloader))
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)

    if dataloaders is not None or dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if interpret:
        # add interpret dependencies
        logger.info("Adding the interpret dependencies")
        deps = INTERPRET_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()
    return deps
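# Hedged usage sketch (the model names are hypothetical examples): merge
# the dependencies of several models and write a conda environment file.
#
#     deps = merge_deps(["DeepSEA/predict", "Basset"], source="kipoi")
#     env_dict = deps.to_env_dict("kipoi-shared-env")
#     with open("environment.yml", "w") as f:
#         f.write(yaml.dump(env_dict, default_flow_style=False))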
def test_bioconda_channels():
    dep1 = Dependencies(conda=["conda_pkg1", "bioconda::conda_pkg2"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["defaults", "bioconda", "conda-forge"]

    dep1 = Dependencies(conda=["bioconda::conda_pkg2", "conda_pkg1"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["bioconda", "conda-forge", "defaults"]

    dep1 = Dependencies(conda=["bioconda::conda_pkg2"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["bioconda", "conda-forge", "defaults"]

    dep1 = Dependencies(
        conda=["conda-forge::conda_pkg2", "bioconda::conda_pkg2"], pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["conda-forge", "bioconda", "defaults"]

    dep1 = Dependencies(
        conda=["asd::conda_pkg2", "bioconda::conda_pkg2", "dsa::conda_pkg2"],
        pip=[])
    channels, packages = dep1._get_channels_packages()
    assert channels == ["asd", "bioconda", "conda-forge", "dsa", "defaults"]
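# Two additional rules emerge from the assertions above: bioconda pulls in
# conda-forge immediately after itself unless conda-forge is already
# listed, and "defaults" is appended at the end whenever it was not
# already introduced earlier by an un-prefixed package.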
def test_decorator_env_loading(tmpdir):
    mdir = cp_tmpdir("example/models/kipoi_dataloader_decorator", tmpdir)
    assert merge_deps([mdir], source='dir') == \
        Dependencies(conda=['python=2.7', 'scikit-learn'],
                     pip=['kipoi', 'scikit-learn', 'tqdm'],
                     conda_channels=['defaults'])
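# Note that 'kipoi' appears in the expected pip list even though the
# example model need not declare it: merge_deps always merges KIPOI_DEPS
# into its result. The comparison also relies on Dependencies supporting
# equality, presumably over the normalized conda/pip/channel fields.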
    if len(dataloader_name) != 0 and dataloader_name != model_name:
        env_name += "-DL-{0}".format(",".join(dataloader_name))

    # limit the env name to 110 characters
    if len(env_name) > 110:
        logger.info(
            "Environment name exceeds 110 characters. Limiting it to 110 characters"
        )
        env_name = env_name[:110]
    return env_name


# Website compatibility
conda_env_name = get_env_name

KIPOI_DEPS = Dependencies(pip=["kipoi"])

# TODO - update once kipoi_veff is on bioconda
VEP_DEPS = Dependencies(conda=[
    "bioconda::pyvcf", "bioconda::cyvcf2", "bioconda::pybedtools",
    "bioconda::pysam"
],
                        pip=["kipoi_veff"])


def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Set up the dependencies
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))