def test_model_loading_on_examples(example):
    """Check that an example's dataloader.yaml loads and exposes its fields.

    The description is loaded from
    `example/models/<example>/dataloader.yaml` and every expected attribute
    is accessed once; any access that raises fails the test.

    # Arguments
        example: name of the example directory under `example/models/`
    """
    yaml_file = "example/models/{0}/dataloader.yaml".format(example)
    descr = DataLoaderDescription.load(yaml_file)

    # --- top-level fields
    # NOTE(review): the result of this comparison is discarded, so only the
    # attribute access is exercised -- confirm whether an assert was intended.
    descr.type == "Dataset"
    for field in ("defined_as", "args"):
        getattr(descr, field)

    # --- first argument entry
    first_arg = six.next(six.itervalues(descr.args))
    for field in ("doc", "type", "optional"):
        getattr(first_arg, field)

    # --- info section
    getattr(descr, "info")
    for field in ("authors", "name", "version", "tags", "doc"):
        getattr(descr.info, field)

    # --- output schema
    getattr(descr, "output_schema")
    getattr(descr.output_schema, "inputs")
    first_input = six.next(six.itervalues(descr.output_schema.inputs))
    for field in ("shape", "special_type", "associated_metadata"):
        getattr(first_input, field)
    for field in ("targets", "metadata"):
        getattr(descr.output_schema, field)
def get_dataloader_factory(dataloader, source="kipoi"):
    """Pull a dataloader from a source and return its class with yaml metadata attached.

    # Arguments
        dataloader: dataloader name, passed to `source.pull_dataloader`
        source: source name, resolved through `kipoi.config.get_source`

    # Returns
        The object referenced by `defined_as` in the dataloader yaml (wrapped
        with `from_fn` when the type is listed in `DATALOADERS_AS_FUNCTIONS`),
        with the description fields, source information, `example_kwargs`,
        `init_example` and `print_args` set on it.

    # Raises
        ValueError: if the yaml `type` is not in `AVAILABLE_DATALOADERS`,
            if the arguments reported by `getargs` differ from the yaml
            `args` keys, or if the class does not inherit from the
            dataloader class registered for its type.
    """
    # resolve the source object and fetch the yaml describing the dataloader
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # load the description and the python object from within the
    # dataloader directory
    with cd(dataloader_dir):
        descr = DataLoaderDescription.load(os.path.basename(yaml_path))
        module_file, attr_name = tuple(descr.defined_as.split("::"))
        loader = getattr(load_module(module_file), attr_name)

    # the declared type has to be one of the known dataloader kinds
    if descr.type not in AVAILABLE_DATALOADERS:
        supported = list(AVAILABLE_DATALOADERS.keys())
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(descr.type, supported))

    # the python signature and the yaml argument list have to agree
    loader_args = getargs(loader)
    yaml_args = set(descr.args.keys())
    if loader_args != yaml_args:
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(set(loader_args)) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(yaml_args))

    base_cls = AVAILABLE_DATALOADERS[descr.type]
    if descr.type in DATALOADERS_AS_FUNCTIONS:
        # function-style dataloaders get wrapped into a class
        assert isinstance(loader, types.FunctionType)
        loader = base_cls.from_fn(loader)
    elif not issubclass(loader, base_cls):
        raise ValueError("DataLoader does't inherit from the specified dataloader: {0}".
                         format(base_cls.__name__))

    loaded_from = os.path.normpath(os.path.join(dataloader_dir, descr.defined_as))
    logger.info('successfully loaded the dataloader from {}'.format(loaded_from))

    # copy the description fields onto the class
    # TODO - make this more automatic / DRY
    for field in ("type", "defined_as", "args", "info",
                  "output_schema", "dependencies", "postprocessing"):
        setattr(loader, field, getattr(descr, field))

    # bookkeeping about where the dataloader came from
    loader._yaml_path = yaml_path
    loader.source = source
    loader.source_dir = dataloader_dir  # TODO - rename?

    # convenience constructor using the example arguments
    loader.example_kwargs = example_kwargs(loader.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)

    loader.init_example = classmethod(init_example)
    loader.print_args = classmethod(print_dl_kwargs)

    return loader
def load_component_descr(component_dir, which="model"):
    """Locate and parse the yaml description of a component.

    # Arguments
        component_dir: component directory; converted to an absolute path
            before the description file is looked up
        which: either `"model"` or `"dataloader"`

    # Returns
        The result of `ModelDescription.load` (for `"model"`) or
        `DataLoaderDescription.load` (for `"dataloader"`).

    # Raises
        ValueError: if `which` is neither `"model"` nor `"dataloader"`
    """
    from kipoi.specs import ModelDescription, DataLoaderDescription

    abs_dir = os.path.abspath(component_dir)
    descr_file = get_component_file(abs_dir, which, raise_err=True)

    # parse while the working directory is the one holding the yaml file
    with cd(os.path.dirname(descr_file)):
        if which == "model":
            descr_cls = ModelDescription
        elif which == "dataloader":
            descr_cls = DataLoaderDescription
        else:
            raise ValueError("which needs to be from {'model', 'dataloader'}")
        return descr_cls.load(descr_file)
def get_dataloader_factory(dataloader):
    """Load the dataloader described by `./model/dataloader.yaml`.

    The yaml path and the dataloader directory are fixed to `./model/`;
    the `dataloader` argument is not used by this function.

    # Arguments
        dataloader: unused

    # Returns
        The object referenced by `defined_as` in the yaml (wrapped with
        `from_fn` when the type is listed in `DATALOADERS_AS_FUNCTIONS`),
        with the description fields, `_yaml_path` and `source_dir` set on it.

    # Raises
        ValueError: if the yaml `type` is not in `AVAILABLE_DATALOADERS`,
            if the arguments reported by `getargs` differ from the yaml
            `args` keys, or if the class does not inherit from the
            dataloader class registered for its type.
    """
    yaml_path = './model/dataloader.yaml'
    dataloader_dir = './model/'

    # read the description and import the object while inside ./model/
    with cd(dataloader_dir):
        spec = DataLoaderDescription.load(os.path.basename(yaml_path))
        py_file, py_name = tuple(spec.defined_as.split("::"))
        dl_obj = getattr(load_module(py_file), py_name)

    # 1) the declared type must be supported
    type_is_known = spec.type in AVAILABLE_DATALOADERS
    if not type_is_known:
        msg = "dataloader type: {0} is not in supported dataloaders:{1}".format(
            spec.type, list(AVAILABLE_DATALOADERS.keys()))
        raise ValueError(msg)

    # 2) the callable's arguments must match the yaml argument names
    args_match = getargs(dl_obj) == set(spec.args.keys())
    if not args_match:
        found = "DataLoader arguments: \n{0}\n don't match ".format(
            set(getargs(dl_obj)))
        wanted = "the specification in the dataloader.yaml file:\n{0}".format(
            set(spec.args.keys()))
        raise ValueError(found + wanted)

    # 3) functions are wrapped into classes, classes must have the right parent
    if spec.type in DATALOADERS_AS_FUNCTIONS:
        assert isinstance(dl_obj, types.FunctionType)
        dl_obj = AVAILABLE_DATALOADERS[spec.type].from_fn(dl_obj)
    else:
        parent = AVAILABLE_DATALOADERS[spec.type]
        if not issubclass(dl_obj, parent):
            raise ValueError(
                "DataLoader does't inherit from the specified dataloader: {0}".
                format(parent.__name__))

    # attach the description fields and the file locations to the class
    inherited = (
        ("type", spec.type),
        ("defined_as", spec.defined_as),
        ("args", spec.args),
        ("info", spec.info),
        ("output_schema", spec.output_schema),
        ("dependencies", spec.dependencies),
        ("postprocessing", spec.postprocessing),
        ("_yaml_path", yaml_path),
        ("source_dir", dataloader_dir),
    )
    for attr_name, attr_value in inherited:
        setattr(dl_obj, attr_name, attr_value)

    # NOTE: attaching `print_args` is disabled in this variant:
    # dl_obj.print_args = classmethod(print_dl_kwargs)

    return dl_obj
def get_dataloader_factory(dataloader, source="kipoi"):
    """Loads the dataloader

    # Arguments
        dataloader (str): dataloader name
        source (str): source name

    # Returns
    - Class inheriting from `kipoi.data.BaseDataLoader` (like
      `kipoi.data.Dataset`) decorated with additional attributes.

    # Methods
    - __batch_iter(batch_size, num_workers, **kwargs)__
         - Arguments
             - **batch_size**: batch size
             - **num_workers**: Number of workers to use in parallel.
             - ****kwargs**: Other kwargs specific to each dataloader
         - Yields
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - __batch_train_iter(cycle=True, **kwargs)__
         - Arguments
             - **cycle**: if True, cycle indefinitely
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - tuple of ("inputs", "targets") from the usual dict returned
               by `batch_iter()`
    - __batch_predict_iter(**kwargs)__
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - "inputs" field from the usual dict returned by `batch_iter()`
    - __load_all(**kwargs)__ - load the whole dataset into memory
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Returns
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - **init_example()** - instantiate the dataloader with example kwargs
    - **print_args()** - print information about the required arguments

    # Appended attributes
    - **type** (str): dataloader type (class name)
    - **defined_as** (str): path and dataloader name, separated by `::`
    - **args**: dataloader argument descriptions, keyed by argument name
    - **info** (kipoi.specs.Info): general information about the dataloader
    - **output_schema** (kipoi.specs.DataloaderSchema): information about the
      input/output data modalities
    - **dependencies** (kipoi.specs.Dependencies): class specifying the
      dependencies (implements `install` method for running the installation)
    - **postprocessing** (dict): dictionary of loaded plugin specifications
    - **source**: source object returned by `kipoi.config.get_source`
    - **source_dir** (str): local directory containing the dataloader yaml
    - **example_kwargs** (dict): kwargs for running the provided example

    # Raises
        ValueError: if the yaml `type` is not in `AVAILABLE_DATALOADERS`,
            if the arguments reported by `getargs` differ from the yaml
            `args` keys, or if the class does not inherit from the
            dataloader class registered for its type.
    """
    # fetch the dataloader through its source; everything else is relative
    # to the directory containing the yaml file
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)
    yaml_name = os.path.basename(yaml_path)

    # --------------------------------------------
    # Parse the yaml and import the referenced object from inside that directory
    with cd(dataloader_dir):
        dl_descr = DataLoaderDescription.load(yaml_name)
        src_file, member = tuple(dl_descr.defined_as.split("::"))
        klass = getattr(load_module(src_file), member)

    dl_type = dl_descr.type

    # reject dataloader types we do not know about
    if dl_type not in AVAILABLE_DATALOADERS:
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(dl_type, list(AVAILABLE_DATALOADERS.keys())))

    # the signature of the python object must mirror the yaml `args` section
    if not getargs(klass) == set(dl_descr.args.keys()):
        got = set(getargs(klass))
        expected = set(dl_descr.args.keys())
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(got) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(expected))

    if dl_type in DATALOADERS_AS_FUNCTIONS:
        # plain functions are converted into dataloader classes
        assert isinstance(klass, types.FunctionType)
        klass = AVAILABLE_DATALOADERS[dl_type].from_fn(klass)
    elif not issubclass(klass, AVAILABLE_DATALOADERS[dl_type]):
        raise ValueError("DataLoader does't inherit from the specified dataloader: {0}".
                         format(AVAILABLE_DATALOADERS[dl_type].__name__))

    logger.info('successfully loaded the dataloader from {}'.
                format(os.path.normpath(os.path.join(dataloader_dir, dl_descr.defined_as))))

    # Mirror the description onto the class
    # TODO - make this more automatic / DRY
    klass.type = dl_descr.type
    klass.defined_as = dl_descr.defined_as
    klass.args = dl_descr.args
    klass.info = dl_descr.info
    klass.output_schema = dl_descr.output_schema
    klass.dependencies = dl_descr.dependencies
    klass.postprocessing = dl_descr.postprocessing

    # Provenance of the dataloader
    klass._yaml_path = yaml_path
    klass.source = source
    klass.source_dir = dataloader_dir  # TODO - rename?

    # Example-based constructor and argument printer
    klass.example_kwargs = example_kwargs(klass.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)

    klass.init_example = classmethod(init_example)
    klass.print_args = classmethod(print_dl_kwargs)

    return klass