def get_dataloader_factory(dataloader, source="kipoi"):
    """Load a dataloader and return its class annotated with the yaml description.

    The dataloader is pulled from `source`, its yaml description is parsed,
    the object named in `defined_as` (`<file>::<object>`) is imported and
    validated against the description, and the description fields are then
    attached to the resulting class as class attributes.

    # Arguments
        dataloader (str): dataloader name
        source (str): source name

    # Returns
        The dataloader class (not an instance) with the attributes `type`,
        `defined_as`, `args`, `info`, `output_schema`, `dependencies`,
        `postprocessing`, `_yaml_path`, `source`, `source_dir` and
        `example_kwargs` set, plus the class methods `init_example()` and
        `print_args()`.

    # Raises
        ValueError: if the dataloader type is not supported, if the arguments
            of the loaded object don't match the yaml specification, if a
            function-type dataloader is not a plain function, or if a
            class-type dataloader doesn't inherit from the declared base.
    """
    # pull the dataloader & get the dataloader directory
    # NOTE: `source` is rebound from the source name to the source object
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # --------------------------------------------
    # Setup dataloader description
    with cd(dataloader_dir):  # move to the dataloader directory temporarily
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # check that dl.type is correct
    if dl.type not in AVAILABLE_DATALOADERS:
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(dl.type, list(AVAILABLE_DATALOADERS.keys())))

    # check that the extractor arguments match yaml arguments
    if getargs(CustomDataLoader) != set(dl.args.keys()):
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(set(getargs(CustomDataLoader))) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(set(dl.args.keys())))

    # check that CustomDataLoader indeed inherits from the right DataLoader
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # transform the functions into objects
        # Explicit raise instead of `assert`: asserts are removed under
        # `python -O`, which would let a non-function reach `from_fn`.
        if not isinstance(CustomDataLoader, types.FunctionType):
            raise ValueError("DataLoader of type {0} has to be defined as a function. Found: {1}".
                             format(dl.type, type(CustomDataLoader).__name__))
        CustomDataLoader = AVAILABLE_DATALOADERS[dl.type].from_fn(CustomDataLoader)
    else:
        if not issubclass(CustomDataLoader, AVAILABLE_DATALOADERS[dl.type]):
            raise ValueError("DataLoader doesn't inherit from the specified dataloader: {0}".
                             format(AVAILABLE_DATALOADERS[dl.type].__name__))
    logger.info('successfully loaded the dataloader from {}'.
                format(os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))))

    # Inherit the attributes from dl
    # TODO - make this more automatic / DRY
    # write a method to load those things?
    CustomDataLoader.type = dl.type
    CustomDataLoader.defined_as = dl.defined_as
    CustomDataLoader.args = dl.args
    CustomDataLoader.info = dl.info
    CustomDataLoader.output_schema = dl.output_schema
    CustomDataLoader.dependencies = dl.dependencies
    CustomDataLoader.postprocessing = dl.postprocessing
    # keep it hidden?
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = source
    # TODO - rename?
    CustomDataLoader.source_dir = dataloader_dir

    # Add init_example method
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        """Instantiate the dataloader using the example kwargs."""
        return cls(**cls.example_kwargs)

    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader
def test_parse_correct_info(info_str, tmpdir):
    """A valid description parses, yields example kwargs and survives a config round-trip."""
    # add the input: targets headers
    full_yaml = inp_targ + info_str
    target_dir = str(tmpdir)

    # loading works
    parsed = CLS.from_config(from_yaml(full_yaml))
    parsed.path = target_dir

    # example files end up in the requested directory
    example_files = example_kwargs(parsed.args, target_dir)
    assert os.path.exists(example_files['intervals_file'])

    # both ways of obtaining the example kwargs return a dict
    assert isinstance(parsed.get_example_kwargs(), dict)
    assert isinstance(example_kwargs(parsed.args), dict)

    # the two example entries are parsed into the expected types
    assert isinstance(parsed.args["intervals_file"].example, RemoteFile)
    assert isinstance(parsed.args["fasta_file"].example, str)

    # cfg works
    reparsed = CLS.from_config(parsed.get_config())
    assert str(parsed) == str(reparsed)
def test_parse_correct_info(info_str):
    """A valid description parses, yields example kwargs and survives a config round-trip."""
    # add the input: targets headers
    full_yaml = inp_targ + info_str

    # loading works
    parsed = CLS.from_config(from_yaml(full_yaml))
    assert isinstance(example_kwargs(parsed.args), dict)

    # cfg works
    reparsed = CLS.from_config(parsed.get_config())
    assert str(parsed) == str(reparsed)
def example_kwargs(cls):
    """Return the example keyword arguments for the described class.

    Requires `cls.args` to be set. When `cls.source_dir` is None it is
    replaced (on the class itself) by the current working directory. The
    example files location passed on is
    `<source_dir>/downloaded/example_files`.

    NOTE(review): the call at the end resolves `example_kwargs` in the
    enclosing scope - presumably a module-level helper of the same name,
    with this definition living inside a class body; confirm.

    # Raises
        ValueError: if `cls.args` is None
    """
    if cls.args is None:
        raise ValueError("Class description `args` is missing. "
                         "Use `_add_description_factory` to annotate the class")

    if cls.source_dir is None:
        logger.info("Using current directory for source_dir")
        cls.source_dir = os.getcwd()

    # directory handed to the helper as the place for the example files
    example_files_dir = os.path.join(cls.source_dir, "downloaded/example_files")
    return example_kwargs(cls.args, example_files_dir)
def download_example(cls, output_dir, absolute_path=False, dry_run=False):
    """Download the example files to the desired directory

    Thin wrapper that forwards `cls.args` and the options below to
    `example_kwargs`.

    # Arguments
        output_dir: output directory where to store the file
        absolute_path: if True, return absolute paths to the output directories
        dry_run: if True, return only the file paths without actually
            downloading the files

    # Returns
        dictionary of keyword arguments for the dataloader
    """
    forwarded_options = {
        "absolute_path": absolute_path,
        "dry_run": dry_run,
    }
    return example_kwargs(cls.args, output_dir, **forwarded_options)
def get_dataloader_factory(dataloader, source="kipoi"):
    """Loads the dataloader

    # Arguments
        dataloader (str): dataloader name
        source (str): source name

    # Returns
    - Class (not an instance) inheriting from `kipoi.data.BaseDataLoader`
      (like `kipoi.data.Dataset`) decorated with additional attributes.

    # Raises
    - `ValueError` if the dataloader type is not supported, if the arguments
      of the loaded object don't match the yaml specification, if a
      function-type dataloader is not a plain function, or if a class-type
      dataloader doesn't inherit from the declared base class.

    # Methods
    - __batch_iter(batch_size, num_workers, **kwargs)__
         - Arguments
             - **batch_size**: batch size
             - **num_workers**: Number of workers to use in parallel.
             - ****kwargs**: Other kwargs specific to each dataloader
         - Yields
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - __batch_train_iter(cycle=True, **kwargs)__
         - Arguments
             - **cycle**: if True, cycle indefinitely
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - tuple of ("inputs", "targets") from the usual dict returned by `batch_iter()`
    - __batch_predict_iter(**kwargs)__
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - "inputs" field from the usual dict returned by `batch_iter()`
    - __load_all(**kwargs)__ - load the whole dataset into memory
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Returns
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - **init_example()** - instantiate the dataloader with example kwargs
    - **print_args()** - print information about the required arguments

    # Appended attributes
    - **type** (str): dataloader type (class name)
    - **defined_as** (str): path and dataloader name (`<file>::<object>`)
    - **args** (mapping of argument name to kipoi.specs.DataLoaderArgument):
        dataloader argument description
    - **info** (kipoi.specs.Info): general information about the dataloader
    - **output_schema** (kipoi.specs.DataloaderSchema): information about the
        input/output data modalities
    - **dependencies** (kipoi.specs.Dependencies): class specifying the dependencies.
          (implements `install` method for running the installation)
    - **source**: source object returned by `kipoi.config.get_source(source)`
    - **source_dir** (str): local directory containing the dataloader yaml file
    - **postprocessing** (dict): dictionary of loaded plugin specifications
    - **example_kwargs** (dict): kwargs for running the provided example
    - **_yaml_path** (str): path to the pulled dataloader yaml file
    """
    # pull the dataloader & get the dataloader directory
    # NOTE: `source` is rebound from the source name to the source object
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # --------------------------------------------
    # Setup dataloader description
    with cd(dataloader_dir):  # move to the dataloader directory temporarily
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # check that dl.type is correct
    if dl.type not in AVAILABLE_DATALOADERS:
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(dl.type, list(AVAILABLE_DATALOADERS.keys())))

    # check that the extractor arguments match yaml arguments
    if getargs(CustomDataLoader) != set(dl.args.keys()):
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(set(getargs(CustomDataLoader))) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(set(dl.args.keys())))

    # check that CustomDataLoader indeed inherits from the right DataLoader
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # transform the functions into objects
        # Explicit raise instead of `assert`: asserts are removed under
        # `python -O`, which would let a non-function reach `from_fn`.
        if not isinstance(CustomDataLoader, types.FunctionType):
            raise ValueError("DataLoader of type {0} has to be defined as a function. Found: {1}".
                             format(dl.type, type(CustomDataLoader).__name__))
        CustomDataLoader = AVAILABLE_DATALOADERS[dl.type].from_fn(CustomDataLoader)
    else:
        if not issubclass(CustomDataLoader, AVAILABLE_DATALOADERS[dl.type]):
            raise ValueError("DataLoader doesn't inherit from the specified dataloader: {0}".
                             format(AVAILABLE_DATALOADERS[dl.type].__name__))
    logger.info('successfully loaded the dataloader from {}'.
                format(os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))))

    # Inherit the attributes from dl
    # TODO - make this more automatic / DRY
    # write a method to load those things?
    CustomDataLoader.type = dl.type
    CustomDataLoader.defined_as = dl.defined_as
    CustomDataLoader.args = dl.args
    CustomDataLoader.info = dl.info
    CustomDataLoader.output_schema = dl.output_schema
    CustomDataLoader.dependencies = dl.dependencies
    CustomDataLoader.postprocessing = dl.postprocessing
    # keep it hidden?
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = source
    # TODO - rename?
    CustomDataLoader.source_dir = dataloader_dir

    # Add init_example method
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        """Instantiate the dataloader using the example kwargs."""
        return cls(**cls.example_kwargs)

    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader