示例#1
0
    def prepare_reference(reference_params: dict, location: str, paired: bool):
        """
        Import the reference items described by a reference specification.

        Args:
            reference_params: dict with the keys "format" (used to resolve the "<format>Import" class and the
                default import parameters for that format) and "params" (format-specific import parameters,
                which must include "path" pointing to an existing file). This dict is not modified.
            location: name of the calling component; used as a prefix in the error messages.
            paired: value the "paired" import parameter has to have; filled in when it is not specified.

        Returns:
            the result of ImportHelper.import_items for the resolved import class, the reference path and the
            built import parameters.

        Raises:
            AssertionError: if "path" is missing or does not point to an existing file, or if "is_repertoire"
                or "paired" are specified with a value conflicting with the expected one.
        """
        ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location,
                                       "reference")

        # Shallow copy: the defaults filled in below must not leak into the caller's specification.
        seq_import_params = dict(reference_params.get("params") or {})

        # Check for the key explicitly so that a missing path is reported like every other specification
        # problem here instead of surfacing as a bare KeyError / TypeError.
        path = seq_import_params.get("path")
        assert path is not None, f"{location}: no path was specified under reference params. " \
                                 f"Specify the path to the reference file under reference."
        assert os.path.isfile(path), f"{location}: the file {path} does not exist. " \
                                     f"Specify the correct path under reference."

        if "is_repertoire" in seq_import_params:
            # equality (not identity) is kept on purpose so that the set of accepted values does not change
            assert seq_import_params["is_repertoire"] == False, f"{location}: is_repertoire must be False for SequenceImport"
        else:
            seq_import_params["is_repertoire"] = False

        if "paired" in seq_import_params:
            assert seq_import_params["paired"] == paired, f"{location}: paired must be {paired} for SequenceImport"
        else:
            seq_import_params["paired"] = paired

        format_str = reference_params["format"]

        import_class = ReflectionHandler.get_class_by_name(f"{format_str}Import")
        default_params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path / "datasets",
                                                  DefaultParamsLoader.convert_to_snake_case(format_str))

        # user-specified values take precedence over the format defaults
        processed_params = DatasetImportParams.build_object(**{**default_params, **seq_import_params})

        return ImportHelper.import_items(import_class, path, processed_params)
    def import_dataset(params, dataset_name: str) -> ReceptorDataset:
        """
        Build a receptor dataset from the files found under the import path and export it to the result path.

        Args:
            params: mapping of import parameters, expanded into DatasetImportParams.build_object.
            dataset_name: name assigned to the dataset; also passed on when looking up the sequence files.

        Returns:
            the imported dataset with its name and labels set.
        """
        import_params = DatasetImportParams.build_object(**params)
        sequence_files = ImportHelper.get_sequence_filenames(import_params.path, dataset_name)

        # the result directory is prepared before the import starts
        PathBuilder.build(import_params.result_path, warn_if_exists=True)

        receptor_dataset = SingleLineReceptorImport._import_from_files(sequence_files, import_params)
        receptor_dataset.name = dataset_name
        receptor_dataset.labels = ImportHelper.extract_sequence_dataset_params(params=import_params)

        PickleExporter.export(receptor_dataset, import_params.result_path)
        return receptor_dataset
示例#3
0
    def import_dataset(import_class, params: dict, dataset_name: str) -> Dataset:
        """
        Return the dataset found by ImportHelper.load_dataset_if_exists, or import it when there is none.

        Args:
            import_class: import class handed to the repertoire / sequence dataset import.
            params: import parameters, expanded into DatasetImportParams.build_object.
            dataset_name: name of the dataset to load or import.

        Returns:
            the previously existing dataset if one was found, otherwise the newly imported one.
        """
        import_params = DatasetImportParams.build_object(**params)

        existing = ImportHelper.load_dataset_if_exists(params, import_params, dataset_name)
        if existing is not None:
            return existing

        # backwards compatibility: a metadata file without an explicit is_repertoire means a repertoire dataset
        if import_params.is_repertoire is None and import_params.metadata_file is not None:
            import_params.is_repertoire = True

        importer = (ImportHelper.import_repertoire_dataset if import_params.is_repertoire
                    else ImportHelper.import_sequence_dataset)
        return importer(import_class, import_params, dataset_name)
示例#4
0
    def import_dataset(params: dict, dataset_name: str) -> Dataset:
        """
        Import a dataset from a path or, when no path is given, from a metadata file, then update its paths.

        Args:
            params: import parameters, expanded into DatasetImportParams.build_object.
            dataset_name: key under which the dataset was defined; used in the metadata import and the error.

        Returns:
            the dataset returned by the repertoire or receptor path update.

        Raises:
            ValueError: if neither path nor metadata file is set in the import parameters.
        """
        pickle_params = DatasetImportParams.build_object(**params)

        # the path takes precedence over the metadata file when both are set
        if pickle_params.path is not None:
            loaded = PickleImport._import_from_path(pickle_params)
        elif pickle_params.metadata_file is not None:
            loaded = PickleImport._import_from_metadata(pickle_params, dataset_name)
        else:
            raise ValueError(f"PickleImport: no path nor metadata file were defined under key {dataset_name}. At least one of these has "
                             f"to be specified to import the dataset.")

        update_paths = (PickleImport._update_repertoire_paths if isinstance(loaded, RepertoireDataset)
                        else PickleImport._update_receptor_paths)
        return update_paths(pickle_params, loaded)