示例#1
0
def test_config_parser_env_interpolation_missing(
    conf_schema_basic, fixtures_dir
):
    """Smoke test: loading a config that interpolates a missing env var
    completes without raising (no assertions on the result)."""
    conf_path = os.path.join(fixtures_dir, "env_interpolation_conf.toml")
    GPFConfigParser.load_config(conf_path, conf_schema_basic)
示例#2
0
def default_dae_config(request, cleanup):
    """Build a DAE config fixture backed by a temporary studies directory.

    Reads the base ``DAE.conf`` from ``$DAE_DB_DIR``, redirects the studies
    directory to a fresh temp dir, injects a TEST_REMOTE remote entry and
    returns the processed config.  When ``cleanup`` is true the temp dir is
    removed on fixture teardown.
    """
    studies_dirname = tempfile.mkdtemp(prefix="studies_", suffix="_test")

    def fin():
        shutil.rmtree(studies_dirname)

    if cleanup:
        request.addfinalizer(fin)

    # BUG FIX: os.environ.get("DAE_DB_DIR", None) would make os.path.join
    # raise an opaque TypeError when the variable is unset; a direct lookup
    # raises a clear KeyError naming the missing variable.
    dae_conf_path = os.path.join(os.environ["DAE_DB_DIR"], "DAE.conf")
    dae_config = GPFConfigParser.parse_config(dae_conf_path)
    dae_config["studies_db"]["dir"] = studies_dirname

    remote_config = {
        "id": "TEST_REMOTE",
        "host": "gpfremote",
        "base_url": "api/v3",
        "port": 21010,
        "user": "******",
        "password": "******",
    }
    if "remotes" not in dae_config:
        dae_config["remotes"] = [remote_config]
    else:
        # An existing remotes list gets its first entry replaced.
        dae_config["remotes"][0] = remote_config

    return GPFConfigParser.process_config(
        dae_config, dae_conf_schema, config_filename=dae_conf_path)
示例#3
0
def test_pheno_regressions_from_conf_path(regressions_conf):
    """Regression definitions loaded from the conf file match expectations."""
    regs = GPFConfigParser.load_config(
        regressions_conf, regression_conf_schema)

    expected_specs = [
        ("reg1", "i1", "regressor1", 0.1),
        ("reg2", "i1", "regressor2", 0.2),
        ("reg3", "", "common_regressor", 0.3),
        ("reg4", "i2", "regressor1", 0.4),
    ]
    expected_regs = {
        name: {
            "instrument_name": instrument,
            "measure_name": measure,
            "jitter": jitter,
        }
        for name, instrument, measure, jitter in expected_specs
    }

    assert len(regs.regression) == len(expected_regs)
    for reg_name, expected_reg in expected_regs.items():
        assert regs.regression[reg_name] == expected_reg
示例#4
0
    def __init__(
            self,
            dae_config=None,
            config_file="DAE.conf",
            work_dir=None,
            defaults=None,
            load_eagerly=False):
        """Initialize the instance from a DAE config.

        When ``dae_config`` is not supplied it is loaded from
        ``work_dir/config_file`` (``work_dir`` defaults to ``$DAE_DB_DIR``).
        ``load_eagerly`` forces lazy sub-components to be built up-front.
        """
        if dae_config is None:
            # FIXME Merge defaults with newly-loaded config
            assert not defaults, defaults
            if work_dir is None:
                work_dir = os.environ["DAE_DB_DIR"]
            config_file = os.path.join(work_dir, config_file)
            dae_config = GPFConfigParser.load_config(
                config_file, dae_conf_schema
            )

        self.dae_config = dae_config
        # NOTE: stays None when dae_config was passed in explicitly.
        self.dae_db_dir = work_dir
        self.__autism_gene_profile_config = None
        self.load_eagerly = load_eagerly

        if load_eagerly:
            # Touch lazy properties so they are constructed immediately.
            # BUG FIX: _gene_info_config was listed twice in the original;
            # one access is sufficient.
            self.genomes_db
            self.gene_sets_db
            self._gene_info_config
            self._pheno_db
            self._variants_db
            self.denovo_gene_sets_db
            self._score_config
            self._scores_factory
            self.genotype_storage_db
            self._common_report_facade
            self._background_facade
示例#5
0
    def read_and_parse_file_configuration(cls, options, config_file):
        """Load an annotation config file and parse its annotator sections."""
        raw = GPFConfigParser.load_config(
            config_file, annotation_conf_schema).to_dict()

        raw["options"] = options
        raw["columns"] = {}
        raw["native_columns"] = []
        raw["virtual_columns"] = []
        raw["output_columns"] = []

        config = cls._setup_defaults(DefaultBox(raw))

        parsed_sections = []
        for config_section in config.sections:
            # Sections without an annotator are ignored.
            if config_section.annotator is None:
                continue
            section_dict = recursive_dict_update(
                {"options": options}, config_section.to_dict()
            )
            parsed_sections.append(cls.parse_section(section_dict))

        config["sections"] = parsed_sections

        return FrozenBox(config)
示例#6
0
def main(argv=None, gpf_instance=None):
    """Import a variants dataset into an impala genotype storage and save
    the resulting study config.

    Args:
        argv: CLI argument list; defaults to ``sys.argv[1:]`` evaluated at
            call time (a signature default would freeze it at import time).
        gpf_instance: optional pre-built GPFInstance; created when None.
    """
    if argv is None:
        argv = sys.argv[1:]
    if gpf_instance is None:
        gpf_instance = GPFInstance()

    argv = parse_cli_arguments(argv, gpf_instance)

    genotype_storage_db = gpf_instance.genotype_storage_db
    genotype_storage = genotype_storage_db.get_genotype_storage(
        argv.genotype_storage
    )
    # Simplified from `not s or (s and not s.is_impala())` — the inner
    # `s and` is redundant after the first disjunct fails.
    if not genotype_storage or not genotype_storage.is_impala():
        print("missing or non-impala genotype storage")
        return

    assert os.path.exists(argv.variants)

    study_config = genotype_storage.impala_load_dataset(
        argv.study_id, argv.variants, argv.pedigree)

    # Optional user-supplied study config overrides the generated one.
    if argv.study_config:
        input_config = GPFConfigParser.load_config_raw(argv.study_config)
        study_config = recursive_dict_update(study_config, input_config)

    study_config = StudyConfigBuilder(study_config).build_config()
    assert study_config is not None
    save_study_config(
        gpf_instance.dae_config, argv.study_id, study_config,
        force=argv.force)
示例#7
0
    def __init__(self, config, genomes_db):
        """Select a variant builder and ensure virtual-column defaults."""
        super(VariantAnnotatorBase, self).__init__(config, genomes_db)

        # VCF input uses the VCF builder; everything else uses DAE.
        builder_cls = VCFBuilder if self.config.options.vcf else DAEBuilder
        self.variant_builder = builder_cls(self.config, self.genomic_sequence)

        if not self.config.virtual_columns:
            default_virtual_columns = [
                "CSHL_location",
                "CSHL_chr",
                "CSHL_position",
                "CSHL_variant",
                "VCF_chr",
                "VCF_position",
                "VCF_ref",
                "VCF_alt",
            ]
            self.config = GPFConfigParser.modify_tuple(
                self.config,
                {"virtual_columns": default_virtual_columns},
            )
示例#8
0
def test_handle_regressions_default_jitter(mocker, fake_phenotype_data,
                                           output_dir,
                                           fake_phenotype_data_desc_conf):
    """Regression jitter values come from the loaded configuration."""
    def fake_build_regression(*args):
        return {"pvalue_regression_male": 0, "pvalue_regression_female": 0}

    mocked = mocker.patch(
        "dae.pheno_browser.prepare_data."
        "PreparePhenoBrowserBase.build_regression",
        side_effect=fake_build_regression,
    )

    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema)
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg)
    regressand = fake_phenotype_data.get_measure("i1.m1")

    # Drain the generator; only the recorded mock calls matter here.
    for _ in prep.handle_regressions(regressand):
        pass

    mocked.assert_called()
    expected_jitters = [0.12, 0.13]
    for call, expected_jitter in zip(mocked.call_args_list, expected_jitters):
        _measure, _reg_measure, jitter = call[0]
        assert jitter == expected_jitter
示例#9
0
    def _gene_info_config(self):
        """Load and return the gene info configuration."""
        conf_file = self.dae_config.gene_info_db.conf_file
        logger.debug("loading gene info config file: %s", conf_file)
        return GPFConfigParser.load_config(conf_file, gene_info_conf)
示例#10
0
    def _autism_gene_profile_config(self):
        """Load the autism gene profile config, or None when unavailable."""
        agp_config = self.dae_config.autism_gene_tool_config
        if agp_config is None:
            return None
        if not os.path.exists(agp_config.conf_file):
            return None

        return GPFConfigParser.load_config(
            agp_config.conf_file, autism_gene_tool_config)
示例#11
0
def test_config_parser_load_single(conf_schema_basic, fixtures_dir):
    """A single basic TOML config file loads with the expected values."""
    conf_path = os.path.join(fixtures_dir, "basic_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_basic)
    print(config)

    assert config.id == "152135"
    assert config.name == "Basic test config"
    section = config.section1
    assert section.someval1 == "beep"
    assert section.someval2 == 1.23
    assert section.someval3 == 52345
示例#12
0
def test_handle_regressions_regressand_is_regressor(
        fake_phenotype_data, output_dir, fake_phenotype_data_desc_conf):
    """A measure that is itself a regressor yields no regressions."""
    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema)
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg)
    regressand = fake_phenotype_data.get_measure("i1.age")

    # The generator should be exhausted immediately.
    with pytest.raises(StopIteration):
        next(prep.handle_regressions(regressand))
示例#13
0
def test_config_parser_string_interpolation(conf_schema_strings, fixtures_dir):
    """String variables interpolate into other values; vars itself is None."""
    conf_path = os.path.join(fixtures_dir, "vars_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_strings)
    print(config)

    assert config.id == "152135"
    assert config.name == "Vars test config"
    assert config.vars is None
    section = config.section1
    assert section.someval1 == "asdf"
    assert section.someval2 == "ghjkl"
    assert section.someval3 == "qwertyasdfghjk"
示例#14
0
def test_config_parser_set_config(conf_schema_set, fixtures_dir):
    """Set-typed config values parse into real Python sets."""
    conf_path = os.path.join(fixtures_dir, "set_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_set)
    print(config)

    assert config.id == "152135"
    assert config.name == "Set test config"
    section = config.section1
    assert section.someval1 == "ala"
    assert isinstance(section.someval2, set)
    # Empty symmetric difference means exact set equality.
    assert not (section.someval2 ^ {"a", "b", "c", "d"})
    assert section.someval3 == 123
示例#15
0
def test_config_parser_load_paths(conf_schema_path, fixtures_dir, mocker):
    """Relative paths in a config resolve against the config's directory."""
    # Pretend every referenced path exists so validation passes.
    mocker.patch("os.path.exists").return_value = True

    conf_path = os.path.join(fixtures_dir, "path_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_path)
    print(config)

    assert config.id == "152135"
    assert config.name == "Path test config"
    assert config.some_abs_path == "/tmp/maybesomeconf.toml"
    expected_rel = os.path.join(fixtures_dir, "environ_conf.toml")
    assert config.some_rel_path == expected_rel
示例#16
0
    def simple_study_import(
        self,
        study_id,
        families_loader=None,
        variant_loaders=None,
        study_config=None,
        **kwargs,
    ):
        """Build and return a study config for imported families/variants.

        Imports the families file and variants files into this storage,
        assembles a config dict (setting has_denovo/has_cnv from loader
        source types), optionally merges a user-supplied study config, and
        returns the built config.
        """
        families_config = self._import_families_file(study_id, families_loader)
        variants_config = self._import_variants_files(
            study_id, variant_loaders)

        config_dict = {
            "id": study_id,
            "conf_dir": ".",
            "has_denovo": False,
            "has_cnv": False,
            "genotype_storage": {
                "id": self.id,
                "files": {
                    "variants": variants_config,
                    "pedigree": families_config,
                },
            },
            "genotype_browser": {
                "enabled": True
            },
        }
        if not variant_loaders:
            config_dict["genotype_browser"]["enabled"] = False
        else:
            # BUG FIX: a stray `variant_loaders[0].get_attribute(...)` call
            # whose result was discarded has been removed.  Query each
            # loader's source type once instead of twice.
            source_types = [
                loader.get_attribute("source_type")
                for loader in variant_loaders
            ]
            if "denovo" in source_types:
                config_dict["has_denovo"] = True
            if "cnv" in source_types:
                # CNV variants are also counted as denovo.
                config_dict["has_denovo"] = True
                config_dict["has_cnv"] = True

        if study_config is not None:
            study_config_dict = GPFConfigParser.load_config_raw(study_config)
            config_dict = recursive_dict_update(config_dict, study_config_dict)

        config_builder = StudyConfigBuilder(config_dict)
        return config_builder.build_config()
示例#17
0
def test_handle_regressions_non_continuous_or_ordinal_measure(
        fake_phenotype_data, output_dir, fake_phenotype_data_desc_conf):
    """Categorical and raw measures produce no regressions at all."""
    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema)
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg)

    # i1.m5 is categorical, i1.m6 is raw; both should yield nothing.
    for measure_id in ("i1.m5", "i1.m6"):
        regressand = fake_phenotype_data.get_measure(measure_id)
        with pytest.raises(StopIteration):
            next(prep.handle_regressions(regressand))
示例#18
0
    def __init__(self, dae_config):
        """Index enabled phenotype-data configs found in the pheno dir."""
        super(PhenoDb, self).__init__()
        assert dae_config

        configs = GPFConfigParser.load_directory_configs(
            dae_config.phenotype_data.dir, pheno_conf_schema
        )

        # Keep only configs that define an enabled phenotype_data section.
        self.config = {}
        for config in configs:
            pheno = config.phenotype_data
            if pheno and pheno.enabled:
                self.config[pheno.name] = pheno

        self.pheno_cache = {}
示例#19
0
def test_config_parser_env_interpolation(
    conf_schema_basic, fixtures_dir, mocker
):
    """Environment variables are interpolated into config values."""
    mocker.patch.dict(os.environ, {"test_env_var": "bop"})

    conf_path = os.path.join(fixtures_dir, "env_interpolation_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_basic)
    print(config)

    assert config.id == "152135"
    assert config.name == "Environment interpolation test config"
    section = config.section1
    assert section.someval1 == "bop"  # interpolated from test_env_var
    assert section.someval2 == 1.23
    assert section.someval3 == 52345
示例#20
0
def test_config_parser_load_directory(conf_schema_basic, fixtures_dir):
    """All configs in a directory load; ids and names come back intact."""
    configs = GPFConfigParser.load_directory_configs(
        os.path.join(fixtures_dir, "sample_conf_directory"), conf_schema_basic
    )
    print(configs)

    assert len(configs) == 4
    ordered = sorted(configs, key=lambda c: c.id)
    expected = [("1", "conf1"), ("2", "conf2"), ("3", "conf3"), ("4", "conf4")]
    for conf, (conf_id, conf_name) in zip(ordered, expected):
        assert conf.id == conf_id
        assert conf.name == conf_name
示例#21
0
    def __init__(self, dae_dir, conf_file=None):
        """Load the genomes DB config and instantiate every genome in it."""
        self.dae_dir = dae_dir
        conf_file = conf_file or f"{dae_dir}/genomesDB.conf"

        self.config = GPFConfigParser.load_config(conf_file, genomes_db_conf)

        self._genomes = {}
        for section_id, genome_config in self.config.genome.items():
            genome = Genome.load_config(genome_config, section_id)
            assert genome is not None
            self._genomes[genome.genome_id] = genome

        # The configured default genome must be among the loaded ones.
        default_genome_id = self.config.genomes.default_genome
        assert default_genome_id in self._genomes
        self.default_genome = self._genomes[default_genome_id]
示例#22
0
def test_has_regression_measure(fake_phenotype_data, output_dir,
                                regressions_conf):
    """Every regressor/instrument pair from the conf is recognized."""
    reg = GPFConfigParser.load_config(regressions_conf, regression_conf_schema)
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg)

    expected_reg_measures = (
        ("regressor1", "i1"),
        ("regressor2", "i1"),
        ("common_regressor", ""),
        ("common_regressor", "i1"),
        ("common_regressor", "i2"),
        ("regressor1", "i2"),
    )

    for measure_name, instrument_name in expected_reg_measures:
        assert prep._has_regression_measure(measure_name, instrument_name)
示例#23
0
    def _load_group_configs(self):
        """Load enabled genotype-group configs from the datasets directory."""
        default_config_filename = None
        default_study_config = self.dae_config.default_study_config
        if default_study_config and default_study_config.conf_file:
            default_config_filename = default_study_config.conf_file

        group_configs = GPFConfigParser.load_directory_configs(
            self.dae_config.datasets_db.dir,
            study_config_schema,
            default_config_filename=default_config_filename,
        )

        genotype_group_configs = {}
        for group_config in group_configs:
            assert group_config.id is not None, group_config
            # Explicitly disabled groups are skipped.
            if group_config.enabled is False:
                continue
            genotype_group_configs[group_config.id] = group_config
        return genotype_group_configs
示例#24
0
    def _load_study_configs(self):
        """Load enabled genotype-study configs from the studies directory."""
        default_config_filename = None
        default_study_config = self.dae_config.default_study_config
        if default_study_config and default_study_config.conf_file:
            default_config_filename = default_study_config.conf_file

        study_configs = GPFConfigParser.load_directory_configs(
            self.dae_config.studies_db.dir,
            study_config_schema,
            default_config_filename=default_config_filename,
        )

        genotype_study_configs = {}
        for study_config in study_configs:
            assert study_config.id is not None, study_config
            # Explicitly disabled studies are skipped.
            if study_config.enabled is False:
                continue
            genotype_study_configs[study_config.id] = study_config
        return genotype_study_configs
示例#25
0
    def __init__(self, score_filename, config_filename=None):
        """Open a score file and load/validate its column configuration.

        Args:
            score_filename: path to the score file; must exist.
            config_filename: optional config path; defaults to
                ``<score_filename>.conf``.
        """
        self.score_filename = score_filename
        assert os.path.exists(self.score_filename), self.score_filename

        if config_filename is None:
            config_filename = f"{self.score_filename}.conf"

        self.config = GPFConfigParser.load_config(config_filename,
                                                  score_file_conf_schema)

        assert self.config.general.header is not None
        assert self.config.columns.score is not None
        self.header = self.config.general.header
        logger.debug(f"score file {os.path.basename(self.score_filename)} "
                     f"header {self.header}")
        self.score_names = self.config.columns.score

        # Order schema columns to match the file header.
        self.schema = Schema.from_dict(self.config.score_schema).order_as(
            self.header)
        logger.debug(f"score file {os.path.basename(self.score_filename)} "
                     f"schema {self.schema.col_names}")

        # Every configured score column must exist in the schema; a
        # generator avoids building a throwaway list for all().
        assert all(sn in self.schema for sn in self.score_names), [
            self.score_filename,
            self.score_names,
            self.schema.col_names,
        ]

        # NOTE(review): chr_name/pos_begin_name/pos_end_name appear to be
        # defined elsewhere on the class — confirm.
        self.chr_index = self.schema.col_names.index(self.chr_name)
        self.pos_begin_index = self.schema.col_names.index(self.pos_begin_name)
        self.pos_end_index = self.schema.col_names.index(self.pos_end_name)

        self.chr_prefix = getattr(self.config.general, "chr_prefix", False)

        # "na"/"none" (case-insensitive) mean "no score value".
        self.no_score_value = self.config.general.no_score_value or "na"
        if self.no_score_value.lower() in ("na", "none"):
            self.no_score_value = None

        self._init_access()
示例#26
0
    def _build_annotator_for(self, score_name):
        """Create a position-score annotator for a single score column."""
        scores_directory = self.config.options.scores_directory
        assert os.path.exists(scores_directory), scores_directory

        score_filename = self._get_score_file(score_name)

        # Point the shared options at this score's file.
        options = GPFConfigParser.modify_tuple(
            self.config.options, {"scores_file": score_filename}
        )

        section_dict = {
            "options": options,
            "columns": {score_name: getattr(self.config.columns, score_name)},
            "annotator": "score_annotator.VariantScoreAnnotator",
            "virtual_columns": [],
        }
        variant_config = AnnotationConfigParser.parse_section(section_dict)

        return PositionScoreAnnotator(variant_config, self.genomes_db)
示例#27
0
def test_handle_regressions(mocker, fake_phenotype_data, output_dir,
                            fake_phenotype_data_desc_conf):
    """handle_regressions builds age/nviq regressions with expected jitter."""
    def fake_build_regression(dependent_measure, independent_measure, jitter):
        return {
            "regressand": dependent_measure,
            "regressor": independent_measure,
            "jitter": jitter,
            "pvalue_regression_male": 0,
            "pvalue_regression_female": 0,
        }

    mocked = mocker.patch(
        "dae.pheno_browser.prepare_data."
        "PreparePhenoBrowserBase.build_regression",
        side_effect=fake_build_regression,
    )

    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema)
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg)
    regressand = fake_phenotype_data.get_measure("i1.m1")

    res = [r for r in prep.handle_regressions(regressand) if r is not None]
    assert len(res) == 2
    assert sorted(r["regression_id"] for r in res) == sorted(["age", "nviq"])

    mocked.assert_called()
    expected_calls = [("i1.m1", "i1.age", 0.12), ("i1.m1", "i1.iq", 0.13)]
    for call, expected in zip(mocked.call_args_list, expected_calls):
        measure, reg_measure, jitter = call[0]
        assert (measure.measure_id, reg_measure.measure_id, jitter) == expected
示例#28
0
    def simple_study_import(self,
                            study_id,
                            families_loader=None,
                            variant_loaders=None,
                            study_config=None,
                            output=".",
                            include_reference=False):
        """Write families/variants to parquet and build a study config.

        Each variant loader gets a bucket index: denovo loaders keep their
        enumeration index (< 100), transmitted loaders get index + 100.
        has_denovo/has_cnv are derived from the loaders' source types.
        """
        variants_dir = None
        has_denovo = False
        has_cnv = False
        bucket_index = 0

        if variant_loaders:
            # Loop-invariant values hoisted out of the loop (they were
            # recomputed every iteration in the original).
            variants_dir = os.path.join(output, "variants")
            partition_description = NoPartitionDescriptor(variants_dir)

            for index, variant_loader in enumerate(variant_loaders):
                assert isinstance(variant_loader, VariantsLoader), \
                    type(variant_loader)

                # Query the source type once instead of twice.
                source_type = variant_loader.get_attribute("source_type")
                if source_type == "denovo":
                    has_denovo = True
                if source_type == "cnv":
                    # CNV variants also count as denovo.
                    has_denovo = True
                    has_cnv = True

                if variant_loader.transmission_type == \
                        TransmissionType.denovo:
                    assert index < 100

                    bucket_index = index  # denovo buckets < 100
                elif variant_loader.transmission_type == \
                        TransmissionType.transmitted:
                    bucket_index = index + 100  # transmitted buckets >=100

                ParquetManager.variants_to_parquet(
                    variant_loader,
                    partition_description,
                    bucket_index=bucket_index,
                    include_reference=include_reference)

        pedigree_filename = os.path.join(output, "pedigree",
                                         "pedigree.parquet")
        families = families_loader.load()
        ParquetManager.families_to_parquet(families, pedigree_filename)

        config_dict = self.impala_load_dataset(study_id,
                                               variants_dir=variants_dir,
                                               pedigree_file=pedigree_filename)

        config_dict["has_denovo"] = has_denovo
        config_dict["has_cnv"] = has_cnv

        # Optional user-supplied study config overrides the generated one.
        if study_config is not None:
            study_config_dict = GPFConfigParser.load_config_raw(study_config)
            config_dict = recursive_dict_update(config_dict, study_config_dict)

        config_builder = StudyConfigBuilder(config_dict)

        return config_builder.build_config()
示例#29
0
def get_person_set_collections_config(config_path):
    """Load a config and return only its person-set-collections section."""
    schema = {"person_set_collections": person_set_collections_schema}
    config = GPFConfigParser.load_config(config_path, schema)
    return config.person_set_collections
示例#30
0
def main(argv):
    """CLI entry point: import phenotype data and build the pheno browser.

    Returns 0 on success (or keyboard interrupt), 2 on error.
    """
    try:
        # Setup argument parser

        gpf_instance = GPFInstance()
        dae_conf = gpf_instance.dae_config

        parser = pheno_cli_parser()
        args = parser.parse_args(argv)
        # BUG FIX: sys.stderr was passed as a positional print argument
        # (printing its repr to stdout); it must be the file= keyword.
        if args.instruments is None:
            print("missing instruments directory parameter", file=sys.stderr)
            raise ValueError()
        if args.pedigree is None:
            print("missing pedigree filename", file=sys.stderr)
            raise ValueError()
        if args.pheno_name is None:
            print("missing pheno db name", file=sys.stderr)
            raise ValueError()

        args.pheno_name = verify_phenotype_data_name(args.pheno_name)

        pheno_db_dir = os.path.join(dae_conf.phenotype_data.dir,
                                    args.pheno_name)
        if not os.path.exists(pheno_db_dir):
            os.makedirs(pheno_db_dir)

        args.pheno_db_filename = os.path.join(pheno_db_dir,
                                              "{}.db".format(args.pheno_name))
        # Refuse to overwrite an existing db unless --force is given.
        if os.path.exists(args.pheno_db_filename):
            if not args.force:
                print("pheno db filename already exists:",
                      args.pheno_db_filename)
                raise ValueError()
            os.remove(args.pheno_db_filename)

        args.browser_dir = os.path.join(pheno_db_dir, "browser")
        if not os.path.exists(args.browser_dir):
            os.makedirs(args.browser_dir)

        config = parse_phenotype_data_config(args)
        if args.regression:
            regressions = GPFConfigParser.load_config(args.regression,
                                                      regression_conf_schema)
        else:
            regressions = None

        prep = PrepareVariables(config)
        prep.build_pedigree(args.pedigree)
        prep.build_variables(args.instruments, args.data_dictionary)

        build_pheno_browser(
            args.pheno_db_filename,
            args.pheno_name,
            args.browser_dir,
            regressions,
        )

        # Persist the generated phenotype-data config next to the db.
        pheno_conf_path = os.path.join(pheno_db_dir,
                                       "{}.conf".format(args.pheno_name))

        with open(pheno_conf_path, "w") as pheno_conf_file:
            pheno_conf_file.write(
                toml.dumps(generate_phenotype_data_config(args, regressions)))

        return 0
    except KeyboardInterrupt:
        return 0
    except Exception as e:
        # Top-level CLI boundary: report the error, return non-zero exit.
        traceback.print_exc()

        program_name = "simple_pheno_import.py"
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2