Exemplo n.º 1
0
    def _setup(self):
        """Open the tabix file and initialise the prefix flag, region and schema.

        Stores a ``pysam.TabixFile`` for ``self.filename`` in ``self.infile``,
        records in ``self._has_chrom_prefix`` whether the last contig name
        starts with ``"chr"``, calls ``self._region_reset`` with
        ``self.region`` and builds ``self.schema`` from the value returned by
        ``self._header_read()``, passed under the ``"str"`` key.
        """
        self.infile = pysam.TabixFile(self.filename)

        # Only the last contig name is inspected for the "chr" prefix.
        last_contig = self.infile.contigs[-1]
        self._has_chrom_prefix = last_contig.startswith("chr")

        self._region_reset(self.region)

        # NOTE(review): presumably the header is a list of column names, all
        # declared as "str" columns -- confirm against Schema.from_dict.
        header_columns = self._header_read()
        self.schema = Schema.from_dict({"str": header_columns})
Exemplo n.º 2
0
    def _setup(self):
        """Open the gzip-compressed input in text mode and build the schema.

        Asserts that ``self.filename`` exists and that ``self.is_gzip``
        accepts it, opens it with ``gzip.open`` in ``"rt"`` mode, calls
        ``self._skip_metalines()`` when ``self.options.vcf`` is truthy, and
        builds ``self.schema`` from the value returned by
        ``self._header_read()``, passed under the ``"str"`` key.

        Raises:
            AssertionError: if the file does not exist or is not recognised
                by ``self.is_gzip``.
        """
        path = self.filename
        assert os.path.exists(path)
        assert self.is_gzip(path)
        self.infile = gzip.open(path, "rt")

        vcf_mode = self.options.vcf
        if vcf_mode:
            # Meta lines are skipped before the header is read.
            self._skip_metalines()

        header_columns = self._header_read()
        self.schema = Schema.from_dict({"str": header_columns})
Exemplo n.º 3
0
    def _setup(self):
        """Open the uncompressed input (or standard input) and build the schema.

        When ``self.filename`` is ``"-"`` the reader uses ``sys.stdin``.
        Otherwise the file is asserted to exist and to be rejected by
        ``self.is_gzip``, then opened with ``open`` in ``"r"`` mode. After
        that ``self._skip_metalines()`` is called when ``self.options.vcf``
        is truthy, and ``self.schema`` is built from the value returned by
        ``self._header_read()``, passed under the ``"str"`` key.

        Raises:
            AssertionError: if the file does not exist (the filename is the
                assertion message) or if ``self.is_gzip`` accepts it.
        """
        path = self.filename
        if path != "-":
            assert os.path.exists(path), path
            assert not self.is_gzip(path)
            self.infile = open(path, "r")
        else:
            # "-" selects standard input instead of a file on disk.
            self.infile = sys.stdin

        vcf_mode = self.options.vcf
        if vcf_mode:
            # Meta lines are skipped before the header is read.
            self._skip_metalines()

        header_columns = self._header_read()
        self.schema = Schema.from_dict({"str": header_columns})
Exemplo n.º 4
0
def test_schema_from_config(sample_schema_dict):
    """Check that ``Schema.from_dict`` yields the expected columns and types.

    For each expected column the test asserts that it is present in
    ``schema.columns`` and that its ``type_name`` equals the expected type.
    """
    schema = Schema.from_dict(sample_schema_dict)

    # (column name, expected type name) pairs, in the order they are checked.
    expected_pairs = (
        ("chr", "str"),
        ("position", "str"),
        ("variant", "str"),
        ("dummy_score", "float"),
    )

    for column_name, expected_type in expected_pairs:
        assert column_name in schema.columns
        assert expected_type == schema.columns[column_name].type_name
Exemplo n.º 5
0
def test_merge_schemas(generic_schema, generic_schema_alt):
    """Check the column order produced by ``Schema.merge_schemas``.

    The keys of the merged schema's ``columns`` must equal the expected
    list exactly, including order.
    """
    merged = Schema.merge_schemas(generic_schema, generic_schema_alt)

    expected_order = [
        "col1", "col2", "col3", "col4", "col5",
        "col6", "col7", "col8", "col11", "col12",
    ]
    actual_order = list(merged.columns.keys())

    assert actual_order == expected_order
Exemplo n.º 6
0
    def __init__(self, score_filename, config_filename=None):
        """Load the score file configuration and derive header, schema and indexes.

        Args:
            score_filename: path to the score file; asserted to exist.
            config_filename: path to the configuration file. When ``None``
                it defaults to ``score_filename`` with ``.conf`` appended.

        Raises:
            AssertionError: if the score file does not exist, if the loaded
                configuration has no ``general.header`` or ``columns.score``,
                or if any score name is missing from the schema.
        """
        self.score_filename = score_filename
        assert os.path.exists(self.score_filename), self.score_filename

        if config_filename is None:
            config_filename = "{}.conf".format(self.score_filename)

        self.config = GPFConfigParser.load_config(
            config_filename, score_file_conf_schema)

        general = self.config.general
        assert general.header is not None
        assert self.config.columns.score is not None

        basename = os.path.basename(self.score_filename)

        self.header = general.header
        logger.debug(f"score file {basename} "
                     f"header {self.header}")
        self.score_names = self.config.columns.score

        # The schema is built from the configuration and then reordered to
        # follow the header.
        unordered_schema = Schema.from_dict(self.config.score_schema)
        self.schema = unordered_schema.order_as(self.header)
        logger.debug(f"score file {basename} "
                     f"schema {self.schema.col_names}")

        assert all([name in self.schema for name in self.score_names]), [
            self.score_filename,
            self.score_names,
            self.schema.col_names,
        ]

        # Positions of the chromosome / begin / end columns in the schema.
        col_names = self.schema.col_names
        self.chr_index = col_names.index(self.chr_name)
        self.pos_begin_index = col_names.index(self.pos_begin_name)
        self.pos_end_index = col_names.index(self.pos_end_name)

        self.chr_prefix = getattr(general, "chr_prefix", False)

        # A missing, "na" or "none" (case-insensitive) setting is stored as
        # None; any other value is kept as configured.
        configured_no_score = general.no_score_value or "na"
        if configured_no_score.lower() in ("na", "none"):
            self.no_score_value = None
        else:
            self.no_score_value = configured_no_score

        self._init_access()
Exemplo n.º 7
0
 def merge_schemas(cls, left, right):
     """Merge ``left`` and ``right`` with ``Schema.merge_schemas`` and return
     the result passed through ``cls.convert``."""
     merged = Schema.merge_schemas(left, right)
     return cls.convert(merged)
Exemplo n.º 8
0
 def from_dict(cls, schema_dict):
     """Build a schema from ``schema_dict`` with ``Schema.from_dict`` and
     return the result passed through ``cls.convert``."""
     base_schema = Schema.from_dict(schema_dict)
     return cls.convert(base_schema)
Exemplo n.º 9
0
def generic_schema_alt():
    """Return a ``Schema`` built from a dict with a ``"str"`` list
    (col1, col7, col8) and a ``"float"`` list (col11, col12, col6)."""
    columns_by_type = {
        "str": ["col1", "col7", "col8"],
        "float": ["col11", "col12", "col6"],
    }
    return Schema.from_dict(columns_by_type)
Exemplo n.º 10
0
def generic_schema():
    """Return a ``Schema`` built from a dict with a ``"str"`` list
    (col1, col2, col3) and a ``"float"`` list (col4, col5, col6)."""
    columns_by_type = {
        "str": ["col1", "col2", "col3"],
        "float": ["col4", "col5", "col6"],
    }
    return Schema.from_dict(columns_by_type)