Exemplo n.º 1
0
def write_prg(output_prefix: str, prg_string: str):
    """
    Write the PRG to ``<output_prefix>.prg`` and ``<output_prefix>.bin``.

    The ``.prg`` file is human readable: a ``>``-prefixed header line followed
    by ``prg_string``. The ``.bin`` file receives the integer vector returned
    by ``PrgEncoder.encode``, serialised through ``PrgEncoder.write``.

    The header is parsed from the file name part of the prefix, which is
    expected to look like ``<sample>.max_nest<N>.min_match<M>``. If it does
    not, a warning is logged and the header is just the fallback name
    ``sample`` (there are no nesting/match values to report in that case).

    Args:
        output_prefix: Path prefix shared by both output files.
        prg_string: The PRG in its human readable string form.
    """
    prg_filename = Path(output_prefix + ".prg")

    # Parse the prefix *before* opening the output file so that nothing is
    # created on disk if header construction fails.
    regex = re.compile(
        r"^(?P<sample>.+)\.max_nest(?P<max_nest>\d+)\.min_match(?P<min_match>\d+)"
    )
    match = regex.search(prg_filename.stem)

    if match is None:
        # `search` returns None on failure; calling `.group` on it would raise
        # AttributeError, so the no-match case must be tested for explicitly.
        logging.warning(
            "A sample name couldn't be parsed from the prefix. "
            "Using 'sample' as sample name."
        )
        header = "sample"
    else:
        sample = match.group("sample")
        max_nest = int(match.group("max_nest"))
        min_match = int(match.group("min_match"))
        header = f"{sample} max_nest={max_nest} min_match={min_match}"

    with prg_filename.open("w") as prg:
        print(f">{header}\n{prg_string}", file=prg)

    prg_ints_fpath = Path(output_prefix + ".bin")
    prg_encoder = PrgEncoder()
    prg_ints: PRG_Ints = prg_encoder.encode(prg_string)

    with prg_ints_fpath.open("wb") as ostream:
        prg_encoder.write(prg_ints, ostream)
Exemplo n.º 2
0
    def test_encode_prg_one_site_deletion(self):
        """``" 5  6 C 5 "`` (nothing between markers 5 and 6) should encode to ``[5, 6, 2, 5]``."""
        prg_with_empty_allele = " 5  6 C 5 "

        encoded = PrgEncoder().encode(prg_with_empty_allele)

        self.assertEqual(encoded, [5, 6, 2, 5])
Exemplo n.º 3
0
    def test_encode_prg_with_one_snp(self):
        """``"5 A 6 C 5"`` should encode to ``[5, 1, 6, 2, 5]``."""
        snp_prg = "5 A 6 C 5"

        encoded = PrgEncoder().encode(snp_prg)

        self.assertEqual(encoded, [5, 1, 6, 2, 5])
Exemplo n.º 4
0
    def test_encode_prg_spacing_no_variants(self):
        """A lone lowercase base padded with spaces (``" a "``) should encode to ``[1]``."""
        padded_base = " a "

        encoded = PrgEncoder().encode(padded_base)

        self.assertEqual(encoded, [1])
Exemplo n.º 5
0
    def test_encode_prg_nonlinear_markers(self):
        """
        Multi-digit, non-consecutive markers (55 / 63) are expected to be kept
        as-is, and the mixed-case allele ``Ct`` to encode as ``[2, 4]``.
        """
        prg_with_sparse_markers = "55 GA 63 Ct 55"

        encoded = PrgEncoder().encode(prg_with_sparse_markers)

        self.assertEqual(encoded, [55, 3, 1, 63, 2, 4, 55])
Exemplo n.º 6
0
    def test_encode_prg_multi_base_alleles(self):
        """Two-base alleles (``GA`` / ``CT``) should each expand to one integer per base."""
        multi_base_prg = "5 GA 6 CT 5"

        encoded = PrgEncoder().encode(multi_base_prg)

        self.assertEqual(encoded, [5, 3, 1, 6, 2, 4, 5])
Exemplo n.º 7
0
    def test_encode_prg_nested_variation(self):
        """
        Checks the encoding of a PRG string in which markers 7 and 8 appear
        between the opening marker 5 and its first 6.
        """
        nested_prg = "5 A 7 C 8 T 8 A 7 6 CT 6 TA 5"
        expected_ints = [5, 1, 7, 2, 8, 4, 8, 1, 8, 6, 2, 4, 6, 4, 1, 6]

        encoded = PrgEncoder().encode(nested_prg)

        self.assertEqual(encoded, expected_ints)
Exemplo n.º 8
0
def write_prg(prg_fname: Path, prg_string: str, options: ArgumentParser):
    """
    Write the PRG in the formats selected by ``options.output_type``.

    * ``options.output_type.prg``: write ``prg_fname`` as text, a ``>`` header
      line (sequence id, max nesting, min match length) followed by
      ``prg_string``.
    * ``options.output_type.binary``: write the integer vector returned by
      ``PrgEncoder.encode`` to ``prg_fname`` with its suffix replaced by
      ``.bin``.

    Args:
        prg_fname: Path of the text PRG file; the binary path is derived from it.
        prg_string: The PRG in its human readable string form.
        options: Object exposing ``seqid``, ``prg_name``, ``max_nesting``,
            ``min_match_length`` and ``output_type`` attributes.
    """
    # The explicit sequence id wins; the PRG name is used when it is falsy.
    seqid = options.seqid or options.prg_name

    if options.output_type.prg:
        header = f">{seqid} max_nest={options.max_nesting} min_match={options.min_match_length}"
        with prg_fname.open("w") as text_out:
            print(header, prg_string, sep="\n", file=text_out)

    if options.output_type.binary:
        binary_path = prg_fname.with_suffix(".bin")
        encoder = PrgEncoder()
        # Encode before opening the file, so a failed encode creates nothing.
        encoded: PRG_Ints = encoder.encode(prg_string)

        with binary_path.open("wb") as binary_out:
            encoder.write(encoded, binary_out)
Exemplo n.º 9
0
    def test_encode_empty_string_returns_empty(self):
        """Encoding the empty string should give an empty list."""
        encoded = PrgEncoder().encode("")

        self.assertEqual(encoded, [])
Exemplo n.º 10
0
 def test_permutations_of_valid_input_passes(self, prg):
     """
     Encode the supplied ``prg``; the test passes as long as nothing raises.

     NOTE(review): ``prg`` is presumably injected by a parametrising
     decorator that is not visible here - confirm.
     """
     PrgEncoder().encode(prg)
Exemplo n.º 11
0
 def encode_and_write_prg(self, prg_string: str):
     """
     Encode ``prg_string`` with a fresh ``PrgEncoder`` and write the result,
     in binary mode, to the file named by ``self.out_fname``.
     """
     encoder = PrgEncoder()
     encoded = encoder.encode(prg_string)
     with open(self.out_fname, "wb") as binary_out:
         encoder.write(encoded, binary_out)