def test_single_X(self):
    """Check that a lone X is replaced by a single G at any position."""
    cases = (
        ("GYTXTRS", "GYTGTRS"),  # X in the middle
        ("XGYTTRS", "GGYTTRS"),  # X at the start
        ("GYTTRSX", "GYTTRSG"),  # X at the end
    )
    for raw, expected in cases:
        converted = x_to_ggsg(Seq(raw))
        assert converted == Seq(expected)
def x2ggsg(input, output):
    """Replace stretches of Xs with Serine-Glycine linker (in a GGSG pattern)

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.
    """
    # Each record yielded by readfq is unpacked as (name, sequence, quality);
    # the quality field is not needed for the FASTA-style output written here.
    for name, seq, _qual in readfq(input):
        linked = x_to_ggsg(seq)
        # Tag the record title only when the conversion changed the sequence.
        title = f"{name}|withGSlinker" if linked != seq else name
        print(f">{title}\n{linked}", file=output)
def test_many_Xs(self):
    """Check that a run of nine Xs becomes GGSGGGSGG (GGSG repeated, then cut)."""
    source = Seq("GYTXXXXXXXXXTRS")
    converted = x_to_ggsg(source)
    expected = Seq("GYTGGSGGGSGGTRS")
    assert converted == expected
def test_many_single_Xs(self):
    """Check that several isolated Xs are each replaced by a single G."""
    source = Seq("GXYTXTXRXS")
    converted = x_to_ggsg(source)
    expected = Seq("GGYTGTGRGS")
    assert converted == expected
def test_double_X(self):
    """Check that a pair of adjacent Xs is replaced by GG."""
    source = Seq("GYTXXTRS")
    converted = x_to_ggsg(source)
    expected = Seq("GYTGGTRS")
    assert converted == expected
def test_multiple_stretches(self):
    """Check that separate X stretches are each converted from the pattern start.

    The input holds runs of 1, 3 and 2 Xs, expected to become G, GGS and GG.
    """
    source = Seq("XGYTXXXTRXXS")
    converted = x_to_ggsg(source)
    expected = Seq("GGYTGGSTRGGS")
    assert converted == expected
def test_Xs_infix(self):
    """Check that five Xs in the middle of a sequence become GGSGG."""
    source = Seq("GYTXXXXXTRS")
    converted = x_to_ggsg(source)
    expected = Seq("GYTGGSGGTRS")
    assert converted == expected
def test_Xs_suffix(self):
    """Check that four trailing Xs become GGSG."""
    source = Seq("GYTTRSXXXX")
    converted = x_to_ggsg(source)
    expected = Seq("GYTTRSGGSG")
    assert converted == expected
def test_Xs_prefix(self):
    """Check that three leading Xs become GGS."""
    source = Seq("XXXGYTTRS")
    converted = x_to_ggsg(source)
    expected = Seq("GGSGYTTRS")
    assert converted == expected
def test_no_Xs(self):
    """Check that a sequence without any X compares equal after conversion."""
    source = Seq("GYTTRS")
    converted = x_to_ggsg(source)
    assert source == converted
def test_null_seq(self):
    """Check that an empty sequence compares equal after conversion."""
    source = Seq("")
    converted = x_to_ggsg(source)
    assert source == converted