示例#1
0
def generate_motif_features(feature: CDSFeature,
                            motifs: List[HMMResult]) -> List[CDSMotif]:
    """ Builds a CDSMotif feature for each HMMResult found within a CDS

        Arguments:
            feature: the CDSFeature the motif hits were found in
            motifs: a list of HMMResults, one for each motif hit

        Returns:
            a list of CDSMotif features, in the same order as the hits provided
    """
    # the locus tag is preferred for naming, the CDS name is only a fallback
    fallback_name = feature.get_name()
    locus_tag = feature.locus_tag if feature.locus_tag else fallback_name

    created = []
    # the counter ends up in a user facing identifier, so it starts from 1
    for number, hit in enumerate(motifs, start=1):
        dna_location = feature.get_sub_location_from_protein_coordinates(
            hit.query_start, hit.query_end)
        protein_location = FeatureLocation(hit.query_start, hit.query_end)
        cds_motif = CDSMotif(dna_location, feature.get_name(), protein_location,
                             tool="nrps_pks_domains")

        # details of the hit itself
        cds_motif.label = hit.hit_id
        cds_motif.domain_id = 'nrpspksmotif_{}_{:04d}'.format(locus_tag, number)
        cds_motif.evalue = hit.evalue
        cds_motif.score = hit.bitscore
        # details of how the hit was found
        cds_motif.detection = "hmmscan"
        cds_motif.database = "abmotifs"
        cds_motif.locus_tag = locus_tag

        # only the section of the CDS translation covered by the hit
        cds_motif.translation = feature.translation[hit.query_start:hit.query_end]

        created.append(cds_motif)
    return created
def generate_domain_features(gene: CDSFeature, domains: List[HMMResult]) -> Dict[HMMResult, AntismashDomain]:
    """ Creates an AntismashDomain feature for every HMMResult given

        Arguments:
            gene: the CDSFeature in which the domains were found
            domains: the HMMResults found within that CDSFeature

        Returns:
            a dictionary with each HMMResult as a key and the AntismashDomain
            built from it as the value
    """
    features_by_hit = {}
    # how many times each kind of domain has been seen so far in this CDS
    seen_per_kind = defaultdict(int)  # type: Dict[str, int]
    for hit in domains:
        location = gene.get_sub_location_from_protein_coordinates(hit.query_start, hit.query_end)

        built = AntismashDomain(location, tool="nrps_pks_domains")
        built.domain = hit.hit_id
        built.locus_tag = gene.locus_tag or gene.get_name()
        built.detection = "hmmscan"
        built.database = "nrpspksdomains.hmm"
        built.evalue = hit.evalue
        built.score = hit.bitscore

        # NOTE(review): this slice ends at query_end + 1 while the location above
        # is built with query_end itself - confirm which end convention is intended
        built.translation = gene.translation[hit.query_start:hit.query_end + 1]

        # numbering is 1-indexed, so the counter is bumped before it is used
        seen_per_kind[hit.hit_id] += 1
        name = "{}_{}.{}".format(gene.get_name(), hit.hit_id, seen_per_kind[hit.hit_id])

        built.domain_id = "nrpspksdomains_" + name
        built.label = name

        features_by_hit[hit] = built
    return features_by_hit
示例#3
0
 def test_simple_location_forward_complete(self):
     # protein coordinates 0 to 5 cover all 15 bases of the CDS
     location = FeatureLocation(0, 15, 1)
     cds = CDSFeature(location, locus_tag="simple", translation="A")
     whole = cds.get_sub_location_from_protein_coordinates(0, 5)
     nucleotides = whole.extract(self.magic)
     assert nucleotides == self.magic
     assert nucleotides.translate() == self.translation
示例#4
0
 def test_invalid_qualifier(self):
     # assigning anything other than a SecMetQualifier to sec_met must fail
     location = FeatureLocation(1, 5, 1)
     cds = CDSFeature(location, locus_tag="test", translation="A")
     message = "can only be set to an instance of SecMetQualifier"
     for invalid in ("bad", ["stuff"], {}, 1):
         with self.assertRaisesRegex(TypeError, message):
             cds.sec_met = invalid
示例#5
0
 def test_simple_location_forward_partial(self):
     # each protein range must map to the nucleotide range three times its size
     cds = CDSFeature(FeatureLocation(0, 15, 1), locus_tag="simple", translation="A")
     protein_ranges = [(1, 5), (0, 3), (2, 3), (1, 4)]
     for start, end in protein_ranges:
         print("testing", start, end)
         sub_location = cds.get_sub_location_from_protein_coordinates(start, end)
         print(sub_location)
         nucleotides = sub_location.extract(self.magic)
         assert nucleotides == self.magic[start * 3:end * 3]
         assert nucleotides.translate() == self.translation[start:end]
示例#6
0
 def test_translation_outside_record(self):
     # a 5 residue translation on an open-ended location in a 10 base record
     # is expected to be rejected, whichever strand the open end is on
     record = DummyRecord(seq="A" * 10)
     open_ended_forward = FeatureLocation(0, AfterPosition(6), strand=1)
     open_ended_reverse = FeatureLocation(BeforePosition(4), 10, strand=-1)
     for location in (open_ended_forward, open_ended_reverse):
         bio_feature = SeqFeature(location, type="CDS")
         bio_feature.qualifiers["translation"] = ["M" * 5]
         with self.assertRaisesRegex(SecmetInvalidInputError, "translation extends out of record"):
             CDSFeature.from_biopython(bio_feature, record=record)
示例#7
0
 def test_compound_location_reverse_full(self):
     # the full protein range on the reverse strand must keep all three parts
     self.reverse_strand()
     cds = CDSFeature(self.location, locus_tag="compound", translation="A")
     sub_location = cds.get_sub_location_from_protein_coordinates(0, 5)
     assert isinstance(sub_location, CompoundLocation)
     assert len(sub_location.parts) == 3
     # shown to make failures easier to diagnose
     print([str(part) for part in cds.location.parts])
     print([str(part) for part in sub_location.parts])
     assert len(sub_location) == len(cds.location)
     protein = sub_location.extract(self.magic_split).translate()
     assert protein == self.translation[0:5]
示例#8
0
 def reverse_strand(self):
     # switches the whole fixture (sequences, locations and CDS) to the opposite strand
     self.magic = self.magic.reverse_complement()
     self.magic_split = self.magic_split.reverse_complement()

     flipped = []
     for part in self.sub_locations:
         flipped.append(FeatureLocation(part.start, part.end, strand=part.strand * -1))
     self.sub_locations = flipped

     # the strand of the first part doubles as the slice step, so a strand
     # of -1 reverses the order of the parts and a strand of 1 keeps it
     step = self.sub_locations[0].strand
     self.location = CompoundLocation(self.sub_locations[::step])
     self.cds = CDSFeature(self.location, locus_tag="compound", translation="A")
示例#9
0
 def setUp(self):
     # "MAGIC" as a contiguous coding sequence, and again split into three
     # exons separated by runs of filler bases
     self.translation = "MAGIC"
     self.magic = Seq("ATGGCAGGTATTTGT")
     self.magic_split = Seq("ATGGCAxxxxxxGGTxxxxxxATTTGT")
     # the exon coordinates within the split sequence, all on the forward strand
     exon_bounds = [(0, 6), (12, 15), (21, 27)]
     self.sub_locations = [FeatureLocation(start, end, strand=1)
                           for start, end in exon_bounds]
     self.location = CompoundLocation(self.sub_locations)
     self.cds = CDSFeature(self.location, locus_tag="compound", translation="A")
示例#10
0
 def test_required_identifiers(self):
     # with no identifier at all, construction must fail
     message = "requires at least one of: gene, protein_id, locus_tag"
     with self.assertRaisesRegex(ValueError, message):
         CDSFeature(FeatureLocation(1, 5, 1), translation="A")
     # any single identifier is enough
     for identifier in ("locus_tag", "protein_id", "gene"):
         naming = {identifier: "foo"}
         assert CDSFeature(FeatureLocation(1, 5, 1), translation="A", **naming)
    def setUp(self):
        # configuration restricted to clusterfinder, with no cluster types enabled
        options = [
            "--cf-create-clusters",
            "--cf-mean-threshold", "0.6",
            "--cf-min-cds", "5",
            "--cf-min-pfams", "5",
        ]
        self.config = build_config(options, modules=[clusterfinder], isolated=True)
        update_config({"enabled_cluster_types": []})

        self.record = DummyRecord(seq=Seq("A" * 2000))
        # each entry is (start, end, probability, PFAM identifier) and results in
        # one CDS with one PFAM domain at that location
        pfam_hits = [
            (10, 20, 0.1, 'PF77777'),
            (30, 40, 0.3, 'PF00106'),
            (50, 60, 0.4, 'PF00107'),
            (60, 70, 0.7, 'PF00109'),
            (70, 80, 0.98, 'PF08484'),
            (90, 100, 0.8, 'PF02401'),
            (100, 110, 0.32, 'PF04369'),
            (110, 120, 1.0, 'PF00128'),
            (130, 140, 0.2, 'PF77776'),
            (500, 505, None, 'PF77775'),
            (1010, 1020, 0.1, 'PF77774'),
            (1030, 1040, 0.3, 'PF00106'),
            (1050, 1060, 0.4, 'PF00107'),
            (1060, 1070, 0.7, 'PF00109'),
            (1070, 1080, 0.98, 'PF08484'),
            (1090, 1100, 0.8, 'PF02401'),
            (1100, 1110, 0.32, 'PF04369'),
            (1110, 1120, 1.0, 'PF00128'),
        ]
        for start, end, probability, pfam_id in pfam_hits:
            location = FeatureLocation(start, end, strand=1)
            cds = CDSFeature(location, locus_tag=str(start), translation="A")
            self.record.add_cds_feature(cds)

            pfam = PFAMDomain(location, "dummy_description",
                              protein_start=start + 1, protein_end=end - 1,
                              identifier=pfam_id, tool="test")
            pfam.domain_id = "pfam_%d" % start
            pfam.probability = probability
            self.record.add_pfam_domain(pfam)
示例#12
0
    def test_without_genefunctions(self):
        # with no gene functions set, neither qualifier should be written out
        bio_feature = self.convert()
        for qualifier in ("gene_functions", "gene_kind"):
            assert qualifier not in bio_feature.qualifiers

        # and a rebuilt CDS should have no gene functions either
        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert not rebuilt.gene_functions
示例#13
0
    def test_compound_location_reverse_multiple(self):
        # protein range 2 to 4 on the reverse strand spans two of the exons
        self.reverse_strand()
        cds = CDSFeature(self.location, locus_tag="compound", translation="A")

        sub_location = cds.get_sub_location_from_protein_coordinates(2, 4)
        assert isinstance(sub_location, CompoundLocation)
        # shown to make failures easier to diagnose
        print([str(part) for part in cds.location.parts])
        print([str(part) for part in sub_location.parts])
        assert len(sub_location.parts) == 2
        assert len(sub_location) == 6

        first, second = sub_location.parts[0], sub_location.parts[1]
        assert first.start == 12
        assert first.end == 15
        assert second.start == 3
        assert second.end == 6

        protein = sub_location.extract(self.magic_split).translate()
        assert protein == self.translation[2:4]
示例#14
0
 def test_mixed_strand(self):
     # compound locations whose parts disagree on strand must be rejected
     bio_feature = self.cds.to_biopython()[0]
     opposing = CompoundLocation([FeatureLocation(1, 5, strand=-1),
                                  FeatureLocation(8, 10, strand=1)])
     missing_strand = CompoundLocation([FeatureLocation(1, 5, strand=1),
                                        FeatureLocation(8, 10, strand=None)])
     for mixed in (opposing, missing_strand):
         bio_feature.location = mixed
         with self.assertRaisesRegex(ValueError, "compound locations with mixed strands"):
             CDSFeature.from_biopython(bio_feature)
示例#15
0
 def test_bad_translation(self):
     # missing, empty, and invalid-character translations must all be rejected
     location = FeatureLocation(1, 5, 1)
     message = "valid translation required|invalid translation characters"
     for translation in (None, "A?", "A!", ""):
         with self.assertRaisesRegex(ValueError, message):
             CDSFeature(location, locus_tag="test", translation=translation)
示例#16
0
    def test_without_secmet(self):
        assert not self.cds.sec_met
        qualifiers = self.convert().qualifiers
        # "sec_met" is checked in order to detect legacy versions
        assert "sec_met" not in qualifiers
        assert "sec_met_domain" not in qualifiers

        # rebuilding from the same kind of conversion must not invent a qualifier
        rebuilt = CDSFeature.from_biopython(self.cds.to_biopython()[0])
        assert not rebuilt.sec_met
示例#17
0
    def test_frameshifted_location(self):
        # the second exon starts one base before the first one ends
        exons = [FeatureLocation(3, 9, 1), FeatureLocation(8, 14, 1)]
        location = CompoundLocation(exons)
        assert len(location) == 12

        # sanity checks of the fixture itself
        genome = Seq("ATGATGAGCCCTCGTCTAGACTACAATGA")
        nucleotides = location.extract(genome)
        assert nucleotides == "ATGAGCCCCTCG"
        assert len(nucleotides) == len(location)
        protein = nucleotides.translate()
        assert protein == "MSPS"

        # the middle two residues straddle the overlap between the exons
        cds = CDSFeature(location, locus_tag="test", translation=protein)
        sub_location = cds.get_sub_location_from_protein_coordinates(1, 3)
        assert isinstance(sub_location, CompoundLocation)
        assert len(sub_location.parts) == 2
        assert sub_location.start == 6
        assert sub_location.end == 11
示例#18
0
    def test_compound_location_reverse_single(self):
        # protein ranges that fit inside one exon must give a simple location
        self.reverse_strand()
        cds = CDSFeature(self.location, locus_tag="compound", translation="A")

        # (protein start, protein end, expected DNA start, expected DNA end)
        cases = [
            (0, 2, 21, 27),
            (2, 3, 12, 15),
        ]
        for prot_start, prot_end, dna_start, dna_end in cases:
            sub_location = cds.get_sub_location_from_protein_coordinates(prot_start, prot_end)
            assert isinstance(sub_location, FeatureLocation)
            assert len(sub_location) == dna_end - dna_start
            assert sub_location.start == dna_start
            assert sub_location.end == dna_end
            protein = sub_location.extract(self.magic_split).translate()
            assert protein == self.translation[prot_start:prot_end]
示例#19
0
    def test_with_genefunctions(self):
        # a gene function added by a tool must survive the round trip
        self.cds.gene_functions.add(GeneFunction.ADDITIONAL, "testtool", "dummy")
        bio_feature = self.convert()
        assert "gene_functions" in bio_feature.qualifiers
        gene_kind = bio_feature.qualifiers["gene_kind"]
        overall = [str(self.cds.gene_function)]
        assert gene_kind == overall
        assert overall == ["biosynthetic-additional"]

        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.gene_function == self.cds.gene_function
        original_by_tool = self.cds.gene_functions.get_by_tool("testtool")
        assert rebuilt.gene_functions.get_by_tool("testtool") == original_by_tool
示例#20
0
    def test_basics(self):
        # the location and every identifier must be written to the biopython feature
        bio_feature = self.convert()
        assert bio_feature.location == self.cds.location
        expected_qualifiers = [
            ("locus_tag", ["loctag"]),
            ("gene", ["gene"]),
            ("protein_id", ["prot_id"]),
            ("translation", ["A" * 4]),
        ]
        for key, value in expected_qualifiers:
            assert bio_feature.qualifiers[key] == value

        # and must all be restored when converting back
        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.location == self.cds.location
        assert rebuilt.locus_tag == self.cds.locus_tag
        assert rebuilt.gene == self.cds.gene
        assert rebuilt.protein_id == self.cds.protein_id
示例#21
0
    def test_with_secmet(self):
        first = SecMetQualifier.Domain("testA", 0.1, 1.1, 3, "test")
        second = SecMetQualifier.Domain("testB", 5.1, 3.9, 5, "dummy")
        domains = [first, second]
        self.cds.sec_met = SecMetQualifier(domains)

        bio_feature = self.convert()
        # again, "sec_met" is checked to detect leftover legacy versions
        assert "sec_met" not in bio_feature.qualifiers
        written = bio_feature.qualifiers["sec_met_domain"]
        assert len(written) == 2
        assert written == [str(domain) for domain in domains]

        # the domains must survive the conversion back to a CDSFeature
        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.sec_met
        assert len(rebuilt.sec_met.domains) == len(domains)
        assert rebuilt.sec_met.domains == domains
示例#22
0
 def setUp(self):
     # a forward strand CDS of four codons carrying every kind of identifier
     location = FeatureLocation(0, 12, 1)
     identifiers = {"locus_tag": "loctag", "gene": "gene", "protein_id": "prot_id"}
     self.cds = CDSFeature(location, translation="A" * 4, **identifiers)
示例#23
0
class TestCDSBiopythonConversion(unittest.TestCase):
    """ Checks conversion of a CDSFeature to a biopython feature and back again """

    def setUp(self):
        # a forward strand CDS of four codons carrying every kind of identifier
        location = FeatureLocation(0, 12, 1)
        identifiers = {"locus_tag": "loctag", "gene": "gene", "protein_id": "prot_id"}
        self.cds = CDSFeature(location, translation="A" * 4, **identifiers)

    def convert(self):
        # the fixture CDS is expected to convert to a list of exactly one feature
        converted = self.cds.to_biopython()
        assert isinstance(converted, list)
        assert len(converted) == 1
        return converted[0]

    def test_basics(self):
        # the location and every identifier must be written to the biopython feature
        bio_feature = self.convert()
        assert bio_feature.location == self.cds.location
        expected_qualifiers = [
            ("locus_tag", ["loctag"]),
            ("gene", ["gene"]),
            ("protein_id", ["prot_id"]),
            ("translation", ["A" * 4]),
        ]
        for key, value in expected_qualifiers:
            assert bio_feature.qualifiers[key] == value

        # and must all be restored when converting back
        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.location == self.cds.location
        assert rebuilt.locus_tag == self.cds.locus_tag
        assert rebuilt.gene == self.cds.gene
        assert rebuilt.protein_id == self.cds.protein_id

    def test_without_genefunctions(self):
        # with no gene functions set, neither qualifier should be written out
        bio_feature = self.convert()
        for qualifier in ("gene_functions", "gene_kind"):
            assert qualifier not in bio_feature.qualifiers

        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert not rebuilt.gene_functions

    def test_with_genefunctions(self):
        # a gene function added by a tool must survive the round trip
        self.cds.gene_functions.add(GeneFunction.ADDITIONAL, "testtool", "dummy")
        bio_feature = self.convert()
        assert "gene_functions" in bio_feature.qualifiers
        gene_kind = bio_feature.qualifiers["gene_kind"]
        overall = [str(self.cds.gene_function)]
        assert gene_kind == overall
        assert overall == ["biosynthetic-additional"]

        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.gene_function == self.cds.gene_function
        original_by_tool = self.cds.gene_functions.get_by_tool("testtool")
        assert rebuilt.gene_functions.get_by_tool("testtool") == original_by_tool

    def test_without_secmet(self):
        assert not self.cds.sec_met
        bio_feature = self.convert()
        # "sec_met" is checked in order to detect legacy versions
        for qualifier in ("sec_met", "sec_met_domain"):
            assert qualifier not in bio_feature.qualifiers

        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert not rebuilt.sec_met

    def test_with_secmet(self):
        first = SecMetQualifier.Domain("testA", 0.1, 1.1, 3, "test")
        second = SecMetQualifier.Domain("testB", 5.1, 3.9, 5, "dummy")
        domains = [first, second]
        self.cds.sec_met = SecMetQualifier(domains)

        bio_feature = self.convert()
        # again, "sec_met" is checked to detect leftover legacy versions
        assert "sec_met" not in bio_feature.qualifiers
        written = bio_feature.qualifiers["sec_met_domain"]
        assert len(written) == 2
        assert written == [str(domain) for domain in domains]

        # the domains must survive the conversion back to a CDSFeature
        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.sec_met
        assert len(rebuilt.sec_met.domains) == len(domains)
        assert rebuilt.sec_met.domains == domains

    def test_mixed_strand(self):
        # compound locations whose parts disagree on strand must be rejected
        bio_feature = self.cds.to_biopython()[0]
        opposing = CompoundLocation([FeatureLocation(1, 5, strand=-1),
                                     FeatureLocation(8, 10, strand=1)])
        missing_strand = CompoundLocation([FeatureLocation(1, 5, strand=1),
                                           FeatureLocation(8, 10, strand=None)])
        for mixed in (opposing, missing_strand):
            bio_feature.location = mixed
            with self.assertRaisesRegex(ValueError, "compound locations with mixed strands"):
                CDSFeature.from_biopython(bio_feature)
示例#24
0
class TestCDSBiopythonConversion(unittest.TestCase):
    """ Checks conversion of a CDSFeature to a biopython feature and back again,
        including rejection of invalid biopython input
    """

    def setUp(self):
        # a forward strand CDS of four codons carrying every kind of identifier
        location = FeatureLocation(0, 12, 1)
        identifiers = {"locus_tag": "loctag", "gene": "gene", "protein_id": "prot_id"}
        self.cds = CDSFeature(location, translation="A" * 4, **identifiers)

    def convert(self):
        # the fixture CDS is expected to convert to a list of exactly one feature
        converted = self.cds.to_biopython()
        assert isinstance(converted, list)
        assert len(converted) == 1
        return converted[0]

    def test_basics(self):
        # the location and every identifier must be written to the biopython feature
        bio_feature = self.convert()
        assert bio_feature.location == self.cds.location
        expected_qualifiers = [
            ("locus_tag", ["loctag"]),
            ("gene", ["gene"]),
            ("protein_id", ["prot_id"]),
            ("translation", ["A" * 4]),
        ]
        for key, value in expected_qualifiers:
            assert bio_feature.qualifiers[key] == value

        # and must all be restored when converting back
        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.location == self.cds.location
        assert rebuilt.locus_tag == self.cds.locus_tag
        assert rebuilt.gene == self.cds.gene
        assert rebuilt.protein_id == self.cds.protein_id

    def test_without_genefunctions(self):
        # with no gene functions set, neither qualifier should be written out
        bio_feature = self.convert()
        for qualifier in ("gene_functions", "gene_kind"):
            assert qualifier not in bio_feature.qualifiers

        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert not rebuilt.gene_functions

    def test_with_genefunctions(self):
        # a gene function added by a tool must survive the round trip
        self.cds.gene_functions.add(GeneFunction.ADDITIONAL, "testtool", "dummy")
        bio_feature = self.convert()
        assert "gene_functions" in bio_feature.qualifiers
        gene_kind = bio_feature.qualifiers["gene_kind"]
        overall = [str(self.cds.gene_function)]
        assert gene_kind == overall
        assert overall == ["biosynthetic-additional"]

        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.gene_function == self.cds.gene_function
        original_by_tool = self.cds.gene_functions.get_by_tool("testtool")
        assert rebuilt.gene_functions.get_by_tool("testtool") == original_by_tool

    def test_without_secmet(self):
        assert not self.cds.sec_met
        bio_feature = self.convert()
        # "sec_met" is checked in order to detect legacy versions
        for qualifier in ("sec_met", "sec_met_domain"):
            assert qualifier not in bio_feature.qualifiers

        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert not rebuilt.sec_met

    def test_with_secmet(self):
        first = SecMetQualifier.Domain("testA", 0.1, 1.1, 3, "test")
        second = SecMetQualifier.Domain("testB", 5.1, 3.9, 5, "dummy")
        domains = [first, second]
        self.cds.sec_met = SecMetQualifier(domains)

        bio_feature = self.convert()
        # again, "sec_met" is checked to detect leftover legacy versions
        assert "sec_met" not in bio_feature.qualifiers
        written = bio_feature.qualifiers["sec_met_domain"]
        assert len(written) == 2
        assert written == [str(domain) for domain in domains]

        # the domains must survive the conversion back to a CDSFeature
        rebuilt = CDSFeature.from_biopython(bio_feature)
        assert rebuilt.sec_met
        assert len(rebuilt.sec_met.domains) == len(domains)
        assert rebuilt.sec_met.domains == domains

    def test_mixed_strand(self):
        # compound locations starting with an invalid strand will be treated
        # as per a non-compound with a bad strand, so only locations that
        # start with a valid strand are used here
        bio_feature = self.cds.to_biopython()[0]
        opposing = CompoundLocation([FeatureLocation(1, 5, strand=-1),
                                     FeatureLocation(8, 10, strand=1)])
        missing_strand = CompoundLocation([FeatureLocation(1, 5, strand=1),
                                           FeatureLocation(8, 10, strand=None)])
        for mixed in (opposing, missing_strand):
            bio_feature.location = mixed
            with self.assertRaisesRegex(ValueError, "compound locations with mixed strands"):
                CDSFeature.from_biopython(bio_feature)

    def test_translation_outside_record(self):
        # a 5 residue translation on an open-ended location in a 10 base record
        # is expected to be rejected, whichever strand the open end is on
        record = DummyRecord(seq="A" * 10)
        open_ended_forward = FeatureLocation(0, AfterPosition(6), strand=1)
        open_ended_reverse = FeatureLocation(BeforePosition(4), 10, strand=-1)
        for location in (open_ended_forward, open_ended_reverse):
            bio_feature = SeqFeature(location, type="CDS")
            bio_feature.qualifiers["translation"] = ["M" * 5]
            with self.assertRaisesRegex(SecmetInvalidInputError, "translation extends out of record"):
                CDSFeature.from_biopython(bio_feature, record=record)

    def test_invalid_translation_table(self):
        # a translation table value that is not purely numeric must be rejected
        bio_feature = self.cds.to_biopython()[0]
        bio_feature.qualifiers["transl_table"] = ["11a"]
        with self.assertRaisesRegex(SecmetInvalidInputError, "invalid translation table"):
            CDSFeature.from_biopython(bio_feature)
示例#25
0
 def test_bad_strand(self):
     # a CDS on strand 0 must be rejected
     identifiers = {"locus_tag": "test"}
     with self.assertRaisesRegex(ValueError, "Strand must be"):
         # the location is built inside the context so errors from it count too
         location = FeatureLocation(1, 5, 0)
         CDSFeature(location, translation="A", **identifiers)
示例#26
0
 def test_bad_strand(self):
     # a CDS must be on a definite strand, so neither 0 nor None is acceptable
     for invalid_strand in (0, None):
         with self.assertRaisesRegex(ValueError, "invalid strand"):
             # the location is built inside the context so errors from it count too
             location = FeatureLocation(1, 5, invalid_strand)
             CDSFeature(location, locus_tag="test", translation="A")
示例#27
0
 def test_complicated(self):
     # Checks that a protein range within a CDS made of ten forward strand
     # exons maps to a location which extracts and translates to the matching
     # section of the expected translation.

     # the exons, as coordinates on the forward strand
     parts = [
         FeatureLocation(121124, 122061, 1),
         FeatureLocation(122339, 122383, 1),
         FeatureLocation(122559, 122666, 1),
         FeatureLocation(122712, 122874, 1),
         FeatureLocation(123060, 123337, 1),
         FeatureLocation(123481, 123749, 1),
         FeatureLocation(123809, 124032, 1),
         FeatureLocation(124091, 124193, 1),
         FeatureLocation(124236, 124401, 1),
         FeatureLocation(124684, 124724, 1)
     ]
     location = CompoundLocation(parts, operator="join")
     # NOTE(review): the CDS is built with a placeholder translation, presumably
     # because only its location matters for the conversion under test
     cds = CDSFeature(location, locus_tag="complicated", translation="A")
     # the nucleotide sequence, beginning at the start of the first exon
     seq = (
         "ATGAGCCCTCGTCTAGACTACAATGAAGGATACGATTCCGAAGACGAGGAGATCCCCCGTTACGTACACCAT"
         "TCTAGAGGAAAGAGTCATAGATCCGTGAGGACGTCAGGTCGCTCACGCACGTTGGATTACGACGGGGATGAT"
         "GAAGCTAGTGACCACGCTGCCCCCTCCGGGATTGATCGGGACGCTCGAGCCTGTCCAACATCTCGCAGATAT"
         "ACTGATGACTGCCTTGAGACACATAAATTTCGAGGTGCCCGCTCCTCTCGCTCCCGTGGACGAACCGATGAT"
         "AACAAGGTTTTGTACTACACCAAGTATCGCAGCCCGGCTAAGGACTTGCCTATCGAGCGTGATCCCGAGGGT"
         "ATTAATTTATTCAAGGTCCGACAGCACACACGGCCAAGTGACGCTCATGTGCCCAGTGGATACCGTGAGCCC"
         "TACGAAGTCAAGGTCGACGAGTATGAGGATGATCATCCCCGTACATGCACTAGCCGCCGTGACTCTAGACAG"
         "CCGAAAGTCTACAAGGTCCGGGTTGATGAGTACGAGGATAACCTCCCTGCACGCTCTCACACTGACTTTCGC"
         "GAGTCTCCACGGTCTGAAAGATGCTCTAGCCGCTACACCGAGGACTCGAAGCCTGGGGAGCTTCCTCCCCGC"
         "TCAGGGCCCTGTCGGTCCAGCAGGCCTTCTCCGGTCGATGAGGACGTCGAGTATGAGATCCGTGAGCCCCGA"
         "GGGCATCGCTCCAGTCGACACTCTACAGATGTTGACTTTCAGCCAGTAGAACAACATCCTCGCTTTGGACAA"
         "CGTGGACTCAGCAGACCTTCGCGGGTTGATGAGGAAGTCGATTATGAGATCCGTGAGCCCCGTGGCAATCGT"
         "GTCAGTCACGCTGCTCATGGTGACAGCCCCTGTCAGGACCAAAGCTCCAGGCATATCGGCATTCAATTGTGG"
         "AGTACGCGCGGACCCCGGGCGGCTGGCCGTGGCCGGGGTCCTGATGAGTCTGACGATGTTGAGCCCTAGGCA"
         "GGGAATTGCCGTAATGCTCTTCAAACTGTATAGCAAGCTCAGCATCAATTCTTTAACTGGCAGGCGCTCTGC"
         "TCGCGCGTTTCTCTCTTGGGGTGGTTGGTTTGACTGTAGATTTCCTCTTTCAAGGCTTCTAGATACACCTTT"
         "GGAAGATAGCAACGCTATGCAAGATATTTTTGATAATTCAAATCCTTTTTACACATGGAATAGCTGGTGTTC"
         "CTGTTTTATCTAGGCAATTGACCCACGCCATCTCGGTAGGTACGGTAAAAGCAAGCCGTAATCTCGTATGGC"
         "TTCATCCTTAGCATCGTATAGATCTCCACTCGGGACTCGGCCAGGGATCTTCCATCAATCAACGTGAAGAAG"
         "TCCAGCACCCCGCTGAATCATAATATCCTACCGATTCTGCTCTCTTCACCTCTAGATACCCCTCTAGACTCC"
         "TGTCAACATGTTCCGTACAGTCGAAGACCGCCCGACCCCAAAAGAGGTATATAACTGGCGGCTGTACACCGA"
         "GGCCACCATCATTGCCACTGGTACACTCTTGTGAGTAGGTGCTGTTGTAACGAAAAACATCCAACTGATCCG"
         "CCAGGTTCGGCTATGACTCGGCTTTTGTGGGAACTACCATTGCCCGCCAAAGCTTCGTTGATGCCTTCAACA"
         "TCGTCGAGTCGGAGGCGGCGGATATTTCAAGCAATATCACGTCAACCTTTCAGGCCGGCGCATTTTTCGGCG"
         "CCATCTTCTGCTTCTTGCCTGAGTGAAGCCGTTAGAGACGGTCTCACTGGCTAACCGGACCAAGTGACCGAC"
         "AAAATTGGGCGTAAATGGGCCCTTCAGGCAAACACACTGCTGTTTCTTATTGGCGCGATTGTGATGACGGCT"
         "GCAACACATCACCTTTCCTATATATGTAAGTCATATCCCCGTAGTAGTCAAGGTTGTTAACTAGAGCAGATG"
         "CTGGACGAGCTCTCACCGGCATCGCATGCGGCGCTATCACCGCGACCGTCCCCAGCTATATTGCCGAGCTGT"
         "CAATCGTGTCGATCCGGGGCTTCCTCACCGGGTTCTTCGAAGTCGCATACCAGATTGGTAGCTTGGTTGGAT"
         "TCTGGATCAACTATGGCATTAACGAGAACATGGACAACTCCTCGGCCGCAAGCTGGAGAGTGCCTATGGCAG"
         "TCCAGATCATCCCCGCAGGAGTCCTTTTCATTGGTGGCTTTTCCTCCATGAGAGTCCTCTCTGGCTGATGCG"
         "AAAAGACAGTGAGGATGCCGCGACGGCTGCCCTGGAGGCGTTGAGGAAACTGCCACGGTCTCATCAATGTAA"
         "TCTCCCACCAAGACTCAGGACATAGTCCCATGCTGACTATTTTAGATGTCCAGGAAGACATCGAGATGAACC"
         "GCACCAGGCTGCTGGAGGAAGCTCGGATCGCCGAGAAGTACGGACAAGGTTGGTTGGCATATATCCGAGGCG"
         "CACTCTTCGAGCTCTCGCGCCATGGGATGTGGAATCGTGTTCTGCTCGTCCTCTGTGCCTTTGCACTGCAGA"
         "ATATGTCGGGAGCTGCTGCTATCAACTACTATTCCCCCATACTCTTTGCGTCGTTGGGGATCACTGATGTCG"
         "CTCTGTATACAGGTATTTATGGCCTGGTAAAAGGTAAGTTCTTCTCCTTAAGTATCTCTGGCTGACAATAGG"
         "GATTAACTGATGAGTTTACAGCCGTCGCATCAATTATATTCTACGGCATTCTCATTGATATGTGGGGCCGCC"
         "GACGTCCGACCATTGTTTCGTCACTGGCCTGCCCTCTATGTCTCTGGTTTGTGGGTGCATACGTCAAAGTTG"
         "GGCATCCAGCCGATATCATAGACGCCGGCGGGGAATTGTCCCCCTCCACGGAGGCTGGTGGTAGAGCGGCGA"
         "CTGCGATGATTATGATCTACTCCGTCTTGTAAGTGCCCCTCACTTTTGAATGGGCTTCAGCTTGGAACTCGA"
         "GTAACTGGTATCCAGTTGGTCTTTTGGTCTCAACGGTATCCCCTGGATTGTCTCCGCCGAAATCTTCCCCGG"
         "CGCGCTGCGAAATCTCACGGGGACATGGGCTGCGCTGGTGCAATGGTATGCAATTCCCTTCACCTAGTATCC"
         "ATATCTAAATCAGCAGGTTGATCCAATTCGTTATCACCAAAGCTCTCCCGTACATCTTCAATAGCCTTGGGT"
         "ACGGGACGTGGTTCTTCTTCGCCTCCTGGATGCTGCTCGCTATCATTTGGTCATTCTTTTTTCTCCCGGAAA"
         "CCAAGGGGAAGACTCTCGATGAAATGCATACGATCTTGTACGTTTCTCTCCGTCGAAATGTGGTCTTGGCTA"
         "ATGAATCAGCGGCCATTCTCTCGCCGAAGAGCAGGGTAAGGGTGAGGTTCGAGATAACACTACTAAAAGTGA"
         "TCGGGAGGCTGTCTAGTCCAGTAGTTCTAGAGGACTATTGGCTGGATGATTCCTCTGATGATTTTTGATTGG"
         "TGGTGAAAATGTTGGATGTTTAATGCCAATGTACTGGGAGAGAACATGCCGATAGTACATACCGCTGTGTTG"
         "TATATCGAAGACGGTTGATTTATATATCTTAGTCTTTCAAAAGACGGCACTCACACAATCACACTTCGATGA"
     )
     # the protein sequence that sub-locations of the CDS are checked against
     translation = (
         "MSPRLDYNEGYDSEDEEIPRYVHHSRGKSHRSVRTSGRSRTLDYDGDDEASDHAAPSGIDRDAR"
         "ACPTSRRYTDDCLETHKFRGARSSRSRGRTDDNKVLYYTKYRSPAKDLPIERDPEGINLFKVRQ"
         "HTRPSDAHVPSGYREPYEVKVDEYEDDHPRTCTSRRDSRQPKVYKVRVDEYEDNLPARSHTDFR"
         "ESPRSERCSSRYTEDSKPGELPPRSGPCRSSRPSPVDEDVEYEIREPRGHRSSRHSTDVDFQPV"
         "EQHPRFGQRGLSRPSRVDEEVDYEIREPRGNRVSHAAHGDSPCQDQSSRHIGIQLWTGVPVLSR"
         "QLTHAISTPVNMFRTVEDRPTPKEVYNWRLYTEATIIATGTLLFGYDSAFVGTTIARQSFVDAF"
         "NIVESEAADISSNITSTFQAGAFFGAIFCFLPEADAGRALTGIACGAITATVPSYIAELSIVSI"
         "RGFLTGFFEVAYQIGSLVGFWINYGINENMDNSSAASWRVPMAVQIIPAGVLFIGGFSSMREDI"
         "EMNRTRLLEEARIAEKYGQGWLAYIRGALFELSRHGMWNRVLLVLCAFALQNMSGAAAINYYSP"
         "ILFASLGITDVALYTGIYGLVKAVASIIFYGILIDMWGRRRPTIVSSLACPLCLWFVGAYVKVG"
         "HPADIIDAGGELSPSTEAGGRAATAMIMIYSVFWSFGLNGIPWIVSAEIFPGALRNLTGTWAAL"
         "VQWLIQFVITKALPYIFNSLGYGTWFFFASWMLLAIIWSFFFLPETKGKTLDEMHTIFLSKDGT"
         "HTITLR")
     # protein coordinates 353 to 412 are the section of the CDS being checked
     new = cds.get_sub_location_from_protein_coordinates(353, 412)
     # pad the beginning to match the location
     assert new.extract(Seq("x" * location.start +
                            seq)).translate() == translation[353:412]
示例#28
0
class TestCDSProteinLocation(unittest.TestCase):
    def setUp(self):
        """ Creates the fixtures shared by the tests: the protein "MAGIC", its
            coding sequence both contiguous and interrupted by two six-base
            runs of filler, and a forward-strand compound CDS whose three parts
            cover only the coding bases of the interrupted sequence
        """
        self.translation = "MAGIC"
        # the uninterrupted coding sequence
        self.magic = Seq("ATGGCAGGTATTTGT")
        # the same coding bases, separated by 'x' filler between the parts
        self.magic_split = Seq("ATGGCAxxxxxxGGTxxxxxxATTTGT")

        part_bounds = [(0, 6), (12, 15), (21, 27)]
        self.sub_locations = [FeatureLocation(start, end, strand=1)
                              for start, end in part_bounds]
        self.location = CompoundLocation(self.sub_locations)
        # the translation given to the CDS is a one-residue placeholder, not "MAGIC"
        self.cds = CDSFeature(self.location, locus_tag="compound", translation="A")

    def reverse_strand(self):
        """ Flips every fixture onto the opposite strand: both sequences are
            reverse complemented, each part keeps its coordinates but has its
            strand negated, and the compound location and CDS are rebuilt
        """
        self.magic_split = self.magic_split.reverse_complement()
        self.magic = self.magic.reverse_complement()

        flipped = []
        for part in self.sub_locations:
            flipped.append(FeatureLocation(part.start, part.end, strand=-part.strand))
        self.sub_locations = flipped

        # the strand of the first part doubles as the slice step, so the parts
        # are listed in reverse order when they are on the reverse strand
        step = flipped[0].strand
        self.location = CompoundLocation(flipped[::step])
        self.cds = CDSFeature(self.location, locus_tag="compound", translation="A")

    def test_simple_location_forward_complete(self):
        """ The full protein range of a single-part forward CDS must extract
            the entire coding sequence
        """
        simple_cds = CDSFeature(FeatureLocation(0, 15, 1), locus_tag="simple", translation="A")
        sub_location = simple_cds.get_sub_location_from_protein_coordinates(0, 5)
        nucleotides = sub_location.extract(self.magic)
        assert nucleotides == self.magic
        assert nucleotides.translate() == self.translation

    def test_simple_location_forward_partial(self):
        """ Partial protein ranges of a single-part forward CDS must extract
            the matching codons, three bases per residue
        """
        simple_cds = CDSFeature(FeatureLocation(0, 15, 1), locus_tag="simple", translation="A")
        protein_ranges = ((1, 5), (0, 3), (2, 3), (1, 4))
        for start, end in protein_ranges:
            # printed so a failure shows which range was being checked
            print("testing", start, end)
            sub_location = simple_cds.get_sub_location_from_protein_coordinates(start, end)
            print(sub_location)
            nucleotides = sub_location.extract(self.magic)
            assert nucleotides == self.magic[start * 3:end * 3]
            assert nucleotides.translate() == self.translation[start:end]

    def test_compound_location_forward_full(self):
        """ The full protein range of the compound CDS must give back a
            location equal to the CDS location itself
        """
        sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 5)
        assert isinstance(sub_location, CompoundLocation)
        assert len(sub_location.parts) == 3
        print([str(part) for part in self.cds.location.parts])
        print([str(part) for part in sub_location.parts])
        assert len(sub_location) == len(self.cds.location)
        assert sub_location == self.location, "%s != %s" % (sub_location, self.location)
        nucleotides = sub_location.extract(self.magic_split)
        assert nucleotides == self.magic
        assert nucleotides.translate() == self.translation[0:5]

    def test_compound_forward_within_single(self):
        """ Protein ranges that fall inside one part of the forward compound
            CDS must come back as a plain FeatureLocation with the expected
            coordinates
        """
        # (protein start, protein end), (expected nucleotide start, end)
        cases = [
            ((0, 2), (0, 6)),
            ((2, 3), (12, 15)),
        ]
        for (prot_start, prot_end), (nuc_start, nuc_end) in cases:
            sub_location = self.cds.get_sub_location_from_protein_coordinates(prot_start,
                                                                              prot_end)
            assert isinstance(sub_location, FeatureLocation)
            assert len(sub_location) == nuc_end - nuc_start
            assert sub_location.start == nuc_start
            assert sub_location.end == nuc_end
            peptide = sub_location.extract(self.magic_split).translate()
            assert peptide == self.translation[prot_start:prot_end]

    def test_compound_forward_over_multiple(self):
        """ A protein range crossing a part boundary of the forward compound
            CDS must come back as a two-part CompoundLocation
        """
        sub_location = self.cds.get_sub_location_from_protein_coordinates(2, 4)
        assert isinstance(sub_location, CompoundLocation)
        print([str(part) for part in self.cds.location.parts])
        print([str(part) for part in sub_location.parts])
        assert len(sub_location.parts) == 2
        assert len(sub_location) == 6

        # expected (start, end) of each part, in the order they are listed
        expected_bounds = [(12, 15), (21, 24)]
        for part, (start, end) in zip(sub_location.parts, expected_bounds):
            assert part.start == start
            assert part.end == end

        peptide = sub_location.extract(self.magic_split).translate()
        assert peptide == self.translation[2:4]

    def test_compound_location_reverse_full(self):
        """ The full protein range of the reverse-strand compound CDS must
            cover all three parts and translate to the whole protein
        """
        self.reverse_strand()
        reverse_cds = CDSFeature(self.location, locus_tag="compound", translation="A")
        sub_location = reverse_cds.get_sub_location_from_protein_coordinates(0, 5)
        assert isinstance(sub_location, CompoundLocation)
        assert len(sub_location.parts) == 3
        print([str(part) for part in reverse_cds.location.parts])
        print([str(part) for part in sub_location.parts])
        assert len(sub_location) == len(reverse_cds.location)
        peptide = sub_location.extract(self.magic_split).translate()
        assert peptide == self.translation[0:5]

    def test_compound_location_reverse_single(self):
        """ Protein ranges that fall inside one part of the reverse-strand
            compound CDS must come back as a plain FeatureLocation with the
            expected coordinates
        """
        self.reverse_strand()
        reverse_cds = CDSFeature(self.location, locus_tag="compound", translation="A")

        # (protein start, protein end), (expected nucleotide start, end)
        cases = [
            ((0, 2), (21, 27)),
            ((2, 3), (12, 15)),
        ]
        for (prot_start, prot_end), (nuc_start, nuc_end) in cases:
            sub_location = reverse_cds.get_sub_location_from_protein_coordinates(prot_start,
                                                                                 prot_end)
            assert isinstance(sub_location, FeatureLocation)
            assert len(sub_location) == nuc_end - nuc_start
            assert sub_location.start == nuc_start
            assert sub_location.end == nuc_end
            peptide = sub_location.extract(self.magic_split).translate()
            assert peptide == self.translation[prot_start:prot_end]

    def test_compound_location_reverse_multiple(self):
        """ A protein range crossing a part boundary of the reverse-strand
            compound CDS must come back as a two-part CompoundLocation
        """
        self.reverse_strand()
        reverse_cds = CDSFeature(self.location, locus_tag="compound", translation="A")

        sub_location = reverse_cds.get_sub_location_from_protein_coordinates(2, 4)
        assert isinstance(sub_location, CompoundLocation)
        print([str(part) for part in reverse_cds.location.parts])
        print([str(part) for part in sub_location.parts])
        assert len(sub_location.parts) == 2
        assert len(sub_location) == 6

        # expected (start, end) of each part, in the order they are listed
        expected_bounds = [(12, 15), (3, 6)]
        for part, (start, end) in zip(sub_location.parts, expected_bounds):
            assert part.start == start
            assert part.end == end

        peptide = sub_location.extract(self.magic_split).translate()
        assert peptide == self.translation[2:4]

    def test_frameshifted_location(self):
        """ A compound location whose two parts overlap by one base must still
            give a two-part sub-location for a protein range that crosses the
            overlap
        """
        genome = Seq("ATGATGAGCCCTCGTCTAGACTACAATGA")
        # the second part starts one base before the first part ends
        overlapping = CompoundLocation([
            FeatureLocation(3, 9, 1),
            FeatureLocation(8, 14, 1),
        ])
        assert len(overlapping) == 12

        coding = overlapping.extract(genome)
        assert coding == "ATGAGCCCCTCG"
        assert len(coding) == len(overlapping)
        protein = coding.translate()
        assert protein == "MSPS"

        shifted_cds = CDSFeature(overlapping, locus_tag="test", translation=protein)
        sub_location = shifted_cds.get_sub_location_from_protein_coordinates(1, 3)
        assert isinstance(sub_location, CompoundLocation)
        assert len(sub_location.parts) == 2
        assert sub_location.start == 6
        assert sub_location.end == 11

    def test_complicated(self):
        """ Checks a ten-part forward-strand CDS: the sub-location for protein
            coordinates 353 to 412 must extract nucleotides that translate to
            the same slice of the expected translation
        """
        # part lengths are not all multiples of three (the first is 937 bases)
        parts = [
            FeatureLocation(121124, 122061, 1),
            FeatureLocation(122339, 122383, 1),
            FeatureLocation(122559, 122666, 1),
            FeatureLocation(122712, 122874, 1),
            FeatureLocation(123060, 123337, 1),
            FeatureLocation(123481, 123749, 1),
            FeatureLocation(123809, 124032, 1),
            FeatureLocation(124091, 124193, 1),
            FeatureLocation(124236, 124401, 1),
            FeatureLocation(124684, 124724, 1)
        ]
        location = CompoundLocation(parts, operator="join")
        # the translation given to the CDS is a placeholder, the expected
        # protein is held separately below
        cds = CDSFeature(location, locus_tag="complicated", translation="A")
        # nucleotide sequence beginning at the start of the location
        seq = (
            "ATGAGCCCTCGTCTAGACTACAATGAAGGATACGATTCCGAAGACGAGGAGATCCCCCGTTACGTACACCAT"
            "TCTAGAGGAAAGAGTCATAGATCCGTGAGGACGTCAGGTCGCTCACGCACGTTGGATTACGACGGGGATGAT"
            "GAAGCTAGTGACCACGCTGCCCCCTCCGGGATTGATCGGGACGCTCGAGCCTGTCCAACATCTCGCAGATAT"
            "ACTGATGACTGCCTTGAGACACATAAATTTCGAGGTGCCCGCTCCTCTCGCTCCCGTGGACGAACCGATGAT"
            "AACAAGGTTTTGTACTACACCAAGTATCGCAGCCCGGCTAAGGACTTGCCTATCGAGCGTGATCCCGAGGGT"
            "ATTAATTTATTCAAGGTCCGACAGCACACACGGCCAAGTGACGCTCATGTGCCCAGTGGATACCGTGAGCCC"
            "TACGAAGTCAAGGTCGACGAGTATGAGGATGATCATCCCCGTACATGCACTAGCCGCCGTGACTCTAGACAG"
            "CCGAAAGTCTACAAGGTCCGGGTTGATGAGTACGAGGATAACCTCCCTGCACGCTCTCACACTGACTTTCGC"
            "GAGTCTCCACGGTCTGAAAGATGCTCTAGCCGCTACACCGAGGACTCGAAGCCTGGGGAGCTTCCTCCCCGC"
            "TCAGGGCCCTGTCGGTCCAGCAGGCCTTCTCCGGTCGATGAGGACGTCGAGTATGAGATCCGTGAGCCCCGA"
            "GGGCATCGCTCCAGTCGACACTCTACAGATGTTGACTTTCAGCCAGTAGAACAACATCCTCGCTTTGGACAA"
            "CGTGGACTCAGCAGACCTTCGCGGGTTGATGAGGAAGTCGATTATGAGATCCGTGAGCCCCGTGGCAATCGT"
            "GTCAGTCACGCTGCTCATGGTGACAGCCCCTGTCAGGACCAAAGCTCCAGGCATATCGGCATTCAATTGTGG"
            "AGTACGCGCGGACCCCGGGCGGCTGGCCGTGGCCGGGGTCCTGATGAGTCTGACGATGTTGAGCCCTAGGCA"
            "GGGAATTGCCGTAATGCTCTTCAAACTGTATAGCAAGCTCAGCATCAATTCTTTAACTGGCAGGCGCTCTGC"
            "TCGCGCGTTTCTCTCTTGGGGTGGTTGGTTTGACTGTAGATTTCCTCTTTCAAGGCTTCTAGATACACCTTT"
            "GGAAGATAGCAACGCTATGCAAGATATTTTTGATAATTCAAATCCTTTTTACACATGGAATAGCTGGTGTTC"
            "CTGTTTTATCTAGGCAATTGACCCACGCCATCTCGGTAGGTACGGTAAAAGCAAGCCGTAATCTCGTATGGC"
            "TTCATCCTTAGCATCGTATAGATCTCCACTCGGGACTCGGCCAGGGATCTTCCATCAATCAACGTGAAGAAG"
            "TCCAGCACCCCGCTGAATCATAATATCCTACCGATTCTGCTCTCTTCACCTCTAGATACCCCTCTAGACTCC"
            "TGTCAACATGTTCCGTACAGTCGAAGACCGCCCGACCCCAAAAGAGGTATATAACTGGCGGCTGTACACCGA"
            "GGCCACCATCATTGCCACTGGTACACTCTTGTGAGTAGGTGCTGTTGTAACGAAAAACATCCAACTGATCCG"
            "CCAGGTTCGGCTATGACTCGGCTTTTGTGGGAACTACCATTGCCCGCCAAAGCTTCGTTGATGCCTTCAACA"
            "TCGTCGAGTCGGAGGCGGCGGATATTTCAAGCAATATCACGTCAACCTTTCAGGCCGGCGCATTTTTCGGCG"
            "CCATCTTCTGCTTCTTGCCTGAGTGAAGCCGTTAGAGACGGTCTCACTGGCTAACCGGACCAAGTGACCGAC"
            "AAAATTGGGCGTAAATGGGCCCTTCAGGCAAACACACTGCTGTTTCTTATTGGCGCGATTGTGATGACGGCT"
            "GCAACACATCACCTTTCCTATATATGTAAGTCATATCCCCGTAGTAGTCAAGGTTGTTAACTAGAGCAGATG"
            "CTGGACGAGCTCTCACCGGCATCGCATGCGGCGCTATCACCGCGACCGTCCCCAGCTATATTGCCGAGCTGT"
            "CAATCGTGTCGATCCGGGGCTTCCTCACCGGGTTCTTCGAAGTCGCATACCAGATTGGTAGCTTGGTTGGAT"
            "TCTGGATCAACTATGGCATTAACGAGAACATGGACAACTCCTCGGCCGCAAGCTGGAGAGTGCCTATGGCAG"
            "TCCAGATCATCCCCGCAGGAGTCCTTTTCATTGGTGGCTTTTCCTCCATGAGAGTCCTCTCTGGCTGATGCG"
            "AAAAGACAGTGAGGATGCCGCGACGGCTGCCCTGGAGGCGTTGAGGAAACTGCCACGGTCTCATCAATGTAA"
            "TCTCCCACCAAGACTCAGGACATAGTCCCATGCTGACTATTTTAGATGTCCAGGAAGACATCGAGATGAACC"
            "GCACCAGGCTGCTGGAGGAAGCTCGGATCGCCGAGAAGTACGGACAAGGTTGGTTGGCATATATCCGAGGCG"
            "CACTCTTCGAGCTCTCGCGCCATGGGATGTGGAATCGTGTTCTGCTCGTCCTCTGTGCCTTTGCACTGCAGA"
            "ATATGTCGGGAGCTGCTGCTATCAACTACTATTCCCCCATACTCTTTGCGTCGTTGGGGATCACTGATGTCG"
            "CTCTGTATACAGGTATTTATGGCCTGGTAAAAGGTAAGTTCTTCTCCTTAAGTATCTCTGGCTGACAATAGG"
            "GATTAACTGATGAGTTTACAGCCGTCGCATCAATTATATTCTACGGCATTCTCATTGATATGTGGGGCCGCC"
            "GACGTCCGACCATTGTTTCGTCACTGGCCTGCCCTCTATGTCTCTGGTTTGTGGGTGCATACGTCAAAGTTG"
            "GGCATCCAGCCGATATCATAGACGCCGGCGGGGAATTGTCCCCCTCCACGGAGGCTGGTGGTAGAGCGGCGA"
            "CTGCGATGATTATGATCTACTCCGTCTTGTAAGTGCCCCTCACTTTTGAATGGGCTTCAGCTTGGAACTCGA"
            "GTAACTGGTATCCAGTTGGTCTTTTGGTCTCAACGGTATCCCCTGGATTGTCTCCGCCGAAATCTTCCCCGG"
            "CGCGCTGCGAAATCTCACGGGGACATGGGCTGCGCTGGTGCAATGGTATGCAATTCCCTTCACCTAGTATCC"
            "ATATCTAAATCAGCAGGTTGATCCAATTCGTTATCACCAAAGCTCTCCCGTACATCTTCAATAGCCTTGGGT"
            "ACGGGACGTGGTTCTTCTTCGCCTCCTGGATGCTGCTCGCTATCATTTGGTCATTCTTTTTTCTCCCGGAAA"
            "CCAAGGGGAAGACTCTCGATGAAATGCATACGATCTTGTACGTTTCTCTCCGTCGAAATGTGGTCTTGGCTA"
            "ATGAATCAGCGGCCATTCTCTCGCCGAAGAGCAGGGTAAGGGTGAGGTTCGAGATAACACTACTAAAAGTGA"
            "TCGGGAGGCTGTCTAGTCCAGTAGTTCTAGAGGACTATTGGCTGGATGATTCCTCTGATGATTTTTGATTGG"
            "TGGTGAAAATGTTGGATGTTTAATGCCAATGTACTGGGAGAGAACATGCCGATAGTACATACCGCTGTGTTG"
            "TATATCGAAGACGGTTGATTTATATATCTTAGTCTTTCAAAAGACGGCACTCACACAATCACACTTCGATGA"
        )
        # the expected protein, sliced below for comparison
        translation = (
            "MSPRLDYNEGYDSEDEEIPRYVHHSRGKSHRSVRTSGRSRTLDYDGDDEASDHAAPSGIDRDAR"
            "ACPTSRRYTDDCLETHKFRGARSSRSRGRTDDNKVLYYTKYRSPAKDLPIERDPEGINLFKVRQ"
            "HTRPSDAHVPSGYREPYEVKVDEYEDDHPRTCTSRRDSRQPKVYKVRVDEYEDNLPARSHTDFR"
            "ESPRSERCSSRYTEDSKPGELPPRSGPCRSSRPSPVDEDVEYEIREPRGHRSSRHSTDVDFQPV"
            "EQHPRFGQRGLSRPSRVDEEVDYEIREPRGNRVSHAAHGDSPCQDQSSRHIGIQLWTGVPVLSR"
            "QLTHAISTPVNMFRTVEDRPTPKEVYNWRLYTEATIIATGTLLFGYDSAFVGTTIARQSFVDAF"
            "NIVESEAADISSNITSTFQAGAFFGAIFCFLPEADAGRALTGIACGAITATVPSYIAELSIVSI"
            "RGFLTGFFEVAYQIGSLVGFWINYGINENMDNSSAASWRVPMAVQIIPAGVLFIGGFSSMREDI"
            "EMNRTRLLEEARIAEKYGQGWLAYIRGALFELSRHGMWNRVLLVLCAFALQNMSGAAAINYYSP"
            "ILFASLGITDVALYTGIYGLVKAVASIIFYGILIDMWGRRRPTIVSSLACPLCLWFVGAYVKVG"
            "HPADIIDAGGELSPSTEAGGRAATAMIMIYSVFWSFGLNGIPWIVSAEIFPGALRNLTGTWAAL"
            "VQWLIQFVITKALPYIFNSLGYGTWFFFASWMLLAIIWSFFFLPETKGKTLDEMHTIFLSKDGT"
            "HTITLR")
        new = cds.get_sub_location_from_protein_coordinates(353, 412)
        # pad the beginning to match the location
        assert new.extract(Seq("x" * location.start +
                               seq)).translate() == translation[353:412]

    def test_extends_past_after(self):
        """ With an open-ended (AfterPosition) end on the final part, asking
            for protein coordinates 0 to 7 must give a location ending at 27
        """
        open_ended = FeatureLocation(21, AfterPosition(29), strand=1)
        # replaced in place so the shared list of parts reflects the change
        self.sub_locations[-1] = open_ended
        self.cds.location = CompoundLocation(self.sub_locations)

        sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
        assert sub_location.end == 27

    def test_extends_past_before(self):
        """ On the reverse strand, with an open-ended (BeforePosition) start on
            the lowest part, asking for protein coordinates 0 to 7 must give a
            location starting at 3
        """
        self.reverse_strand()
        lowest_end = self.sub_locations[0].end
        open_ended = FeatureLocation(BeforePosition(2), lowest_end, strand=-1)
        # replaced in place so the shared list of parts reflects the change
        self.sub_locations[0] = open_ended
        # reverse strand parts are listed from highest coordinate to lowest
        self.cds.location = CompoundLocation(self.sub_locations[::-1])

        sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
        assert sub_location.start == 3
示例#29
0
 def test_invalid_translation_table(self):
     """ A non-numeric transl_table qualifier must be rejected when the
         feature is converted back from biopython
     """
     bio_feature = self.cds.to_biopython()[0]
     bio_feature.qualifiers["transl_table"] = ["11a"]
     expected_message = "invalid translation table"
     with self.assertRaisesRegex(SecmetInvalidInputError, expected_message):
         CDSFeature.from_biopython(bio_feature)
示例#30
0
 def __init__(self, feature: CDSFeature) -> None:
     """ Stores the translation and name of a CDS feature, along with empty
         lists of domains and modules

         Arguments:
             feature: the CDSFeature to take the translation and name from
     """
     # NOTE(review): presumably these are the attribute names the base class
     # exposes or serialises - confirm against the base class
     super().__init__(["id", "sequence", "domains", "modules"])
     self.sequence = feature.translation
     self.id = feature.get_name()
     # both start empty, nothing in the lines shown here populates them
     self.domains = []  # type: List[JSONDomain]
     self.modules = []  # type: List[JSONModule]