示例#1
0
def check_variant_strand(var_details, intron_locs):
    """Complement the alleles of variants lying in reverse-strand introns.

    Parameters
    ----------
    var_details : list of tuples
        Each record holds the variant position at index 2 and the two
        allele fields at indices 3 and 4. The list is sorted **in place**
        by position and affected records are replaced by new 5-tuples.
    intron_locs : iterable
        Each record holds start at index 1, end at index 2 and strand at
        index 3 (``1`` means forward strand; anything else is treated as
        reverse).

    Returns
    -------
    tuple
        ``(var_details, var_locs_reversed)`` where ``var_locs_reversed`` is
        the list of indices (into the sorted ``var_details``) whose alleles
        were complemented. An index appears once per reverse-strand intron
        that contains the variant.
    """
    var_details.sort(key=itemgetter(2))
    var_locs = [v[2] for v in var_details]
    count, reverse = 0, 0
    var_locs_reversed = []
    for intron in intron_locs:
        count += 1
        if intron[3] == 1:
            continue  # intron is on forward strand
        reverse += 1
        intron_start, intron_end = intron[1], intron[2]
        # Skip when there are no variants at all (previously an IndexError)
        # or when the intron ends before the first variant.
        if not var_locs or intron_end < var_locs[0]:
            continue
        # bisect.bisect is bisect_right, so the selected variants satisfy
        # intron_start < position <= intron_end.
        first = bisect.bisect(var_locs, intron_start)
        last = bisect.bisect(var_locs, intron_end)
        for i in range(first, last):
            record = var_details[i]
            var_details[i] = (record[0], record[1], record[2],
                              DNA.complement(record[3]),
                              DNA.complement(record[4]))
            var_locs_reversed.append(i)
    print('Number of introns processed'     , count)
    print('Number of reverse strand introns', reverse)
    print('Number of variants           ', len(var_details))
    print('Number of variants on (-) strand', len(var_locs_reversed))
    return var_details, var_locs_reversed
示例#2
0
 def test_assemble_seq(self):
     """should correctly fill in a sequence with N's"""
     frags = ["AAAAA", "CCCCC", "GGG"]
     joined = "".join(frags)
     # each case: (fragments, start, end, fragment positions, expected)
     cases = [
         # padding needed on both sides, two coordinate offsets
         (frags, 10, 31, [(11, 16), (18, 23), (25, 28)],
          "NAAAAANNCCCCCNNGGGNNN"),
         (frags, 0, 21, [(1, 6), (8, 13), (15, 18)],
          "NAAAAANNCCCCCNNGGGNNN"),
         # start matches first frag start
         (frags, 0, 20, [(0, 5), (7, 12), (14, 17)],
          "AAAAANNCCCCCNNGGGNNN"),
         # end matches last frag end
         (frags, 10, 28, [(11, 16), (18, 23), (25, 28)],
          "NAAAAANNCCCCCNNGGG"),
         # both start and end matched
         (frags, 10, 27, [(10, 15), (17, 22), (24, 27)],
          "AAAAANNCCCCCNNGGG"),
         # one frag
         ([joined], 10, 23, [(10, 23)], joined),
     ]
     for pieces, start, end, positions, raw_expected in cases:
         expect = DNA.make_seq(raw_expected)
         self.assertEqual(_assemble_seq(pieces, start, end, positions),
                          expect)
示例#3
0
    def test_gap_coords_to_map(self):
        """construct a Map from coordinates of gap alone"""
        m, seq = DNA.make_seq("-AC--GT-TTA--").parse_out_gaps()
        coords = {0: 1, 2: 2, 4: 1, 7: 2}
        length = 70
        result = gap_coords_to_map(coords, length)
        # the map spans the sequence plus every gap position
        self.assertEqual(len(result), length + sum(coords.values()))

        coords = {5: 2, 17: 3, 10: 2}
        length = 20
        result = gap_coords_to_map(coords, length)
        self.assertEqual(len(result), sum(coords.values()) + length)

        # roundtrip from Map.get_gap_coordinates()
        self.assertEqual(dict(result.get_gap_coordinates()), coords)

        # and no gaps
        m, seq = DNA.make_seq("ACGTTTA").parse_out_gaps()
        result = gap_coords_to_map({}, len(seq))
        self.assertEqual(len(result), len(m))
        self.assertEqual(result.get_coordinates(), m.get_coordinates())

        # and gaps outside sequence
        with self.assertRaises(ValueError):
            gap_coords_to_map({20: 1}, len(seq))
示例#4
0
 def test_gaps_at_both_ends(self):
     """global alignment of two offset substrings spans the full length"""
     full = "aaaccggttt"
     # A lacks the last two bases, B lacks the first two
     left = DNA.make_seq(full[:-2], name="A")
     right = DNA.make_seq(full[2:], name="B")
     for aln in self._aligned_both_ways(left, right, local=False):
         self.assertEqual(matchedColumns(aln), 6)
         self.assertEqual(len(aln), 10)
示例#5
0
 def test_local_tiebreak(self):
     """Should pick the first best-equal hit rather than the last one"""
     # so that the Pyrex and Python versions give the same result.
     scores = make_dna_scoring_dict(match=1, transition=-1, transversion=-1)
     query = DNA.make_seq("cwc", name="pattern")
     subject = DNA.make_seq("cactc", name="target")
     matched = local_pairwise(query, subject, scores, 5, 2).named_seqs["target"]
     self.assertEqual(str(matched).lower(), "cac")
示例#6
0
 def setUp(self):
     """Build two gapped sequences plus their maps, cigars and alignment."""
     first = DNA.make_seq("---AA---GCTTAG-A--CCT-")
     second = DNA.make_seq("CCAAAAAA---TAGT-GGC--G")
     self.cigar_text = "3D2M3D6MDM2D3MD"
     self.aln_seq, self.aln_seq1 = first, second
     # gap map and ungapped sequence for each gapped sequence
     self.map, self.seq = first.parse_out_gaps()
     self.map1, self.seq1 = second.parse_out_gaps()
     self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]
     self.aln = make_aligned_seqs(
         dict(FAKE01=first, FAKE02=second), array_align=False
     )
     self.cigars = dict(FAKE01=self.cigar_text, FAKE02=map_to_cigar(self.map1))
     self.seqs = dict(FAKE01=str(self.seq), FAKE02=str(self.seq1))
示例#7
0
 def test_codon(self):
     """codon model aligns a 9-base and a 6-base sequence into 9 columns"""
     seqs = dict(
         A=DNA.make_seq("tacgccgta", name="A"),
         B=DNA.make_seq("tacgta", name="B"),
     )
     model = cogent3.evolve.substitution_model.TimeReversibleCodon(
         model_gaps=False, equal_motif_probs=True, mprob_model="conditional"
     )
     tree = cogent3.make_tree(tip_names=["A", "B"])
     lf = model.make_likelihood_function(tree, aligned=False)
     lf.set_sequences(seqs)
     viterbi = lf.get_log_likelihood().edge.get_viterbi_path()
     aln = viterbi.get_alignment()
     self.assertEqual(matchedColumns(aln), 6)
     self.assertEqual(len(aln), 9)
示例#8
0
def get_rc_record(alleles, ancestor, allele_freqs, flank_5, flank_3):
    """Return the record's fields expressed on the opposite strand.

    Alleles, the ancestor and the keys of ``allele_freqs`` are passed through
    ``DNA.complement``; the two flanking sequences are reverse complemented.

    Returns ``(alleles, ancestor, allele_freqs, flank_3_rc, flank_5_rc)`` --
    note the flanks come back in swapped order (3' first, then 5').
    """
    comp = DNA.complement
    rc_alleles = {comp(base) for base in alleles}
    rc_ancestor = comp(ancestor)
    rc_freqs = {comp(allele): freq for allele, freq in allele_freqs.items()}

    rc_flank_5 = str(DNA.make_seq(flank_5).rc())
    rc_flank_3 = str(DNA.make_seq(flank_3).rc())
    return rc_alleles, rc_ancestor, rc_freqs, rc_flank_3, rc_flank_5
示例#9
0
 def _make_aln(
     self,
     orig,
     model=dna_model,
     param_vals=None,
     indel_rate=0.1,
     indel_length=0.5,
     **kw,
 ):
     """Progressively align the raw sequences in ``orig`` and return the alignment.

     ``orig`` maps names to raw sequence strings. A two-taxon tree (A, B) is
     used when there are exactly two sequences, otherwise a fixed four-taxon
     tree (A, B, C, D). ``indel_rate``, ``indel_length`` and any extra keyword
     arguments are forwarded to ``TreeAlign``.
     """
     kw.update(indel_rate=indel_rate, indel_length=indel_length)
     seqs = {name: DNA.make_seq(raw) for name, raw in orig.items()}
     if len(seqs) == 2:
         treestring = "(A:.1,B:.1)"
     else:
         treestring = "(((A:.1,B:.1):.1,C:.1):.1,D:.1)"
     guide_tree = cogent3.make_tree(treestring=treestring)
     # TreeAlign returns (alignment, tree); only the alignment is needed
     aln, _ = cogent3.align.progressive.TreeAlign(
         model,
         seqs,
         tree=guide_tree,
         param_vals=param_vals,
         show_progress=False,
         **kw,
     )
     return aln
示例#10
0
 def setUp(self):
     """Create a sequence with two exons, one carrying a nested repeat."""
     # A Sequence with a couple of exons on it.
     raw = "AAGAAGAAGACCCCCAAAAAAAAAATTTTTTTTTTAAAAAAAAAAAAA"
     self.s = DNA.make_seq(raw, name="Orig")
     annotate = self.s.add_annotation
     self.exon1 = annotate(Feature, "exon", "fred", [(10, 15)])
     self.exon2 = annotate(Feature, "exon", "trev", [(30, 40)])
     # the repeat is attached to exon1 rather than to the sequence
     self.nested_feature = self.exon1.add_feature("repeat", "bob", [(2, 5)])
示例#11
0
    def test_inherit_feature(self):
        """should be able to subclass and extend _Feature"""

        class NewFeat(_Feature):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

            def newMethod(self):
                # single-span features are the base case; otherwise collapse
                # to one span (expected to be a NewFeat again) and recurse
                if len(self.map.spans) <= 1:
                    return True
                return self.as_one_span().newMethod()

        dna = DNA.make_seq("ACGTACGTACGT")
        two_span = dna.add_annotation(
            NewFeat, as_map([(1, 3), (5, 7)], len(dna)), type="gene", name="abcd"
        )
        # features derived from a NewFeat must keep the subclass type
        self.assertEqual(type(two_span.as_one_span()), NewFeat)
        self.assertEqual(type(two_span.get_shadow()), NewFeat)
        one_span = dna.add_annotation(
            NewFeat, as_map([(3, 5)], len(dna)), type="gene", name="def"
        )

        covering = dna.get_region_covering_all(
            [two_span, one_span], feature_class=NewFeat
        )
        self.assertEqual(type(covering), NewFeat)
        # now use the new method
        two_span.newMethod()
示例#12
0
 def test_annotate_matches_to(self):
     """annotate_matches_to attaches annotations correctly to a Sequence
     """
     pattern = "CCRC"
     # arguments shared by every call below
     common = dict(pattern=pattern, annot_type="domain", name="fred")
     seq = DNA.make_seq("TTCCACTTCCGCTT", name="x")
     hits = seq.annotate_matches_to(allow_multiple=True, **common)
     self.assertEqual([hit.get_slice() for hit in hits], ["CCAC", "CCGC"])
     hits = seq.annotate_matches_to(allow_multiple=False, **common)
     self.assertEqual(len(hits), 1)
     first = hits[0].get_slice()
     self.assertEqual(str(first), "CCAC")
     # For Sequence objects of a non-IUPAC MolType, annotate_matches_to
     # should return an empty annotation.
     seq = ASCII.make_seq(seq="TTCCACTTCCGCTT")
     hits = seq.annotate_matches_to(allow_multiple=False, **common)
     self.assertEqual(hits, [])
示例#13
0
    def test_constructor_equivalence(self):
        """equal constructions mask identically; CDS masking works on both strands"""

        # These different constructions should generate the same output.
        data = [["human", "CGAAACGTTT"], ["mouse", "CTAAACGTCG"]]
        alignments = [
            make_aligned_seqs(data=data, array_align=False) for _ in range(2)
        ]
        serial, itemwise = (
            str(aln.with_masked_annotations(["cpgsite"])) for aln in alignments
        )
        self.assertEqual(serial, itemwise)

        # Annotations should be correctly masked,
        # whether the sequence has been reverse complemented or not.
        plus = DNA.make_seq("AAGGGGAAAACCCCCAAAAAAAAAATTTTTTTTTTAAA", name="plus")
        plus.add_annotation(Feature, "CDS", "gene", [(2, 6), (10, 15), (25, 35)])
        minus = plus.rc()
        strand_cases = (
            (plus, "AA????AAAA?????AAAAAAAAAA??????????AAA"),
            (minus, "TTT??????????TTTTTTTTTT?????TTTT????TT"),
        )
        for strand_seq, masked in strand_cases:
            self.assertEqual(str(strand_seq.with_masked_annotations("CDS")), masked)
示例#14
0
    def test_picklability(self):
        """Pickle an alignment containing an annotated sequence"""
        # This depends on alignments, sequences, features, maps and spans
        # Doesn't test round trip result is correct, which should possibly
        # be done for maps/spans, but seqs/alignments are just simple
        # python classes without __getstate__ etc.
        import pickle

        plain = DNA.make_seq("aagaagaagaccccca")
        annotated = DNA.make_seq("aagaagaagaccccct")
        annotated.add_feature("exon", "fred", [(10, 15)])
        aln = make_aligned_seqs(data={"a": plain, "b": annotated})
        # TODO the ability to pickle/unpickle depends on the protocol
        # in Py3 for reasons that are not clear. This needs to be looked
        # more closely
        # Passing means dump + load complete without raising.
        pickle.loads(pickle.dumps(aln, protocol=1))
示例#15
0
 def test_translate_frames(self):
     """returns translated sequences"""
     dna = DNA.make_seq("ATGCTGACATAAA", name="fake1")
     # default genetic code: the TGA in the second frame is a stop
     self.assertEqual(translate_frames(dna), ["MLT*", "C*HK", "ADI"])
     # with the Euplotid nuclear code that same TGA is translated as C
     self.assertEqual(
         translate_frames(dna, gc="Euplotid Nuclear"), ["MLT*", "CCHK", "ADI"]
     )
示例#16
0
 def test_stop_indexes(self):
     """should return stop codon indexes for a specified frame"""
     code = GeneticCode(self.SGC)
     dna = DNA.make_seq("ATGCTAACATAAA")
     # expected stop positions keyed by the frame's start offset
     stops_by_frame = {0: [9], 1: [4], 2: []}
     for offset, stops in stops_by_frame.items():
         self.assertEqual(code.get_stop_indices(dna, start=offset), stops)
示例#17
0
 def test_roundtrip_variable(self):
     """should recover the Variable feature type"""
     seq = DNA.make_seq("AAGGGGAAAACCCCCAAAAAAAAAATTTTTTTTTTAAA", name="plus")
     spans_and_values = [[[2, 6], 2.4], [[10, 15], 5.1], [[25, 35], 1.3]]
     original = seq.add_annotation(Variable, "SNP", "freq", spans_and_values)
     restored = deserialise_object(seq.to_json())
     recovered = list(restored.get_annotations_matching("SNP"))[0]
     # annoyingly, comes back as list of lists
     expected = [[list(span), value] for span, value in original.xxy_list]
     self.assertEqual(recovered.xxy_list, expected)
示例#18
0
def makeSampleSequence(with_gaps=False):
    """Return a DNA sequence annotated with a CDS and a 5'UTR feature.

    When ``with_gaps`` is true, positions 5-9 and the final two positions of
    the raw sequence are replaced by gap characters before the sequence is made.
    """
    raw_seq = "AACCCAAAATTTTTTGGGGGGGGGGCCCC"
    if with_gaps:
        raw_seq = "".join([raw_seq[:5], "-" * 5, raw_seq[10:-2], "-" * 2])
    seq = DNA.make_seq(raw_seq)
    # (feature type, feature name, span)
    features = (
        ("CDS", "CDS", (15, 25)),
        ("5'UTR", "5' UTR", (12, 15)),
    )
    for kind, label, span in features:
        seq.add_annotation(Feature, kind, label, [span])
    return seq
示例#19
0
 def test_convert_input(self):
     """converts data for dotplotting"""
     gapped = "ACGGT--A"
     m, seq = DNA.make_seq(gapped).parse_out_gaps()
     # an Aligned instance is unpacked into the very same map and sequence
     gap_map, ungapped = _convert_input(Aligned(m, seq), None)
     self.assertIs(ungapped.moltype, DNA)
     self.assertIs(gap_map, m)
     self.assertIs(ungapped, seq)
     # a raw string plus moltype yields equivalent objects
     gap_map, ungapped = _convert_input(gapped, DNA)
     self.assertEqual(str(gap_map), str(m))
     self.assertEqual(str(ungapped), str(seq))
示例#20
0
    def test_get_align_coords(self):
        """correctly returns the alignment coordinates"""

        def parsed(raw):
            # (gap map, ungapped sequence) for a raw, possibly gapped, string
            return DNA.make_seq(raw).parse_out_gaps()

        # 01234  5
        # ACGGT--A
        #   012345
        # --GGTTTA
        map_a, ungapped_a = parsed("ACGGT--A")
        map_b, ungapped_b = parsed("--GGTTTA")
        x, y = get_align_coords(map_a, map_b)
        self.assertEqual((x, y), ([2, 4, None, 5, 5], [0, 2, None, 5, 5]))

        # we have no gaps, so coords will be None
        map_a, _ = ungapped_a.parse_out_gaps()
        map_b, _ = ungapped_b.parse_out_gaps()
        self.assertEqual(get_align_coords(map_a, map_b), None)

        # unless we indicate the seqs came from an Alignment
        map_a, ungapped_a = parsed("ACGGTTTA")
        map_b, ungapped_b = parsed("GGGGTTTA")
        x, y = get_align_coords(map_a, map_b, aligned=True)
        length = len(ungapped_a)
        self.assertEqual((x, y), ([0, length], [0, length]))

        # raises an exception if the Aligned seqs are different lengths
        map_a, _ = parsed("ACGGTTTA")
        map_b, _ = parsed("GGGGTT")
        with self.assertRaises(AssertionError):
            get_align_coords(map_a, map_b, aligned=True)
示例#21
0
    def test_feature_from_alignment(self):
        """seq features obtained from the alignment

        Walks through three steps that build on each other: reading a
        sequence feature through its alignment, projecting an alignment
        feature onto another sequence, and copying annotations from a
        standalone sequence onto aligned sequences.
        """

        # Sequence features can be accessed via a containing Alignment:

        aln = make_aligned_seqs(data=[["x", "-AAAAAAAAA"], ["y",
                                                            "TTTT--TTTT"]],
                                array_align=False)
        self.assertEqual(str(aln), ">x\n-AAAAAAAAA\n>y\nTTTT--TTTT\n")
        exon = aln.get_seq("x").add_annotation(Feature, "exon", "fred",
                                               [(3, 8)])
        # NOTE(review): the per-sequence query result is overwritten by the
        # next line, so only the any-seq query is checked by the assertions.
        aln_exons = aln.get_annotations_from_seq("x", "exon")
        aln_exons = aln.get_annotations_from_any_seq("exon")
        # But these will be returned as **alignment**
        # features with locations in alignment coordinates.

        self.assertEqual(str(exon), 'exon "fred" at [3:8]/9')
        self.assertEqual(str(aln_exons[0]), 'exon "fred" at [4:9]/10')
        self.assertEqual(str(aln_exons[0].get_slice()),
                         ">x\nAAAAA\n>y\n--TTT\n")
        # after attach() the alignment itself reports one annotation
        aln_exons[0].attach()
        self.assertEqual(len(aln.annotations), 1)

        # Similarly alignment features can be projected onto the aligned sequences,
        # where they may end up falling across gaps:

        exons = aln.get_projected_annotations("y", "exon")
        self.assertEqual(str(exons), '[exon "fred" at [-2-, 4:7]/8]')
        self.assertEqual(str(aln.get_seq("y")[exons[0].map.without_gaps()]),
                         "TTT")

        # We copy the annotations from another sequence,

        # a fresh alignment, so nothing added above carries over
        aln = make_aligned_seqs(data=[["x", "-AAAAAAAAA"], ["y",
                                                            "TTTT--CCCC"]],
                                array_align=False)
        self.s = DNA.make_seq("AAAAAAAAA", name="x")
        exon = self.s.add_annotation(Feature, "exon", "fred", [(3, 8)])
        # the value rebound to `exon` here is not used again
        exon = aln.get_seq("x").copy_annotations(self.s)
        aln_exons = list(aln.get_annotations_from_seq("x", "exon"))
        self.assertEqual(str(aln_exons), '[exon "fred" at [4:9]/10]')

        # even if the name is different.

        exon = aln.get_seq("y").copy_annotations(self.s)
        aln_exons = list(aln.get_annotations_from_seq("y", "exon"))
        self.assertEqual(str(aln_exons), '[exon "fred" at [3:4, 6:10]/10]')
        self.assertEqual(str(aln[aln_exons]), ">x\nAAAAA\n>y\nTCCCC\n")

        # default for get_annotations_from_any_seq is return all features
        # (one copied onto "x" and one copied onto "y")
        got = aln.get_annotations_from_any_seq()
        self.assertEqual(len(got), 2)
示例#22
0
    def test_seq_shorter(self):
        """lost spans on shorter sequences"""

        # If the sequence is shorter, again you get a lost span.

        data = [["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]]
        aln = make_aligned_seqs(data=data, array_align=False)
        source = DNA.make_seq("CCCCCCCCCCCCCCCCCCCCCCCCCCCC", "x")
        # the repeat sits beyond the end of the aligned "y" sequence
        source.add_feature("repeat", "A", [(12, 14)])
        aln.get_seq("y").copy_annotations(source)
        repeats = list(aln.get_annotations_from_seq("y", "repeat"))
        self.assertEqual(str(repeats), '[repeat "A" at [10:10, -6-]/10]')
示例#23
0
def test(r=1, **kw):
    """Time one global pairwise alignment and return its throughput.

    Parameters
    ----------
    r : int
        Number of times the two 10-base seed sequences are repeated.
    **kw
        Forwarded unchanged to ``classic_align_pairwise``.

    Returns
    -------
    int or str
        ``len(seq1) * len(seq2) / seconds / 1000`` truncated to an int, or
        ``"*"`` if the alignment raised ``ArithmeticError``.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.make_seq("AAAATGCTTA" * r)
    seq1 = DNA.make_seq("AATTTTGCTG" * r)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring short durations.
    t0 = time.perf_counter()
    try:
        # return_alignment is False in order to emphasise the quadratic part of
        # the work.
        classic_align_pairwise(seq1,
                               seq2,
                               S,
                               10,
                               2,
                               local=False,
                               return_alignment=False,
                               **kw)
    except ArithmeticError:
        return "*"
    else:
        t = time.perf_counter() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)
示例#24
0
    def test_lost_spans(self):
        """features no longer included in an alignment represented by lost spans"""

        # If the feature lies outside the sequence being copied to, you get a
        # lost span

        data = [["x", "-AAAA"], ["y", "TTTTT"]]
        aln = make_aligned_seqs(data=data, array_align=False)
        source = DNA.make_seq("CCCCCCCCCCCCCCCCCCCC", "x")
        source.add_feature("exon", "A", [(5, 8)])
        aln.get_seq("x").copy_annotations(source)
        exons = list(aln.get_annotations_from_seq("x", "exon"))
        self.assertEqual(str(exons), '[exon "A" at [5:5, -4-]/5]')
        # slicing by the lost feature gives gap characters for every sequence
        self.assertEqual(str(exons[0].get_slice()), ">x\n----\n>y\n----\n")
示例#25
0
    def test_seq_different_name_with_same_length(self):
        """copying features between sequences"""

        # You can copy to a sequence with a different name,
        # in a different alignment if the feature lies within the length

        data = [["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]]
        aln = make_aligned_seqs(data=data, array_align=False)
        source = DNA.make_seq("CCCCCCCCCCCCCCCCCCCC", "x")
        source.add_feature("exon", "A", [(5, 8)])
        # the source is named "x" but the annotations are copied onto "y"
        aln.get_seq("y").copy_annotations(source)
        exons = list(aln.get_annotations_from_seq("y", "exon"))
        self.assertEqual(str(exons), '[exon "A" at [7:10]/10]')
示例#26
0
    def test_score_seq_obj(self):
        """produce correct score from seq"""
        from cogent3 import DNA

        alphabet = "ACTG"
        # one row per motif position, one column per alphabet character
        weights = [
            [0.1, 0.3, 0.5, 0.1],
            [0.25, 0.25, 0.25, 0.25],
            [0.05, 0.8, 0.05, 0.1],
            [0.7, 0.1, 0.1, 0.1],
            [0.6, 0.15, 0.05, 0.2],
        ]
        pssm = PSSM(weights, alphabet)
        char_indices = [3, 1, 2, 0, 2, 2, 3]
        seq = DNA.make_seq("".join(alphabet[i] for i in char_indices))
        assert_allclose(
            pssm.score_seq(seq), [-4.481, -5.703, -2.966], atol=1e-3
        )
示例#27
0
def CigarParser(seqs,
                cigars,
                sliced=False,
                ref_seqname=None,
                start=None,
                end=None,
                moltype=DNA):
    """return an alignment from raw sequences and cigar strings

    If ``sliced`` is true, only the region corresponding to ``start``..``end``
    on the reference sequence is returned.

    Parameters
    ----------
    seqs
        raw sequences as {seqname: seq}
    cigars
        corresponding cigar text as {seqname: cigar_text}; must have the
        same seqnames as ``seqs``
    sliced
        whether to restrict the alignment to a slice of the reference
    ref_seqname
        name of the reference sequence; used only when ``sliced`` is true
    start, end
        slice bounds passed to ``slice_cigar`` for the reference sequence
        (with ``by_align=False``); used only when ``sliced`` is true
    moltype
        molecular type used for every sequence built here, default DNA

    Returns
    -------
    The result of ``make_aligned_seqs`` on the assembled sequences.
    """
    data = {}
    if not sliced:
        for seqname, seq in seqs.items():
            data[seqname] = aligned_from_cigar(cigars[seqname],
                                               seq,
                                               moltype=moltype)
    else:
        ref_aln_seq = aligned_from_cigar(cigars[ref_seqname],
                                         seqs[ref_seqname],
                                         moltype=moltype)
        # aln_loc is the alignment-coordinate window matching start..end
        m, aln_loc = slice_cigar(cigars[ref_seqname],
                                 start,
                                 end,
                                 by_align=False)
        data[ref_seqname] = ref_aln_seq[aln_loc[0]:aln_loc[1]]
        for seqname, seq in seqs.items():
            if seqname == ref_seqname:
                continue
            m, seq_loc = slice_cigar(cigars[seqname], aln_loc[0], aln_loc[1])
            if seq_loc:
                if isinstance(seq, str):
                    seq = moltype.make_seq(seq)
                data[seqname] = seq[seq_loc[0]:seq_loc[1]].gapped_by_map(m)
            else:
                # Nothing of this sequence falls in the window: fill with
                # gaps, using the requested moltype (this used to be
                # hard-coded to DNA, ignoring the ``moltype`` argument).
                data[seqname] = moltype.make_seq("-" *
                                                 (aln_loc[1] - aln_loc[0]))
    aln = make_aligned_seqs(data)
    return aln
def get_var_data(aln, variant, ref_name, aln_flank, min_length, chroms):
    """Assemble the output record for one variant, or None if it is rejected.

    Parameters
    ----------
    aln
        alignment source handed to ``get_syntenic_alignment``
    variant
        sequence of eight fields: name, chromosome, exon strand, effects,
        '/'-separated alleles, 5' flank, 3' flank and a ','-separated
        coordinate string whose first item is the start. ``['']`` denotes an
        empty record.
    ref_name, aln_flank, min_length
        forwarded to the alignment helpers
    chroms
        passed to ``is_correct_chrom`` together with the variant chromosome

    Returns
    -------
    A 15-tuple of fields, or None when the record is empty, the chromosome
    is rejected, the alignment fails ``align_checker`` or no end state can
    be derived.
    """
    if variant == ['']:
        return None

    (var_name, var_chrom, exon_strand, var_effects, var_alleles,
     flank_5_seq, flank_3_seq, var_coord) = variant

    if not is_correct_chrom(chroms, var_chrom):
        return None

    var_alleles = set(var_alleles.split('/'))

    var_start = int(var_coord.split(',')[0])
    #get  alignment
    syn_aln = get_syntenic_alignment(aln, var_name, var_start, ref_name,
                                     aln_flank)
    # rebuild from a deep copy of the underlying dict
    syn_aln = LoadSeqs(data=copy.deepcopy(syn_aln.todict()),
                       moltype=DNA,
                       array_align=False)
    #check alignments and only keep the alignment meet requirements
    checked_aln = align_checker(syn_aln, ref_name, aln_flank, min_length)
    if not checked_aln:
        return None

    start_base = get_start_state(checked_aln, ref_name, aln_flank)

    end_base = get_end_state(start_base, var_alleles)

    # Covers None and the empty string alike; the former extra
    # ``end_base is ''`` identity check was unreachable and has been dropped.
    if not end_base:
        return None

    nbr_seq = DNA.make_seq(flank_5_seq + flank_3_seq)
    gc_content = get_gc(nbr_seq)

    # fields not computed here are emitted as the literal string 'None'
    allele_freqs = pep_alleles = gene_loc = gene_id = 'None'

    response = '-1'

    return (var_name, var_chrom, exon_strand, var_effects, allele_freqs,
            str(var_alleles), str(start_base), str(end_base), str(flank_5_seq),
            str(flank_3_seq), str(gc_content), pep_alleles, gene_loc, gene_id,
            response)
示例#29
0
def _reverse_complement(table):
    '''returns a table with sequences reverse complemented'''
    pos_indices = [i for i, c in enumerate(
        table.header) if c.startswith('pos')]

    rows = table.tolist()
    for row in rows:
        # we use the cogent3 DnaSeq object to do reverse complementing
        seq = DNA.make_seq(''.join(row[i] for i in pos_indices))
        seq = list(seq.rc())
        for i, index in enumerate(pos_indices):
            row[index] = seq[i]
    if rows:
        new = make_table(header=table.header, rows=rows)
    else:
        new = None
    return new
示例#30
0
    def test_roundtrip_json(self):
        """features can roundtrip from json"""
        # Import the function this test actually calls; the previous local
        # import brought in deserialise_seq, which was never used.
        from cogent3.util.deserialise import deserialise_object

        seq = DNA.make_seq("AAAAATATTATTGGGT")
        seq.add_annotation(Feature, "exon", "myname", [(0, 5)])
        got = seq.to_json()
        new = deserialise_object(got)
        feat = new.get_annotations_matching("exon")[0]
        self.assertEqual(str(feat.get_slice()), "AAAAA")

        # now with a list span
        seq = seq[3:]
        # indexing [0] here fails early if slicing dropped the exon
        feat = seq.get_annotations_matching("exon")[0]
        got = seq.to_json()
        new = deserialise_object(got)
        feat = new.get_annotations_matching("exon")[0]
        # complete=False: only the part of the exon left after slicing
        self.assertEqual(str(feat.get_slice(complete=False)), "AA")