def test_complement_ambiguous_dna_values(self):
    """Check the complement tables agree with ``Seq.complement``.

    For every key of ``ambiguous_dna_values`` (visited in sorted order), the
    value string is wrapped in a ``Seq`` with the ``IUPAC.ambiguous_dna``
    alphabet and complemented.  The characters of that result must equal, as
    a set, the characters stored in ``ambiguous_dna_values`` under the key
    that ``ambiguous_dna_complement`` maps the original key to.
    """
    for ambig_char in sorted(ambiguous_dna_values):
        values = ambiguous_dna_values[ambig_char]
        complemented = Seq.Seq(values, alphabet=IUPAC.ambiguous_dna).complement()
        partner_char = ambiguous_dna_complement[ambig_char]
        # Compare as sets: only membership matters, not character order.
        self.assertEqual(
            set(str(complemented)),
            set(ambiguous_dna_values[partner_char]),
        )
def _calc_at_content_values():
    """Build lookup tables giving the A/T fraction of each DNA symbol.

    Each key of ``ambiguous_dna_values`` is mapped to the share of "A" and
    "T" characters in its value string.  Keys that occur in the string
    ``"GCTASWN"`` go into the first table, all other keys into the second.
    Every entry is stored under the key itself and under its lower-case form.

    Returns:
        A ``(first_table, second_table)`` tuple of dictionaries mapping
        symbol -> float fraction.
    """
    unambiguous_symbols = "GCTASWN"
    at_unambiguous, at_ambiguous = {}, {}
    for symbol, expansion in ambiguous_dna_values.items():
        at_count = expansion.count("A") + expansion.count("T")
        # float() keeps this a true division on interpreters where "/" on
        # two ints would truncate.
        fraction = float(at_count) / len(expansion)
        if symbol in unambiguous_symbols:
            table = at_unambiguous
        else:
            table = at_ambiguous
        table[symbol] = fraction
        table[symbol.lower()] = fraction
    return at_unambiguous, at_ambiguous
def _calc_gc_content_values():
    """Build lookup tables giving the G/C fraction of each DNA symbol.

    Each key of ``ambiguous_dna_values`` is mapped to the share of "C" and
    "G" characters in its value string.  Keys that occur in the string
    ``"GCTASWN"`` are collected in the first dictionary, the remaining keys
    in the second.  Upper- and lower-case spellings of a key share one value.

    Returns:
        A ``(first_table, second_table)`` tuple of dictionaries mapping
        symbol -> float fraction.
    """
    unambiguous_symbols = "GCTASWN"
    gc_unambiguous = {}
    gc_ambiguous = {}
    for symbol, expansion in ambiguous_dna_values.items():
        target = gc_unambiguous if symbol in unambiguous_symbols else gc_ambiguous
        gc_count = expansion.count("C") + expansion.count("G")
        # float() forces true division regardless of interpreter version.
        share = float(gc_count) / len(expansion)
        for key in (symbol, symbol.lower()):
            target[key] = share
    return gc_unambiguous, gc_ambiguous
def load_iupac():
    """Map every ordering of a symbol's letters to a one-element code tuple.

    The first pass inverts ``ambiguous_dna_values`` into a mapping from the
    tuple of letters to its code.  The second pass expands each letter tuple
    into all of its permutations, so a lookup succeeds whatever order the
    letters arrive in.  When several entries produce the same permutation,
    the one processed last wins.

    Returns:
        dict mapping ``tuple`` of letters -> ``(code,)``.
    """
    # Pass 1: letters (as a tuple) -> code.
    code_by_letters = {}
    for code, letters in ambiguous_dna_values.items():
        code_by_letters[tuple(letters)] = code

    # Pass 2: register every ordering of those letters.
    lookup = {}
    for letters, code in code_by_letters.items():
        for ordering in permutations(letters):
            lookup[ordering] = (code,)
    return lookup
def test_complement_ambiguous_dna_values(self):
    """Check the complement tables agree with ``Seq.complement``.

    Walks ``ambiguous_dna_values`` in sorted order.  The value string of each
    key is complemented through ``Seq.Seq(...).complement()`` and must hold
    the same elements, with the same counts, as the value stored under the
    key that ``ambiguous_dna_complement`` maps the original key to.
    """
    for ambig_char, values in sorted(ambiguous_dna_values.items()):
        partner_char = ambiguous_dna_complement[ambig_char]
        expected = ambiguous_dna_values[partner_char]
        actual = Seq.Seq(values).complement()
        # assertCountEqual ignores order but not multiplicity.
        self.assertCountEqual(actual, expected)
def complement(sequence): # TODO - Add a complement function to Bio/Seq.py? # There is already a complement method on the Seq and MutableSeq objects. return Seq.reverse_complement(sequence)[::-1] def sorted_dict(d): """A sorted repr of a dictionary.""" return "{%s}" % ", ".join("%s: %s" % (repr(k), repr(v)) for k, v in sorted(d.items())) print("") print("DNA Ambiguity mapping: %s" % sorted_dict(ambiguous_dna_values)) print("DNA Complement mapping: %s" % sorted_dict(ambiguous_dna_complement)) for ambig_char, values in sorted(ambiguous_dna_values.items()): compl_values = complement(values) print("%s={%s} --> {%s}=%s" % (ambig_char, values, compl_values, ambiguous_dna_complement[ambig_char])) assert set(compl_values) == set( ambiguous_dna_values[ambiguous_dna_complement[ambig_char]]) print("") print("RNA Ambiguity mapping: %s" % sorted_dict(ambiguous_rna_values)) print("RNA Complement mapping: %s" % sorted_dict(ambiguous_rna_complement)) for ambig_char, values in sorted(ambiguous_rna_values.items()): compl_values = complement(values).replace( "T", "U") # need to help as no alphabet print("%s={%s} --> {%s}=%s" % (ambig_char, values, compl_values, ambiguous_rna_complement[ambig_char])) assert set(compl_values) == set(
def complement(sequence):
    """Return the complement of ``sequence`` without reversing it.

    The result is obtained by calling ``Seq.reverse_complement`` and then
    undoing the reversal with a ``[::-1]`` slice.
    """
    # TODO - Add a complement function to Bio/Seq.py?
    # There is already a complement method on the Seq and MutableSeq objects.
    reverse_complemented = Seq.reverse_complement(sequence)
    return reverse_complemented[::-1]


def sorted_dict(d):
    """Format a dictionary as ``{key: value, ...}`` with its items sorted."""
    pairs = []
    for key, value in sorted(d.items()):
        pairs.append("%s: %s" % (repr(key), repr(value)))
    return "{%s}" % ", ".join(pairs)


# DNA: print both tables, then check each entry against its complement entry.
print("")
print("DNA Ambiguity mapping: %s" % sorted_dict(ambiguous_dna_values))
print("DNA Complement mapping: %s" % sorted_dict(ambiguous_dna_complement))
for ambig_char in sorted(ambiguous_dna_values):
    values = ambiguous_dna_values[ambig_char]
    compl_values = complement(values)
    print(
        "%s={%s} --> {%s}=%s"
        % (ambig_char, values, compl_values, ambiguous_dna_complement[ambig_char])
    )
    assert set(compl_values) == set(
        ambiguous_dna_values[ambiguous_dna_complement[ambig_char]]
    )

# RNA: same check; the complemented string has its "T" rewritten to "U".
print("")
print("RNA Ambiguity mapping: %s" % sorted_dict(ambiguous_rna_values))
print("RNA Complement mapping: %s" % sorted_dict(ambiguous_rna_complement))
for ambig_char in sorted(ambiguous_rna_values):
    values = ambiguous_rna_values[ambig_char]
    # need to help as no alphabet
    compl_values = complement(values).replace("T", "U")
    print(
        "%s={%s} --> {%s}=%s"
        % (ambig_char, values, compl_values, ambiguous_rna_complement[ambig_char])
    )
    assert set(compl_values) == set(
        ambiguous_rna_values[ambiguous_rna_complement[ambig_char]]
    )

print("")
def test_complement_ambiguous_dna_values(self):
    """Check the complement tables agree with ``Seq.complement``.

    Iterates over ``ambiguous_dna_values`` in sorted order.  Each value
    string is turned into a ``Seq`` using the ``IUPAC.ambiguous_dna``
    alphabet, complemented and converted back to ``str``.  Its set of
    characters must equal the set of characters stored under the key that
    ``ambiguous_dna_complement`` maps the original key to.
    """
    for ambig_char, values in sorted(ambiguous_dna_values.items()):
        dna = Seq.Seq(values, alphabet=IUPAC.ambiguous_dna)
        observed = set(str(dna.complement()))
        expected = set(ambiguous_dna_values[ambiguous_dna_complement[ambig_char]])
        # Order of the characters is irrelevant, hence the set comparison.
        self.assertEqual(observed, expected)
def _calc_base_values(base):
    """Return the fraction of ``base`` within each DNA symbol's value string.

    Args:
        base: The character to count in every value of
            ``ambiguous_dna_values``.

    Returns:
        dict mapping each key of ``ambiguous_dna_values``, and its lower-case
        form, to ``count(base) / len(value)`` as a float.
    """
    fractions = {}
    for symbol, expansion in ambiguous_dna_values.items():
        # float() keeps this a true division on every interpreter version.
        share = float(expansion.count(base)) / len(expansion)
        fractions[symbol] = share
        fractions[symbol.lower()] = share
    return fractions
if debug: print('yield', segment_smpl_seqs, [(snp_['start'], snp_['stop'] - 1) for snp_ in snps_to_yield]) yield segment, segment_smpl_seqs, snps_to_yield reamining_smpl_seqs = [srfs[pos - offset:] for srfs in sample_ref_seqs] #remaining_seq = ref_seq[pos - offset:] if reamining_smpl_seqs[0]: reg = pos, region[2] if len(region) > 2 else None yield reg, reamining_smpl_seqs, [] IUPAC = { tuple(sorted(nucls)): iupac.encode('utf8') for iupac, nucls in ambiguous_dna_values.items() } def to_str(bytes_): if isinstance(bytes_, bytes): return bytes_.decode('utf-8') else: return bytes_ def to_bytes(str_): if not isinstance(str_, bytes): return str_.encode('utf-8') else: return str_
pos = max([snp_['stop'] for snp_ in snps_to_yield]) if len(region) < 3 or pos <= region[2]: # Are the SNPs inside the region to return? if debug: print('yield', segment_smpl_seqs, [(snp_['start'], snp_['stop'] - 1) for snp_ in snps_to_yield]) yield segment, segment_smpl_seqs, snps_to_yield reamining_smpl_seqs = [srfs[pos - offset:] for srfs in sample_ref_seqs] #remaining_seq = ref_seq[pos - offset:] if reamining_smpl_seqs[0]: reg = pos, region[2] if len(region) > 2 else None yield reg, reamining_smpl_seqs, [] IUPAC = {tuple(sorted(nucls)): iupac.encode('utf8') for iupac, nucls in ambiguous_dna_values.items()} def to_str(bytes_): if isinstance(bytes_, bytes): return bytes_.decode('utf-8') else: return bytes_ def to_bytes(str_): if not isinstance(str_, bytes): return str_.encode('utf-8') else: return str_