Пример #1
0
def mut(x, pmut, pgap):
    """Randomly mutate one residue and optionally append a gap character.

    With probability ``pmut`` the residue ``x`` is replaced by a random pick
    from ``translate.AAs()``. Independently, with probability ``pgap`` a
    ``'-'`` is appended to the (possibly replaced) residue.

    Returns the resulting string.
    """
    # First draw decides the substitution, second draw decides the gap.
    mutated = random.choice(translate.AAs()) if random.random() < pmut else x
    return mutated + '-' if random.random() < pgap else mutated
Пример #2
0
def sequenceCompositionNorm(seq):
    """Return the normalized amino-acid count vector of ``seq``.

    Each symbol of ``translate.AAs()`` is counted in ``seq``; the counts are
    placed, in alphabet order, into a 20-slot vector which is converted to a
    NumPy array and passed through ``normVector``.
    """
    tallies = collections.Counter(seq)
    # Fixed 20 slots: slot i holds the count of the i-th alphabet symbol.
    counts = [0] * 20
    for slot, residue in enumerate(translate.AAs()):
        counts[slot] = tallies[residue]
    return normVector(np.array(counts))
Пример #3
0
def mostFrequent(thelist, nogap=False, gap='-'):
    """Return the most frequent amino-acid (or gap) symbol in ``thelist``.

    Candidates are the symbols of ``translate.AAs()`` plus ``gap``. They are
    ranked by descending ``(count, symbol)``, so ties go to the symbol that
    sorts last. When ``nogap`` is true and the winner is the gap symbol, the
    runner-up is returned instead, provided it occurs at least once.
    """
    candidates = list(translate.AAs()) + [gap]
    ranked = sorted(((thelist.count(sym), sym) for sym in candidates),
                    reverse=True)
    winner = ranked[0][1]
    if not (nogap and winner == gap):
        return winner
    # The gap won but is unwanted: fall back to the runner-up if it is present.
    if len(ranked) > 1 and ranked[1][0] > 0:
        return ranked[1][1]
    return winner
Пример #4
0
def entropy(thelist, gap='-', base=20.0):
    """Return the entropy of the amino-acid symbols in ``thelist``.

    Only symbols from ``translate.AAs()`` contribute. Proportions are taken
    relative to the number of non-gap entries, and logarithms use ``base``.
    Returns ``0.0`` when ``thelist`` has no non-gap entries.
    """
    tallies = [thelist.count(aa) for aa in translate.AAs()]
    ungapped = len(thelist) - thelist.count(gap)
    if ungapped <= 0:
        return 0.0
    denom = float(ungapped)
    # Symbols that never occur are skipped, so log(0) is never evaluated.
    proportions = (ct / denom for ct in tallies if ct > 0)
    return sum(-p * math.log(p, base) for p in proportions)
Пример #5
0
 def test_remove_gaps(self):
     """Aligned sequences, stripped of gaps, must equal the gap-stripped inputs.

     Builds one random 100-residue sequence plus nine mutated copies (which
     may contain gap characters), aligns them with
     ``muscle.alignSequences`` and checks that the alignment has one entry
     per input and that removing ``'-'`` from each aligned sequence
     reproduces the corresponding gap-free input.
     """
     s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
     others = [''.join([mut(x, 0.2, 0.1) for x in s1]) for i in range(9)]
     seqs = [s1] + others
     als = muscle.alignSequences(seqs)
     # Check the count first so the pairwise comparison below cannot index
     # past the end of either list or silently skip a sequence.
     self.assertEqual(len(als), len(seqs))
     for (aligned, original) in zip(als, seqs):
         self.assertEqual(aligned.replace("-", ''),
                          original.replace("-", ''))
Пример #6
0
def mutProtein(prot, mut_rate=0.5):
    """Return a copy of ``prot`` with each position randomly substituted.

    Every position is independently replaced, with probability ``mut_rate``,
    by a random pick from ``translate.AAs()``; otherwise the original residue
    is kept. The replacement may equal the original residue.
    """
    alphabet = translate.AAs()
    # One draw per position decides whether to mutate; a second draw picks
    # the replacement only when the position mutates.
    residues = [
        random.choice(alphabet) if random.random() < mut_rate else original
        for original in prot
    ]
    return "".join(residues)
Пример #7
0
def test001():
    """Check that alignment preserves every input sequence.

    Builds one random 100-residue sequence plus nine mutated copies, aligns
    them with ``muscle.alignSequences`` and compares each aligned sequence,
    stripped of ``'-'``, with its gap-stripped input.

    Returns:
        True only if the alignment has one entry per input and every
        gap-stripped pair matches; False otherwise.
    """
    s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
    others = [''.join([mut(x, 0.2, 0.1) for x in s1]) for i in range(9)]
    seqs = [s1] + others
    als = muscle.alignSequences(seqs)
    # A count mismatch is a failure in itself and would make the pairwise
    # comparison meaningless.
    if len(als) != len(seqs):
        return False
    return all(aligned.replace("-", '') == original.replace("-", '')
               for (aligned, original) in zip(als, seqs))
Пример #8
0
def test002():
    """Check that aligning with the given executable path raises MuscleError.

    Builds one random 100-residue sequence plus nine mutated copies and calls
    ``muscle.alignSequences`` with a hard-coded ``exepath``.
    NOTE(review): presumably that path is expected not to exist on the test
    machine -- confirm.

    Returns:
        True if ``muscle.MuscleError`` was raised, False otherwise.
    """
    s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
    others = [''.join([mut(x, 0.2, 0.1) for x in s1]) for i in range(9)]
    seqs = [s1] + others
    try:
        muscle.alignSequences(
            seqs, exepath=os.path.expanduser("~/develop/muscle3.8.13/muscle"))
    except muscle.MuscleError:
        # "except X as e" / bare-class form works on both Python 2.6+ and 3.
        return True
    return False
Пример #9
0
def makeAlignment(nrow, ncol, average_gap, average_hamming, gap='-'):
    """Generate ``nrow`` random aligned rows of ``ncol`` columns each.

    For every column a Bernoulli draw with success probability
    ``average_gap`` selects the gap pool (1) or the amino-acid pool (0); the
    symbol is then chosen at random from that pool. ``average_hamming`` is
    accepted but not used by this function.

    Returns a list of ``nrow`` strings.
    """
    # Index 0 -> amino acids from translate.AAs(), index 1 -> the gap symbol.
    pools = (list(translate.AAs()), [gap])

    def _random_row():
        picks = sp.random.binomial(1, average_gap, ncol)
        return ''.join([sp.random.choice(pools[kind]) for kind in picks])

    return [_random_row() for _ in range(nrow)]
Пример #10
0
	def test_run(self):
		"""Composition

		Writes a uniform composition table (1/20 per amino acid) to a
		temporary file, reads it back through ``protprop.Composition.read``
		and checks the value stored for ``'A'``.
		"""
		comp = protprop.Composition()
		fname = "tmp_composition.txt"
		try:
			with open(fname, 'w') as outf:
				outf.write("aa\tproportion\n")
				for aa in translate.AAs():
					outf.write("{}\t{}\n".format(aa, 1.0/20))
			with open(fname, 'r') as inf:
				comp.read(inf)
				self.assertAlmostEqual(comp['A'], 1.0/20)
		finally:
			# Remove the temporary file even when reading or the assertion
			# fails, so a failed run does not leave it behind.
			if os.path.exists(fname):
				os.remove(fname)
Пример #11
0
 def getComposition(self, sequence, normalize=False, aas=translate.AAs()):
     """Count how often each symbol of ``aas`` occurs in ``sequence``.

     Returns a list of ``(symbol, value)`` tuples in the order of ``aas``.
     ``value`` is the raw count, or, when ``normalize`` is true and the
     total count is positive, the count divided by that total. Passing
     ``aas=None`` is treated as an empty alphabet.
     """
     alphabet = '' if aas is None else aas
     counts = [(symbol, sequence.count(symbol)) for symbol in alphabet]
     if not normalize:
         return counts
     total = float(sum(n for (_, n) in counts))
     if total > 0.0:
         return [(symbol, n / total) for (symbol, n) in counts]
     # Nothing was counted: hand back the (all-zero) counts unscaled.
     return list(counts)
Пример #12
0
 def test_gapped_index(self):
     """Aligning gap-free inputs must return them unchanged once gaps are removed.

     Builds one random 50-residue sequence plus nine mutated copies with
     no inserted gaps, aligns them with ``muscle.alignSequences`` and
     checks that each aligned sequence, stripped of ``'-'``, equals its
     input. A ``muscle.MuscleError`` is reported as a test failure that
     includes the error text.
     """
     s1 = ''.join(stats.sample_wr(translate.AAs(), 50))
     # No gaps: pgap = 0.0
     others = [''.join([mut(x, 0.2, 0.0) for x in s1]) for i in range(9)]
     seqs = [s1] + others
     # Only the alignment call itself is guarded; assertion failures below
     # propagate normally.
     try:
         als = muscle.alignSequences(seqs)
     except muscle.MuscleError as me:
         self.fail("muscle.alignSequences raised MuscleError: {}".format(me))
     self.assertEqual(len(als), len(seqs))
     for (i, s) in enumerate(seqs):
         self.assertEqual(s, als[i].replace("-", ''))
Пример #13
0
def sequenceEntropy(seq, base=20, aas=translate.AAs()):
	"""Compute the entropy of ``seq`` over the alphabet ``aas``.

	Symbols of ``seq`` that are not in ``aas`` are ignored, both in the
	counts and in the length used for the frequencies. Logarithms are taken
	in ``base``.

	Returns an ``EntropyResult`` whose ``entropy`` attribute holds the entropy
	and whose ``counts`` attribute holds the per-symbol count dictionary.
	"""
	tallies = dict.fromkeys(aas, 0)
	total = 0.0  # number of residues that belong to the alphabet
	for residue in seq:
		if residue in tallies:
			tallies[residue] += 1
			total += 1
	log_base = sp.log(base)  # change of base: log_b(f) = log_e(f)/log_e(b)
	terms = []
	for tally in tallies.values():
		# Zero counts are skipped, so neither log(0) nor 0/0 is evaluated.
		if tally > 0:
			freq = tally / total
			terms.append(freq * sp.log(freq) / log_base)
	seq_entropy = -sum(terms)
	result = EntropyResult()
	result.entropy = seq_entropy
	result.counts = tallies
	return result
Пример #14
0
 def __init__(self, seq, distributions, alphabet=translate.AAs()):
     """Build proportion and z-score vectors for ``seq`` over ``alphabet``.

     ``_proportion_vec`` holds, per alphabet symbol, its count divided by
     ``len(seq)``. ``_composition_vec`` holds, per alphabet symbol, the raw
     count standardized with ``distributions[symbol].mean`` and
     ``distributions[symbol].sd``. ``self.normalize()`` is called last.

     A symbol in ``seq`` that is missing from ``alphabet`` raises
     ``KeyError``.
     """
     self._seq = seq
     self._alphabet = alphabet
     tallies = dict.fromkeys(alphabet, 0.0)
     for residue in seq:
         tallies[residue] += 1.0
     seq_len = len(seq)
     self._proportion_vec = sp.r_[[
         float(tallies[letter]) / seq_len for letter in alphabet
     ]]
     # Standardize each raw count against its reference distribution.
     z_scores = []
     for letter in alphabet:
         dist = distributions[letter]
         z_scores.append((tallies[letter] - dist.mean) / dist.sd)
     self._composition_vec = sp.r_[z_scores]
     self.normalize()
Пример #15
0
 def __init__(self, seq, weights=None, alphabet=translate.AAs()):
     """Build a weighted composition vector for ``seq`` over ``alphabet``.

     ``weights`` gives one weight per position of ``seq`` and defaults to
     1.0 everywhere. ``_composition_vec`` holds, per alphabet symbol, the
     sum of the weights at the positions where it occurs.
     ``self.normalize()`` is called last.
     """
     self._seq = seq
     self._len = len(seq)
     if weights is None:
         weights = [1.0] * len(seq)
     else:
         assert len(seq) == len(weights)
     totals = dict.fromkeys(alphabet, 0)
     for (pos, residue) in enumerate(seq):
         if residue in totals:
             totals[residue] += weights[pos]
         else:
             # Symbols outside the alphabet are tracked in the dict but
             # never reach the composition vector built below.
             totals[residue] = weights[pos]
     self._alphabet = alphabet
     self._composition_vec = sp.r_[[float(totals[a]) for a in alphabet]]
     self.normalize()
Пример #16
0
    # Optional codon-optimization pass over the input sequences.
    if options.optimize:
        info_outs.write("# Optimizing sequences...\n")
        gc = translate.geneticCode(rna=False)
        codons = {}
        # Map each amino acid to a codon from opt_codons; W and M are then set
        # explicitly to TGG and ATG.
        opt_codon_dict = dict([(gc[c], c) for c in opt_codons])
        opt_codon_dict['W'] = 'TGG'
        opt_codon_dict['M'] = 'ATG'

        opt_headers = []
        opt_seqs = []
        # optimize the codon sequences
        for (id, seq) in seqs:
            orig_codons = [c for c in translate.codons(seq)]
            prot_seq = translate.translate(seq)
            if not prot_seq is None:
                # Rebuild, for every sequence, the per-amino-acid list of codons
                # whose relad_dict value is at least options.min_rel_adapt.
                for aa in translate.AAs():
                    codons[aa] = [
                        c for c in translate.getCodonsForAA(aa, rna=False)
                        if relad_dict[c] >= options.min_rel_adapt
                    ]
                opt_seq = ''
                for (aai, aa) in enumerate(prot_seq):
                    #opt_seq += opt_codon_dict[aa] #random.choice(codons[aa])
                    # NOTE(review): this is the same list object stored in
                    # codons[aa], so the remove() below also shrinks codons[aa]
                    # for later positions of this sequence -- confirm intended.
                    codons_to_choose_from = codons[aa]
                    # If avoiding codons and we have a choice, eliminate the avoided codon.
                    if options.avoid_sequence and len(
                            codons_to_choose_from) > 1:
                        try:
                            codons_to_choose_from.remove(orig_codons[aai])
                        except ValueError:  # codon to be avoided not among codon choices anyway
                            pass
Пример #17
0
 def getComposition(seq, aas=translate.AAs(), normalize=False):
     """Create a ``Composition`` over ``aas`` and initialise it from ``seq``.

     ``normalize`` is forwarded unchanged to ``initFromSequence``. Returns
     the initialised ``Composition`` object.
     """
     composition = Composition(aas)
     composition.initFromSequence(seq, normalize)
     return composition
Пример #18
0
 def __init__(self, aas=translate.AAs()):
     """Start with a zero entry for every symbol in ``aas``."""
     self._aas = aas
     # Dictionary mapping each symbol to its value, initially 0.
     self._comp_dict = dict.fromkeys(self._aas, 0)
Пример #19
0
        # By default, write to stdout
        data_outs.addStream(sys.stdout)

    # Write out parameters
    # Each line is '#'-prefixed: start time, the command line, then one line
    # per parsed option.
    data_outs.write("# Run started {}\n".format(util.timestamp()))
    data_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
    data_outs.write("# Parameters:\n")
    optdict = vars(options)
    for (k, v) in optdict.items():
        data_outs.write("#\t{k}: {v}\n".format(k=k, v=v))

    pp = ProteinProperties()
    # Amino-acid alphabet: 'all' (case-insensitive) selects translate.AAs();
    # any other string is split into its characters; otherwise aas stays None.
    aas = None
    if not options.aas is None:
        if options.aas.lower() == 'all':
            aas = translate.AAs()
        else:
            aas = [aa for aa in options.aas]

    # Single sequence?
    if not options.sequence is None:
        headers = ['Input']
        seqs = [options.sequence]
    else:
        if not options.in_fname is None:
            fname = os.path.expanduser(options.in_fname)
            #print(fname)
            # NOTE(review): the file object passed to readFASTA is never
            # explicitly closed here.
            (headers, seqs) = biofile.readFASTA(open(fname, 'r'))
        else:
            info_outs.write("# No sequence or file provided; exiting\n")
            # NOTE(review): sys.exit() without an argument exits with status 0
            # even though no input was provided -- confirm intended.
            sys.exit()
Пример #20
0
 def __init__(self):
     """Store the alphabet returned by ``translate.AAs()`` on the instance."""
     self._aas = translate.AAs()
Пример #21
0
 def fromSequence(self, seq):
     """Return a ``SequenceCompositionComparator`` for ``seq``.

     The comparator wraps the normalized composition of ``seq`` over
     ``translate.AAs()``, as computed by ``self._prop.getComposition``.
     """
     normalized = self._prop.getComposition(
         seq, normalize=True, aas=translate.AAs())
     return SequenceCompositionComparator(normalized)
Пример #22
0
def randomProtein(L):
    """Return a random protein string built from ``translate.AAs()``.

    The residues come from ``stats.sample_wr(translate.AAs(), L)`` and are
    joined into one string (presumably ``L`` draws with replacement --
    confirm against ``stats.sample_wr``).
    """
    return ''.join(stats.sample_wr(translate.AAs(), L))
Пример #23
0
                        action="store_true",
                        help="compute amino-acid frequencies?")
    # NOTE(review): parser is presumably an argparse.ArgumentParser created
    # above this excerpt -- confirm.
    # Boolean switch stored as options.do_gc (False unless --gc is given).
    parser.add_argument("--gc",
                        dest="do_gc",
                        default=False,
                        action="store_true",
                        help="compute GC frequencies?")
    # Boolean switch stored as options.do_mw (False unless --mw is given).
    parser.add_argument("--mw",
                        dest="do_mw",
                        default=False,
                        action="store_true",
                        help="compute molecular weights?")
    # Alphabet for the frequency analysis; the default is whatever
    # translate.AAs() returns at the time this argument is registered.
    parser.add_argument("--target-aas",
                        dest="target_aas",
                        type=str,
                        default=translate.AAs(),
                        help="amino acids (e.g. ACDEF) for frequency analysis")
    parser.add_argument("-p",
                        "--pseudo",
                        dest="pseudocount",
                        type=float,
                        default=0.0,
                        help="pseudocount to add to all frequencies")
    # No output filename by default (options.out_fname is None).
    parser.add_argument("-o",
                        "--out",
                        dest="out_fname",
                        type=str,
                        default=None,
                        help="output filename")
    options = parser.parse_args()