def main():
    """Print a per-position consensus string and the nucleotide profile.

    Reads the FASTA file named by ``sys.argv[1]`` with ``readFASTA``, counts
    how often each of A, C, G and T occurs at every position across all
    sequences, prints one symbol per position as chosen by ``getMaxProb``,
    and then prints one ``"<nuc>: c1 c2 ..."`` row for each nucleotide.

    Raises:
        IndexError: if the file holds no records, or a sequence is shorter
            than the first one.
        KeyError: if a sequence contains a symbol other than A, C, G or T.
    """
    data = readFASTA(sys.argv[1])
    # Index 1 of every record holds the sequence string.
    sequences = [record[1] for record in data]
    # The first sequence decides how many columns the profile has.
    dna_len = len(sequences[0])

    # Build a fresh counter per column; no shared-dict pre-allocation needed.
    profile = []
    for i in range(dna_len):
        count = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
        for dna in sequences:
            count[dna[i]] += 1
        profile.append(count)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(''.join(getMaxProb(count) for count in profile))
    for nuc in 'ACGT':
        print(nuc + ':' + ''.join(' ' + str(count[nuc]) for count in profile))
def main():
    """Print a per-position consensus string and the nucleotide profile.

    Reads the FASTA file named by ``sys.argv[1]`` with ``readFASTA``, counts
    how often each of A, C, G and T occurs at every position across all
    sequences, prints one symbol per position as chosen by ``getMaxProb``,
    and then prints one ``"<nuc>: c1 c2 ..."`` row for each nucleotide.

    Raises:
        IndexError: if the file holds no records, or a sequence is shorter
            than the first one.
        KeyError: if a sequence contains a symbol other than A, C, G or T.
    """
    data = readFASTA(sys.argv[1])
    # Index 1 of every record holds the sequence string.
    sequences = [record[1] for record in data]
    # The first sequence decides how many columns the profile has.
    dna_len = len(sequences[0])

    # Build a fresh counter per column; no shared-dict pre-allocation needed.
    profile = []
    for i in range(dna_len):
        count = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
        for dna in sequences:
            count[dna[i]] += 1
        profile.append(count)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(''.join(getMaxProb(count) for count in profile))
    for nuc in 'ACGT':
        print(nuc + ':' + ''.join(' ' + str(count[nuc]) for count in profile))
def main():
    """Print the result of ``long_substr`` over every sequence in a FASTA file.

    Reads the FASTA file named by ``sys.argv[1]`` with ``readFASTA``, collects
    the sequence stored at index 1 of each record, passes that list to
    ``long_substr`` and prints whatever it returns.
    """
    data = readFASTA(sys.argv[1])
    dna = [fasta[1] for fasta in data]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(long_substr(dna))
def main():
    """Strip the later records' sequences out of the first one and translate it.

    Reads the FASTA file named by ``sys.argv[1]`` with ``readFASTA``. Starting
    from the ``dna`` of the first record, every occurrence of each following
    record's ``dna`` is removed (in file order), and the remainder is passed
    to ``DNA_to_PROT``; the result is printed.

    Raises:
        IndexError: if the file holds no records.
    """
    data = readFASTA(sys.argv[1])
    dna = data[0].dna
    # Presumably the remaining records are introns to splice out -- confirm.
    for record in data[1:]:
        dna = dna.replace(record.dna, '')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(DNA_to_PROT(dna))
def main():
    """Print position and length of every reverse palindrome of length 4-12.

    Reads a single record from the FASTA file named by ``sys.argv[1]`` and
    prints one ``"<1-based start> <length>"`` line for each window of the
    sequence that equals its own reverse complement. Lines are ordered by
    length first, then by start position.
    """
    data = readFASTA(sys.argv[1])
    dna = data.dna
    n = len(dna)
    # Assumes revcomp_DNA returns the reverse complement of its whole
    # argument -- confirm against its definition.
    revc = revcomp_DNA(dna)

    locations = []
    for length in range(4, 13):
        for start in range(n - length + 1):
            # The reverse complement of dna[start:start + length] is found at
            # the mirrored window of revc, not at the same offsets: position k
            # of revc corresponds to position n - 1 - k of dna.
            mirrored = revc[n - start - length:n - start]
            if dna[start:start + length] == mirrored:
                locations.append(str(start + 1) + ' ' + str(length))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('\n'.join(locations))
def main():
    """Print the transition/transversion ratio of two sequences.

    Reads exactly two records from the FASTA file named by ``sys.argv[1]``
    and compares them position by position over the length of the second
    sequence. Each mismatch is counted as a transition when
    ``is_transition`` says so, otherwise as a transversion.

    Raises:
        ValueError: if the file does not hold exactly two records.
        IndexError: if the first sequence is shorter than the second.
        ZeroDivisionError: if no transversion is found.
    """
    data = readFASTA(sys.argv[1])
    s, q = [d.dna for d in data]

    transitions = transversions = 0
    for i, base in enumerate(q):
        if s[i] == base:
            continue
        if is_transition(s[i], base):
            transitions += 1
        else:
            transversions += 1

    # float() forces true division on Python 2 as well; single-argument
    # print(...) behaves the same on Python 2 and Python 3.
    print(float(transitions) / transversions)
def main():
    """Print 1-based positions at which the second sequence embeds in the first.

    Reads exactly two records from the FASTA file named by ``sys.argv[1]``.
    For every symbol of the second sequence, the first sequence is searched
    from just after the previous match, and the 1-based index of the match is
    recorded. The indices are printed space-separated on one line.

    Raises:
        ValueError: if the file does not hold exactly two records, or if the
            second sequence is not a subsequence of the first.
    """
    data = readFASTA(sys.argv[1])
    s, q = [d.dna for d in data]

    start = 0
    res = []
    for c in q:
        idx = s.find(c, start)
        if idx == -1:
            # Without this check a miss would emit index 0 and silently
            # restart the search from the beginning of s.
            raise ValueError(
                'symbol %r not found at or after position %d' % (c, start + 1))
        res.append(idx + 1)
        start = idx + 1

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(' '.join(map(str, res)))
def main():
    """Print the transition/transversion ratio of two sequences.

    Reads exactly two records from the FASTA file named by ``sys.argv[1]``
    and compares them position by position over the length of the second
    sequence. Each mismatch is counted as a transition when
    ``is_transition`` says so, otherwise as a transversion.

    Raises:
        ValueError: if the file does not hold exactly two records.
        IndexError: if the first sequence is shorter than the second.
        ZeroDivisionError: if no transversion is found.
    """
    data = readFASTA(sys.argv[1])
    s, q = [d.dna for d in data]

    transitions = transversions = 0
    for i, base in enumerate(q):
        if s[i] == base:
            continue
        if is_transition(s[i], base):
            transitions += 1
        else:
            transversions += 1

    # float() forces true division on Python 2 as well; single-argument
    # print(...) behaves the same on Python 2 and Python 3.
    print(float(transitions) / transversions)
def main():
    """Print the pairwise ``p_distance`` matrix of all sequences in a FASTA file.

    Reads the FASTA file named by ``sys.argv[1]`` with ``readFASTA``, fills a
    square matrix created by ``zeros`` with ``p_distance`` of every pair of
    sequences (index 1 of each record), and prints one space-separated row
    per line. The diagonal keeps the initial value supplied by ``zeros``.
    """
    data = readFASTA(sys.argv[1])
    size = len(data)
    # NOTE(review): zeros is presumably numpy.zeros -- confirm the import.
    matrix = zeros((size, size))

    # The matrix is mirrored across the main diagonal, so only the upper
    # triangle is computed and each value is copied to its mirror cell.
    for i in range(size):
        for j in range(i + 1, size):
            matrix[i][j] = p_distance(data[i][1], data[j][1])
            matrix[j][i] = matrix[i][j]

    for row in matrix:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' '.join(map(str, row)))
def main():
    """Write the KMP failure array of a sequence to ``output/kmp.txt``.

    Reads a single record from the FASTA file named by ``sys.argv[1]`` and
    computes, for every position ``i``, the length of the longest proper
    prefix of ``dna[:i + 1]`` that is also a suffix of it. The values are
    written space-separated to ``output/kmp.txt``; an empty sequence yields
    an empty file instead of raising ``IndexError``.
    """
    data = readFASTA(sys.argv[1])
    dna = data.dna

    # Position 0 is always 0, which the initialisation already provides.
    fail = [0] * len(dna)
    for i in range(1, len(dna)):
        j = fail[i - 1]
        # Fall back to shorter borders until one can be extended by dna[i].
        while j > 0 and dna[i] != dna[j]:
            j = fail[j - 1]
        if dna[i] == dna[j]:
            j += 1
        fail[i] = j

    with open('output/kmp.txt', 'w') as output:
        output.write(' '.join(map(str, fail)))
help="Input FASTA") parser.add_argument('-o', '--output', required=False, type=argparse.FileType('w'), default=stdout, help="Output") parser.add_argument('-a', '--alpha', required=False, type=float, default=float('inf'), help="Gamma distribution alpha parameter") args = parser.parse_args() return args.input, args.output, args.alpha # main code execution infile, outfile, alpha = parseArgs() seqs = readFASTA(infile) infile.close() keys = list(seqs.keys()) L = None for k in keys: if L is None: L = len(seqs[k]) assert L == len(seqs[k]), "All sequences must be of equal length" for i in range(len(keys) - 1): for j in range(i + 1, len(keys)): outfile.write('%f\n' % jc69(seqs[keys[i]], seqs[keys[j]], alpha))
'''
Convert names of FASTA file to random safe names. Sequences are output to
STDOUT. Dictionary of name mappings is output to STDERR.
'''
import argparse
from sys import stdin, stderr

from common import ran_str, readFASTA

K = 20  # safenames will be length 20
ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-i', '--input', required=False,
                    type=argparse.FileType('r'), default=stdin,
                    help="Input FASTA")
args = parser.parse_args()

seqs = readFASTA(args.input)

# Maps every generated safe name back to the original FASTA name. Named so
# that it does not shadow the builtin map().
safe_to_original = {}
for key in seqs:
    # Redraw until the random name is not already taken.
    safe = ran_str(ALPHABET, K)
    while safe in safe_to_original:
        safe = ran_str(ALPHABET, K)
    safe_to_original[safe] = key
    print('>%s\n%s\n' % (safe, seqs[key]))

# The mapping goes to STDERR so STDOUT stays a clean FASTA stream.
print(str(safe_to_original), file=stderr)