def main():
    """Print the shortest superstring found for the sequences in the input file.

    The input path is read from ``sys.argv[1]``.
    """
    check_input(sys.argv[0])
    sequences = [record_seq for _, record_seq in read_fasta(sys.argv[1])]
    seq_graph = create_graph(sequences)
    print(find_shortest_superstring(seq_graph))
def main():
    """Print the total number of different RNA strings for the input protein.

    Only the first line of the file named by ``sys.argv[1]`` is used.
    """
    check_input(sys.argv[0])

    # Number of codons that code for each amino acid (and for a stop).
    codons_per_residue = {
        "I": 3, "L": 6, "V": 4, "F": 2, "M": 1, "C": 2, "A": 4,
        "G": 4, "P": 4, "T": 4, "S": 6, "Y": 2, "W": 1, "Q": 2,
        "N": 2, "H": 2, "E": 2, "D": 2, "K": 2, "R": 6, "Stop": 3,
    }

    with open(sys.argv[1]) as handle:
        protein = handle.readline().strip()
    print(possible_mrnas(protein, codons_per_residue))
def main():
    """Print the size of the next population computed from the input line.

    The first line of the file named by ``sys.argv[1]`` is split on single
    spaces and passed, still as strings, to ``calculate_average``.
    """
    check_input(sys.argv[0])
    offspring_table = {1: 1, 2: 1, 3: 1, 4: 0.75, 5: 0.5, 6: 0}
    with open(sys.argv[1]) as handle:
        first_line = handle.readline()
    fields = first_line.strip().split(" ")
    print(calculate_average(fields, offspring_table))
def main():
    """Print the Hamming distance between the first two lines of the input."""
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        # Read exactly two lines, in file order.
        first, second = (handle.readline().strip() for _ in range(2))
    print(hamming_distance(first, second))
def main():
    """Enumerate k-mers for the alphabet and k given in the input file.

    Line one holds the space-separated alphabet, line two the integer k.
    Output is delegated to ``lexi``.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        alphabet_line = handle.readline()
        size_line = handle.readline()
        symbols = alphabet_line.strip().split(" ")
        length = int(size_line.strip())
    lexi(symbols, length)
def main():
    """Print the probability computed by ``calculate_lia`` for the input.

    The first line of the input file holds whitespace-separated integers;
    the first two are passed on.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        numbers = list(map(int, handle.readline().strip().split()))
    first, second = numbers[0], numbers[1]
    print(calculate_lia(first, second))
def main():
    """Print the result of ``rna`` for the sequence in the input file.

    All lines of the file are stripped and concatenated into one string.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        sequence = "".join(raw.strip() for raw in handle)
    print(rna(sequence))
def main():
    """Print the number of partial permutations for the n and k in the input.

    n is read from the first line of the file and k from the second.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        total = int(handle.readline().strip())
        chosen = int(handle.readline().strip())
    print(partial_permutations(total, chosen))
def main():
    """Print the longest common subsequence of the first two input sequences."""
    check_input(sys.argv[0])
    records = [record_seq for _, record_seq in read_fasta(sys.argv[1])]
    first, second = records[0], records[1]
    print(find_common_subsequence(first, second))
def main():
    """Print the translation of the string stored in the input file.

    All lines of the file are stripped and concatenated before translating.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        joined = "".join(raw.strip() for raw in handle)
    print(translate(joined))
def main():
    """Print the distance matrix for the input sequences, one row per line.

    Values within a row are separated by single spaces.
    """
    check_input(sys.argv[0])
    records = [record_seq for _, record_seq in read_fasta(sys.argv[1])]
    for matrix_row in create_matrix(records):
        print(" ".join(map(str, matrix_row)))
def main():
    """Print the space-separated probabilities computed for the input.

    Line one of the file is the sequence; line two is a space-separated
    list that is passed on as strings.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        sequence = handle.readline().strip()
        raw_values = handle.readline().strip().split(" ")
    results = find_log_probabilites(sequence, raw_values)
    print(" ".join(map(str, results)))
def main():
    """Print the mass of the protein sequence stored in the input file.

    All lines are stripped and concatenated; the mass table ``aa_mass`` is
    a name defined outside this function.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        protein = "".join(raw.strip() for raw in handle)
    print(calculate_weight(protein, aa_mass))
def main():
    """Print each erroneous read with its correction as ``old->new``."""
    check_input(sys.argv[0])
    all_reads = [record_seq for _, record_seq in read_fasta(sys.argv[1])]
    for correction in find_erroneous_reads(all_reads):
        print("->".join(correction))
def main():
    """Print the header with the highest GC content, then that GC value."""
    check_input(sys.argv[0])
    # Keyed by GC value: when two records share a value the later header wins.
    header_by_gc = {
        get_gc(record_seq): name for name, record_seq in read_fasta(sys.argv[1])
    }
    highest = max(header_by_gc)
    print(header_by_gc[highest])
    print(highest)
def main():
    """Print ``2**n * n!`` for the input n, then run ``permHelper`` per list.

    n is read from the first line of the file named by ``sys.argv[1]``.
    """
    check_input(sys.argv[0])
    sign_lists = []
    with open(sys.argv[1]) as handle:
        size = int(handle.readline().strip())
    print((2**size) * factorial(size))
    # NOTE(review): get_signs presumably fills sign_lists in place — confirm.
    get_signs(size, sign_lists)
    for signed in sign_lists:
        permHelper(signed, 0, len(signed) - 1)
def main():
    """Transcribe, splice and translate the input DNA, printing the result."""
    check_input(sys.argv[0])
    transcripts = [rna(record_seq) for _, record_seq in read_fasta(sys.argv[1])]
    spliced = splice(transcripts)
    print(translate(spliced))
def main():
    """Print a substring common to all of the input sequences."""
    check_input(sys.argv[0])
    records = [record_seq for _, record_seq in read_fasta(sys.argv[1])]
    # The shortest sequence is the one checked against all the others.
    shortest = min(records, key=len)
    print(get_common(records, shortest))
def main():
    """Print the consensus sequence, then the profile matrix, for the input."""
    check_input(sys.argv[0])
    fasta_path = sys.argv[1]
    records = [record_seq for _, record_seq in read_fasta(fasta_path)]
    matrix = generate_profile(records)
    print(get_consensus(matrix))
    print_profile(matrix)
def main():
    """Print the result of ``complete_tree`` for the input adjacency list.

    The first line of the file is the node count; every following line is
    parsed as a list of whitespace-separated integers.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        node_count = int(handle.readline().strip())
        edges = [[int(token) for token in raw.split()] for raw in handle]
    print(complete_tree(node_count, edges))
def main():
    """Print the indices of a subsequence within a sequence.

    The first record of the input file is the sequence to search in and the
    second is the subsequence to look for; any further records are ignored.
    Exits with an error message when fewer than two records are present,
    instead of failing later on an unbound local variable.
    """
    check_input(sys.argv[0])
    records = [seq for _, seq in read_fasta(sys.argv[1])]
    if len(records) < 2:
        sys.exit("Expected at least two sequences in the input file.")
    sequence, search = records[:2]
    print(find_subsequence(sequence, search))
def main():
    """Print every ordered pair of headers whose sequences overlap.

    A pair is printed when the suffix of the first sequence equals the
    prefix of the second. Pairs with identical sequences are skipped.
    """
    check_input(sys.argv[0])
    records = {name: record_seq for name, record_seq in read_fasta(sys.argv[1])}
    for left_name, left_seq in records.items():
        for right_name, right_seq in records.items():
            # Only compare different seqs
            if left_seq != right_seq and suffix(left_seq) == prefix(right_seq):
                print(left_name, right_name, file=sys.stdout)
def main():
    """Print the transition/transversion ratio of two DNA strings.

    The first two records of the input file are compared; any further
    records are ignored. Exits with an error message when fewer than two
    records are present, or when no transversions were counted and the
    ratio is therefore undefined.
    """
    check_input(sys.argv[0])
    records = [seq for _, seq in read_fasta(sys.argv[1])]
    if len(records) < 2:
        sys.exit("Expected at least two sequences in the input file.")
    seq_1, seq_2 = records[:2]
    transitions, transversions = count_mutations(seq_1, seq_2)
    if transversions == 0:
        # Avoid an opaque ZeroDivisionError traceback.
        sys.exit("Ratio is undefined: no transversions were found.")
    print(transitions / transversions)
def main():
    """Print every distinct, non-empty candidate protein string for the input.

    Only the last record of the input file is analysed. Exits with an error
    message when the file yields no records.
    """
    check_input(sys.argv[0])
    inseq = None
    for _, seq in read_fasta(sys.argv[1]):
        inseq = seq  # later records overwrite earlier ones
    if inseq is None:
        sys.exit("Input file contains no sequences.")

    orfs = []
    for frame in find_all_rnas(inseq):
        orfs.extend(to_mrna(frame))

    # De-duplicate ORFs, translating each one only once.
    for orf in set(orfs):
        peptide = translate(orf)
        if peptide:
            print(peptide, file=sys.stdout)
def main():
    """Print the reverse complement of the DNA on the input's first line."""
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        first_line = handle.readline()
    dna = first_line.strip()
    print(rev_comp(dna))
def main():
    """Print the failure array of each input sequence, space-separated."""
    check_input(sys.argv[0])
    for _, record_seq in read_fasta(sys.argv[1]):
        failure = create_failure_array(record_seq)
        print(" ".join(map(str, failure)))
def main():
    """Print the 4-mer composition array for each input sequence.

    Output is delegated to ``count_composition``.
    """
    check_input(sys.argv[0])
    fasta_path = sys.argv[1]
    for _, record_seq in read_fasta(fasta_path):
        count_composition(record_seq)
def main():
    """Print the number of perfect matchings for each input sequence."""
    check_input(sys.argv[0])
    for _, record_seq in read_fasta(sys.argv[1]):
        matching_count = perfect_matchings(record_seq)
        print(str(matching_count))
def main():
    """Print the permutation count and all permutations for the input number.

    The number is read from the first line of the file; output is delegated
    to ``perm``.
    """
    check_input(sys.argv[0])
    with open(sys.argv[1]) as handle:
        first_line = handle.readline()
    size = int(first_line.strip())
    perm(size)
def main():
    """Print the number of maximum matchings for each input sequence."""
    check_input(sys.argv[0])
    fasta_path = sys.argv[1]
    for _, record_seq in read_fasta(fasta_path):
        matching_count = maximum_matchings(record_seq)
        print(matching_count)