def question_07():
    """Print the read offset whose summed quality over all reads is lowest.

    Loads ``ERR037900_1.first1000.fastq`` through ``dnaseq.read_fastq``,
    adds up the quality value found at each offset across every read, and
    prints the offset with the smallest total as ``question_07: <offset>``.

    Raises:
        ValueError: if no base is read at all, because ``min`` is then
            called on an empty collection.
    """
    sequences, qualities = dnaseq.read_fastq("ERR037900_1.first1000.fastq")

    # Maps an offset within a read to the sum of quality values seen there.
    # NOTE(review): the addition below assumes read_fastq already returns
    # numeric quality scores rather than raw FASTQ characters -- confirm.
    totals = {}
    for read_index, read in enumerate(sequences):
        read_qualities = qualities[read_index]
        # Reads may differ in length, so only offsets that exist in this
        # read contribute to the totals.
        for offset in range(len(read)):
            totals[offset] = totals.get(offset, 0) + read_qualities[offset]

    # Iterating the dict directly (instead of the Python-2-only iterkeys())
    # and using the parenthesised print form keeps this runnable on both
    # Python 2 and Python 3 with identical output.
    worst_offset = min(totals, key=totals.get)
    print("question_07: %i" % worst_offset)
#!/usr/bin/python
import dnaseq
import bm_preproc
import kmer_index

# Input data shared by the question functions below; both files are read
# once, when the module is loaded.
human_chromosome = dnaseq.read_genome("chr1.GRCh38.excerpt.fasta")
# Only the first value returned by read_fastq is used by this script.
phix_reads, _ = dnaseq.read_fastq("ERR266411_1.for_asm.fastq")


def question_01():
    """Print the approximate match distance of the first query pattern.

    Passes ``human_chromosome`` and the pattern ``GCTGATCGATCGTACG`` to
    ``dnaseq.approximate_match_distance`` and prints the result as
    ``question_01: <value>``.
    """
    distance = dnaseq.approximate_match_distance(
        human_chromosome, "GCTGATCGATCGTACG"
    )
    print("question_01: %i" % distance)


def question_02():
    """Print the approximate match distance of the second query pattern.

    Same computation as ``question_01`` but for the pattern
    ``GATTTACCAGATTGAG``; the result is printed as ``question_02: <value>``.
    """
    distance = dnaseq.approximate_match_distance(
        human_chromosome, "GATTTACCAGATTGAG"
    )
    print("question_02: %i" % distance)


def question_03():
    """Print the size of the first value returned by the overlap graph.

    Calls ``dnaseq.overlap_graph(phix_reads, 30)`` and prints the length of
    the first element of the returned pair as ``question_03: <value>``.
    """
    # NOTE(review): presumably the first element is the collection of
    # overlapping read pairs (graph edges) -- confirm against dnaseq.
    overlaps, _ = dnaseq.overlap_graph(phix_reads, 30)
    print("question_03: %i" % len(overlaps))


def question_04():
    """Print the second value returned by the overlap graph.

    Calls ``dnaseq.overlap_graph(phix_reads, 30)`` and prints the second
    element of the returned pair as ``question_04: <value>``.
    """
    # NOTE(review): presumably a count of reads with at least one overlap
    # -- confirm against dnaseq.
    _, count = dnaseq.overlap_graph(phix_reads, 30)
    print("question_04: %i" % count)


if __name__ == "__main__":
    # NOTE(review): only the first two questions are run here;
    # question_03 and question_04 are defined but not invoked -- confirm
    # whether that is intentional.
    question_01()
    question_02()
#!/usr/bin/python
import dnaseq
import bm_preproc
import kmer_index

# Reads are loaded and assembled once, when the module is loaded; only the
# first value returned by read_fastq is used.
mystery_virus_reads, _ = dnaseq.read_fastq("ads1_week4_reads.fq")
# NOTE(review): the second argument is presumably the minimum overlap
# length used during assembly -- confirm against dnaseq.assemble.
mystery_virus_genom = dnaseq.assemble(mystery_virus_reads, 30)


def question_01():
    """Print the length of the first value returned by ``dnaseq.scs``.

    Runs ``dnaseq.scs`` on six fixed three-letter strings and prints the
    length of the first element of the returned pair as
    ``question_01: <value>``.
    """
    # NOTE(review): presumably scs returns a shortest common superstring
    # first -- confirm against dnaseq.
    ss = ["CCT", "CTT", "TGC", "TGG", "GAT", "ATT"]
    s, _ = dnaseq.scs(ss)
    print("question_01: %i" % len(s))


def question_02():
    """Print the length of the second value returned by ``dnaseq.scs``.

    Uses the same six input strings as ``question_01`` and prints the
    length of the second element of the returned pair as
    ``question_02: <value>``.
    """
    # NOTE(review): presumably the second element lists every superstring
    # of minimal length -- confirm against dnaseq.
    ss = ["CCT", "CTT", "TGC", "TGG", "GAT", "ATT"]
    _, options = dnaseq.scs(ss)
    print("question_02: %i" % len(options))


def question_03():
    """Print how many times ``"A"`` occurs in the assembled genome."""
    print("question_03: %i" % mystery_virus_genom.count("A"))


def question_04():
    """Print how many times ``"T"`` occurs in the assembled genome."""
    print("question_04: %i" % mystery_virus_genom.count("T"))


def question_05():
    """Print the assembled genome between begin and end marker lines."""
    # The label names this function; it previously read "question_04:",
    # which made the output indistinguishable from question_04's.
    print("question_05:")
    print("--------- GENOM BEGIN ---------")
    print(mystery_virus_genom)
    print("--------- GENOM END ---------")