def initiate_case(batchSize):
    """Build a ready-to-use mapper over the bundled sample data.

    Decodes the sample reference ("data/KR233687.fasta") and the sample reads
    ("data/ERR1293055_first100.fastq"), cuts both into 13-mers (third
    kmer_maker argument True for the reference, False for the reads), maps
    them through a fresh Trie using ``batchSize`` and finally calls
    ``filter_matching()`` with its default arguments.

    :param batchSize: forwarded unchanged as the last argument of ``mapper``.
    :return: the mapper object after filtering.
    """
    kmer_size = 13

    # Decode both inputs first, then build the k-mers, in that order.
    ref_records = decoder("data/KR233687.fasta")
    read_records = decoder("data/ERR1293055_first100.fastq")
    ref_kmers = kmer_maker(kmer_size, ref_records, True)
    read_kmers = kmer_maker(kmer_size, read_records, False)

    case = mapper(ref_kmers, read_kmers, Trie(), batchSize)
    case.filter_matching()
    return case
def run():
    """Map the reads in ``args.sequence`` onto ``args.reference`` and report.

    ``args.method`` selects the index structure handed to ``mapper``:

    * 1 -- ``Trie()`` (suffix trie)
    * 2 -- ``SA(reference)`` (suffix array)
    * 3 -- ``BWT(reference)``; in this mode the decoded reference itself is
      passed to ``mapper`` instead of reference k-mers.

    The mapper result is filtered with ``args.minKcount`` and
    ``args.minPercentage`` and passed to ``reporter`` with the prefix
    ``<outputPrefix>_<method>_``.

    :raises ValueError: if ``args.method`` is not 1, 2 or 3. Previously this
        case only failed at the ``mapper`` call, with an unbound ``spine``.
    """
    # Parse the method once and reject unknown values before any input is
    # decoded, instead of failing late with UnboundLocalError.
    method = int(args.method)
    if method not in (1, 2, 3):
        raise ValueError(
            "unsupported mapping method %r (expected 1, 2 or 3)" % (args.method,)
        )

    reference = decoder(args.reference)
    sequence = decoder(args.sequence)

    if method == 3:
        spine = BWT(reference)
        refKmer = reference
    else:
        refKmer = kmer_maker(int(args.ksize), reference, True)
    seqKmer = kmer_maker(int(args.ksize), sequence, False)

    if method == 1:
        # mapping through Suffix Trie
        spine = Trie()
    elif method == 2:
        # mapping through Suffix Array
        spine = SA(reference)

    sternum = mapper(refKmer, seqKmer, spine, int(args.batchSize))
    sternum.filter_matching(int(args.minKcount), int(args.minPercentage))
    # The prefix uses the method exactly as given on the command line.
    reporter(sternum, args.outputPrefix + "_" + str(args.method) + "_")
def test_zclear(self):
    """Check that clear() empties the k-mers and removes the dumped files."""
    reads = decoder("data/ERR1293055_first100.fastq")
    kmer_set = kmer_maker(13, reads, False)

    # Write the k-mers to disk, then wipe them again.
    kmer_set.dump()
    kmer_set.clear()

    # No file matching "<filePrefix>_*<fileExten>" may be left behind.
    dump_pattern = kmer_set.filePrefix + "_*" + kmer_set.fileExten
    leftovers = glob.glob(dump_pattern)

    self.assertEqual({}, kmer_set.kmers)
    self.assertEqual(leftovers, [])
def test_splice_fasta_overlapping(self):
    """Check the 13-mers cut from the FASTA sample with the flag set to True."""
    reference = decoder("data/KR233687.fasta")
    kmer_set = kmer_maker(13, reference, True)

    # (record id, [k-mer, position]) pairs that must be present.
    expected_entries = (
        ("KR233687.2.1", ["GAGATCTAATGTC", 0]),
        ("KR233687.2.1", ["TAATGGTGGCATA", 579]),
        ("KR233687.2.2", ["ATTCAGTTGATAG", 1]),
        ("KR233687.2.2", ["ATGGTCATCAATT", 1123]),
    )
    for record_id, entry in expected_entries:
        self.assertIn(entry, kmer_set.kmers[record_id])

    self.assertEqual(2, kmer_set.seqCount)
def test_dump(self):
    """Test storing kmers to disk.

    Dumps the 13-mers of the FASTQ sample and checks that the file
    "_39_ERR1293055.40.kmers" contains the k-mer "GTTGGGATCAATA".
    """
    fileName = "data/ERR1293055_first100.fastq"
    fastaFile = decoder(fileName)
    kmer = kmer_maker(13, fastaFile, False)
    kmer.dump()
    # The context manager closes the file even when the assertion fails;
    # the explicit close() after assertIn was skipped in that case.
    with open("_39_ERR1293055.40.kmers") as dumped:
        lines = dumped.read()
        self.assertIn("GTTGGGATCAATA", lines)
def test_splice_fastq_Nonoverlapping(self):
    """Check the 13-mers cut from the FASTQ sample with the flag set to False."""
    reads = decoder("data/ERR1293055_first100.fastq")
    kmer_set = kmer_maker(13, reads, False)

    # (read id, [k-mer, position]) pairs that must be present.
    expected_entries = (
        ("ERR1293055.1", ["CTCTTCTACTTCT", 0]),
        ("ERR1293055.40", ["GTTGGGATCAATA", 0]),
        ("ERR1293055.100", ["ATTCAAATGTTCC", 286]),
    )
    for read_id, entry in expected_entries:
        self.assertIn(entry, kmer_set.kmers[read_id])

    # NOTE(review): the entries asserted above are [k-mer, position] lists,
    # while this check looks for a bare string, so it presumably can never
    # fail -- confirm whether a [k-mer, position] pair was intended.
    self.assertNotIn("TCCACTTCACTTT", kmer_set.kmers["ERR1293055.90"])

    self.assertEqual(100, kmer_set.seqCount)
def test_splice_fasta_Nonoverlapping(self):
    """Check the 13-mers cut from the FASTA sample with the flag set to False."""
    reference = decoder("data/KR233687.fasta")
    kmer_set = kmer_maker(13, reference, False)

    # (record id, [k-mer, position]) pairs that must be present.
    expected_entries = (
        ("KR233687.2.1", ["GAGATCTAATGTC", 0]),
        ("KR233687.2.1", ["TCAATCCCGCACT", 13]),
        ("KR233687.2.2", ["TTCGGATGGTCAT", 1118]),
    )
    for record_id, entry in expected_entries:
        self.assertIn(entry, kmer_set.kmers[record_id])

    # The k-mer starting at offset 1 must be absent in this mode.
    self.assertNotIn(["AGATCTAATGTCT", 1], kmer_set.kmers["KR233687.2.1"])

    self.assertEqual(2, kmer_set.seqCount)
def lancer():
    """Run the action currently selected in the GUI.

    With the demo flag off (``var_demo`` is 0):

    * mode "encode": encode either the chosen file ("fichier") or the typed
      text ("texte"), passing the choice, the value and ``var_save`` on to
      ``encoder``;
    * any other mode: decode the chosen file; the third ``decoder`` argument
      is True when ``var_correc`` is 0 and False otherwise.

    With the demo flag on: encode the demo text, format every intermediate
    value with ``afficherEncodage`` and show the result in a new Tk window
    holding a scrollable Text widget.
    """
    if var_demo.get() == 0:
        if var_mode.get() != "encode":
            # Decoding path.
            valeur = entre_file.get()
            if var_correc.get() == 0:
                decoder(valeur, var_save.get(), True)
            else:
                print("sans correection")
                decoder(valeur, var_save.get(), False)
            return

        # Encoding path: the input comes from the file entry or the text box.
        if var_choix.get() == "fichier":
            valeur = entre_file.get()
        elif var_choix.get() == "texte":
            valeur = var_texte.get()
        encoder(var_choix.get(), valeur, var_save.get())
        return

    # Demo path: encode the demo text and display every intermediate step.
    valeur = entre_texte_demo.get()
    (temps, s_bytes, s_trit1, len_Trit, nb0, s_trit3, s_trit4, s_dna,
     dicoDebut, dicoReverse, dicoI3, ID, dicoP, dicoIX, dicoIX_dna,
     dicoFinal, s_dna_final) = encoder("texte", valeur, var_save.get())
    var_afficher = afficherEncodage(
        valeur, temps, s_bytes, s_trit1, len_Trit, nb0, s_trit3, s_trit4,
        s_dna, dicoDebut, dicoReverse, dicoI3, ID, dicoP, dicoIX,
        dicoIX_dna, dicoFinal, s_dna_final)

    fenetre_demo = Tk()
    fenetre_demo.title(
        "Démonstration de l'encodage d'un texte vers de l'ADN")

    # Text widget and scrollbar are wired to each other.
    barre = Scrollbar(fenetre_demo)
    label_demo = Text(fenetre_demo, yscrollcommand=barre.set)
    barre.config(command=label_demo.yview)
    barre.pack(side="right", fill='y')
    label_demo.pack(expand=1, fill="both")
    label_demo.insert(0.0, var_afficher)
def test_load(self):
    """Test loading dumped kmers back from disk."""
    reads = decoder("data/ERR1293055_first100.fastq")
    kmer_set = kmer_maker(13, reads, False)

    # NOTE(review): the second load() argument looks like a count of reads
    # to bring back (3, then 4 more, then -1 for the rest) -- confirm
    # against kmer_maker.load.
    kmer_set.dump()
    kmer_set.load("", 3)
    self.assertIn("ERR1293055.3", kmer_set.kmers)

    kmer_set.dump()
    kmer_set.load("", 4)
    self.assertIn("ERR1293055.7", kmer_set.kmers)

    # No dump before this last load.
    kmer_set.load("", -1)
    self.assertIn("ERR1293055.100", kmer_set.kmers)
def test_fastq(self):
    """Check that selected FASTQ reads decode to the expected bases."""
    sequences = decoder("data/ERR1293055_first100.fastq").seq

    # (read id, fragment that must occur in the decoded read)
    expected_fragments = (
        ("ERR1293055.1",
         "CTCTTCTACTTCTACACCTAATACATCCCCTCCCTCCCTCTCCCCCCTCCCCCTTCCT"),
        ("ERR1293055.15",
         "CACCCTTTCTTTATCCTTTTTATTTCTAATCTTTTTTTGTCGTTTCGTCTTTTTTTTT"),
        ("ERR1293055.99",
         "ATACAAAGCAAATCAAGGCAAAATAATTGGCCGAACAGATGTTAGCTTTAGTGGAGGA"),
    )
    for read_id, fragment in expected_fragments:
        self.assertIn(fragment, str(sequences[read_id]))
def decodeall(self):
    """Attempt to decode all documents and store them to mongo.

    Every regular, non-hidden file in ``self.storedir`` is handed to
    ``decoder``. When its data verifies, it is decoded, and when the result
    carries a "profilenumber" that is not yet in ``self.mongocollection``,
    the decoded dictionary is inserted.

    Processing is best-effort: an error on one file is logged at debug
    level, with traceback, and the loop moves on to the next file.
    """
    for entry in os.listdir(self.storedir):
        full_path = os.path.join(self.storedir, entry)
        # Skip directories and hidden (dot) files.
        if not os.path.isfile(full_path) or entry.startswith('.'):
            continue
        try:
            mydive = decoder(self.storedir, entry)
            if not mydive.verifydata():
                continue
            mydive.decode()
            if "profilenumber" not in mydive.datadict:
                continue
            profilenumber = mydive.datadict["profilenumber"]
            # find_one replaces find(...).count(): Cursor.count() no longer
            # exists in PyMongo 4, and the resulting AttributeError was
            # silently swallowed by the old bare except.
            existing = self.mongocollection.find_one(
                {"profilenumber": profilenumber})
            if existing is None:
                self.mongocollection.insert_one(mydive.datadict)
                logger.debug('Saved %s to mongodb', profilenumber)
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # still propagate; exc_info keeps the cause visible in the log.
            logger.debug('Error decoding %s', entry, exc_info=True)
def test_fasta(self):
    """Check that both FASTA records decode to the expected bases."""
    records = decoder("data/KR233687.fasta").seq

    # Adjacent literals are joined by the parser into one string.
    first_fragment = (
        "GAGATCTAATGTCTCAATCCCGCACTCGCGAGATACTAACAAAAACCACTGTGGACCA"
        "TATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTCTCAGAATGAAATGGATGATGGCAAT"
        "GAAATATCCAATCACAGCAGACAAGAGAATAATGGAAATGATTCCTGAAAGAAATGAACAAGGCCAGACGCTTT"
        "GGAGCAAGACAAATGATGCTGGATCAGACAGAGTGATGGTGTCTCCCCTAGCTGTAACTTGGTGGAATAG"
    )
    self.assertIn(first_fragment, records["KR233687.2.1"])

    second_fragment = (
        "GGAGTGGAATCTGCAGTGCTGAGGGGGTTCCTAATTCTGGGCAGGGAGGACAGAAGA"
        "TATGGACCAGCACTAAGCATCAATGAACTGAGCAATCTTGCGAAAGGGGAGAAAGCCAATGTGCTGATAGGGCAAGG"
        "AGACGTGGTGCTGGTAATGAAACGGAAACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATT"
        "CGGATGGTCATCAATT"
    )
    self.assertIn(second_fragment, records["KR233687.2.2"])
# Record and announce the start time of the run.
t0 = datetime.now()
print('RUN_START: ' + str(t0))
# Run-wide counters; the per-file values read from each decoder below are
# added onto the first three of them in the part of the loop visible here.
N_tweets = 0
N_matches = 0
N_warnings = 0
N_errors = 0
N_duplicates = 0
N_large_duplicates = 0
for f in files:
    # Only names ending in '.json' are processed.
    if f[-5:] == '.json':
        #try:
        # The first 8 characters of the name are parsed as an integer and must
        # lie within [start, end] -- presumably a YYYYMMDD stamp; TODO confirm.
        if int(f[:8]) >= int(start):
            if int(f[:8]) <= int(end):
                # A new decoder is built for every file.
                d = decoder(keywords, dirIn, dirOut, hiMem, mode, lcase, emoji, logging, yesterday_dict, today_dict)
                #record = d.fixjson(dirIn, f, hiMem, emojiFile)
                record = d.fixjson(dirIn, f, hiMem)
                # NOTE(review): the flattened source does not show where the
                # `if logging:` body ends; it is laid out here as covering the
                # counter updates as well as the prints -- verify against the
                # original file.
                if logging:
                    print('\n# FILE STATS #')
                    print('FILE: ' + str(f))
                    n_tweets = d.n_tweets
                    N_tweets += n_tweets
                    print('TWEETS: ' + str(n_tweets))
                    n_matches = d.n_matches
                    N_matches += n_matches
                    print('MATCHES: ' + str(n_matches))
                    n_warnings = d.n_warnings
                    N_warnings += n_warnings
                    print('WARNINGS: ' + str(n_warnings))
# Message fixtures with the same three fields, differing only in "Type".
joindre_salon_nok = {"taille": 125, "seq": 1, "Type": 12}
ack = {"taille": 125, "seq": 1, "Type": 63}

# Each section prints a message, its encoded form and the decoded result.
print("---------- TEST TYPE 1 ----------\n")
print(inscription)
test1 = encode.encoder(inscription)
print("\n{0}\n".format(test1))
test1bis = decode.decoder(test1)
print(test1bis)

print("\n---------- TEST TYPE 2 ----------\n")
print(film)
test2 = encode.encoder(film)
print("\n{0}\n".format(test2))
test2bis = decode.decoder(test2)
print(test2bis)

print("\n---------- TEST TYPE 3 ----------\n")
print(user)
test3 = encode.encoder(user)
print("\n{0}\n".format(test3))
test3bis = decode.decoder(test3)
print(test3bis)

print("\n---------- TEST TYPE 4 ----------\n")
print(maj_user)
# Manual round-trip checks for message types 9 to 13: each section prints the
# message, its encoded form, and what the decoder makes of that encoded form.
# NOTE(review): types 9 and 10 are decoded with decode2.decoder while types
# 11 to 13 use decode.decoder -- confirm that the difference is intended.
print("\n---------- TEST TYPE 9 ----------\n")
print(desinscription)
test9=encode.encoder(desinscription)
print("\n{0}\n".format(test9))
test9bis=decode2.decoder(test9)
print(test9bis)
print("\n---------- TEST TYPE 10 ----------\n")
print(redir)
test10=encode.encoder(redir)
print("\n{0}\n".format(test10))
test10bis=decode2.decoder(test10)
print(test10bis)
print("\n---------- TEST TYPE 11 ----------\n")
print(joindre_salon_ok)
test11=encode.encoder(joindre_salon_ok)
print("\n{0}\n".format(test11))
test11bis=decode.decoder(test11)
print(test11bis)
print("\n---------- TEST TYPE 12 ----------\n")
print(joindre_salon_nok)
test12=encode.encoder(joindre_salon_nok)
print("\n{0}\n".format(test12))
test12bis=decode.decoder(test12)
print(test12bis)
print("\n---------- TEST TYPE 13 ----------\n")
print(ack)
test13=encode.encoder(ack)
print("\n{0}\n".format(test13))
test13bis=decode.decoder(test13)
print(test13bis)
from decode import decoder
from encode import encoder

# Round-trip check: decoding an encoded message must give the message back.
# The first sample is exercised three times, as in the original script.
_samples = ["alodicksiano"] * 3 + ["123456789 123456789 123456789 123456789"]
for _sample in _samples:
    assert decoder(encoder(_sample)) == _sample