Пример #1
0
def initiate_case(batchSize):
    """
    Build a trie-backed mapper over the bundled sample data and filter it.

    The reference FASTA and the FASTQ reads are both spliced into 13-mers
    (reference with the flag ``True``, reads with ``False``), handed to
    ``mapper`` together with an empty ``Trie`` and ``batchSize``, and the
    resulting mapper has ``filter_matching()`` applied with its defaults.

    :param batchSize: forwarded unchanged as the last argument of ``mapper``
    :return: the filtered mapper object
    """
    ref_records = decoder("data/KR233687.fasta")
    read_records = decoder("data/ERR1293055_first100.fastq")
    case = mapper(
        kmer_maker(13, ref_records, True),
        kmer_maker(13, read_records, False),
        Trie(),
        batchSize,
    )
    case.filter_matching()
    return case
Пример #2
0
def run():
    """
    Map the reads in ``args.sequence`` onto ``args.reference`` and report.

    Reads its configuration from the module-level ``args`` object:
    ``reference``, ``sequence``, ``method``, ``ksize``, ``batchSize``,
    ``minKcount``, ``minPercentage`` and ``outputPrefix``.

    ``args.method`` selects the index structure handed to ``mapper``:

    * ``1`` -- ``Trie()`` (suffix trie)
    * ``2`` -- ``SA(reference)`` (suffix array)
    * ``3`` -- ``BWT(reference)``; the decoded reference itself is passed to
      ``mapper`` instead of reference k-mers

    :raises ValueError: if ``args.method`` is not 1, 2 or 3. Previously such
        a value only failed late, with an UnboundLocalError on ``spine``,
        after all decoding and k-mer splicing had already been done.
    """
    # Parse the numeric options once instead of on every comparison.
    method = int(args.method)
    if method not in (1, 2, 3):
        raise ValueError(
            "unsupported mapping method %r (expected 1, 2 or 3)" % (args.method,))
    ksize = int(args.ksize)

    reference = decoder(args.reference)
    sequence = decoder(args.sequence)
    if method == 3:
        spine = BWT(reference)
        # This method receives the decoded reference rather than its k-mers.
        refKmer = reference
    else:
        refKmer = kmer_maker(ksize, reference, True)
    seqKmer = kmer_maker(ksize, sequence, False)
    if method == 1:  # mapping through Suffix Trie
        spine = Trie()
    elif method == 2:  # mapping through Suffix Array
        spine = SA(reference)
    sternum = mapper(refKmer, seqKmer, spine, int(args.batchSize))
    sternum.filter_matching(int(args.minKcount), int(args.minPercentage))
    reporter(sternum, args.outputPrefix + "_" + str(args.method) + "_")
Пример #3
0
 def test_zclear(self):
     """
     Check that clear() empties the in-memory kmers and that no dumped
     kmer files remain on disk afterwards
     """
     reads = decoder("data/ERR1293055_first100.fastq")
     spliced = kmer_maker(13, reads, False)
     spliced.dump()
     spliced.clear()
     # Anything still matching the dump naming pattern is a leftover file.
     pattern = spliced.filePrefix + "_*" + spliced.fileExten
     leftovers = glob.glob(pattern)
     self.assertEqual({}, spliced.kmers)
     self.assertEqual(leftovers, [])
Пример #4
0
 def test_splice_fasta_overlapping(self):
     """
     Check a few known [kmer, position] entries produced when the FASTA
     reference is spliced into overlapping 13-mers
     """
     spliced = kmer_maker(13, decoder("data/KR233687.fasta"), True)
     # (sequence id, expected [kmer, position] entry)
     expected_entries = (
         ("KR233687.2.1", ["GAGATCTAATGTC", 0]),
         ("KR233687.2.1", ["TAATGGTGGCATA", 579]),
         ("KR233687.2.2", ["ATTCAGTTGATAG", 1]),
         ("KR233687.2.2", ["ATGGTCATCAATT", 1123]),
     )
     for seq_id, entry in expected_entries:
         self.assertIn(entry, spliced.kmers[seq_id])
     self.assertEqual(2, spliced.seqCount)
Пример #5
0
 def test_dump(self):
     """
     Test storing kmers to disk

     Splices the FASTQ sample into 13-mers, dumps them, and checks that a
     known kmer appears in the dump file "_39_ERR1293055.40.kmers".
     """
     fileName = "data/ERR1293055_first100.fastq"
     fastaFile = decoder(fileName)
     kmer = kmer_maker(13, fastaFile, False)
     kmer.dump()
     # The context manager closes the handle even if reading raises, and the
     # assertion runs after the file is closed, so a failing test no longer
     # leaks an open file. The local is also no longer named ``file``.
     with open("_39_ERR1293055.40.kmers") as dumped:
         lines = dumped.read()
     self.assertIn("GTTGGGATCAATA", lines)
Пример #6
0
 def test_splice_fastq_Nonoverlapping(self):
     """
     Check a few known [kmer, position] entries produced when the FASTQ
     reads are spliced into non-overlapping 13-mers
     """
     spliced = kmer_maker(13, decoder("data/ERR1293055_first100.fastq"), False)
     # (read id, expected [kmer, position] entry)
     present = (
         ("ERR1293055.1", ["CTCTTCTACTTCT", 0]),
         ("ERR1293055.40", ["GTTGGGATCAATA", 0]),
         ("ERR1293055.100", ["ATTCAAATGTTCC", 286]),
     )
     for read_id, entry in present:
         self.assertIn(entry, spliced.kmers[read_id])
     # NOTE(review): the entries checked above are [kmer, position] lists,
     # so a bare string can presumably never be a member and this assertion
     # may pass vacuously -- confirm the intended check.
     self.assertNotIn("TCCACTTCACTTT", spliced.kmers["ERR1293055.90"])
     self.assertEqual(100, spliced.seqCount)
Пример #7
0
 def test_splice_fasta_Nonoverlapping(self):
     """
     Check known [kmer, position] entries produced when the FASTA reference
     is spliced into non-overlapping 13-mers, including one entry at
     position 1 that must be absent
     """
     spliced = kmer_maker(13, decoder("data/KR233687.fasta"), False)
     # (sequence id, expected [kmer, position] entry)
     present = (
         ("KR233687.2.1", ["GAGATCTAATGTC", 0]),
         ("KR233687.2.1", ["TCAATCCCGCACT", 13]),
         ("KR233687.2.2", ["TTCGGATGGTCAT", 1118]),
     )
     for seq_id, entry in present:
         self.assertIn(entry, spliced.kmers[seq_id])
     # The window starting at position 1 must not be among the entries.
     self.assertNotIn(["AGATCTAATGTCT", 1], spliced.kmers["KR233687.2.1"])
     self.assertEqual(2, spliced.seqCount)
Пример #8
0
def lancer():
    """
    Run the action currently selected through the module-level UI variables.

    * ``var_demo`` non-zero: encode the demo text, format every intermediate
      value with ``afficherEncodage`` and show the result in a new scrollable
      text window.
    * ``var_mode`` equal to "encode": encode either the chosen file path or
      the entered text, depending on ``var_choix``.
    * otherwise: decode the chosen file; the last argument passed to
      ``decoder`` is ``True`` when ``var_correc`` is 0 and ``False`` otherwise.
    """
    if var_demo.get() != 0:
        texte_demo = entre_texte_demo.get()
        (temps, s_bytes, s_trit1, len_Trit, nb0, s_trit3, s_trit4, s_dna,
         dicoDebut, dicoReverse, dicoI3, ID, dicoP, dicoIX, dicoIX_dna,
         dicoFinal, s_dna_final) = encoder("texte", texte_demo, var_save.get())

        var_afficher = afficherEncodage(
            texte_demo, temps, s_bytes, s_trit1, len_Trit, nb0, s_trit3,
            s_trit4, s_dna, dicoDebut, dicoReverse, dicoI3, ID, dicoP, dicoIX,
            dicoIX_dna, dicoFinal, s_dna_final)

        # Separate window holding the formatted walkthrough.
        fenetre = Tk()
        fenetre.title("Démonstration de l'encodage d'un texte vers de l'ADN")

        defilement = Scrollbar(fenetre)
        zone_texte = Text(fenetre, yscrollcommand=defilement.set)

        # Wire the scrollbar and the text widget to each other.
        defilement.config(command=zone_texte.yview)
        defilement.pack(side="right", fill='y')
        zone_texte.pack(expand=1, fill="both")
        zone_texte.insert(0.0, var_afficher)
        return

    if var_mode.get() != "encode":
        chemin = entre_file.get()
        avec_correction = var_correc.get() == 0
        if not avec_correction:
            print("sans correection")
        decoder(chemin, var_save.get(), avec_correction)
        return

    # Encode mode: the input is either a file path or free text.
    if var_choix.get() == "fichier":
        valeur = entre_file.get()
    elif var_choix.get() == "texte":
        valeur = var_texte.get()
    encoder(var_choix.get(), valeur, var_save.get())
Пример #9
0
 def test_load(self):
     """
     Test loading dumped kmers back from disk

     Calls load() with second arguments 3, 4 and -1 and checks that a known
     read id is present in the kmers after each call.
     """
     reads = decoder("data/ERR1293055_first100.fastq")
     spliced = kmer_maker(13, reads, False)

     spliced.dump()
     spliced.load("", 3)
     self.assertIn("ERR1293055.3", spliced.kmers)

     spliced.dump()
     spliced.load("", 4)
     self.assertIn("ERR1293055.7", spliced.kmers)

     spliced.load("", -1)
     self.assertIn("ERR1293055.100", spliced.kmers)
Пример #10
0
    def test_fastq(self):
        """
        Check that the FASTQ sample decodes to the expected read sequences
        """
        decoded = decoder("data/ERR1293055_first100.fastq").seq

        # (read id, fragment that must occur in the decoded sequence)
        known_fragments = (
            ("ERR1293055.1",
             "CTCTTCTACTTCTACACCTAATACATCCCCTCCCTCCCTCTCCCCCCTCCCCCTTCCT"),
            ("ERR1293055.15",
             "CACCCTTTCTTTATCCTTTTTATTTCTAATCTTTTTTTGTCGTTTCGTCTTTTTTTTT"),
            ("ERR1293055.99",
             "ATACAAAGCAAATCAAGGCAAAATAATTGGCCGAACAGATGTTAGCTTTAGTGGAGGA"),
        )
        for read_id, fragment in known_fragments:
            self.assertIn(fragment, str(decoded[read_id]))
Пример #11
0
 def decodeall(self):
     """
     Attempt to decode all documents and store them to mongo

     Every regular, non-hidden file in ``self.storedir`` is passed to
     ``decoder``. When the data verifies and the decoded ``datadict`` has a
     "profilenumber" that is not yet in ``self.mongocollection``, the
     dictionary is inserted. A failure on one file is logged (with its
     traceback) and does not stop the remaining files.
     """
     for entry in os.listdir(self.storedir):
         full_path = os.path.join(self.storedir, entry)
         if entry.startswith('.') or not os.path.isfile(full_path):
             continue
         try:
             mydive = decoder(self.storedir, entry)
             if not mydive.verifydata():
                 continue
             mydive.decode()
             if "profilenumber" not in mydive.datadict:
                 continue
             profilenumber = mydive.datadict["profilenumber"]
             # find_one() replaces find(...).count(): Cursor.count() was
             # removed in PyMongo 4, and the resulting error used to vanish
             # in the catch-all handler below.
             already_stored = self.mongocollection.find_one(
                 {"profilenumber": profilenumber})
             if already_stored is None:
                 self.mongocollection.insert_one(mydive.datadict)
                 logger.debug('Saved %s to mongodb', profilenumber)
         except Exception:
             # Still best-effort per file, but no longer swallows
             # KeyboardInterrupt/SystemExit, and the traceback is kept.
             logger.debug('Error decoding %s', entry, exc_info=True)
Пример #12
0
    def test_fasta(self):
        """
        Check that the FASTA sample decodes to the expected sequences
        """
        decoded = decoder("data/KR233687.fasta").seq

        # Long fragments are written as adjacent literals, which Python joins
        # into a single string.
        fragment_first = (
            "GAGATCTAATGTCTCAATCCCGCACTCGCGAGATACTAACAAAAACCACTGTGGACCA"
            "TATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTCTCAGAATGAAATGGATGATGGCAAT"
            "GAAATATCCAATCACAGCAGACAAGAGAATAATGGAAATGATTCCTGAAAGAAATGAACAAGGCCAGACGCTTT"
            "GGAGCAAGACAAATGATGCTGGATCAGACAGAGTGATGGTGTCTCCCCTAGCTGTAACTTGGTGGAATAG"
        )
        fragment_second = (
            "GGAGTGGAATCTGCAGTGCTGAGGGGGTTCCTAATTCTGGGCAGGGAGGACAGAAGA"
            "TATGGACCAGCACTAAGCATCAATGAACTGAGCAATCTTGCGAAAGGGGAGAAAGCCAATGTGCTGATAGGGCAAGG"
            "AGACGTGGTGCTGGTAATGAAACGGAAACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATT"
            "CGGATGGTCATCAATT"
        )

        self.assertIn(fragment_first, decoded["KR233687.2.1"])
        self.assertIn(fragment_second, decoded["KR233687.2.2"])
Пример #13
0
    t0 = datetime.now()
    print('RUN_START:  ' + str(t0))

# Run-wide totals. In the part of the loop visible here only the tweet,
# match and warning totals are updated.
N_tweets = 0
N_matches = 0
N_warnings = 0
N_errors = 0
N_duplicates = 0
N_large_duplicates = 0

# Process every '.json' file whose 8-character numeric name prefix lies in the
# inclusive range [start, end].
# NOTE(review): the prefix is presumably a YYYYMMDD date -- confirm.
for f in files:
    if f[-5:] == '.json':
        #try:
        if int(f[:8]) >= int(start):
            if int(f[:8]) <= int(end):
                # A fresh decoder is built for each file.
                d = decoder(keywords, dirIn, dirOut, hiMem, mode, lcase, emoji,
                            logging, yesterday_dict, today_dict)
                #record = d.fixjson(dirIn, f, hiMem, emojiFile)
                record = d.fixjson(dirIn, f, hiMem)

                # Per-file counters are read from the decoder and added to the
                # run-wide totals only when logging is enabled.
                if logging:
                    print('\n# FILE STATS #')
                    print('FILE:       ' + str(f))
                    n_tweets = d.n_tweets
                    N_tweets += n_tweets
                    print('TWEETS:     ' + str(n_tweets))
                    n_matches = d.n_matches
                    N_matches += n_matches
                    print('MATCHES:    ' + str(n_matches))
                    n_warnings = d.n_warnings
                    N_warnings += n_warnings
                    print('WARNINGS:   ' + str(n_warnings))
Пример #14
0
# Sample message dictionaries; each carries the keys "taille", "seq" and
# "Type", in that insertion order.
joindre_salon_nok = {"taille": 125, "seq": 1, "Type": 12}

ack = {"taille": 125, "seq": 1, "Type": 63}

# Manual round-trip checks: each section prints the input message, the output
# of encode.encoder for it, and the output of decode.decoder for that encoding,
# so the input and the decoded result can be compared by eye.
print("---------- TEST TYPE 1 ----------\n")
print(inscription)
test1=encode.encoder(inscription)
print("\n{0}\n".format(test1))
test1bis=decode.decoder(test1)
print(test1bis)
print("\n---------- TEST TYPE 2 ----------\n")
print(film)
test2=encode.encoder(film)
print("\n{0}\n".format(test2))
test2bis=decode.decoder(test2)
print(test2bis)
print("\n---------- TEST TYPE 3 ----------\n")
print(user)
test3=encode.encoder(user)
print("\n{0}\n".format(test3))
test3bis=decode.decoder(test3)
print(test3bis)
print("\n---------- TEST TYPE 4 ----------\n")
print(maj_user)
Пример #15
0
# Manual round-trip checks: each section prints the input message, its
# encoding, and the decoded result, so they can be compared by eye.
# NOTE(review): types 9 and 10 are decoded with decode2.decoder while types
# 11-13 use decode.decoder -- confirm this split is intentional.
print("\n---------- TEST TYPE 9 ----------\n")
print(desinscription)
test9=encode.encoder(desinscription)
print("\n{0}\n".format(test9))
test9bis=decode2.decoder(test9)
print(test9bis)
print("\n---------- TEST TYPE 10 ----------\n")
print(redir)
test10=encode.encoder(redir)
print("\n{0}\n".format(test10))
test10bis=decode2.decoder(test10)
print(test10bis)
print("\n---------- TEST TYPE 11 ----------\n")
print(joindre_salon_ok)
test11=encode.encoder(joindre_salon_ok)
print("\n{0}\n".format(test11))
test11bis=decode.decoder(test11)
print(test11bis)
print("\n---------- TEST TYPE 12 ----------\n")
print(joindre_salon_nok)
test12=encode.encoder(joindre_salon_nok)
print("\n{0}\n".format(test12))
test12bis=decode.decoder(test12)
print(test12bis)
print("\n---------- TEST TYPE 13 ----------\n")
print(ack)
test13=encode.encoder(ack)
print("\n{0}\n".format(test13))
test13bis=decode.decoder(test13)
print(test13bis)
Пример #16
0
from decode import decoder
from encode import encoder

# Round-trip checks: decoding an encoded string must give back the original.
# The first check runs three times, as in the original script.
for _ in range(3):
    assert decoder(encoder("alodicksiano")) == "alodicksiano"
assert "123456789 123456789 123456789 123456789" == decoder(
    encoder("123456789 123456789 123456789 123456789"))