def setup(self):
    """Build the round-tripped FASTQ database used by the tests.

    The bundled ``test.fastq`` (located next to this module) is indexed,
    written back out as FASTQ text to ``fastqRecovery``, and that recovered
    file is indexed and opened as ``self.db``.
    """
    here = os.path.dirname(__file__)
    recovered = os.path.join(here, 'fastqRecovery')
    original = os.path.join(here, 'test.fastq')
    self._fileName, self._testfq = recovered, original

    # Index the original, dump it back to FASTQ text, then index the dump.
    screed.read_fastq_sequences(original)
    screed.ToFastq(original, recovered)
    screed.read_fastq_sequences(recovered)

    self.db = screed.ScreedDB(recovered)
def setup(self):
    """Build the round-tripped FASTQ database inside temporary files.

    A private copy of the ``test.fastq`` test data is indexed, written back
    out as FASTQ text to a temporary ``fastqRecovery`` file, and that
    recovered file is indexed and opened as ``self.db``.
    """
    recovered = utils.get_temp_filename('fastqRecovery')
    working_copy = utils.get_temp_filename('test.fastq')
    self._fileName, self._testfq = recovered, working_copy

    # Work on a copy so the shared test data is left untouched.
    shutil.copy(utils.get_test_data('test.fastq'), working_copy)

    # Index the copy, dump it back to FASTQ text, then index the dump.
    screed.read_fastq_sequences(working_copy)
    screed.ToFastq(working_copy, recovered)
    screed.read_fastq_sequences(recovered)

    self.db = screed.ScreedDB(recovered)
def setup(self):
    """Build a database that went through a FASTA -> FASTQ -> FASTA trip.

    All files live next to this module.  ``test.fa`` is the input,
    ``fa_to_fq`` is the intermediate FASTQ text and ``fq_to_fa`` is the
    final FASTA text whose database is opened as ``self.db``.
    """
    here = os.path.dirname(__file__)
    fastq_text = os.path.join(here, 'fa_to_fq')
    fasta_text = os.path.join(here, 'fq_to_fa')
    source_fasta = os.path.join(here, 'test.fa')
    self._fqName = fastq_text
    self._faName = fasta_text
    self._testfa = source_fasta

    screed.read_fasta_sequences(source_fasta)   # FASTA file -> FASTA db
    screed.ToFastq(source_fasta, fastq_text)    # FASTA db   -> FASTQ text
    screed.read_fastq_sequences(fastq_text)     # FASTQ file -> FASTQ db
    screed.ToFasta(fastq_text, fasta_text)      # FASTQ db   -> FASTA text
    screed.read_fasta_sequences(fasta_text)     # FASTA file -> FASTA db

    self.db = screed.ScreedDB(fasta_text)
def setup(self):
    """Build a FASTA -> FASTQ -> FASTA round-trip database in temp files.

    A private copy of the ``test.fa`` test data is the input, ``fa_to_fq``
    is the intermediate FASTQ text and ``fq_to_fa`` is the final FASTA text
    whose database is opened as ``self.db``.
    """
    fastq_text = utils.get_temp_filename('fa_to_fq')
    fasta_text = utils.get_temp_filename('fq_to_fa')
    working_copy = utils.get_temp_filename('test.fa')
    self._fqName = fastq_text
    self._faName = fasta_text
    self._testfa = working_copy

    # Work on a copy so the shared test data is left untouched.
    shutil.copy(utils.get_test_data('test.fa'), working_copy)

    screed.read_fasta_sequences(working_copy)   # FASTA file -> FASTA db
    screed.ToFastq(working_copy, fastq_text)    # FASTA db   -> FASTQ text
    screed.read_fastq_sequences(fastq_text)     # FASTQ file -> FASTQ db
    screed.ToFasta(fastq_text, fasta_text)      # FASTQ db   -> FASTA text
    screed.read_fasta_sequences(fasta_text)     # FASTA file -> FASTA db

    self.db = screed.ScreedDB(fasta_text)
def time_screed(f, random_seqs, name):
    """Time screed database creation and record lookup for a FASTQ file.

    Prints two timings to stdout: ``create`` (building the database from
    *f*) and ``search`` (looking up every entry of *random_seqs*).

    Args:
        f: Path of the FASTQ file to index.
        random_seqs: Iterable of record identifiers to look up.  The first
            character of each is dropped before the lookup (presumably the
            leading ``@`` of a FASTQ header -- confirm against the caller).
        name: Label passed to ``show_name`` before the timings are printed.
    """
    show_name(name)

    # Remove a previously built database so creation is timed from scratch.
    # NOTE(review): this assumes DBConstants.fileExtension has no leading
    # underscore of its own -- confirm the path matches what screed writes.
    rm("%s_%s" % (f, screed.DBConstants.fileExtension))

    t = time.time()
    screed.read_fastq_sequences(f)
    # Single-argument print(...) emits the same text on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print("create: %.3f" % (time.time() - t))

    faqdb = screed.ScreedDB(f)
    t = time.time()
    for r in random_seqs:
        # Attribute access only; the value is discarded, we just time it.
        faqdb[r[1:]].sequence
    print("search: %.3f" % (time.time() - t))
    del faqdb
def openDB(fileName):
    """Open a screed database, building it first when necessary.

    A name ending in ``_screed`` is opened directly.  Any other name is
    treated as a sequence file: it is indexed as FASTQ, falling back to
    FASTA when the FASTQ reader raises ``KeyError``, and the resulting
    ``<fileName>_screed`` database is opened.

    Args:
        fileName -- Name of sequence file or screedDB file

    Raises:
        IOError -- the FASTA fallback could not open the file
    """
    logging.info('{}: Making/opening screed database for: "{}"'.format(my_time(), fileName))

    # Already a database: nothing to build.
    if fileName.endswith('_screed'):
        return screed.ScreedDB(fileName)

    try:
        screed.read_fastq_sequences(fileName)
    except KeyError:
        # Not parseable as FASTQ; try FASTA instead.
        try:
            screed.read_fasta_sequences(fileName)
        except IOError:
            raise IOError('Cannot open {}'.format(fileName))

    return screed.ScreedDB(fileName + '_screed')
def setup(self):
    """Index the ``test.fastq`` file beside this module and open it.

    The opened database is stored on ``self.db`` and the input path on
    ``self._testfq``.
    """
    here = os.path.dirname(__file__)
    fastq_path = os.path.join(here, 'test.fastq')
    self._testfq = fastq_path

    screed.read_fastq_sequences(fastq_path)
    self.db = screed.ScreedDB(fastq_path)
python both.py R1.fastq R2.fastq out put: R1.fastq.both R2.fastq.both This script uses the screed module: https://github.com/ctb/screed ''' import screed import sys R1_IN = sys.argv[1] R2_IN = sys.argv[2] screed.read_fastq_sequences(R1_IN) screed.read_fastq_sequences(R2_IN) DB_R1 = screed.ScreedDB(R1_IN+'_screed') DB_R2 = screed.ScreedDB(R2_IN+'_screed') with open(R1_IN+'.both','w') as R1_OUT: with open(R2_IN+'.both','w') as R2_OUT: for record, thing in DB_R1.iteritems(): try: match = DB_R2[thing['name'].replace(" 1:"," 2:")] except KeyError: continue R1_OUT.write('@%s %s\n%s\n+\n%s\n' % (thing['name'],thing['annotations'],thing['sequence'],thing['accuracy'])) R2_OUT.write('@%s %s\n%s\n+\n%s\n' % (match['name'],match['annotations'],match['sequence'],match['accuracy']))
screeddb = Filein + '_screed' if os.path.exists(screeddb): db = screed.ScreedDB(screeddb) if Percent != 100 and os.path.exists(os.path.join(Mypath,'temp')): print("Screed database and shuffled files already exists proceeding to resampling.") resamp(Percent, os.path.join(Mypath,'temp'), Mypath, Fileout, db, sys.argv[3], NumResamp) os.remove(os.path.join(Mypath,'temp')) print('Shuffleing done with resampleing. No log file written.') sys.exit() else: print("Screed database already exists proceeding to shuffleing.") else: print("Creating screed database.") db = screed.read_fastq_sequences(Filein) print(dict(db)) ReadNum = len(db) N='name' S='sequence' A='accuracy' n, j, k, l = 0, 0, 0, 0 with open(Fileout, 'w') as outfile: with open(Singleton1, 'w') as R1: with open(Singleton2, 'w') as R2: for record, thing in db.iteritems():
#!/usr/bin/env python
# Copyright (c) 2008-2010, Michigan State University

from __future__ import absolute_import

import sys

from screed import read_fastq_sequences
from screed import DBConstants

# A shell interface to the screed FQDBM database writing function.
#
# Usage: <script> <dbfilename>
# Builds a screed database from the given FASTQ file and reports where it
# was saved.  Exits with status 1 on a usage error and 0 on success.
if __name__ == "__main__":
    # Make sure the user entered the command line arguments correctly
    if len(sys.argv) != 2:
        sys.stderr.write("ERROR: USAGE IS: %s <dbfilename>\n" % sys.argv[0])
        # sys.exit rather than the bare exit(): the latter is injected by the
        # site module for interactive use and is missing under `python -S`.
        sys.exit(1)

    filename = sys.argv[1]
    read_fastq_sequences(filename)

    print("Database saved in %s%s" % (filename, DBConstants.fileExtension))
    sys.exit(0)