def setup(self):
    """Run fadbm.py on test.fa and open the result as a ScreedDB.

    Sets ``self._testfa`` to the ``test.fa`` path next to this file and
    ``self.db`` to the ``screed.ScreedDB`` opened on that same path.

    Raises:
        subprocess.CalledProcessError: if the fadbm.py run exits non-zero.
    """
    thisdir = os.path.dirname(__file__)
    self._testfa = os.path.join(thisdir, 'test.fa')
    fadbm = os.path.join(thisdir, '..', 'fadbm.py')
    # check_output drains the child's stdout while it waits. check_call with
    # stdout=PIPE never reads the pipe, so a chatty child can block forever
    # once the pipe buffer fills.
    subprocess.check_output(['python', fadbm, self._testfa])
    self.db = screed.ScreedDB(self._testfa)
def setup(self):
    """Round-trip test.fa through the ``screed`` command-line tool.

    A temp copy of test.fa is run through ``screed db``, dumped with
    ``dump_fastq``, run through ``screed db`` again, dumped back with
    ``dump_fasta`` and run through ``screed db`` one last time. ``self.db``
    is the ScreedDB opened on the final FASTA path (``self._faName``).

    Raises:
        subprocess.CalledProcessError: if any screed command exits non-zero.
    """
    self._fqName = utils.get_temp_filename('fa_to_fq')
    self._faName = utils.get_temp_filename('fq_to_fa')
    self._testfa = utils.get_temp_filename('test.fa')
    shutil.copy(utils.get_test_data('test.fa'), self._testfa)

    commands = (
        ['screed', 'db', self._testfa],
        ['screed', 'dump_fastq', self._testfa, self._fqName],
        ['screed', 'db', self._fqName],
        ['screed', 'dump_fasta', self._fqName, self._faName],
        ['screed', 'db', self._faName],
    )
    for cmd in commands:
        # check_output reads the child's stdout, so the pipe cannot fill up
        # and stall the child the way check_call(stdout=PIPE) can. A non-zero
        # exit already raises, so no separate return-code assert is needed.
        subprocess.check_output(cmd)

    self.db = screed.ScreedDB(self._faName)
def setup(self):
    """Copy test.fa to a temp path, parse it and open it as a ScreedDB.

    Stores the temp path in ``self._testfile``, the opened database in
    ``self._db`` and a fresh ``nostring()`` instance in ``self._ns``.
    """
    fasta_copy = utils.get_temp_filename('test.fa')
    self._testfile = fasta_copy
    shutil.copy(utils.get_test_data('test.fa'), fasta_copy)

    # Parse the FASTA copy first, then open the same path as a database.
    screed.read_fasta_sequences(fasta_copy)
    self._db = screed.ScreedDB(fasta_copy)

    self._ns = nostring()
def setup(self):
    """Write test.fastq back out through screed and open the re-read copy.

    ``self._testfq`` is test.fastq next to this file; ``self._fileName`` is
    the ``fastqRecovery`` path that ``screed.ToFastq`` writes to.
    ``self.db`` is the ScreedDB opened on ``self._fileName``.
    """
    here = os.path.dirname(__file__)
    recovered = os.path.join(here, 'fastqRecovery')
    original = os.path.join(here, 'test.fastq')
    self._fileName = recovered
    self._testfq = original

    screed.read_fastq_sequences(original)   # parse the source FASTQ
    screed.ToFastq(original, recovered)     # dump it to the recovery path
    screed.read_fastq_sequences(recovered)  # parse the dumped file
    self.db = screed.ScreedDB(recovered)
def test_make_db():
    """Call screed.make_db on a temp copy of test.fa, open it, then clean up.

    The test passes as long as none of the calls raise. The file at
    ``<copy> + fileExtension`` is removed at the end.
    """
    fasta_copy = utils.get_temp_filename('test.fa')
    source = utils.get_test_data('test.fa')
    shutil.copy(source, fasta_copy)

    screed.make_db(fasta_copy)
    # Kept bound so the database object stays alive until the function ends.
    db = screed.ScreedDB(fasta_copy)

    os.unlink(fasta_copy + fileExtension)
def setup(self):
    """Write test.fa back out through screed and open the re-read copy.

    ``self._testfa`` is test.fa next to this file; ``self._fileName`` is the
    ``fastaRecovery`` path that ``screed.ToFasta`` writes to. ``self.db`` is
    the ScreedDB opened on ``self._fileName``.
    """
    here = os.path.dirname(__file__)
    recovered = os.path.join(here, 'fastaRecovery')
    self._fileName = recovered
    original = os.path.join(here, 'test.fa')
    self._testfa = original

    screed.read_fasta_sequences(original)   # parse the source FASTA
    screed.ToFasta(original, recovered)     # dump it to the recovery path
    screed.read_fasta_sequences(recovered)  # parse the dumped file
    self.db = screed.ScreedDB(recovered)
def setup(self):
    """Write a temp copy of test.fa back out through screed and re-open it.

    ``self._testfa`` is the temp copy of test.fa; ``self._fileName`` is the
    temp ``fastaRecovery`` path that ``screed.ToFasta`` writes to.
    ``self.db`` is the ScreedDB opened on ``self._fileName``.
    """
    recovered = utils.get_temp_filename('fastaRecovery')
    self._fileName = recovered
    working_copy = utils.get_temp_filename('test.fa')
    self._testfa = working_copy
    shutil.copy(utils.get_test_data('test.fa'), working_copy)

    screed.read_fasta_sequences(working_copy)  # parse the source FASTA
    screed.ToFasta(working_copy, recovered)    # dump it to the recovery path
    screed.read_fasta_sequences(recovered)     # parse the dumped file
    self.db = screed.ScreedDB(recovered)
def setup(self):
    """Run ``python -m screed db`` on a temp copy of test.fastq and open it.

    Sets ``self._testfq`` to the temp copy and ``self.db`` to the ScreedDB
    opened on that path.

    Raises:
        subprocess.CalledProcessError: if the screed command exits non-zero.
    """
    self._testfq = utils.get_temp_filename('test.fastq')
    shutil.copy(utils.get_test_data('test.fastq'), self._testfq)

    cmd = ['python', '-m', 'screed', 'db', self._testfq]
    # check_output drains stdout and raises on a non-zero exit. That avoids
    # the stall check_call(stdout=PIPE) risks when the pipe fills, and makes
    # a separate return-code assert unnecessary.
    subprocess.check_output(cmd)

    self.db = screed.ScreedDB(self._testfq)
def test_nodb():
    """
    Tests if screed throws an appropriate exception if it is
    asked to open a non-existent screed database
    """
    try:
        db = screed.ScreedDB('foo')
    except ValueError:
        # Expected outcome: opening 'foo' was rejected.
        return
    # Reaching this point means the open above did not raise ValueError.
    assert 1 == 0
def setup(self):
    """Convert test.fa to FASTQ and back, then open the final FASTA file.

    All paths live next to this file: ``self._testfa`` (test.fa),
    ``self._fqName`` (fa_to_fq) and ``self._faName`` (fq_to_fa).
    ``self.db`` is the ScreedDB opened on ``self._faName``.
    """
    here = os.path.dirname(__file__)
    fastq_out = os.path.join(here, 'fa_to_fq')
    fasta_out = os.path.join(here, 'fq_to_fa')
    fasta_in = os.path.join(here, 'test.fa')
    self._fqName = fastq_out
    self._faName = fasta_out
    self._testfa = fasta_in

    screed.read_fasta_sequences(fasta_in)   # Fasta file -> fasta db
    screed.ToFastq(fasta_in, fastq_out)     # Fasta db -> fastq text
    screed.read_fastq_sequences(fastq_out)  # Fastq file -> fastq db
    screed.ToFasta(fastq_out, fasta_out)    # Fastq db -> fasta text
    screed.read_fasta_sequences(fasta_out)  # Fasta file -> fasta db
    self.db = screed.ScreedDB(fasta_out)
def setup(self):
    """Convert a temp copy of test.fa to FASTQ and back, then open it.

    ``self._testfa`` is the temp copy of test.fa, ``self._fqName`` the
    intermediate FASTQ path and ``self._faName`` the final FASTA path.
    ``self.db`` is the ScreedDB opened on ``self._faName``.
    """
    fastq_out = utils.get_temp_filename('fa_to_fq')
    self._fqName = fastq_out
    fasta_out = utils.get_temp_filename('fq_to_fa')
    self._faName = fasta_out
    fasta_in = utils.get_temp_filename('test.fa')
    self._testfa = fasta_in
    shutil.copy(utils.get_test_data('test.fa'), fasta_in)

    screed.read_fasta_sequences(fasta_in)   # Fasta file -> fasta db
    screed.ToFastq(fasta_in, fastq_out)     # Fasta db -> fastq text
    screed.read_fastq_sequences(fastq_out)  # Fastq file -> fastq db
    screed.ToFasta(fastq_out, fasta_out)    # Fastq db -> fasta text
    screed.read_fasta_sequences(fasta_out)  # Fasta file -> fasta db
    self.db = screed.ScreedDB(fasta_out)
def time_screed(f, random_seqs, name):
    """Time screed database creation and record lookup for FASTQ file *f*.

    Args:
        f: path of the FASTQ file to parse.
        random_seqs: iterable of strings; each is looked up in the database
            with its first character stripped.
        name: label passed to ``show_name``.

    Prints the elapsed seconds for the "create" and "search" phases.
    """
    show_name(name)
    # Remove any previous "<f>_<fileExtension>" file before timing creation.
    rm("%s_%s" % (f, screed.DBConstants.fileExtension))

    started = time.time()
    screed.read_fastq_sequences(f)
    # Single parenthesised expression: prints the same under Python 2.
    print("create: %.3f" % (time.time() - started))

    faqdb = screed.ScreedDB(f)
    started = time.time()
    for entry in random_seqs:
        key = entry[1:]
        # The attribute access is the work being timed; its value is unused.
        faqdb[key].sequence
    print("search: %.3f" % (time.time() - started))

    del faqdb
def test_wrongdb():
    """
    Tests if screed throws an appropriate exception if it is
    asked to open a file that isn't a screed database

    An empty ``blah_screed`` file is created in the current directory and is
    always removed again, whatever the outcome.

    Raises:
        AssertionError: if ScreedDB opens the empty file without TypeError.
    """
    blah = 'blah_screed'
    # Create an empty placeholder; the with-block closes it straight away.
    with open(blah, 'wb'):
        pass

    try:
        db = screed.ScreedDB(blah)
    except TypeError:
        # Expected outcome: the empty file was rejected.
        pass
    else:
        raise AssertionError(
            'ScreedDB opened %r without raising TypeError' % blah)
    finally:
        # One cleanup point for every path, including unexpected exceptions
        # that previously left the scratch file behind.
        os.unlink(blah)
def openDB(fileName):
    """Open a screed DB, creating it first when given a sequence file.

    Args:
    fileName -- Name of sequence file or screedDB file

    A name ending in '_screed' is opened directly. Anything else is parsed
    as FASTQ; if that raises KeyError it is parsed as FASTA instead, and
    the database is then opened at ``fileName + '_screed'``.

    Raises:
    IOError -- if the FASTA fallback cannot open the file
    """
    logging.info('{}: Making/opening screed database for: "{}"'.format(my_time(), fileName))

    # Already a database file: nothing to build.
    if fileName.endswith('_screed'):
        return screed.ScreedDB(fileName)

    try:
        screed.read_fastq_sequences(fileName)
    except KeyError:
        # FASTQ parsing failed with KeyError; fall back to FASTA.
        try:
            screed.read_fasta_sequences(fileName)
        except IOError:
            msg = 'Cannot open {}'.format(fileName)
            raise IOError(msg)

    return screed.ScreedDB(fileName + '_screed')
def build_get_hit_length_function(referenceLengths):
    """
    Return a callable mapping a reference sequence id to its length.

    The referenceLengths parameter may be either a python dict or a str
    name of a fasta file. For a dict, the callable returns
    ``referenceLengths[id]``. For a str, the file is opened as a screed
    database (parsed first if no "<name>_screed" file exists) and the
    callable returns the length of the record's 'sequence' entry.
    """
    if not isinstance(referenceLengths, str):
        def length_from_mapping(h):
            return referenceLengths[h]
        return length_from_mapping

    # Imported only on this branch so dict callers do not need screed.
    import screed

    # assume we have the path to a fasta file
    # has it been parsed by screed?
    index_path = "%s_screed" % (referenceLengths)
    if not os.path.exists(index_path):
        # TODO: just use Bio.SeqIO to get lengths if
        # screed module or screed index is missing.
        # screed is overkill here.
        screed.read_fasta_sequences(referenceLengths)
    refScreed = screed.ScreedDB(referenceLengths)

    def length_from_screed(h):
        return len(refScreed[h]['sequence'])
    return length_from_screed
def setup(self):
    """Parse the ``testha`` file as hava sequences and open it as a ScreedDB.

    The opened database is stored in ``self._db``.
    """
    hava_path = testha
    screed.seqparse.read_hava_sequences(hava_path)
    self._db = screed.ScreedDB(hava_path)
def setup(self):
    """Parse a temp copy of test.fastq and open it as a ScreedDB.

    ``self._testfq`` holds the temp path; ``self.db`` holds the database.
    """
    fastq_copy = utils.get_temp_filename('test.fastq')
    self._testfq = fastq_copy
    shutil.copy(utils.get_test_data('test.fastq'), fastq_copy)

    screed.read_fastq_sequences(fastq_copy)
    self.db = screed.ScreedDB(fastq_copy)
def load_contig_names(fp, delimiter='::'):
    """Read a two-column mapping file into a dict.

    Each line of the file at path *fp* is stripped and split on *delimiter*
    into exactly two fields; the first becomes the key and the second the
    value. A line that does not split into exactly two fields raises
    ValueError from the tuple unpacking.
    """
    contig_map = {}
    with open(fp, 'r') as file:
        for line in file:
            md5, original = line.strip().split(delimiter)
            contig_map[md5] = original
    return contig_map


# Create the output directory when it does not exist yet.
if not os.path.exists(args.outdir):
    os.makedirs(args.outdir)

# https://screed.readthedocs.io/en/latest/screed.html
screed.make_db(fp_genome)
db = screed.ScreedDB(fp_genome)

contigs, coords = load_coords(fp_frames)
cnt = count_hits(fp_aln, coords)
contig_map = load_contig_names(args.names, '::')

for c in contigs:
    ix, arr = get_cnt_sequence(cnt, c)
    algo = rpt.Pelt(model='rbf').fit(arr)
    # .fit_predict()
    # Pelt method seems best for multiple breakpoint detection, see also
    # https://www.marinedatascience.co/blog/2019/09/28/comparison-of-change-point-detection-methods/
    # NOTE(review): the body of the try below lies outside this excerpt.
    try:
import cPickle
import screed

# cPickle.load() reads from an open file object; handing it a path string
# raises TypeError, so each pickle is opened explicitly and closed again.
with open('names.db') as names_fp:
    data_names = cPickle.load(names_fp)

with open('fullnames.db') as fullnames_fp:
    data_fullname = cPickle.load(fullnames_fp)

# The original unpickled names.db a second time for this call; the object
# already loaded above is reused instead.
# NOTE(review): confirm names.db really holds what ScreedDB expects here.
data_seqs = screed.ScreedDB(data_names)
def setup(self):
    """Parse test-whitespace.fa next to this file and open it as a ScreedDB.

    ``self._testfa`` holds the path; ``self.db`` holds the database.
    """
    here = os.path.dirname(__file__)
    fasta_path = os.path.join(here, 'test-whitespace.fa')
    self._testfa = fasta_path

    screed.read_fasta_sequences(fasta_path)
    self.db = screed.ScreedDB(fasta_path)
#! /usr/bin/env python
"""
Adjust counts to RKPM, reads-per-thousand-bases-of-mRNA.
"""
import sys
import screed

# Usage: <script> <sequence_database> <counts_file>
sequence_database = sys.argv[1]
counts_file = sys.argv[2]

seqdb = screed.ScreedDB(sequence_database)

# The with-block closes the counts file even if a line fails to parse.
with open(counts_file) as counts_fp:
    for line in counts_fp:
        fields = line.strip().split()
        if not fields:
            # Blank lines would otherwise crash the two-value unpack below.
            continue

        count, name = fields    # parse lines like '1523 geneX'
        count = int(count)

        # look up the sequence in the seqdb dictionary-like database.
        sequence_length = len(seqdb[name].sequence)

        # calculate the appropriate divisor
        div = float(sequence_length) / 1000.

        # divide! One formatted string prints the same "<value> <name>" line
        # whether print is a statement or a function.
        print("%s %s" % (float(count) / div, name))
python both.py R1.fastq R2.fastq

out put:
R1.fastq.both R2.fastq.both

This script uses the screed module: https://github.com/ctb/screed
'''
import screed
import sys

# Paths of the two FASTQ inputs, taken from the command line.
R1_IN = sys.argv[1]
R2_IN = sys.argv[2]

# Parse both inputs, then open the "<input>_screed" file for each.
screed.read_fastq_sequences(R1_IN)
screed.read_fastq_sequences(R2_IN)
DB_R1 = screed.ScreedDB(R1_IN+'_screed')
DB_R2 = screed.ScreedDB(R2_IN+'_screed')

with open(R1_IN+'.both','w') as R1_OUT:
    with open(R2_IN+'.both','w') as R2_OUT:
        for record, thing in DB_R1.iteritems():
            try:
                # Look up the R2 record under the R1 name with " 1:" replaced by " 2:".
                match = DB_R2[thing['name'].replace(" 1:"," 2:")]
            except KeyError:
                # No matching R2 record: leave this read out of both outputs.
                continue
            # Write both records as four-line FASTQ entries.
            R1_OUT.write('@%s %s\n%s\n+\n%s\n' % (thing['name'],thing['annotations'],thing['sequence'],thing['accuracy']))
            R2_OUT.write('@%s %s\n%s\n+\n%s\n' % (match['name'],match['annotations'],match['sequence'],match['accuracy']))
#!/usr/bin/env python
import sys
import random
import screed

# Usage: <script> <screed_db_input> <fasta_output> <num_to_choose>
filein = sys.argv[1]
fileout = sys.argv[2]
num_to_choose = int(sys.argv[3])

db = screed.ScreedDB(filein)
names = db.keys()
size = len(names)
print(size)
print(num_to_choose)

# random.sample raises ValueError when num_to_choose exceeds size. Sampling
# before the output is opened means a bad request leaves no empty file.
to_choose_list = random.sample(range(size), num_to_choose)

# The with-block flushes and closes the output even if a record lookup fails.
with open(fileout, 'w') as fw:
    for i in to_choose_list:
        record = db.loadRecordByIndex(i)
        fw.write('>' + str(record.name) + '\n')
        fw.write(str(record.sequence) + '\n')
def setup(self):
    """Parse test.fastq next to this file and open it as a ScreedDB.

    ``self._testfq`` holds the path; ``self.db`` holds the database.
    """
    here = os.path.dirname(__file__)
    fastq_path = os.path.join(here, 'test.fastq')
    self._testfq = fastq_path

    screed.read_fastq_sequences(fastq_path)
    self.db = screed.ScreedDB(fastq_path)
import cPickle
import screed

# mouse.namedb holds two consecutive pickles, read in order from one handle.
# The with-block closes the file even if unpickling fails.
with open('mouse.namedb') as fp:
    is_ncbi = cPickle.load(fp)
    mouse_names = cPickle.load(fp)

# Open the second file explicitly so its handle is closed and not leaked.
with open('mouse.namedb.fullname') as fullname_fp:
    mouse_fullname = cPickle.load(fullname_fp)

mouse_seqs = screed.ScreedDB('mouse.protein.faa')
def setup(self):
    """Parse a temp copy of test-whitespace.fa and open it as a ScreedDB.

    ``self._testfa`` holds the temp path; ``self.db`` holds the database.
    """
    fasta_copy = utils.get_temp_filename('test-whitespace.fa')
    self._testfa = fasta_copy
    shutil.copy(utils.get_test_data('test-whitespace.fa'), fasta_copy)

    screed.read_fasta_sequences(fasta_copy)
    self.db = screed.ScreedDB(fasta_copy)