def check_general_fails(self, filename, good_count):
    """Check that FastqGeneralIterator rejects a malformed FASTQ file.

    The first ``good_count`` records of ``filename`` must be returned
    without any exception; asking for the record after those must raise
    ValueError.
    """
    # Plain text mode: "rU" is rejected by Python 3.11+, and universal
    # newlines are already the default for text files on Python 3.
    # The with-block closes the handle even when a record fails to parse
    # or the assertion below fails.
    with open(filename, "r") as handle:
        tuples = QualityIO.FastqGeneralIterator(handle)
        for _ in range(good_count):
            # Make sure no errors!
            title, seq, qual = next(tuples)
        self.assertRaises(ValueError, next, tuples)
def read_fastq(fastq: str, pair: Optional[str] = None) -> Tuple[Read, ...]:
    """Read in a FASTQ file, and optionally its pair.

    Args:
        fastq: Filename of the forward (or only) FASTQ file.
        pair: Optional filename of the reverse FASTQ file.

    Returns:
        A tuple holding one Read per distinct record title found in
        ``fastq``. A later record with an already-seen title replaces
        the earlier one.

    Raises:
        KeyError: If ``pair`` contains a record whose title does not
            occur in ``fastq``.
    """
    reads = {}  # type: Dict[str, Read]
    with open(fastq, 'r') as ffile:
        for read_id, seq, qual in QualityIO.FastqGeneralIterator(ffile):
            reads[read_id] = Read(read_id=read_id, seq=seq, qual=qual)
    if pair:
        with open(pair, 'r') as rfile:
            for read_id, seq, qual in QualityIO.FastqGeneralIterator(rfile):
                # Mates are matched on the complete title line, so both
                # files must use identical titles for a read pair.
                reads[read_id].add_reverse(seq=seq, qual=qual)
    return tuple(reads.values())
def check_general_fails(self, filename, good_count):
    """Assert that FastqGeneralIterator fails right after the good records.

    ``good_count`` records are pulled from ``filename`` and must come
    back without an exception; pulling one more record is then expected
    to raise ValueError.
    """
    records = QualityIO.FastqGeneralIterator(filename)
    failure_msg = "FastqGeneralIterator failed to detect error in %s" % filename

    # Leading records are expected to be valid: any exception raised
    # here propagates and fails the test.
    for _ in range(good_count):
        _title, _seq, _qual = next(records)

    # The record after the valid ones is where the error must surface.
    with self.assertRaises(ValueError, msg=failure_msg):
        _title, _seq, _qual = next(records)
def check_general_passes(self, filename, record_count):
    """Assert that FastqGeneralIterator parses ``filename`` completely.

    Every record must have sequence and quality strings of equal
    length, and the number of records must equal ``record_count``.
    """
    failure_msg = "FastqGeneralIterator failed to parse %s" % filename
    # NOTE: this "raw" parser does not check the ASCII characters, so
    # certain invalid FASTQ files are parsed without errors.
    records = QualityIO.FastqGeneralIterator(filename)

    seen = 0  # stays 0 when the file yields no records
    for seen, (_title, sequence, quality) in enumerate(records, start=1):
        self.assertEqual(len(sequence), len(quality), msg=failure_msg)
    self.assertEqual(seen, record_count, msg=failure_msg)
def check_general_passes(self, filename, record_count):
    """Check that FastqGeneralIterator parses all records in a FASTQ file.

    Each record of ``filename`` must have sequence and quality strings
    of the same length, and exactly ``record_count`` records must be
    returned.
    """
    # Plain text mode: "rU" is rejected by Python 3.11+, and universal
    # newlines are already the default for text files on Python 3.
    # The with-block closes the handle even when an assertion fails.
    with open(filename, "r") as handle:
        tuples = QualityIO.FastqGeneralIterator(handle)
        # This "raw" parser doesn't check the ASCII characters which means
        # certain invalid FASTQ files will get parsed without errors.
        count = 0
        for title, seq, qual in tuples:
            self.assertEqual(len(seq), len(qual))
            count += 1
        self.assertEqual(count, record_count)
def Main():
    """Run the barcode-assignment pipeline from the command line.

    Parses the arguments with ParseArg(), stores the run settings in the
    module-level globals ``Range``, ``Trim``, ``max_score``, ``barcodes``
    and ``barcode_len``, wraps the two FASTQ inputs in a MainIO object
    and runs ``args.parallel`` threads of do_stuff over that shared
    object, returning once every thread has finished.
    """
    global Range, Trim, max_score, barcodes, barcode_len

    args = ParseArg()
    Range = args.range
    Trim = args.trim
    max_score = args.max_score

    # Only the last path component of each input is handed to MainIO.
    name1 = args.input1.split('/')[-1]
    name2 = args.input2.split('/')[-1]

    # ----------- read barcode ----------
    # One entry per line, whitespace-stripped; the file is closed as
    # soon as it has been read.
    with open(args.barcode, 'r') as barcode_file:
        barcodes = [line.strip() for line in barcode_file]
    if barcodes:
        # The length is taken from the last line of the file; with an
        # empty file barcode_len is left unset, as before.
        barcode_len = len(barcodes[-1])
    barcodes.append('unassign')
    # -----------------------------------

    # Both inputs stay open until every worker has joined and are
    # closed when the block is left.
    with open(args.input1, "rU") as handle1, \
            open(args.input2, "rU") as handle2:
        records = QualityIO.FastqGeneralIterator(handle1)
        records2 = QualityIO.FastqGeneralIterator(handle2)
        Files = MainIO(records, records2, name1, name2)

        # A single parenthesised string prints identically on Python 2
        # and Python 3.
        print("start to assign sequence to different barcodes...")
        print("----------")

        num_thread = args.parallel
        workers = []
        for _ in range(num_thread):
            worker = threading.Thread(target=do_stuff, args=(Files, ))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()