Example #1
def check_general_fails(self, filename, good_count):
    handle = open(filename)  # "rU" mode was removed in Python 3.11; default text mode suffices
    tuples = QualityIO.FastqGeneralIterator(handle)
    for i in range(good_count):
        title, seq, qual = next(tuples)  # Make sure no errors!
    self.assertRaises(ValueError, next, tuples)
    handle.close()
Example #2
File: fastq.py Project: cez026/BarcSeek
def read_fastq(fastq: str, pair: Optional[str] = None) -> Tuple[Read, ...]:
    """Read in a FASTQ file, and optionally its pair.

    'fastq' is the filename for the forward or only FASTQ file;
    'pair' is an optional filename for the reverse FASTQ file."""
    reads = dict()  # type: Dict[str, Read]
    with open(fastq, 'r') as ffile:
        for read in QualityIO.FastqGeneralIterator(ffile):  # type: Tuple[str, str, str]
            read_id, seq, qual = read
            reads[read_id] = Read(read_id=read_id, seq=seq, qual=qual)
    if pair:
        with open(pair, 'r') as rfile:
            for read in QualityIO.FastqGeneralIterator(rfile):  # type: Tuple[str, str, str]
                read_id, seq, qual = read
                reads[read_id].add_reverse(seq=seq, qual=qual)
    return tuple(reads.values())
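The snippet above depends on BarcSeek's project-specific Read class, which the listing does not show. A minimal hypothetical stand-in for the interface the code assumes (the real class lives in cez026/BarcSeek and may differ):

class Read:
    # Hypothetical minimal stand-in; not the actual BarcSeek implementation.
    def __init__(self, read_id: str, seq: str, qual: str) -> None:
        self.read_id = read_id  # FASTQ title line, without the leading '@'
        self.seq = seq          # forward sequence
        self.qual = qual        # forward quality string
        self.rseq = None        # reverse sequence, set by add_reverse()
        self.rqual = None       # reverse quality string, set by add_reverse()

    def add_reverse(self, seq: str, qual: str) -> None:
        # Attach the mate read from the reverse FASTQ file.
        self.rseq = seq
        self.rqual = qual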
Example #3
def check_general_fails(self, filename, good_count):
    tuples = QualityIO.FastqGeneralIterator(filename)
    msg = "FastqGeneralIterator failed to detect error in %s" % filename
    for i in range(good_count):
        title, seq, qual = next(tuples)  # Make sure no errors!
    # Detect error in the next record:
    with self.assertRaises(ValueError, msg=msg) as cm:
        title, seq, qual = next(tuples)
Example #4
def check_general_passes(self, filename, record_count):
    tuples = QualityIO.FastqGeneralIterator(filename)
    # This "raw" parser doesn't check the ASCII characters, which means
    # certain invalid FASTQ files will get parsed without errors.
    msg = "FastqGeneralIterator failed to parse %s" % filename
    count = 0
    for title, seq, qual in tuples:
        self.assertEqual(len(seq), len(qual), msg=msg)
        count += 1
    self.assertEqual(count, record_count, msg=msg)
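As the comment in these tests notes, FastqGeneralIterator trades validation for speed: it yields plain (title, sequence, quality) string tuples without building SeqRecord objects or checking individual quality characters. A minimal self-contained sketch of that behavior, using an in-memory handle rather than a real test fixture:

from io import StringIO
from Bio.SeqIO.QualityIO import FastqGeneralIterator

# Two tiny in-memory FASTQ records; illustrative data, not a real fixture.
data = StringIO("@read1\nACGT\n+\nIIII\n@read2\nTTGA\n+\n!!!!\n")
for title, seq, qual in FastqGeneralIterator(data):
    assert len(seq) == len(qual)  # the same length check the tests rely on
    print(title, seq, qual)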
Example #5
def check_general_passes(self, filename, record_count):
    handle = open(filename)  # "rU" mode was removed in Python 3.11; default text mode suffices
    tuples = QualityIO.FastqGeneralIterator(handle)
    # This "raw" parser doesn't check the ASCII characters, which means
    # certain invalid FASTQ files will get parsed without errors.
    count = 0
    for title, seq, qual in tuples:
        self.assertEqual(len(seq), len(qual))
        count += 1
    self.assertEqual(count, record_count)
    handle.close()
Example #6
def Main():
    args = ParseArg()

    #    enable_thread_profiling()
    #    yappi.set_clock_type("wall")
    #    yappi.start()

    #    global type
    #    type = "fastq"
    #    if args.fastq:
    #       type="fastq"
    #    elif args.fasta:
    #       type="fasta"

    global Range
    Range = args.range

    global Trim
    Trim = args.trim

    global max_score
    max_score = args.max_score

    name1 = args.input1.split('/')[-1]
    name2 = args.input2.split('/')[-1]
    #----------- read barcode ----------
    global barcodes
    global barcode_len
    barcodes = []
    for i in open(args.barcode, 'r'):
        i = i.strip()
        barcodes.append(i)
        barcode_len = len(i)  # assumes every barcode has the same length

    barcodes.append('unassign')

    #-----------------------------------

    records = QualityIO.FastqGeneralIterator(open(args.input1))
    records2 = QualityIO.FastqGeneralIterator(open(args.input2))

    Files = MainIO(records, records2, name1, name2)

    print "start to assign sequence to different barcodes..."
    print "----------"
    num_thread = args.parallel
    #    q=Queue(maxsize=10000)

    #    feeder = threading.Thread(target = push_stuff, args = (q, num_thread, records, records2))
    #    feeder.start()

    workers = []
    for i in range(num_thread):
        worker = threading.Thread(target=do_stuff, args=(Files, ))
        workers.append(worker)
        #worker.setDaemon(True)
        worker.start()

#    print >>sys.stderr,"Finish reading records"

#    q.join()

#    feeder.join()
    for i in range(num_thread):
        workers[i].join()
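ParseArg, MainIO, and do_stuff are project functions not shown in this listing; Main() wires the two FastqGeneralIterator streams into them and fans the work out to threads. Setting the threading aside, the core pairing-and-demultiplexing idea can be sketched by walking both FASTQ streams in lockstep. A minimal sketch, assuming the barcode is a fixed-length prefix of the forward read (assign_pairs is a hypothetical helper, not part of the script):

from Bio.SeqIO.QualityIO import FastqGeneralIterator

def assign_pairs(path1, path2, barcodes, barcode_len):
    # Hypothetical helper illustrating the idea only; the real script
    # does this with MainIO and worker threads.
    with open(path1) as f1, open(path2) as f2:
        pairs = zip(FastqGeneralIterator(f1), FastqGeneralIterator(f2))
        for (t1, s1, q1), (t2, s2, q2) in pairs:
            prefix = s1[:barcode_len]
            bucket = prefix if prefix in barcodes else 'unassign'
            yield bucket, (t1, s1, q1), (t2, s2, q2)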