def build_fwtrack(self):
    """Read every line of ``self.fhd`` and collect the tags in a FWTrackII.

    Each line is handed to ``self.__fw_parse_line``, which returns a
    ``(chromosome, fpos, strand)`` triple.  A line is skipped when either
    the position or the chromosome comes back falsy; every other line is
    stored through ``FWTrackII.add_loc``.  A progress message is logged
    after every 1,000,000 lines read (skipped lines are counted too).

    No merging or sorting of locations is performed in this method.
    NOTE(review): presumably that is done later on the returned track --
    confirm against the callers.

    Returns the populated FWTrackII object.
    """
    track = FWTrackII()
    lines_read = 0
    for line in self.fhd:
        chrom, pos, strand = self.__fw_parse_line(line)
        lines_read += 1
        # Progress report once per million input lines.
        if lines_read % 1000000 == 0:
            logging.info(" %d" % lines_read)
        # Only keep lines for which the parser produced both fields.
        if pos and chrom:
            track.add_loc(chrom, pos, strand)
    return track
def build_fwtrack(self):
    """Build a FWTrackII object from all lines of ``self.fhd``.

    Every line is parsed with ``self.__fw_parse_line`` into
    ``(chromosome, fpos, strand)``.  Lines for which the parser yields a
    falsy position or chromosome are ignored; the rest are added to the
    track with ``add_loc``.  Progress is logged each time another
    1,000,000 lines have been read, including ignored lines.

    This method itself neither merges nor sorts the stored locations.
    NOTE(review): presumably the caller post-processes the returned
    track -- confirm.

    Returns the FWTrackII object holding the parsed locations.
    """
    report_every = 1000000
    result = FWTrackII()
    seen = 0
    for raw_line in self.fhd:
        parsed = self.__fw_parse_line(raw_line)
        (chrom_name, position, strand) = parsed
        seen += 1
        if seen % report_every == 0:
            logging.info(" %d" % seen)
        # Guard clause: the parser signals an unusable line with a falsy
        # position or chromosome.
        if not (position and chrom_name):
            continue
        result.add_loc(chrom_name, position, strand)
    return result
def build_fwtrack(self, dist=200):
    """Build a FWTrackII object from the paired left/right tag files.

    Lines are read in lock step from ``self.lfhd`` (the filehandler for
    the left tag file) and ``self.rfhd`` (the filehandler for the right
    tag file).  Each pair of lines is passed to ``self.__fw_parse_line``,
    which returns ``(chromname, fpos, strand)``.  Reading stops as soon as
    either file is exhausted.

    dist: the best distance between two tags in a pair.  It is stored on
        ``self.dist`` before any line is parsed.

    Pairing rules, as documented for the parser (the scoring is not
    implemented in this method; NOTE(review): confirm that the parser
    uses ``self.dist`` where the formula below shows 200):

        score = abs(abs(rtag-ltag)-200)+error4lefttag+error4righttag

    The smaller the score, the better the pairing.  A pairing whose score
    is bigger than 200 is discarded.  Only the best pair is kept; if
    several pairings tie for best, the pair of tags is discarded.

    The order of the left and right tag files must match, i.e. the Nth
    left tag must have the same name as the Nth right tag.  Comment lines
    must be removed beforehand.

    Pairs for which the parser returns a falsy position or chromosome
    name are skipped.  A progress message is logged after every
    1,000,000 pairs read.

    Returns the populated FWTrackII object.
    """
    track = FWTrackII()
    left_lines = self.lfhd
    right_lines = self.rfhd
    self.dist = dist
    pairs_read = 0
    while True:
        # Use the builtin next() rather than the Python-2-only ``.next``
        # method so the reader also works on Python 3 file objects.  The
        # try block covers only the two reads, so a StopIteration raised
        # anywhere else is not mistaken for end of input.
        try:
            lline = next(left_lines)
            rline = next(right_lines)
        except StopIteration:
            break
        (chromname, fpos, strand) = self.__fw_parse_line(lline, rline)
        pairs_read += 1
        if pairs_read % 1000000 == 0:
            logging.info(" %d" % pairs_read)
        if not fpos or not chromname:
            continue
        # Cut the name at the last occurrence of ".fa", if there is one.
        cut = chromname.rfind(".fa")
        if cut != -1:
            chromname = chromname[:cut]
        track.add_loc(chromname, fpos, strand)
    return track
def build_fwtrack(self, dist=200):
    """Build FWTrackII from the left and right tag files, read in parallel.

    One line is taken from ``self.lfhd`` (left tag file handler) and one
    from ``self.rfhd`` (right tag file handler) per step; the two lines
    are given to ``self.__fw_parse_line``, which returns
    ``(chromname, fpos, strand)``.  The loop ends when either handler has
    no more lines.

    dist: the best distance between two tags in a pair; assigned to
        ``self.dist`` before parsing begins.

    Documented pairing rules (implemented by the parser, not by this
    method; NOTE(review): verify that the literal 200 in the formula
    corresponds to ``self.dist``):

        score = abs(abs(rtag-ltag)-200)+error4lefttag+error4righttag

    A smaller score means a better pairing.  Pairings scoring above 200
    are discarded.  Only the best pair is kept, and when more than one
    pairing is equally best the tag pair is discarded.

    The left and right tag files must be in matching order: the Nth left
    tag must have the same name as the Nth right tag.  Remove comment
    lines beforehand.

    A pair is skipped when the parser returns a falsy position or
    chromosome name.  Progress is logged every 1,000,000 pairs.

    Returns the FWTrackII object.
    """
    fwtrack = FWTrackII()
    lfhd = self.lfhd
    rfhd = self.rfhd
    self.dist = dist
    report_every = 1000000
    count = 0
    while True:
        # Builtin next() is portable across Python 2.6+ and Python 3; the
        # ``.next`` bound method only exists on Python 2 iterators.  Only
        # the reads are guarded, so an unrelated StopIteration from the
        # parser or the track is not silently treated as end of file.
        try:
            left_line = next(lfhd)
            right_line = next(rfhd)
        except StopIteration:
            break
        chromname, fpos, strand = self.__fw_parse_line(left_line, right_line)
        count += 1
        if count % report_every == 0:
            logging.info(" %d" % count)
        if fpos and chromname:
            # Drop everything from the last ".fa" onward when present.
            suffix_at = chromname.rfind(".fa")
            if suffix_at >= 0:
                chromname = chromname[:suffix_at]
            fwtrack.add_loc(chromname, fpos, strand)
    return fwtrack
def build_fwtrack(self):
    """Parse all lines of ``self.fhd`` into a new FWTrackII object.

    ``self.__fw_parse_line`` turns each line into
    ``(chromosome, fpos, strand)``.  Lines with a falsy position or
    chromosome are dropped, the others are recorded with ``add_loc``.
    Every 1,000,000 lines read (dropped ones included) a progress
    message is logged.

    Returns the FWTrackII object.
    """
    track = FWTrackII()
    line_count = 0
    for text in self.fhd:
        chrom, pos, strand = self.__fw_parse_line(text)
        line_count += 1
        # Emit the running total each time it reaches a whole million.
        if line_count % 1000000 == 0:
            logging.info(" %d" % line_count)
        if pos and chrom:
            track.add_loc(chrom, pos, strand)
    return track
def build_fwtrack(self):
    """Build FWTrackII from all lines of ``self.fhd`` and return it.

    Each line goes through ``self.__fw_parse_line``, which produces
    ``(chromosome, fpos, strand)``.  When the position or the chromosome
    is falsy the line is skipped; otherwise the location is added with
    ``add_loc``.  The number of lines read so far is logged at every
    multiple of 1,000,000, counting skipped lines as well.
    """
    step = 1000000
    collected = FWTrackII()
    total = 0
    for entry in self.fhd:
        fields = self.__fw_parse_line(entry)
        (chrom_name, position, strand) = fields
        total += 1
        if total % step == 0:
            logging.info(" %d" % total)
        # Nothing to store when the parser gave no position or chromosome.
        if not (position and chrom_name):
            continue
        collected.add_loc(chrom_name, position, strand)
    return collected
def build_fwtrack(self):
    """Build a FWTrackII object from the binary stream in ``self.fhd``.

    The stream is read as little-endian int32 length fields followed by
    payloads: 4 leading bytes are skipped, a length-prefixed header is
    skipped, then a count of chromosome records is read, each consisting
    of a length-prefixed name and a 4-byte size that is ignored.  After
    that, length-prefixed entries are read until a full 4-byte length can
    no longer be unpacked.
    NOTE(review): this layout is consistent with an uncompressed BAM
    stream -- confirm.

    Each entry is decoded by ``self.__fw_binary_parse`` into
    ``(chrid, fpos, strand)``; entries with ``fpos >= 0`` are added to the
    track under the chromosome name at index ``chrid``.  A progress
    message is logged every 1,000,000 entries.
    NOTE(review): "only the unique match for a tag is kept" is the
    documented intent; no uniqueness filter is visible here, so presumably
    the binary parser reports rejected entries with a negative position
    -- confirm.

    ``self.fhd`` is closed once all entries have been read.

    Returns the populated FWTrackII object.
    """
    track = FWTrackII()
    chrom_names = []
    seek = self.fhd.seek
    read = self.fhd.read
    tell = self.fhd.tell

    # Skip the first 4 bytes, read the header length and jump past the
    # header itself.
    seek(4)
    (header_size,) = struct.unpack('<i', read(4))
    seek(header_size + tell())

    # Chromosome table: a count, then one record per chromosome.
    (chrom_total,) = struct.unpack('<i', read(4))
    for _ in range(chrom_total):
        (name_size,) = struct.unpack('<i', read(4))
        # The last byte of the name field is dropped (presumably a NUL
        # terminator -- confirm).
        chrom_names.append(read(name_size)[:-1])
        # jump over chromosome size, we don't need it
        seek(tell() + 4)

    entries_read = 0
    while True:
        # A short read at end of input makes unpack fail, which ends the
        # loop.
        try:
            (entry_size,) = struct.unpack('<i', read(4))
        except struct.error:
            break
        chrid, pos, strand = self.__fw_binary_parse(read(entry_size))
        entries_read += 1
        if entries_read % 1000000 == 0:
            logging.info(" %d" % entries_read)
        if pos >= 0:
            track.add_loc(chrom_names[chrid], pos, strand)

    self.fhd.close()
    return track
def build_fwtrack(self):
    """Build FWTrackII from the binary entries in ``self.fhd``.

    Reading proceeds through little-endian int32 length fields: the first
    4 bytes of the stream are skipped, then a header whose length is read
    from the stream is skipped, then the number of chromosomes is read.
    For each chromosome the name (length-prefixed, last byte removed) is
    remembered and the following 4-byte size is skipped.  The remainder
    of the stream is consumed as length-prefixed entries until a 4-byte
    length can no longer be unpacked.
    NOTE(review): the layout looks like an uncompressed BAM stream --
    confirm.

    ``self.__fw_binary_parse`` converts each entry to
    ``(chrid, fpos, strand)``.  Entries whose ``fpos`` is not below zero
    are added to the track, using ``chrid`` as an index into the list of
    chromosome names.  The entry count is logged at each multiple of
    1,000,000.
    NOTE(review): the stated intent is that only the unique match for a
    tag is kept; that filtering is not visible in this method and
    presumably happens in the binary parser -- confirm.

    The file handle ``self.fhd`` is closed after the last entry.

    Returns the FWTrackII object.
    """
    int32 = '<i'
    step = 1000000
    fwtrack = FWTrackII()
    references = []
    fseek = self.fhd.seek
    fread = self.fhd.read
    ftell = self.fhd.tell

    # Position 4 holds the header length; skip the header it describes.
    fseek(4)
    header_len = struct.unpack(int32, fread(4))[0]
    fseek(header_len + ftell())

    # Number of chromosome records that follow.
    n_refs = struct.unpack(int32, fread(4))[0]
    remaining = n_refs
    while remaining > 0:
        name_len = struct.unpack(int32, fread(4))[0]
        name = fread(name_len)
        references.append(name[:-1])
        # jump over chromosome size, we don't need it
        fseek(ftell() + 4)
        remaining -= 1

    n_entries = 0
    while True:
        try:
            entry_len = struct.unpack(int32, fread(4))[0]
        except struct.error:
            # Fewer than 4 bytes left: end of the entry stream.
            break
        record = fread(entry_len)
        (chrid, fpos, strand) = self.__fw_binary_parse(record)
        n_entries += 1
        if n_entries % step == 0:
            logging.info(" %d" % n_entries)
        if fpos >= 0:
            fwtrack.add_loc(references[chrid], fpos, strand)

    self.fhd.close()
    return fwtrack