def parse(self):
    """Feed the nine-column feature lines of ``self.path`` to ``self.handler``.

    Lines starting with ``"browser "`` are skipped.  A line starting with
    ``"track "`` is read as ``key=value`` pairs (shell-style quoting) into
    the track info and forces a fresh track declaration on the next feature
    line.  Every other line is split on tabs, or on any whitespace when it
    contains no tab; the first column is the chromosome and the remaining
    columns are normalised in place before being handed to
    ``self.handler.newFeature``.

    Problems are reported through ``self.handler.error(message, path,
    number)``; whether that call raises is up to the handler and cannot be
    seen from here.
    """
    # State carried from one line to the next #
    track_info = {}
    declared = []
    for number, line in iterate_lines(self.path):
        # Browser lines carry nothing for us #
        if line.startswith("browser "):
            continue
        # A track header replaces the info and resets the declaration #
        if line.startswith("track "):
            try:
                track_info = dict(pair.split('=', 1) for pair in shlex.split(line[6:]))
            except ValueError:
                self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
            declared = []
            continue
        # Tabs first, any whitespace as a fallback #
        columns = line.split('\t')
        if len(columns) == 1:
            columns = line.split()
        # The chromosome is taken off the front #
        chrom = columns.pop(0)
        # Eight columns must remain #
        if len(columns) != 8:
            self.handler.error("The track%s doesn't have nine columns", self.path, number)
        # Declare the track the first time a feature shows up #
        if not declared:
            self.handler.newTrack(track_info, self.name)
            declared = all_fields[0:len(columns)]
            self.handler.defineFields(declared)
        # Source and name: a lone dot means "empty" #
        for index in (0, 1):
            if columns[index] == '.':
                columns[index] = ''
        # Interval bounds #
        try:
            columns[2] = int(columns[2])
            columns[3] = int(columns[3])
        except ValueError:
            self.handler.error("The track%s has non integers as interval bounds", self.path, number)
        if columns[3] <= columns[2]:
            self.handler.error("The track%s has negative or null intervals", self.path, number)
        # Score: dot or empty counts as zero #
        if columns[4] in ('.', ''):
            columns[4] = 0.0
        try:
            columns[4] = float(columns[4])
        except ValueError:
            self.handler.error("The track%s has non floats as score values", self.path, number)
        # Strand #
        columns[5] = strand_to_int(columns[5])
        # Frame: integer unless it is a dot #
        if columns[6] != '.':
            try:
                columns[6] = int(columns[6])
            except ValueError:
                self.handler.error("The track%s has non integers as frame value", self.path, number)
        else:
            columns[6] = None
        # Group or attribute column #
        if columns[7] == '.':
            columns[7] = ''
        # Hand the feature over #
        self.handler.newFeature(chrom, columns)
def parse(self):
    """Read position/count lines from ``self.path`` and emit merged features.

    A single track is declared up front with ``all_fields``.  Lines starting
    with ``"browser "`` or ``"track "`` are skipped.  Each remaining line is
    split on tabs (or on whitespace when it has no tab) into chromosome,
    name, position, strand and score.  A position ``pos`` becomes the
    interval ``(pos - 1, pos)``.  Lines whose score is zero are dropped.
    Consecutive lines with the same chromosome, name, strand and score whose
    interval starts exactly where the previous one ended are merged into one
    feature.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.
    """
    # One track for the whole file #
    self.handler.newTrack({'int_to_float':'score'}, self.name)
    self.handler.defineFields(all_fields)
    # The feature currently being accumulated #
    prev_chrom = prev_name = prev_start = prev_end = prev_strand = prev_score = None
    for number, line in iterate_lines(self.path):
        # Header lines are not used #
        if line.startswith("browser ") or line.startswith("track "):
            continue
        # Tabs first, any whitespace as a fallback #
        columns = line.split('\t')
        if len(columns) == 1:
            columns = line.split()
        # At least five columns are needed #
        if len(columns) < 5:
            self.handler.error("The track%s has less than five columns", self.path, number)
        chrom = columns.pop(0)
        name = columns[0]
        # Position turns into a one-wide interval #
        try:
            pos = int(columns[1])
        except ValueError:
            self.handler.error("The track%s has non integers as position", self.path, number)
        start, end = pos - 1, pos
        strand = strand_to_int(columns[2])
        # Tag count #
        try:
            score = int(columns[3])
        except ValueError:
            self.handler.error("The track%s has non integers as tag count values", self.path, number)
        # Zero counts are not reported #
        if score == 0:
            continue
        # Extend the pending feature when this line continues it #
        same_signature = (prev_chrom, prev_name, prev_strand, prev_score) == (chrom, name, strand, score)
        if same_signature and start == prev_end:
            prev_end = end
            continue
        # Otherwise flush the pending feature and start a new one #
        if prev_chrom:
            self.handler.newFeature(prev_chrom, (prev_name, prev_start, prev_end, prev_strand, prev_score))
        prev_chrom, prev_name, prev_start = chrom, name, start
        prev_end, prev_strand, prev_score = end, strand, score
    # Flush whatever is still pending #
    if prev_chrom:
        self.handler.newFeature(prev_chrom, (prev_name, prev_start, prev_end, prev_strand, prev_score))
def parse(self):
    """Read tab-separated position/count lines from ``self.path``.

    A single track is declared up front with ``all_fields``.  Every line of
    the file is split on tabs into chromosome, name, position, strand and
    score, and forwarded to ``self.handler.newFeature`` as the tuple
    ``(name, pos - 1, pos, strand, score)``.  The line number passed to the
    error handler is the zero-based index from ``enumerate``.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.
    """
    # One track for the whole file #
    self.handler.newTrack({'int_to_float':'score'}, self.name)
    self.handler.defineFields(all_fields)
    with open(self.path) as source:
        for number, line in enumerate(source):
            columns = line.split('\t')
            # First column is the chromosome, the rest is the payload #
            chrom, payload = columns[0], columns[1:]
            if len(payload) < 4:
                self.handler.error("The track%s doesn't have five columns", self.path, number)
            name = payload[0]
            # Position #
            try:
                pos = int(payload[1])
            except ValueError:
                self.handler.error("The track%s has non integers as position", self.path, number)
            # Strand #
            strand = strand_to_int(payload[2])
            # Tag count #
            try:
                score = int(payload[3])
            except ValueError:
                self.handler.error("The track%s has non integers as tag count values", self.path, number)
            # Hand the feature over as a one-wide interval #
            feature = (name, pos - 1, pos, strand, score)
            self.handler.newFeature(chrom, feature)
def parse(self):
    """Parse the GTF-style file at ``self.path`` and feed ``self.handler``.

    Lines starting with ``"browser "`` are skipped.  A line starting with
    ``"track "`` is read as ``key=value`` pairs (shell-style quoting) into
    the track info and causes a new track to be declared on the next
    feature line.  Every other line is split on tabs, or on whitespace when
    it has no tab; anything beyond the eighth piece is re-joined with single
    spaces so that the attribute column stays in one piece.

    The attribute column is tokenised with ``shlex.split`` and read as
    alternating key / value tokens (a trailing ``;`` is stripped from each
    value).  The keys are appended to ``all_fields`` for
    ``defineFields`` and the values are appended to the feature, in the
    order in which they appear on the line.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.

    Raises:
        AssertionError: when the attribute keys do not start with
            ``gene_id`` followed by ``transcript_id``.  The message now
            includes the offending attribute column.
    """
    # Initial variables #
    info = {}
    declare_track = True
    # Main loop #
    for number, line in iterate_lines(self.path):
        # Ignored lines #
        if line.startswith("browser "):
            continue
        # Track headers #
        if line.startswith("track "):
            try:
                info = dict([p.split('=', 1) for p in shlex.split(line[6:])])
            except ValueError:
                self.handler.error(
                    "The track%s seems to have an invalid <track> header line",
                    self.path, number)
            declare_track = True
            continue
        # Split the lines #
        items = line.split('\t')
        if len(items) == 1:
            items = line.split()
        # Keep the attribute column in one piece #
        if len(items) > 8:
            items = items[0:8] + [' '.join(items[8:])]
        # Chromosome #
        chrom = items.pop(0)
        # Length is nine #
        if len(items) != 8:
            self.handler.error("The track%s doesn't have nine columns", self.path, number)
        # Have we started a track already ? #
        if declare_track:
            declare_track = False
            self.handler.newTrack(info, self.name)
        # Source field #
        if items[0] == '.':
            items[0] = ''
        # Name field #
        if items[1] == '.':
            items[1] = ''
        # Start and end fields #
        try:
            items[2] = int(items[2])
            items[3] = int(items[3])
        except ValueError:
            self.handler.error(
                "The track%s has non integers as interval bounds",
                self.path, number)
        # NOTE(review): the score column (items[4]) is forwarded untouched,
        # i.e. as the raw string -- confirm that this is intended.
        # Strand field #
        items[5] = strand_to_int(items[5])
        # Frame field #
        if items[6] == '.':
            items[6] = None
        else:
            try:
                items[6] = int(items[6])
            except ValueError:
                self.handler.error(
                    "The track%s has non integers as frame value",
                    self.path, number)
        # The last special column; keep the raw text for error reporting #
        raw_attributes = items.pop()
        attr = shlex.split(raw_attributes)
        attr = [(attr[i], attr[i + 1].strip(';'))
                for i in range(0, len(attr), 2)]
        # Not using dict to preserve annotation order #
        keys = [pair[0] for pair in attr]
        values = [pair[1] for pair in attr]
        # GTF attribute column must have annotations starting with
        # "gene_id" and "transcript_id".  An explicit raise is used instead
        # of ``assert`` so the check survives ``python -O`` and the message
        # placeholder is actually filled in.
        if keys[:2] != ["gene_id", "transcript_id"]:
            raise AssertionError(
                "Invalid attribute column: %r. Valid attributes begin with "
                "\"gene_id\" and \"transcript_id\"" % (raw_attributes,))
        self.handler.defineFields(all_fields + keys)
        items += values
        # Yield it #
        self.handler.newFeature(chrom, items)
def parse(self):
    """Walk through ``self.path`` and forward nine-column features.

    ``"browser "`` lines are ignored.  A ``"track "`` line is parsed as
    ``key=value`` pairs (shell-style quoting) into the track info and makes
    the next feature line declare a new track.  Other lines are split on
    tabs, falling back to whitespace when there is no tab.  The first column
    is the chromosome; the eight that follow are converted in place (dots
    become empty strings or ``None``, bounds and frame become ints, score
    becomes a float, strand goes through ``strand_to_int``) and sent to
    ``self.handler.newFeature``.

    Problems are reported through ``self.handler.error(message, path,
    number)``; whether that call raises is up to the handler and cannot be
    seen from here.
    """
    header = {}
    current_fields = []
    for number, line in iterate_lines(self.path):
        if line.startswith("browser "):
            # Nothing to read on browser lines #
            continue
        if line.startswith("track "):
            # New header: remember it and re-declare on the next feature #
            try:
                pairs = [chunk.split('=', 1) for chunk in shlex.split(line[6:])]
                header = dict(pairs)
            except ValueError:
                self.handler.error(
                    "The track%s seems to have an invalid <track> header line",
                    self.path, number)
            current_fields = []
            continue
        # Column splitting #
        cells = line.split('\t')
        if len(cells) == 1:
            cells = line.split()
        chrom = cells.pop(0)
        # Column count #
        if len(cells) != 8:
            self.handler.error("The track%s doesn't have nine columns", self.path, number)
        # Track declaration, once per header #
        if not current_fields:
            self.handler.newTrack(header, self.name)
            current_fields = all_fields[0:len(cells)]
            self.handler.defineFields(current_fields)
        # Source #
        if cells[0] == '.':
            cells[0] = ''
        # Name #
        if cells[1] == '.':
            cells[1] = ''
        # Start and end #
        try:
            cells[2] = int(cells[2])
            cells[3] = int(cells[3])
        except ValueError:
            self.handler.error(
                "The track%s has non integers as interval bounds",
                self.path, number)
        if cells[3] <= cells[2]:
            self.handler.error(
                "The track%s has negative or null intervals",
                self.path, number)
        # Score #
        if cells[4] == '' or cells[4] == '.':
            cells[4] = 0.0
        try:
            cells[4] = float(cells[4])
        except ValueError:
            self.handler.error(
                "The track%s has non floats as score values",
                self.path, number)
        # Strand #
        cells[5] = strand_to_int(cells[5])
        # Frame #
        if cells[6] == '.':
            cells[6] = None
        else:
            try:
                cells[6] = int(cells[6])
            except ValueError:
                self.handler.error(
                    "The track%s has non integers as frame value",
                    self.path, number)
        # Group or attribute #
        if cells[7] == '.':
            cells[7] = ''
        # Emit #
        self.handler.newFeature(chrom, cells)
def parse(self):
    """Parse the GTF-style file at ``self.path`` and feed ``self.handler``.

    Lines starting with ``"browser "`` are skipped.  A line starting with
    ``"track "`` is read as ``key=value`` pairs (shell-style quoting) into
    the track info and causes a new track to be declared on the next
    feature line.  Every other line is split on tabs, or on whitespace when
    it has no tab; anything beyond the eighth piece is re-joined with single
    spaces so that the attribute column stays in one piece.

    The attribute column is tokenised with ``shlex.split`` and read as
    alternating key / value tokens (a trailing ``;`` is stripped from each
    value).  The keys are appended to ``all_fields`` for
    ``defineFields`` and the values are appended to the feature, in the
    order in which they appear on the line.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.

    Raises:
        AssertionError: when the attribute keys do not start with
            ``gene_id`` followed by ``transcript_id``.  The message now
            includes the offending attribute column.
    """
    # Initial variables #
    info = {}
    declare_track = True
    # Main loop #
    for number, line in iterate_lines(self.path):
        # Ignored lines #
        if line.startswith("browser "):
            continue
        # Track headers #
        if line.startswith("track "):
            try:
                info = dict([p.split('=', 1) for p in shlex.split(line[6:])])
            except ValueError:
                self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
            declare_track = True
            continue
        # Split the lines #
        items = line.split('\t')
        if len(items) == 1:
            items = line.split()
        # Keep the attribute column in one piece #
        if len(items) > 8:
            items = items[0:8] + [' '.join(items[8:])]
        # Chromosome #
        chrom = items.pop(0)
        # Length is nine #
        if len(items) != 8:
            self.handler.error("The track%s doesn't have nine columns", self.path, number)
        # Have we started a track already ? #
        if declare_track:
            declare_track = False
            self.handler.newTrack(info, self.name)
        # Source field #
        if items[0] == '.':
            items[0] = ''
        # Name field #
        if items[1] == '.':
            items[1] = ''
        # Start and end fields #
        try:
            items[2] = int(items[2])
            items[3] = int(items[3])
        except ValueError:
            self.handler.error("The track%s has non integers as interval bounds", self.path, number)
        # NOTE(review): the score column (items[4]) is forwarded untouched,
        # i.e. as the raw string -- confirm that this is intended.
        # Strand field #
        items[5] = strand_to_int(items[5])
        # Frame field #
        if items[6] == '.':
            items[6] = None
        else:
            try:
                items[6] = int(items[6])
            except ValueError:
                self.handler.error("The track%s has non integers as frame value", self.path, number)
        # The last special column; keep the raw text for error reporting #
        raw_attributes = items.pop()
        tokens = shlex.split(raw_attributes)
        attr = [(tokens[i], tokens[i + 1].strip(';'))
                for i in range(0, len(tokens), 2)]
        # Not using dict to preserve annotation order #
        keys = [key for key, _ in attr]
        values = [value for _, value in attr]
        # GTF attribute column must have annotations starting with
        # "gene_id" and "transcript_id".  An explicit raise is used instead
        # of ``assert`` so the check survives ``python -O`` and the message
        # placeholder is actually filled in.
        if keys[:2] != ["gene_id", "transcript_id"]:
            raise AssertionError(
                "Invalid attribute column: %r. Valid attributes begin with "
                "\"gene_id\" and \"transcript_id\"" % (raw_attributes,))
        self.handler.defineFields(all_fields + keys)
        items += values
        # Yield it #
        self.handler.newFeature(chrom, items)
def parse(self):
    """Parse the interval file at ``self.path`` and feed ``self.handler``.

    Lines starting with ``"browser "`` are skipped.  A line starting with
    ``"track "`` is read as ``key=value`` pairs (shell-style quoting) into
    the track info and causes a new track to be declared on the next
    feature line, with as many entries of ``all_fields`` as that line has
    columns after the chromosome.

    Every other line is split on tabs, or on whitespace when it has no tab.
    After the chromosome come the start and end (required, converted to
    int) and then up to five optional columns that are converted in place
    when present: name, score, strand, thick start and thick end.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.  The
    feature is handed to ``newFeature`` only after validation has finished
    without an exception.
    """
    # Initial variables #
    fields = []
    info = {}
    # Main loop #
    for number, line in iterate_lines(self.path):
        # Ignored lines #
        if line.startswith("browser "):
            continue
        # Track headers #
        if line.startswith("track "):
            try:
                info = dict([p.split('=', 1) for p in shlex.split(line[6:])])
            except ValueError:
                self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
            fields = []
            continue
        # Split the lines #
        items = line.split('\t')
        if len(items) == 1:
            items = line.split()
        # Chromosome #
        chrom = items.pop(0)
        # Have we started a track already ? #
        if not fields:
            self.handler.newTrack(info, self.name)
            fields = all_fields[0:len(items)]
            self.handler.defineFields(fields)
        # Start and end fields #
        try:
            items[0] = int(items[0])
            items[1] = int(items[1])
        except ValueError:
            self.handler.error("The track%s has non integers as interval bounds", self.path, number)
        except IndexError:
            self.handler.error("The track%s has less than two columns", self.path, number)
        # All following fields are optional #
        try:
            # Name field #
            if items[2] == '.':
                items[2] = ''
            # Score field #
            if items[3] == '.' or items[3] == '':
                items[3] = 0.0
            try:
                items[3] = float(items[3])
            except ValueError:
                self.handler.error("The track%s has non floats as score values", self.path, number)
            # Strand field #
            items[4] = strand_to_int(items[4])
            # NOTE(review): thick start/end are converted with float() while
            # the messages speak of integers -- confirm which is intended.
            # Thick starts #
            try:
                items[5] = float(items[5])
            except ValueError:
                self.handler.error("The track%s has non integers as thick starts", self.path, number)
            # Thick ends #
            try:
                items[6] = float(items[6])
            except ValueError:
                self.handler.error("The track%s has non integers as thick ends", self.path, number)
            # Too many fields #
            if len(items) > 11:
                self.handler.error("The track%s has more than twelve columns", self.path, number)
        # Running off the end of the line just means the optional columns stop here #
        except IndexError:
            pass
        # Emit only once validation is over.  This used to live in a
        # ``finally`` clause, which also ran while an exception was
        # propagating and so pushed a half-validated feature to the handler.
        self.handler.newFeature(chrom, items)
def parse(self):
    """Parse nine-column lines with a key/value attribute column.

    ``"browser "`` lines are ignored.  A ``"track "`` line is parsed as
    ``key=value`` pairs (shell-style quoting) into the track info and makes
    the next feature line declare a new track.  Other lines are split on
    tabs, falling back to whitespace; everything past the eighth piece is
    re-joined with single spaces so the attribute column stays whole.

    The attribute column is tokenised with ``shlex.split`` and read as
    alternating key / value tokens (a trailing ``;`` is stripped from the
    values) into a dict.  Its keys extend ``all_fields`` for
    ``defineFields`` and its values are appended to the feature.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.
    """
    track_info = {}
    needs_declaration = True
    for number, line in iterate_lines(self.path):
        # Browser lines carry nothing for us #
        if line.startswith("browser "):
            continue
        # A track header replaces the info and asks for a new declaration #
        if line.startswith("track "):
            try:
                track_info = dict(
                    pair.split('=', 1) for pair in shlex.split(line[6:]))
            except ValueError:
                self.handler.error(
                    "The track%s seems to have an invalid <track> header line",
                    self.path, number)
            needs_declaration = True
            continue
        # Tabs first, any whitespace as a fallback #
        columns = line.split('\t')
        if len(columns) == 1:
            columns = line.split()
        # Glue the attribute column back together #
        if len(columns) > 8:
            columns = columns[0:8] + [' '.join(columns[8:])]
        chrom = columns.pop(0)
        # Eight columns must remain #
        if len(columns) != 8:
            self.handler.error("The track%s doesn't have nine columns", self.path, number)
        # Declare the track once per header #
        if needs_declaration:
            needs_declaration = False
            self.handler.newTrack(track_info, self.name)
        # Source and name: a lone dot means "empty" #
        for index in (0, 1):
            if columns[index] == '.':
                columns[index] = ''
        # Interval bounds #
        try:
            columns[2] = int(columns[2])
            columns[3] = int(columns[3])
        except ValueError:
            self.handler.error(
                "The track%s has non integers as interval bounds",
                self.path, number)
        # Score: dot or empty counts as zero #
        if columns[4] in ('.', ''):
            columns[4] = 0.0
        try:
            columns[4] = float(columns[4])
        except ValueError:
            self.handler.error(
                "The track%s has non floats as score values",
                self.path, number)
        # Strand #
        columns[5] = strand_to_int(columns[5])
        # Frame: integer unless it is a dot #
        if columns[6] != '.':
            try:
                columns[6] = int(columns[6])
            except ValueError:
                self.handler.error(
                    "The track%s has non integers as frame value",
                    self.path, number)
        else:
            columns[6] = None
        # Attribute column: alternating key / value tokens #
        tokens = shlex.split(columns.pop())
        attr = dict((tokens[i], tokens[i + 1].strip(';'))
                    for i in xrange(0, len(tokens), 2))
        self.handler.defineFields(all_fields + attr.keys())
        columns += attr.values()
        # Hand the feature over #
        self.handler.newFeature(chrom, columns)
def parse(self):
    """Walk through ``self.path`` and forward features with their attributes.

    Lines starting with ``"browser "`` are skipped.  A line starting with
    ``"track "`` is read as ``key=value`` pairs (shell-style quoting) into
    the track info; the next feature line then declares a new track.  Any
    other line is split on tabs, or on whitespace when it has no tab, and
    whatever follows the eighth piece is re-joined with single spaces.

    The last column is tokenised with ``shlex.split`` and turned into a dict
    of alternating key / value tokens (values lose a trailing ``;``).  The
    dict keys extend ``all_fields`` for ``defineFields`` and the dict values
    are appended to the feature.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.
    """
    header = {}
    pending_declaration = True
    for number, line in iterate_lines(self.path):
        if line.startswith("browser "):
            # Nothing to read on browser lines #
            continue
        if line.startswith("track "):
            # New header: remember it and declare on the next feature #
            try:
                pairs = [chunk.split('=', 1) for chunk in shlex.split(line[6:])]
                header = dict(pairs)
            except ValueError:
                self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
            pending_declaration = True
            continue
        # Column splitting #
        cells = line.split('\t')
        if len(cells) == 1:
            cells = line.split()
        if len(cells) > 8:
            tail = ' '.join(cells[8:])
            cells = cells[0:8] + [tail]
        chrom = cells.pop(0)
        # Column count #
        if len(cells) != 8:
            self.handler.error("The track%s doesn't have nine columns", self.path, number)
        # Track declaration, once per header #
        if pending_declaration:
            pending_declaration = False
            self.handler.newTrack(header, self.name)
        # Source #
        if cells[0] == '.':
            cells[0] = ''
        # Name #
        if cells[1] == '.':
            cells[1] = ''
        # Start and end #
        try:
            cells[2] = int(cells[2])
            cells[3] = int(cells[3])
        except ValueError:
            self.handler.error("The track%s has non integers as interval bounds", self.path, number)
        # Score #
        if cells[4] == '' or cells[4] == '.':
            cells[4] = 0.0
        try:
            cells[4] = float(cells[4])
        except ValueError:
            self.handler.error("The track%s has non floats as score values", self.path, number)
        # Strand #
        cells[5] = strand_to_int(cells[5])
        # Frame #
        if cells[6] == '.':
            cells[6] = None
        else:
            try:
                cells[6] = int(cells[6])
            except ValueError:
                self.handler.error("The track%s has non integers as frame value", self.path, number)
        # Attributes: alternating key / value tokens collected in a dict #
        words = shlex.split(cells.pop())
        attr = dict([(words[i], words[i + 1].strip(';'))
                     for i in xrange(0, len(words), 2)])
        self.handler.defineFields(all_fields + attr.keys())
        cells += attr.values()
        # Emit #
        self.handler.newFeature(chrom, cells)
def parse(self):
    """Parse the interval file at ``self.path`` and feed ``self.handler``.

    Lines starting with ``"browser "`` are skipped.  A line starting with
    ``"track "`` is read as ``key=value`` pairs (shell-style quoting) into
    the track info and causes a new track to be declared on the next
    feature line, with as many entries of ``all_fields`` as that line has
    columns after the chromosome.

    Every other line is split on tabs, or on whitespace when it has no tab.
    After the chromosome come the start and end (required, converted to
    int) and then up to five optional columns that are converted in place
    when present: name, score, strand, thick start and thick end.

    Problems are reported through ``self.handler.error``; whether that call
    raises is decided by the handler, which is not visible here.  The
    feature is handed to ``newFeature`` only after validation has finished
    without an exception.
    """
    # Initial variables #
    fields = []
    info = {}
    # Main loop #
    for number, line in iterate_lines(self.path):
        # Ignored lines #
        if line.startswith("browser "):
            continue
        # Track headers #
        if line.startswith("track "):
            try:
                info = dict(
                    [p.split('=', 1) for p in shlex.split(line[6:])])
            except ValueError:
                self.handler.error(
                    "The track%s seems to have an invalid <track> header line",
                    self.path, number)
            fields = []
            continue
        # Split the lines #
        items = line.split('\t')
        if len(items) == 1:
            items = line.split()
        # Chromosome #
        chrom = items.pop(0)
        # Have we started a track already ? #
        if not fields:
            self.handler.newTrack(info, self.name)
            fields = all_fields[0:len(items)]
            self.handler.defineFields(fields)
        # Start and end fields #
        try:
            items[0] = int(items[0])
            items[1] = int(items[1])
        except ValueError:
            self.handler.error(
                "The track%s has non integers as interval bounds",
                self.path, number)
        except IndexError:
            self.handler.error("The track%s has less than two columns", self.path, number)
        # All following fields are optional #
        try:
            # Name field #
            if items[2] == '.':
                items[2] = ''
            # Score field #
            if items[3] == '.' or items[3] == '':
                items[3] = 0.0
            try:
                items[3] = float(items[3])
            except ValueError:
                self.handler.error(
                    "The track%s has non floats as score values",
                    self.path, number)
            # Strand field #
            items[4] = strand_to_int(items[4])
            # NOTE(review): thick start/end are converted with float() while
            # the messages speak of integers -- confirm which is intended.
            # Thick starts #
            try:
                items[5] = float(items[5])
            except ValueError:
                self.handler.error(
                    "The track%s has non integers as thick starts",
                    self.path, number)
            # Thick ends #
            try:
                items[6] = float(items[6])
            except ValueError:
                self.handler.error(
                    "The track%s has non integers as thick ends",
                    self.path, number)
            # Too many fields #
            if len(items) > 11:
                self.handler.error(
                    "The track%s has more than twelve columns",
                    self.path, number)
        # Running off the end of the line just means the optional columns stop here #
        except IndexError:
            pass
        # Emit only once validation is over.  This used to live in a
        # ``finally`` clause, which also ran while an exception was
        # propagating and so pushed a half-validated feature to the handler.
        self.handler.newFeature(chrom, items)