def load_gff(gff):
    """Load the features of a GFF file, grouped by gene name.

    Lines that start with '#' or that contain the substring 'contig'
    anywhere are skipped. Every other line is parsed with ``GFF``, given an
    ``id`` attribute and appended to the list kept for its ``genename``.

    The id is whatever ``get_gene_attribute(feature, "ID")`` returns, except
    for CDS features: a CDS lying entirely inside an exon already read for
    the same gene is given ``<exon id>_CDS``. When several recorded exons
    contain the CDS, the last matching one (in insertion order) wins. Exons
    must precede their CDS lines in the file for this renaming to apply.

    Args:
        gff: Path of the GFF file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its features, in file
        order.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or read.
    """
    # NOTE(review): load_gff is defined twice in this file with the same
    # logic; this is the first copy and the later definition replaces it.
    # NOTE(review): the original text had lost its line breaks, so the
    # nesting below is reconstructed -- confirm the "No gene id" warning is
    # meant to run for every feature type and not only for CDS features.
    genes = defaultdict(list)
    # gene name -> {exon id -> (start, end)}
    exon_spans = defaultdict(dict)
    try:
        with open(gff) as handle:
            for line in handle:
                if line.startswith('#') or 'contig' in line:
                    continue
                feature = GFF(line)
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # An exon without an ID cannot name a CDS; storing it
                    # under None would make `exon_id + "_CDS"` raise below.
                    if gene_id is not None:
                        exon_spans[feature.genename][gene_id] = (
                            feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    spans = exon_spans[feature.genename]
                    for exon_id, (start, end) in spans.items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"
                if gene_id is None:
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id
                genes[feature.genename].append(feature)
    except IOError:
        print("Failed to load GFF file {}".format(gff))
        # Non-zero status so calling shells and pipelines see the failure.
        sys.exit(1)
    return genes
def load_gff(gff):
    """Load the features of a GFF file, grouped by gene name.

    Lines that start with '#' or that contain the substring 'contig'
    anywhere are skipped. Every other line is parsed with ``GFF``, given an
    ``id`` attribute and appended to the list kept for its ``genename``.

    The id is whatever ``get_gene_attribute(feature, "ID")`` returns, except
    for CDS features: a CDS lying entirely inside an exon already read for
    the same gene is given ``<exon id>_CDS``. When several recorded exons
    contain the CDS, the last matching one (in insertion order) wins. Exons
    must precede their CDS lines in the file for this renaming to apply.

    Args:
        gff: Path of the GFF file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its features, in file
        order.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or read.
    """
    # NOTE(review): load_gff is defined twice in this file with the same
    # logic; this later copy is the one callers actually get.
    # NOTE(review): the original text had lost its line breaks, so the
    # nesting below is reconstructed -- confirm the "No gene id" warning is
    # meant to run for every feature type and not only for CDS features.
    genes = defaultdict(list)
    # gene name -> {exon id -> (start, end)}
    exon_spans = defaultdict(dict)
    try:
        with open(gff) as handle:
            for line in handle:
                if line.startswith('#') or 'contig' in line:
                    continue
                feature = GFF(line)
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # An exon without an ID cannot name a CDS; storing it
                    # under None would make `exon_id + "_CDS"` raise below.
                    if gene_id is not None:
                        exon_spans[feature.genename][gene_id] = (
                            feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    spans = exon_spans[feature.genename]
                    for exon_id, (start, end) in spans.items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"
                if gene_id is None:
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id
                genes[feature.genename].append(feature)
    except IOError:
        print("Failed to load GFF file {}".format(gff))
        # Non-zero status so calling shells and pipelines see the failure.
        sys.exit(1)
    return genes
def load_crossmap(crossmapout):
    """Load features from CrossMap output, grouped by gene name.

    Two line shapes are recognised:

    * ``<feature>\\t->\\t<mapped feature>``: both halves are parsed with
      ``GFF``; the mapped half gets ``;Note=CrossMap`` appended to its
      attributes and is attached to the feature as ``feature.crossmap``.
    * a line containing ``fail``: the text before ``\\tfail`` is parsed as
      the feature and ``feature.crossmap`` is None.

    Lines matching neither shape (blank lines, comments) are skipped.

    Each feature is given an ``id``: the value returned by
    ``get_gene_attribute(feature, "ID")``, or ``<exon id>_CDS`` for a CDS
    lying entirely inside an exon already read for the same gene (the last
    matching exon in insertion order wins). A CDS whose mapped counterpart
    has a different length has its ``crossmap`` reset to None.

    Args:
        crossmapout: Path of the CrossMap output file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its features, in file
        order.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or read.
    """
    # NOTE(review): load_crossmap is defined twice in this file with the
    # same logic; this is the first copy and the later definition replaces
    # it.
    # NOTE(review): the original text had lost its line breaks, so the
    # nesting below is reconstructed -- confirm that the length check
    # belongs inside the CDS branch and that the "No gene id" warning is
    # meant to run for every feature type.
    genes = defaultdict(list)
    # gene name -> {exon id -> (start, end)}
    exon_spans = defaultdict(dict)
    try:
        with open(crossmapout) as handle:
            for line in handle:
                cm = None
                original, arrow, mapped = line.partition('\t->\t')
                if arrow:
                    featureline = original
                    cm = GFF(mapped)
                    cm.attributes += ';Note=CrossMap'
                elif 'fail' in line:
                    featureline = line.split('\tfail')[0]
                else:
                    # Neither marker present: without this guard the
                    # previous line's text would be parsed a second time
                    # (or the name would be unbound on the first line).
                    continue
                feature = GFF(featureline)
                feature.crossmap = cm
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # An exon without an ID cannot name a CDS; storing it
                    # under None would make `exon_id + "_CDS"` raise below.
                    if gene_id is not None:
                        exon_spans[feature.genename][gene_id] = (
                            feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    spans = exon_spans[feature.genename]
                    for exon_id, (start, end) in spans.items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"
                    if feature.crossmap:
                        # Lose CrossMap features where the CDSs aren't the
                        # same length
                        cds_len = feature.end - feature.start + 1
                        cm_len = (feature.crossmap.end
                                  - feature.crossmap.start + 1)
                        if cds_len != cm_len:
                            feature.crossmap = None
                if gene_id is None:
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id
                genes[feature.genename].append(feature)
    except IOError:
        print("Failed to load CrossMap output {}".format(crossmapout))
        # Non-zero status so calling shells and pipelines see the failure.
        sys.exit(1)
    return genes
def load_crossmap(crossmapout):
    """Load features from CrossMap output, grouped by gene name.

    Two line shapes are recognised:

    * ``<feature>\\t->\\t<mapped feature>``: both halves are parsed with
      ``GFF``; the mapped half gets ``;Note=CrossMap`` appended to its
      attributes and is attached to the feature as ``feature.crossmap``.
    * a line containing ``fail``: the text before ``\\tfail`` is parsed as
      the feature and ``feature.crossmap`` is None.

    Lines matching neither shape (blank lines, comments) are skipped.

    Each feature is given an ``id``: the value returned by
    ``get_gene_attribute(feature, "ID")``, or ``<exon id>_CDS`` for a CDS
    lying entirely inside an exon already read for the same gene (the last
    matching exon in insertion order wins). A CDS whose mapped counterpart
    has a different length has its ``crossmap`` reset to None.

    Args:
        crossmapout: Path of the CrossMap output file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its features, in file
        order.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or read.
    """
    # NOTE(review): load_crossmap is defined twice in this file with the
    # same logic; this later copy is the one callers actually get.
    # NOTE(review): the original text had lost its line breaks, so the
    # nesting below is reconstructed -- confirm that the length check
    # belongs inside the CDS branch and that the "No gene id" warning is
    # meant to run for every feature type.
    genes = defaultdict(list)
    # gene name -> {exon id -> (start, end)}
    exon_spans = defaultdict(dict)
    try:
        with open(crossmapout) as handle:
            for line in handle:
                cm = None
                original, arrow, mapped = line.partition('\t->\t')
                if arrow:
                    featureline = original
                    cm = GFF(mapped)
                    cm.attributes += ';Note=CrossMap'
                elif 'fail' in line:
                    featureline = line.split('\tfail')[0]
                else:
                    # Neither marker present: without this guard the
                    # previous line's text would be parsed a second time
                    # (or the name would be unbound on the first line).
                    continue
                feature = GFF(featureline)
                feature.crossmap = cm
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # An exon without an ID cannot name a CDS; storing it
                    # under None would make `exon_id + "_CDS"` raise below.
                    if gene_id is not None:
                        exon_spans[feature.genename][gene_id] = (
                            feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    spans = exon_spans[feature.genename]
                    for exon_id, (start, end) in spans.items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"
                    if feature.crossmap:
                        # Lose CrossMap features where the CDSs aren't the
                        # same length
                        cds_len = feature.end - feature.start + 1
                        cm_len = (feature.crossmap.end
                                  - feature.crossmap.start + 1)
                        if cds_len != cm_len:
                            feature.crossmap = None
                if gene_id is None:
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id
                genes[feature.genename].append(feature)
    except IOError:
        print("Failed to load CrossMap output {}".format(crossmapout))
        # Non-zero status so calling shells and pipelines see the failure.
        sys.exit(1)
    return genes