def output_exons_as_gff(self, base_diff=6, const_only=False, cds_only=False):
    """
    Output exons for all genes as GFF, along with a tab-separated
    table mapping each gene to the labels of its exons.

    - base_diff: passed through to gene.compute_const_exons()
      (only used when const_only is True)
    - const_only: if True, output only constitutive exons
    - cds_only: if True, output CDS only exons

    The GFF file is named '<source>[.cds_only].<exons_type>.gff' and
    is written to self.exons_dir (or to self.const_exons_dir when
    const_only is True). The table is written next to it, with the
    '.gff' suffix replaced by '.to_genes.txt'.

    Returns None. If the GFF file already exists, nothing is written.
    """
    exons_type = "exons"
    exons_outdir = self.exons_dir
    if const_only:
        exons_type = "const_exons"
        exons_outdir = self.const_exons_dir
    if cds_only:
        exons_basename = "%s.cds_only.%s.gff" % (self.source, exons_type)
    else:
        exons_basename = "%s.%s.gff" % (self.source, exons_type)
    gff_output_filename = os.path.join(exons_outdir, exons_basename)
    # Single-argument parenthesised prints emit the same text whether
    # 'print' is a statement or a function.
    print("Outputting exons...")
    print(" - Exons type: %s" % (exons_type))
    print(" - Output file: %s" % (gff_output_filename))
    print(" - CDS only: %s" % (cds_only))
    if os.path.isfile(gff_output_filename):
        print("%s exists. Skipping.." % (gff_output_filename))
        return
    # Output a map from genes to exons for convenience
    genes_to_exons_fname = os.path.join(
        exons_outdir, exons_basename.replace(".gff", ".to_genes.txt"))
    rec_type = "exon"
    genes_to_exons = []
    genes_to_exons_header = ["gene_id", "exons"]
    # Own the file handle here so it is flushed and closed even if
    # writing a gene fails part-way through.
    # NOTE(review): assumes gff_utils.Writer writes straight to the
    # handle it is given and needs no close() of its own -- confirm.
    with open(gff_output_filename, "w") as gff_file:
        gff_out = gff_utils.Writer(gff_file)
        for gene_id, gene in self.genes.iteritems():
            if const_only:
                # Get only constitutive exons
                exons = gene.compute_const_exons(base_diff=base_diff,
                                                 cds_only=cds_only)
            elif cds_only:
                # Get all CDS exons
                exons = gene.cds_parts
            else:
                # Get all exons
                exons = gene.parts
            exon_labels = [e.label for e in exons]
            if exon_labels:
                exon_labels = ",".join(exon_labels)
            else:
                # Genes without exons still get a row in the table
                exon_labels = self.na_val
            genes_to_exons.append({"gene_id": gene_id,
                                   "exons": exon_labels})
            # Output this gene's exons to the GFF file
            GeneModel.output_parts_as_gff(gff_out,
                                          exons,
                                          gene.chrom,
                                          gene.strand,
                                          source=self.source,
                                          rec_type=rec_type,
                                          gene_id=gene_id)
    # Fix the column order on the frame itself: this avoids the 'cols'
    # keyword of to_csv (dropped from pandas) and still yields a
    # header-only table when there are no genes at all.
    genes_to_exons = pandas.DataFrame(genes_to_exons,
                                      columns=genes_to_exons_header)
    genes_to_exons.to_csv(genes_to_exons_fname,
                          index=False,
                          sep="\t")