Пример #1
0
def flagExonStatus(infile, outfile):
    '''
    Adds two attributes to each gtf entry:
    exon_status_locus - specifies whether the gene model is multi- or
    single-exon
    exon_status - specifies whether the transcript is multi- or single-exon

    Thin wrapper: all work is delegated to PipelineLncRNA.flagExonStatus,
    which is called with infile and outfile unchanged.
    '''

    PipelineLncRNA.flagExonStatus(infile, outfile)
Пример #2
0
def buildRefcodingGeneSetStats(infile, outfile):
    '''
    Write a one-row, tab-separated summary table for the gene set in infile.

    infile is first passed through PipelineLncRNA.flagExonStatus into a
    temporary ".gz" file in the current directory; every counter is then
    run on that temporary file. outfile receives a header line followed by
    a single row with these columns, in order:

    no_transcripts              - no. of transcripts
    no_genes                    - no. of genes
    no_exons_per_transcript     - average number of exons per transcript
    no_exons_per_gene           - average number of exons per gene
    no_single_exon_transcripts  - no. single exon transcripts
    no_multi_exon_transcripts   - no. multi-exon transcripts
    no_single_exon_genes        - no. single exon genes
    no_multi_exon_genes         - no. multi-exon genes

    The temporary file, its ".log" companion and the un-suffixed temporary
    file are removed whether or not the task succeeds. outfile is only
    opened once every count has been computed, so a failing counter does
    not leave a header-only output behind.
    '''

    header = [
        "no_transcripts", "no_genes", "no_exons_per_transcript",
        "no_exons_per_gene", "no_single_exon_transcripts",
        "no_multi_exon_transcripts", "no_single_exon_genes",
        "no_multi_exon_genes"
    ]

    # calculate exon status for refcoding genes.
    tmpf = P.getTempFilename(".") + ".gz"
    try:
        PipelineLncRNA.flagExonStatus(infile, tmpf)

        # Compute everything before touching outfile; the order here must
        # stay in step with the header columns above.
        counts = [
            PipelineLncRNA.CounterTranscripts(tmpf).count(),
            PipelineLncRNA.CounterGenes(tmpf).count(),
            PipelineLncRNA.CounterExonsPerTranscript(tmpf).count(),
            PipelineLncRNA.CounterExonsPerGene(tmpf).count(),
            PipelineLncRNA.CounterSingleExonTranscripts(tmpf).count(),
            PipelineLncRNA.CounterMultiExonTranscripts(tmpf).count(),
            PipelineLncRNA.CounterSingleExonGenes(tmpf).count(),
            PipelineLncRNA.CounterMultiExonGenes(tmpf).count()
        ]

        outf = iotools.openFile(outfile, "w")
        try:
            outf.write("\t".join(header) + "\n")
            outf.write("\t".join(map(str, counts)))
        finally:
            # Close explicitly so the row is flushed to disk and the
            # handle is released even if a write fails.
            outf.close()
    finally:
        # Best-effort cleanup: after a failure some of these files may
        # never have been created, and a missing file must not mask the
        # original exception.
        for path in (tmpf, tmpf + ".log", P.snip(tmpf, ".gz")):
            if os.path.exists(path):
                os.unlink(path)