Пример #1
0
def buildGenomicFunctionalAnnotation(infiles, outfiles):
    """Build the genomic functional annotation from a territories GTF file.

    The first entry of *infiles* is used as the territories GTF file.
    The work is delegated to
    ``PipelineGeneset.buildGenomicFunctionalAnnotation``, which receives
    that file, a database handle obtained from ``connect()`` and
    *outfiles*.
    """
    gtf_path = infiles[0]

    PipelineGeneset.buildGenomicFunctionalAnnotation(
        gtf_path,
        dbh=connect(),
        outfiles=outfiles,
    )
Пример #2
0
def annotateGenome(infile, outfile):
    """Annotate the genome using protein coding genes only.

    Processed transcripts are left out because they tend to cover
    larger genomic regions and often overlap between adjacent protein
    coding genes. The work is delegated to
    ``PipelineGeneset.annotateGenome`` with ``only_proteincoding=True``.
    """
    PipelineGeneset.annotateGenome(
        infile,
        outfile,
        only_proteincoding=True,
    )
Пример #3
0
def buildGenomicFunctionalAnnotation(infiles, outfiles):
    """Build the genomic functional annotation from a territories GTF file.

    The first entry of *infiles* is used as the territories GTF file.
    The work is delegated to
    ``PipelineGeneset.buildGenomicFunctionalAnnotation``, which receives
    that file, a database handle obtained from ``connect()``,
    *outfiles* and the ``job_memory`` setting read from ``PARAMS``.
    """
    gtf_path = infiles[0]

    PipelineGeneset.buildGenomicFunctionalAnnotation(
        gtf_path,
        dbh=connect(),
        outfiles=outfiles,
        job_memory=PARAMS["job_memory"],
    )
Пример #4
0
def annotateGeneStructure(infile, outfile):
    """Annotate gene structure using protein coding genes only.

    Processed transcripts are left out because they tend to cover
    larger genomic regions and often overlap between adjacent protein
    coding genes. The work is delegated to
    ``PipelineGeneset.annotateGeneStructure`` with
    ``only_proteincoding=True`` and the ``job_memory`` setting read
    from ``PARAMS``.
    """
    PipelineGeneset.annotateGeneStructure(
        infile,
        outfile,
        only_proteincoding=True,
        job_memory=PARAMS["job_memory"],
    )
Пример #5
0
def collectCpGIslands(infile, outfile):
    '''select CpG islands from UCSC and write to *outfile* as
    tab-separated rows.

    The ``cpgIslandExt`` table is queried through the connection
    returned by ``PipelineGeneset.connectToUCSC()``. Each output line
    holds the columns ``chrom``, ``chromStart``, ``chromEnd``, ``name``
    and ``obsExp`` in that order, each converted with ``str``.

    *infile* is not read by this function.
    '''

    dbhandle = PipelineGeneset.connectToUCSC()

    cc = dbhandle.cursor()
    table = "cpgIslandExt"
    # Explicit mapping instead of ``locals()`` so that the statement
    # only depends on the one name it actually interpolates.
    sql = """SELECT chrom, chromStart, chromEnd, name, obsExp
               FROM %(table)s
    """ % {"table": table}
    E.debug("executing sql statement: %s" % sql)
    cc.execute(sql)

    outf = IOTools.openFile(outfile, "w")
    # try/finally guarantees the handle is closed even if fetching or
    # writing a row raises.
    try:
        for data in cc.fetchall():
            outf.write("\t".join(map(str, data)) + "\n")
    finally:
        outf.close()
Пример #6
0
def collectCpGIslands(infile, outfile):
    '''select CpG islands from UCSC and write to *outfile* as
    tab-separated rows.

    The ``cpgIslandExt`` table is queried through the connection
    returned by ``PipelineGeneset.connectToUCSC()``. Each output line
    holds the columns ``chrom``, ``chromStart``, ``chromEnd``, ``name``
    and ``obsExp`` in that order, each converted with ``str``.

    *infile* is not read by this function.
    '''

    dbhandle = PipelineGeneset.connectToUCSC()

    cc = dbhandle.cursor()
    table = "cpgIslandExt"
    # Explicit mapping instead of ``locals()`` so that the statement
    # only depends on the one name it actually interpolates.
    sql = """SELECT chrom, chromStart, chromEnd, name, obsExp
               FROM %(table)s
    """ % {"table": table}
    E.debug("executing sql statement: %s" % sql)
    cc.execute(sql)

    outf = IOTools.openFile(outfile, "w")
    # try/finally guarantees the handle is closed even if fetching or
    # writing a row raises.
    try:
        for data in cc.fetchall():
            outf.write("\t".join(map(str, data)) + "\n")
    finally:
        outf.close()
Пример #7
0
def loadGeneSetGeneInformation(infile, outfile):
    """Load gene statistics via ``PipelineGeneset.loadGeneStats``.

    *infile* and *outfile* are passed through unchanged.
    """
    PipelineGeneset.loadGeneStats(
        infile,
        outfile,
    )
Пример #8
0
def loadGeneSetGeneInformation(infile, outfile):
    """Load gene statistics via ``PipelineGeneset.loadGeneStats``.

    *infile* and *outfile* are passed through unchanged.
    """
    PipelineGeneset.loadGeneStats(
        infile,
        outfile,
    )