def buildGenomicFunctionalAnnotation(infiles, outfiles):
    """Run the genomic functional annotation step on the first input file.

    Thin wrapper that forwards ``infiles[0]``, the handle returned by
    ``connect()`` and ``outfiles`` (unchanged) to
    ``PipelineGeneset.buildGenomicFunctionalAnnotation``.

    Arguments
    ---------
    infiles : sequence
        Only the first element is used; presumably a gene territories
        GTF file -- TODO confirm against the upstream task.
    outfiles :
        Passed through as the ``outfiles`` keyword argument.

    NOTE(review): a second definition with the same name appears later
    in this source; if both live in the same module scope, the later
    one replaces this one.
    """
    gtf_path = infiles[0]
    build_annotation = PipelineGeneset.buildGenomicFunctionalAnnotation
    build_annotation(gtf_path, dbh=connect(), outfiles=outfiles)
def annotateGenome(infile, outfile):
    """Annotate the genome using protein coding genes only.

    Delegates to ``PipelineGeneset.annotateGenome`` with
    ``only_proteincoding=True``.  Other gene types are left out because
    processed_transcripts tend to cover larger genomic regions and
    often overlap between adjacent protein coding genes.
    """
    annotate = PipelineGeneset.annotateGenome
    annotate(infile, outfile, only_proteincoding=True)
def buildGenomicFunctionalAnnotation(infiles, outfiles):
    """Run the genomic functional annotation step on the first input file.

    Thin wrapper that forwards ``infiles[0]``, the handle returned by
    ``connect()``, ``outfiles`` (unchanged) and the ``job_memory``
    entry of ``PARAMS`` to
    ``PipelineGeneset.buildGenomicFunctionalAnnotation``.

    Arguments
    ---------
    infiles : sequence
        Only the first element is used; presumably a gene territories
        GTF file -- TODO confirm against the upstream task.
    outfiles :
        Passed through as the ``outfiles`` keyword argument.

    NOTE(review): an earlier definition with the same name (without
    ``job_memory``) appears in this source; if both live in the same
    module scope, this later one is the one that takes effect.
    """
    gtf_path = infiles[0]
    build_annotation = PipelineGeneset.buildGenomicFunctionalAnnotation
    build_annotation(
        gtf_path,
        dbh=connect(),
        outfiles=outfiles,
        job_memory=PARAMS["job_memory"],
    )
def annotateGeneStructure(infile, outfile):
    """Annotate gene structure using protein coding genes only.

    Delegates to ``PipelineGeneset.annotateGeneStructure`` with
    ``only_proteincoding=True`` and the ``job_memory`` entry of
    ``PARAMS``.  Other gene types are left out because
    processed_transcripts tend to cover larger genomic regions and
    often overlap between adjacent protein coding genes.
    """
    annotate = PipelineGeneset.annotateGeneStructure
    annotate(
        infile,
        outfile,
        only_proteincoding=True,
        job_memory=PARAMS["job_memory"],
    )
def collectCpGIslands(infile, outfile):
    '''select CpG islands from UCSC and write them to *outfile*.

    Queries the ``cpgIslandExt`` table through the handle returned by
    ``PipelineGeneset.connectToUCSC()`` and writes one tab-separated
    line per row with the columns ``chrom``, ``chromStart``,
    ``chromEnd``, ``name`` and ``obsExp`` (in that order).  No header
    line is written.

    Arguments
    ---------
    infile :
        Not used by this function.
    outfile : string
        Name of the file to write; opened with
        ``IOTools.openFile(outfile, "w")``.
    '''
    dbhandle = PipelineGeneset.connectToUCSC()

    table = "cpgIslandExt"
    # Explicit mapping instead of ``locals()`` so the statement only
    # depends on the one name it actually interpolates.
    sql = (
        """SELECT chrom, chromStart, chromEnd, name, obsExp FROM %(table)s """
        % {"table": table}
    )
    E.debug("executing sql statement: %s" % sql)

    # Fetch everything before touching the output file, so that a failed
    # query does not leave an empty file behind.
    cc = dbhandle.cursor()
    try:
        cc.execute(sql)
        rows = cc.fetchall()
    finally:
        cc.close()
    # NOTE(review): the connection itself is left open, as before;
    # whether connectToUCSC() hands out a shared handle is not visible
    # from here.

    outf = IOTools.openFile(outfile, "w")
    try:
        for data in rows:
            outf.write("\t".join(map(str, data)) + "\n")
    finally:
        # Always release the file handle, even if a write fails.
        outf.close()
def loadGeneSetGeneInformation(infile, outfile):
    """Load gene-level information by delegating to ``loadGeneStats``.

    Forwards ``infile`` and ``outfile`` positionally, unchanged, to
    ``PipelineGeneset.loadGeneStats``.
    """
    load_stats = PipelineGeneset.loadGeneStats
    load_stats(infile, outfile)