Python PipelineGtfsubset示例

编程语言: Python

命名空间/包名称: CGATPipelines

hotexamples.com的示例: 20

Python PipelineGtfsubset - 已找到20个示例。以下是从开源项目中提取的、评分最高的CGATPipelines.PipelineGtfsubset真实Python示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示隐藏

SubsetGTF(4)

buildFlatGeneSet(2)

buildGenomicContext(2)

getRepeatDataFromUCSC(2)

loadGeneInformation(2)

SubsetGFF3(1)

connectToUCSC(1)

示例#1

显示文件

def importRNAAnnotationFromUCSC(outfile):
    """Download the repetitive RNA types from UCSC into ``outfile``.

    The repeat classes are read from ``PARAMS["ucsc_rnatypes"]`` and the
    value of ``PARAMS["ncbi_remove_contigs"]`` is forwarded as the
    ``remove_contigs_regex`` argument.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    ucsc_connection = connectToUCSC()
    rna_classes = P.asList(PARAMS["ucsc_rnatypes"])
    contig_pattern = PARAMS["ncbi_remove_contigs"]

    fetch_repeats(dbhandle=ucsc_connection,
                  repclasses=rna_classes,
                  outfile=outfile,
                  remove_contigs_regex=contig_pattern)

示例#2

显示文件

def importRepeatsFromUCSC(outfile):
    """Download the UCSC repeat types named in the configuration.

    The repeat classes are read from ``PARAMS["ucsc_repeattypes"]`` and the
    result is written to ``outfile``.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    ucsc_connection = connectToUCSC()
    repeat_classes = P.asList(PARAMS["ucsc_repeattypes"])

    fetch_repeats(dbhandle=ucsc_connection,
                  repclasses=repeat_classes,
                  outfile=outfile)

示例#3

显示文件

文件： pipeline_genesets.py 项目： CGATOxford/CGATPipelines

def importRNAAnnotationFromUCSC(outfile):
    """Download the repetitive RNA types from UCSC into ``outfile``.

    Repeat classes come from ``PARAMS["ucsc_rnatypes"]``; the values of
    ``PARAMS["ncbi_remove_contigs"]`` and ``PARAMS["job_memory"]`` are
    forwarded as ``remove_contigs_regex`` and ``job_memory``.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    ucsc_connection = connectToUCSC()
    rna_classes = P.asList(PARAMS["ucsc_rnatypes"])
    contig_pattern = PARAMS["ncbi_remove_contigs"]
    memory = PARAMS["job_memory"]

    fetch_repeats(dbhandle=ucsc_connection,
                  repclasses=rna_classes,
                  outfile=outfile,
                  remove_contigs_regex=contig_pattern,
                  job_memory=memory)

示例#4

显示文件

文件： pipeline_genesets.py 项目： CGATOxford/CGATPipelines

def importRepeatsFromUCSC(outfile):
    """Download the UCSC repeat types named in the configuration.

    Repeat classes come from ``PARAMS["ucsc_repeattypes"]`` and the value of
    ``PARAMS["job_memory"]`` is forwarded as ``job_memory``. The result is
    written to ``outfile``.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    ucsc_connection = connectToUCSC()
    repeat_classes = P.asList(PARAMS["ucsc_repeattypes"])
    memory = PARAMS["job_memory"]

    fetch_repeats(dbhandle=ucsc_connection,
                  repclasses=repeat_classes,
                  outfile=outfile,
                  job_memory=memory)

示例#5

显示文件

def buildNonCodingExonTranscript(infile, outfile):
    '''
    Write the non-coding exon features of an ENSEMBL gene set.

    The :term:`gtf` file is filtered on two columns, whose names are read
    from ``PARAMS['ensembl_cgat_feature']`` and
    ``PARAMS['ensembl_cgat_gene_biotype']``. They are paired with the values
    ``exon`` and ``protein_coding`` and combined with the ``"and not"``
    operator.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    '''
    gtf_subset = PipelineGtfsubset.SubsetGTF(infile)

    # Column names and wanted values are positionally paired.
    columns = [PARAMS['ensembl_cgat_feature'],
               PARAMS['ensembl_cgat_gene_biotype']]
    wanted = ["exon", "protein_coding"]

    gtf_subset.filterGTF(outfile, columns, wanted, operators="and not")

示例#6

显示文件

def buildCdsTranscript(infile, outfile):
    '''
    Write the CDS features of an ENSEMBL gene set.

    The :term:`gtf` file is filtered on the column named by
    ``PARAMS['ensembl_cgat_feature']`` with the single value ``CDS``.

    Note - no filter on gene_biotype is applied because some of the CDS
    are classified as polymorphic_pseudogene.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    '''
    gtf_subset = PipelineGtfsubset.SubsetGTF(infile)

    column = PARAMS['ensembl_cgat_feature']
    wanted = ["CDS"]

    # Single-column filter, so no combining operator is supplied.
    gtf_subset.filterGTF(outfile, column, wanted, operators=None)

示例#7

显示文件

def buildLincRNAExonTranscript(infile, outfile):
    '''
    Write the lincRNA exon features of an ENSEMBL gene set.

    The :term:`gtf` file is filtered on two columns, whose names are read
    from ``PARAMS['ensembl_cgat_feature']`` and
    ``PARAMS['ensembl_cgat_gene_biotype']``. They are paired with the values
    ``exon`` and ``lincRNA`` and combined with the ``"and"`` operator.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    '''
    gtf_subset = PipelineGtfsubset.SubsetGTF(infile)

    # Column names and wanted values are positionally paired.
    columns = [PARAMS['ensembl_cgat_feature'],
               PARAMS['ensembl_cgat_gene_biotype']]
    wanted = ["exon", "lincRNA"]

    gtf_subset.filterGTF(outfile, columns, wanted, operators="and")

示例#8

显示文件

def buildmiRNonPrimaryTranscript(infile, outfile):
    '''
    Subset a miRbase annotation GFF3 file to its "miRNA" annotations.

    The GFF3 file can be downloaded from miRbase; make sure the annotation
    matches the genome build that you are using.

    Annotations labeled "miRNA" are selected, i.e. all of the non primary
    transcripts. The column to filter on is read from
    ``PARAMS['ensembl_cgat_feature']``.
    '''
    gff3_subset = PipelineGtfsubset.SubsetGFF3(infile)

    column = PARAMS['ensembl_cgat_feature']
    wanted = ["miRNA"]

    gff3_subset.filterGFF3(outfile, column, wanted)

示例#9

显示文件

def buildExonTranscript(infile, outfile):
    '''
    Write the exon features of an ENSEMBL gene set.

    The :term:`gtf` file is filtered on the column named by
    ``PARAMS['ensembl_cgat_feature']`` with the single value ``exon``.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.ini
    outfile : from ruffus
       Output filename named in pipeline.ini
    '''
    gtf_subset = PipelineGtfsubset.SubsetGTF(infile)

    column = PARAMS['ensembl_cgat_feature']
    wanted = ["exon"]

    # Single-column filter, so no combining operator is supplied.
    gtf_subset.filterGTF(outfile, column, wanted, operators=None)

示例#10

显示文件

文件： pipeline_genesets.py 项目： CGATOxford/CGATPipelines

def buildGenomicContext(infiles, outfile):
    """Delegate to ``PipelineGtfsubset.buildGenomicContext``.

    ``infiles`` and ``outfile`` are forwarded unchanged; ``job_memory`` is
    taken from ``PARAMS["job_highmemory"]``.
    """
    build_context = PipelineGtfsubset.buildGenomicContext
    memory = PARAMS["job_highmemory"]
    build_context(infiles, outfile, job_memory=memory)

示例#11

显示文件

def buildGenomicContext(infiles, outfile):
    """Delegate to ``PipelineGtfsubset.buildGenomicContext``.

    ``infiles`` and ``outfile`` are forwarded unchanged.
    """
    build_context = PipelineGtfsubset.buildGenomicContext
    build_context(infiles, outfile)

示例#12

显示文件

def buildGenomicContext(infiles, outfile):
    """Delegate to ``PipelineGtfsubset.buildGenomicContext``.

    ``infiles`` and ``outfile`` are forwarded unchanged; ``job_memory`` is
    taken from ``PARAMS["job_highmemory"]``.
    """
    build_context = PipelineGtfsubset.buildGenomicContext
    memory = PARAMS["job_highmemory"]
    build_context(infiles, outfile, job_memory=memory)

示例#13

显示文件

def connectToUCSC():
    """Return the result of ``PipelineGtfsubset.connectToUCSC``.

    Host, user and database are read from ``PARAMS["ucsc_host"]``,
    ``PARAMS["ucsc_user"]`` and ``PARAMS["ucsc_database"]``.
    """
    connect = PipelineGtfsubset.connectToUCSC
    ucsc_host = PARAMS["ucsc_host"]
    ucsc_user = PARAMS["ucsc_user"]
    ucsc_database = PARAMS["ucsc_database"]
    return connect(host=ucsc_host, user=ucsc_user, database=ucsc_database)

示例#14

显示文件

文件： pipeline_genesets.py 项目： CGATOxford/CGATPipelines

def connectToUCSC():
    """Return the result of ``PipelineGtfsubset.connectToUCSC``.

    Host, user and database are read from ``PARAMS["ucsc_host"]``,
    ``PARAMS["ucsc_user"]`` and ``PARAMS["ucsc_database"]``.
    """
    connect = PipelineGtfsubset.connectToUCSC
    ucsc_host = PARAMS["ucsc_host"]
    ucsc_user = PARAMS["ucsc_user"]
    ucsc_database = PARAMS["ucsc_database"]
    return connect(host=ucsc_host, user=ucsc_user, database=ucsc_database)

示例#15

显示文件

def loadGeneInformation(infile, outfile):
    '''Load the transcript set.

    Delegates to ``PipelineGtfsubset.loadGeneInformation`` with
    ``job_memory`` taken from ``PARAMS["job_highmemory"]``.
    '''
    load_genes = PipelineGtfsubset.loadGeneInformation
    memory = PARAMS["job_highmemory"]
    load_genes(infile, outfile, job_memory=memory)

示例#16

显示文件

def buildFlatGeneSet(infile, outfile):
    """Delegate to ``PipelineGtfsubset.buildFlatGeneSet``.

    ``infile`` and ``outfile`` are forwarded unchanged.
    """
    build_flat = PipelineGtfsubset.buildFlatGeneSet
    build_flat(infile, outfile)

示例#17

显示文件

文件： pipeline_genesets.py 项目： CGATOxford/CGATPipelines

def buildFlatGeneSet(infile, outfile):
    """Delegate to ``PipelineGtfsubset.buildFlatGeneSet``.

    ``infile`` and ``outfile`` are forwarded unchanged; ``job_memory`` is
    taken from ``PARAMS["job_highmemory"]``.
    """
    build_flat = PipelineGtfsubset.buildFlatGeneSet
    memory = PARAMS["job_highmemory"]
    build_flat(infile, outfile, job_memory=memory)

示例#18

显示文件

def loadGeneInformation(infile, outfile):
    '''Load the transcript set.

    Delegates to ``PipelineGtfsubset.loadGeneInformation``, forwarding
    ``infile`` and ``outfile`` unchanged.
    '''
    load_genes = PipelineGtfsubset.loadGeneInformation
    load_genes(infile, outfile)

示例#19

显示文件

def buildFlatGeneSet(infile, outfile):
    """Delegate to ``PipelineGtfsubset.buildFlatGeneSet``.

    ``infile`` and ``outfile`` are forwarded unchanged; ``job_memory`` is
    taken from ``PARAMS["job_highmemory"]``.
    """
    build_flat = PipelineGtfsubset.buildFlatGeneSet
    memory = PARAMS["job_highmemory"]
    build_flat(infile, outfile, job_memory=memory)

示例#20

显示文件

文件： pipeline_genesets.py 项目： CGATOxford/CGATPipelines

def loadGeneInformation(infile, outfile):
    '''Load the transcript set.

    Delegates to ``PipelineGtfsubset.loadGeneInformation`` with
    ``job_memory`` taken from ``PARAMS["job_highmemory"]``.
    '''
    load_genes = PipelineGtfsubset.loadGeneInformation
    memory = PARAMS["job_highmemory"]
    load_genes(infile, outfile, job_memory=memory)