Python PipelineRrbs示例

def makeSummaryPlots(infile, outfile):
    '''call RRBS.summaryPlots on infile and outfile, then touch outfile

    The call is made with submit=True and a 48G mem_free job option.
    P.touch(outfile) runs after RRBS.summaryPlots returns.
    '''
    RRBS.summaryPlots(
        infile, outfile,
        submit=True,
        job_options="-l mem_free=48G")
    P.touch(outfile)

示例#2

显示文件

def runBiSeq_liver(infiles, outfile):
    '''call RRBS.runBiSeq on infiles and outfile with the "Liver" label

    The call is made with submit=True and job options requesting 10G
    mem_free and "-pe dedicated 10".
    '''
    submit_kwargs = {
        "submit": True,
        "job_options": "-l mem_free=10G -pe dedicated 10",
    }
    RRBS.runBiSeq(infiles, outfile, "Liver", **submit_kwargs)

示例#3

显示文件

def categorisePromoterCpGs(outfile):
    '''extract promoter sequences and categorise them by CpG density

    Passes outfile, the genome fasta from
    PARAMS["methylation_summary_genome_fasta"] and the database from
    PARAMS['annotation_database'] to RRBS.categorisePromoterCpGs, with
    submit=True and job_memory="4G".
    '''
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]
    annotation_db = PARAMS['annotation_database']

    RRBS.categorisePromoterCpGs(outfile, genome_fasta, annotation_db,
                                submit=True, job_memory="4G")

示例#4

显示文件

def splitClustersDataframe(infile, outfiles):
    '''call RRBS.splitDataframeClusters on infile

    The output prefix "subframes.dir/cluster_subframe_" and the suffix
    ".tsv" are passed as fixed arguments; the outfiles parameter is not
    referenced in the body.  The call is made with submit=True and job
    options requesting 8G mem_free and "-pe dedicated 1".
    '''
    cluster_opts = "-l mem_free=8G -pe dedicated 1"

    RRBS.splitDataframeClusters(
        infile,
        "subframes.dir/cluster_subframe_",
        ".tsv",
        submit=True,
        job_options=cluster_opts)

示例#5

显示文件

def extractRepeatCpGs(outfile):
    '''extract repeat sequences and identify CpG locations

    Passes outfile, the genome fasta from
    PARAMS["methylation_summary_genome_fasta"] and the repeats gff from
    PARAMS["annotation_repeats_gff"] to RRBS.findRepeatCpGs, with
    submit=True and job_memory="4G".
    '''
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]
    repeats_gff = PARAMS["annotation_repeats_gff"]

    RRBS.findRepeatCpGs(outfile, genome_fasta, repeats_gff,
                        submit=True, job_memory="4G")

示例#6

显示文件

def calculateM3DSpikeClustersPvalue(infiles, outfile):
    '''call RRBS.calculateM3DSpikepvalue, then touch outfile

    The last element of infiles is passed as the design; all preceding
    elements are passed as the input files.  The call is made with
    submit=True and job options requesting 4G mem_free and
    "-pe dedicated 1".
    '''
    design_file = infiles[-1]
    stat_files = infiles[:-1]

    RRBS.calculateM3DSpikepvalue(
        stat_files, outfile, design_file,
        submit=True,
        job_options="-l mem_free=4G -pe dedicated 1")
    P.touch(outfile)

示例#7

显示文件

def mergeCpGAnnotations(infiles, outfile):
    '''merge together the CpG annotations for plotting

    infiles must unpack into exactly five items, forwarded in order as
    the methylation, promoter, repeat, HCNE and DMR inputs of
    RRBS.mergeCpGAnnotations.  The call is made with submit=True and
    job_memory="4G".
    '''
    methylation, promoters, repeats, hcne, dmr = infiles

    RRBS.mergeCpGAnnotations(
        methylation, promoters, repeats, hcne, dmr, outfile,
        submit=True, job_memory="4G")

示例#8

显示文件

def runM3D(infile, outfile, root, design):
    '''call RRBS.calculateM3DStat for every informative experiment pair

    Every 2-combination of EXPERIMENTS is considered.  Each member is
    converted to a string, every "-agg" is removed from it, and it is
    split on "-".  A pair is compared only if its members differ in
    exactly one of the first two fields (e.g. treatment or tissue but
    not both); the differing field values are passed as ``groups``.

    infile and design are forwarded unchanged to each call.  The
    per-pair output path is "<root><pair1>_vs_<pair2>.tsv"; the outfile
    parameter itself is not used to name any output.

    Pairs whose first two fields are both identical are reported via
    E.error and skipped, so that no call is made with a ``groups``
    value left over from an earlier iteration.
    '''
    job_options = "-l mem_free=4G -pe dedicated 1"

    # **code repeated - refactor**
    for experiment_pair in itertools.combinations(EXPERIMENTS, 2):
        pair = [re.sub("-agg", "", str(x)) for x in experiment_pair]
        pair1, pair2 = pair
        pair1_split = pair1.split("-")
        pair2_split = pair2.split("-")

        first_differs = pair1_split[0] != pair2_split[0]
        second_differs = pair1_split[1] != pair2_split[1]

        # only want pairs with one difference
        # e.g treatment or tissue but not both
        if first_differs and second_differs:
            continue

        if first_differs:
            groups = [pair1_split[0], pair2_split[0]]
        elif second_differs:
            groups = [pair1_split[1], pair2_split[1]]
        else:
            E.error("This pair does not contain any comparisons: %s"
                    % (pair,))
            # nothing to compare - do not run with undefined groups
            continue

        pair_outfile = "%s%s_vs_%s.tsv" % (root, pair1, pair2)

        RRBS.calculateM3DStat(infile, pair_outfile, design, pair=pair,
                              groups=groups, submit=True,
                              job_options=job_options)

示例#9

显示文件

def runBiSeq_germline(infiles, outfile):
    '''call RRBS.runBiSeq on infiles and outfile with the "Germline" label

    The call is made with submit=True and job options requesting 10G
    mem_free and "-pe dedicated 10".
    '''
    submit_kwargs = {
        "submit": True,
        "job_options": "-l mem_free=10G -pe dedicated 10",
    }
    RRBS.runBiSeq(infiles, outfile, "Germline", **submit_kwargs)

示例#10

显示文件

def addTreatmentMeans(infile, outfile):
    '''call RRBS.addTreatmentMean on infile and outfile

    The call is made with submit=True and a 48G mem_free job option.
    '''
    RRBS.addTreatmentMean(
        infile, outfile,
        submit=True, job_options="-l mem_free=48G")

示例#11

显示文件

def findCpGs(outfile):
    '''call RRBS.fasta2CpG on the configured genome fasta

    The fasta path is read from
    PARAMS["methylation_summary_genome_fasta"] and passed together with
    outfile.  The call is made with submit=True and a 2G mem_free job
    option.
    '''
    submit_kwargs = {"submit": True, "job_options": "-l mem_free=2G"}
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]

    RRBS.fasta2CpG(genome_fasta, outfile, **submit_kwargs)

示例#12

显示文件

def runM3DSpikeClusters(infiles, outfile):
    '''call RRBS.calculateM3DStat on a (infile, design) pair

    infiles must unpack into exactly two items: the input file and the
    design.  The call is made with submit=True and job options
    requesting 4G mem_free and "-pe dedicated 1".
    '''
    clusters_file, design_file = infiles

    RRBS.calculateM3DStat(
        clusters_file, outfile, design_file,
        submit=True,
        job_options="-l mem_free=4G -pe dedicated 1")

示例#13

显示文件

def clusterSpikeInsPowerAnalysis(infiles, outfile):
    '''call RRBS.spikeInClustersAnalysis on infiles and outfile

    The call is made with submit=True and a 23G mem_free job option.
    '''
    RRBS.spikeInClustersAnalysis(
        infiles, outfile,
        submit=True, job_options="-l mem_free=23G")

示例#14

显示文件

def extractDMRCpGs(outfile):
    '''call RRBS.findCpGsFromBed for the DMR annotation

    Passes outfile, the genome fasta from
    PARAMS["methylation_summary_genome_fasta"], the annotation from
    PARAMS["annotation_dmr"] and the label "DMR", with
    both_strands=True, submit=True and job_memory="4G".  The annotation
    passed here is the DMR one, not the HCNE annotation.
    '''
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]
    dmr_annotation = PARAMS["annotation_dmr"]

    RRBS.findCpGsFromBed(outfile, genome_fasta, dmr_annotation, "DMR",
                         both_strands=True,
                         submit=True, job_memory="4G")

示例#15

显示文件

def subsetCpGsToCovered(infile, outfile):
    '''call RRBS.subsetToCovered on infile and outfile, cov_threshold=10

    The call is made with submit=True and a 48G mem_free job option.
    '''
    RRBS.subsetToCovered(
        infile, outfile,
        cov_threshold=10,
        submit=True, job_options="-l mem_free=48G")

示例#16

显示文件

def plotReadBias(infile, outfile):
    '''call RRBS.plotReadBias on the M-bias file matching infile

    The M-bias path is built by applying P.snip to infile with
    ".bismark.cov" and appending ".M-bias.txt"; the path is printed
    before the call.  The call is made with submit=True and a 1G
    mem_free job option.
    '''
    stem = P.snip(infile, ".bismark.cov")
    m_bias_path = stem + ".M-bias.txt"

    print(m_bias_path)

    RRBS.plotReadBias(
        m_bias_path, outfile,
        submit=True, job_options="-l mem_free=1G")

示例#17

显示文件

def mergeCoverage(infiles, outfile):
    '''call RRBS.mergeAndDrop on the CpG file and the coverage files

    The last element of infiles is passed as the CpG file; all
    preceding elements are passed as the coverage files.  The call is
    made with submit=True and a 48G mem_free job option.
    '''
    # flagged by the original author as very memory intensive: the
    # cause should be found and the pandas-based implementation
    # replaced with a non-pandas solution
    job_options = "-l mem_free=48G"
    # NOTE(review): job_threads is assigned but not passed to the call
    # below - confirm whether the pipeline framework reads it
    job_threads = 2

    coverage_files = infiles[:-1]
    cpg_file = infiles[-1]

    RRBS.mergeAndDrop(
        cpg_file, coverage_files, outfile,
        submit=True, job_options=job_options)

示例#18

显示文件

def calculateM3DClustersPvalue(infiles, outfile, pair1, pair2):
    '''call RRBS.calculateM3Dpvalue for the pair [pair1, pair2]

    The last element of infiles is dropped before the call.  pair1,
    pair2 and then the final call arguments are printed first.  The
    call is made with submit=True and job options requesting 4G
    mem_free and "-pe dedicated 1".
    '''
    stat_files = infiles[:-1]
    comparison = [pair1, pair2]

    print("pair1: %s" % pair1)
    print("pair2: %s" % pair2)
    print(stat_files, outfile, comparison)

    RRBS.calculateM3Dpvalue(
        stat_files, outfile, comparison,
        submit=True,
        job_options="-l mem_free=4G -pe dedicated 1")

示例#19

显示文件

def addCpGIs(infiles, outfile):
    '''left-merge the CpGI file onto infile via RRBS.pandasMerge

    infiles must unpack into exactly two items: the input file and the
    CpGI file.  Both sides are joined on the columns 'contig' and
    'position'.  The call is made with submit=True and
    job_memory="40G".
    '''
    # TS: remains memory intensive even with data types supplied for
    # every column; should be replaced with a non-pandas solution
    job_memory = "40G"
    # NOTE(review): job_threads is assigned but not passed to the call
    # below - confirm whether the pipeline framework reads it
    job_threads = 1

    cpg_file, cpgi_file = infiles
    join_columns = ('contig', 'position')

    RRBS.pandasMerge(
        cpg_file, cpgi_file, outfile,
        merge_type="left",
        left=list(join_columns),
        right=list(join_columns),
        submit=True,
        job_memory=job_memory)

示例#20

显示文件

文件： pipeline_rrbs.py 项目： CGATOxford/CGATPipelines

def subsetCpGsToCovered(infile, outfile):
    '''call RRBS.subsetToCovered on infile and outfile, cov_threshold=10

    The call is made with submit=True and a 48G mem_free job option.
    '''
    submit_kwargs = {"submit": True, "job_options": "-l mem_free=48G"}

    RRBS.subsetToCovered(infile, outfile, cov_threshold=10,
                         **submit_kwargs)

示例#21

显示文件

def plotMethylationFrequency(infile, outfile):
    '''call RRBS.plotMethFrequency on infile and outfile

    The call is made with job_memory="2G" and submit=True.
    '''
    submit_kwargs = {"job_memory": "2G", "submit": True}
    RRBS.plotMethFrequency(infile, outfile, **submit_kwargs)

示例#22

显示文件

def plotCpGAnnotations(infile, outfiles):
    '''make histogram and boxplots for the CpGs facetted per annotation

    outfiles must unpack into exactly two items: the histogram output
    followed by the boxplot output.  RRBS.plotCpGAnnotations is called
    directly, without any submit or job keyword arguments.
    '''
    histogram_out, boxplot_out = outfiles
    RRBS.plotCpGAnnotations(infile, histogram_out, boxplot_out)

示例#23

显示文件

文件： pipeline_rrbs.py 项目： CGATOxford/CGATPipelines

def runBiSeq_liver(infiles, outfile):
    '''call RRBS.runBiSeq on infiles and outfile with the "Liver" label

    The call is made with submit=True and job options requesting 10G
    mem_free and "-pe dedicated 10".
    '''
    tissue = "Liver"
    cluster_opts = "-l mem_free=10G -pe dedicated 10"

    RRBS.runBiSeq(
        infiles,
        outfile,
        tissue,
        submit=True,
        job_options=cluster_opts)

示例#24

显示文件

文件： pipeline_rrbs.py 项目： CGATOxford/CGATPipelines

def addTreatmentMeans(infile, outfile):
    '''call RRBS.addTreatmentMean on infile and outfile

    The call is made with submit=True and a 48G mem_free job option.
    '''
    submit_kwargs = {"submit": True, "job_options": "-l mem_free=48G"}
    RRBS.addTreatmentMean(infile, outfile, **submit_kwargs)

示例#25

显示文件

文件： pipeline_rrbs.py 项目： CGATOxford/CGATPipelines

def runM3DSpikeClusters(infiles, outfile):
    '''call RRBS.calculateM3DStat on a (infile, design) pair

    infiles must unpack into exactly two items: the input file and the
    design.  The call is made with submit=True and job options
    requesting 4G mem_free and "-pe dedicated 1".
    '''
    submit_kwargs = {
        "submit": True,
        "job_options": "-l mem_free=4G -pe dedicated 1",
    }
    clusters_file, design_file = infiles

    RRBS.calculateM3DStat(clusters_file, outfile, design_file,
                          **submit_kwargs)

示例#26

显示文件

文件： pipeline_rrbs.py 项目： CGATOxford/CGATPipelines

def clusterSpikeInsPowerAnalysis(infiles, outfile):
    '''call RRBS.spikeInClustersAnalysis on infiles and outfile

    The call is made with submit=True and a 23G mem_free job option.
    '''
    submit_kwargs = {"submit": True, "job_options": "-l mem_free=23G"}
    RRBS.spikeInClustersAnalysis(infiles, outfile, **submit_kwargs)

示例#27

显示文件

def summariseM3D(infile, outfile):
    '''summarise the number of clusters passing the threshold

    Prints infile, outfile and the threshold, then calls
    RRBS.summariseM3D with submit=True.
    '''
    # threshold applied to adjusted p-values
    adjusted_p_cutoff = 0.05

    print(infile, outfile, adjusted_p_cutoff)
    RRBS.summariseM3D(
        infile, outfile, adjusted_p_cutoff,
        submit=True)

示例#28

显示文件

def plotCoverage(infile, outfiles):
    '''call RRBS.plotCoverage on infile and outfiles

    outfiles is forwarded as a single argument.  The call is made with
    submit=True and job_memory="6G".
    '''
    submit_kwargs = {"submit": True, "job_memory": "6G"}
    RRBS.plotCoverage(infile, outfiles, **submit_kwargs)

示例#29

显示文件

def calculateCoverage(infile, outfile):
    '''call RRBS.calculateCoverage on infile and outfile

    The call is made with submit=True and job_memory="2G".
    '''
    submit_kwargs = {"submit": True, "job_memory": "2G"}
    RRBS.calculateCoverage(infile, outfile, **submit_kwargs)

示例#30

显示文件

文件： pipeline_rrbs.py 项目： CGATOxford/CGATPipelines

def runBiSeq_germline(infiles, outfile):
    '''call RRBS.runBiSeq on infiles and outfile with the "Germline" label

    The call is made with submit=True and job options requesting 10G
    mem_free and "-pe dedicated 10".
    '''
    tissue = "Germline"
    cluster_opts = "-l mem_free=10G -pe dedicated 10"

    RRBS.runBiSeq(
        infiles,
        outfile,
        tissue,
        submit=True,
        job_options=cluster_opts)

示例#31

显示文件

文件： pipeline_rrbs.py 项目： CGATOxford/CGATPipelines

def findCpGs(outfile):
    '''call RRBS.fasta2CpG on the configured genome fasta

    The fasta path is read from
    PARAMS["methylation_summary_genome_fasta"] and passed together with
    outfile.  The call is made with submit=True and a 2G mem_free job
    option.
    '''
    RRBS.fasta2CpG(
        PARAMS["methylation_summary_genome_fasta"],
        outfile,
        submit=True,
        job_options="-l mem_free=2G")