Exemplo n.º 1
0
def runBiSeq_liver(infiles, outfile):
    '''call RRBS.runBiSeq on infiles/outfile with the "Liver" label'''
    # NOTE(review): looks like a scheduler resource request
    # (memory + parallel environment) - confirm against RRBS
    job_options = "-l mem_free=10G -pe dedicated 10"
    run_kwargs = {"submit": True, "job_options": job_options}
    RRBS.runBiSeq(infiles, outfile, "Liver", **run_kwargs)
Exemplo n.º 2
0
def runM3D(infile, outfile, root, design):
    '''run RRBS.calculateM3DStat for every pair of EXPERIMENTS that
    differs in exactly one of its first two "-"-separated fields.

    Each experiment is converted with str() and has "-agg" removed
    before it is split on "-".

    infile and design are passed straight through to
    RRBS.calculateM3DStat.  The outfile argument is not used as given:
    every comparison gets its own output name of the form
    "<root><pair1>_vs_<pair2>.tsv".

    Raises IndexError if an experiment name has fewer than two
    "-"-separated fields.
    '''
    job_options = "-l mem_free=4G -pe dedicated 1"

    # **code repeated - refactor**
    for combination in itertools.combinations(EXPERIMENTS, 2):
        pair = [re.sub("-agg", "", str(x)) for x in combination]
        pair1, pair2 = pair
        pair1_split = pair1.split("-")
        pair2_split = pair2.split("-")

        first_differs = pair1_split[0] != pair2_split[0]
        second_differs = pair1_split[1] != pair2_split[1]

        # only want pairs with one difference
        # e.g treatment or tissue but not both
        if first_differs and second_differs:
            continue

        if first_differs:
            groups = [pair1_split[0], pair2_split[0]]
        elif second_differs:
            groups = [pair1_split[1], pair2_split[1]]
        else:
            E.error(
                "This pair does not contain any comparisons: %s" % (pair,))
            # there is nothing to compare, so do not start a job with
            # a groups value left over from an earlier pair
            continue

        # per-pair output name; kept separate from the outfile argument
        pair_outfile = "%s%s_vs_%s.tsv" % (root, pair1, pair2)

        RRBS.calculateM3DStat(infile,
                              pair_outfile,
                              design,
                              pair=pair,
                              groups=groups,
                              submit=True,
                              job_options=job_options)
Exemplo n.º 3
0
def runBiSeq_germline(infiles, outfile):
    '''call RRBS.runBiSeq on infiles/outfile with the "Germline" label'''
    # NOTE(review): looks like a scheduler resource request
    # (memory + parallel environment) - confirm against RRBS
    job_options = "-l mem_free=10G -pe dedicated 10"
    run_kwargs = {"submit": True, "job_options": job_options}
    RRBS.runBiSeq(infiles, outfile, "Germline", **run_kwargs)
Exemplo n.º 4
0
def clusterSpikeInsPowerAnalysis(infiles, outfile):
    '''forward infiles and outfile to RRBS.spikeInClustersAnalysis,
    passing submit=True and the job options below'''
    job_options = "-l mem_free=23G"
    RRBS.spikeInClustersAnalysis(
        infiles, outfile, submit=True, job_options=job_options)
Exemplo n.º 5
0
def runM3DSpikeClusters(infiles, outfile):
    '''call RRBS.calculateM3DStat for one (infile, design) pair.

    infiles must unpack into exactly two items: the input file
    followed by the design.
    '''
    job_options = "-l mem_free=4G -pe dedicated 1"
    source, design_spec = infiles
    RRBS.calculateM3DStat(
        source, outfile, design_spec,
        submit=True, job_options=job_options)
Exemplo n.º 6
0
def findCpGs(outfile):
    '''run RRBS.fasta2CpG on the genome fasta configured under
    PARAMS["methylation_summary_genome_fasta"], passing outfile as
    the second argument'''
    job_options = "-l mem_free=2G"
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]
    RRBS.fasta2CpG(
        genome_fasta, outfile, submit=True, job_options=job_options)
Exemplo n.º 7
0
def addTreatmentMeans(infile, outfile):
    '''forward infile and outfile to RRBS.addTreatmentMean, passing
    submit=True and the job options below'''
    job_options = "-l mem_free=48G"
    RRBS.addTreatmentMean(
        infile, outfile, submit=True, job_options=job_options)
Exemplo n.º 8
0
def extractRepeatCpGs(outfile):
    '''find CpG locations in repeat sequences.

    Hands outfile, the genome fasta and the repeats gff (both looked
    up in PARAMS) to RRBS.findRepeatCpGs with submit=True and
    job_memory="4G".
    '''
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]
    repeats_gff = PARAMS["annotation_repeats_gff"]
    RRBS.findRepeatCpGs(
        outfile, genome_fasta, repeats_gff,
        submit=True, job_memory="4G")
Exemplo n.º 9
0
def categorisePromoterCpGs(outfile):
    '''categorise promoter sequences by CpG density.

    Hands outfile, the genome fasta and the annotation database (both
    looked up in PARAMS) to RRBS.categorisePromoterCpGs with
    submit=True and job_memory="4G".
    '''
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]
    annotation_db = PARAMS['annotation_database']
    RRBS.categorisePromoterCpGs(
        outfile, genome_fasta, annotation_db,
        submit=True, job_memory="4G")
Exemplo n.º 10
0
def subsetCpGsToCovered(infile, outfile):
    '''call RRBS.subsetToCovered on infile/outfile with a fixed
    cov_threshold of 10'''
    job_options = "-l mem_free=48G"
    call_kwargs = {
        "cov_threshold": 10,
        "submit": True,
        "job_options": job_options,
    }
    RRBS.subsetToCovered(infile, outfile, **call_kwargs)
Exemplo n.º 11
0
def calculateM3DSpikeClustersPvalue(infiles, outfile):
    '''call RRBS.calculateM3DSpikepvalue, then P.touch(outfile).

    The last element of infiles is taken as the design; everything
    before it is passed on as the input files.
    '''
    job_options = "-l mem_free=4G -pe dedicated 1"
    stat_infiles, design = infiles[:-1], infiles[-1]
    RRBS.calculateM3DSpikepvalue(
        stat_infiles, outfile, design,
        submit=True, job_options=job_options)
    P.touch(outfile)
Exemplo n.º 12
0
def splitClustersDataframe(infile, outfiles):
    '''call RRBS.splitDataframeClusters on infile.

    outfiles is not referenced here; a fixed prefix and suffix are
    passed to RRBS.splitDataframeClusters instead.
    '''
    job_options = "-l mem_free=8G -pe dedicated 1"
    naming = ("subframes.dir/cluster_subframe_", ".tsv")
    RRBS.splitDataframeClusters(
        infile, *naming, submit=True, job_options=job_options)
Exemplo n.º 13
0
def extractDMRCpGs(outfile):
    '''identify CpG locations for the annotation configured under
    PARAMS["annotation_dmr"].

    Calls RRBS.findCpGsFromBed with the genome fasta from PARAMS, the
    label "DMR", both_strands=True, submit=True and job_memory="4G".
    '''
    genome_fasta = PARAMS["methylation_summary_genome_fasta"]
    dmr_annotation = PARAMS["annotation_dmr"]
    RRBS.findCpGsFromBed(
        outfile, genome_fasta, dmr_annotation, "DMR",
        both_strands=True, submit=True, job_memory="4G")
Exemplo n.º 14
0
def plotReadBias(infile, outfile):
    '''call RRBS.plotReadBias on the ".M-bias.txt" file derived from
    infile.

    The M-bias filename is P.snip(infile, ".bismark.cov") with
    ".M-bias.txt" appended (P.snip presumably strips the given
    suffix - confirm).  The derived name is printed before the call.
    '''
    job_options = "-l mem_free=1G"

    stem = P.snip(infile, ".bismark.cov")
    m_bias_infile = stem + ".M-bias.txt"
    print(m_bias_infile)

    RRBS.plotReadBias(
        m_bias_infile, outfile, submit=True, job_options=job_options)
Exemplo n.º 15
0
def mergeCoverage(infiles, outfile):
    '''call RRBS.mergeAndDrop with the CpG file and coverage files.

    The last element of infiles is the CpG file; all elements before
    it are the coverage files.
    '''
    coverage_infiles, cpgs_infile = infiles[:-1], infiles[-1]
    # this should be replaced with a non-pandas based solution
    # very memory intensive! - find out why and re-code
    job_options = "-l mem_free=48G"
    # NOTE(review): job_threads is never passed to the call below;
    # kept in case the pipeline framework reads it by name - confirm
    job_threads = 2

    RRBS.mergeAndDrop(
        cpgs_infile, coverage_infiles, outfile,
        submit=True, job_options=job_options)
Exemplo n.º 16
0
def calculateM3DClustersPvalue(infiles, outfile, pair1, pair2):
    '''call RRBS.calculateM3Dpvalue for the pair [pair1, pair2].

    The last element of infiles is dropped before the call.  pair1,
    pair2 and the final call arguments are printed first.
    '''
    job_options = "-l mem_free=4G -pe dedicated 1"
    infiles = infiles[:-1]
    pair = [pair1, pair2]

    print("pair1: %s" % pair1)
    print("pair2: %s" % pair2)
    print(infiles, outfile, pair)

    RRBS.calculateM3Dpvalue(
        infiles, outfile, pair, submit=True, job_options=job_options)
Exemplo n.º 17
0
def mergeCpGAnnotations(infiles, outfile):
    '''combine the CpG annotation files via RRBS.mergeCpGAnnotations.

    infiles must unpack into exactly five items, passed on in order
    (methylation, promoter, repeat, hcne, dmr) ahead of outfile.
    '''
    methylation, promoters, repeats, hcne, dmr = infiles
    ordered_inputs = (methylation, promoters, repeats, hcne, dmr, outfile)
    RRBS.mergeCpGAnnotations(
        *ordered_inputs, submit=True, job_memory="4G")
Exemplo n.º 18
0
def addCpGIs(infiles, outfile):
    '''left-merge the two inputs on ('contig', 'position') through
    RRBS.pandasMerge.

    infiles must unpack into exactly two items: the main input file
    followed by the CpGI file.
    '''
    infile, CpGI = infiles
    # TS: still memory intensive even after supplying data types
    # for all columns!
    # this should be replaced with a non-pandas based solution
    job_memory = "40G"
    # NOTE(review): job_threads is never passed to the call below;
    # kept in case the pipeline framework reads it by name - confirm
    job_threads = 1

    merge_spec = {
        "merge_type": "left",
        "left": ['contig', 'position'],
        "right": ['contig', 'position'],
    }
    RRBS.pandasMerge(
        infile, CpGI, outfile,
        submit=True, job_memory=job_memory, **merge_spec)
Exemplo n.º 19
0
def plotCoverage(infile, outfiles):
    '''forward infile and outfiles to RRBS.plotCoverage with
    submit=True and job_memory="6G"'''
    RRBS.plotCoverage(
        infile,
        outfiles,
        job_memory="6G",
        submit=True)
Exemplo n.º 20
0
def makeSummaryPlots(infile, outfile):
    '''call RRBS.summaryPlots on infile/outfile, then
    P.touch(outfile)'''
    job_options = "-l mem_free=48G"
    RRBS.summaryPlots(
        infile,
        outfile,
        submit=True,
        job_options=job_options)
    P.touch(outfile)
Exemplo n.º 21
0
def plotMethylationFrequency(infile, outfile):
    '''forward infile and outfile to RRBS.plotMethFrequency with
    submit=True and job_memory="2G"'''
    RRBS.plotMethFrequency(
        infile,
        outfile,
        submit=True,
        job_memory="2G")
Exemplo n.º 22
0
def plotCpGAnnotations(infile, outfiles):
    '''plot the CpG annotations as a histogram and a boxplot.

    outfiles must unpack into exactly two items: the histogram output
    followed by the boxplot output.  Both are passed, with infile, to
    RRBS.plotCpGAnnotations.
    '''
    histogram_out, boxplot_out = outfiles
    RRBS.plotCpGAnnotations(
        infile,
        histogram_out,
        boxplot_out)
Exemplo n.º 23
0
def calculateCoverage(infile, outfile):
    '''forward infile and outfile to RRBS.calculateCoverage with
    submit=True and job_memory="2G"'''
    RRBS.calculateCoverage(
        infile,
        outfile,
        job_memory="2G",
        submit=True)
Exemplo n.º 24
0
def summariseM3D(infile, outfile):
    '''summarise the number of clusters passing the threshold.

    Prints infile, outfile and the threshold, then calls
    RRBS.summariseM3D with submit=True.
    '''
    # adjusted p-value threshold
    threshold = 0.05
    call_args = (infile, outfile, threshold)
    print(*call_args)
    RRBS.summariseM3D(*call_args, submit=True)