Exemplo n.º 1
0
def uniqStat(bams):
    """Log one "stat unique mapped reads" step per BAM and collect outputs.

    NOTE(review): this looks like an unfinished stub. The command string is
    empty and ``out`` is never assigned inside this function, so unless
    ``out`` exists as a module-level name the ``append`` below raises
    ``NameError`` on the first iteration. Confirm the intended command and
    output file name before relying on this function.

    Args:
        bams: iterable of BAM entries; each element is bound to ``bam`` but
            is not used by the current body.

    Returns:
        list: the collected ``outs`` (empty when ``bams`` is empty).
    """
    outs = []
    for bam in bams:
        cmd = ""  # placeholder: no command has been written yet
        log.run("stat unique mapped reads", cmd, para=2)
        outs.append(out)  # NOTE(review): `out` is not defined in this function
    return outs
Exemplo n.º 2
0
def statCov(parms):
    '''Submit per-region and per-base ``depth`` commands for every BAM.

    For each ``[bam, prefix]`` pair two commands are handed to ``log.run``
    (tag ``"stat coverage"``, ``para=2``): first ``depth region`` writing
    ``<prefix>.coverage.region.txt``, then ``depth base`` writing
    ``<prefix>.coverage.base.txt``. Both are restricted with ``-L <bed>``.

    Args:
        parms (dict) : which has the following keys::

            {
                bams: a list, [[bam1, prefix1],[bam2, prefix2], ...]
                bed : bed file
            }

    Returns:
        dict : ``{"regionStats": [region outputs], "baseStats": [base outputs]}``,
        each list ordered like ``bams``.
    '''
    bam_pairs = parms['bams']
    bed_file = parms['bed']
    collected = {'region': [], 'base': []}
    for bam_path, sample_prefix in bam_pairs:
        # Region first, then base -- the same order the steps are submitted in.
        for mode in ('region', 'base'):
            target = sample_prefix + '.coverage.' + mode + '.txt'
            command = "%s depth %s -L %s %s -o %s" % (
                sambamba, mode, bed_file, bam_path, target)
            log.run("stat coverage", command, para=2)
            collected[mode].append(target)
    return {
        'regionStats': collected['region'],
        'baseStats': collected['base'],
    }
Exemplo n.º 3
0
def data_cycle(params):
    """Write one transfer JSON per SV annotation and queue its circos conf steps.

    A shallow copy of ``params`` (without ``"sv_annos"``) is used as the base
    configuration. For every ``key -> value`` in ``params["sv_annos"]`` the
    copy gets ``"svinp" = value`` and ``"prefix" = str(key)``, is serialised
    with ``json.dumps`` and written to ``<prefix>_transfer.json`` through a
    shell ``echo`` submitted via ``log.run``. ``get_cmd`` is then called with
    the template (``"circos_sv_tmp"``) and the JSON file.

    Args:
        params (dict): must contain ``"sv_annos"`` (a dict) and
            ``"circos_sv_tmp"``; the remaining keys are copied verbatim
            into every JSON file. ``params`` itself is not modified.

    Returns:
        dict: ``{key: {"conf": "<prefix>_circos.sv.conf",
        "png": "<prefix>_circos.sv.png"}}`` for every key of ``sv_annos``.

    Raises:
        KeyError: if ``"sv_annos"`` is missing, or ``"circos_sv_tmp"`` is
            missing while ``sv_annos`` is non-empty.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    anno2conf = params.copy()
    del anno2conf["sv_annos"]
    sv_param = params["sv_annos"]

    circos_res = {}
    for key, value in sv_param.items():
        anno2conf["svinp"] = value
        anno2conf["prefix"] = str(key)
        prefix = anno2conf["prefix"]

        # File names derived from the per-annotation prefix.
        jsonfile = prefix + "_transfer.json"
        conf = prefix + '_circos.sv.conf'
        png = prefix + "_circos.sv.png"
        tpl = anno2conf['circos_sv_tmp']

        # Shell-quote the JSON: a bare '%s' inside single quotes breaks as
        # soon as any value (e.g. a path) contains a single quote.
        paramstr = json.dumps(anno2conf)
        cmd1 = "echo %s > %s " % (quote(paramstr), jsonfile)
        log.run("produce a sv json file", cmd1, i=None, o=[jsonfile])

        circos_res[key] = {"conf": conf, "png": png}
        # Queue the follow-up commands that consume the JSON file.
        get_cmd(tpl, jsonfile)

    # Mapping of annotation key -> expected conf/png file names.
    return circos_res
Exemplo n.º 4
0
def sam2bam(sam, prefix):
    """Submit a ``view -bS`` command turning ``sam`` into ``<prefix>.raw.bam``.

    Args:
        sam: path of the input SAM file.
        prefix: output prefix; ``.raw.bam`` is appended to it.

    Returns:
        The output BAM path (``prefix + '.raw.bam'``).
    """
    raw_bam = prefix + '.raw.bam'
    # Output is redirected on the shell, hence the ``>`` in the command.
    log.run("sam2bam", "%s view -bS %s > %s" % (samtools, sam, raw_bam))
    return raw_bam
Exemplo n.º 5
0
def aln(parms):
    """Align reads, then convert the resulting SAM to ``<prefix>.bam``.

    Paired-end input (truthy ``fq2``) is delegated to ``DoubleQ``; otherwise
    ``SingleQ`` is used. The SAM path returned by the aligner helper is then
    converted with a ``view -bS`` command submitted through ``log.run``.

    Args:
        parms (dict) : which has the following keys::

            {
                fq1      : the first fastq file
                fq2      : the second fastq file (falsy for single-end)
                prefix   : prefix of output file
                reference: reference file for bwa
                args     : extra args for map
            }

    Returns:
        dict: ``{"bam": "<prefix>.bam"}``
    """
    read1 = parms['fq1']
    read2 = parms['fq2']
    out_prefix = parms['prefix']
    reference = parms['reference']
    extra = parms['args']

    if not read2:
        sam_path = SingleQ(read1, out_prefix, reference, extra)
    else:
        sam_path = DoubleQ(read1, read2, out_prefix, reference, extra)

    bam_path = out_prefix + '.bam'
    log.run('sam to bam',
            "%s view -bS %s > %s" % (samtools, sam_path, bam_path))
    return {'bam': bam_path}
Exemplo n.º 6
0
def report(params):
    """Render the enrichreport template to markdown and convert it to HTML.

    Args:
        params (dict): must contain ``"yaml"``, the path of the input YAML
            file that is passed to the renderer.

    Returns:
        The object returned by ``yamladd(yamlin, {"enrichreport":
        "enrichreport.md"})`` with ``"enrichreport_outdir"`` set to the
        current working directory.
    """
    # handle input
    yamlin = params["yaml"]
    # The parsed content is not used below; the file is loaded only so that
    # a missing or malformed YAML fails here, before any command is queued.
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects and is rejected by PyYAML >= 6; consider
    # yaml.safe_load once the expected YAML content is confirmed.
    with open(yamlin) as handle:
        yaml.load(handle)

    enrichreportl = get_template("enrichreport")
    out = "enrichreport.md"
    cmd = "%s -t %s -j %s -o %s -y" % (render, enrichreportl, yamlin, out)
    log.run("render enrichreport template", cmd)

    cmd = "%s %s" % (md2html, out)
    log.run("md2html enrichreport ", cmd, o=["html.tgz"])

    yamlout = yamladd(yamlin, {"enrichreport": out})
    yamlout["enrichreport_outdir"] = os.getcwd()
    return yamlout
Exemplo n.º 7
0
def SingleQ(fq1, prefix, ref, parms):
    """Submit a single-end ``mem`` alignment writing ``<prefix>.bwa.raw.sam``.

    ``prefix`` is used for the ID, SM and LB fields of the read group.

    Args:
        fq1: fastq file with the reads.
        prefix: output prefix, also used as read-group ID/SM/LB.
        ref: reference passed to the aligner.
        parms: extra command-line arguments placed right after ``mem``.

    Returns:
        Path of the SAM file the command redirects its output to.
    """
    raw_sam = prefix + ".bwa.raw.sam"
    template = ('%(tool)s mem %(opts)s '
                '-R "@RG\\tID:%(rg)s\\tSM:%(rg)s\\tLB:%(rg)s\\tPL:ILLUMINA" '
                '%(ref)s %(fq1)s  > %(sam)s ')
    command = template % {
        'tool': bwa,
        'opts': parms,
        'rg': prefix,
        'ref': ref,
        'fq1': fq1,
        'sam': raw_sam,
    }
    log.run("bwa mem", command)
    return raw_sam
Exemplo n.º 8
0
def flagstat(bams, suffix):
    """Submit one ``flagstat`` command per BAM and return the output names.

    Args:
        bams: iterable of ``(bam, prefix)`` pairs.
        suffix: appended to each prefix as ``<prefix>.<suffix>``.

    Returns:
        list: the output file names, in the order of ``bams``.
    """
    def _submit(bam_path, sample_prefix):
        # Each step goes through log.run with tag "mapping stat" and para=2.
        target = sample_prefix + '.' + suffix
        log.run("mapping stat",
                "%s flagstat %s > %s" % (samtools, bam_path, target),
                para=2)
        return target

    return [_submit(bam_path, sample_prefix)
            for bam_path, sample_prefix in bams]
Exemplo n.º 9
0
def get_cmd(tpl, jsonfile):
    """Queue the two SV steps that operate on ``jsonfile``.

    First the ``sv_filing`` script, then the ``sv_proconf`` script are
    submitted through ``log.run``; both declare ``jsonfile`` as their output,
    and the second one additionally lists ``tpl`` as an input.

    Args:
        tpl: template file, declared as input of the conf step.
        jsonfile: JSON file passed as the only argument to both scripts.
    """
    def _queue(tool, tag, inputs):
        log.run(tag, "%s %s" % (tool, jsonfile), i=inputs, o=[jsonfile])

    _queue(sv_filing,
           "use the sv_filing script to calculate the MM value",
           [jsonfile])
    _queue(sv_proconf,
           "get the conf file of circos",
           [tpl, jsonfile])
Exemplo n.º 10
0
def DoubleQ(fq1, fq2, prefix, ref, parms):
    """Submit a paired-end ``mem`` alignment writing ``<prefix>.bwa.raw.sam``.

    ``prefix`` is used for the ID, SM and LB fields of the read group.

    Args:
        fq1: first fastq file.
        fq2: second fastq file.
        prefix: output prefix, also used as read-group ID/SM/LB.
        ref: reference passed to the aligner.
        parms: extra command-line arguments placed right after ``mem``.

    Returns:
        Path of the SAM file the command redirects its output to.
    """
    raw_sam = prefix + ".bwa.raw.sam"
    read_group = '"@RG\\tID:%s\\tSM:%s\\tLB:%s\\tPL:ILLUMINA"' % (
        prefix, prefix, prefix)
    command = '%s mem %s -R %s %s %s %s > %s ' % (
        bwa, parms, read_group, ref, fq1, fq2, raw_sam)
    log.run("bwa mem", command)
    return raw_sam
Exemplo n.º 11
0
def data_cycle(params):
    """Queue one circos run per entry of ``params['circos_res']``.

    Args:
        params (dict): must contain ``"circos_res"``, a dict whose values
            are dicts with the keys ``"conf"`` (declared as input) and
            ``"png"`` (declared as output).

    Returns:
        None
    """
    for entry in params['circos_res'].values():
        conf_file = entry['conf']
        png_file = entry['png']
        log.run("Get the cmd of circos:",
                "perl %s -conf %s" % (circostool, conf_file),
                i=[conf_file],
                o=[png_file])
Exemplo n.º 12
0
def genTempltRendrParms(mapRateFile, meanCovFile, nxs):
    """Queue a command that writes the report parameters to a JSON file.

    The three arguments are stored under the keys ``mappingRateStat``,
    ``targetRegionCovStat`` and ``nXimages``, serialised with ``json.dumps``
    and written to ``mapping_template.json`` by a shell ``echo`` submitted
    through ``log.run``.

    Args:
        mapRateFile: value stored under ``"mappingRateStat"``.
        meanCovFile: value stored under ``"targetRegionCovStat"``.
        nxs: value stored under ``"nXimages"``.

    Returns:
        str: ``"mapping_template.json"``.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    out_dict = {
        'mappingRateStat': mapRateFile,
        'targetRegionCovStat': meanCovFile,
        'nXimages': nxs,
    }
    outfile = "mapping_template.json"
    # Shell-quote the JSON so a single quote inside any value (e.g. a file
    # name) cannot terminate the argument early.
    cmd = "echo %s > %s " % (quote(json.dumps(out_dict)), outfile)
    log.run('generate template json', cmd)
    return outfile
Exemplo n.º 13
0
def statCov(bams, bed):
    """Submit per-region and per-base ``depth`` commands for every BAM.

    Args:
        bams: iterable of ``(bam, prefix)`` pairs.
        bed: bed file passed with ``-L`` to both commands.

    Returns:
        tuple: ``(region_outs, base_outs)`` -- two lists holding
        ``<prefix>.coverage.region.txt`` and ``<prefix>.coverage.base.txt``
        respectively, ordered like ``bams``.
    """
    per_region, per_base = [], []
    sinks = (('region', per_region), ('base', per_base))
    for bam_path, sample_prefix in bams:
        for mode, sink in sinks:
            target = sample_prefix + '.coverage.' + mode + '.txt'
            log.run("stat coverage",
                    "%s depth %s -L %s %s -o %s" % (
                        sambamba, mode, bed, bam_path, target),
                    para=2)
            sink.append(target)
    return per_region, per_base
Exemplo n.º 14
0
def report(parms):
    """Render the gene-enrichment report to markdown and convert it to HTML.

    Args:
        parms (dict): uses the following keys::

            {
                templtJson      : json file, input parameter for template
                template        : template of report; when it starts with
                                  "http://" it is resolved via ``get_templt``
                resultsDirectory: optional output directory, default "./"
            }

    Returns:
        None
    """
    ijson = parms['templtJson']
    enrichTemplt = parms['template']
    if enrichTemplt.startswith("http://"):
        enrichTemplt = get_templt(enrichTemplt)

    # Only a missing key should fall back to the default; the former bare
    # ``except:`` would also have hidden unrelated errors.
    targetDir = parms.get('resultsDirectory', './')

    out = os.path.join(targetDir, "geneEnrich_report.md")
    cmd = "%s -t %s -j %s -o %s" % (render, enrichTemplt, ijson, out)
    log.run("generating cancer drug report templete", cmd)
    cmd = "%s %s" % (md2html, out)
    log.run("generating mapping report", cmd)
Exemplo n.º 15
0
def indexs(parms):
    '''Submit an ``index`` command for a BAM file.

    Args:
        parms (dict) : which has the following keys::

            {
                bam: bam file
            }

    Returns: null
    '''
    log.run('bam index', "%s index %s" % (sambamba, parms['bam']))
Exemplo n.º 16
0
def dedups(parms):
    '''Submit a duplicate-removal command for a BAM file.

    The command writes ``<prefix>.dedup.bam`` and passes
    ``<prefix>.MarkDuplicates.stat`` as the ``M=`` argument.

    Args:
        parms (dict) : which has the following keys::

            {
                bam   : bam file
                prefix: prefix of output
            }

    Returns:
        dict : ``{"bam":"bam_dedup", "prefix":"prefix"}``
    '''
    source_bam = parms['bam']
    out_prefix = parms['prefix']
    deduped = out_prefix + ".dedup.bam"
    metrics = out_prefix + '.MarkDuplicates.stat'
    command = ("java -jar %s INPUT=%s OUTPUT=%s REMOVE_DUPLICATES=true "
               "AS=true VALIDATION_STRINGENCY=SILENT M=%s ") % (
                   picard_dedup, source_bam, deduped, metrics)
    log.run('dedup bam', command)
    return {'bam': deduped, 'prefix': out_prefix}
Exemplo n.º 17
0
def get_cmd(tpl, jsonfile):
    """Queue the five steps of the combined SNV/CNV/SV circos preparation.

    The scripts ``snv_filing``, ``snv_count``, ``cnv_filing``, ``sv_filing``
    and ``multi_proconf`` are submitted in that order through ``log.run``,
    each with ``jsonfile`` as its only argument and declared output. The
    last step additionally lists ``tpl`` as an input.

    Args:
        tpl: template file, declared as input of the conf step.
        jsonfile: JSON file handed to every script.
    """
    def _queue(tool, tag, inputs):
        log.run(tag, "%s %s" % (tool, jsonfile), i=inputs, o=[jsonfile])

    _queue(snv_filing,
           "use the snv_filing script to get the separated file",
           [jsonfile])
    _queue(snv_count,
           "use the snv_count script to get the counted file",
           [jsonfile])
    _queue(cnv_filing,
           "use the cnv_filing script to calculate the MM value",
           [jsonfile])
    _queue(sv_filing,
           "use the sv_filing script to get the circos input",
           [jsonfile])
    _queue(multi_proconf,
           "get the conf file of circos",
           [tpl, jsonfile])
Exemplo n.º 18
0
def report(params):
    """Render the circos_report template to markdown and convert it to HTML.

    The input YAML is loaded, re-serialised as JSON into
    ``circos_report.json`` (via a shell ``echo`` submitted through
    ``log.run``), rendered to ``circos_report.md`` and passed to ``md2html``.

    Args:
        params (dict): must contain ``"yaml"``, the path of the input YAML
            file.

    Returns:
        The object returned by ``yamladd(yamlin, {"circos_report":
        "circos_report.md"})`` with ``"circos_report_outdir"`` set to the
        current working directory.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    # handle input
    yamlin = params["yaml"]
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects and is rejected by PyYAML >= 6; consider
    # yaml.safe_load once the expected YAML content is confirmed.
    with open(yamlin) as handle:
        indict = yaml.load(handle)

    yamlfile = "circos_report.json"
    # Shell-quote the JSON so a single quote inside any value cannot
    # terminate the echo argument early.
    cmd1 = "echo %s > %s " % (quote(json.dumps(indict)), yamlfile)
    tag1 = "make sure the existed yaml file"
    # Pass the tag variable; the literal string "tag1" was used before.
    log.run(tag1, cmd1, i=None, o=[yamlfile])

    templ = get_template("circos_report")
    out = "circos_report.md"
    cmd = "%s -t %s -j %s -o %s -y" % (render, templ, yamlfile, out)
    log.run("render circos_report template", cmd)

    cmd = "%s %s" % (md2html, out)
    log.run("md2html circos_report ", cmd)

    yamlout = yamladd(yamlin, {"circos_report": out})
    yamlout["circos_report_outdir"] = os.getcwd()
    return yamlout
Exemplo n.º 19
0
def sorts(parms):
    '''Submit a ``sort`` command writing ``<prefix>.sort.bam``.

    Args:
        parms (dict) : which has the following keys::

            {
                bam   : bam file
                prefix: prefix of output
                args  : args of sambamba sort
            }

    Returns:
        dict : ``{"bam":"bam_sort", "prefix":"prefix"}``
    '''
    source_bam = parms['bam']
    out_prefix = parms['prefix']
    sort_args = parms['args']
    sorted_bam = out_prefix + ".sort.bam"
    log.run('sort bam',
            "%s sort %s %s -o %s " % (
                sambamba, source_bam, sort_args, sorted_bam))
    return {"bam": sorted_bam, 'prefix': out_prefix}
Exemplo n.º 20
0
def intersects(parms):
    '''Submit an intersect command keeping BAM records that overlap the bed.

    The command redirects its output to ``<prefix>.target.bam``.

    Args:
        parms (dict) : which has the following keys::

            {
                bam   : bam file
                bed   : bed file
                prefix: prefix of output
            }

    Returns:
        dict : ``{"bam":"bam_target", "prefix":"prefix", "bed":"bed"}``
    '''
    source_bam = parms['bam']
    bed_file = parms['bed']
    out_prefix = parms['prefix']
    on_target = out_prefix + ".target.bam"
    log.run('intersect bam',
            "%s -abam %s -b %s -wa -u > %s" % (
                intersectBed, source_bam, bed_file, on_target))
    return {'bam': on_target, 'prefix': out_prefix, 'bed': bed_file}
Exemplo n.º 21
0
def report(parms):
    '''Render the mapping report to markdown and convert it to HTML.

    A template that starts with ``"http://"`` is first resolved through
    ``get_templt``.

    Args:
        parms (dict) : which has the following keys::

            {
                template  : template of report
                templtJson: json file, input parameter for template
            }

    Returns:
        dict : ``{"outfile": "mapping_report.md"}``
    '''
    template = parms['template']
    if template.startswith("http://"):
        template = get_templt(template)
    json_input = parms['templtJson']
    markdown = "mapping_report.md"

    log.run("generating mapping report templete",
            "%s -t %s -j %s -o %s" % (render, template, json_input, markdown))
    log.run("generating mapping report", "%s %s" % (md2html, markdown))
    return {'outfile': markdown}
Exemplo n.º 22
0
def statMappingRate(parms):
    '''Collect mapping, coverage and nX-plot statistics for a set of BAMs.

    Steps, all submitted through ``log.run`` or the helpers it calls:

    1. ``flagstat`` on the sorted, deduplicated and on-target BAMs.
    2. ``statCov`` on the deduplicated BAMs.
    3. ``mapRate`` combining the three flagstat lists into
       ``report/mapping/readsMappingRateStat.xlsx``.
    4. ``covFormat`` summarising the per-base coverage files into
       ``report/mapping/AllFile.mean.coverage.xlsx``.
    5. ``nXplot`` once per sample, reading ``<sample>.cov.txt``.

    List arguments are handed to the scripts joined with ``-``.

    Args:
        parms (dict) : which has the following keys::

            {
                sortBams   : a list, [[bam1, prefix1],[bam2, prefix2], ...]
                dedupBams  : a list, [[bam1, prefix1],[bam2, prefix2], ...]
                targetBams : a list, [[bam1, prefix1],[bam2, prefix2], ...]
                bed        : bed file
                samples    : a list, prefixs of bams
            }

    Returns:
        dict : ``{"regionStats": "region_covs", "mapRateFile":"mapRateFile", "meanCovFile":"meanCovFile", "nXs": "nxs"}``
        where ``nXs`` lists ``report/mapping/nX/<sample>.region.coverage.png``
        for every sample.
    '''
    sortbams = parms['sortBams']
    dedupbams = parms['dedupBams']
    targetbams = parms['targetBams']
    bed = parms['bed']
    samples = parms['samples']

    sstats = flagstat(sortbams, 'sort.mapping.stat')
    dstats = flagstat(dedupbams, 'dedup.mapping.stat')
    tstats = flagstat(targetbams, 'target.mapping.stat')
    region_covs, base_covs = statCov(dedupbams, bed)

    mapDir = 'report/mapping'
    nXdir = 'report/mapping/nX'

    mapRateFile = os.path.join(mapDir, "readsMappingRateStat.xlsx")
    cmd = "%s %s %s %s %s %s" % (mapRate, mapRateFile, '-'.join(sstats),
                                 '-'.join(dstats), '-'.join(tstats),
                                 '-'.join(samples))
    log.run("mapping rate stats", cmd)

    meanCovFile = os.path.join(mapDir, "AllFile.mean.coverage.xlsx")
    cmd = "%s %s %s %s " % (covFormat, '-'.join(base_covs), meanCovFile,
                            '-'.join(samples))
    log.run('bam coverage stats', cmd)

    nxs = []
    for sample in samples:
        coverage_table = sample + '.cov.txt'
        # Only the PNG path is reported; the previously computed PDF path
        # was never used and has been dropped.
        png = os.path.join(nXdir, sample + '.region.coverage.png')
        cmd = "%s %s %s %s" % (nXplot, coverage_table,
                               os.path.join(nXdir, sample), sample)
        log.run('plot target region coverage rate', cmd)
        nxs.append(png)

    return {
        'regionStats': region_covs,
        'mapRateFile': mapRateFile,
        'meanCovFile': meanCovFile,
        'nXs': nxs,
    }
Exemplo n.º 23
0
def get_cmd(tpl, jsonfile):
    """Queue the three SNV steps that operate on ``jsonfile``.

    ``snv_filing``, ``snv_count`` and ``snv_proconf`` are submitted in that
    order through ``log.run``, each with ``jsonfile`` as its only argument
    and declared output. The last step additionally lists ``tpl`` as input.

    Args:
        tpl: template file, declared as input of the conf step.
        jsonfile: JSON file handed to every script.
    """
    def _queue(tool, tag, inputs):
        log.run(tag, "%s %s" % (tool, jsonfile), i=inputs, o=[jsonfile])

    _queue(snv_filing,
           "use the snv_filing script to get the separated file",
           [jsonfile])
    _queue(snv_count,
           "use the snv_count script to get the counted file",
           [jsonfile])
    _queue(snv_proconf,
           "get the conf file of circos",
           [tpl, jsonfile])
Exemplo n.º 24
0
def enrich(parms):
    """Queue GO annotation, GO enrichment and KEGG enrichment for one file.

    The three scripts (``funcAnnoGO``, ``enrichGO``, ``enrichKEGG``) are
    submitted in that order through ``log.run``, each called as
    ``<script> <enrichFile> <prefix>``.

    Args:
        parms (dict): uses the keys ``enrichFile`` (input file) and
            ``prefix`` (output prefix).

    Returns:
        dict: ``{"func": "<prefix>.func.go.txt",
        "go": "<prefix>.enrich.go.txt",
        "kegg": "<prefix>.enrich.kegg.txt"}``
    """
    source = parms['enrichFile']
    out_prefix = parms['prefix']

    def _queue(tool, tag):
        log.run(tag, "%s %s %s" % (tool, source, out_prefix))

    _queue(funcAnnoGO, 'func annotation')
    _queue(enrichGO, 'GO enrich analysis')
    _queue(enrichKEGG, 'KEGG enrich analysis')

    return {
        'func': out_prefix + '.func.go.txt',
        'go': out_prefix + '.enrich.go.txt',
        'kegg': out_prefix + '.enrich.kegg.txt',
    }
Exemplo n.º 25
0
def report(ymlfile):
    """ {{projName}} to markdown file and html file

    The YAML file is loaded, re-serialised as JSON into
    ``{{projName}}_render.yml`` (via a shell ``echo`` submitted through
    ``log.run``), rendered to ``{{projName}}.md`` and passed to ``md2html``.
    Both the render and the md2html step are submitted with
    ``docker="jbioi/report"`` and ``singularity="report.img"``.

    Args:
        ymlfile: path of the input YAML file.

    Returns:
        ``ymlfile``, unchanged.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    # handle input
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects and is rejected by PyYAML >= 6; consider
    # yaml.safe_load once the expected YAML content is confirmed.
    with open(ymlfile) as handle:
        indict = yaml.load(handle)

    render_yml = "{{projName}}_render.yml"
    # Shell-quote the JSON so a single quote inside any value cannot
    # terminate the echo argument early.
    cmd = "echo %s > %s" % (quote(json.dumps(indict)), render_yml)
    log.run("get {{projName}} args to render", cmd)

    templ = get_template("{{projName}}")
    out = "{{projName}}.md"
    cmd = "%s -t %s -j %s -o %s -y" % (render, templ, render_yml, out)
    log.run("render {{projName}} template", cmd, docker="jbioi/report", singularity="report.img")

    cmd = "%s %s" % (md2html, out)
    log.run("md2html {{projName}} ", cmd, docker="jbioi/report", singularity="report.img")

    # `yamlin` was never defined in this function; the input file is `ymlfile`.
    yamlout = yamladd(ymlfile, {"{{projName}}": out})
    yamlout["{{projName}}_outdir"] = os.getcwd()
    # NOTE(review): the input path is returned, not `yamlout`; kept as-is
    # because callers may rely on it -- confirm which one is intended.
    return ymlfile
Exemplo n.º 26
0
def arranger(parms):
    '''Arranging final results and generating report directory

    Creates ``report/mapping/nX`` and ``report/mapping/mapStat`` when they
    do not exist yet, moves every region-coverage file into ``mapStat`` and
    writes the template parameters through ``genTempltRendrParms``. All
    shell commands are submitted through ``log.run``.

    Args:
        parms (dict) : which has the following keys::

            {
                regionStats: a list, results of sambamba depth region
                mapRateFile: statistics of BAM mapping information
                meanCovFile: summary of mean coverage for bams
                nXs        : a list, plots of bam coverage
            }

    Returns:
        dict : ``{"templtJson":"templtParms"}``
    '''
    regionStats = parms['regionStats']
    mapRateFile = parms['mapRateFile']
    meanCovFile = parms['meanCovFile']
    nxs = parms['nXs']

    mapDir = 'report/mapping'
    nXdir = os.path.join(mapDir, 'nX')
    mapStat = os.path.join(mapDir, 'mapStat')

    # Each directory is checked on its own; the second check used to test
    # nXdir again, so mapStat could be left uncreated.
    for directory in (nXdir, mapStat):
        if not os.path.exists(directory):
            log.run('mkdir', "mkdir -p %s" % directory)

    for region in regionStats:
        log.run('mv coverage stat files', 'mv %s %s' % (region, mapStat))

    templtParms = genTempltRendrParms(mapRateFile, meanCovFile, nxs)

    return {'templtJson': templtParms}
Exemplo n.º 27
0
def enrich(parms):
    '''Gene GO functional annotation, GO enrichment and KEGG enrichment

    For every ``prefix -> file`` entry under ``enrichFiles`` in the YAML,
    the header line of the file is checked for a ``Gene`` column (a warning
    is printed when it is missing; processing continues), then the
    ``funcAnnoGO``, ``enrichGO`` and ``enrichKEGG`` scripts are submitted
    through ``log.run``. The expected output file names are collected and
    added to the YAML via ``yamladd``; finally ``csv2xls`` is submitted on
    the YAML file.

    Args:
        parms (dict) : which has the following keys::

            {
                yaml : a yaml file including parms for gene enrich analysis
            }

    Returns:
        dict : ``{"yaml": yamlfile for arrange and report}``
        (the value returned by ``yamladd``).
    '''
    # Output key -> file name pattern, one file per GO category.
    go_patterns = (
        ('func_pdfs', '{prefix}.go.{item}.bar.func.pdf'),
        ('func_csvs', '{prefix}.go.{item}.func.csv'),
        ('func_xls', '{prefix}.go.{item}.func.xls'),
        ('go_dot_pdfs', '{prefix}.go.{item}.dot.enrich.pdf'),
        ('go_net_pdfs', '{prefix}.go.{item}.net.enrich.pdf'),
        ('go_csvs', '{prefix}.go.{item}.enrich.csv'),
        ('go_csvs_all', '{prefix}.go.{item}.enrich.all.csv'),
        ('go_xls', '{prefix}.go.{item}.enrich.xls'),
        ('go_xls_all', '{prefix}.go.{item}.enrich.all.xls'),
    )
    # Output key -> file name pattern, one file per prefix.
    kegg_patterns = (
        ('kegg_pdfs', '{}.KEGG.enrich.pdf'),
        ('kegg_csvs', '{}.KEGG.enrich.csv'),
        ('kegg_csvs_all', '{}.KEGG.enrich.all.csv'),
        ('kegg_xls', '{}.KEGG.enrich.xls'),
        ('kegg_xls_all', '{}.KEGG.enrich.all.xls'),
    )
    # Key order of the result handed to yamladd.
    key_order = (
        'func_pdfs', 'go_dot_pdfs', 'go_net_pdfs', 'kegg_pdfs',
        'func_csvs', 'func_xls', 'go_csvs', 'go_xls', 'go_csvs_all',
        'go_xls_all', 'kegg_csvs', 'kegg_csvs_all', 'kegg_xls',
        'kegg_xls_all',
    )

    yaml_file = parms["yaml"]
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects and is rejected by PyYAML >= 6; consider
    # yaml.safe_load once the expected YAML content is confirmed.
    with open(yaml_file) as fp:
        enrich_dict = yaml.load(fp.read())

    files = enrich_dict['enrichFiles']
    out_dict = {key: [] for key in key_order}

    for prefix, enrichFile in files.items():
        # check file
        #
        with open(enrichFile, 'r') as f:
            # Strip the line ending first, otherwise a "Gene" header in the
            # last column reads as "Gene\n" and is reported as missing.
            head = f.readline().rstrip('\r\n').split('\t')
        if "Gene" not in head:
            print("\n\nInput file: {} need the head include 'Gene'.\n\n".
                  format(enrichFile))

        # functation annotation
        #
        cmd = "%s %s %s" % (funcAnnoGO, enrichFile, prefix)
        log.run('func annotation', cmd, i=[enrichFile])

        # GO enrich analysis
        #
        cmd = "%s %s %s" % (enrichGO, enrichFile, prefix)
        log.run('GO enrich analysis', cmd)

        # KEGG enrich analysis
        #
        cmd = "%s %s %s" % (enrichKEGG, enrichFile, prefix)
        log.run('KEGG enrich analysis', cmd)

        for item in ['CC', 'BP', 'MF']:
            for key, pattern in go_patterns:
                out_dict[key].append(pattern.format(prefix=prefix, item=item))

        for key, pattern in kegg_patterns:
            out_dict[key].append(pattern.format(prefix))

        # NOTE(review): "<prefix>.gene_id.csv" names used to be collected in
        # a local list that was never read or returned; that dead list was
        # removed. Confirm whether those files should be reported as well.

    res = yamladd(yaml_file, out_dict)

    # csv2xls
    #
    cmd = "{csv2xls} {yamlfile}".format(csv2xls=csv2xls, yamlfile=yaml_file)
    log.run('csv2xls', cmd)

    return res
Exemplo n.º 28
0
def arrange(parms):
    """Submit the ``arnge`` script on the func, go and kegg result files.

    Args:
        parms (dict): uses the keys ``func``, ``go`` and ``kegg``; their
            values are passed to the script in that order.

    Returns:
        None
    """
    func_file = parms['func']
    go_file = parms['go']
    kegg_file = parms['kegg']
    log.run('arranger',
            "%s %s %s %s" % (arnge, func_file, go_file, kegg_file))