示例#1
0
文件: main.py 项目: demis001/raslpipe
def combine_alignment_summary(input, output):
    """Combine formatted alignment log files (announced as "Stage 9").

    `input`: Formatted alignment stat log files (*fastqLog.final.txt)
    `output`: Combined alignment stat csv file named (DATA_alignment_summary.csv)

    Prints the value of `tasks.comment()` before and after a green stage
    banner, then returns whatever
    `tasks.combineAlignmentSummary(input, output)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 9: Aggrigate alignment summary ....", "green"))
    print(tasks.comment())
    return tasks.combineAlignmentSummary(input, output)
示例#2
0
文件: main.py 项目: demis001/raslpipe
def format_count(input, output):
    """Format a count csv file (announced as "Stage 8").

    `input`: csv file
    `output`: Formatted *.csv file

    Prints a stage banner followed by both arguments, framed by the value
    of `tasks.comment()`, then returns whatever
    `tasks.formatCount(input, output)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 8: Formatting count file ... ", "green"))
    print(input)
    print(output)
    print(tasks.comment())
    return tasks.formatCount(input, output)
示例#3
0
def plot_alignment_summary(input, output):
    """Plot the alignment summary (announced as "Stage 10").

    `input`: Alignment summary csv file
    `output`: output png file bar plot

    Prints a stage banner followed by both arguments, framed by the value
    of `tasks.comment()`, then returns whatever
    `tasks.plotAlignmentStat(input, output)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 10: Plot alignment summary ...", "green"))
    print(input)
    print(output)
    print(tasks.comment())
    return tasks.plotAlignmentStat(input, output)
示例#4
0
文件: main.py 项目: demis001/raslpipe
def combine_count_data(input, output):
    """Combine count files (announced as "Stage 7").

    `input`: Formatted *.out.txt count files
    `output`: A single summary count csv file named
              'DATA_COUNT_countcombined.csv' under project dir

    Prints both arguments and a stage banner, framed by the value of
    `tasks.comment()`, then returns whatever
    `tasks.combineCount(input, output)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(input)
    print(output)
    print(colored("Stage 7: Combining count data ...", "green"))
    print(tasks.comment())
    return tasks.combineCount(input, output)
示例#5
0
def format_count(input, output):
    """Format a count csv file (announced as "Stage 8").

    `input`: csv file
    `output`: Formatted *.csv file

    Prints a green stage banner framed by the value of `tasks.comment()`,
    then returns whatever `tasks.formatCount(input, output)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 8: Formatting count file ... ", "green"))
    print(tasks.comment())
    return tasks.formatCount(input, output)
示例#6
0
def combine_alignment_summary(input, output):
    """Combine formatted alignment log files (announced as "Stage 9").

    `input`: Formatted alignment stat log files (*fastqLog.final.txt)
    `output`: Combined alignment stat csv file named (DATA_alignment_summary.csv)

    Prints a green stage banner framed by the value of `tasks.comment()`,
    then returns whatever `tasks.combineAlignmentSummary(input, output)`
    returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 9: Aggrigate alignment summary ....", "green"))
    print(tasks.comment())
    return tasks.combineAlignmentSummary(input, output)
示例#7
0
def combine_count_data(input, output):
    """Combine count files (announced as "Stage 7").

    `input`: Formatted *.out.txt count files
    `output`: A single summary count csv file named
              'DATA_COUNT_countcombined.csv' under project dir

    Prints a green stage banner framed by the value of `tasks.comment()`,
    then returns whatever `tasks.combineCount(input, output)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 7: Combining count data ...", "green"))
    print(tasks.comment())
    return tasks.combineCount(input, output)
示例#8
0
文件: main.py 项目: demis001/raslpipe
def plot_alignment_summary(input, output):
    """Plot the alignment summary (announced as "Stage 10").

    `input`: Alignment summary csv file
    `output`: output png file bar plot

    Prints a stage banner followed by both arguments, framed by the value
    of `tasks.comment()`, then returns whatever
    `tasks.plotAlignmentStat(input, output)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 10: Plot alignment summary ...", "green"))
    print(input)
    print(output)
    print(tasks.comment())
    return tasks.plotAlignmentStat(input, output)
示例#9
0
def main():
    """Console script for puticr.

    When `options.indexed == "yes"` the `indexGenome` stage is run first,
    followed by the complete stage list starting at `prepare_analysis`.
    Otherwise only the stages from `mergeBamSameTissue` onwards are run,
    and the elapsed wall-clock time is reported. Both paths finish by
    writing the pipeline flowchart to "flowchart.pdf".
    """
    # Stages shared by both execution paths, in the order they are requested.
    downstream_stages = [
        "mergeBamSameTissue", "bamSort", "bamIndex", "createCGmap",
        "extractCG_Context", "mergeConCGcall", "icrHotSpot",
        "convertToBed", "unionBed", "mergeBed", "countICR",
    ]

    def draw_flowchart():
        # Flowcharts can be printed in a large number of formats including
        # jpg, svg, png and pdf.
        pipeline_printout_graph("flowchart.pdf",
                                "pdf", [countICR],
                                user_colour_scheme={"colour_scheme_index": 6},
                                pipeline_name="Putative ICR pipeline",
                                no_key_legend=False)

    if options.indexed == "yes":
        click.echo(
            "Indexing the reference genome {}, be patient, it takes longer time"
            .format(genomeDir))
        pipeline_run(["indexGenome"], verbose=1, multiprocess=cpuNum)
        pipeline_run(
            ["prepare_analysis", "cleanFastq", "bsAlign"] + downstream_stages,
            verbose=1,
            multiprocess=cpuNum)
        draw_flowchart()
        return

    click.echo(tasks.comment())
    t0 = time.time()
    click.echo("Starting the process .....")

    # NOTE(review): this path does not request "prepare_analysis",
    # "cleanFastq" or "bsAlign" by name -- confirm that is intentional.
    pipeline_run(downstream_stages, verbose=1, multiprocess=cpuNum)
    draw_flowchart()
    click.echo(".................. {}".format(resultDir))

    # Report the elapsed time as H:MM:SS, truncated to whole seconds.
    elapsedTime = int((time.time()) - t0)
    elapsedTime = str(datetime.timedelta(seconds=elapsedTime))
    click.echo("Time to complete the task .....{}".format(
        colored(elapsedTime, "red")))
    click.echo(tasks.comment())
示例#10
0
文件: main.py 项目: demis001/raslpipe
def alignment_summary(input, output):
    """Generate an alignment summary (announced as "Stage 8").

    `input`: *fastqLog.final.out files
    `output`: Extracted necessary data and create *.txt file for each count log file

    The file actually written is placed under `tempDir`: its name is the
    base name of `input` with the last extension removed, with `output`
    appended as a suffix. Returns whatever
    `tasks.alignmentSummary(input, <that path>)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    stem = splitext(basename(input))[0]
    out_file_name = join(tempDir, stem + output)
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 8: Generate Alingmnet summary ....", "green"))
    print(tasks.comment())
    return tasks.alignmentSummary(input, out_file_name)
示例#11
0
文件: main.py 项目: demis001/raslpipe
def count_mapped_reads(bamFile, outfile):
    """Count the mapped sequences that overlap genome features ("Stage 6").

    `bamFile`: A bam alignment file
    `outfile`: Count txt file

    The GTF file name is derived from the module-level `probFile`: the text
    before "_manifest.csv" gets a ".gtf" extension and is joined onto
    `resultDir`. Returns whatever
    `tasks.count_mapped(bamFile, outfile, gtfFile)` returns.

    Raises `ValueError` when `probFile` does not match
    '<name>_manifest.csv' (previously this surfaced as an opaque
    AttributeError on `None`).
    """
    import re  # kept function-local, as in the original block

    # The dot is escaped so only a literal ".csv" is accepted.
    manifest = re.match(r'(.*)_manifest\.csv', probFile, re.M | re.I)
    if manifest is None:
        raise ValueError(
            "Cannot derive the GTF file name: %r does not look like "
            "'<name>_manifest.csv'" % (probFile,))
    gtfFile = join(resultDir, manifest.group(1) + ".gtf")

    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 6: Count Mapped file that overlap with genome feature ... ", "green"))
    print(bamFile)
    print(gtfFile)
    print(tasks.comment())
    return tasks.count_mapped(bamFile, outfile, gtfFile)
示例#12
0
def alignment_summary(input, output):
    """Generate an alignment summary (announced as "Stage 8").

    `input`: *fastqLog.final.out files
    `output`: Extracted necessary data and create *.txt file for each count log file

    The file actually written is placed under `tempDir`: its name is the
    base name of `input` with the last extension removed, with `output`
    appended as a suffix. Returns whatever
    `tasks.alignmentSummary(input, <that path>)` returns.
    """
    # NOTE: `input` shadows the builtin; the name is kept so the signature
    # stays unchanged for existing callers.
    log_stem = splitext(basename(input))[0]
    out_file_name = join(tempDir, log_stem + output)
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 8: Generate Alingmnet summary ....", "green"))
    print(tasks.comment())
    return tasks.alignmentSummary(input, out_file_name)
示例#13
0
def count_mapped_reads(bamFile, outfile):
    """Count the mapped sequences that overlap genome features ("Stage 6").

    `bamFile`: A bam alignment file
    `outfile`: Count txt file

    The GTF file name is derived from the module-level `probFile`: the text
    before "_manifest.csv" gets a ".gtf" extension and is joined onto
    `resultDir`. Returns whatever
    `tasks.count_mapped(bamFile, outfile, gtfFile)` returns.

    Raises `ValueError` when `probFile` does not match
    '<name>_manifest.csv' (previously this surfaced as an opaque
    AttributeError on `None`).
    """
    import re  # kept function-local, as in the original block

    # The dot is escaped so only a literal ".csv" is accepted.
    manifest = re.match(r'(.*)_manifest\.csv', probFile, re.M | re.I)
    if manifest is None:
        raise ValueError(
            "Cannot derive the GTF file name: %r does not look like "
            "'<name>_manifest.csv'" % (probFile,))
    gtfFile = join(resultDir, manifest.group(1) + ".gtf")

    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 6: Count Mapped file that overlap with genome feature ... ", "green"))
    print(tasks.comment())
    return tasks.count_mapped(bamFile, outfile, gtfFile)
示例#14
0
文件: main.py 项目: demis001/raslpipe
def map_to_probes(fastq, output):
    """Map the fastq file to the indexed probe sequences ("Stage 5").

    The fastq must be gzipped with the following extension: (*.fastq.gz)

    `fastq`: a dir that contains all *.fastq.gz files for the experiment
    `output`: output .bam files and '*fastqReadPrepGene.out.tab' count files

    The output prefix handed to the mapper is the absolute path of
    `tempDir` joined with the base name of `output`, with the last
    extension removed. Returns whatever
    `tasks.map_seq_to_probes(fastq, genomeDir, cpuNum, outPrefix)` returns.
    """
    temp_target = join(tempDir, basename(output))
    outPrefix = os.path.abspath(splitext(temp_target)[0])
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 5: Map sequence fastq file to the indexed genome file ... ", "green"))
    print(fastq)
    print(output)
    print(genomeDir)
    print(outPrefix)
    print(tasks.comment())
    return tasks.map_seq_to_probes(fastq, genomeDir, cpuNum, outPrefix)
示例#15
0
def map_to_probes(fastq, output):
    """Map the fastq file to the indexed probe sequences ("Stage 5").

    The fastq must be gzipped with the following extension: (*.fastq.gz)

    `fastq`: a dir that contains all *.fastq.gz files for the experiment
    `output`: output .bam files and '*fastqReadPrepGene.out.tab' count files

    The output prefix handed to the mapper is the absolute path of
    `tempDir` joined with the base name of `output`, with the last
    extension removed. Returns whatever
    `tasks.map_seq_to_probes(fastq, genomeDir, cpuNum, outPrefix)` returns.
    """
    temp_target = join(tempDir, basename(output))
    outPrefix = os.path.abspath(splitext(temp_target)[0])
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(tasks.comment())
    print(colored("Stage 5: Map sequence fastq file to the indexed genome file ... ", "green"))
    print(tasks.comment())
    return tasks.map_seq_to_probes(fastq, genomeDir, cpuNum, outPrefix)
示例#16
0
def main():
    """Draw the stage graph, run the pipeline and report the elapsed time.

    Writes the flowchart of `tasks_torun` to
    'summary_pipeline_stages_to_run.ps', runs the named pipeline stages with
    `cpuNum` processes, converts the PostScript file named by
    `options.flowchart` via `convertPs`, and finally prints the elapsed
    wall-clock time.
    """
    t0 = time.time()
    # The concatenation must happen inside the print call: written as
    # `print (...) + str(t0)` it only works as a Python 2 print statement and
    # raises TypeError (None + str) on Python 3.
    print(" Starting time ..... :" + str(t0))
    tasks_torun = [prepare_analysis, prepareDB_file, create_gtf_file, indexGenomeFile,
                   map_to_probes, format_count, combine_count_data, alignment_summary,
                   combine_alignment_summary, plot_alignment_summary]

    pipeline_printout_graph('summary_pipeline_stages_to_run.ps', 'ps', tasks_torun,
                            user_colour_scheme={"colour_scheme_index": 6},
                            no_key_legend=False, pipeline_name="TempO-seq Analysis",
                            size=(11, 8), dpi=30,
                            forcedtorun_tasks=[indexGenomeFile, combine_count_data],
                            draw_vertically=True, ignore_upstream_of_target=False)
    pipeline_run(["prepare_analysis", "prepareDB_file", 'create_gtf_file',
                  'indexGenomeFile', 'map_to_probes', 'count_mapped_reads',
                  'combine_count_data', 'format_count', 'alignment_summary',
                  'combine_alignment_summary'],
                 verbose=1, multiprocess=cpuNum)
    print("....................." + resultDir)
    # NOTE(review): the value of tasks.comment() is discarded here, whereas
    # the stage functions print it -- confirm whether it should be printed.
    tasks.comment()
    psfile = options.flowchart

    convertPs(psfile)
    tasks.comment()

    # Report the elapsed time as H:MM:SS, truncated to whole seconds.
    elapsedTime = int((time.time()) - t0)
    elapsedTime = str(datetime.timedelta(seconds=elapsedTime))
    print("Time to complete the task ....." + colored(elapsedTime, "red"))
示例#17
0
文件: main.py 项目: demis001/raslpipe
def main():
    """Draw the stage graph, run the pipeline and report the elapsed time.

    Writes the flowchart of `tasks_torun` to
    'summary_pipeline_stages_to_run.ps', runs the named pipeline stages
    (through 'plot_alignment_summary') with `cpuNum` processes, converts the
    PostScript file named by `options.flowchart` via `convertPs`, and
    finally prints the elapsed wall-clock time.
    """
    t0 = time.time()
    # The concatenation must happen inside the print call: written as
    # `print (...) + str(t0)` it only works as a Python 2 print statement and
    # raises TypeError (None + str) on Python 3.
    print(" Starting time ..... :" + str(t0))
    tasks_torun = [prepare_analysis, prepareDB_file, create_gtf_file, indexGenomeFile,
                   map_to_probes, format_count, combine_count_data, alignment_summary,
                   combine_alignment_summary, plot_alignment_summary]

    pipeline_printout_graph('summary_pipeline_stages_to_run.ps', 'ps', tasks_torun,
                            user_colour_scheme={"colour_scheme_index": 6},
                            no_key_legend=False, pipeline_name="TempO-seq Analysis",
                            size=(11, 8), dpi=30,
                            forcedtorun_tasks=[indexGenomeFile, combine_count_data],
                            draw_vertically=True, ignore_upstream_of_target=False)
    pipeline_run(["prepare_analysis", "prepareDB_file", 'create_gtf_file',
                  'indexGenomeFile', 'map_to_probes', 'count_mapped_reads',
                  'combine_count_data', 'format_count', 'alignment_summary',
                  'combine_alignment_summary', 'plot_alignment_summary'],
                 verbose=1, multiprocess=cpuNum)
    print("....................." + resultDir)
    # NOTE(review): the value of tasks.comment() is discarded here, whereas
    # the stage functions print it -- confirm whether it should be printed.
    tasks.comment()
    psfile = options.flowchart

    convertPs(psfile)
    tasks.comment()

    # Report the elapsed time as H:MM:SS, truncated to whole seconds.
    elapsedTime = int((time.time()) - t0)
    elapsedTime = str(datetime.timedelta(seconds=elapsedTime))
    print("Time to complete the task ....." + colored(elapsedTime, "red"))
示例#18
0
import sys

import tasks
import users
import log

# Command-line entry point: attach a comment to a task.
# usage: comment <path-to-task> <comments>
if len(sys.argv) < 3:
    print('error: expected 2 arguments')
    print('usage: comment <path-to-task> <comments>')
    # Exit with a non-zero status so callers can detect the usage error;
    # a bare sys.exit() reports success (status 0).
    sys.exit(1)

# Replace the program name so the command line logged below reads "comment ...".
sys.argv[0] = 'comment'
taskPath = sys.argv[1]
# Everything after the task path is the comment text, re-joined with spaces.
comments = ' '.join(sys.argv[2:])

print("adding comment: " + comments)
tasks.comment(taskPath, users.current, comments)
log.add(' '.join(sys.argv))
print('comment added.')