Example #1
File: sigproc.py Project: gourneau/TS
def sigproc(analysisArgs, libKey, tfKey, pathtorawblock, SIGPROC_RESULTS):

    if analysisArgs:
        cmd = analysisArgs  # e.g /home/user/Analysis --flowlimit 80
    else:
        cmd = "Analysis"
        printtime("ERROR: Analysis command not specified, using default: 'Analysis'")

    cmd += " --librarykey=%s" % (libKey)
    cmd += " --tfkey=%s" % (tfKey)
    cmd += " --no-subdir"
    cmd += " --output-dir=%s" % (SIGPROC_RESULTS)
    cmd += " %s" % pathtorawblock

    printtime("Analysis command: " + cmd)
    proc = subprocess.Popen(shlex.split(cmd.encode('utf8')), shell=False, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout_value, stderr_value = proc.communicate()
    status = proc.returncode
    sys.stdout.write("%s" % stdout_value)
    sys.stderr.write("%s" % stderr_value)

    # Ion Reporter
    try:
        sigproc_log_path = os.path.join(SIGPROC_RESULTS, 'sigproc.log')
        with open(sigproc_log_path, 'a') as f:
            if stdout_value: f.write(stdout_value)
            if stderr_value: f.write(stderr_value)
    except IOError:
        traceback.print_exc()

    return status
Example #2
def processBlock(tf_basecaller_bam_filename, BASECALLER_RESULTS, tfkey, floworder, analysis_dir):

    try:

        # These files will be created
        tfstatsjson_path = os.path.join(BASECALLER_RESULTS, "TFStats.json")
        tfbam_filename = os.path.join(BASECALLER_RESULTS, "rawtf.bam")
        tfref_filename = os.path.join(BASECALLER_RESULTS, "DefaultTFs.fasta")
        ionstats_tf_filename = os.path.join(BASECALLER_RESULTS, "ionstats_tf.json")

        # TF analysis in 5 simple steps

        buildTFReference(tfref_filename, analysis_dir, tfkey)

        alignTFs(tf_basecaller_bam_filename, tfbam_filename, tfref_filename)

        ionstats.generate_ionstats_tf(tfbam_filename, tfref_filename, ionstats_tf_filename)

        ionstats_plots.tf_length_histograms(ionstats_tf_filename, ".")

        ionstats.generate_legacy_tf_files(ionstats_tf_filename, tfstatsjson_path)

    except NoTFDataException as e:
        printtime("No data to analyze Test Fragments (%s)" % e.msg)
        f = open(os.path.join(BASECALLER_RESULTS, "TFStats.json"), "w")
        f.write(json.dumps({}))
        f.close()

    except:
        traceback.print_exc()
Example #3
def reduce_stats (input_filename_list, output_filename):

    # wait for asynchronous process substitution processes # TODO
    import time
    time.sleep(10)

    try:
        #need to copy, cannot index an iterator
        copy_input_filename_list = list(input_filename_list)
        length=len(copy_input_filename_list)

        # process file list in smaller intervals
        size = 100
        i=0
        while (i<length):
            if i+size<length:
                input_files = copy_input_filename_list[i:i+size]
                output_file = output_filename+"."+str(i+size)
            else:
                input_files = copy_input_filename_list[i:length]
                output_file = output_filename
            # add results from earlier iterations
            if i>0:
                input_files=input_files+[output_filename+"."+str(i)]
            i=i+size

            com = "ionstats reduce"
            com += " -o %s" % (output_file)
            com += " " + " ".join(input_files)
            printtime("DEBUG: Calling '%s'" % com)
            subprocess.call(com,shell=True)
    except:
        printtime('ERROR: Failed ionstats reduce')
        traceback.print_exc()
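
Usage sketch (not part of the original source): the loop above merges files in batches of 100 and threads each intermediate result into the next batch via the "<output>.<i>" suffix. A standalone, side-effect-free illustration of that batching logic:

def batches(filenames, output_filename, size=100):
    # mirrors the windowing above without invoking 'ionstats reduce'
    length = len(filenames)
    i = 0
    while i < length:
        if i + size < length:
            input_files = filenames[i:i + size]
            output_file = output_filename + "." + str(i + size)
        else:
            input_files = filenames[i:length]
            output_file = output_filename
        if i > 0:  # carry the previous intermediate result forward
            input_files = input_files + [output_filename + "." + str(i)]
        i = i + size
        yield input_files, output_file

for inputs, out in batches(["f%d.json" % n for n in range(250)], "merged.json"):
    print("%d %s" % (len(inputs), out))
# prints: 100 merged.json.100 / 101 merged.json.200 / 51 merged.json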
Example #4
def merge_bams(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets, mark_duplicates):

    for dataset in basecaller_datasets['datasets']:

        try:
            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group]['reference']

            filtered = True
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get('filtered',False):
                    filtered = False

            if reference and not filtered:
                bamdir = ALIGNMENT_RESULTS
                bamfile = dataset['file_prefix']+'.bam'
            else:
                bamdir = BASECALLER_RESULTS
                bamfile = dataset['basecaller_bam']
            block_bam_list = [os.path.join(blockdir, bamdir, bamfile) for blockdir in dirs]
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filepath = os.path.join(bamdir, bamfile)
            if block_bam_list:
                if reference and not filtered:
                    composite_bai_filepath = composite_bam_filepath+'.bai'
                    blockprocessing.merge_bam_files(block_bam_list, composite_bam_filepath, composite_bai_filepath, mark_duplicates)
                else:
                    composite_bai_filepath=""
                    mark_duplicates=False
                    blockprocessing.merge_bam_files(block_bam_list, composite_bam_filepath, composite_bai_filepath, mark_duplicates, method='samtools')

        except:
            print traceback.format_exc()
            printtime("ERROR: merging %s unsuccessful" % bamfile)
Example #5
def old_read_length_histogram(ionstats_basecaller_filename, output_png_filename, max_length):
    
    try:
        printtime("DEBUG: Generating plot %s" % output_png_filename)
        
        f = open(ionstats_basecaller_filename,'r')
        ionstats_basecaller = json.load(f)
        f.close()

        histogram_x = range(0,max_length,1)
        num_bins = len(histogram_x)
        histogram_y = [0] * num_bins
    
        for read_length,frequency in enumerate(ionstats_basecaller['full']['read_length_histogram']):
            current_bin = min(read_length,num_bins-1)
            if read_length < num_bins:
                histogram_y[current_bin] += frequency
    
        fig = plt.figure(figsize=(8,4),dpi=100)
        ax = fig.add_subplot(111)
        ax.bar(histogram_x, histogram_y, width=2, color="#2D4782",linewidth=0)
        ax.set_title('Read Length Histogram')
        ax.set_xlabel('Read Length')
        ax.set_ylabel('Count')
        fig.savefig(output_png_filename)

    except:
        printtime('Unable to generate plot %s' % output_png_filename)
        traceback.print_exc()
Example #6
def generate_ionstats_alignment_cmd(ionstatsArgs, bam_filenames, ionstats_alignment_filename, ionstats_alignment_h5_filename, basecaller_json, library_key, histogram_length):

    try:
        if ionstatsArgs:
            com = ionstatsArgs
        else:
            com = "ionstats alignment"
            printtime("ERROR: ionstats alignment command not specified, using default: 'ionstats alignment'")

        com += " -i %s" % (bam_filenames[0])
        for bam_filename in bam_filenames[1:]:
            com += ",%s" % (bam_filename)
        com += " -o %s" % (ionstats_alignment_filename)
        com += " -k %s" % (library_key)
        com += " -h %d" % (int(histogram_length))
        com += " --evaluate-hp true"
        com += " --output-h5 %s" % ionstats_alignment_h5_filename

        if basecaller_json:
            block_col_offset = basecaller_json["BaseCaller"]['block_col_offset']
            block_row_offset = basecaller_json["BaseCaller"]['block_row_offset']
            block_col_size   = basecaller_json["BaseCaller"]['block_col_size']
            block_row_size   = basecaller_json["BaseCaller"]['block_row_size']
            subregion_col_size, subregion_row_size = generate_ionstats_subregion_dims(block_col_size, block_row_size)

            com += " --chip-origin %s,%s" % (block_col_offset, block_row_offset)
            com += " --chip-dim %s,%s" % (block_col_size, block_row_size)
            com += " --subregion-dim %s,%s" % (subregion_col_size, subregion_row_size)
    except:
        traceback.print_exc()
        raise
    return com
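
Illustrative call (file names and key below are placeholders, not from the original source); passing basecaller_json=None omits the chip-geometry flags:

com = generate_ionstats_alignment_cmd(
    "ionstats alignment",          # ionstatsArgs, so the ERROR fallback branch is skipped
    ["rawlib.bam"],                # bam_filenames
    "ionstats_alignment.json",     # ionstats_alignment_filename
    "ionstats_error_summary.h5",   # ionstats_alignment_h5_filename
    None,                          # basecaller_json: no --chip-* flags appended
    "TCAG",                        # library_key
    400)                           # histogram_length
print(com)
# -> ionstats alignment -i rawlib.bam -o ionstats_alignment.json -k TCAG -h 400
#    --evaluate-hp true --output-h5 ionstats_error_summary.h5   (one line)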
Example #7
def basecaller_cmd(basecallerArgs,
                   SIGPROC_RESULTS,
                   libKey,
                   tfKey,
                   runID,
                   BASECALLER_RESULTS,
                   block_col_offset,
                   block_row_offset,
                   datasets_pipeline_path,
                   adapter):
    if basecallerArgs:
        cmd = basecallerArgs
    else:
        cmd = "BaseCaller"
        printtime("ERROR: BaseCaller command not specified, using default: 'BaseCaller'")
    
    cmd += " --input-dir=%s" % (SIGPROC_RESULTS)
    cmd += " --librarykey=%s" % (libKey)
    cmd += " --tfkey=%s" % (tfKey)
    cmd += " --run-id=%s" % (runID)
    cmd += " --output-dir=%s" % (BASECALLER_RESULTS)
    cmd += " --block-col-offset %d" % (block_col_offset)
    cmd += " --block-row-offset %d" % (block_row_offset)
    cmd += " --datasets=%s" % (datasets_pipeline_path)
    cmd += " --trim-adapter %s" % (adapter)

    return cmd
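
A minimal usage sketch; every argument value below is a made-up placeholder:

cmd = basecaller_cmd("BaseCaller", "sigproc_results", "TCAG", "ATCG", "A1B2C",
                     "basecaller_results", 0, 0,
                     "basecaller_results/datasets_pipeline.json",
                     "ATCACCGACTGCCCATAGAGAGGCTGAGAC")
print(cmd)
# -> BaseCaller --input-dir=sigproc_results --librarykey=TCAG --tfkey=ATCG --run-id=A1B2C
#    --output-dir=basecaller_results --block-col-offset 0 --block-row-offset 0
#    --datasets=basecaller_results/datasets_pipeline.json --trim-adapter ATCACC...   (one line)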
Example #8
def generate_ionstats_alignment(
    bam_filenames, ionstats_alignment_filename, ionstats_alignment_h5_filename, basecaller_json, histogram_length
):

    try:
        com = "ionstats alignment"
        com += " -i %s" % (bam_filenames[0])
        for bam_filename in bam_filenames[1:]:
            com += ",%s" % (bam_filename)
        com += " -o %s" % (ionstats_alignment_filename)
        com += " -h %d" % (int(histogram_length))

        if basecaller_json:
            block_col_offset = basecaller_json["BaseCaller"]["block_col_offset"]
            block_row_offset = basecaller_json["BaseCaller"]["block_row_offset"]
            block_col_size = basecaller_json["BaseCaller"]["block_col_size"]
            block_row_size = basecaller_json["BaseCaller"]["block_row_size"]

            com += " --evaluate-hp true"
            com += " --output-h5 %s" % ionstats_alignment_h5_filename
            com += " --chip-origin %s,%s" % (block_col_offset, block_row_offset)
            com += " --chip-dim %s,%s" % (block_col_size, block_row_size)
            com += " --subregion-dim %s,%s" % (min(92, block_col_size - 1), min(74, block_row_size - 1))

        printtime("DEBUG: Calling '%s'" % com)
        subprocess.call(com, shell=True)
    except:
        printtime("Failed ionstats alignment")
        traceback.print_exc()
Example #9
def generate_legacy_tf_files(ionstats_tf_filename, tfstats_json_filename):

    try:
        f = open(ionstats_tf_filename, "r")
        ionstats_tf = json.load(f)
        f.close()

        tfstats_json = {}
        for tf_name, tf_data in ionstats_tf["results_by_tf"].iteritems():

            tfstats_json[tf_name] = {
                "TF Name": tf_name,
                "TF Seq": tf_data["sequence"],
                "Num": tf_data["full"]["num_reads"],
                "System SNR": tf_data["system_snr"],
                "Per HP accuracy NUM": tf_data["hp_accuracy_numerator"],
                "Per HP accuracy DEN": tf_data["hp_accuracy_denominator"],
                "Q10": tf_data["AQ10"]["read_length_histogram"],
                "Q17": tf_data["AQ17"]["read_length_histogram"],
                "Q10 Mean": tf_data["AQ10"]["mean_read_length"],
                "Q17 Mean": tf_data["AQ17"]["mean_read_length"],
                "50Q10": sum(tf_data["AQ10"]["read_length_histogram"][50:]),
                "50Q17": sum(tf_data["AQ17"]["read_length_histogram"][50:]),
            }

        f = open(tfstats_json_filename, "w")
        f.write(json.dumps(tfstats_json, indent=4))
        f.close()

    except:
        printtime("Failed to generate %s" % (tfstats_json_filename))
        traceback.print_exc()
Example #10
File: combineReports.py Project: rb94/TS
def barcode_report_stats(barcode_names):
    CA_barcodes_json = []
    ionstats_file_list = []
    printtime("DEBUG: creating CA_barcode_summary.json")

    for bcname in sorted(barcode_names):
        ionstats_file = bcname + '_rawlib.ionstats_alignment.json'
        barcode_json = {"barcode_name": bcname, "AQ7_num_bases":0, "full_num_reads":0, "AQ7_mean_read_length":0}
        try:
            stats = json.load(open(ionstats_file))
            for key in stats.keys():
                if key in ['AQ7', 'AQ10', 'AQ17', 'AQ20', 'AQ30', 'AQ47', 'full', 'aligned']:
                    barcode_json.update({
                        key+ "_max_read_length": stats[key].get("max_read_length"),
                        key+ "_mean_read_length": stats[key].get("mean_read_length"),
                        key+ "_num_bases": stats[key].get("num_bases"),
                        key+ "_num_reads": stats[key].get("num_reads")
                    })
            ionstats_file_list.append(ionstats_file)
        except:
            printtime("DEBUG: error reading ionstats from %s" % ionstats_file)
            traceback.print_exc()

        if bcname == 'nomatch':
            CA_barcodes_json.insert(0, barcode_json)
        else:
            CA_barcodes_json.append(barcode_json)

    with open('CA_barcode_summary.json','w') as f:
        f.write(json.dumps(CA_barcodes_json, indent=2))
    
    # generate merged ionstats_alignment.json
    if not os.path.exists('ionstats_alignment.json'):
        ionstats.reduce_stats(ionstats_file_list,'ionstats_alignment.json')
Example #11
File: TLScript.py Project: golharam/TS
def spawn_cluster_job(rpath, scriptname, args, holds=None):
    out_path = "%s/drmaa_stdout_block.html" % rpath
    err_path = "%s/drmaa_stderr_block.txt" % rpath
    logout = open(out_path, "w")
    logout.write("<html><pre> \n")
    logout.close()
    cwd = os.getcwd()

    # SGE
    sge_queue = "all.q"
    if is_thumbnail:
        sge_queue = "thumbnail.q"
    jt_nativeSpecification = "-pe ion_pe 1 -q " + sge_queue

    # TODO experiment
    if is_blockprocessing and ("X1" in rpath):  # process some blocks on instrument
        if env["pgmName"] == "Mustang":  # != ""
            sge_queue = "proton_" + env["pgmName"].lower() + ".q"
            jt_nativeSpecification = "-q " + sge_queue

    printtime("Use " + sge_queue)

    # TORQUE
    # jt_nativeSpecification = ""

    jt_remoteCommand = "python"
    jt_workingDirectory = os.path.join(cwd, rpath)
    jt_outputPath = ":" + os.path.join(cwd, out_path)
    jt_errorPath = ":" + os.path.join(cwd, err_path)
    jt_args = [os.path.join("/usr/bin", scriptname), args]
    jt_joinFiles = False

    if holds != None and len(holds) > 0:
        jt_nativeSpecification += " -hold_jid "
        for holdjobid in holds:
            jt_nativeSpecification += "%s," % holdjobid

    # TODO remove debug output
    print jt_nativeSpecification
    print jt_remoteCommand
    print jt_workingDirectory
    print jt_outputPath
    print jt_errorPath
    print jt_args

    try:
        jobid = jobserver.submitjob(
            jt_nativeSpecification,
            jt_remoteCommand,
            jt_workingDirectory,
            jt_outputPath,
            jt_errorPath,
            jt_args,
            jt_joinFiles,
        )
    except:
        traceback.print_exc()
        jobid = -1

    return jobid
Example #12
File: combineReports.py Project: rb94/TS
def get_parent_barcode_files(parent_folder, datasets_path, barcodeSet):
    # try to get barcode names from datasets json, fallback on globbing for older reports
    datasetsFile = os.path.join(parent_folder,datasets_path)
    barcode_bams = []
    try:
        with open(datasetsFile, 'r') as f:
            datasets_json = json.loads(f.read())
        for dataset in datasets_json.get("datasets",[]):
            bamfile = os.path.join(parent_folder, dataset["file_prefix"]+'.bam')
            if os.path.exists(bamfile):
                barcode_bams.append(bamfile)
            elif 'legacy_prefix' in dataset.keys():
                old_bamfile = os.path.join(parent_folder, dataset["legacy_prefix"]+'.bam')
                if os.path.exists(old_bamfile):
                    barcode_bams.append(old_bamfile)
    except:
        pass  
    
    if len(barcode_bams) == 0:
        printtime("DEBUG: no barcoded files found from %s" % datasetsFile)
        barcode_bams = glob( os.path.join(parent_folder, barcodeSet+'*_rawlib.bam') )
        barcode_bams.append( os.path.join(parent_folder, 'nomatch_rawlib.bam') )    
        barcode_bams.sort()
        
    printtime("DEBUG: found %i barcodes in %s" % (len(barcode_bams), parent_folder) )
    return barcode_bams
Example #13
def read_length_histogram(ionstats_basecaller_filename, output_png_filename, max_length):

    try:
        printtime("DEBUG: Generating plot %s" % output_png_filename)
        
        f = open(ionstats_basecaller_filename,'r')
        ionstats_basecaller = json.load(f)
        f.close()

        histogram_x = range(0,max_length,1)
        num_bins = len(histogram_x)
        histogram_y = [0] * num_bins
        
        for read_length,frequency in enumerate(ionstats_basecaller['full']['read_length_histogram']):
            current_bin = min(read_length,num_bins-1)
            if read_length < num_bins:
                histogram_y[current_bin] += frequency
        
        max_y = max(histogram_y)
        max_y = max(max_y,1)
        
        fig = plt.figure(figsize=(4,3.5),dpi=100)
        ax = fig.add_subplot(111,frame_on=False,yticks=[],position=[0,0.15,1,0.88])
        ax.bar(histogram_x,histogram_y,width=2.5, color="#2D4782",linewidth=0, zorder=2)
    
    
        ax.set_ylim(0,1.2*max_y)
        ax.set_xlim(-5,max_length+15)
        ax.set_xlabel("Read Length")
        fig.patch.set_alpha(0.0)
        fig.savefig(output_png_filename)

    except:
        printtime('Unable to generate plot %s' % output_png_filename)
        traceback.print_exc()
Example #14
File: TLScript.py Project: Jorges1000/TS
def getExpLogMsgs(env):
    """
    Parses explog_final.txt for warning messages and dumps them to
    ReportLog.html.
    This only works if the raw data files have not been deleted.
    For a from-wells analysis, you may not have raw data.
    """
    inputFile = os.path.join(env["pathToRaw"], "explog_final.txt")
    outputFile = os.path.join("./", "ReportLog.html")
    try:
        f = open(inputFile, "r")
    except:
        printtime("Cannot open file %s" % inputFile)
        return True

    line = f.readline()
    while line:
        if "WARNINGS:" in line:
            if len("WARNINGS: ") < len(line):
                # print to output file
                try:
                    g = open(outputFile, "a")
                    g.write("From PGM explog_final.txt:\n")
                    g.write(line)
                    g.close()
                except:
                    printtime("Cannot open file %s" % outputFile)
        line = f.readline()

    f.close()

    return False
Example #15
File: sigproc.py Project: Brainiarc7/TS
def generate_raw_data_traces(libKey, tfKey, floworder, SIGPROC_RESULTS):
    ########################################################
    #Generate Raw Data Traces for lib and TF keys          #
    ########################################################
    printtime("Generate Raw Data Traces for lib and TF keys(iontrace_Test_Fragment.png, iontrace_Library.png) and raw_peak_signal file")

    tfRawPath = os.path.join(SIGPROC_RESULTS, 'avgNukeTrace_%s.txt' % tfKey)
    libRawPath = os.path.join(SIGPROC_RESULTS, 'avgNukeTrace_%s.txt' % libKey)
    peakOut = 'raw_peak_signal'

    if os.path.exists(tfRawPath):
        try:
            kp = plotKey.KeyPlot(tfKey, floworder, 'Test Fragment')
            kp.parse(tfRawPath)
            kp.dump_max(os.path.join('.',peakOut))
            kp.plot()
        except:
            printtime("TF key graph didn't render")
            traceback.print_exc()
    else:
        printtime("ERROR: %s is missing" % tfRawPath)

    if os.path.exists(libRawPath):
        try:
            kp = plotKey.KeyPlot(libKey, floworder, 'Library')
            kp.parse(libRawPath)
            kp.dump_max(os.path.join('.',peakOut))
            kp.plot()
        except:
            printtime("Lib key graph didn't render")
            traceback.print_exc()
    else:
        printtime("ERROR: %s is missing" % libRawPath)
Example #16
File: PEScript.py Project: gourneau/TS
def PE_set_value(key):
    if forward_env[key] == reverse_env[key]:
        return forward_env[key]
    else:
        printtime("ERROR forward run %s (%s) doesn't match reverse run %s (%s)"
                  % (key, forward_env[key], key, reverse_env[key]) )
        return 'unknown'
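
A toy sketch of the consistency check (forward_env, reverse_env, and printtime live at module scope in the original; here they are stubbed so the snippet runs standalone):

printtime = lambda msg: None   # stub for this sketch
forward_env = {'chipType': '316', 'flows': '520'}
reverse_env = {'chipType': '316', 'flows': '260'}
print(PE_set_value('chipType'))   # 316 (values agree)
print(PE_set_value('flows'))      # unknown (mismatch is logged via printtime)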
Example #17
File: ionstats.py Project: bdiegel/TS
def generate_legacy_tf_files (ionstats_tf_filename, tfstats_json_filename):

    try:
        f = open(ionstats_tf_filename,'r')
        ionstats_tf = json.load(f)
        f.close()
        
        tfstats_json = {}
        for tf_name,tf_data in ionstats_tf['results_by_tf'].iteritems():
            
            tfstats_json[tf_name] = {
                'TF Name' : tf_name,
                'TF Seq' : tf_data['sequence'],
                'Num' : tf_data['full']['num_reads'],
                'System SNR' : tf_data['system_snr'],
                'Per HP accuracy NUM' : tf_data['hp_accuracy_numerator'],
                'Per HP accuracy DEN' : tf_data['hp_accuracy_denominator'],
                'Q10' : tf_data['AQ10']['read_length_histogram'],
                'Q17' : tf_data['AQ17']['read_length_histogram'],
                'Q10 Mean' : tf_data['AQ10']['mean_read_length'],
                'Q17 Mean' : tf_data['AQ17']['mean_read_length'],
                '50Q10' : sum(tf_data['AQ10']['read_length_histogram'][50:]),
                '50Q17' : sum(tf_data['AQ17']['read_length_histogram'][50:]),
            }

            
        f = open(tfstats_json_filename,'w')
        f.write(json.dumps(tfstats_json, indent=4))
        f.close()

    except:
        printtime('Failed to generate %s' % (tfstats_json_filename))
        traceback.print_exc()
Example #18
def basecaller_cmd(basecallerArgs,
                   SIGPROC_RESULTS,
                   libKey,
                   tfKey,
                   runID,
                   BASECALLER_RESULTS,
                   block_col_offset,
                   block_row_offset,
                   datasets_pipeline_path,
                   adapter):
    if basecallerArgs:
        cmd = basecallerArgs
    else:
        cmd = "BaseCaller"
        printtime("ERROR: BaseCaller command not specified, using default: 'BaseCaller'")

    cmd += " --input-dir=%s" % (SIGPROC_RESULTS)
    cmd += " --librarykey=%s" % (libKey)
    cmd += " --tfkey=%s" % (tfKey)
    cmd += " --run-id=%s" % (runID)
    cmd += " --output-dir=%s" % (BASECALLER_RESULTS)
    cmd += " --block-offset %d,%d" % (block_col_offset, block_row_offset)
    cmd += " --datasets=%s" % (datasets_pipeline_path)
    cmd += " --trim-adapter %s" % (adapter)

    phase_estimates_json = os.path.join(SIGPROC_RESULTS, "PhaseEstimates.json")
    if os.path.exists(phase_estimates_json):
        cmd += " --phase-estimation-file %s" % phase_estimates_json

    return cmd
Example #19
def reduce_stats_h5 (input_filename_list, output_filename):

    try:
        #need to copy, cannot index an iterator
        copy_input_filename_list = list(input_filename_list)
        length=len(copy_input_filename_list)

        # process file list in smaller intervals
        size = 100
        i=0
        while (i<length):
            if i+size<length:
                input_files = copy_input_filename_list[i:i+size]
                output_file = output_filename+"."+str(i+size)
            else:
                input_files = copy_input_filename_list[i:length]
                output_file = output_filename
            # add results from earlier iterations
            if i>0:
                input_files=input_files+[output_filename+"."+str(i)]
            i=i+size

            com = "ionstats reduce-h5"
            com += " -o %s" % (output_file)
            com += " " + " ".join(input_files)
            printtime("DEBUG: Calling '%s'" % com)
            proc = subprocess.Popen(com, shell=True)
            status = proc.wait()
            if proc.returncode != 0:
                raise Exception('ERROR: ionstats reduce-h5 return code: %s' % proc.returncode)
    except:
        printtime('ERROR: Failed ionstats reduce-h5')
        traceback.print_exc()
        raise
Example #20
def old_aq_length_histogram(ionstats_alignment_filename, output_png_filename, aq_string, color):
    
    try:
        printtime("DEBUG: Generating plot %s" % output_png_filename)
    
        f = open(ionstats_alignment_filename,'r')
        ionstats_alignment = json.load(f)
        f.close()

        data = ionstats_alignment[aq_string]['read_length_histogram']

        xaxis = range(len(data))
        ymax = max(data) + 10
        xlen = len(data) + 10
        xmax = len(data) - 1
        if xmax < 400:
            xmax = 400
            
        fig = plt.figure(figsize=(8,4),dpi=100)
        ax = fig.add_subplot(111)
        
        ax.bar(xaxis, data, facecolor = color, align = 'center', linewidth=0, alpha=1.0, width = 1.0)
        ax.set_xlabel('Filtered %s Read Length' % aq_string)
        ax.set_ylabel('Count')
        ax.set_title('Filtered %s Read Length' % aq_string)
        ax.set_xlim(0,xmax)
        ax.set_ylim(0,ymax)
        fig.savefig(output_png_filename)
        
    except:
        printtime('Unable to generate plot %s' % output_png_filename)
        traceback.print_exc()
Example #21
def generate_ionstats_subregion_dims(block_col_size, block_row_size):

    try:
        subregion_col_size = 92
        subregion_row_size = 74
        if (block_col_size == 1200 and block_row_size == 800): # Thumbnail
            subregion_col_size = 50
            subregion_row_size = 50
        elif ((block_col_size == 30912 and block_row_size == 21296) or (block_col_size == 2576 and block_row_size == 2662)): # P2
            subregion_col_size = 368
            subregion_row_size = 296
        elif ((block_col_size == 15456 and block_row_size == 10656) or (block_col_size == 1288 and block_row_size == 1332)): # P1
            subregion_col_size = 184
            subregion_row_size = 148
        elif ((block_col_size ==  7680 and block_row_size ==  5312) or (block_col_size ==  640 and block_row_size ==  664)): # P0
            subregion_col_size = 80
            subregion_row_size = 83
        elif (block_col_size == 3392 and block_row_size == 3792): # 318
            subregion_col_size = 53
            subregion_row_size = 48
        elif (block_col_size == 3392 and block_row_size == 2120): # 316v2
            subregion_col_size = 53
            subregion_row_size = 53
        elif (block_col_size == 2736 and block_row_size == 2640): # 316
            subregion_col_size = 48
            subregion_row_size = 48
        elif (block_col_size == 1280 and block_row_size == 1152): # 314
            subregion_col_size = 40
            subregion_row_size = 48
        return(subregion_col_size, subregion_row_size)
    except:
        printtime('ERROR: Failed to generate subregion dims from input %s,%s' % (block_col_size, block_row_size))
        traceback.print_exc()
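
Spot-checks of the lookup table above, assuming the function is in scope; dimensions that match no branch fall through to the 92x74 default:

print(generate_ionstats_subregion_dims(1200, 800))    # (50, 50)  thumbnail
print(generate_ionstats_subregion_dims(1280, 1152))   # (40, 48)  314 chip
print(generate_ionstats_subregion_dims(999, 999))     # (92, 74)  default fallback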
Example #22
File: combineReports.py Project: rb94/TS
def submit_job(script, args, sge_queue = 'all.q', hold_jid = None):
    cwd = os.getcwd()
    #SGE    
    jt_nativeSpecification = "-pe ion_pe 1 -q " + sge_queue

    printtime("Use "+ sge_queue)

    jt_remoteCommand = "python"
    jt_workingDirectory = cwd
    jt_outputPath = ":" + "%s/drmaa_stdout_block.txt" % cwd
    jt_errorPath = ":" + "%s/drmaa_stderr_block.txt" % cwd
    jt_args = [script] + args
    jt_joinFiles = False

    if hold_jid != None and len(hold_jid) > 0:
        jt_nativeSpecification += " -hold_jid "
        for holdjobid in hold_jid:
            jt_nativeSpecification += "%s," % holdjobid

    try:
        jobid = jobserver.submitjob(
            jt_nativeSpecification,
            jt_remoteCommand,
            jt_workingDirectory,
            jt_outputPath,
            jt_errorPath,
            jt_args,
            jt_joinFiles)
        return jobid
    
    except:
        traceback.print_exc()
        printtime("FAILED submitting %s job" % script)
        sys.exit()
Example #23
def processBlock(tf_basecaller_bam_filename, BASECALLER_RESULTS, tfkey, floworder, analysis_dir):

    try:

        # These files will be created
        tfstatsjson_path = os.path.join(BASECALLER_RESULTS,"TFStats.json")
        tfbam_filename = os.path.join(BASECALLER_RESULTS,"rawtf.bam")
        tfref_filename = os.path.join(BASECALLER_RESULTS,"DefaultTFs.fasta")

        # TF analysis in 5 simple steps

        buildTFReference(tfref_filename,analysis_dir,tfkey)

        alignTFs(tf_basecaller_bam_filename, tfbam_filename, tfref_filename)

        doAlignStats(tfbam_filename)    # Note: alignStats dumps its results to files in current directory

        doTFMapper(tfbam_filename, tfref_filename, tfstatsjson_path)

        generatePlots(floworder,tfstatsjson_path)

    
    except NoTFDataException as e:
        printtime("No data to analyze Test Fragments (%s)" % e.msg)
        f = open(os.path.join(BASECALLER_RESULTS,'TFStats.json'),'w')
        f.write(json.dumps({}))
        f.close()

    except:
        traceback.print_exc()
Example #24
def find_barcodes_to_process(parentBAMs, barcodeSet):
    # get barcode files to process
    barcode_files = {}
    barcodeSet_Info = None
    datasets_path = 'basecaller_results/datasets_basecaller.json'
    barcodelist_path = 'barcodeList.txt'

    if not barcodeSet:
        return barcodeSet, barcode_files, barcodeSet_Info

    for bamfile in parentBAMs:
        parent_folder = os.path.dirname(bamfile)
        if os.path.exists(os.path.join(parent_folder, barcodelist_path)):
            bcList_file = os.path.join(parent_folder, barcodelist_path)
            bcSetName_new = open(bcList_file, 'r').readline().split('file_id')[1].strip()
            if barcodeSet != bcSetName_new:
                printtime("Warning: different barcode sets: %s and %s" % (barcodeSet, bcSetName_new))

            if not barcodeSet_Info:
                barcodeSet_Info = {'nomatch': {'index': 0}}
                try:
                    with open(bcList_file, 'r') as f:
                        for line in f.readlines():
                            if line.startswith('barcode'):
                                splitline = line.split(',')
                                name = splitline[1]
                                barcodeSet_Info[name] = {
                                    'index': splitline[0].split()[1],
                                    'sequence': splitline[2],
                                    'adapter': splitline[3]
                                }
                except:
                    traceback.print_exc()

            # get barcode BAM files
            barcode_bams = get_parent_barcode_files(parent_folder, datasets_path, barcodeSet)

            for bc_path in barcode_bams:
                try:
                    bcname = [name for name in barcodeSet_Info.keys() if os.path.basename(bc_path).startswith(name)][0]
                except:
                    bcname = 'unknown'

                if bcname not in barcode_files:
                    barcode_files[bcname] = {
                        'count': 0,
                        'bcfiles_to_merge': []
                    }
                barcode_files[bcname]['filename'] = bcname + '_rawlib.bam'
                barcode_files[bcname]['count'] += 1
                barcode_files[bcname]['bcfiles_to_merge'].append(bc_path)

    if barcodeSet:
        try:
            shutil.copy(bcList_file, barcodelist_path)
        except:
            traceback.print_exc()

    return barcodeSet, barcode_files, barcodeSet_Info
Example #25
def set_result_status(status):
    try:
        if os.path.exists(primary_key_file):
            jobserver.updatestatus(primary_key_file, status, True)
            printtime("MergeTLStatus %s\tpid %d\tpk file %s started" %
                      (status, os.getpid(), primary_key_file))
    except:
        traceback.print_exc()
Example #26
def doAlignStats(bam_filename):

    try:
        com = 'alignStats -i %s -p 1 -o TF -a TF.alignTable.txt -n 12' % bam_filename
        printtime("DEBUG: Calling '%s'" % com)
        os.system(com)
    except:
        printtime("ERROR: alignStats failed")
Example #27
File: TLScript.py Project: LBragg/TS
def set_result_status(status):
    try:
        primary_key_file = os.path.join(os.getcwd(), 'primary.key')
        jobserver.updatestatus(primary_key_file, status, True)
        printtime("TLStatus %s\tpid %d\tpk file %s started in %s" %
                  (status, os.getpid(), primary_key_file, debugging_cwd))
    except:
        traceback.print_exc()
Example #28
def generate_ionstats_basecaller(unmapped_bam_filenames, ionstats_basecaller_filename, library_key, histogram_length):

    com = generate_ionstats_basecaller_cmd(unmapped_bam_filenames, ionstats_basecaller_filename, library_key, histogram_length)
    try:
        printtime("DEBUG: Calling '%s'" % com)
        subprocess.call(com, shell=True)
    except:
        printtime('Failed ionstats basecaller')
        traceback.print_exc()
Example #29
def generate_ionstats_alignment(ionstatsArgs, bam_filenames, ionstats_alignment_filename, ionstats_alignment_h5_filename, basecaller_json, library_key, histogram_length):

    com = generate_ionstats_alignment_cmd(ionstatsArgs, bam_filenames, ionstats_alignment_filename, ionstats_alignment_h5_filename, basecaller_json, library_key, histogram_length)
    try:
        printtime("DEBUG: Calling '%s'" % com)
        subprocess.call(com,shell=True)
    except:
        printtime('Failed ionstats alignment')
        traceback.print_exc()
Example #30
File: ionstats_plots.py Project: rb94/TS
def quality_histogram(ionstats_basecaller_filename,output_png_filename):
    
    try:
        printtime("DEBUG: Generating plot %s" % output_png_filename)
        
        f = open(ionstats_basecaller_filename,'r')
        ionstats_basecaller = json.load(f)
        f.close()
    
        qv_histogram = ionstats_basecaller["qv_histogram"]
        
    
        sum_total = float(sum(qv_histogram))
        if sum_total > 0:
            percent_0_5 = 100.0 * sum(qv_histogram[0:5]) / sum_total
            percent_5_10 = 100.0 * sum(qv_histogram[5:10]) / sum_total
            percent_10_15 = 100.0 * sum(qv_histogram[10:15]) / sum_total
            percent_15_20 = 100.0 * sum(qv_histogram[15:20]) / sum_total
            percent_20 = 100.0 * sum(qv_histogram[20:]) / sum_total
        else:
            percent_0_5 = 0.0
            percent_5_10 = 0.0
            percent_10_15 = 0.0
            percent_15_20 = 0.0
            percent_20 = 0.0
    
        graph_x = [0,5,10,15,20]
        graph_y = [percent_0_5,percent_5_10,percent_10_15,percent_15_20,percent_20]
    
        max_y = max(graph_y)
        
        ticklabels = ['0-4','5-9','10-14','15-19','20+']
    
        fig = plt.figure(figsize=(4,4),dpi=100)
        ax = fig.add_subplot(111,frame_on=False,xticks=[],yticks=[],position=[.1,0.1,1,0.9])
        ax.bar(graph_x,graph_y,width=4.8, color="#2D4782",linewidth=0)
    
        for idx in range(5):
            label_bottom = ticklabels[idx]
            label_top = '%1.0f%%' % graph_y[idx]
            ax.text(idx*5 + 2.5,-max_y*0.04,label_bottom,horizontalalignment='center',verticalalignment='top',
                    fontsize=12)
            ax.text(idx*5 + 2.5,max_y*0.06+graph_y[idx],label_top,horizontalalignment='center',verticalalignment='bottom',
                    fontsize=12)
        
        ax.set_xlabel("Base Quality")
        
        ax.set_xlim(0,34.8)
        ax.set_ylim(-0.1*max_y,1.2*max_y)
        fig.patch.set_alpha(0.0)
        fig.savefig(output_png_filename)
        plt.close()

    except:
        printtime('Unable to generate plot %s' % output_png_filename)
        traceback.print_exc()
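
A standalone toy check of the QV binning arithmetic above, using a made-up histogram with counts only at QV 7 and QV 22:

qv_histogram = [0] * 7 + [10] + [0] * 14 + [30] + [0] * 10   # indices 0..32
sum_total = float(sum(qv_histogram))                         # 40.0
print(100.0 * sum(qv_histogram[5:10]) / sum_total)           # 25.0 -> the '5-9' bar
print(100.0 * sum(qv_histogram[20:]) / sum_total)            # 75.0 -> the '20+' bar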
Example #31
def runplugins(plugins, env, level=RunLevel.DEFAULT, params={}):
    printtime("Starting plugins runlevel=%s" % level)
    params.setdefault('run_mode',
                      'pipeline')  # Plugins launched here come from pipeline
    try:
        pluginserver = xmlrpclib.ServerProxy(
            "http://%s:%d" % (PLUGINSERVER_HOST, PLUGINSERVER_PORT),
            allow_none=True)
        # call ionPlugin xmlrpc function to launch selected plugins
        # note that dependency plugins may be added to the plugins dict
        plugins, msg = call_launchPluginsXMLRPC(env['primary_key'], plugins,
                                                env['net_location'],
                                                env['username'], level, params,
                                                pluginserver)
        print msg
    except:
        traceback.print_exc()

    return plugins
Example #32
File: ionstats.py Project: tw7649116/TS
def generate_ionstats_alignment_cmd(ionstatsArgs, bam_filenames,
                                    ionstats_alignment_filename,
                                    ionstats_alignment_h5_filename,
                                    basecaller_json, library_key,
                                    histogram_length):

    try:
        if ionstatsArgs:
            com = ionstatsArgs
        else:
            com = "ionstats alignment"
            printtime(
                "ERROR: ionstats alignment command not specified, using default: 'ionstats alignment'"
            )

        com += " -i %s" % (bam_filenames[0])
        for bam_filename in bam_filenames[1:]:
            com += ",%s" % (bam_filename)
        com += " -o %s" % (ionstats_alignment_filename)
        com += " -k %s" % (library_key)
        com += " -h %d" % (int(histogram_length))
        com += " --evaluate-hp true"
        com += " --output-h5 %s" % ionstats_alignment_h5_filename

        if basecaller_json:
            block_col_offset = basecaller_json["BaseCaller"][
                'block_col_offset']
            block_row_offset = basecaller_json["BaseCaller"][
                'block_row_offset']
            block_col_size = basecaller_json["BaseCaller"]['block_col_size']
            block_row_size = basecaller_json["BaseCaller"]['block_row_size']
            subregion_col_size, subregion_row_size = generate_ionstats_subregion_dims(
                block_col_size, block_row_size)

            com += " --chip-origin %s,%s" % (block_col_offset,
                                             block_row_offset)
            com += " --chip-dim %s,%s" % (block_col_size, block_row_size)
            com += " --subregion-dim %s,%s" % (subregion_col_size,
                                               subregion_row_size)
    except:
        traceback.print_exc()
        raise
    return com
Example #33
File: alignment.py Project: kes1smmn/TS
def alignTFs(basecaller_bam_filename, bam_filename, fasta_path):

    com1 = "tmap mapall -n 12 -f %s -r %s -Y -v stage1 map4" % (
        fasta_path, basecaller_bam_filename)
    com2 = "samtools view -Sb -o %s - 2>> /dev/null" % bam_filename
    printtime("DEBUG: Calling '%s | %s':" % (com1, com2))
    p1 = subprocess.Popen(com1, stdout=subprocess.PIPE, shell=True)
    p2 = subprocess.Popen(com2, stdin=p1.stdout, shell=True)
    p2.communicate()
    p1.communicate()

    if p1.returncode != 0:
        raise subprocess.CalledProcessError(p1.returncode, com1)
    if p2.returncode != 0:
        # Assumption: samtools view only fails when there are zero reads.
        printtime(
            "Command '%s | %s' failed, presumably because there are no TF reads"
            % (com1, com2))
        raise Exception('No TF reads found')
Example #34
def beadfind_cmd(beadfindArgs, libKey, tfKey, pathtorawblock, SIGPROC_RESULTS,
                 block_offset_xy):
    if beadfindArgs:
        cmd = beadfindArgs  # e.g /home/user/Beadfind -xyz
    else:
        cmd = "justBeadFind"
        printtime(
            "ERROR: Beadfind command not specified, using default: 'justBeadFind'"
        )

    cmd += " --librarykey=%s" % (libKey)
    cmd += " --tfkey=%s" % (tfKey)
    cmd += " --no-subdir"
    cmd += " --output-dir=%s" % (SIGPROC_RESULTS)
    # justBeadFind is currently internally deriving the block offset
    # cmd += " --block-offset %d,%d" % block_offset_xy
    cmd += " %s" % pathtorawblock

    return cmd
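
Illustrative call with placeholder paths and keys; note that block_offset_xy is accepted but currently unused, per the comment above:

cmd = beadfind_cmd("justBeadFind", "TCAG", "ATCG", "/rawdata/block_X0_Y0",
                   "sigproc_results", (0, 0))
print(cmd)
# -> justBeadFind --librarykey=TCAG --tfkey=ATCG --no-subdir
#    --output-dir=sigproc_results /rawdata/block_X0_Y0   (one line)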
Example #35
def old_aq_length_histogram(ionstats_alignment_filename, output_png_filename,
                            aq_string, color):

    try:
        printtime("DEBUG: Generating plot %s" % output_png_filename)

        f = open(ionstats_alignment_filename, "r")
        ionstats_alignment = json.load(f)
        f.close()

        data = ionstats_alignment[aq_string]["read_length_histogram"]

        xaxis = range(len(data))
        ymax = max(data) + 10
        xlen = len(data) + 10
        xmax = len(data) - 1
        if xmax < 400:
            xmax = 400

        fig = plt.figure(figsize=(8, 4), dpi=100)
        ax = fig.add_subplot(111)

        ax.bar(
            xaxis,
            data,
            facecolor=color,
            align="center",
            linewidth=0,
            alpha=1.0,
            width=1.0,
        )
        ax.set_xlabel("Filtered %s Read Length" % aq_string)
        ax.set_ylabel("Count")
        ax.set_title("Filtered %s Read Length" % aq_string)
        ax.set_xlim(0, xmax)
        ax.set_ylim(0, ymax)
        fig.savefig(output_png_filename)
        plt.close()

    except Exception:
        printtime("Unable to generate plot %s" % output_png_filename)
        traceback.print_exc()
Example #36
def alignment_rate_plot(alignStats,
                        ionstats_basecaller_filename,
                        output_png_filename,
                        graph_max_x,
                        y_ticks=None):

    if not os.path.exists(alignStats):
        printtime("ERROR: %s does not exist" % alignStats)
        return

    def intWithCommas(x):
        if type(x) not in [type(0), type(0L)]:
            raise TypeError("Parameter must be an integer.")
        if x < 0:
            return '-' + intWithCommas(-x)
        result = ''
        while x >= 1000:
            x, r = divmod(x, 1000)
            result = ",%03d%s" % (r, result)
        return "%d%s" % (x, result)
Example #37
def merge_barcoded_basecaller_bams(BASECALLER_RESULTS, basecaller_datasets, method):

    try:
        composite_bam_filename = os.path.join(BASECALLER_RESULTS, 'rawlib.basecaller.bam')
        if not os.path.exists(composite_bam_filename):  # TODO

            bam_file_list = []
            for dataset in basecaller_datasets["datasets"]:
                print os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
                if os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
                    bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']))

            composite_bai_filepath = ""
            mark_duplicates = False
            blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename, composite_bai_filepath, mark_duplicates, method)
    except:
        traceback.print_exc()
        printtime("ERROR: Generate merged %s on barcoded run failed" % composite_bam_filename)

    printtime("Finished basecaller barcode merging")
Example #38
def basecaller_cmd(basecallerArgs, SIGPROC_RESULTS, libKey, tfKey, runID,
                   BASECALLER_RESULTS, block_col_offset, block_row_offset,
                   datasets_pipeline_path, adapter):
    if basecallerArgs:
        cmd = basecallerArgs
    else:
        cmd = "BaseCaller"
        printtime(
            "ERROR: BaseCaller command not specified, using default: 'BaseCaller'"
        )

    cmd += " --input-dir=%s" % (SIGPROC_RESULTS)
    cmd += " --librarykey=%s" % (libKey)
    cmd += " --tfkey=%s" % (tfKey)
    cmd += " --run-id=%s" % (runID)
    cmd += " --output-dir=%s" % (BASECALLER_RESULTS)
    cmd += " --block-offset %d,%d" % (block_col_offset, block_row_offset)
    cmd += " --datasets=%s" % (datasets_pipeline_path)
    cmd += " --trim-adapter %s" % (adapter)

    return cmd
Example #39
def align(libraryName, lib_path, output_dir, output_basename):
    #     Input -> output_basename.bam
    #     Output -> output_dir/output_basename.bam

    try:
        cmd = "alignmentQC.pl"
        cmd += " --logfile %s" % os.path.join(output_dir,
                                              "alignmentQC_out.txt")
        cmd += " --output-dir %s" % output_dir
        cmd += " --input %s" % lib_path
        cmd += " --genome %s" % libraryName
        cmd += " --max-plot-read-len %s" % str(int(400))
        cmd += " --out-base-name %s" % output_basename
        cmd += " --skip-alignStats"

        printtime("DEBUG: Calling '%s':" % cmd)
        ret = subprocess.call(cmd, shell=True)
        if ret != 0:
            raise RuntimeError('exit code: %d' % ret)
    except:
        raise
Example #40
def get_barcode_files(parent_folder, datasets_path, bcSetName):
    # try to get barcode names from datasets json, fallback on globbing for older reports
    datasetsFile = os.path.join(parent_folder,datasets_path)
    barcode_bams = []
    try:
        with open(datasetsFile, 'r') as f:
            datasets_json = json.loads(f.read())
        for dataset in datasets_json.get("datasets",[]):
            bamfile = os.path.join(parent_folder, dataset["legacy_prefix"]+'.bam')
            if os.path.exists(bamfile):
                barcode_bams.append(bamfile)
    except:
        pass  
    
    if len(barcode_bams) == 0:
        barcode_bams = glob( os.path.join(parent_folder, bcSetName+'*_rawlib.bam') )
        barcode_bams.append( os.path.join(parent_folder, 'nomatch_rawlib.bam') )    
        barcode_bams.sort()
        
    printtime("DEBUG: found %i barcodes in %s" % (len(barcode_bams), parent_folder) )
    return barcode_bams
Example #41
def wait_on_jobs(jobIds, jobName, status="Processing"):
    try:
        jobserver.updatestatus(primary_key_file, status, True)
    except:
        traceback.print_exc()

    # wait for jobs to finish
    while len(jobIds) > 0:
        for jid in list(jobIds):  # iterate over a copy so removal below is safe
            try:
                jobstatus = jobserver.jobstatus(jid)
            except:
                traceback.print_exc()
                continue

            if jobstatus == 'done' or jobstatus == 'failed' or jobstatus == "DRMAA BUG":
                printtime("DEBUG: Job %s has ended with status %s" % (str(jid), jobstatus))
                jobIds.remove(jid)

        printtime("waiting for %s job(s) to finish ..." % jobName)
        time.sleep(10)
Example #42
File: basecaller.py Project: aidjek/TS
def merge_basecaller_bam(dirs, BASECALLER_RESULTS):

    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    # Iterate over datasets. Could be one for non-barcoded runs or multiple for barcoded runs
    
    for dataset in datasets_basecaller['datasets']:
        if 'basecaller_bam' not in dataset:
            continue
        
        ###############################################
        # Merge Per-barcode Unmapped BAMs             #
        ###############################################
        
        try:
            block_bam_list = [os.path.join(dir,BASECALLER_RESULTS, dataset['basecaller_bam']) for dir in dirs]
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filename = os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
            if block_bam_list:
                blockprocessing.merge_bam_files(block_bam_list,composite_bam_filename,composite_bam_filename+'.bai',False)    
        except:
            printtime("ERROR: merging %s unsuccessful" % dataset['basecaller_bam'])

    ## Note! on barcoded runs, barcode files are NOT subsequently merged into one multi-barcode BAM. 

    printtime("Finished merging basecaller BAM files")
Example #43
def get_plugins_to_run(plugins, report_type):
    """ Sort out runtypes and runlevels of each plugin and return plugins appropriate for this analysis """
    blocklevel = False
    plugins_to_run = {}
    printtime("Get plugins to run, report type = %s" % report_type)
    for name in plugins.keys():
        plugin = plugins[name]

        # default is run on wholechip and thumbnail, but not composite
        selected = report_type in [RunType.FULLCHIP, RunType.THUMB]
        if plugin.get('runtypes'):
            selected = (report_type in plugin['runtypes'])

        if selected:
            plugin['runlevels'] = plugin.get('runlevels') if plugin.get(
                'runlevels') else [RunLevel.DEFAULT]
            printtime("Plugin %s is enabled, runlevels=%s" %
                      (plugin['name'], ','.join(plugin['runlevels'])))
            plugins_to_run[name] = plugin

            # check if have any blocklevel plugins
            if report_type == RunType.COMPOSITE and RunLevel.BLOCK in plugin[
                    'runlevels']:
                blocklevel = True
        else:
            printtime("Plugin %s (runtypes=%s) is not enabled for %s report" %
                      (plugin['name'], ','.join(plugin.get('runtypes',
                                                           '')), report_type))

    return plugins_to_run, blocklevel
Example #44
def calibrate(dir_recalibration, sampleBAMFile, recalibArgs, chipflow):
    try:
        if recalibArgs:
            cmd = recalibArgs
        else:
            cmd = "Calibration"

        # default parameters
        block_offset_x = chipflow["BaseCaller"]['block_col_offset']
        block_offset_y = chipflow["BaseCaller"]['block_row_offset']
        block_size_x   = chipflow["BaseCaller"]['block_col_size']
        block_size_y   = chipflow["BaseCaller"]['block_row_size']

        if "--block-offset" not in cmd:
            cmd += " --block-offset %d,%d" % (block_offset_x, block_offset_y)
        if "--block-size" not in cmd:
            cmd += " --block-size %d,%d" % (block_size_x, block_size_y)

        cmd += " -i %s" % sampleBAMFile
        cmd += " -o %s" % dir_recalibration

        printtime("DEBUG: Calling '%s':" % cmd)
        ret = subprocess.call(cmd,shell=True)
        if ret == 0:
            printtime("Calibration generated: %s" % (os.path.join(dir_recalibration,"Calibration.json")))
        else:
            raise RuntimeError('Calibration exit code: %d' % ret)
    except:
        printtime('ERROR: HP training failed')
        traceback.print_exc()
        raise
Example #45
def QVtable(dir_recalibration, genome_path, sampleBAMFile, xMin, xMax, xCuts,
            yMin, yMax, yCuts, flowSpan):
    '''Generates a QV table from the mapped sample reads'''
    try:
        cmd = "java -jar /usr/local/share/java/FlowspaceCalibration.jar"
        cmd += " I=%s" % sampleBAMFile
        cmd += " R=%s" % genome_path
        cmd += " O=%s" % os.path.join(dir_recalibration, 'sample.csv')
        cmd += " F=%s" % os.path.join(dir_recalibration, 'sample.flow.csv')
        cmd += " X_MIN=%d" % xMin  #X_MAX=3391 =0 Y_MAX=3791 Y_MIN=0 X_CUTS=1 Y_CUTS=1 FLOW_SPAN=520
        cmd += " X_MAX=%d" % xMax
        cmd += " X_CUTS=%d" % xCuts
        cmd += " Y_MIN=%d" % yMin
        cmd += " Y_MAX=%d" % yMax
        cmd += " Y_CUTS=%d" % yCuts
        cmd += " FLOW_SPAN=%d" % flowSpan
        cmd += " VALIDATION_STRINGENCY=SILENT NUM_THREADS=16 MAX_QUEUE_SIZE=8192 > %s 2>&1" % os.path.join(
            dir_recalibration, 'flowQVtable.log')
        printtime("DEBUG: Calling '%s':" % cmd)
        ret = subprocess.call(cmd, shell=True)
        if ret == 0:
            printtime("Finished flow QV table")
        else:
            raise RuntimeError('Flow QV table exit code: %d' % ret)
    except:
        printtime('ERROR: flow QV table failed')
        raise
Example #46
File: alignment.py Project: skner/TS
def merge_barcoded_alignment_bams(ALIGNMENT_RESULTS, basecaller_datasets,
                                  method):

    try:
        composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')

        bam_file_list = []
        for dataset in basecaller_datasets["datasets"]:
            bam_name = os.path.join(
                ALIGNMENT_RESULTS,
                os.path.basename(dataset['file_prefix']) + '.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)
            else:
                printtime("WARNING: exclude %s from merging into %s" %
                          (bam_name, composite_bam_filename))

        composite_bai_filename = composite_bam_filename + '.bai'
        mark_duplicates = False
        blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename,
                                        composite_bai_filename,
                                        mark_duplicates, method)
    except:
        traceback.print_exc()
        printtime("ERROR: Generate merged %s on barcoded run failed" %
                  composite_bam_filename)

    printtime("Finished barcode merging of %s" % ALIGNMENT_RESULTS)
Example #47
def read_length_sparkline(ionstats_basecaller_filename, output_png_filename, max_length):

    try:
        printtime("DEBUG: Generating plot %s" % output_png_filename)
        
        f = open(ionstats_basecaller_filename,'r')
        ionstats_basecaller = json.load(f)
        f.close()

        histogram_x = range(0,max_length,5)
        num_bins = len(histogram_x)
        histogram_y = [0] * num_bins
    
        for read_length,frequency in enumerate(ionstats_basecaller['full']['read_length_histogram']):
            current_bin = min(read_length/5,num_bins-1)
            histogram_y[current_bin] += frequency

        max_y = max(histogram_y)
        max_y = max(max_y,1)
        
        fig = plt.figure(figsize=(3,0.3),dpi=100)
        ax = fig.add_subplot(111,frame_on=False,xticks=[],yticks=[],position=[0,0,1,1])
        ax.bar(histogram_x,histogram_y,width=6.5, color="#2D4782",linewidth=0, zorder=2)
    
        for idx in range(0,max_length,50):
            label_bottom = str(idx)
            ax.text(idx,max_y*0.70,label_bottom,horizontalalignment='center',verticalalignment='center',
                    fontsize=8, zorder=1)
            ax.axvline(x=idx,color='#D0D0D0',ymax=0.5, zorder=0)
            ax.axvline(x=idx,color='#D0D0D0',ymin=0.9, zorder=0)
    
        ax.set_ylim(0,max_y)
        ax.set_xlim(-10,max_length)
        fig.patch.set_alpha(0.0)
        fig.savefig(output_png_filename)
        plt.close() # Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory.

    except:
        printtime('Unable to generate plot %s' % output_png_filename)
        traceback.print_exc()
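
A standalone sketch of the 5-bp binning above with toy counts ('//' used so the arithmetic also holds outside Python 2); overlong reads collapse into the last bin:

max_length, bin_width = 400, 5
num_bins = len(range(0, max_length, bin_width))    # 80 bins
histogram_y = [0] * num_bins
toy_counts = {3: 10, 7: 5, 399: 2, 450: 1}         # read_length -> frequency
for read_length, frequency in toy_counts.items():
    histogram_y[min(read_length // bin_width, num_bins - 1)] += frequency
print("%d %d %d" % (histogram_y[0], histogram_y[1], histogram_y[-1]))   # 10 5 3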
Example #48
def generate_raw_data_traces(libKey,
                             tfKey,
                             floworder,
                             SIGPROC_RESULTS,
                             plot_output_dir=os.getcwd()):
    #
    # Generate Raw Data Traces for lib and TF keys          #
    #
    printtime("Generate Raw Data Traces for lib and TF keys " +
              "(iontrace_Test_Fragment.png, iontrace_Library.png) " +
              "and raw_peak_signal file")

    tfRawPath = os.path.join(SIGPROC_RESULTS, "avgNukeTrace_%s.txt" % tfKey)
    libRawPath = os.path.join(SIGPROC_RESULTS, "avgNukeTrace_%s.txt" % libKey)
    peakOut = "raw_peak_signal"

    if os.path.exists(tfRawPath):
        try:
            kp = plotKey.KeyPlot(tfKey, floworder, "Test Fragment")
            kp.parse(tfRawPath)
            kp.dump_max(os.path.join(plot_output_dir, peakOut))
            kp.plot(outdir=plot_output_dir)
        except Exception:
            printtime("TF key graph didn't render")
            traceback.print_exc()
    else:
        printtime("ERROR: %s is missing" % tfRawPath)

    if os.path.exists(libRawPath):
        try:
            kp = plotKey.KeyPlot(libKey, floworder, "Library")
            kp.parse(libRawPath)
            kp.dump_max(os.path.join(plot_output_dir, peakOut))
            kp.plot(outdir=plot_output_dir)
        except Exception:
            printtime("Lib key graph didn't render")
            traceback.print_exc()
    else:
        printtime("ERROR: %s is missing" % libRawPath)
Example #49
File: alignment.py Project: dkeren/TS
def merge_alignment_bigdata(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, mark_duplicates):

    datasets_json = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_json = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    for dataset in datasets_json['datasets']:
        # Merge BAMs
        try:
            block_bam_list = [os.path.join(dir,ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam') for dir in dirs]
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')
            if block_bam_list:
                blockprocessing.merge_bam_files(block_bam_list,composite_bam_filename,composite_bam_filename+'.bai',mark_duplicates)
        except Exception:
            printtime("ERROR: merging %s unsuccessful" % (dataset['file_prefix'] + '.bam'))
            traceback.print_exc()
Example #50
def wait_on_jobs(jobIds, jobName, status="Processing", max_running_jobs=0):
    try:
        jobserver.updatestatus(primary_key_file, status, True)  # jobserver and primary_key_file are module-level names
    except Exception:
        traceback.print_exc()

    # wait for job to finish
    while len(jobIds) > max_running_jobs:
        printtime("waiting for %s job(s) to finish ..." % jobName)
        for jobid in list(jobIds):  # iterate over a copy; jobIds is mutated below
            try:
                jobstatus = jobserver.jobstatus(jobid)
            except Exception:
                traceback.print_exc()
                continue

            if jobstatus in ("done", "failed", "DRMAA BUG"):
                printtime("DEBUG: Job %s has ended with status %s" %
                          (str(jobid), jobstatus))
                jobIds.remove(jobid)

        time.sleep(20)
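
wait_on_jobs leans on the pipeline's module-level jobserver and primary_key_file. A self-contained sketch of the same poll-and-prune pattern, with a stub in place of the real job server:

import time

# Sketch: the poll-and-prune loop against a stub job server.
class StubJobServer(object):
    def __init__(self, finish_after):
        self.polls = {}
        self.finish_after = finish_after
    def jobstatus(self, jobid):
        self.polls[jobid] = self.polls.get(jobid, 0) + 1
        return "done" if self.polls[jobid] >= self.finish_after else "running"

stub = StubJobServer(finish_after=2)
job_ids = ["job1", "job2", "job3"]
while len(job_ids) > 0:
    for jobid in list(job_ids):   # copy: job_ids is mutated below
        if stub.jobstatus(jobid) in ("done", "failed"):
            job_ids.remove(jobid)
    time.sleep(0.1)               # shortened from 20 s for the sketch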
Example #51
File: ionstats.py  Project: zjwang6/TS
def generate_ionstats_subregion_dims(block_col_size, block_row_size):

    try:
        subregion_col_size = 92
        subregion_row_size = 74
        if block_col_size == 1200 and block_row_size == 800:  # Thumbnail
            subregion_col_size = 50
            subregion_row_size = 50
        elif (block_col_size == 30912 and block_row_size == 21296) or (
                block_col_size == 2576 and block_row_size == 2662):  # P2
            subregion_col_size = 368
            subregion_row_size = 296
        elif (block_col_size == 15456 and block_row_size == 10656) or (
                block_col_size == 1288 and block_row_size == 1332):  # P1
            subregion_col_size = 184
            subregion_row_size = 148
        elif (block_col_size == 7680 and block_row_size == 5312) or (
                block_col_size == 640 and block_row_size == 664):  # P0
            subregion_col_size = 80
            subregion_row_size = 83
        elif block_col_size == 3392 and block_row_size == 3792:  # 318
            subregion_col_size = 53
            subregion_row_size = 48
        elif block_col_size == 3392 and block_row_size == 2120:  # 316v2
            subregion_col_size = 53
            subregion_row_size = 53
        elif block_col_size == 2736 and block_row_size == 2640:  # 316
            subregion_col_size = 48
            subregion_row_size = 48
        elif block_col_size == 1280 and block_row_size == 1152:  # 314
            subregion_col_size = 40
            subregion_row_size = 48
        return (subregion_col_size, subregion_row_size)
    except Exception:
        printtime("ERROR: Failed to generate subregion dims from input %s,%s" %
                  (block_col_size, block_row_size))
        traceback.print_exc()
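
A quick usage check; the expected tuples come straight from the branches above:

# Sketch: expected outputs for a few chip geometries handled above.
assert generate_ionstats_subregion_dims(1200, 800) == (50, 50)    # thumbnail
assert generate_ionstats_subregion_dims(3392, 3792) == (53, 48)   # 318 chip
assert generate_ionstats_subregion_dims(1280, 1152) == (40, 48)   # 314 chip
assert generate_ionstats_subregion_dims(123, 456) == (92, 74)     # fallback defaults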
Example #52
def merge_inlinecontrol_json(dirs, BASECALLER_RESULTS):
    printtime("Merging inline_control_stats.json files")
    printtime("DEBUG: input dirs: %s" % dirs)
    try:
        inlinecontrolfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            inlinecontroljson = os.path.join(subdir,
                                             "inline_control_stats.json")
            if os.path.exists(inlinecontroljson):
                inlinecontrolfiles.append(subdir)
            else:
                printtime(
                    "Warning: Merging inline_control_stats.json files: skipped %s"
                    % inlinecontroljson)

        merge(inlinecontrolfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging inline_control_stats.json files failed")

    printtime("Finished merging inline control stats")
Example #53
def merge_basecaller_json(dirs, BASECALLER_RESULTS):

    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Example #54
def mergeAvgNukeTraces(dirs, SIGPROC_RESULTS, key, beads, from_rawdata=False):

    # Merge per-block avgNukeTrace_*.txt files into a single weighted-average trace
    printtime("Merging avgNukeTrace_*.txt files")

    try:
        output_trace_file = os.path.join(SIGPROC_RESULTS,
                                         "avgNukeTrace_%s.txt" % key)
        sumAvgNukeTraceData = None
        sumWells = 0
        config = ConfigParser.RawConfigParser()

        for subdir in dirs:
            if from_rawdata:
                # from rawdata: onboard_results/sigproc_results/block_X0_Y0/ in rawdata
                input_basedir = os.path.join(SIGPROC_RESULTS, subdir)
            else:
                # original format: block_X0_Y0/sigproc_results/
                #                  from  results from RUO pipeline
                input_basedir = os.path.join(subdir, SIGPROC_RESULTS)

            try:
                input_trace_file = os.path.join(input_basedir,
                                                "avgNukeTrace_%s.txt" % key)
                if os.path.exists(input_trace_file):
                    config.read(os.path.join(input_basedir, "bfmask.stats"))
                    wells = config.getint("global", beads)
                    labels = numpy.genfromtxt(input_trace_file,
                                              delimiter=" ",
                                              usecols=[0],
                                              dtype=str)
                    currentAvgNukeTraceData = numpy.genfromtxt(
                        input_trace_file, delimiter=" ")[:, 1:]
                else:
                    continue
            except Exception:
                traceback.print_exc()
                continue

            if sumAvgNukeTraceData is None:
                sumAvgNukeTraceData = currentAvgNukeTraceData * wells
            else:
                sumAvgNukeTraceData += currentAvgNukeTraceData * wells
            sumWells += wells

        if sumAvgNukeTraceData is None or sumWells == 0:
            printtime("ERROR: no avgNukeTrace_%s.txt data found to merge" % key)
            return

        AvgNukeTraceData = sumAvgNukeTraceData / sumWells
        AvgNukeTraceTable = numpy.column_stack(
            (labels, AvgNukeTraceData.astype("|S10")))
        numpy.savetxt(output_trace_file, AvgNukeTraceTable, fmt="%s")

    except Exception:
        traceback.print_exc()
        printtime("ERROR: Merging %s failed" % output_trace_file)

    printtime("Finished mergeAvgNukeTraces")
Example #55
def buildTFReference(tfreffasta_filename, analysis_dir, tfkey):
    '''
    Build the DefaultTFs.fasta from DefaultTFs.conf
    '''

    DefaultTFconfPath = os.path.join(analysis_dir, 'DefaultTFs.conf')
    if not os.path.exists(DefaultTFconfPath):
        if not os.path.exists('/opt/ion/config/DefaultTFs.conf'):
            printtime(
                'ERROR: could not locate DefaultTFs.conf (tried %s and /opt/ion/config/DefaultTFs.conf)'
                % DefaultTFconfPath)
            raise IOError('DefaultTFs.conf not found')
        DefaultTFconfPath = '/opt/ion/config/DefaultTFs.conf'

    printtime('TFPipeline: Using TF sequences from %s' % DefaultTFconfPath)
    num_tfs = 0
    try:
        confFile = open(DefaultTFconfPath, 'r')
        fastaFile = open(tfreffasta_filename, 'w')

        for confLine in confFile:
            confLine = confLine.strip()
            if not confLine or confLine.startswith('#'):
                # skip blank lines and comments
                continue
            confEntries = confLine.split(',')
            if len(confEntries) != 3:
                continue
            if confEntries[1] != tfkey:
                continue

            fastaFile.write('>%s\n' % confEntries[0])
            fastaFile.write('%s\n' % confEntries[2].strip())
            num_tfs += 1

        confFile.close()
        fastaFile.close()

    except Exception:
        printtime("ERROR: failed to convert %s into %s" %
                  (DefaultTFconfPath, tfreffasta_filename))
        raise

    if num_tfs == 0:
        printtime("No suitable TFs with key %s found in %s" %
                  (tfkey, DefaultTFconfPath))
        raise NoTFDataException('No TF reference sequences')
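
From the parsing logic above, each usable DefaultTFs.conf line carries three comma-separated fields: name, key, sequence. A sketch with invented entries, plus the FASTA buildTFReference would emit for tfkey 'ATCG':

# Sketch: DefaultTFs.conf line format -- name,key,sequence (entries invented).
conf_lines = [
    "# comments and malformed lines are skipped",
    "TF_A,ATCG,TACGAGCGGTACAGTTTCAC",
    "TF_B,ATCG,TTCCAGGTCACAGGTTTCAC",
]
# For tfkey == 'ATCG' the resulting DefaultTFs.fasta would read:
#   >TF_A
#   TACGAGCGGTACAGTTTCAC
#   >TF_B
#   TTCCAGGTCACAGGTTTCAC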
Example #56
File: sigproc.py  Project: bharatpatel/TS
def beadfind(beadfindArgs, libKey, tfKey, pathtorawblock, SIGPROC_RESULTS):

    if beadfindArgs:
        cmd = beadfindArgs  # e.g /home/user/Beadfind -xyz
    else:
        cmd = "justBeadFind"
        printtime(
            "ERROR: Beadfind command not specified, using default: 'justBeadFind'"
        )

    cmd += " --librarykey=%s" % (libKey)
    cmd += " --tfkey=%s" % (tfKey)
    cmd += " --no-subdir"
    cmd += " --output-dir=%s" % (SIGPROC_RESULTS)
    cmd += " %s" % pathtorawblock

    printtime("Beadfind command: " + cmd)
    proc = subprocess.Popen(shlex.split(cmd.encode('utf8')),
                            shell=False,
                            stderr=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    stdout_value, stderr_value = proc.communicate()
    status = proc.returncode
    sys.stdout.write("%s" % stdout_value)
    sys.stderr.write("%s" % stderr_value)

    # Ion Reporter
    try:
        sigproc_log_path = os.path.join(SIGPROC_RESULTS, 'sigproc.log')
        with open(sigproc_log_path, 'a') as f:
            if stdout_value: f.write(stdout_value)
            if stderr_value: f.write(stderr_value)
    except IOError:
        traceback.print_exc()

    return status
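
A usage sketch; the raw-block path is hypothetical, and TCAG/ATCG are the conventional library and test-fragment keys:

# Sketch: run beadfind on one raw block (hypothetical paths).
status = beadfind(beadfindArgs="justBeadFind",
                  libKey="TCAG",
                  tfKey="ATCG",
                  pathtorawblock="/rawdata/myrun/block_X0_Y0",
                  SIGPROC_RESULTS="sigproc_results")
if status != 0:
    printtime("ERROR: beadfind failed with status %d" % status)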
Example #57
def mergeRawPeakSignals(dirs):

    ###############################################
    # Merge raw_peak_signal files                 #
    ###############################################
    printtime("Merging raw_peak_signal files")

    try:
        raw_peak_signal_files = []
        for subdir in dirs:
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging raw_peak_signal files"):
                continue
            raw_peak_signal_file = os.path.join(subdir,'raw_peak_signal')
            if os.path.exists(raw_peak_signal_file):
                raw_peak_signal_files.append(raw_peak_signal_file)
            else:
                printtime("ERROR: Merging raw_peak_signal files: skipped %s" % raw_peak_signal_file)
        composite_raw_peak_signal_file = "raw_peak_signal"
        blockprocessing.merge_raw_key_signals(raw_peak_signal_files, composite_raw_peak_signal_file)
    except Exception:
        printtime("Merging raw_peak_signal files failed")
        traceback.print_exc()

    printtime("Finished mergeRawPeakSignals")
Example #58
File: alignment.py  Project: skner/TS
def align(referenceName, lib_path, bidirectional, mark_duplicates, realign,
          skip_sorting, aligner_opts_extra, logfile, output_dir,
          output_basename):
    #     Input:  lib_path (the BAM to align)
    #     Output: output_dir/output_basename.bam

    try:
        cmd = "alignmentQC.py"
        cmd += " --logfile %s" % logfile
        cmd += " --output-dir %s" % output_dir
        cmd += " --input %s" % lib_path
        cmd += " --genome %s" % referenceName
        #cmd += " --max-plot-read-len %s" % str(int(800))
        cmd += " --out-base-name %s" % output_basename
        #cmd += " --skip-alignStats"
        #cmd += " --threads 8"
        #cmd += " --server-key 13"

        if realign:
            cmd += " --realign"
        if skip_sorting:
            cmd += " --skip-sorting"
        if bidirectional:
            cmd += ' --bidirectional'
        if aligner_opts_extra:
            cmd += ' --aligner-opts-extra "%s"' % aligner_opts_extra
        if mark_duplicates:
            cmd += ' --mark-duplicates'

        printtime("DEBUG: Calling '%s':" % cmd)
        ret = subprocess.call(cmd, shell=True)
        if ret != 0:
            printtime("ERROR: alignmentQC.py failed, return code: %d" % ret)
            raise RuntimeError('exit code: %d' % ret)
    except:
        raise
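
A usage sketch with hypothetical file names; alignmentQC.py must be on the PATH for the call to succeed:

# Sketch: align one basecaller BAM (hypothetical reference and file names).
align(referenceName="hg19",
      lib_path="basecaller_results/rawlib.basecaller.bam",
      bidirectional=False,
      mark_duplicates=True,
      realign=False,
      skip_sorting=False,
      aligner_opts_extra="",
      logfile="alignment.log",
      output_dir="alignment_results",
      output_basename="rawlib")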
Example #59
def read_length_histogram(ionstats_basecaller_filename, output_png_filename, max_length):

    try:
        printtime("DEBUG: Generating plot %s" % output_png_filename)
        
        with open(ionstats_basecaller_filename, 'r') as f:
            ionstats_basecaller = json.load(f)

        histogram_x = range(0, max_length)
        num_bins = len(histogram_x)
        histogram_y = [0] * num_bins
        
        for read_length, frequency in enumerate(ionstats_basecaller['full']['read_length_histogram']):
            if read_length < num_bins:
                histogram_y[read_length] += frequency  # 1 bp bins; reads beyond max_length are dropped
        
        max_y = max(histogram_y)
        max_y = max(max_y,1)
        
        fig = plt.figure(figsize=(4,3.5),dpi=100)
        ax = fig.add_subplot(111,frame_on=False,yticks=[],position=[0,0.15,1,0.88])
        ax.bar(histogram_x,histogram_y,width=2.5, color="#2D4782",linewidth=0, zorder=2)
    
    
        ax.set_ylim(0,1.2*max_y)
        ax.set_xlim(-5,max_length+15)
        ax.set_xlabel("Read Length")
        fig.patch.set_alpha(0.0)
        fig.savefig(output_png_filename)
        plt.close()

    except Exception:
        printtime('Unable to generate plot %s' % output_png_filename)
        traceback.print_exc()
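
A self-contained sketch that fabricates the single field the plotter reads, ionstats_basecaller['full']['read_length_histogram'], and renders it (counts are invented):

import json

# Sketch: minimal ionstats input for read_length_histogram (toy counts at 100-102 bp).
toy = {"full": {"read_length_histogram": [0] * 100 + [500, 800, 300]}}
with open("ionstats_toy.json", "w") as f:
    json.dump(toy, f)
read_length_histogram("ionstats_toy.json", "readLenHisto_toy.png", 150)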
Example #60
def update_bfmask_artifacts(bfmaskPath, bfmaskstatspath, outputdir, plot_title):

    printtime("Make Bead Density Plots")
    try:
        beadDensityPlot.genHeatmap(bfmaskPath, bfmaskstatspath, outputdir, plot_title)
    except IOError as err:
        printtime("Bead Density Plot file error: %s" % err)
    except Exception as err:
        printtime("Bead Density Plot generation failure: %s" % err)
        traceback.print_exc()
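
A usage sketch with hypothetical paths; beadDensityPlot is the pipeline's own plotting module:

# Sketch: regenerate the bead density heatmap for one result (hypothetical paths).
update_bfmask_artifacts(bfmaskPath="sigproc_results/analysis.bfmask.bin",
                        bfmaskstatspath="sigproc_results/analysis.bfmask.stats",
                        outputdir=".",
                        plot_title="Bead density")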