Пример #1
0
def merge_basecaller_json(dirs, BASECALLER_RESULTS):
    """Merge the per-block BaseCaller.json files into BASECALLER_RESULTS.

    Each entry of ``dirs`` is joined onto ``BASECALLER_RESULTS`` to form a
    block directory.  Blocks for which ``isbadblock`` returns true, and
    blocks that have no ``BaseCaller.json`` file, are left out.  The
    remaining block directories are handed to ``mergeBaseCallerJson.merge``.

    Any exception raised along the way is printed and logged; it is never
    propagated to the caller.
    """
    printtime("Merging BaseCaller.json files")

    try:
        mergeable_blocks = []
        for block_name in dirs:
            block_path = os.path.join(BASECALLER_RESULTS, block_name)
            printtime("DEBUG: %s:" % block_path)

            if isbadblock(block_path, "Merging BaseCaller.json files"):
                continue

            json_path = os.path.join(block_path, "BaseCaller.json")
            if not os.path.exists(json_path):
                # Nothing to merge for this block; report it and move on.
                printtime(
                    "ERROR: Merging BaseCaller.json files: skipped %s" % json_path
                )
                continue

            mergeable_blocks.append(block_path)

        mergeBaseCallerJson.merge(mergeable_blocks, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Пример #2
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS):
    """Merge per-block basecaller statistics into BASECALLER_RESULTS.

    Steps, in order:

    1. Delegate to ``merge_datasets_basecaller_json``.
    2. Only when exactly 96 blocks are given: write
       ``composite_return_code.txt`` (unless it already exists).  The value
       starts at 96 and is decremented once for every block whose
       ``blockstatus.txt`` contains ``Basecaller=0``.
    3. Merge the per-block ``BaseCaller.json`` files through
       ``mergeBaseCallerJson.merge``.

    Failures in steps 2 and 3 are printed/logged and do not propagate.
    """

    merge_datasets_basecaller_json(dirs, BASECALLER_RESULTS)

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if not os.path.exists(blockstatus_return_code_file):
                    continue

                with open(blockstatus_return_code_file, 'r') as f:
                    text = f.read()
                if 'Basecaller=0' in text:
                    composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # 0o002 is the octal spelling accepted by Python 2.6+ and 3.
                # The umask change is process-wide and, as in the original
                # code, deliberately not restored afterwards.
                os.umask(0o002)
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Пример #3
0
def merge_basecaller_json(dirs, BASECALLER_RESULTS):
    """Merge the per-block BaseCaller.json files into BASECALLER_RESULTS.

    Every entry of ``dirs`` is joined onto ``BASECALLER_RESULTS``.  Blocks
    flagged by ``isbadblock`` and blocks without a ``BaseCaller.json`` file
    are skipped; the rest are passed to ``mergeBaseCallerJson.merge``.

    Ordinary exceptions are printed and logged rather than propagated.
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """

    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        # Best-effort step: report the failure but let the pipeline go on.
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Пример #4
0
def _merge_block_basecaller_json(dirs, BASECALLER_RESULTS):
    """Merge the BaseCaller.json file of every usable block (best effort)."""
    import traceback

    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")


def _merge_block_sff(dirs, BASECALLER_RESULTS, sff_name, merged_sff, description, failure_label):
    """Run SFFProtonMerge over every usable block that contains `sff_name`."""
    import traceback

    step = "Merging %s SFF files" % description
    printtime(step)
    try:
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        cmd = ['SFFProtonMerge', '-i', sff_name, '-o', '%s' % merged_sff]
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            if isbadblock(subdir, step):
                continue
            block_sff = os.path.join(subdir, sff_name)
            if os.path.exists(block_sff):
                cmd.append(subdir)
            else:
                printtime("ERROR: skipped %s" % block_sff)
        printtime("DEBUG: Calling '%s'" % ' '.join(cmd))
        subprocess.call(cmd)
    except Exception:
        traceback.print_exc()
        printtime("SFFProtonMerge failed (%s)" % failure_label)


def mergeBasecallerResults(dirs, QualityPath, merged_bead_mask_path, floworder, libsff, tfsff, BASECALLER_RESULTS):
    """Merge the basecaller outputs of all blocks into composite results.

    Steps, in order:

    1. Combine each block's ``quality.summary`` into one file written to
       ``QualityPath`` (counts are summed, maxima are maximised, means are
       accumulated as ``value / len(dirs)``).
    2. Call ``TFPipeline.mergeBlocks``.
    3. Merge the per-block ``BaseCaller.json`` files.
    4. Merge ``rawlib.sff`` into ``libsff`` and ``rawtf.sff`` into ``tfsff``
       with the external ``SFFProtonMerge`` tool.

    Steps 2-4 are best effort: failures are printed/logged and do not
    propagate.  ``merged_bead_mask_path`` is accepted but not used here.
    """
    import traceback

    ############################################
    # Merge individual quality.summary files #
    ############################################
    printtime("Merging individual quality.summary files")

    config_out = ConfigParser.RawConfigParser()
    config_out.optionxform = str # don't convert to lowercase
    config_out.add_section('global')

    # Keys whose values are summed over all blocks.
    numberkeys = ['Number of 50BP Reads',
                  'Number of 100BP Reads',
                  'Number of 150BP Reads',
                  'Number of Reads at Q0',
                  'Number of Bases at Q0',
                  'Number of 50BP Reads at Q0',
                  'Number of 100BP Reads at Q0',
                  'Number of 150BP Reads at Q0',
                  'Number of Reads at Q17',
                  'Number of Bases at Q17',
                  'Number of 50BP Reads at Q17',
                  'Number of 150BP Reads at Q17',
                  'Number of 100BP Reads at Q17',
                  'Number of Reads at Q20',
                  'Number of Bases at Q20',
                  'Number of 50BP Reads at Q20',
                  'Number of 100BP Reads at Q20',
                  'Number of 150BP Reads at Q20']

    # Keys for which the largest block value is kept.
    maxkeys = ['Max Read Length at Q0',
               'Max Read Length at Q17',
               'Max Read Length at Q20']

    # Keys that are averaged over the blocks.
    meankeys = ['System SNR',
                'Mean Read Length at Q0',
                'Mean Read Length at Q17',
                'Mean Read Length at Q20']

    config_in = MyConfigParser()
    config_in.optionxform = str # don't convert to lowercase
    doinit = True  # True until the first summary has been folded in
    for subdir in dirs:
        # NOTE(review): isbadblock receives the bare block name here, while
        # the other merge steps pass the path joined with BASECALLER_RESULTS
        # -- confirm which form isbadblock expects.
        if isbadblock(subdir, "Merging quality.summary"):
            continue
        summaryfile = os.path.join(BASECALLER_RESULTS, subdir, 'quality.summary')
        if not os.path.exists(summaryfile):
            printtime("ERROR: skipped %s" % summaryfile)
            continue

        printtime("INFO: process %s" % summaryfile)
        config_in.read(summaryfile)
        for key in numberkeys:
            value_in = config_in.get('global', key)
            value_out = 0 if doinit else config_out.get('global', key)
            config_out.set('global', key, int(value_in) + int(value_out))
        for key in maxkeys:
            value_in = config_in.get('global', key)
            value_out = 0 if doinit else config_out.get('global', key)
            config_out.set('global', key, max(int(value_in), int(value_out)))
        for key in meankeys:
            value_in = config_in.get('global', key)
            value_out = 0 if doinit else config_out.get('global', key)
            # NOTE(review): the divisor is len(dirs), so bad or skipped
            # blocks still count towards the mean -- confirm this is intended.
            config_out.set('global', key, float(value_out) + float(value_in) / len(dirs))
        doinit = False

    with open(QualityPath, 'wb') as configfile:
        config_out.write(configfile)

    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")

    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS, dirs, floworder)
    except Exception:
        traceback.print_exc()
        printtime("ERROR: Merging TFMapper metrics failed")

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    _merge_block_basecaller_json(dirs, BASECALLER_RESULTS)

    ########################################
    # Merge individual block SFF files     #
    ########################################
    _merge_block_sff(dirs, BASECALLER_RESULTS, 'rawlib.sff', libsff,
                     "Library", "library")
    _merge_block_sff(dirs, BASECALLER_RESULTS, 'rawtf.sff', tfsff,
                     "Test Fragment", "test fragments")
Пример #5
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Merge per-block basecaller statistics and build composite plots.

    Steps, in order:

    1. Load ``<block>/<BASECALLER_RESULTS>/datasets_basecaller.json`` for
       every block and sum the read counts / base counts into a combined
       ``datasets_basecaller.json``.  Returns early when no usable block
       file was found.
    2. Reduce the per-block ``*.ionstats_basecaller.json`` files, first per
       dataset and then over all datasets.
    3. When exactly 96 blocks are given, write ``composite_return_code.txt``
       (96 minus the number of blocks reporting ``Basecaller=0``) unless the
       file already exists.
    4. Merge TF metrics and the per-block ``BaseCaller.json`` files.
    5. Generate the read-length / quality plots and the wells beadogram.

    Steps wrapped in ``try`` are best effort (failures are logged); the
    count merging in step 1 and the ``ionstats_plots`` calls in step 5 are
    not guarded and may raise.
    """

    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################

    block_datasets_json = []

    for block_dir in dirs:
        current_datasets_path = os.path.join(block_dir, BASECALLER_RESULTS, 'datasets_basecaller.json')
        try:
            with open(current_datasets_path, 'r') as f:
                block_datasets_json.append(json.load(f))
        except Exception:
            # Missing or unparsable block file: leave this block out.
            printtime("ERROR: skipped %s" % current_datasets_path)

    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    # The first successfully loaded block serves as the template.
    combined_datasets_json = copy.deepcopy(block_datasets_json[0])

    for dataset_idx, combined_dataset in enumerate(combined_datasets_json['datasets']):
        combined_dataset['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_dataset['read_count'] += current_datasets_json['datasets'][dataset_idx].get("read_count", 0)

    for read_group, combined_group in combined_datasets_json['read_groups'].items():
        combined_group['Q20_bases'] = 0
        combined_group['total_bases'] = 0
        combined_group['read_count'] = 0
        combined_group['filtered'] = 'nomatch' not in read_group
        for current_datasets_json in block_datasets_json:
            block_group = current_datasets_json['read_groups'].get(read_group, {})
            combined_group['Q20_bases'] += block_group.get("Q20_bases", 0)
            combined_group['total_bases'] += block_group.get("total_bases", 0)
            combined_group['read_count'] += block_group.get("read_count", 0)
            # Stays true only while every block reports the group as filtered.
            combined_group['filtered'] &= block_group.get("filtered", True)

    try:
        with open(os.path.join(BASECALLER_RESULTS, 'datasets_basecaller.json'), "w") as f:
            json.dump(combined_datasets_json, f, indent=4)
    except Exception:
        printtime("ERROR; Failed to write merged datasets_basecaller.json")
        traceback.print_exc()

    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################

    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            stats_name = dataset['file_prefix'] + '.ionstats_basecaller.json'
            composite_filename = os.path.join(BASECALLER_RESULTS, stats_name)
            barcode_filename_list = [os.path.join(block_dir, BASECALLER_RESULTS, stats_name) for block_dir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list, composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
                os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, ''))
    except Exception:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if not os.path.exists(blockstatus_return_code_file):
                    continue

                with open(blockstatus_return_code_file, 'r') as f:
                    text = f.read()
                if 'Basecaller=0' in text:
                    composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # 0o002 is the octal spelling accepted by Python 2.6+ and 3.
                # The umask change is process-wide and, as in the original
                # code, deliberately not restored afterwards.
                os.umask(0o002)
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS, dirs, floworder)
    except Exception:
        printtime("ERROR: Merging TFMapper metrics failed")
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        printtime("Merging BaseCaller.json files failed")
        traceback.print_exc()

    ###############################################
    # Generate composite plots
    ###############################################

    printtime("Build composite basecaller graphs")
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        # `flows` could not be converted to a number: use a fixed axis limit.
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
                graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
            graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except Exception:
        printtime ("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
Пример #6
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS):
    """Merge per-block basecaller statistics into BASECALLER_RESULTS.

    Steps, in order:

    1. Delegate to ``merge_datasets_basecaller_json``.
    2. Only when exactly 96 blocks are given: write
       ``composite_return_code.txt`` (unless it already exists).  The value
       starts at 96 and is decremented for every block whose
       ``blockstatus.txt`` contains ``Basecaller=0``, and also for the four
       allow-listed blocks below when their
       ``sigproc_results/analysis_return_code.txt`` holds exactly ``3``.
    3. Merge the per-block ``BaseCaller.json`` files through
       ``mergeBaseCallerJson.merge``.

    Failures in steps 2 and 3 are printed/logged and do not propagate.
    """

    merge_datasets_basecaller_json(dirs, BASECALLER_RESULTS)

    # Blocks whose analysis return code "3" is treated as non-critical
    # (presumably the four corner blocks of the chip -- TODO confirm).
    suppressible_blocks = (
        "block_X0_Y0",
        "block_X14168_Y0",
        "block_X0_Y9324",
        "block_X14168_Y9324",
    )

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if not os.path.exists(blockstatus_return_code_file):
                    continue

                with open(blockstatus_return_code_file, "r") as f:
                    text = f.read()
                if "Basecaller=0" in text:
                    composite_return_code -= 1
                    continue

                # NOTE(review): a missing analysis_return_code.txt raises here
                # and aborts the whole computation, so no composite file is
                # written -- confirm this is intended.
                with open(os.path.join(subdir, "sigproc_results", "analysis_return_code.txt"), "r") as g:
                    return_code_text = g.read()
                # NOTE(review): exact comparison; a trailing newline in the
                # file would prevent a match -- confirm the file format.
                if return_code_text == "3" and subdir in suppressible_blocks:
                    printtime("INFO: suppress non-critical error in %s" % subdir)
                    composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # 0o002 is the octal spelling accepted by Python 2.6+ and 3.
                # The umask change is process-wide and, as in the original
                # code, deliberately not restored afterwards.
                os.umask(0o002)
                with open(composite_return_code_file, "a") as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, "BaseCaller.json")
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Пример #7
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS):
    """Merge per-block basecaller statistics into BASECALLER_RESULTS.

    Steps, in order:

    1. Delegate to ``merge_datasets_basecaller_json``.
    2. Only when exactly 96 blocks are given: write
       ``composite_return_code.txt`` (unless it already exists).  The value
       starts at 96 and is decremented for every block whose
       ``blockstatus.txt`` contains ``Basecaller=0``, and also for the four
       allow-listed blocks below when their
       ``sigproc_results/analysis_return_code.txt`` holds exactly ``3``.
    3. Merge the per-block ``BaseCaller.json`` files through
       ``mergeBaseCallerJson.merge``.

    Failures in steps 2 and 3 are printed/logged and do not propagate.
    """

    merge_datasets_basecaller_json(dirs, BASECALLER_RESULTS)

    # Blocks whose analysis return code "3" is treated as non-critical
    # (presumably the four corner blocks of the chip -- TODO confirm).
    suppressible_blocks = ('block_X0_Y0', 'block_X14168_Y0',
                           'block_X0_Y9324', 'block_X14168_Y9324')

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(
                    subdir, "blockstatus.txt")
                if not os.path.exists(blockstatus_return_code_file):
                    continue

                with open(blockstatus_return_code_file, 'r') as f:
                    text = f.read()
                if 'Basecaller=0' in text:
                    composite_return_code -= 1
                    continue

                # NOTE(review): a missing analysis_return_code.txt raises here
                # and aborts the whole computation, so no composite file is
                # written -- confirm this is intended.
                analysis_return_code_file = os.path.join(
                    subdir, "sigproc_results", "analysis_return_code.txt")
                with open(analysis_return_code_file, 'r') as g:
                    return_code_text = g.read()
                # NOTE(review): exact comparison; a trailing newline in the
                # file would prevent a match -- confirm the file format.
                if return_code_text == "3" and subdir in suppressible_blocks:
                    printtime(
                        "INFO: suppress non-critical error in %s"
                        % subdir)
                    composite_return_code -= 1

            composite_return_code_file = os.path.join(
                BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # 0o002 is the octal spelling accepted by Python 2.6+ and 3.
                # The umask change is process-wide and, as in the original
                # code, deliberately not restored afterwards.
                os.umask(0o002)
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" %
                          composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" %
                          basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")