Example #1
File: combineReports.py  Project: rb94/TS
def barcode_report_stats(barcode_names):
    CA_barcodes_json = []
    ionstats_file_list = []
    printtime("DEBUG: creating CA_barcode_summary.json")

    for bcname in sorted(barcode_names):
        ionstats_file = bcname + '_rawlib.ionstats_alignment.json'
        barcode_json = {"barcode_name": bcname, "AQ7_num_bases":0, "full_num_reads":0, "AQ7_mean_read_length":0}
        try:
            stats = json.load(open(ionstats_file))
            for key in stats.keys():
                if key in ['AQ7', 'AQ10', 'AQ17', 'AQ20', 'AQ30', 'AQ47', 'full', 'aligned']:
                    barcode_json.update({
                        key+ "_max_read_length": stats[key].get("max_read_length"),
                        key+ "_mean_read_length": stats[key].get("mean_read_length"),
                        key+ "_num_bases": stats[key].get("num_bases"),
                        key+ "_num_reads": stats[key].get("num_reads")
                    })
            ionstats_file_list.append(ionstats_file)
        except:
            printtime("DEBUG: error reading ionstats from %s" % ionstats_file)
            traceback.print_exc()

        if bcname == 'nomatch':
            CA_barcodes_json.insert(0, barcode_json)
        else:
            CA_barcodes_json.append(barcode_json)

    with open('CA_barcode_summary.json','w') as f:
        f.write(json.dumps(CA_barcodes_json, indent=2))
    
    # generate merged ionstats_alignment.json
    if not os.path.exists('ionstats_alignment.json'):
        ionstats.reduce_stats(ionstats_file_list,'ionstats_alignment.json')
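All of these examples funnel into the same two-argument call, ionstats.reduce_stats(input_file_list, output_path), which merges a list of per-barcode (or per-block) ionstats JSON files into a single summary file. The sketch below is illustrative only, not the library's implementation: it shows the kind of aggregation such a merge implies for the per-class counters visible in Example #1 (num_reads, num_bases, max_read_length, mean_read_length); anything beyond those four keys is an assumption.

import json

def merge_ionstats_dicts(paths, out_path):
    # Illustrative approximation of merging per-barcode ionstats JSON files;
    # the real ionstats.reduce_stats is a Torrent Suite utility whose internals
    # are not shown in these examples.
    merged = {}
    for path in paths:
        with open(path) as f:
            stats = json.load(f)
        for key, block in stats.items():  # e.g. 'AQ7', 'AQ17', 'full', 'aligned'
            if not isinstance(block, dict):
                continue
            tgt = merged.setdefault(key, {"num_reads": 0, "num_bases": 0, "max_read_length": 0})
            tgt["num_reads"] += block.get("num_reads", 0)
            tgt["num_bases"] += block.get("num_bases", 0)
            tgt["max_read_length"] = max(tgt["max_read_length"], block.get("max_read_length", 0))
    for block in merged.values():
        # recompute the mean from the merged totals
        block["mean_read_length"] = block["num_bases"] // block["num_reads"] if block["num_reads"] else 0
    with open(out_path, "w") as f:
        json.dump(merged, f, indent=2)

The real files presumably also carry histogram data consumed by the plotting helpers, which is one reason every example delegates the merge to ionstats.reduce_stats rather than reimplementing it.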
Example #2
def mergeBlocks(BASECALLER_RESULTS, dirs, floworder):

    ionstats_tf_filename = os.path.join(BASECALLER_RESULTS, "ionstats_tf.json")
    tfstatsjson_path = os.path.join(BASECALLER_RESULTS, "TFStats.json")
    composite_filename_list = [os.path.join(BASECALLER_RESULTS, dir, "ionstats_tf.json") for dir in dirs]
    composite_filename_list = [filename for filename in composite_filename_list if os.path.exists(filename)]

    ionstats.reduce_stats(composite_filename_list, ionstats_tf_filename)

    ionstats_plots.tf_length_histograms(ionstats_tf_filename, ".")

    ionstats.generate_legacy_tf_files(ionstats_tf_filename, tfstatsjson_path)
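A hypothetical invocation of the mergeBlocks function defined above, assuming each per-block subdirectory under basecaller_results already contains an ionstats_tf.json; the block names and flow order are illustrative, not taken from a real run (note that floworder is not actually used inside this particular version of the function):

block_dirs = ['block_X0_Y0', 'block_X7392_Y0']  # illustrative block subdirectory names
mergeBlocks('basecaller_results', block_dirs, floworder='TACGTACGTCTGAGCATCGATCGATGTACAGC')
# writes basecaller_results/ionstats_tf.json, the TF length histograms,
# and the legacy basecaller_results/TFStats.json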
Example #3
def barcode_report_stats(barcode_names):
    CA_barcodes_json = []
    ionstats_file_list = []
    printtime("DEBUG: creating CA_barcode_summary.json")

    for bcname in sorted(barcode_names):
        ionstats_file = bcname + "_rawlib.ionstats_alignment.json"
        barcode_json = {
            "barcode_name": bcname,
            "AQ7_num_bases": 0,
            "full_num_reads": 0,
            "AQ7_mean_read_length": 0,
        }
        try:
            stats = json.load(open(ionstats_file))
            for key in list(stats.keys()):
                if key in [
                        "AQ7",
                        "AQ10",
                        "AQ17",
                        "AQ20",
                        "AQ30",
                        "AQ47",
                        "full",
                        "aligned",
                ]:
                    barcode_json.update({
                        key + "_max_read_length":
                        stats[key].get("max_read_length"),
                        key + "_mean_read_length":
                        stats[key].get("mean_read_length"),
                        key + "_num_bases":
                        stats[key].get("num_bases"),
                        key + "_num_reads":
                        stats[key].get("num_reads"),
                    })
            ionstats_file_list.append(ionstats_file)
        except Exception:
            printtime("DEBUG: error reading ionstats from %s" % ionstats_file)
            traceback.print_exc()

        if bcname == "nomatch":
            CA_barcodes_json.insert(0, barcode_json)
        else:
            CA_barcodes_json.append(barcode_json)

    with open("CA_barcode_summary.json", "w") as f:
        f.write(json.dumps(CA_barcodes_json, indent=2))

    # generate merged ionstats_alignment.json
    if not os.path.exists("ionstats_alignment.json"):
        ionstats.reduce_stats(ionstats_file_list, "ionstats_alignment.json")
Example #4
def barcode_report_stats(bcfile_names):
    CA_barcodes_json = []
    ionstats_file_list = []
    printtime("DEBUG: creating CA_barcode_summary.json")

    for bcname in bcfile_names:
        barcode_name = bcname.split('_rawlib.bam')[0]
        ionstats_file = bcname.split('.bam')[0] + '.ionstats_alignment.json'
        barcode_json = {
            "barcode_name": barcode_name,
            "AQ7_num_bases": 0,
            "full_num_reads": 0,
            "AQ7_mean_read_length": 0
        }
        try:
            stats = json.load(open(ionstats_file))
            for key in stats.keys():
                if key in [
                        'AQ7', 'AQ10', 'AQ17', 'AQ20', 'AQ30', 'AQ47', 'full',
                        'aligned'
                ]:
                    barcode_json.update({
                        key + "_max_read_length":
                        stats[key].get("max_read_length"),
                        key + "_mean_read_length":
                        stats[key].get("mean_read_length"),
                        key + "_num_bases":
                        stats[key].get("num_bases"),
                        key + "_num_reads":
                        stats[key].get("num_reads")
                    })
            ionstats_file_list.append(ionstats_file)
        except:
            printtime("DEBUG: error reading ionstats from %s" % ionstats_file)
            traceback.print_exc()

        if barcode_name == 'nomatch':
            CA_barcodes_json.insert(0, barcode_json)
        else:
            CA_barcodes_json.append(barcode_json)

    with open('CA_barcode_summary.json', 'w') as f:
        f.write(json.dumps(CA_barcodes_json, indent=2))

    # generate merged ionstats_alignment.json
    if not os.path.exists('ionstats_alignment.json'):
        ionstats.reduce_stats(ionstats_file_list, 'ionstats_alignment.json')
Example #5
def mergeBlocks(BASECALLER_RESULTS, dirs, floworder):

    ionstats_tf_filename = os.path.join(BASECALLER_RESULTS, "ionstats_tf.json")
    tfstatsjson_path = os.path.join(BASECALLER_RESULTS, "TFStats.json")
    composite_filename_list = [
        os.path.join(BASECALLER_RESULTS, dir, "ionstats_tf.json")
        for dir in dirs
    ]
    composite_filename_list = [
        filename for filename in composite_filename_list
        if os.path.exists(filename)
    ]

    ionstats.reduce_stats(composite_filename_list, ionstats_tf_filename)

    ionstats_plots.tf_length_histograms(ionstats_tf_filename, '.')

    ionstats.generate_legacy_tf_files(ionstats_tf_filename, tfstatsjson_path)
Example #6
try:
    # Merge ionstats_basecaller files from individual barcodes/dataset
    BASECALLER_RESULTS = 'basecaller_results'
    ionstats_file = 'ionstats_basecaller.json'
    file_list = []
    for filepath in args.files:
        ionstats_path = os.path.join(os.path.dirname(filepath), BASECALLER_RESULTS, ionstats_file)
        ionstats_path_CA = os.path.join(os.path.dirname(filepath), ionstats_file)
        if os.path.exists(ionstats_path):
            file_list.append(ionstats_path)
        elif os.path.exists(ionstats_path_CA):
            file_list.append(ionstats_path_CA)
        else:
            raise Exception('')

    ionstats.reduce_stats(file_list, ionstats_file)

    # Make alignment_rate_plot.png
    stats = json.load(open(ionstats_file))
    l = stats['full']['max_read_length']
    graph_max_x = int(round(l + 49, -2))

    ionstats_plots.alignment_rate_plot(
        'alignStats_err.json',
        'ionstats_basecaller.json',
        'alignment_rate_plot.png', int(graph_max_x))
    print("Ionstats plot created successfully")
except:
    print("ERROR: Failed to generate alignment rate plot")
 
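Examples #6 and #13 size the plot's x-axis from the merged stats themselves rather than from the flow count: int(round(l + 49, -2)) pushes the observed full-read max_read_length up to roughly the next multiple of 100. A quick check of that arithmetic with made-up lengths:

# Illustrative values only, not from a real run
for l in (87, 153, 353, 400):
    print(l, int(round(l + 49, -2)))
# -> 87 100, 153 200, 353 400, 400 400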
Example #7
File: basecaller.py  Project: aidjek/TS
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):

    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################
    
    block_datasets_json = []
    combined_datasets_json = {}
    
    for dir in dirs:
        current_datasets_path = os.path.join(dir,BASECALLER_RESULTS,'datasets_basecaller.json')
        try:
            f = open(current_datasets_path,'r')
            block_datasets_json.append(json.load(f))
            f.close()
        except:
            printtime("ERROR: skipped %s" % current_datasets_path)
    
    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    combined_datasets_json = copy.deepcopy(block_datasets_json[0])
    
    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += current_datasets_json['datasets'][dataset_idx].get("read_count",0)
    
    for read_group in combined_datasets_json['read_groups'].iterkeys():
        combined_datasets_json['read_groups'][read_group]['Q20_bases'] = 0
        combined_datasets_json['read_groups'][read_group]['total_bases'] = 0
        combined_datasets_json['read_groups'][read_group]['read_count'] = 0
        combined_datasets_json['read_groups'][read_group]['filtered'] = True if 'nomatch' not in read_group else False
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['read_groups'][read_group]['Q20_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("Q20_bases",0)
            combined_datasets_json['read_groups'][read_group]['total_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("total_bases",0)
            combined_datasets_json['read_groups'][read_group]['read_count'] += current_datasets_json['read_groups'].get(read_group,{}).get("read_count",0)
            combined_datasets_json['read_groups'][read_group]['filtered'] &= current_datasets_json['read_groups'].get(read_group,{}).get("filtered",True)
    
    try:
        f = open(os.path.join(BASECALLER_RESULTS,'datasets_basecaller.json'),"w")
        json.dump(combined_datasets_json, f, indent=4)
        f.close()
    except:
        printtime("ERROR: Failed to write merged datasets_basecaller.json")
        traceback.print_exc()



    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################

    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            composite_filename = os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')
            barcode_filename_list = [os.path.join(dir,BASECALLER_RESULTS,dataset['file_prefix']+'.ionstats_basecaller.json') for dir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list,composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list,os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
                os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS,''))
    except:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()



    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        if len(dirs)==96:
            composite_return_code=96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(subdir,"blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                        if 'Basecaller=0' in text:
                            composite_return_code-=1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS,"composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0002)
                f = open(composite_return_code_file, 'a')
                f.write(str(composite_return_code))
                f.close()
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except:
        traceback.print_exc()


    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS,dirs,floworder)
    except:
        printtime("ERROR: Merging TFMapper metrics failed")

    
    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir,'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles,BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")


    ###############################################
    # Generate composite plots
    ###############################################

    printtime("Build composite basecaller graphs")
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)
    
    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))
    

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except:
        printtime ("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
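Several of the merge functions derive graph_max_x from the flow count with the same expression, int(50 * math.ceil(0.014 * int(flows))), i.e. roughly 0.7 bases per flow rounded up to the next multiple of 50; note that the fallback on failure is 400 here but 800 in the alignment examples further down. A quick sanity check with illustrative flow counts:

import math

# ~0.7 bases per flow, rounded up to a multiple of 50 (illustrative flow counts)
for flows in (260, 400, 500, 1000):
    print(flows, int(50 * math.ceil(0.014 * int(flows))))
# -> 260 200, 400 300, 500 350, 1000 700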
Example #8
File: basecaller.py  Project: aidjek/TS
def post_basecalling(BASECALLER_RESULTS,expName,resultsName,flows):

    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        raise Exception("ERROR: %s does not exist" % datasets_basecaller_path)
    
    datasets_basecaller = {}
    try:
        f = open(datasets_basecaller_path,'r')
        datasets_basecaller = json.load(f);
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        raise Exception("ERROR: problem parsing %s" % datasets_basecaller_path)

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    quality_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue
                
        # Call ionstats utility to generate alignment-independent metrics for current unmapped BAM
        ionstats.generate_ionstats_basecaller(
                os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                graph_max_x)
        
        # Plot read length sparkline
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)
        
        quality_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))
        
    # Merge ionstats_basecaller files from individual barcodes/dataset
    ionstats.reduce_stats(quality_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))

    # Generate legacy stats file: quality.summary
    ionstats.generate_legacy_basecaller_files(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,''))

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)
    
    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))

    printtime("Finished basecaller post processing")
Example #9
File: alignment.py  Project: skner/TS
def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS,
                   basecaller_datasets):

    # Merge *ionstats_alignment.json files across blocks

    # DEBUG: check if merging is commutative

    try:
        # DEBUG
        composite_filename_list = []
        composite_h5_filename_list = []

        for dataset in basecaller_datasets["datasets"]:

            # filter out based on flag
            keep_dataset = False
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get(
                        'filtered', False):
                    keep_dataset = True
            if not keep_dataset:
                printtime("INFO: filter out %s" % rg_name)
                continue

            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group][
                'reference']

            if reference:
                ionstats_folder = ALIGNMENT_RESULTS
                ionstats_file = 'ionstats_alignment.json'
            else:
                ionstats_folder = BASECALLER_RESULTS
                ionstats_file = 'ionstats_basecaller.json'

            block_filename_list = [
                os.path.join(dir, ionstats_folder,
                             dataset['file_prefix'] + '.' + ionstats_file)
                for dir in dirs
            ]
            block_filename_list = [
                filename for filename in block_filename_list
                if os.path.exists(filename)
            ]  # TODO, remove this check and provide list with valid blocks
            composite_filename = os.path.join(
                ionstats_folder, dataset['file_prefix'] +
                '.composite_allblocks_' + ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                block_h5_filename_list = [
                    os.path.join(
                        dir, ALIGNMENT_RESULTS,
                        dataset['file_prefix'] + '.ionstats_error_summary.h5')
                    for dir in dirs
                ]
                block_h5_filename_list = [
                    filename for filename in block_h5_filename_list
                    if os.path.exists(filename)
                ]  # TODO, remove this check and provide list with valid blocks
                composite_h5_filename = os.path.join(
                    ALIGNMENT_RESULTS,
                    dataset['file_prefix'] + '.ionstats_error_summary.h5')
                ionstats.reduce_stats_h5(block_h5_filename_list,
                                         composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)

        block_filename_list = [
            os.path.join(dir, ALIGNMENT_RESULTS, 'ionstats_alignment.json')
            for dir in dirs
        ]
        block_filename_list = [
            filename for filename in block_filename_list
            if os.path.exists(filename)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'composite_allblocks_ionstats_alignment.json')
        ionstats.reduce_stats(block_filename_list, composite_filename)

        block_h5_filename_list = [
            os.path.join(dir, ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
            for dir in dirs
        ]
        block_h5_filename_list = [
            filename for filename in block_h5_filename_list
            if os.path.exists(filename)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS,
            'ionstats_error_summary.h5')  # composite_allblocks
        if len(block_h5_filename_list):
            ionstats.reduce_stats_h5(block_h5_filename_list,
                                     composite_filename)

        # DEBUG: this is used to check if merging is commutative, the length check is necessary in case  all datasets are 'filtered' (e.g.)
        if len(composite_filename_list) > 0:
            ionstats.reduce_stats(
                composite_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'composite_allbarcodes_ionstats_alignment.json'))
        if len(composite_h5_filename_list) > 0:
            ionstats.reduce_stats_h5(
                composite_h5_filename_list,
                os.path.join(
                    ALIGNMENT_RESULTS,
                    'composite_allbarcodes_ionstats_error_summary.h5'))

    except:
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
Example #10
File: alignment.py  Project: skner/TS
def create_ionstats(BASECALLER_RESULTS, ALIGNMENT_RESULTS,
                    basecaller_meta_information, basecaller_datasets,
                    graph_max_x, activate_barcode_filter, evaluate_hp):

    # TEST
    basecaller_bam_file_list = []
    alignment_bam_file_list = []

    ionstats_alignment_file_list = []
    if evaluate_hp:
        ionstats_alignment_h5_file_list = []

    ionstats_basecaller_file_list = []

    for dataset in basecaller_datasets["datasets"]:

        keep_dataset = False
        for rg_name in dataset["read_groups"]:
            if not basecaller_datasets["read_groups"][rg_name].get(
                    'filtered', False):
                keep_dataset = True
        filtered = not keep_dataset

        # filter out based on flag
        if activate_barcode_filter:
            if filtered:
                continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        read_group = dataset['read_groups'][0]
        reference = basecaller_datasets['read_groups'][read_group]['reference']
        if reference and not filtered:

            # TEST
            alignment_bam_file_list.append(
                os.path.join(ALIGNMENT_RESULTS,
                             dataset['file_prefix'] + '.bam'))

            ionstats.generate_ionstats_alignment(
                [
                    os.path.join(ALIGNMENT_RESULTS,
                                 dataset['file_prefix'] + '.bam')
                ],
                os.path.join(
                    ALIGNMENT_RESULTS,
                    dataset['file_prefix'] + '.ionstats_alignment.json'),
                os.path.join(
                    ALIGNMENT_RESULTS, dataset['file_prefix'] +
                    '.ionstats_error_summary.h5') if evaluate_hp else None,
                basecaller_meta_information if evaluate_hp else None,
                graph_max_x)

            ionstats_alignment_file_list.append(
                os.path.join(
                    ALIGNMENT_RESULTS,
                    dataset['file_prefix'] + '.ionstats_alignment.json'))
            if evaluate_hp:
                ionstats_alignment_h5_file_list.append(
                    os.path.join(
                        ALIGNMENT_RESULTS,
                        dataset['file_prefix'] + '.ionstats_error_summary.h5'))
        else:

            # TEST
            basecaller_bam_file_list.append(
                os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']))

            ionstats.generate_ionstats_basecaller(
                [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                os.path.join(
                    BASECALLER_RESULTS,
                    dataset['file_prefix'] + '.ionstats_basecaller.json'),
                graph_max_x)

            ionstats_basecaller_file_list.append(
                os.path.join(
                    BASECALLER_RESULTS,
                    dataset['file_prefix'] + '.ionstats_basecaller.json'))

    # Merge ionstats files from individual (barcoded) datasets
    if len(ionstats_alignment_file_list) > 0:
        ionstats.reduce_stats(
            ionstats_alignment_file_list,
            os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'))
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd, shell=True)
            if ret != 1:
                printtime(
                    "ERROR: empty bam file generation failed, return code: %d"
                    % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
                if evaluate_hp else None,
                basecaller_meta_information if evaluate_hp else None,
                graph_max_x)

        except:
            pass

    if len(ionstats_basecaller_file_list) > 0:
        ionstats.reduce_stats(
            ionstats_basecaller_file_list,
            os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'))
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd, shell=True)
            if ret != 1:
                printtime(
                    "ERROR: empty bam file generation failed, return code: %d"
                    % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'],
                os.path.join(BASECALLER_RESULTS,
                             'ionstats_tmp_basecaller.json'), graph_max_x)
        except:
            pass

    ionstatslist = []
    a = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')
    b = os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json')
    if os.path.exists(a):
        ionstatslist.append(a)
    if os.path.exists(b):
        ionstatslist.append(b)
    if len(ionstatslist) > 0:
        ionstats.reduce_stats(
            ionstatslist,
            os.path.join(BASECALLER_RESULTS,
                         'ionstats_basecaller_with_aligninfos.json'))
        ionstats.reduce_stats(
            reversed(ionstatslist),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
    if evaluate_hp and len(ionstats_alignment_h5_file_list
                           ) > 0 and basecaller_meta_information:
        ionstats.reduce_stats_h5(
            ionstats_alignment_h5_file_list,
            os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'))
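The final block above merges the same two inputs in both orders: ionstatslist into ionstats_basecaller_with_aligninfos.json and reversed(ionstatslist) into ionstats_basecaller.json, which matches the 'check if merging is commutative' DEBUG notes in Examples #9 and #14. A minimal way to act on that check, assuming both output files exist (hypothetical helper, not part of TS):

import json

def merges_agree(path_a, path_b):
    # Hypothetical commutativity check: the two files were produced from the
    # same inputs merged in opposite order, so they should be identical.
    with open(path_a) as fa, open(path_b) as fb:
        return json.load(fa) == json.load(fb)

# e.g.:
# merges_agree('basecaller_results/ionstats_basecaller_with_aligninfos.json',
#              'basecaller_results/ionstats_basecaller.json')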
Example #11
File: alignment.py  Project: dkeren/TS
def merge_alignment_stats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, flows):

    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return



    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 800




    ########################################################
    # Merge ionstats_alignment.json
    # First across blocks, then across barcoded
    ########################################################

    try:
        composite_filename_list = []
        for dataset in datasets_basecaller["datasets"]:
            composite_filename = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json')
            barcode_filename_list = [os.path.join(dir,ALIGNMENT_RESULTS,dataset['file_prefix']+'.ionstats_alignment.json') for dir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list,composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))
    except:
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()

    # Use ionstats alignment results to generate plots
    ionstats_plots.alignment_rate_plot2(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'base_error_plot.png', graph_max_x)
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q10.png', 'AQ10', 'red')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q20.png', 'AQ20', 'green')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'), 
            'Filtered_Alignments_Q47.png', 'AQ47', 'purple')



    # Generate alignment_barcode_summary.csv
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)
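Examples #11 and #12 both locate barcodeList.txt by probing up to four parent directories with a chain of if-blocks. A more compact form with the same behaviour (a sketch only; printtime and aggregate_alignment are assumed to be the same helpers used above, and this is not the code actually in TS):

import os

barcodelist_path = 'barcodeList.txt'
for _ in range(4):
    # walk up one directory at a time until the file is found (max four levels)
    if os.path.exists(barcodelist_path):
        break
    barcodelist_path = os.path.join('..', barcodelist_path)

if os.path.exists(barcodelist_path):
    printtime("Barcode processing, aggregate")
    aggregate_alignment("./", barcodelist_path)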
Example #12
File: alignment.py  Project: dkeren/TS
def alignment_post_processing(
        libraryName,
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates):


    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 800



    alignment_file_list = []

    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        ionstats.generate_ionstats_alignment(
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                graph_max_x)
        ionstats2alignstats(libraryName,
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.alignment.summary'))

        alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))

    # In Progress: merge ionstats alignment results
    ionstats.reduce_stats(alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))    
    ionstats2alignstats(libraryName,
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            os.path.join(ALIGNMENT_RESULTS,'alignment.summary'))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs

    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS,'rawlib.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(ALIGNMENT_RESULTS,os.path.basename(dataset['file_prefix'])+'.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)

        blockprocessing.merge_bam_files(bam_file_list,composite_bam_filename,composite_bam_filename+'.bai',mark_duplicates)

    # Generate alignment_barcode_summary.csv
    #TODO: use datasets_basecaller.json + *.ionstats_alignment.json instead of barcodeList.txt and alignment_*.summary
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)

    # These graphs are likely obsolete
    #makeAlignGraphs()

    # In Progress: Use ionstats alignment results to generate plots
    ionstats_plots.alignment_rate_plot2(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'base_error_plot.png', graph_max_x)
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q10.png', 'AQ10', 'red')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q20.png', 'AQ20', 'green')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'), 
            'Filtered_Alignments_Q47.png', 'AQ47', 'purple')
Example #13
            BASECALLER_RESULTS = 'basecaller_results'
            ionstats_file = 'ionstats_basecaller.json'
            file_list = []
            for filepath in args.files:
                ionstats_path = os.path.join(os.path.dirname(filepath),
                                             BASECALLER_RESULTS, ionstats_file)
                ionstats_path_CA = os.path.join(os.path.dirname(filepath),
                                                ionstats_file)
                if os.path.exists(ionstats_path):
                    file_list.append(ionstats_path)
                elif os.path.exists(ionstats_path_CA):
                    file_list.append(ionstats_path_CA)
                else:
                    raise Exception('')

            ionstats.reduce_stats(file_list, ionstats_file)

            # Make alignment_rate_plot.png
            stats = json.load(open(ionstats_file))
            l = stats['full']['max_read_length']
            graph_max_x = int(round(l + 49, -2))

            ionstats_plots.alignment_rate_plot('alignStats_err.json',
                                               'ionstats_basecaller.json',
                                               'alignment_rate_plot.png',
                                               int(graph_max_x))
            print("Ionstats plot created successfully")
        except:
            print("ERROR: Failed to generate alignment rate plot")

Example #14
def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets):

    # Merge *ionstats_alignment.json files across blocks

    # DEBUG: check if merging is commutative

    try:
        # DEBUG
        composite_filename_list = []
        composite_h5_filename_list = []

        for dataset in basecaller_datasets["datasets"]:

            # filter out based on flag
            keep_dataset = False
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get('filtered',False):
                    keep_dataset = True
            if not keep_dataset:
                printtime("INFO: filter out %s" % rg_name)
                continue

            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group]['reference']

            if reference:
                ionstats_folder = ALIGNMENT_RESULTS
                ionstats_file = 'ionstats_alignment.json'
            else:
                ionstats_folder = BASECALLER_RESULTS
                ionstats_file = 'ionstats_basecaller.json'

            block_filename_list = [os.path.join(dir,ionstats_folder,dataset['file_prefix']+'.'+ionstats_file) for dir in dirs]
            block_filename_list = [filename for filename in block_filename_list if os.path.exists(filename)] # TODO, remove this check and provide list with valid blocks
            composite_filename = os.path.join(ionstats_folder, dataset['file_prefix']+'.composite_allblocks_'+ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                block_h5_filename_list = [os.path.join(dir,ALIGNMENT_RESULTS,dataset['file_prefix']+'.ionstats_error_summary.h5') for dir in dirs]
                block_h5_filename_list = [filename for filename in block_h5_filename_list if os.path.exists(filename)]  # TODO, remove this check and provide list with valid blocks
                composite_h5_filename = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5')
                ionstats.reduce_stats_h5(block_h5_filename_list, composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)


        block_filename_list = [os.path.join(dir,ALIGNMENT_RESULTS,'ionstats_alignment.json') for dir in dirs]
        block_filename_list = [filename for filename in block_filename_list if os.path.exists(filename)]
        composite_filename = os.path.join(ALIGNMENT_RESULTS, 'composite_allblocks_ionstats_alignment.json')
        ionstats.reduce_stats(block_filename_list, composite_filename)

        block_h5_filename_list = [os.path.join(dir,ALIGNMENT_RESULTS,'ionstats_error_summary.h5') for dir in dirs]
        block_h5_filename_list = [filename for filename in block_h5_filename_list if os.path.exists(filename)]
        composite_filename = os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5') # composite_allblocks
        if len(block_h5_filename_list):
            ionstats.reduce_stats_h5(block_h5_filename_list, composite_filename)

        # DEBUG: this is used to check if merging is commutative, the length check is necessary in case  all datasets are 'filtered' (e.g.)
        if len(composite_filename_list) > 0:
            ionstats.reduce_stats(composite_filename_list,os.path.join(ALIGNMENT_RESULTS,'composite_allbarcodes_ionstats_alignment.json'))
        if len(composite_h5_filename_list) > 0:
            ionstats.reduce_stats_h5(composite_h5_filename_list,os.path.join(ALIGNMENT_RESULTS,'composite_allbarcodes_ionstats_error_summary.h5'))

    except:
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
Example #15
def create_ionstats(
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        basecaller_meta_information,
        basecaller_datasets,
        graph_max_x,
        activate_barcode_filter):

    # TEST
    basecaller_bam_file_list = []
    alignment_bam_file_list = []


    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []

    ionstats_basecaller_file_list = []

    for dataset in basecaller_datasets["datasets"]:

        keep_dataset = False
        for rg_name in dataset["read_groups"]:
            if not basecaller_datasets["read_groups"][rg_name].get('filtered',False):
                keep_dataset = True
        filtered = not keep_dataset

        # filter out based on flag
        if activate_barcode_filter:
            if filtered:
                continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        read_group = dataset['read_groups'][0]
        reference = basecaller_datasets['read_groups'][read_group]['reference']
        if reference and not filtered:

            # TEST
            alignment_bam_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'))

            ionstats.generate_ionstats_alignment(
                [os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')],
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'),
                basecaller_meta_information,
                graph_max_x)

            ionstats_alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))
            ionstats_alignment_h5_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'))
        else:

            # TEST
            basecaller_bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']))

            ionstats.generate_ionstats_basecaller(
                [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), # TODO, not needed
                basecaller_meta_information,
                graph_max_x)

            ionstats_basecaller_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))


    # Merge ionstats files from individual (barcoded) datasets
    if len(ionstats_alignment_file_list) > 0:
        ionstats.reduce_stats(ionstats_alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))
    else: # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd  = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd,shell=True)
            if ret != 1:
                printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'),
                basecaller_meta_information,
                graph_max_x)

        except:
            pass

    if len(ionstats_basecaller_file_list) > 0:
        ionstats.reduce_stats(ionstats_basecaller_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json'))
    else: # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd  = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd,shell=True)
            if ret != 1:
                printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'],
                os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_error_summary.h5'), # TODO, not needed
                basecaller_meta_information,
                graph_max_x)
        except:
            pass


    ionstatslist = []
    a = os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json')
    b = os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json')
    if os.path.exists(a):
        ionstatslist.append(a)
    if os.path.exists(b):
        ionstatslist.append(b)
    if len(ionstatslist) > 0:
        ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS,'ionstats_basecaller_with_aligninfos.json'))
        ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
    if len(ionstats_alignment_h5_file_list) > 0 and basecaller_meta_information:
        ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_error_summary.h5'))
