Пример #1
0
def merge_barcoded_alignment_bams(ALIGNMENT_RESULTS, basecaller_datasets,
                                  method):
    """Merge all per-barcode aligned BAMs into a composite rawlib.bam.

    Args:
        ALIGNMENT_RESULTS: directory holding the per-barcode BAMs and the
            merged output.
        basecaller_datasets: parsed datasets_basecaller.json dict; its
            'datasets' entries provide the per-barcode 'file_prefix'.
        method: merge backend passed through to
            blockprocessing.merge_bam_files (e.g. 'picard'/'samtools').

    Best-effort: failures are logged and do not propagate.
    """
    # Bound outside the try so the error handler below can always report it.
    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')
    try:
        bam_file_list = []
        for dataset in basecaller_datasets["datasets"]:
            bam_name = os.path.join(
                ALIGNMENT_RESULTS,
                os.path.basename(dataset['file_prefix']) + '.bam')
            # Missing per-barcode BAMs are skipped with a warning rather
            # than failing the whole merge.
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)
            else:
                printtime("WARNING: exclude %s from merging into %s" %
                          (bam_name, composite_bam_filename))

        composite_bai_filename = composite_bam_filename + '.bai'
        mark_duplicates = False
        blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename,
                                        composite_bai_filename,
                                        mark_duplicates, method)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt pass through.
        traceback.print_exc()
        printtime("ERROR: Generate merged %s on barcoded run failed" %
                  composite_bam_filename)

    printtime("Finished barcode merging of %s" % ALIGNMENT_RESULTS)
Пример #2
0
def merge_basecaller_bam(dirs, BASECALLER_RESULTS):
    """Merge per-block unmapped (basecaller) BAMs for every dataset.

    Args:
        dirs: list of per-block directories, each containing its own
            BASECALLER_RESULTS subtree.
        BASECALLER_RESULTS: composite basecaller results directory; also
            the location of datasets_basecaller.json.

    Returns early (after logging) if the datasets JSON cannot be parsed.
    """

    datasets_basecaller = {}
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    try:
        # 'with' guarantees the handle is closed even when json.load raises.
        with open(datasets_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    # Iterate over datasets. Could be one for non-barcoded runs or multiple for barcoded runs
    for dataset in datasets_basecaller['datasets']:
        if 'basecaller_bam' not in dataset:
            continue

        ###############################################
        # Merge Per-barcode Unmapped BAMs             #
        ###############################################

        try:
            block_bam_list = [os.path.join(dir, BASECALLER_RESULTS, dataset['basecaller_bam']) for dir in dirs]
            # Blocks may legitimately have produced no BAM; keep only existing files.
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filename = os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
            if block_bam_list:
                blockprocessing.merge_bam_files(block_bam_list, composite_bam_filename, composite_bam_filename + '.bai', False)
        except Exception:
            printtime("ERROR: merging %s unsuccessful" % dataset['basecaller_bam'])

    ## Note! on barcoded runs, barcode files are NOT subsequently merged into one multi-barcode BAM.

    printtime("Finished merging basecaller BAM files")
Пример #3
0
def merge_barcoded_basecaller_bams(BASECALLER_RESULTS):
    """Create a composite rawlib.basecaller.bam from per-barcode BAMs.

    Args:
        BASECALLER_RESULTS: directory with datasets_basecaller.json and
            the per-barcode basecaller BAMs.

    Raises:
        Exception: if datasets_basecaller.json is missing or unparseable.
        Merge failures themselves are logged but not raised.
    """

    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        raise Exception("ERROR: %s does not exist" % datasets_basecaller_path)

    datasets_basecaller = {}
    try:
        # 'with' closes the file even if json.load raises.
        with open(datasets_basecaller_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        raise Exception("ERROR: problem parsing %s" % datasets_basecaller_path)

    try:
        composite_bam_filename = os.path.join(BASECALLER_RESULTS, 'rawlib.basecaller.bam')
        if not os.path.exists(composite_bam_filename):

            bam_file_list = []
            for dataset in datasets_basecaller["datasets"]:
                # Compute the candidate path once instead of three times.
                bam_name = os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
                print(bam_name)
                if os.path.exists(bam_name):
                    bam_file_list.append(bam_name)

            blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename, composite_bam_filename + '.bai', False)
    except Exception:
        traceback.print_exc()
        printtime("ERROR: Generate merged rawlib.basecaller.bam on barcoded runs failed")

    printtime("Finished basecaller barcode merging")
Пример #4
0
def merge_alignment_bigdata(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS,
                            mark_duplicates):
    """Merge per-block aligned BAMs into composite per-dataset BAMs.

    Args:
        dirs: list of per-block directories.
        BASECALLER_RESULTS: directory with datasets_basecaller.json.
        ALIGNMENT_RESULTS: directory of per-block and composite aligned BAMs.
        mark_duplicates: forwarded to blockprocessing.merge_bam_files.

    Returns early (after logging) if the datasets JSON cannot be parsed;
    per-dataset merge failures are logged and skipped.
    """

    datasets_json = {}
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    try:
        # 'with' guarantees the handle is closed even when json.load raises.
        with open(datasets_path, 'r') as f:
            datasets_json = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    for dataset in datasets_json['datasets']:
        # Merge BAMs
        try:
            block_bam_list = [
                os.path.join(dir, ALIGNMENT_RESULTS,
                             dataset['file_prefix'] + '.bam') for dir in dirs
            ]
            # Keep only the blocks that actually produced this BAM.
            block_bam_list = [
                block_bam_filename for block_bam_filename in block_bam_list
                if os.path.exists(block_bam_filename)
            ]
            composite_bam_filename = os.path.join(
                ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam')
            if block_bam_list:
                blockprocessing.merge_bam_files(
                    block_bam_list, composite_bam_filename,
                    composite_bam_filename + '.bai', mark_duplicates)
        except Exception:
            printtime("ERROR: merging %s unsuccessful" %
                      (dataset['file_prefix'] + '.bam'))
Пример #5
0
def merge_bams(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets, mark_duplicates):
    """Merge per-block BAMs for each dataset, mapped or unmapped.

    Datasets with a reference and at least one unfiltered read group are
    merged from ALIGNMENT_RESULTS (with a .bai index); all others are merged
    from BASECALLER_RESULTS via samtools without indexing or duplicate marking.

    Per-dataset failures are logged and skipped.
    """

    for dataset in basecaller_datasets['datasets']:

        # Defined before the try so the error handler can always name the file,
        # even when the failure happens before bamfile is computed.
        bamfile = None
        try:
            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group]['reference']

            # A dataset counts as filtered only if every read group is filtered.
            filtered = True
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get('filtered', False):
                    filtered = False

            if reference and not filtered:
                bamdir = ALIGNMENT_RESULTS
                bamfile = dataset['file_prefix'] + '.bam'
            else:
                bamdir = BASECALLER_RESULTS
                bamfile = dataset['basecaller_bam']
            block_bam_list = [os.path.join(blockdir, bamdir, bamfile) for blockdir in dirs]
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filepath = os.path.join(bamdir, bamfile)
            if block_bam_list:
                if reference and not filtered:
                    composite_bai_filepath = composite_bam_filepath + '.bai'
                    blockprocessing.merge_bam_files(block_bam_list, composite_bam_filepath, composite_bai_filepath, mark_duplicates)
                else:
                    # Unmapped BAMs: no index, no duplicate marking.
                    composite_bai_filepath = ""
                    mark_duplicates = False
                    blockprocessing.merge_bam_files(block_bam_list, composite_bam_filepath, composite_bai_filepath, mark_duplicates, method='samtools')

        except Exception:
            print(traceback.format_exc())
            printtime("ERROR: merging %s unsuccessful" % bamfile)
Пример #6
0
def merge_barcoded_basecaller_bams(BASECALLER_RESULTS, basecaller_datasets,
                                   method):
    """Merge per-barcode basecaller BAMs into rawlib.basecaller.bam.

    Args:
        BASECALLER_RESULTS: directory holding per-barcode BAMs and output.
        basecaller_datasets: parsed datasets_basecaller.json dict.
        method: merge backend passed to blockprocessing.merge_bam_files.

    Best-effort: failures are logged and do not propagate.
    """
    # Bound outside the try so the error handler below can always report it.
    composite_bam_filename = os.path.join(BASECALLER_RESULTS,
                                          'rawlib.basecaller.bam')
    try:
        if not os.path.exists(composite_bam_filename):  # TODO

            bam_file_list = []
            for dataset in basecaller_datasets["datasets"]:
                # Compute the candidate path once instead of three times.
                bam_name = os.path.join(BASECALLER_RESULTS,
                                        dataset['basecaller_bam'])
                print(bam_name)
                if os.path.exists(bam_name):
                    bam_file_list.append(bam_name)

            # Unmapped composite BAM: no index, no duplicate marking.
            composite_bai_filepath = ""
            mark_duplicates = False
            blockprocessing.merge_bam_files(bam_file_list,
                                            composite_bam_filename,
                                            composite_bai_filepath,
                                            mark_duplicates, method)
    except Exception:
        traceback.print_exc()
        printtime("ERROR: Generate merged %s on barcoded run failed" %
                  composite_bam_filename)

    printtime("Finished basecaller barcode merging")
Пример #7
0
def merge_bams(dirs, BASECALLER_RESULTS, basecaller_datasets, method):
    """Merge per-block unmapped BAMs into a composite BAM per dataset.

    Args:
        dirs: list of per-block directories.
        BASECALLER_RESULTS: composite basecaller results directory.
        basecaller_datasets: parsed datasets_basecaller.json dict.
        method: merge backend passed to blockprocessing.merge_bam_files.

    Per-dataset failures are logged and skipped.
    """

    for dataset in basecaller_datasets['datasets']:

        # Defined before the try so the error handler can always name the
        # file, even if the dataset lacks a 'basecaller_bam' key.
        bamfile = None
        try:
            bamdir = BASECALLER_RESULTS
            bamfile = dataset['basecaller_bam']
            block_bam_list = [
                os.path.join(blockdir, bamdir, bamfile) for blockdir in dirs
            ]
            block_bam_list = [
                block_bam_filename for block_bam_filename in block_bam_list
                if os.path.exists(block_bam_filename)
            ]
            composite_bam_filepath = os.path.join(bamdir, bamfile)
            if block_bam_list:
                # Unmapped BAMs: no index, no duplicate marking.
                composite_bai_filepath = ""
                mark_duplicates = False
                blockprocessing.merge_bam_files(block_bam_list,
                                                composite_bam_filepath,
                                                composite_bai_filepath,
                                                mark_duplicates, method)
        except Exception:
            traceback.print_exc()
            printtime("ERROR: merging %s unsuccessful" % bamfile)

    printtime("Finished merging basecaller BAM files")
Пример #8
0
def merge_bams(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS,
               basecaller_datasets, mark_duplicates):
    """Merge per-block BAMs for each dataset, mapped or unmapped.

    Datasets with a reference and at least one unfiltered read group are
    merged from ALIGNMENT_RESULTS (with a .bai index); all others are merged
    from BASECALLER_RESULTS via samtools without indexing or duplicate
    marking. Per-dataset failures are logged and skipped.
    """

    for dataset in basecaller_datasets['datasets']:

        # Defined before the try so the error handler can always name the
        # file, even when the failure happens before bamfile is computed.
        bamfile = None
        try:
            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group][
                'reference']

            # A dataset counts as filtered only if every read group is filtered.
            filtered = True
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get(
                        'filtered', False):
                    filtered = False

            if reference and not filtered:
                bamdir = ALIGNMENT_RESULTS
                bamfile = dataset['file_prefix'] + '.bam'
            else:
                bamdir = BASECALLER_RESULTS
                bamfile = dataset['basecaller_bam']
            block_bam_list = [
                os.path.join(blockdir, bamdir, bamfile) for blockdir in dirs
            ]
            block_bam_list = [
                block_bam_filename for block_bam_filename in block_bam_list
                if os.path.exists(block_bam_filename)
            ]
            composite_bam_filepath = os.path.join(bamdir, bamfile)
            if block_bam_list:
                if reference and not filtered:
                    composite_bai_filepath = composite_bam_filepath + '.bai'
                    blockprocessing.merge_bam_files(block_bam_list,
                                                    composite_bam_filepath,
                                                    composite_bai_filepath,
                                                    mark_duplicates)
                else:
                    # Unmapped BAMs: no index, no duplicate marking.
                    composite_bai_filepath = ""
                    mark_duplicates = False
                    blockprocessing.merge_bam_files(block_bam_list,
                                                    composite_bam_filepath,
                                                    composite_bai_filepath,
                                                    mark_duplicates,
                                                    method='samtools')

        except Exception:
            print(traceback.format_exc())
            printtime("ERROR: merging %s unsuccessful" % bamfile)
Пример #9
0
def merge_bams(dirs, BASECALLER_RESULTS, basecaller_datasets, method):
    """Merge per-block unmapped BAMs into a composite BAM per dataset.

    Args:
        dirs: list of per-block directories.
        BASECALLER_RESULTS: composite basecaller results directory.
        basecaller_datasets: parsed datasets_basecaller.json dict.
        method: merge backend passed to blockprocessing.merge_bam_files.

    Per-dataset failures are logged and skipped.
    """

    for dataset in basecaller_datasets['datasets']:

        # Defined before the try so the error handler can always name the
        # file, even if the dataset lacks a 'basecaller_bam' key.
        bamfile = None
        try:
            bamdir = BASECALLER_RESULTS
            bamfile = dataset['basecaller_bam']
            block_bam_list = [os.path.join(blockdir, bamdir, bamfile) for blockdir in dirs]
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filepath = os.path.join(bamdir, bamfile)
            if block_bam_list:
                # Unmapped BAMs: no index, no duplicate marking.
                composite_bai_filepath = ""
                mark_duplicates = False
                blockprocessing.merge_bam_files(block_bam_list, composite_bam_filepath, composite_bai_filepath, mark_duplicates, method)
        except Exception:
            traceback.print_exc()
            printtime("ERROR: merging %s unsuccessful" % bamfile)

    printtime("Finished merging basecaller BAM files")
Пример #10
0
def merge_barcoded_basecaller_bams(BASECALLER_RESULTS, basecaller_datasets, method):
    """Merge per-barcode basecaller BAMs into rawlib.basecaller.bam.

    Args:
        BASECALLER_RESULTS: directory holding per-barcode BAMs and output.
        basecaller_datasets: parsed datasets_basecaller.json dict.
        method: merge backend passed to blockprocessing.merge_bam_files.

    Best-effort: failures are logged and do not propagate.
    """
    # Bound outside the try so the error handler below can always report it.
    composite_bam_filename = os.path.join(BASECALLER_RESULTS, 'rawlib.basecaller.bam')
    try:
        if not os.path.exists(composite_bam_filename):  # TODO

            bam_file_list = []
            for dataset in basecaller_datasets["datasets"]:
                # Compute the candidate path once instead of three times.
                bam_name = os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
                print(bam_name)
                if os.path.exists(bam_name):
                    bam_file_list.append(bam_name)

            # Unmapped composite BAM: no index, no duplicate marking.
            composite_bai_filepath = ""
            mark_duplicates = False
            blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename, composite_bai_filepath, mark_duplicates, method)
    except Exception:
        traceback.print_exc()
        printtime("ERROR: Generate merged %s on barcoded run failed" % composite_bam_filename)

    printtime("Finished basecaller barcode merging")
Пример #11
0
def merge_barcoded_alignment_bams(ALIGNMENT_RESULTS, basecaller_datasets, method):
    """Merge all per-barcode aligned BAMs into a composite rawlib.bam.

    Args:
        ALIGNMENT_RESULTS: directory holding per-barcode BAMs and output.
        basecaller_datasets: parsed datasets_basecaller.json dict.
        method: merge backend passed to blockprocessing.merge_bam_files.

    Best-effort: failures are logged and do not propagate.
    """
    # Bound outside the try so the error handler below can always report it.
    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')
    try:
        bam_file_list = []
        for dataset in basecaller_datasets["datasets"]:
            bam_name = os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['file_prefix']) + '.bam')
            # Missing per-barcode BAMs are skipped with a warning rather
            # than failing the whole merge.
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)
            else:
                printtime("WARNING: exclude %s from merging into %s" % (bam_name, composite_bam_filename))

        composite_bai_filename = composite_bam_filename + '.bai'
        mark_duplicates = False
        blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename, composite_bai_filename, mark_duplicates, method)
    except Exception:
        traceback.print_exc()
        printtime("ERROR: Generate merged %s on barcoded run failed" % composite_bam_filename)

    printtime("Finished barcode merging of %s" % ALIGNMENT_RESULTS)
Пример #12
0
def merge_alignment_bigdata(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, mark_duplicates):
    """Merge per-block aligned BAMs into composite per-dataset BAMs.

    Args:
        dirs: list of per-block directories.
        BASECALLER_RESULTS: directory with datasets_basecaller.json.
        ALIGNMENT_RESULTS: directory of per-block and composite aligned BAMs.
        mark_duplicates: forwarded to blockprocessing.merge_bam_files.

    Returns early (after logging) if the datasets JSON cannot be parsed;
    per-dataset merge failures are logged and skipped.
    """

    datasets_json = {}
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    try:
        # 'with' guarantees the handle is closed even when json.load raises.
        with open(datasets_path, 'r') as f:
            datasets_json = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    for dataset in datasets_json['datasets']:
        # Merge BAMs
        try:
            block_bam_list = [os.path.join(dir, ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam') for dir in dirs]
            # Keep only the blocks that actually produced this BAM.
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam')
            if block_bam_list:
                blockprocessing.merge_bam_files(block_bam_list, composite_bam_filename, composite_bam_filename + '.bai', mark_duplicates)
        except Exception:
            printtime("ERROR: merging %s unsuccessful" % (dataset['file_prefix'] + '.bam'))
Пример #13
0
def merge_alignment_bigdata(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, mark_duplicates):
    """Merge per-block aligned BAMs per dataset and create legacy-name links.

    Args:
        dirs: list of per-block directories.
        BASECALLER_RESULTS: directory with datasets_basecaller.json.
        ALIGNMENT_RESULTS: directory of per-block and composite aligned BAMs.
        mark_duplicates: forwarded to blockprocessing.merge_bam_files.

    For datasets carrying a 'legacy_prefix', symlinks with the legacy name
    are created next to the merged BAM and its index.
    """

    datasets_json = {}
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    try:
        # 'with' guarantees the handle is closed even when json.load raises.
        with open(datasets_path, 'r') as f:
            datasets_json = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    for dataset in datasets_json['datasets']:
        # Merge BAMs
        try:
            block_bam_list = [os.path.join(dir, ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam') for dir in dirs]
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam')
            if block_bam_list:
                blockprocessing.merge_bam_files(block_bam_list, composite_bam_filename, composite_bam_filename + '.bai', mark_duplicates)
        except Exception:
            printtime("ERROR: merging %s unsuccessful" % (dataset['file_prefix'] + '.bam'))

        printtime("Creating legacy name links")
        # 'in' replaces the long-deprecated dict.has_key().
        if 'legacy_prefix' in dataset:
            link_src = [
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam.bai')]
            link_dst = [
                os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['legacy_prefix']) + '.bam'),
                os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['legacy_prefix']) + '.bam.bai')]
            for (src, dst) in zip(link_src, link_dst):
                try:
                    # Relative symlink so the results tree stays relocatable.
                    os.symlink(os.path.relpath(src, os.path.dirname(dst)), dst)
                except Exception:
                    printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))
Пример #14
0
def alignment_post_processing(BASECALLER_RESULTS, ALIGNMENT_RESULTS, flows,
                              mark_duplicates, force_alignstats):
    """Post-process alignment results for a (possibly barcoded) run.

    Steps: symlink per-barcode alignment summaries under legacy names, build
    a composite rawlib.bam if missing, optionally run alignStats on it,
    aggregate per-barcode alignStats output, generate error/alignment-rate
    plots, and produce the barcode alignment summary CSV.

    Args:
        BASECALLER_RESULTS: directory with datasets_basecaller.json.
        ALIGNMENT_RESULTS: directory of aligned BAMs and summary outputs.
        flows: flow count (int-like) used to scale plot x-axes.
        mark_duplicates: forwarded to blockprocessing.merge_bam_files.
        force_alignstats: when True (or when rawlib.bam was just created),
            regenerate alignStats error data from the composite BAM.

    Best-effort: individual failures are logged and do not abort the run.
    """

    datasets_basecaller = {}
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    try:
        # 'with' guarantees the handle is closed even when json.load raises.
        with open(datasets_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        # Fall back to the historical default when flows is not numeric.
        graph_max_x = 400

    input_prefix_list = []

    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(
                os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        printtime("Barcode processing, rename")
        src = os.path.join(ALIGNMENT_RESULTS,
                           dataset['file_prefix'] + '.alignment.summary')
        if os.path.exists(src):
            input_prefix_list.append(
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.'))
            #terrible hack to make aggregate_alignment happy
            X_name = 'nomatch'
            read_group = dataset['read_groups'][0]
            if 'barcode_name' in datasets_basecaller['read_groups'][
                    read_group]:
                X_name = datasets_basecaller['read_groups'][read_group][
                    'barcode_name']
            dst = os.path.join(ALIGNMENT_RESULTS,
                               'alignment_%s.summary' % X_name)
            try:
                os.symlink(os.path.relpath(src, os.path.dirname(dst)), dst)
            except Exception:
                printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs

    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(
                ALIGNMENT_RESULTS,
                os.path.basename(dataset['file_prefix']) + '.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)

        blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename,
                                        composite_bam_filename + '.bai',
                                        mark_duplicates)
        # A freshly merged composite BAM always needs new alignStats data.
        force_alignstats = True

    if force_alignstats:
        ## Generate data for error plot for barcoded run from composite bam
        printtime("Call alignStats to generate raw accuracy")
        try:
            cmd = "alignStats"
            cmd += " -n 12"
            cmd += " --alignSummaryFile alignStats_err.txt"
            cmd += " --alignSummaryJsonFile alignStats_err.json"
            cmd += " --alignSummaryMinLen  1"
            #cmd += " --alignSummaryMaxLen  %s" % str(int(graph_max_x))
            cmd += " --alignSummaryMaxLen  %s" % str(int(400))
            cmd += " --alignSummaryLenStep 1"
            cmd += " --alignSummaryMaxErr  10"
            cmd += " --infile %s" % composite_bam_filename
            cmd = cmd + " --outputDir %s" % ALIGNMENT_RESULTS
            printtime("DEBUG: Calling '%s'" % cmd)
            os.system(cmd)
        except Exception:
            printtime("alignStats failed")

    mergeAlignStatsResults(input_prefix_list, ALIGNMENT_RESULTS + "/")

    try:
        base_error_plot.generate_base_error_plot(
            os.path.join(ALIGNMENT_RESULTS, 'alignStats_err.json'),
            os.path.join(ALIGNMENT_RESULTS, 'base_error_plot.png'),
            int(graph_max_x))
        ionstats_plots.alignment_rate_plot(
            os.path.join(ALIGNMENT_RESULTS, 'alignStats_err.json'),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(ALIGNMENT_RESULTS, 'alignment_rate_plot.png'),
            int(graph_max_x))

        # Create aligned histogram plot

        # Create AQ20 plot

        printtime("Base error plot has been created successfully")
    except Exception:
        printtime("ERROR: Failed to generate base error plot")
        traceback.print_exc()

    # Generate alignment_barcode_summary.csv.
    # Search the current directory and up to four ancestors for barcodeList.txt;
    # replaces the original chain of five repeated if-not-exists checks.
    barcodelist_path = 'barcodeList.txt'
    for prefix in ('', '../', '../../', '../../../', '../../../../'):
        barcodelist_path = prefix + 'barcodeList.txt'
        if os.path.exists(barcodelist_path):
            break
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment("./", barcodelist_path)

    # These graphs are likely obsolete
    makeAlignGraphs()
Пример #15
0
    parser.add_argument('-i', '--add-file', dest='files', action='append', default=[], help="list of files to process")
    parser.add_argument('-m', '--merge-bams', dest='merge_out', action='store', default = "", help='merge bam files')
    parser.add_argument('-d', '--mark-duplicates', dest='duplicates', action='store_true', default = False, help='mark duplicates')
    parser.add_argument('-a', '--align-stats', dest='align_stats', action='store', default = "", help='generate alignment stats')
    parser.add_argument('-g', '--genomeinfo', dest='genomeinfo', action='store', default = "", help='genome info file for alignment stats')
    parser.add_argument('-p', '--merge-plots', dest='merge_plots', action='store_true', default = "", help='generate report plots')
    parser.add_argument('-z', '--zip', dest='zip', action='store', default = "", help='zip input files')

    args = parser.parse_args()
    
    
    if args.merge_out and len(args.files) > 1:   
       # Merge BAM files 
       outputBAM = args.merge_out
       print "Merging bam files to %s, mark duplicates is %s" % (outputBAM, args.duplicates)
       merge_bam_files(args.files, outputBAM, outputBAM.replace('.bam','.bam.bai'), args.duplicates)


    if args.align_stats:
       # Call alignStats on merged bam file       
       inputBAM = args.align_stats    
       print "Running alignStats on %s" % inputBAM
       
       cmd = "alignStats"
       
       if '_rawlib.bam' in inputBAM:
          bcid = inputBAM.split('_rawlib.bam')[0]
          cmd += " -o %s" % bcid
          # make alignment_BC.summary links to BC.alignment.summary output of alignStats
          os.symlink('%s.alignment.summary' % bcid, 'alignment_%s.summary' % bcid)  
       
Пример #16
0
            cmd = "calibrate --hpmodelMerge"
            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd,shell=True)
        except:
            traceback.print_exc()
            printtime("ERROR: Merge Basecaller Recalibration Results failed")



        try:
            printtime("INFO: merging rawtf.basecaller.bam")
            block_bam_list = [os.path.join(adir, env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam') for adir in dirs]
            block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
            composite_bam_filename = os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')
            if block_bam_list:
                blockprocessing.merge_bam_files(block_bam_list,composite_bam_filename,composite_bai_filepath="",mark_duplicates=False,method='picard')
        except:
            print traceback.format_exc()
            printtime("ERROR: merging rawtf.basecaller.bam unsuccessful")

        if do_unfiltered_processing:

            basecaller_datasets = blockprocessing.get_datasets_basecaller(env['BASECALLER_RESULTS'])

            try:
                os.mkdir(os.path.join(env['BASECALLER_RESULTS'],'unfiltered.untrimmed'))

                basecaller.merge_datasets_basecaller_json(
                    dirs,
                    os.path.join(env['BASECALLER_RESULTS'],"unfiltered.untrimmed"))
Пример #17
0
            printtime("INFO: merging rawtf.basecaller.bam")
            block_bam_list = [
                os.path.join(adir, env["BASECALLER_RESULTS"],
                             "rawtf.basecaller.bam") for adir in dirs
            ]
            block_bam_list = [
                block_bam_filename for block_bam_filename in block_bam_list
                if os.path.exists(block_bam_filename)
            ]
            composite_bam_filename = os.path.join(env["BASECALLER_RESULTS"],
                                                  "rawtf.basecaller.bam")
            if block_bam_list:
                blockprocessing.merge_bam_files(
                    block_bam_list,
                    composite_bam_filename,
                    composite_bai_filepath="",
                    mark_duplicates=False,
                    method="picard",
                )
        except Exception:
            print(traceback.format_exc())
            printtime("ERROR: merging rawtf.basecaller.bam unsuccessful")

        if do_unfiltered_processing:

            basecaller_datasets = blockprocessing.get_datasets_basecaller(
                env["BASECALLER_RESULTS"])

            try:
                os.mkdir(
                    os.path.join(env["BASECALLER_RESULTS"],
Пример #18
0
def process_datasets(
        blocks,
        alignmentArgs,
        ionstatsArgs,
        BASECALLER_RESULTS,
        basecaller_meta_information,
        library_key,
        graph_max_x,
        basecaller_datasets,
        ALIGNMENT_RESULTS,
        do_realign,
        do_ionstats,
        do_mark_duplicates,
        do_indexing,
        barcodeInfo):

    printtime("Attempt to align")

    do_sorting = True

    # compare with pipeline/python/ion/utils/ionstats.py
    ionstats_basecaller_file_list = []
    ionstats_alignment_file_list = []
    ionstats_basecaller_filtered_file_list = []
    ionstats_alignment_filtered_file_list = []

    for dataset in basecaller_datasets["datasets"]:

        read_group = dataset['read_groups'][0]
        reference = basecaller_datasets['read_groups'][read_group]['reference']
        #print "DEBUG: reference: %s' % reference

        filtered = True
        for rg_name in dataset["read_groups"]:
            if not basecaller_datasets["read_groups"][rg_name].get('filtered',False):
                filtered = False

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        if reference:

            # merge unmapped bam files TODO move into align
            try:
                bamdir = BASECALLER_RESULTS
                bamfile = dataset['basecaller_bam']
                block_bam_list = [os.path.join(blockdir, bamdir, bamfile) for blockdir in blocks]
                block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
                composite_bam_filepath = os.path.join(bamdir, bamfile)
                if block_bam_list:
                    composite_bai_filepath=""
                    mark_duplicates=False
                    method='samtools'
                    blockprocessing.merge_bam_files(block_bam_list, composite_bam_filepath, composite_bai_filepath, mark_duplicates, method)
            except:
                traceback.print_exc()
                printtime("ERROR: merging %s unsuccessful" % bamfile)


            try:
                align(
                    blocks,
                    alignmentArgs,
                    ionstatsArgs,
                    reference,
                    basecaller_meta_information,
                    library_key,
                    graph_max_x,
                    os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']),
                    do_realign,
                    do_ionstats,
                    do_sorting,
                    do_mark_duplicates,
                    do_indexing,
                    logfile=os.path.join(ALIGNMENT_RESULTS,dataset['file_prefix']+'.alignmentQC_out.txt'),
                    output_dir=ALIGNMENT_RESULTS,
                    output_basename=dataset['file_prefix'])
            except:
                traceback.print_exc()

            if filtered:
                ionstats_alignment_filtered_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))
            else:
                ionstats_alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))

            '''
            if do_indexing:
                try:
                    composite_bam_filepath = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')
                    composite_bai_filepath = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam.bai')
                    blockprocessing.create_index_file(composite_bam_filepath, composite_bai_filepath)
                except:
                    traceback.print_exc()
            '''

        else:

            # merge unmapped bam file without reference
            try:
                bamdir = BASECALLER_RESULTS
                bamfile = dataset['basecaller_bam']
                block_bam_list = [os.path.join(blockdir, bamdir, bamfile) for blockdir in blocks]
                block_bam_list = [block_bam_filename for block_bam_filename in block_bam_list if os.path.exists(block_bam_filename)]
                composite_bam_filepath = os.path.join(bamdir, bamfile)
                if block_bam_list:
                    composite_bai_filepath=""
                    mark_duplicates=False
                    method='samtools'
                    blockprocessing.merge_bam_files(block_bam_list, composite_bam_filepath, composite_bai_filepath, mark_duplicates, method)
            except:
                traceback.print_exc()
                printtime("ERROR: merging %s unsuccessful" % bamfile)


            if do_ionstats:
                # TODO: move ionstats basecaller into basecaller
                ionstats.generate_ionstats_basecaller(
                    [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                    os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                    library_key,
                    graph_max_x)

                if filtered:
                    ionstats_basecaller_filtered_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))
                else:
                    ionstats_basecaller_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))

    if do_ionstats:

        # Merge ionstats files from individual (barcoded) datasets
        if len(ionstats_alignment_file_list) > 0:
            ionstats.reduce_stats(ionstats_alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))
        else: # barcode classification filtered all barcodes or no reads available
            # TODO: ionstats needs to produce initial json file
            try:
                #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
                cmd  = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

                printtime("DEBUG: Calling '%s':" % cmd)
                ret = subprocess.call(cmd,shell=True)
                if ret != 0:
                    printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                    raise RuntimeError('exit code: %d' % ret)

                ionstats.generate_ionstats_alignment(
                    ionstatsArgs,
                    ['empty_dummy.bam'],
                    os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'),
                    os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'),
                    basecaller_meta_information,
                    library_key,
                    graph_max_x)

            except:
                raise

        if len(ionstats_basecaller_file_list) > 0:
            ionstats.reduce_stats(ionstats_basecaller_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json'))
        else: # barcode classification filtered all barcodes or no reads available
            # TODO: ionstats needs to produce initial json file
            try:
                #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
                cmd  = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

                printtime("DEBUG: Calling '%s':" % cmd)
                ret = subprocess.call(cmd,shell=True)
                if ret != 0:
                    printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                    raise RuntimeError('exit code: %d' % ret)

                ionstats.generate_ionstats_basecaller(
                    ['empty_dummy.bam'],
                    os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'),
                    library_key,
                    graph_max_x)
            except:
                raise


        ionstatslist = []
        a = os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json')
        b = os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json')
        if os.path.exists(a):
            ionstatslist.append(a)
        if os.path.exists(b):
            ionstatslist.append(b)
        if len(ionstatslist) > 0:
            ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS,'ionstats_basecaller_with_aligninfos.json'))
            ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
#        if len(ionstats_alignment_h5_file_list) > 0:
#            ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_error_summary.h5'))


    printtime("**** Alignment completed ****")
Пример #19
0
def post_basecalling(BASECALLER_RESULTS,expName,resultsName,flows):
    """Alignment-independent post-processing of basecaller output.

    Generates per-dataset ionstats metrics and sparkline plots, merges them
    into a composite ionstats_basecaller.json, produces the legacy
    quality.summary file and report histograms, and (for barcoded runs)
    merges the per-barcode unmapped BAMs into rawlib.basecaller.bam.

    Writes an empty 'badblock.txt' marker file and returns early if
    datasets_basecaller.json is missing or cannot be parsed.

    expName and resultsName are accepted for interface compatibility but
    are not used here.
    """

    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        open('badblock.txt', 'w').close()
        return

    datasets_basecaller = {}
    try:
        # 'with' guarantees the handle is closed even when json.load raises
        # (the original leaked the file object on a parse error).
        with open(datasets_basecaller_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        traceback.print_exc()
        open('badblock.txt', 'w').close()
        return

    # Scale the x-axis of the read-length plots with the flow count;
    # fall back to 400 when flows is missing or non-numeric.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        graph_max_x = 400

    quality_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        # Skip datasets whose unmapped BAM was never produced.
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        # Call ionstats utility to generate alignment-independent metrics
        # for the current unmapped BAM.
        ionstats.generate_ionstats_basecaller(
                os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                graph_max_x)

        # Plot read length sparkline
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

        quality_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))

    # Merge ionstats_basecaller files from individual barcodes/datasets
    ionstats.reduce_stats(quality_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))

    # Generate legacy stats file: quality.summary
    ionstats.generate_legacy_basecaller_files(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,''))

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))

    # Generate merged rawlib.basecaller.bam on barcoded runs, TODO, can this be removed?
    composite_bam_filename = os.path.join(BASECALLER_RESULTS,'rawlib.basecaller.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            if os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
                bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']))

        blockprocessing.merge_bam_files(bam_file_list,composite_bam_filename,composite_bam_filename+'.bai',False)

    printtime("Finished basecaller post processing")
Пример #20
0
def alignment_post_processing(
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates,
        force_alignstats):
    """Legacy alignment post-processing for a (possibly barcoded) run.

    Symlinks per-barcode alignment summaries to their legacy names, creates
    legacy-prefix BAM links, builds the composite rawlib.bam when missing,
    runs alignStats to produce raw-accuracy data, merges per-barcode
    alignStats results, renders the base error / alignment rate plots, and
    finally aggregates barcode alignment summaries when a barcodeList.txt
    is found.

    Returns early (with a traceback printed) if datasets_basecaller.json
    cannot be parsed.
    """

    datasets_basecaller = {}
    try:
        # 'with' guarantees the handle is closed even when json.load raises
        # (the original leaked the file object on a parse error).
        with open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    # Scale the x-axis of the error plots with the flow count;
    # fall back to 400 when flows is missing or non-numeric.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        graph_max_x = 400

    input_prefix_list = []

    for dataset in datasets_basecaller["datasets"]:
        # Skip datasets whose unmapped BAM was never produced.
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        printtime("Barcode processing, rename")
        src = os.path.join(ALIGNMENT_RESULTS,dataset['file_prefix']+'.alignment.summary')
        if os.path.exists(src):
            input_prefix_list.append(os.path.join(ALIGNMENT_RESULTS,dataset['file_prefix']+'.'))
            # terrible hack to make aggregate_alignment happy:
            # it expects files named alignment_<barcode>.summary.
            X_name = 'nomatch'
            read_group = dataset['read_groups'][0]
            if 'barcode_name' in datasets_basecaller['read_groups'][read_group]:
                X_name = datasets_basecaller['read_groups'][read_group]['barcode_name']
            dst = os.path.join(ALIGNMENT_RESULTS, 'alignment_%s.summary' % X_name)
            try:
                os.symlink(os.path.relpath(src,os.path.dirname(dst)),dst)
            except Exception:
                printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))

        printtime("Creating legacy name links")
        # 'in' replaces the Python-2-only dict.has_key().
        if 'legacy_prefix' in dataset:
            link_src = [
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam.bai')]
            link_dst = [
                os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['legacy_prefix'])+'.bam'),
                os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['legacy_prefix'])+'.bam.bai')]
            for (src,dst) in zip(link_src,link_dst):
                try:
                    os.symlink(os.path.relpath(src,os.path.dirname(dst)),dst)
                except Exception:
                    printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))

    # Special legacy post-processing.
    # Generate merged rawlib.basecaller.bam and rawlib.sff on barcoded runs

    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS,'rawlib.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(ALIGNMENT_RESULTS,os.path.basename(dataset['file_prefix'])+'.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)

        blockprocessing.merge_bam_files(bam_file_list,composite_bam_filename,composite_bam_filename+'.bai',mark_duplicates)
        # A freshly merged composite BAM has no alignStats output yet.
        force_alignstats = True

    if force_alignstats:
        ## Generate data for error plot for barcoded run from composite bam
        printtime("Call alignStats to generate raw accuracy")
        try:
            cmd = "alignStats"
            cmd += " -n 12"
            cmd += " --alignSummaryFile alignStats_err.txt"
            cmd += " --alignSummaryJsonFile alignStats_err.json"
            cmd += " --alignSummaryMinLen  1"
            # NOTE(review): intentionally hard-coded to 400 rather than
            # graph_max_x (see the commented-out variant) — kept as-is.
            #cmd += " --alignSummaryMaxLen  %s" % str(int(graph_max_x))
            cmd += " --alignSummaryMaxLen  %s" % str(int(400))
            cmd += " --alignSummaryLenStep 1"
            cmd += " --alignSummaryMaxErr  10"
            cmd += " --infile %s" % composite_bam_filename
            cmd = cmd + " --outputDir %s" % ALIGNMENT_RESULTS
            printtime("DEBUG: Calling '%s'" % cmd)
            os.system(cmd)
        except Exception:
            printtime("alignStats failed")

    mergeAlignStatsResults(input_prefix_list,ALIGNMENT_RESULTS+"/")

    try:
        base_error_plot.generate_base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'alignStats_err.json'),
            os.path.join(ALIGNMENT_RESULTS,'base_error_plot.png'),int(graph_max_x))
        base_error_plot.generate_alignment_rate_plot(
            os.path.join(ALIGNMENT_RESULTS,'alignStats_err.json'),
            os.path.join(BASECALLER_RESULTS,'readLen.txt'),
            os.path.join(ALIGNMENT_RESULTS,'alignment_rate_plot.png'),int(graph_max_x))

        # Create aligned histogram plot

        # Create AQ20 plot

        printtime("Base error plot has been created successfully")
    except Exception:
        printtime("ERROR: Failed to generate base error plot")
        traceback.print_exc()

    # Generate alignment_barcode_summary.csv.
    # barcodeList.txt may live up to four directory levels above the CWD
    # depending on where this step runs; probe each level in turn.
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)

    # These graphs are likely obsolete
    makeAlignGraphs()
Пример #21
0
def alignment_post_processing(
        libraryName,
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates):
    """Ionstats-based alignment post-processing for a (possibly barcoded) run.

    For each dataset with an unmapped BAM present, generates per-dataset
    ionstats alignment metrics and converts them to legacy alignment.summary
    files; merges the per-dataset metrics into a composite
    ionstats_alignment.json; builds the composite rawlib.bam when missing;
    aggregates barcode summaries when a barcodeList.txt is found; and
    renders the alignment rate, base error, and AQ length histogram plots.

    Returns early (with a traceback printed) if datasets_basecaller.json
    cannot be parsed.
    """

    datasets_basecaller = {}
    try:
        # 'with' guarantees the handle is closed even when json.load raises
        # (the original leaked the file object on a parse error).
        with open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    # Scale the x-axis of the plots with the flow count; fall back to 800
    # when flows is missing or non-numeric.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        graph_max_x = 800

    alignment_file_list = []

    for dataset in datasets_basecaller["datasets"]:
        # Skip datasets whose unmapped BAM was never produced.
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        ionstats.generate_ionstats_alignment(
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                graph_max_x)
        # Convert to the legacy per-dataset alignment.summary format.
        ionstats2alignstats(libraryName,
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.alignment.summary'))

        alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))

    # In Progress: merge ionstats alignment results
    ionstats.reduce_stats(alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))
    ionstats2alignstats(libraryName,
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            os.path.join(ALIGNMENT_RESULTS,'alignment.summary'))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs

    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS,'rawlib.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(ALIGNMENT_RESULTS,os.path.basename(dataset['file_prefix'])+'.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)

        blockprocessing.merge_bam_files(bam_file_list,composite_bam_filename,composite_bam_filename+'.bai',mark_duplicates)

    # Generate alignment_barcode_summary.csv.
    # TODO: use datasets_basecaller.json + *.ionstats_alignment.json instead
    # of barcodeList.txt and alignment_*.summary.
    # barcodeList.txt may live up to four directory levels above the CWD
    # depending on where this step runs; probe each level in turn.
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)

    # These graphs are likely obsolete
    #makeAlignGraphs()

    # In Progress: Use ionstats alignment results to generate plots
    ionstats_plots.alignment_rate_plot2(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'base_error_plot.png', graph_max_x)
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q10.png', 'AQ10', 'red')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q20.png', 'AQ20', 'green')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q47.png', 'AQ47', 'purple')
Пример #22
0
                        help='generate report plots')
    parser.add_argument('-z',
                        '--zip',
                        dest='zip',
                        action='store',
                        default="",
                        help='zip input files')

    args = parser.parse_args()

    if args.merge_out and len(args.files) > 1:
        # Merge BAM files
        outputBAM = args.merge_out
        print "Merging bam files to %s, mark duplicates is %s" % (
            outputBAM, args.duplicates)
        merge_bam_files(args.files, outputBAM,
                        outputBAM.replace('.bam', '.bam.bai'), args.duplicates)

    if args.align_stats:
        # Call alignStats on merged bam file
        inputBAM = args.align_stats
        print "Running alignStats on %s" % inputBAM

        cmd = "alignStats"

        if '_rawlib.bam' in inputBAM:
            bcid = inputBAM.split('_rawlib.bam')[0]
            cmd += " -o %s" % bcid
            # make alignment_BC.summary links to BC.alignment.summary output of alignStats
            os.symlink('%s.alignment.summary' % bcid,
                       'alignment_%s.summary' % bcid)