def __init__(self, dirpath, az_prjname_by_subprj, samplesheet=None):
    # Build the dataset structure for a NextSeq500 run: verify the Unaligned
    # dir, map each SampleSheet sub-project to its AZ project name, and set
    # up per-project/per-sample output paths.
    info('Parsing the NextSeq500 project structure')
    self.kind = 'nextseq500'
    DatasetStructure.__init__(self, dirpath, az_prjname_by_subprj, samplesheet=samplesheet)
    info('az_prjname_by_subprj: ' + str(az_prjname_by_subprj))
    verify_dir(self.unaligned_dirpath, is_critical=True)

    for pname, project in self.project_by_name.items():
        # az_prjname_by_subprj is either a single name (str) applying to all
        # sub-projects, or a mapping of sub-project name -> AZ project name.
        az_proj_name = az_prjname_by_subprj.get(pname) if not isinstance(
            az_prjname_by_subprj, basestring) else az_prjname_by_subprj
        if az_proj_name is None:
            if len(self.project_by_name) > 1:
                # Ambiguous: several sub-projects but no mapping entry for this one.
                warn('Warn: cannot correspond subproject ' + pname +
                     ' and project names and JIRA cases. '
                     'Please, follow the SOP for multiple-project run: http://wiki.rd.astrazeneca.net/display/NG/SOP+-+Pre+Processing+QC+Reporting')
                continue
            # Single sub-project: fall back to the only configured AZ name.
            az_proj_name = az_prjname_by_subprj.values()[0]

        project.set_dirpath(self.unaligned_dirpath, az_proj_name)
        for sample in project.sample_by_name.values():
            sample.source_fastq_dirpath = project.dirpath
            sample.set_up_out_dirs(project.fastq_dirpath, project.fastqc_dirpath, project.downsample_targqc_dirpath)

    self.basecall_stat_html_reports = self.__get_basecall_stats_reports()
    self.get_fastq_regexp_fn = get_nextseq500_regexp
def set_dirpath(self, dirpath, az_project_name):
    """Set the project root and derive all fastq/FastQC/downsample paths.

    If a 'merged' subdirectory exists, fastq files and FastQC reports are
    taken from there; otherwise the conventional fastq/FastQC layout is used.
    """
    self.dirpath = dirpath
    self.az_project_name = az_project_name
    verify_dir(self.dirpath, is_critical=True)

    merged_dirpath = join(self.dirpath, 'merged')
    if verify_dir(merged_dirpath, silent=True):
        # NOTE: attribute name keeps the historical 'mergred' spelling,
        # since callers elsewhere may read it by that name.
        self.mergred_dir_found = True
        self.fastq_dirpath = self.fastqc_dirpath = merged_dirpath
    else:
        self.mergred_dir_found = False
        self.fastq_dirpath = join(self.dirpath, 'fastq')
        self.fastqc_dirpath = join(self.fastq_dirpath, 'FastQC')
    info()

    self.comb_fastqc_fpath = join(self.fastqc_dirpath, 'FastQC.html')
    self.downsample_metamapping_dirpath = join(self.dirpath, 'Downsample_MetaMapping')
    self.downsample_targqc_dirpath = join(self.dirpath, 'Downsample_TargQC')
    # Fix: removed the dead `= None` pre-assignments of the two attributes
    # below — both were unconditionally overwritten right after.
    self.downsample_targqc_report_fpath = join(self.downsample_targqc_dirpath, 'targQC.html')
    self.project_report_html_fpath = join(self.dirpath, az_project_name + '.html')
def proc_args(argv):
    """Parse Seq2C command-line options.

    Returns a tuple (cnf, samples, target_bed, output_dir). Exits with usage
    text when no positional arguments are given.
    """
    info(' '.join(sys.argv))
    info()

    # Fix: corrected '--contols' typo in the usage string.
    description = 'This script generates target QC reports for each BAM provided as an input. ' \
                  'Usage: ' + basename(__file__) + ' sample2bam.tsv --bed target.bed --controls sample1:sample2 -o results_dir'
    parser = OptionParser(description=description, usage=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir', metavar='DIR', default=join(os.getcwd(), 'seq2c'))
    parser.add_option('--bed', dest='bed', help='BED file to run Seq2C analysis')
    parser.add_option('-c', '--controls', dest='controls',
                      help='Optional control sample names for Seq2C. For multiple controls, separate them using :')
    parser.add_option('--seq2c-opts', dest='seq2c_opts', help='Options for the final lr2gene.pl script.')
    parser.add_option('--no-prep-bed', dest='prep_bed', help=SUPPRESS_HELP, action='store_false', default=True)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug
    if len(args) == 0:
        parser.print_usage()
        sys.exit(1)

    if len(args) == 1 and not args[0].endswith('.bam'):
        # A single non-BAM argument is a sample2bam.tsv mapping file.
        sample_names, bam_fpaths = read_samples(
            verify_file(args[0], is_critical=True, description='Input sample2bam.tsv'))
        bam_by_sample = OrderedDict(zip(sample_names, bam_fpaths))
    else:
        bam_by_sample = find_bams(args)

    # Absence of a BED file means whole-genome mode.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
    check_genome_resources(cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    verify_dir(dirname(cnf.output_dir), is_critical=True)
    safe_mkdir(cnf.output_dir)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Seq2C'
    set_up_dirs(cnf)

    samples = [
        source.TargQC_Sample(name=s_name, dirpath=join(cnf.output_dir, s_name), bam=bam_fpath)
        for s_name, bam_fpath in bam_by_sample.items()]
    info('Samples: ')
    for s in samples:
        info('  ' + s.name)
    samples.sort(key=lambda _s: _s.key_to_sort())

    target_bed = verify_bed(cnf.bed, is_critical=True) if cnf.bed else None

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            # Fix: corrected 'provided is sys-config' -> 'provided in sys-config'.
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples, target_bed, cnf.output_dir
def __init__(self, dirpath, az_prjname_by_subprj=None, samplesheet=None):
    # Build the dataset structure for a HiSeq run: locate per-project
    # 'Project_<name>' dirs under Unaligned, map sub-projects to AZ project
    # names, set up sample paths, and expose BaseCalls via a per-project
    # symlink.
    info('Parsing the HiSeq project structure')
    self.kind = 'hiseq'
    DatasetStructure.__init__(self, dirpath, az_prjname_by_subprj, samplesheet=samplesheet)
    verify_dir(self.unaligned_dirpath, is_critical=True)
    self.basecall_stat_html_reports = self.__get_basecall_stats_reports()

    for pname, project in self.project_by_name.items():
        proj_dirpath = join(self.unaligned_dirpath, 'Project_' + pname.replace(' ', '-'))  #.replace('-', '_').replace('.', '_'))
        # az_prjname_by_subprj is either a single name (str) applying to all
        # sub-projects, or a mapping of sub-project name -> AZ project name.
        az_proj_name = az_prjname_by_subprj.get(pname) if not isinstance(
            az_prjname_by_subprj, basestring) else az_prjname_by_subprj
        if az_proj_name is None:
            if len(self.project_by_name) > 1:
                warn('Warn: cannot correspond subproject ' + pname +
                     ' and project names and JIRA cases. '
                     'Please, follow the SOP for multiple-project run: http://wiki.rd.astrazeneca.net/display/NG/SOP+-+Pre+Processing+QC+Reporting')
                continue
            az_proj_name = az_prjname_by_subprj.values()[0]

        project.set_dirpath(proj_dirpath, az_proj_name)
        for sname, sample in project.sample_by_name.items():
            sample.source_fastq_dirpath = join(project.dirpath, 'Sample_' + sname.replace(' ', '-'))  #.replace('-', '_').replace('.', '_'))
            sample.set_up_out_dirs(project.fastq_dirpath, project.fastqc_dirpath, project.downsample_targqc_dirpath)

        # Best-effort symlink to BaseCalls inside the project dir; failure is
        # non-fatal (logged with traceback) and the original path stays used.
        basecalls_symlink = join(project.dirpath, 'BaseCallsReports')
        if not exists(basecalls_symlink):
            info('Creating BaseCalls symlink ' + self.basecalls_dirpath + ' -> ' + basecalls_symlink)
            try:
                os.symlink(self.basecalls_dirpath, basecalls_symlink)
            except OSError:
                err('Cannot create symlink')
                traceback.print_exc()
            else:
                info('Created')
        if exists(basecalls_symlink):
            self.basecalls_dirpath = basecalls_symlink

    self.get_fastq_regexp_fn = get_hiseq_regexp
def main():
    """Run fastq preprocessing (FastQC, optional downsampling) locally."""
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(description='This script runs preprocessing.')
    parser.add_option('-1', dest='left_reads_fpath', help='Left reads fpath')
    parser.add_option('-2', dest='right_reads_fpath', help='Right reads fpath')
    parser.add_option('--sample', dest='sample_name', help='Sample name')
    parser.add_option('-o', dest='output_dir', help='Output directory path')
    parser.add_option('--downsample-to', dest='downsample_to', default=None, type='int',
                      help='Downsample reads to avoid excessive processing times with large files. '
                           'Default is 1 million. Set to 0 to turn off downsampling.')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    opts, args = parser.parse_args()
    logger.is_debug = opts.debug

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), determine_run_cnf(opts))

    reads1_fpath = verify_file(opts.left_reads_fpath, is_critical=True)
    reads2_fpath = verify_file(opts.right_reads_fpath, is_critical=True)
    out_dirpath = adjust_path(opts.output_dir) if opts.output_dir else critical('Please, specify output directory with -o')
    verify_dir(dirname(out_dirpath), description='output_dir', is_critical=True)

    with workdir(cnf):
        # Derive the sample name from the read file names unless given explicitly.
        sname = cnf.sample_name or _get_sample_name(reads1_fpath, reads2_fpath)
        res_dirpath = run_fastq(cnf, sname, reads1_fpath, reads2_fpath, out_dirpath,
                                downsample_to=cnf.downsample_to)

    verify_dir(res_dirpath, is_critical=True)
    info()
    info('*' * 70)
    info('Fastqc results:')
    info('  ' + res_dirpath)
def main():
    """Remotely run fastqc.py on the processing server over SSH.

    Validates local option values, rewrites script/interpreter paths from the
    Clarity layout to the Waltham layout, runs the remote command and streams
    its combined output.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(description='This script runs preprocessing.')
    parser.add_option('-1', dest='left_reads_fpath', help='Left reads fpath')
    parser.add_option('-2', dest='right_reads_fpath', help='Right reads fpath')
    parser.add_option('--sample', dest='sample_name', help='Sample name')
    parser.add_option('-o', dest='output_dir', help='Output directory path')
    parser.add_option('--downsample-to', dest='downsample_to', default=None, type='int',
                      help='Downsample reads to avoid excessive processing times with large files. '
                           'Default is 1 million. Set to 0 to turn off downsampling.')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    (opts, args) = parser.parse_args()

    if not opts.left_reads_fpath or not opts.right_reads_fpath or not opts.output_dir:
        parser.print_usage()
        sys.exit(1)  # fix: previously fell through and kept running with missing required options

    verify_file(opts.left_reads_fpath, is_critical=False)
    left_reads_fpath = adjust_path(opts.left_reads_fpath)
    verify_file(opts.right_reads_fpath, is_critical=False)
    right_reads_fpath = adjust_path(opts.right_reads_fpath)
    output_dirpath = adjust_path(opts.output_dir) if opts.output_dir else critical('Please, specify output directory with -o')
    verify_dir(dirname(output_dirpath), description='output_dir', is_critical=True)

    # Translate local paths into their server-side equivalents.
    left_reads_fpath, right_reads_fpath, output_dirpath = \
        map(_proc_path, [left_reads_fpath, right_reads_fpath, output_dirpath])

    # SECURITY: credentials here are masked placeholders; use key-based auth
    # or a secrets store rather than embedding a real password in source.
    ssh = connect_to_server(server_url='blue.usbod.astrazeneca.net', username='******', password='******')
    fastqc_py = get_script_cmdline(None, 'python', 'scripts/pre/fastqc.py')
    fastqc_py = fastqc_py.replace(REPORTING_SUITE_PATH_CLARITY, REPORTING_SUITE_PATH_WALTHAM)
    fastqc_py = fastqc_py.replace(PYTHON_PATH_CLARITY, PYTHON_PATH_WALTHAM)

    cmdl = '{fastqc_py} -1 {left_reads_fpath} -2 {right_reads_fpath} -o {output_dirpath}'
    if opts.sample_name:
        cmdl += ' --sample {opts.sample_name}'
    if opts.downsample_to:
        cmdl += ' --downsample-to ' + str(int(opts.downsample_to))
    cmdl = cmdl.format(**locals())
    cmdl += ' 2>&1'
    info(cmdl)

    stdin, stdout, stderr = ssh.exec_command(cmdl)
    for l in stdout:
        err(l, ending='')
    info()
    ssh.close()
def check_dirs_and_files(cnf, file_keys=(), dir_keys=()):
    """Validate and normalize path entries in *cnf*.

    For each key in *file_keys*/*dir_keys* that is present and non-empty in
    cnf, the path is adjusted in place and verified (keys containing 'bam'
    or 'bed' get format-specific checks as well). Returns a list of
    human-readable error strings; empty when everything checks out.

    Fix: default arguments changed from mutable ``list()`` to tuples, and a
    redundant second ``adjust_path`` call removed (the helper already
    normalizes the path before verifying).
    """
    errors = []

    def _verify_input_file(_key):
        # Normalize the path in cnf, then run existence plus
        # format-specific verification.
        cnf[_key] = adjust_path(cnf[_key])
        if not verify_file(cnf[_key], _key):
            return False
        if 'bam' in _key and not verify_bam(cnf[_key]):
            return False
        if 'bed' in _key and not verify_bed(cnf[_key]):
            return False
        return True

    for key in file_keys:
        if key and key in cnf and cnf[key]:
            if not _verify_input_file(key):
                errors.append('File ' + cnf[key] + ' is empty or cannot be found')

    for key in dir_keys:
        if key and key in cnf and cnf[key]:
            cnf[key] = adjust_path(cnf[key])
            if not verify_dir(cnf[key], key):
                errors.append('Directory ' + cnf[key] + ' is empty or cannot be found')

    return errors
def __find_unaligned_dir(self):
    """Return <dirpath>/Unalign if it verifies as a directory, else None.

    A missing dir is logged with a warning and reported as None so callers
    can fall back (e.g. to BaseCalls).

    Fix: removed the no-op self-assignment ``unaligned_dirpath =
    unaligned_dirpath`` and flattened the branch into guard style.
    """
    unaligned_dirpath = join(self.dirpath, 'Unalign')
    if verify_dir(unaligned_dirpath, description='"Unalign" directory', silent=True):
        return unaligned_dirpath
    warn('No unalign directory')
    return None
def __init__(self, dirpath, az_prjname_by_subprj, samplesheet=None):
    # Base constructor: resolve the run directory, the Unalign dir, the
    # BaseCalls dir and the SampleSheet, then parse the sheet into
    # self.project_by_name. When dirpath points inside /Unalign/<project>,
    # restrict parsing to that single Illumina project.
    self.az_prjname_by_subprj = az_prjname_by_subprj

    illumina_project_name = None
    if '/Unalign/' in dirpath:
        # dirpath addresses a single sub-project inside the Unalign dir.
        self.dirpath = dirpath.split('/Unalign/')[0]
        self.unaligned_dirpath = self.__find_unaligned_dir()
        verify_dir(self.unaligned_dirpath, description='Unalign dir', is_critical=True)
        illumina_project_name = dirpath.split('/Unalign/')[1]  # something like AURA.FFPE.AZ300, in contrast with project_name which is something like Bio_123_AURA_FFPE_AZ300
        info('Processing sub-project ' + illumina_project_name)
    else:
        self.dirpath = dirpath
        self.unaligned_dirpath = self.__find_unaligned_dir()

    self.basecalls_dirpath = join(self.dirpath, 'Data/Intensities/BaseCalls')
    verify_dir(self.basecalls_dirpath, is_critical=True)

    # Filled in by subclasses / later processing steps.
    self.bcl2fastq_dirpath = None
    self.source_fastq_dirpath = None

    if samplesheet:
        self.samplesheet_fpath = samplesheet
    else:
        self.samplesheet_fpath = self.__find_sample_sheet()

    self.project_by_name = self._parse_sample_sheet(self.samplesheet_fpath)

    if illumina_project_name:  # we want only a specific project
        if illumina_project_name not in self.project_by_name:
            info()
            critical('Err: project ' + illumina_project_name + ' not in the SampleSheet ' + self.samplesheet_fpath)
        else:
            # Narrow the mapping down to the one requested project.
            self.project_by_name = {illumina_project_name: self.project_by_name[illumina_project_name]}
def __init__(self, dirpath, az_prjname_by_subprj, samplesheet=None):
    # Build the dataset structure for a MiSeq run. Fastq files may live
    # either under the Unaligned dir or directly under BaseCalls; each
    # sub-project may have its own subdirectory there, or share the base dir.
    info('Parsing the MiSeq project structure')
    self.kind = 'miseq'
    DatasetStructure.__init__(self, dirpath, az_prjname_by_subprj, samplesheet=samplesheet)

    base_dirpath = self.unaligned_dirpath
    if not verify_dir(base_dirpath, silent=True):
        # No Unaligned dir: fall back to Data/Intensities/BaseCalls.
        base_dirpath = self.basecalls_dirpath
    verify_dir(base_dirpath, description='Source fastq dir')

    for pname, project in self.project_by_name.items():
        proj_dirpath = join(base_dirpath, pname)
        if not verify_dir(proj_dirpath, silent=True):
            # No per-project subdir: fastq files sit directly in the base dir.
            proj_dirpath = base_dirpath

        # az_prjname_by_subprj is either a single name (str) applying to all
        # sub-projects, or a mapping of sub-project name -> AZ project name.
        az_proj_name = az_prjname_by_subprj.get(pname) if not isinstance(
            az_prjname_by_subprj, basestring) else az_prjname_by_subprj
        if az_proj_name is None:
            if len(self.project_by_name) > 1:
                warn('Warn: cannot correspond subproject ' + pname +
                     ' and project names and JIRA cases. '
                     'Please, follow the SOP for multiple-project run: http://wiki.rd.astrazeneca.net/display/NG/SOP+-+Pre+Processing+QC+Reporting')
                continue
            az_proj_name = az_prjname_by_subprj.values()[0]

        project.set_dirpath(proj_dirpath, az_proj_name)
        for sample in project.sample_by_name.values():
            sample.source_fastq_dirpath = project.dirpath
            sample.set_up_out_dirs(project.fastq_dirpath, project.fastqc_dirpath, project.downsample_targqc_dirpath)

    # MiSeq runs produce no basecall stats HTML reports.
    self.basecall_stat_html_reports = []
    self.get_fastq_regexp_fn = get_hiseq4000_miseq_regexp
def proc_opts():
    """Parse the command line; the single positional argument is the root datasets dir.

    Returns the verified root directory path (e.g.
    /ngs/oncology/datasets/hiseq/150521_D00443_0159_AHK2KTADXX).
    """
    parser = OptionParser(description='')
    (opts, args) = parser.parse_args()
    # Fix: the parser registers no options, so `opts.debug` raised
    # AttributeError; default to False when the option is absent.
    logger.is_debug = getattr(opts, 'debug', False)
    if len(args) < 1:
        critical('First argument should be a root datasets dir')
    root_dirpath = verify_dir(args[0], is_critical=True, description='Dataset directory')
    info(' '.join(sys.argv))
    return root_dirpath
def create_jbrowse_symlink(genome, project_name, sample, file_fpath):
    """Symlink *file_fpath* into the JBrowse tracks dir for *project_name*.

    Index files (.tbi/.bai) keep the extension of the file they index (e.g.
    '.vcf.gz.tbi'). Symlink creation failures are logged, not raised; the
    intended symlink path is returned either way.
    """
    data_dirpath, _, _ = set_folders(genome)
    project_dirpath = join(data_dirpath, 'tracks', project_name)

    _, ext = splitext_plus(file_fpath)
    if ext == '.tbi' or ext == '.bai':
        # Peel one more extension so the link name keeps both suffixes.
        _, inner_ext = splitext_plus(splitext_plus(file_fpath)[0])
        ext = inner_ext + ext
    sym_link = join(project_dirpath, sample + ext)

    if not verify_dir(project_dirpath):
        safe_mkdir(project_dirpath)
    if isfile(file_fpath) and not isfile(sym_link):
        try:
            os.symlink(file_fpath, sym_link)
        except OSError:
            warn(traceback.format_exc())
    if isfile(sym_link):
        change_permissions(sym_link)
    return sym_link
def _run_multisample_qualimap(cnf, output_dir, samples, targqc_full_report):
    """ 1. Generates Qualimap2 plots and put into plots_dirpath
        2. Adds records to targqc_full_report.plots

    Returns None (after a warning) when Qualimap is missing or its
    multi-bamqc run fails; otherwise populates targqc_full_report.plots with
    paths relative to output_dir.
    """
    plots_dirpath = join(output_dir, 'plots')
    # Reuse previously generated plots if the dir exists and contains any
    # non-hidden file.
    if cnf.reuse_intermediate and verify_dir(plots_dirpath) and [
            f for f in listdir(plots_dirpath) if not f.startswith('.')]:
        info('Qualimap miltisample plots exist - ' + plots_dirpath + ', reusing...')
    else:
        # Qualimap2 run for multi-sample plots
        if len([s.qualimap_html_fpath for s in samples if s.qualimap_html_fpath]) > 0:
            qualimap = get_system_path(cnf, interpreter_or_name=None, name='qualimap')
            # Only the 'full' Qualimap distribution supports multi-bamqc.
            if qualimap is not None and get_qualimap_type(qualimap) == 'full':
                qualimap_output_dir = join(cnf.work_dir, 'qualimap_multi_bamqc')
                # Pre-process per-sample Qualimap outputs so multi-bamqc can
                # consume them.
                _correct_qualimap_genome_results(cnf, samples)
                _correct_qualimap_insert_size_histogram(cnf, samples)
                safe_mkdir(qualimap_output_dir)

                # Build the sample-name -> per-sample-report TSV that
                # multi-bamqc takes as --data.
                rows = []
                for sample in samples:
                    if sample.qualimap_html_fpath:
                        rows += [[sample.name, sample.qualimap_html_fpath]]
                data_fpath = write_tsv_rows(rows, join(qualimap_output_dir, 'qualimap_results_by_sample.tsv'))

                qualimap_plots_dirpath = join(qualimap_output_dir, 'images_multisampleBamQcReport')
                cmdline = '{qualimap} multi-bamqc --data {data_fpath} -outdir {qualimap_output_dir}'.format(**locals())
                # DISPLAY=None: run Qualimap headless.
                res = call(cnf, cmdline, exit_on_error=False, return_err_code=True,
                           env_vars=dict(DISPLAY=None),
                           output_fpath=qualimap_plots_dirpath, output_is_dir=True)
                if res is None or not verify_dir(qualimap_plots_dirpath):
                    warn('Warning: Qualimap for multi-sample analysis failed to finish. TargQC will not contain plots.')
                    return None
                else:
                    # Replace any stale plots dir with the fresh output.
                    if exists(plots_dirpath):
                        shutil.rmtree(plots_dirpath)
                    shutil.move(qualimap_plots_dirpath, plots_dirpath)
            else:
                warn('Warning: Qualimap for multi-sample analysis was not found. TargQC will not contain plots.')
                return None

    # Register every PNG plot in the report, relative to output_dir.
    targqc_full_report.plots = []
    for plot_fpath in listdir(plots_dirpath):
        plot_fpath = join(plots_dirpath, plot_fpath)
        if verify_file(plot_fpath) and plot_fpath.endswith('.png'):
            targqc_full_report.plots.append(relpath(plot_fpath, output_dir))
def main(args):
    """Convert a TCGA-style sample TSV into a bcbio samples CSV (and optional
    per-patient Bina CSVs), pairing tumour samples with a main normal.

    args: [input.tsv, bcbio.csv, dir_with_bams (optional), bina_dir (optional)]
    """
    if len(args) < 2:
        sys.exit('Usage ' + __file__ + ' input.tsv bcbio.csv [dir_with_bams] [bina_dir]')
    inp_fpath = args[0]
    verify_file(args[0], is_critical=True)

    out_fpath = args[1]
    verify_dir(dirname(adjust_path(out_fpath)), is_critical=True)

    bam_dirpath = None
    if len(args) > 2:
        bam_dirpath = args[2]
        verify_dir(adjust_path(bam_dirpath), is_critical=True)

    # bam_opt = args[2]
    # try:
    #     bam_col = int(bam_opt)
    #     bam_dirpath = None
    # except ValueError:
    #     bam_col = None
    #     verify_dir(bam_opt, is_critical=True)
    #     bam_dirpath = args[2]

    bina_dirpath = None
    if len(args) > 3:
        bina_dirpath = args[3]
        verify_dir(dirname(adjust_path(bina_dirpath)), is_critical=True)

    # filtered_bams_dirpath = adjust_path(sys.argv[3])
    # verify_dir(join(filtered_bams_dirpath, os.pardir), is_critical=True)

    # NOTE(review): columns_names is never read; kept as documentation of the
    # expected 27-column TCGA TSV layout.
    columns_names = 'study barcode disease disease_name sample_type sample_type_name analyte_type library_type center center_name platform platform_name assembly filename files_size checksum analysis_id aliquot_id participant_id sample_id tss_id sample_accession published uploaded modified state reason'

    samples_by_patient = defaultdict(list)

    # Defaults for the TCGA TSV layout; overridden below when the input is a
    # generic whitespace-separated file.
    delim = '\t'
    barcode_col = 1
    bam_col = 13
    is_tcga_tsv = True

    with open(inp_fpath) as fh:
        for i, l in enumerate(fh):
            if not l.strip():
                continue
            if i == 0:
                # Sniff the format from the first line.
                if len(l.split('\t')) == 27:
                    err('Interpreting as TCGA tsv')
                    if l.split('\t')[0] != 'TCGA':
                        continue  # skipping header
                else:
                    # Generic whitespace-separated file: locate the barcode
                    # and BAM columns by their content.
                    delim = None
                    for j, f in enumerate(l.split()):
                        if f.startswith('TCGA'):
                            barcode_col = j
                            err('barcode col is ' + str(j))
                        if f.endswith('bam'):
                            bam_col = j
                            err('bam col is ' + str(j))
                    is_tcga_tsv = False

            fs = l.split(delim)
            barcode = fs[barcode_col].split('-')  # TCGA-05-4244-01A-01D-1105-08

            sample = Sample()
            sample.bam = fs[bam_col]
            sample.bam_base_name = basename(os.path.splitext(fs[bam_col])[0])
            sample.description = fs[barcode_col]
            sample.patient = '-'.join(barcode[:3])
            if is_tcga_tsv:
                sample.reason = fs[26]

            # Two-digit sample-type code from the barcode; 10-19 are normals.
            # Codes <= 0 or >= 20 are skipped.
            sample_type = int(barcode[3][:2])
            if sample_type >= 20 or sample_type <= 0:
                continue
            sample.is_normal = 10 <= sample_type < 20
            sample.is_blood = sample_type in [3, 4, 9, 10]  # https://tcga-data.nci.nih.gov/datareports/codeTablesReport.htm

            if any(s.description == sample.description for s in samples_by_patient[sample.patient]):
                # Duplicate barcode for this patient: keep the higher BAM version.
                prev_sample = next(s for s in samples_by_patient[sample.patient]
                                   if s.description == sample.description)
                # comp reason
                # if 'Fileset modified' not in prev_sample.reason and 'Fileset modified' in sample.reason:
                #     err('Duplicated sample: ' + sample.description + ' Fileset modified not in old ' + prev_sample.name + ' over ' + sample.name)
                #     pass
                # elif 'Fileset modified' in prev_sample.reason and 'Fileset modified' not in sample.reason:
                #     samples_by_patient[sample.patient].remove(prev_sample)
                #     samples_by_patient[sample.patient].append(sample)
                #     err('Duplicated sample: ' + sample.description + ' Fileset modified not in new ' + sample.name + ' over ' + prev_sample.name)
                # else:
                # comp version
                prev_version = get_bam_version(prev_sample.bam_base_name)
                version = get_bam_version(sample.bam_base_name)
                err('Duplicated sample: ' + sample.description + ' Resolving by version (' +
                    ' over '.join(map(str, sorted([prev_version, version])[::-1])) + ')')
                if version > prev_version:
                    samples_by_patient[sample.patient].remove(prev_sample)
                    samples_by_patient[sample.patient].append(sample)
            else:
                samples_by_patient[sample.patient].append(sample)

    batches = []
    final_samples = set()

    if bina_dirpath:
        safe_mkdir(bina_dirpath)

    for patient, patient_samples in samples_by_patient.iteritems():
        tumours = [s for s in patient_samples if not s.is_normal]
        normals = [s for s in patient_samples if s.is_normal]

        # Pick the 'main' normal for pairing: prefer a blood-derived one.
        main_normal = None
        if len(normals) >= 1:
            if any(n.is_blood for n in normals):
                main_normal = next(n for n in normals if n.is_blood)
            else:
                main_normal = normals[0]
                if tumours:
                    # Extra normals beyond the main one get their own batches.
                    for n in normals[1:]:
                        b = Batch(n.description + '-batch')
                        b.tumour = n
                        batches.append(b)

        for t in tumours:
            b = Batch(t.description + '-batch')
            b.tumour = t
            t.batches.add(b)
            final_samples.add(t)
            if main_normal:
                b.normal = main_normal
                main_normal.batches.add(b)
                final_samples.add(main_normal)
            batches.append(b)

        ##################
        ###### Bina ######
        if bina_dirpath:
            bina_patient_dirpath = join(bina_dirpath, patient)
            safe_mkdir(bina_patient_dirpath)

            normals_csv_fpath = join(bina_patient_dirpath, 'normals.csv')
            tumours_csv_fpath = join(bina_patient_dirpath, 'tumors.csv')

            if main_normal:
                with open(normals_csv_fpath, 'w') as f:
                    f.write('name,bam\n')
                    bam_fpath = join(bam_dirpath, main_normal.bam) if bam_dirpath else main_normal.bam
                    f.write(main_normal.description + ',' + bam_fpath + '\n')

            with open(tumours_csv_fpath, 'w') as f:
                f.write('name,bam\n')
                for t in tumours:
                    bam_fpath = join(bam_dirpath, t.bam) if bam_dirpath else t.bam
                    f.write(t.description + ',' + bam_fpath + '\n')

    if bina_dirpath:
        err('Saved bina CSVs to ' + bina_dirpath)

    ###########################
    ######## Bcbio CSV ########
    print 'bcbio_nextgen.py -w template bcbio.yaml', out_fpath,

    with open(out_fpath, 'w') as out:
        out.write('sample,description,batch,phenotype\n')
        for s in sorted(final_samples, key=lambda s: s.bam_base_name):
            out.write(','.join([s.bam_base_name, s.description,
                                ';'.join(sorted(b.name for b in s.batches)),
                                ('normal' if s.is_normal else 'tumor')]) + '\n')
            bam_fpath = join(bam_dirpath, s.bam) if bam_dirpath else s.bam
            # Sanity-check that each BAM referenced in the CSV is readable.
            if verify_bam(bam_fpath, is_critical=False):
                try:
                    bam = pysam.Samfile(bam_fpath, "rb")
                except ValueError:
                    err(traceback.format_exc())
                    err('Cannot read ' + bam_fpath)
                    err()
                # n_rgs = max(1, len(bam.header.get("RG", [])))
            else:
                print bam_fpath,
def __get_basecall_stats_reports(self):
    """Return [<Unaligned>/Reports/html/index.html] when the bcl2fastq HTML
    report directory and its index file both verify; otherwise None."""
    reports_dirpath = join(self.unaligned_dirpath, 'Reports', 'html')
    index_fpath = join(reports_dirpath, 'index.html')
    if not verify_dir(reports_dirpath):
        return None
    if not verify_file(index_fpath):
        return None
    return [index_fpath]