Example #1
def launch_bedcoverage_hist(work_dir,
                            bed,
                            bam,
                            chr_lengths_fpath,
                            bedcov_output_fpath=None,
                            bedtools='bedtools'):
    if not bedcov_output_fpath:
        bedcov_output_fpath = join(
            work_dir,
            splitext_plus(basename(bed))[0] + '__' +
            splitext_plus(basename(bam))[0] + '_bedcov_output.txt')

    if bam.endswith('bam'):
        bam = bam_to_bed_nocnf(bam, bedtools)
    verify_file(bam,
                is_critical=True,
                description='BAM to BED conversion result')

    v = bedtools_version(bedtools)
    if v and v >= 24:
        cmdline = '{bedtools} coverage -sorted -g {chr_lengths_fpath} -a {bed} -b {bam} -hist'.format(
            **locals())
    else:
        cmdline = '{bedtools} coverage -a {bam} -b {bed} -hist'.format(
            **locals())
    cmdline += ' > ' + bedcov_output_fpath
    info(cmdline)
    os.system(cmdline)
    res = verify_file(bedcov_output_fpath)
    if res:
        info('Done, saved to ' + bedcov_output_fpath)
    else:
        err('Error, result is non-existent or empty')
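
A hypothetical invocation (the work dir, target BED, BAM, and genome lengths file below are illustrative, not taken from the original code):

launch_bedcoverage_hist(work_dir='/tmp/work',
                        bed='targets.bed',
                        bam='sample.bam',
                        chr_lengths_fpath='hg19.genome')
# writes /tmp/work/targets__sample_bedcov_output.txt (the default bedcov_output_fpath)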
Example #2
    def set_up_out_dirs(self, fastq_dirpath, fastqc_dirpath,
                        downsample_targqc_dirpath):
        self.fastq_dirpath = fastq_dirpath
        self.fastqc_dirpath = fastqc_dirpath
        self.downsample_targqc_dirpath = downsample_targqc_dirpath

        self.l_fpath = join(fastq_dirpath, self.name + '_R1.fastq.gz')
        self.r_fpath = join(fastq_dirpath, self.name + '_R2.fastq.gz')

        self.sample_fastqc_dirpath = join(fastqc_dirpath,
                                          self.name + '.fq_fastqc')
        self.fastqc_html_fpath = join(fastqc_dirpath,
                                      self.name + '.fq_fastqc.html')
        self.l_fastqc_base_name = splitext_plus(basename(self.l_fpath))[0]
        self.r_fastqc_base_name = splitext_plus(basename(self.r_fpath))[0]
        # self.l_fastqc_html_fpath = None  # join(ds.fastqc_dirpath,  + '_fastqc.html')
        # self.r_fastqc_html_fpath = None  # join(ds.fastqc_dirpath, splitext_plus(self.r_fpath)[0] + '_fastqc.html')

        if not isfile(self.fastqc_html_fpath):
            self.fastqc_html_fpath = join(self.sample_fastqc_dirpath,
                                          'fastqc_report.html')

        self.targqc_sample = TargQC_Sample(
            self.name, join(downsample_targqc_dirpath, self.name))
        self.targetcov_html_fpath = self.targqc_sample.targetcov_html_fpath
        self.ngscat_html_fpath = self.targqc_sample.ngscat_html_fpath
        self.qualimap_html_fpath = self.targqc_sample.qualimap_html_fpath
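
The path wiring above leans on splitext_plus, which, judging by its use on names like 'S1_R1.fastq.gz' here, splits compound extensions in one go. A minimal sketch of that presumed behavior (this reimplementation is an assumption, not the project's code):

import os.path

def splitext_plus_sketch(fpath):
    # Like os.path.splitext, but peels one extra level for compressed
    # files, so '.fastq.gz' comes off as a single extension.
    base, ext = os.path.splitext(fpath)
    if ext in ('.gz', '.bz2', '.zip'):
        base, ext2 = os.path.splitext(base)
        ext = ext2 + ext
    return base, ext

# splitext_plus_sketch('S1_R1.fastq.gz') -> ('S1_R1', '.fastq.gz')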
Example #3
def intersect_bed(cnf, bed1, bed2):
    bed1_fname, _ = splitext_plus(basename(bed1))
    bed2_fname, _ = splitext_plus(basename(bed2))
    output_fpath = join(cnf['work_dir'],
                        bed1_fname + '__' + bed2_fname + '.bed')
    bedtools = get_system_path(cnf, 'bedtools')
    cmdline = '{bedtools} intersect -u -a {bed1} -b {bed2}'.format(**locals())
    call(cnf, cmdline, output_fpath, verify_output_not_empty=False)
    return output_fpath
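
A hypothetical call (cnf is assumed to be a dict-like config carrying at least 'work_dir'; the BED paths are illustrative):

cnf = {'work_dir': '/tmp/work'}
out_bed = intersect_bed(cnf, 'panel.bed', 'exons.bed')
# -> /tmp/work/panel__exons.bed: the panel.bed entries that overlap exons.bed
# (-u reports each -a entry at most once)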
Example #4
def _intersect_with_tricky_regions(cnf, selected_bed_fpath, sample):
    info()
    info('Detecting problematic regions for ' + sample)

    bed_filenames = [fn + '.bed.gz' for fn in tricky_regions_fnames_d.keys()]

    merged_bed_fpaths = [
        join(cnf.genome.tricky_regions, 'merged', bed_filename)
        for bed_filename in bed_filenames
    ]

    info('Intersecting BED ' + selected_bed_fpath +
         ' using BED files with tricky regions')

    intersection_fpath = join(
        cnf.work_dir,
        splitext_plus(basename(selected_bed_fpath))[0] +
        '_tricky_vcf_bed.intersect')
    if not cnf.reuse_intermediate or not verify_file(
            intersection_fpath, silent=True, is_critical=False):
        bedtools = get_system_path(cnf, 'bedtools')
        cmdline = bedtools + ' intersect -header -a ' + selected_bed_fpath + ' -b ' + ' '.join(
            merged_bed_fpaths) + ' -wo -filenames'
        call(cnf,
             cmdline,
             output_fpath=intersection_fpath,
             exit_on_error=False)

    return intersection_fpath
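
For reference, the command this builds has the following shape (the tricky-region file names are hypothetical; the real ones come from tricky_regions_fnames_d):

bedtools intersect -header -a selected.bed -b merged/low_complexity.bed.gz merged/repeats.bed.gz -wo -filenames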
Example #5
def markdup_bam(cnf, in_bam_fpath, bammarkduplicates=None):
    """Perform non-stream based deduplication of BAM input files using biobambam.
    """
    if not bammarkduplicates:
        bammarkduplicates = get_system_path(cnf, 'bammarkduplicates')
        if not bammarkduplicates:
            warn('No biobambam bammarkduplicates, can\'t mark duplicates.')
            return None

    out_bam_fpath = add_suffix(in_bam_fpath, 'markdup')
    tmp_fpath = join(cnf.work_dir,
                     splitext_plus(basename(in_bam_fpath))[0] + '_markdup')
    safe_mkdir(dirname(tmp_fpath))
    cmdline = (
        '{bammarkduplicates} tmpfile={tmp_fpath} I={in_bam_fpath} O={out_bam_fpath}'
    ).format(**locals())
    res = call(cnf,
               cmdline,
               output_fpath=out_bam_fpath,
               stdout_to_outputfile=False,
               exit_on_error=False)
    if res:
        return out_bam_fpath
    else:
        return None
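
A hypothetical call (cnf is assumed to carry work_dir and tool paths; the BAM path is illustrative):

marked_bam = markdup_bam(cnf, '/data/sample.bam')
# -> the input path with a 'markdup' suffix (exact form depends on add_suffix)
#    on success, or None if bammarkduplicates is unavailable or the call fails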
Example #6
def _tracks(cnf, track_fpath, input_fpath):
    if not verify_file(track_fpath):
        return None

    field_name = splitext_plus(basename(track_fpath))[0]

    step_greetings('Intersecting with ' + field_name)

    output_fpath = intermediate_fname(cnf, input_fpath, field_name)
    if output_fpath.endswith('.gz'):
        output_fpath = output_fpath[:-3]
    if cnf.reuse_intermediate and verify_vcf(output_fpath):
        info('VCF ' + output_fpath + ' exists, reusing...')
        return output_fpath

    toolpath = get_system_path(cnf, 'vcfannotate')
    if not toolpath:
        err('WARNING: Skipping annotation with tracks: vcfannotate '
            'executable not found; you probably need to specify the path in system_config, '
            'or load bcbio: . /group/ngs/bin/bcbio-prod.sh')
        return None

    # self.all_fields.append(field_name)

    cmdline = '{toolpath} -b {track_fpath} -k {field_name} {input_fpath}'.format(
        **locals())

    assert input_fpath
    output_fpath = call_subprocess(cnf,
                                   cmdline,
                                   input_fpath,
                                   output_fpath,
                                   stdout_to_outputfile=True,
                                   overwrite=True)
    if not verify_vcf(output_fpath):
        err('Error: annotating with track ' + track_fpath +
            ' produced an invalid VCF: ' + str(output_fpath))
        return output_fpath

    # Set TRUE or FALSE for tracks
    def proc_line(line, i):
        if field_name in line:
            if not line.startswith('#'):
                fields = line.split('\t')
                info_line = fields[7]
                info_pairs = [attr.split('=') for attr in info_line.split(';')]
                info_pairs = [[pair[0], ('TRUE' if pair[1] else 'FALSE')] if
                              pair[0] == field_name and len(pair) > 1 else pair
                              for pair in info_pairs]
                info_line = ';'.join(
                    '='.join(pair) if len(pair) == 2 else pair[0]
                    for pair in info_pairs)
                fields = fields[:7] + [info_line] + fields[8:]
                return '\t'.join(fields)
        return line

    assert output_fpath
    output_fpath = iterate_file(cnf, output_fpath, proc_line, suffix='trk')
    return verify_vcf(output_fpath, is_critical=True)
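
To make the TRUE/FALSE rewrite concrete, here is a self-contained replay of the proc_line logic on one made-up VCF data line (the field name and values are hypothetical):

field_name = 'low_complexity'
line = '\t'.join(['chr1', '100', '.', 'A', 'T', '50', 'PASS',
                  'DP=10;low_complexity=chr1:90-110'])
fields = line.split('\t')
info_pairs = [attr.split('=') for attr in fields[7].split(';')]
info_pairs = [[p[0], 'TRUE' if p[1] else 'FALSE']
              if p[0] == field_name and len(p) > 1 else p
              for p in info_pairs]
print(';'.join('='.join(p) if len(p) == 2 else p[0] for p in info_pairs))
# prints: DP=10;low_complexity=TRUE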
Example #7
def sambamba_depth(cnf,
                   bed,
                   bam,
                   output_fpath=None,
                   use_grid=False,
                   depth_thresholds=None,
                   sample_name=None,
                   only_depth=False,
                   silent=False):
    sample_name = sample_name or splitext_plus(basename(bam))[0]

    if not output_fpath:
        output_fpath = join(
            cnf.work_dir,
            splitext_plus(basename(bed))[0] + '_' + sample_name +
            '_sambamba_depth.txt')

    if cnf.reuse_intermediate and verify_file(output_fpath, silent=True):
        info(output_fpath + ' exists, reusing.')
        if use_grid:
            return None
        else:
            return output_fpath
    sambamba = get_system_path(cnf,
                               join(get_ext_tools_dirname(), 'sambamba'),
                               is_critical=True)
    thresholds_str = ''
    if not only_depth:
        depth_thresholds = depth_thresholds or cnf.coverage_reports.depth_thresholds
        thresholds_str = '-T ' + ' -T'.join([str(d) for d in depth_thresholds])
    cmdline = 'depth region -F "not duplicate and not failed_quality_control" -L {bed} {thresholds_str} {bam}'.format(
        **locals())

    return call_sambamba(cnf,
                         cmdline,
                         output_fpath=output_fpath,
                         bam_fpath=bam,
                         sambamba=sambamba,
                         use_grid=use_grid,
                         command_name='depth_' +
                         splitext_plus(basename(bed))[0],
                         sample_name=sample_name,
                         silent=silent)
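
For example, with depth_thresholds = [1, 5, 10] the thresholds string and command come out as follows (BED/BAM names illustrative; call_sambamba is expected to prepend the sambamba binary):

thresholds_str = '-T ' + ' -T'.join([str(d) for d in [1, 5, 10]])
# -> '-T 1 -T5 -T10'
# depth region -F "not duplicate and not failed_quality_control" -L targets.bed -T 1 -T5 -T10 sample.bam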
Example #8
def create_jbrowse_symlink(genome, project_name, sample, file_fpath):
    jbrowse_data_path, _, _ = set_folders(genome)
    jbrowse_dirpath = join(jbrowse_data_path, 'tracks')
    jbrowse_project_dirpath = join(jbrowse_dirpath, project_name)
    base, ext = splitext_plus(file_fpath)
    if ext in ['.tbi', '.bai']:
        base, ext2 = splitext_plus(base)
        ext = ext2 + ext
    sym_link = join(jbrowse_project_dirpath, sample + ext)
    if not verify_dir(jbrowse_project_dirpath):
        safe_mkdir(jbrowse_project_dirpath)
    if isfile(file_fpath) and not isfile(sym_link):
        try:
            os.symlink(file_fpath, sym_link)
        except OSError:
            warn(traceback.format_exc())
    if isfile(sym_link):
        change_permissions(sym_link)
    return sym_link
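
The two-step extension peel keeps index suffixes attached to the data extension, so a tabix index links under the sample name intact. A hypothetical example (genome, project, and path are illustrative):

create_jbrowse_symlink('hg19', 'proj1', 'S1', '/data/S1-ready.vcf.gz.tbi')
# splitext_plus yields ext '.tbi'; the '.tbi'/'.bai' branch re-splits the base
# and builds ext '.vcf.gz.tbi', so the link becomes .../tracks/proj1/S1.vcf.gz.tbi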
Example #9
def bam_to_bed_nocnf(bam_fpath, bedtools='bedtools', gzip='gzip'):
    info(
        'Converting the BAM to BED to save some memory.'
    )  # from here: http://davetang.org/muse/2015/08/05/creating-a-coverage-plot-using-bedtools-and-r/
    bam_bed_fpath = splitext_plus(bam_fpath)[0] + '.bed.gz'
    cmdline = '{bedtools} bamtobed -i {bam_fpath} | {gzip} > {bam_bed_fpath}'.format(
        **locals())
    info(cmdline)
    os.system(cmdline)
    bam_bed_fpath = verify_file(bam_bed_fpath)
    if bam_bed_fpath:
        info('Done, saved to ' + bam_bed_fpath)
    else:
        err('Error, result is non-existent or empty')
    return bam_bed_fpath
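
A hypothetical direct call (assumes bedtools and gzip resolve on PATH; the BAM path is illustrative):

bed_gz = bam_to_bed_nocnf('/data/sample.bam')
# -> '/data/sample.bed.gz', or a falsy value if the result is missing or empty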
Example #10
def bam_to_bed(cnf, bam_fpath, to_gzip=True):
    info(
        'Converting the BAM to BED to save some memory.'
    )  # from here: http://davetang.org/muse/2015/08/05/creating-a-coverage-plot-using-bedtools-and-r/
    bam_bed_fpath = splitext_plus(bam_fpath)[0] + ('.bed.gz'
                                                   if to_gzip else '.bed')
    bedtools = get_system_path(cnf, 'bedtools')
    gzip = get_system_path(cnf, 'gzip')
    cmdline = '{bedtools} bamtobed -i {bam_fpath}'.format(**locals())
    cmdline += ' | {gzip}'.format(**locals()) if to_gzip else ''
    call(cnf,
         cmdline,
         output_fpath=bam_bed_fpath,
         verify_output_not_empty=False)
    return bam_bed_fpath
Example #11
def markdup_sam(cnf, in_sam_fpath, samblaster=None):
    """Perform non-stream based deduplication of SAM input files using samblaster.
    """
    if not samblaster:
        samblaster = get_system_path(cnf, 'samblaster')
        if not samblaster:
            warn('No samblaster, can\'t mark duplicates.')
            return None

    out_sam_fpath = add_suffix(in_sam_fpath, 'markdup')
    tmp_fpath = join(cnf.work_dir,
                     splitext_plus(basename(in_sam_fpath))[0] + '_markdup')
    safe_mkdir(dirname(tmp_fpath))
    cmdline = '{samblaster} -i {in_sam_fpath} -o {out_sam_fpath}'.format(
        **locals())
    res = call(cnf,
               cmdline,
               output_fpath=out_sam_fpath,
               stdout_to_outputfile=False,
               exit_on_error=False)
    if res:
        return out_sam_fpath
    else:
        return None
Example #12
def read_samples_info_and_split(common_cnf, options, inputs):
    #TODO: _set_up_dirs(cnf) for each sample

    info('')
    info('Processing input details...')

    details = None
    for key in inputs:
        if options.get(key):
            common_cnf[key] = adjust_path(options[key])
            info('Using ' + common_cnf[key])
            details = [common_cnf]
    if not details:
        details = common_cnf.get('details')
    if not details:
        critical('Please, provide input ' + ', '.join(inputs) +
                 ' in command line or in run info yaml config.')

    all_samples = OrderedDict()

    for one_item_cnf in details:
        if 'vcf' not in one_item_cnf:
            critical('ERROR: A section in details does not contain field "vcf".')
        one_item_cnf['vcf'] = adjust_path(one_item_cnf['vcf'])
        verify_file(one_item_cnf['vcf'], 'Input file', is_critical=True)

        join_parent_conf(one_item_cnf, common_cnf)

        work_vcf = join(one_item_cnf['work_dir'], basename(one_item_cnf['vcf']))
        check_file_changed(one_item_cnf, one_item_cnf['vcf'], work_vcf)
        if not one_item_cnf.get('reuse_intermediate'):
            with open_gzipsafe(one_item_cnf['vcf']) as inp, open_gzipsafe(work_vcf, 'w') as out:
                out.write(inp.read())
        one_item_cnf['vcf'] = work_vcf

        vcf_header_samples = read_sample_names_from_vcf(one_item_cnf['vcf'])

        # MULTIPLE SAMPLES
        if ('samples' in one_item_cnf or one_item_cnf.get('split_samples')) and len(vcf_header_samples) > 0:
            sample_cnfs = _verify_sample_info(one_item_cnf, vcf_header_samples)

            for header_sample_name in vcf_header_samples:
                if header_sample_name not in sample_cnfs:
                    sample_cnfs[header_sample_name] = one_item_cnf.copy()

                if header_sample_name in all_samples:
                    critical('ERROR: duplicated sample name: ' + header_sample_name)

                cnf = all_samples[header_sample_name] = sample_cnfs[header_sample_name]
                cnf['name'] = header_sample_name
                if cnf.get('keep_intermediate'):
                    cnf['log'] = join(cnf['work_dir'], cnf['name'] + '.log')

                # cnf['vcf'] = extract_sample(cnf, one_item_cnf['vcf'], cnf['name'])
                info()

        # SINGLE SAMPLE
        else:
            cnf = one_item_cnf

            if 'bam' in cnf:
                cnf['bam'] = adjust_path(cnf['bam'])
                verify_bam(cnf['bam'], is_critical=True)

            cnf['name'] = splitext_plus(basename(cnf['vcf']))[0]

            if cnf.get('keep_intermediate'):
                cnf['log'] = join(cnf['work_dir'], cnf['name'] + '.log')

            cnf['vcf'] = work_vcf
            all_samples[cnf['name']] = cnf

    if not all_samples:
        info('No samples.')
    else:
        info('Using samples: ' + ', '.join(all_samples) + '.')

    return all_samples
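
A minimal sketch of the input this reader accepts, written as the already-parsed config (paths and the optional 'bam' key are illustrative; in practice this comes from the run info YAML or command-line options):

common_cnf = {
    'work_dir': '/tmp/work',
    'details': [
        {'vcf': '/data/S1.vcf', 'bam': '/data/S1.bam'},
        {'vcf': '/data/S2.vcf.gz'},
    ],
}
# read_samples_info_and_split(common_cnf, options={}, inputs=['vcf'])
# -> OrderedDict mapping each sample name to its per-sample cnf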
Example #13
def _extract_fields(cnf, vcf_fpath, samplename, main_sample_index=0):
    fname, _ = splitext_plus(basename(vcf_fpath))
    tsv_fpath = join(cnf.work_dir, fname + '.tsv')

    if cnf.get('reuse_intermediate'):
        if file_exists(tsv_fpath):
            info(tsv_fpath + ' exists, reusing')
            return tsv_fpath

    manual_tsv_fields = cnf.annotation['tsv_fields']
    if not manual_tsv_fields:
        return None

    all_fields = []
    basic_fields = []
    info_fields = []
    eff_fields = []
    gt_fields = []
    tumor_gt = 'GEN[' + str(main_sample_index) + '].'
    normal_gt = 'GEN[' + str(1 - main_sample_index) + '].'

    lines = []

    with open(vcf_fpath) as inp:
        reader = vcf.Reader(inp)

        info('TSV saver: Building field list')
        for f in [rec.keys()[0] for rec in manual_tsv_fields]:
            if f.startswith('GEN'):
                _f = f.split('.')[1]
                if len(reader.samples) > 0:
                    if _f in reader.formats:
                        gt_fields.append(_f)
                        all_fields.append(f.replace('GEN[*].', tumor_gt))
                        if len(reader.samples) > 1:
                            all_fields.append(f.replace('GEN[*].', normal_gt))
                else:
                    warn('TSV Saver: Warning: ' + f + ' is not in VCF header FORMAT records')

            elif f in ['CHROM', 'POS', 'REF', 'ALT', 'ID', 'FILTER', 'QUAL']:
                all_fields.append(f)
                basic_fields.append(f)

            elif any(f.startswith(af) and af in reader.infos for af in ['EFF', 'ANN']):
                all_fields.append(f)
                eff_fields.append(f)

            else:
                if f in reader.infos:
                    info_fields.append(f)
                    all_fields.append(f)
                elif f == 'SAMPLE':
                    all_fields.append(f)
                else:
                    warn('TSV Saver: Warning: ' + f + ' is not in VCF header INFO records')

        info('TSV saver: Iterating over records...')
        for rec in reader:
            d = OrderedDict()  # fresh dict per record so FORMAT-less records don't carry stale GT values
            for f in basic_fields:
                d[f] = rec.__dict__[f]

            for f in info_fields:
                d[f] = rec.INFO[f] if f in rec.INFO else ''

            if 'SAMPLE' not in d:
                d['SAMPLE'] = samplename

            if eff_fields:
                eff = rec.INFO.get(eff_fields[0][:3])
                if not eff:
                    for f in eff_fields:
                        d[f] = ''
                else:
                    eff_fs = eff[0].split('|')
                    eff_d = dict()
                    for val, header in zip(eff_fs, ['ALLELE', 'EFFECT', 'IMPACT', 'GENE', 'GENEID', 'FEATURE', 'FEATUREID', 'BIOTYPE', 'RANK', 'HGVS_C', 'HGVS_P', 'CDNA_POSLEN', 'CDS_POSLEN', 'AA_POSLEN', 'DISTANCE', 'LOG']):
                        if 'POSLEN' in header:
                            eff_d[header.split('_')[0] + '_POS'] = val.split('/')[0] if val else ''
                            eff_d[header.split('_')[0] + '_LEN'] = val.split('/')[1] if val else ''
                        else:
                            eff_d[header] = val
                    #ANN=GA |3_prime_UTR_variant|MODIFIER|RPL22|RPL22|transcript|NM_000983.3|Coding|4/4|c.*173dupT|||||173|;
                    #Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'
                    for f in eff_fields:
                        d[f] = eff_d[f.split('.')[1]]

            if rec.FORMAT:
                for _f in gt_fields:
                    if _f in rec.FORMAT:
                        d[tumor_gt + _f] = rec.samples[main_sample_index][_f]
                        if len(rec.samples) > 1 - main_sample_index:
                            d[normal_gt + _f] = rec.samples[1 - main_sample_index][_f]
                        else:
                            d[normal_gt + _f] = ''
                    else:
                        d[tumor_gt + _f] = ''
                        d[normal_gt + _f] = ''

            fs = []
            for f in all_fields:
                v = d.get(f, '')  # GT fields may be absent when a record has no FORMAT
                fs.append(v if v != '.' else '')
            lines.append(fs)

    info('TSV saver: Adding GEN[*] fields both for sample and for matched normal...')
    field_map = dict()
    for rec in manual_tsv_fields:
        k = rec.keys()[0]
        v = rec.values()[0]
        if k.startswith('GEN[*].'):
            _f = k.split('.')[1]
            field_map[tumor_gt + _f] = v
            field_map[normal_gt + _f] = 'Matched_' + v
        else:
            field_map[k] = v

    info('TSV saver: Writing TSV to ' + tsv_fpath)
    with file_transaction(cnf.work_dir, tsv_fpath) as tx:
        with open(tx, 'w') as out:
            out.write('\t'.join(field_map[f] for f in all_fields) + '\n')
            for fs in lines:
                new_fs = []
                for f in fs:
                    if isinstance(f, list):
                        new_fs.append(','.join(map(str, f)))
                    elif f is None:
                        new_fs.append('')
                    else:
                        new_fs.append(str(f))
                out.write('\t'.join(new_fs) + '\n')

    info('TSV saver: saved ' + tsv_fpath)
    return tsv_fpath
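
To see the POSLEN handling in isolation, here is a self-contained replay of the header zip on the ANN record quoted in the comment above:

ann = 'GA|3_prime_UTR_variant|MODIFIER|RPL22|RPL22|transcript|NM_000983.3|Coding|4/4|c.*173dupT|||||173|'
headers = ['ALLELE', 'EFFECT', 'IMPACT', 'GENE', 'GENEID', 'FEATURE',
           'FEATUREID', 'BIOTYPE', 'RANK', 'HGVS_C', 'HGVS_P',
           'CDNA_POSLEN', 'CDS_POSLEN', 'AA_POSLEN', 'DISTANCE', 'LOG']
eff_d = {}
for val, header in zip(ann.split('|'), headers):
    if 'POSLEN' in header:
        eff_d[header.split('_')[0] + '_POS'] = val.split('/')[0] if val else ''
        eff_d[header.split('_')[0] + '_LEN'] = val.split('/')[1] if val else ''
    else:
        eff_d[header] = val
# eff_d['EFFECT'] == '3_prime_UTR_variant'; eff_d['DISTANCE'] == '173';
# the empty cDNA/CDS/AA fields become paired '' POS/LEN entries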
Example #14
def read_samples(args, caller_name=None):
    vcf_fpath_by_sample = OrderedDict()
    bad_vcf_fpaths = []

    info('Reading samples...')

    if len(args) == 1:
        first_fpath = args[0]
        if not first_fpath.endswith('.vcf') and not first_fpath.endswith(
                '.vcf.gz'):  # TODO: check ##fileformat=VCF ?
            info(
                'First argument file name does not look like a VCF, assuming a TSV with file names'
            )

            with open(first_fpath) as f:
                for i, l in enumerate(f):
                    fs = l.strip().split('\t')
                    if len(fs) != 2:
                        critical('Line ' + str(i + 1) + ' has ' +
                                 str(len(fs)) +
                                 ' fields. Expecting 2 (sample and vcf_fpath)')
                    sn, vcf_fpath = fs
                    if not verify_file(vcf_fpath):
                        bad_vcf_fpaths.append(vcf_fpath)
                    vcf_fpath_by_sample[sn] = adjust_path(vcf_fpath)

            if bad_vcf_fpaths:
                critical('Some VCF files cannot be found, are empty, or are not VCFs: ' +
                         ', '.join(bad_vcf_fpaths))
            info('Done reading ' + str(len(vcf_fpath_by_sample)) + ' samples')
            return vcf_fpath_by_sample

    for arg in args or [os.getcwd()]:
        vcf_fpath = verify_vcf(arg.split(',')[0])
        if not verify_file(vcf_fpath):
            bad_vcf_fpaths.append(vcf_fpath)
        if len(arg.split(',')) > 1:
            sn = arg.split(',')[1]
        else:
            sn = basename(splitext_plus(vcf_fpath)[0])
            if caller_name and sn.endswith('-' + caller_name):
                sn = sn[:-len(caller_name) - 1]
            info('  ' + sn)
        if sn in vcf_fpath_by_sample:
            if vcf_fpath_by_sample[sn] != vcf_fpath:
                warn('Duplicated record ' + sn +
                     ', VCF file is different (existing: ' +
                     vcf_fpath_by_sample[sn] + ', new: ' + vcf_fpath + ')')
            else:
                warn('Duplicated record ' + sn + ', VCF file is the same: ' +
                     vcf_fpath)
        else:
            vcf_fpath_by_sample[sn] = vcf_fpath
    if bad_vcf_fpaths:
        critical('Some VCF files cannot be found, are empty, or are not VCFs: ' +
                 ', '.join(bad_vcf_fpaths))
    info('Done reading ' + str(len(vcf_fpath_by_sample)) + ' samples')

    # TODO: read sample names from VCF
    # def get_main_sample(self, main_sample_index=None):
    #     if len(self._sample_indexes) == 0:
    #         return None
    #     if main_sample_index is not None:
    #         return self.samples[main_sample_index]
    #     try:
    #         sample_index = [sname.lower() for sname in self._sample_indexes] \
    #                         .index(self.sample_name_from_file.lower())
    #     except ValueError:
    #         return self.samples[0]
    #     else:
    #         return self.samples[sample_index]

    return vcf_fpath_by_sample
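
The two-column TSV accepted by the single-argument branch looks like this (sample names and paths are illustrative; fields are tab-separated):

S1	/data/S1-vardict.vcf.gz
S2	/data/S2-vardict.vcf.gz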
Example #15
def make_fastqc_reports(cnf, fastq_fpaths, output_dir):
    # if isdir(fastqc_dirpath):
    #     if isdir(fastqc_dirpath + '.bak'):
    #         try:
    #             shutil.rmtree(fastqc_dirpath + '.bak')
    #         except OSError:
    #             pass
    #     if not isdir(fastqc_dirpath + '.bak'):
    #         os.rename(fastqc_dirpath, fastqc_dirpath + '.bak')
    # if isdir(fastqc_dirpath):
    #     err('Could not run and combine fastqc because it already exists and could not be moved to fastqc.bak')
    #     return None

    fastqc = get_system_path(cnf, 'fastqc')
    if not fastqc:
        err('FastQC is not found, cannot make reports')
        return None

    else:
        safe_mkdir(output_dir)

        fqc_samples = []
        fastqc_jobs = []
        for fastq_fpath in fastq_fpaths:
            s = FQC_Sample(name=splitext_plus(basename(fastq_fpath))[0],
                           fastq_fpath=fastq_fpath)
            fqc_samples.append(s)
            info('Added sample ' + s.name)

        for fqc_s in fqc_samples:
            if cnf.reuse_intermediate and verify_file(fqc_s.fastqc_html_fpath,
                                                      silent=True):
                info(fqc_s.fastqc_html_fpath + ' exists, reusing')
            else:
                fastqc_jobs.append(
                    run_fastqc(cnf, fqc_s.fastq_fpath, fqc_s.name, output_dir))
            info()

        wait_for_jobs(cnf, fastqc_jobs)

        fastqc_jobs = []
        # while True:
        for fqc_s in fqc_samples:
            fqc_s.fastqc_html_fpath = find_fastqc_html(output_dir, fqc_s.name)
        not_done_fqc = [
            fqc_s for fqc_s in fqc_samples
            if not verify_file(fqc_s.fastqc_html_fpath,
                               description='Not found FastQC html for ' +
                               fqc_s.name)
        ]
        # if not not_done_fqc:
        #     info('')
        #     info('Every FastQC job is done, moving on.')
        #     info('-' * 70)
        #     break
        # else:
        #     info('')
        #     info('Some FastQC jobs are not done (' + ', '.join(f.name for f in not_done_fqc) + '). Retrying them.')
        #     info('')
        #     for fqc_s in not_done_fqc:
        #         fastqc_jobs.append(run_fastqc(cnf, fqc_s.fastq_fpath, fqc_s.name, output_dir))
        #     wait_for_jobs(cnf, fastqc_jobs)

        for fqc_s in fqc_samples:
            sample_fastqc_dirpath = join(output_dir, fqc_s.name + '_fastqc')
            if isfile(sample_fastqc_dirpath + '.zip'):
                try:
                    os.remove(sample_fastqc_dirpath + '.zip')
                except OSError:
                    pass

        comb_fastqc_fpath = join(output_dir, 'fastqc.html')
        write_fastqc_combo_report(cnf, comb_fastqc_fpath, fqc_samples)
        verify_file(comb_fastqc_fpath, is_critical=True)
        info('Combined FastQC saved to ' + comb_fastqc_fpath)
        return comb_fastqc_fpath
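
A hypothetical call (cnf is the usual config object, assumed to resolve the fastqc binary and job-submission settings; paths are illustrative):

comb_html = make_fastqc_reports(cnf,
                                ['/data/S1.fq.gz', '/data/S2.fq.gz'],
                                '/tmp/work/fastqc')
# -> '/tmp/work/fastqc/fastqc.html' combining the per-sample reports,
#    or None if the fastqc executable cannot be found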