Пример #1
0
def proc_args(argv):
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input. ' \
                  'Usage: ' + basename(__file__) + ' sample2bam.tsv --bed target.bed --contols sample1:sample2 -o results_dir'
    parser = OptionParser(description=description, usage=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir', metavar='DIR', default=join(os.getcwd(), 'seq2c'))
    parser.add_option('--bed', dest='bed', help='BED file to run Seq2C analysis')
    parser.add_option('-c', '--controls', dest='controls', help='Optional control sample names for Seq2C. For multiple controls, separate them using :')
    parser.add_option('--seq2c-opts', dest='seq2c_opts', help='Options for the final lr2gene.pl script.')
    parser.add_option('--no-prep-bed', dest='prep_bed', help=SUPPRESS_HELP, action='store_false', default=True)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        parser.print_usage()
        sys.exit(1)
    if len(args) == 1 and not args[0].endswith('.bam'):
        sample_names, bam_fpaths = read_samples(verify_file(args[0], is_critical=True, description='Input sample2bam.tsv'))
        bam_by_sample = OrderedDict()
        for s, b in zip(sample_names, bam_fpaths):
            bam_by_sample[s] = b
    else:
        bam_by_sample = find_bams(args)

    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
    check_genome_resources(cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    verify_dir(dirname(cnf.output_dir), is_critical=True)
    safe_mkdir(cnf.output_dir)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Seq2C'
    set_up_dirs(cnf)

    samples = [
        source.TargQC_Sample(name=s_name, dirpath=join(cnf.output_dir, s_name), bam=bam_fpath)
            for s_name, bam_fpath in bam_by_sample.items()]
    info('Samples: ')
    for s in samples:
        info('  ' + s.name)
    samples.sort(key=lambda _s: _s.key_to_sort())

    target_bed = verify_bed(cnf.bed, is_critical=True) if cnf.bed else None

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner: critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples, target_bed, cnf.output_dir
Пример #2
0
def proc_opts():
    parser = OptionParser()
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--expose-only',
                      dest='expose_to_ngs_server_only',
                      action='store_true',
                      default=False,
                      help='Only add project to the webserver')
    parser.add_option('--no-expose',
                      dest='expose',
                      action='store_false',
                      default=True,
                      help='Do not expose the reports')
    parser.add_option('-o', dest='output_dir')
    parser.add_option('--bed',
                      dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option('--downsample-to', dest='downsample_to', type='int')

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) < 1:
        critical('Usage: ' + __file__ + ' *.fq.gz -o output_dir')
    # if len(args) < 2:
    #     info('No dataset path specified, assuming it is the current working directory')
    #     dataset_dirpath = adjust_path(os.getcwd())
    #     jira_url = args[0]

    fastq_fpaths = [verify_file(fpath) for fpath in args]
    fastq_fpaths = [fpath for fpath in fastq_fpaths if fpath]
    info(str(len(fastq_fpaths)) + ' fastq files')

    run_cnf = determine_run_cnf(opts)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    info('Writing to ' + str(cnf.output_dir))

    cnf.project_name = cnf.project_name or 'preproc'

    if cnf.work_dir:
        cnf.debug = True
    else:
        all_work_dir = join(cnf.output_dir, 'work')
        safe_mkdir(all_work_dir)

        latest_fpath = join(all_work_dir, 'latest')

        if cnf.reuse_intermediate:
            cnf.work_dir = latest_fpath
        else:
            cnf.work_dir = join(
                all_work_dir,
                datetime.datetime.now().strftime("%Y-%b-%d_%H-%M"))
            if islink(latest_fpath):
                os.remove(latest_fpath)
            if isdir(latest_fpath):
                shutil.rmtree(latest_fpath)
            if not exists(latest_fpath):
                os.symlink(basename(cnf.work_dir), latest_fpath)

    cnf.work_dir = adjust_path(cnf.work_dir)
    safe_mkdir(cnf.work_dir)
    cnf.log_dir = join(cnf.work_dir, 'log')
    safe_mkdir(cnf.log_dir)
    set_up_log(cnf)
    try:
        subprocess.call(['chmod', '-R', 'g+w', cnf.work_dir])
    except OSError:
        err(traceback.format_exc())
        pass

    if cnf.samplesheet:
        cnf.samplesheet = verify_file(cnf.samplesheet, is_critical=True)

    info(' '.join(sys.argv))
    info()
    info('Created a temporary working directory: ' + cnf.work_dir)

    if cnf.project_name:
        info('Project name: ' + cnf.project_name)

    if cnf.samplesheet:
        info('Using custom sample sheet ' + cnf.samplesheet)

    check_genome_resources(cnf)
    check_system_resources(cnf, optional=['fastq'])

    return cnf, cnf.output_dir, fastq_fpaths
Пример #3
0
def main():
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    parser.add_option('--work-dir', dest='work_dir', metavar='DIR')
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--only-summary',
                      dest='only_summary',
                      action='store_true')
    parser.add_option('-o',
                      dest='output_dir',
                      metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('--reannotate',
                      dest='reannotate',
                      action='store_true',
                      default=False,
                      help='re-annotate BED file with gene names')
    parser.add_option('--dedup',
                      dest='dedup',
                      action='store_true',
                      default=False,
                      help='count duplicates in coverage metrics')
    parser.add_option('--bed',
                      dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option(
        '--exons',
        '--exome',
        '--features',
        dest='features',
        help=
        'Annotated CDS/Exon/Gene/Transcripts BED file to make targetSeq exon/amplicon regions reports.'
    )

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        critical('No BAMs provided to input.')
    bam_fpaths = list(set([abspath(a) for a in args]))

    bad_bam_fpaths = []
    for fpath in bam_fpaths:
        if not verify_bam(fpath):
            bad_bam_fpaths.append(fpath)
    if bad_bam_fpaths:
        critical('BAM files cannot be found, empty or not BAMs:' +
                 ', '.join(bad_bam_fpaths))

    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'TargQC'
    set_up_dirs(cnf)
    # cnf.name = 'TargQC_' + cnf.project_name

    check_genome_resources(cnf)

    verify_bed(cnf.bed, is_critical=True)
    bed_fpath = adjust_path(cnf.bed)
    info('Using amplicons/capture panel ' + bed_fpath)

    features_bed_fpath = adjust_path(
        cnf.features) if cnf.features else adjust_path(cnf.genome.features)
    info('Features: ' + features_bed_fpath)

    genes_fpath = None
    if cnf.genes:
        genes_fpath = adjust_path(cnf.genes)
        info('Custom genes list: ' + genes_fpath)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    info('*' * 70)
    info()

    targqc_html_fpath = run_targqc(cnf, cnf.output_dir, bam_fpaths, bed_fpath,
                                   features_bed_fpath, genes_fpath)
    if targqc_html_fpath:
        send_email(
            cnf, 'TargQC report for ' + cnf.project_name + ':\n  ' +
            targqc_html_fpath)
Пример #4
0
def proc_args(argv):
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--is-wgs',
                      dest='is_wgs',
                      action='store_true',
                      default=False,
                      help='whole genome sequencing')
    parser.add_option('--is-deep-seq',
                      dest='is_deep_seq',
                      action='store_true',
                      default=False,
                      help='deep targeted sequencing')
    parser.add_option('--only-summary',
                      dest='only_summary',
                      action='store_true')
    parser.add_option('-o',
                      dest='output_dir',
                      metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('-c', '--caller', dest='caller')
    parser.add_option('--qc', dest='qc', action='store_true', default=False)
    parser.add_option('--no-qc',
                      dest='qc',
                      action='store_false',
                      default=False)
    parser.add_option('--qc-caption', dest='qc_caption', help=SUPPRESS_HELP)
    parser.add_option('--no-tsv',
                      dest='tsv',
                      action='store_false',
                      default=True,
                      help=SUPPRESS_HELP)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        critical('No vcf files provided to input.')

    run_cnf = determine_run_cnf(opts,
                                is_targetseq=opts.is_deep_seq,
                                is_wgs=opts.is_wgs)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    vcf_fpath_by_sample = read_samples(args, cnf.caller)
    info()

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Variants'
    set_up_dirs(cnf)
    # cnf.name = 'TargQC_' + cnf.project_name
    info(' '.join(sys.argv))

    samples = [
        source.VarSample(s_name, join(cnf.output_dir, s_name), vcf=vcf_fpath)
        for s_name, vcf_fpath in vcf_fpath_by_sample.items()
    ]
    samples.sort(key=lambda _s: _s.key_to_sort())

    check_genome_resources(cnf)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples