Exemplo n.º 1
0
def scalpel_indel(pairs, log_dir, config_file, ref_mnt):
    """Run Scalpel somatic indel calling for every tumor/normal pair.

    For each pair the Scalpel command is run through the shell, its output in
    ``outdir/main`` is moved into a directory named after the pair, and, when
    the configuration asks for it, the dustmask filter is applied.

    Args:
        pairs: Path to a tab-separated file. Column 0 is the pair name,
            column 1 the tumor sample ID and column 2 the normal sample ID.
            Blank lines are ignored.
        log_dir: Prefix prepended verbatim to ``<pair>.scalpel.log``; it must
            already end with a path separator (e.g. ``'LOGS/'``).
        config_file: Configuration file handed to ``parse_config`` and
            ``get_merged_bams``.
        ref_mnt: Directory prepended to the bed, fasta and dustmask bed paths
            read from the configuration.

    Returns:
        0 once every pair has been processed.

    Raises:
        SystemExit: With status 1 when Scalpel or the dustmask filter returns
            a non-zero status for a pair.
    """
    (scalpel, bedtools, bed, fasta, cpus, dustmask_flag, dustmask_bed) = parse_config(config_file)
    bed = ref_mnt + '/' + bed
    fasta = ref_mnt + '/' + fasta
    dustmask_bed = ref_mnt + '/' + dustmask_bed
    # use get_merged_bams api
    sample_list = 'sample_list.txt'
    if not os.path.isfile(sample_list):
        create_sample_list(pairs)
        sys.stderr.write(date_time() + 'Sample pairs list not created - creating one since this is being run likely '
                                       'outside of pipeline\n')
        get_merged_bams(config_file, sample_list)
    # The context manager closes the pairs file even when we abort below.
    with open(pairs, 'r') as fh:
        for line in fh:
            if not line.strip():
                # Tolerate blank/trailing empty lines instead of raising IndexError.
                continue
            cur = line.rstrip('\n').split('\t')
            loc = log_dir + cur[0] + '.scalpel.log'
            tumor_bam = cur[1] + '.merged.final.bam'
            normal_bam = cur[2] + '.merged.final.bam'
            scalpel_cmd = scalpel + ' --somatic --logs --numprocs ' + cpus + ' --tumor ' + tumor_bam + ' --normal ' \
                          + normal_bam + ' --bed ' + bed + ' --ref ' + fasta + ' 2>> ' + loc
            sys.stderr.write(date_time() + 'Starting indel calls for ' + cur[0] + '\n')
            log(loc, date_time() + 'Starting indel calls for ' + cur[0] + ' with command:\n' + scalpel_cmd + '\n')
            check = call(scalpel_cmd, shell=True)
            if check != 0:
                sys.stderr.write(date_time() + 'Indel calling failed for pair ' + cur[0] + ' with command:\n' +
                                 scalpel_cmd + '\n')
                # Abort here: otherwise the failure would be logged as
                # "complete", the move would run on missing output and the
                # function would still return 0 to its caller.
                sys.exit(1)
            log(loc, date_time() + 'Indel calling complete for pair ' + cur[0] + ' moving output files\n')
            mv_cmd = 'mkdir ' + cur[0] + '; mv outdir/main/* ' + cur[0] + '; rm -rf outdir/main;'
            log(loc, date_time() + mv_cmd + '\n')
            call(mv_cmd, shell=True)
            sys.stderr.write(date_time() + 'Completed indel calls for ' + cur[0] + '\n')
            if dustmask_flag == 'Y':
                log(loc, date_time() + 'Filter dustmask flag given\n')
                check = filter_indel(bedtools, dustmask_bed, cur[0])
                if check != 0:
                    sys.stderr.write(date_time() + 'Dustmask failed for ' + cur[0] + '\n')
                    sys.exit(1)
                else:
                    log(loc, date_time() + 'Dustmask complete for ' + cur[0] + '\n')
    sys.stderr.write(date_time() + 'Indel call completed\n')
    return 0
Exemplo n.º 2
0
def variant_annot_pipe(config_file, sample_pairs, kflag, ref_mnt, wg):
    """Drive the variant calling and annotation pipeline for a set of pairs.

    Stages, in order: create output directories, write the unique sample
    list, fetch (or merge) any missing BAM files, optionally reorder BAMs,
    run Mutect, merge its output, run Scalpel indel calling, annotate,
    optionally run germline calling, relocate outputs and upload them.

    Args:
        config_file: Configuration file handed to ``parse_config`` and to
            every pipeline stage.
        sample_pairs: Path to a tab-separated file. Column 1 and column 2 of
            each line are sample IDs (column 0 is not used here). Blank
            lines are ignored when building the sample list.
        kflag: When equal to ``'y'`` the BAM files are passed to ``ksort``
            before variant calling.
        ref_mnt: Reference location passed through to the stage functions.
        wg: Passed through to ``snpEff`` and ``platypus_germline``.

    Returns:
        0 when every stage completed.

    Raises:
        SystemExit: With status 1 as soon as a checked stage reports a
            non-zero status.
    """
    # create eventual output location directories

    mk_dir = 'mkdir BAM LOGS ANALYSIS ANNOTATION'
    call(mk_dir, shell=True)
    # NOTE(review): novosort and indel_flag are unpacked but never read in
    # this function - confirm whether indel calling was meant to be optional.
    (novosort, obj, cont, analysis, annotation, germ_flag, indel_flag, annot_used) = parse_config(config_file)
    # create sample list: each sample ID once, in order of first appearance
    sample_list = 'sample_list.txt'
    seen = set()
    with open(sample_pairs, 'r') as fh, open(sample_list, 'w') as sl:
        for line in fh:
            if not line.strip():
                # Tolerate blank/trailing empty lines instead of raising IndexError.
                continue
            cur = line.rstrip('\n').split('\t')
            for sample in (cur[1], cur[2]):
                if sample not in seen:
                    sl.write(sample + '\n')
                    seen.add(sample)
    del seen
    # download and merge (if necessary) bam files
    temp_list = check_existing_bams(sample_list)

    if len(temp_list) > 0:
        sys.stderr.write(date_time() + 'Missing files detected, downloading merged bam files\n')
        temp_fn = 'temp_samp_list.txt'
        with open(temp_fn, 'w') as temp_fh:
            temp_fh.write('\n'.join(temp_list))
        miss_list = get_merged_bams(config_file, temp_fn)
        if len(miss_list) == 0:
            sys.stderr.write(date_time() + 'Merged bam files successfully download\n')
        else:
            sys.stderr.write(date_time() + 'Some merged bams appear to be missing, trying downloading constituent bams '
                                           'then merging\n')
            missing_fn = 'to_merge.txt'
            with open(missing_fn, 'w') as missing_fh:
                missing_fh.write('\n'.join(miss_list))
            run_novosort(config_file, missing_fn, obj)
    else:
        sys.stderr.write(date_time() + 'All bams found.  Moving on\n')
    if kflag == 'y':
        # create bam list for ksort
        bam_list = 'bam_list.txt'
        blist_cmd = 'ls *.merged.final.bam > ' + bam_list
        call(blist_cmd, shell=True)
        check = ksort(config_file, bam_list, kflag, ref_mnt)
        if check == 0:
            sys.stderr.write(date_time() + 'Karyotypic reorder of BAM files completed\n')
        else:
            sys.stderr.write(date_time() + 'Karyotypic reorder of BAM files failed.\n')
            sys.exit(1)

    # somatic SNV calling
    check = mutect_pipe(config_file, sample_pairs, ref_mnt)
    if check == 0:
        sys.stderr.write(date_time() + 'Mutect variant calls successful\n')
    else:
        sys.stderr.write(date_time() + 'Mutect variant calls failed.\n')
        sys.exit(1)
    check = mutect_merge_sort(config_file, sample_pairs, ref_mnt)
    if check == 0:
        sys.stderr.write(date_time() + 'Mutect file merge successful.\n')
    else:
        sys.stderr.write(date_time() + 'Mutect file merge failed.\n')
        # Abort like every other checked stage; the annotation and upload
        # steps below run after this merge step.
        sys.exit(1)
    # TODO: create def to do vep or snpeff mode for annotation
    check = scalpel_indel(sample_pairs, 'LOGS/', config_file, ref_mnt)
    if check == 0:
        sys.stderr.write(date_time() + 'scalpel successful.\n')
    else:
        sys.stderr.write(date_time() + 'scalpel failed.\n')
        sys.exit(1)

    if annot_used == 'snpEff':
        snpEff(config_file, sample_pairs, ref_mnt, wg)
    if annot_used == 'vep':
        vep(config_file, sample_pairs, ref_mnt, '.vcf.keep', '.snv.vep.vcf', 'mutect')
        vep(config_file, sample_pairs, ref_mnt, '.indel.vcf', '.somatic.indel.vep.vcf', 'scalpel')

    if germ_flag == 'Y':
        sys.stderr.write(date_time() + 'Germ line call flag indicated\n')
        check = platypus_germline(config_file, sample_pairs, 'LOGS/', wg, ref_mnt)
        check += annot_platypus(config_file, sample_pairs, ref_mnt)
        if check == 0:
            sys.stderr.write(date_time() + 'Germ line call complete\n')
        else:
            sys.stderr.write(date_time() + 'Error during germline calls.  Check output\n')
            sys.exit(1)

    # relocate stuff, then upload
    mv_cmds = 'rm -rf outdir; mv *.bai *.bam BAM; mv *.xls *eff* *sift* *vep* ANNOTATION; mv *out* *vcf* ANALYSIS;'
    call(mv_cmds, shell=True)
    check = upload_variants_to_swift(cont, obj, sample_list, sample_pairs, analysis, annotation, annot_used)
    if check == 0:
        sys.stderr.write(date_time() + 'Uploading data to swift successful!\n')
    else:
        sys.stderr.write(date_time() + 'Uploading data to swift failed!\n')
        sys.exit(1)

    return 0