def complete(args):
    """Run all modules, one after another, and finish with the report.

    The VCF, BAM and FastQC paths are read from the first record of every
    entry returned by ``_prepare_samples(args)``; the bcbio YAML file is
    whichever entry of ``args.bams`` ends in "yaml".  Each step assembles
    its own command line and parses it with ``params()`` before the step
    function is called.

    Raises AssertionError when the reference, the input files, the region
    file, the YAML file or the FastQC files are missing.
    """
    assert args.reference, "need the reference genome"
    assert args.bams, "no files detected. Add vcf and bam files"
    assert args.region, "need region bed file"

    def parse_step(argv):
        # Every step gets a freshly built parser for its own argument list.
        return params().parse_args(argv)

    samples = _prepare_samples(args)
    vcf_files = [sample[0]['vcf'] for sample in samples]
    bam_files = [sample[0]['bam'] for sample in samples]
    fastqc_files = [sample[0]['fastqc'] for sample in samples]
    yaml_files = [path for path in args.bams if path.endswith("yaml")]

    assert len(vcf_files) == len(bam_files), "no paired bam/vcf files found. %s %s" % (vcf_files, bam_files)
    assert yaml_files, "No bcbio yaml file found."
    assert fastqc_files, "No fastqc files"

    # Scheduler options are forwarded to the steps that accept them; the
    # list stays empty when no scheduler was requested.
    scheduler_opts = []
    if args.scheduler:
        scheduler_opts = ['-n', args.numcores,
                          '-s', args.scheduler,
                          '-q', args.queue,
                          '-p', args.tag,
                          '-t', args.paralleltype]
        if args.resources:
            scheduler_opts = scheduler_opts + (['-r'] + args.resources)
        # Command-line tokens must all be strings (numcores may be numeric).
        scheduler_opts = [str(opt) for opt in scheduler_opts]

    print("doing basic-bam")
    step_args = parse_step(['--run', 'basic-bam', '--out', 'basic-bam'] + bam_files)
    calculate_bam(step_args)

    print("doing metrics")
    step_args = parse_step(['--run', 'metrics', '--out', 'metrics', yaml_files[0]])
    bcbio_metrics(step_args)

    print("doing fastqc parsing")
    step_args = parse_step(['--run', 'fastqc', '--out', 'fastqc'] + fastqc_files)
    merge_fastq(samples, step_args)

    print("doing stats-coverage")
    step_args = parse_step(['--run', 'stats-coverage', '--out', 'coverage',
                            '--region', args.region] + bam_files + scheduler_opts)
    average_exome_coverage(samples, step_args)

    print("doing bias-coverage")
    step_args = parse_step(['--run', 'bias-coverage', '--out', 'bias',
                            '--region', args.region] + bam_files + scheduler_opts)
    bias_exome_coverage(samples, step_args)

    print("doing cg-depth in vcf files")
    step_args = parse_step(['--run', 'cg-vcf', '--out', 'cg',
                            '--region', args.region,
                            '--reference', args.reference]
                           + bam_files + vcf_files + scheduler_opts)
    calculate_cg_depth_coverage(samples, step_args)

    print("doing report")
    report("report")
def _new_complete(args):
    """Run all modules starting from a bcbio final YAML file.

    ``args.bams[0]`` is treated as the YAML file: it is read with
    ``_read_final`` and the per-sample VCF (``['vcf'][<type>]``), BAM
    (``['bam']['ready']``) and FastQC (``['qc']['fastqc']``) entries are
    collected from the result.  Every step builds its own command line,
    parses it with ``params()`` and, where the step needs sample data,
    prepares it again with ``_prepare_samples`` from those parsed arguments.

    If a ``*/mixup_check/qsignature.ma`` file exists under the upload folder
    and none exists in the working directory yet, it is copied there; a run
    without that file simply skips the copy.

    Raises AssertionError when the reference, the input files, the region
    file or the FastQC files are missing, or when the number of VCF and BAM
    files differ.
    """
    # Validate before indexing args.bams so that a missing input reports the
    # intended message instead of an IndexError.
    assert args.reference, "need the reference genome"
    assert args.bams, "no files detected. Add vcf and bam files"
    assert args.region, "need region bed file"

    yaml_file = args.bams[0]
    final = _read_final(yaml_file)
    samples = list(final.values())

    # The VCF type is taken from the first sample that actually has VCF
    # output, matching the 'vcf' in d guard used to collect the files; a
    # sample without VCF is then caught by the pairing assertion below.
    with_vcf = [d for d in samples if 'vcf' in d]
    vcf_type = list(with_vcf[0]['vcf'].keys())[0] if with_vcf else None
    vcf = [d['vcf'][vcf_type] for d in with_vcf]
    bam = [d['bam']['ready'] for d in samples if 'bam' in d]
    fastqc = [d['qc']['fastqc'] for d in samples if 'qc' in d]

    assert len(vcf) == len(bam), "no paired bam/vcf files found. %s %s" % (vcf, bam)
    assert yaml_file, "No bcbio yaml file found."
    assert fastqc, "No fastqc files"

    # Scheduler options are forwarded to the steps that accept them; the
    # list stays empty when no scheduler was requested.
    cluster = []
    if args.scheduler:
        cluster = ['-n', args.numcores, '-s', args.scheduler, '-q', args.queue,
                   '-p', args.tag, '-t', args.paralleltype]
        if args.resources:
            cluster += ['-r'] + args.resources
        # list() keeps the later list concatenations valid even where map()
        # returns an iterator.
        cluster = list(map(str, cluster))

    galaxy = []
    if args.galaxy:
        galaxy = ['--galaxy', args.galaxy]

    print("copy qsignature")
    found = glob.glob(op.join(_get_final_folder(yaml_file)['upload'],
                              "*/mixup_check/qsignature.ma"))
    # glob returns an empty list when no sample has a qsignature file; skip
    # the copy in that case rather than failing on found[0].
    if found and file_exists(found[0]) and not file_exists("qsignature.ma"):
        shutil.copy(found[0], "qsignature.ma")

    print("doing basic-bam")
    new_args = ['--run', 'basic-bam', '--out', 'basic-bam'] + galaxy + bam
    new_args = params().parse_args(new_args)
    calculate_bam(new_args)

    print("doing metrics")
    new_args = ['--run', 'metrics', '--out', 'metrics', yaml_file] + galaxy
    new_args = params().parse_args(new_args)
    bcbio_metrics(new_args)

    print("doing fastqc parsing")
    new_args = ['--run', 'fastqc', '--out', 'fastqc'] + galaxy + fastqc + bam
    new_args = params().parse_args(new_args)
    prepared = _prepare_samples(new_args)
    merge_fastq(prepared, new_args)

    print("doing stats-coverage")
    new_args = ['--run', 'stats-coverage', '--out', 'coverage',
                '--region', args.region] + galaxy + bam + cluster
    new_args = params().parse_args(new_args)
    prepared = _prepare_samples(new_args)
    average_exome_coverage(prepared, new_args)

    print("doing bias-coverage")
    new_args = ['--run', 'bias-coverage', '--out', 'bias',
                '--region', args.region,
                '--n_sample', str(args.n_sample)] + galaxy + bam + cluster
    new_args = params().parse_args(new_args)
    prepared = _prepare_samples(new_args)
    bias_exome_coverage(prepared, new_args)

    print("doing cg-depth in vcf files")
    new_args = ['--run', 'cg-vcf', '--out', 'cg',
                '--region', args.region,
                '--reference', args.reference] + galaxy + bam + vcf + cluster
    new_args = params().parse_args(new_args)
    prepared = _prepare_samples(new_args)
    calculate_cg_depth_coverage(prepared, new_args)

    print("doing report")
    report("report")
if __name__ == "__main__":
    parser = params()
    args = parser.parse_args()

    # One entry per value of --run.  The callables are wrapped in lambdas so
    # that a step function is only looked up (and the sample data only
    # prepared) for the sub-command that was actually requested.
    commands = {
        "stats-coverage": lambda: average_exome_coverage(_prepare_samples(args), args),
        "bias-coverage": lambda: bias_exome_coverage(_prepare_samples(args), args),
        "tstv": lambda: calculate_tstv(args),
        "basic-bam": lambda: calculate_bam(args),
        "metrics": lambda: bcbio_metrics(args),
        "cg-vcf": lambda: calculate_cg_depth_coverage(_prepare_samples(args), args),
        "plot": lambda: save_multiple_regions_coverage(args.bams, args.out, args.region),
        "fastqc": lambda: merge_fastq(_prepare_samples(args), args),
        "report": lambda: report(args),
        "complete": lambda: complete(args),
        "final": lambda: _new_complete(args),
    }

    # An unrecognised --run value does nothing.
    selected = commands.get(args.run)
    if selected is not None:
        selected()