def concatenate_vcfs(in_vcfs, in_chrom, out_vcf, out_log):
    """Build the shell command that combines partitioned VCFs into one file.

    With exactly one input the command is a plain ``cp -v``; otherwise the
    packaged ``concatenate-partitioned-chromosome-vcfs.sh`` script is invoked
    on every input for which ``empty_gzipped_vcf`` is false.

    Args:
        in_vcfs: sequence of input VCF paths.
        in_chrom: not referenced by either template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_vcf: destination VCF path.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    # NOTE(review): another ``concatenate_vcfs`` definition (bcftools based)
    # is visible in this source -- confirm which one is meant to be live.
    call_args = locals()
    defaults = {
        'main_script': pkg_resources.resource_filename(
            'yaps2',
            'resources/postvqsr38/concatenate-partitioned-chromosome-vcfs.sh'),
    }
    params = merge_params(defaults, call_args)

    if len(params['in_vcfs']) == 1:
        # Nothing to concatenate: copy the single input to the output path.
        params['in_vcfs'] = params['in_vcfs'][0]
        template = "cp -v {in_vcfs} {out_vcf} >{out_log} 2>&1"
        return template.format(**params)

    non_empty = []
    for vcf_path in in_vcfs:
        if not empty_gzipped_vcf(vcf_path):
            non_empty.append(vcf_path)
    params['in_vcfs'] = ' '.join(non_empty)
    template = "{main_script} -m {out_vcf} {in_vcfs} >{out_log} 2>&1"
    return template.format(**params)
def gatk_select_variants_remove_ac_0(in_chrom, in_vcf, out_vcf, out_log):
    """Build the GATK ``SelectVariants`` command line for one interval.

    The command passes ``--removeUnusedAlternates``, reads ``in_vcf``,
    restricts processing to ``in_chrom`` via ``-L``, writes ``out_vcf`` and
    redirects stdout and stderr to ``out_log``.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'java': '/gapp/x64linux/opt/java/jre/jre1.7.0_45/bin/java',
        'jar': '/usr/share/java/GenomeAnalysisTK-3.4.jar',
        # NOTE(review): 'java_opts' is defined but the template below never
        # references it -- confirm whether it should be passed to java.
        'java_opts': "-Xmx4096m",
        'reference': '/gscmnt/gc2719/halllab/genomes/human/GRCh37/1kg_phase1/human_g1k_v37.fasta',
    }
    params = merge_params(defaults, call_args)

    pieces = [
        "{java} -jar {jar}",
        "-T SelectVariants -R {reference}",
        "--removeUnusedAlternates",
        "-V {in_vcf}",
        "-L {in_chrom}",
        "-o {out_vcf}",
        ">{out_log}",
        "2>&1",
    ]
    return " ".join(pieces).format(**params)
def bcftools_stats_summary(in_dir, out_dir):
    """Build the command line for the packaged bcftools-stats summary script.

    Args:
        in_dir: first positional argument given to the script.
        out_dir: second positional argument given to the script.

    Returns:
        The formatted command string ``<script> <in_dir> <out_dir>``.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/postvqsr/bcftools-stats-summary-plots.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = ["{script}", "{in_dir}", "{out_dir}"]
    return " ".join(tokens).format(**params)
def annotation_1000G(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command that runs the packaged ``annotate-w-1000G.sh``.

    Args:
        in_vcf: first positional argument given to the script.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_vcf: second positional argument given to the script.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/postvqsr/annotate-w-1000G.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = ["{script}", "{in_vcf}", "{out_vcf}", ">{out_log}", "2>&1"]
    return " ".join(tokens).format(**params)
def variant_eval_summary(in_dir, out_dir):
    """Build the command line for the packaged VariantEval merge/plot script.

    Args:
        in_dir: first positional argument given to the script.
        out_dir: second positional argument given to the script.

    Returns:
        The formatted command string ``<script> <in_dir> <out_dir>``.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2',
        'resources/postvqsr/merge-and-plot-gatk-variant-eval-stats.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = ["{script}", "{in_dir}", "{out_dir}"]
    return " ".join(tokens).format(**params)
def filter_biallelic_snps(in_vcf, out_vcf, in_min_vqslod, **kwargs):
    """Build the command line for the packaged ``vcf-filter-biallelic-snps.sh``.

    Args:
        in_vcf: first positional argument given to the script.
        out_vcf: second positional argument given to the script.
        in_min_vqslod: third positional argument given to the script.
        **kwargs: extra keyword arguments; they reach ``merge_params`` under
            the ``'kwargs'`` key and are not referenced by the template.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/pca/vcf-filter-biallelic-snps.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = ["{script}", "{in_vcf}", "{out_vcf}", "{in_min_vqslod}"]
    return " ".join(tokens).format(**params)
def plink_binary(in_vcf, out_path, **kwargs):
    """Build the command line for the packaged ``create-plink-binary.sh``.

    Args:
        in_vcf: first positional argument given to the script.
        out_path: second positional argument given to the script.
        **kwargs: extra keyword arguments; they reach ``merge_params`` under
            the ``'kwargs'`` key and are not referenced by the template.

    Returns:
        The formatted command string ``<script> <in_vcf> <out_path>``.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/pca/create-plink-binary.sh')
    params = merge_params({'script': script_path}, call_args)
    template = "{script} {in_vcf} {out_path}"
    return template.format(**params)
def plink_ld_prune(in_path, out_path, **kwargs):
    """Build the command line for the packaged ``plink-ld-prune.sh`` script.

    Args:
        in_path: first positional argument given to the script.
        out_path: second positional argument given to the script.
        **kwargs: extra keyword arguments; they reach ``merge_params`` under
            the ``'kwargs'`` key and are not referenced by the template.

    Returns:
        The formatted command string ``<script> <in_path> <out_path>``.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/pca/plink-ld-prune.sh')
    params = merge_params({'script': script_path}, call_args)
    template = "{script} {in_path} {out_path}"
    return template.format(**params)
def plink_merge_pruned_files(in_ref, in_merge_file, out_path):
    """Build the command line for the packaged ``plink-merge-pruned.sh``.

    Args:
        in_ref: first positional argument given to the script.
        in_merge_file: second positional argument given to the script.
        out_path: third positional argument given to the script.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/pca/plink-merge-pruned.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = ["{script}", "{in_ref}", "{in_merge_file}", "{out_path}"]
    return " ".join(tokens).format(**params)
def normalize_decompose_unique(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command that runs the packaged ``run-decompose.sh``.

    The script receives ``in_vcf``, ``out_vcf`` and ``in_chrom`` (in that
    order); stdout and stderr are redirected to ``out_log``.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/postvqsr/run-decompose.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = [
        "{script}",
        "{in_vcf}",
        "{out_vcf}",
        "{in_chrom}",
        ">{out_log}",
        "2>&1",
    ]
    return " ".join(tokens).format(**params)
def remove_symbolic_deletion_alleles(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command that runs the packaged ``remove-symbolic.sh``.

    Args:
        in_vcf: first positional argument given to the script.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_vcf: second positional argument given to the script.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/postvqsr/remove-symbolic.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = ["{script}", "{in_vcf}", "{out_vcf}", ">{out_log}", "2>&1"]
    return " ".join(tokens).format(**params)
def annotation_vep(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command that runs the packaged ``run-vep.sh`` script.

    Args:
        in_vcf: first positional argument given to the script.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_vcf: second positional argument given to the script.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/postvqsr38/run-vep.sh')
    params = merge_params({'main_script': script_path}, call_args)
    template = "{main_script} {in_vcf} {out_vcf} >{out_log} 2>&1"
    return template.format(**params)
def count_sample_missingness(in_vcf, in_chrom, out_json, out_log):
    """Build the shell command that runs ``count-sample-missingness.py``.

    The script is launched with the current interpreter (``sys.executable``)
    by default, given ``--out=<out_json>`` and ``in_vcf``; stdout and stderr
    are redirected to ``out_log``.

    Args:
        in_vcf: positional argument given to the script.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_json: value of the script's ``--out`` option.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr/count-sample-missingness.py'),
        'python': sys.executable,
    }
    params = merge_params(defaults, call_args)
    tokens = [
        "{python}",
        "{script}",
        "--out={out_json}",
        "{in_vcf}",
        ">{out_log}",
        "2>&1",
    ]
    return " ".join(tokens).format(**params)
def create_evec_data_frame(in_file, out_file):
    """Build the command line for ``make-pca-evec-data-frame.py``.

    The script is run through ``python -u`` with ``--src=<in_file>`` and
    ``--out=<out_file>``.

    Returns:
        The formatted command string (it ends with a single space).
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/pca/make-pca-evec-data-frame.py')
    params = merge_params({'script': script_path}, call_args)
    # The trailing space after the last option is part of the emitted string.
    template = "python -u {script} --src={in_file} --out={out_file} "
    return template.format(**params)
def eigenstrat_smartpca_analysis(in_ped_file, in_map_file, out_prj_dir):
    """Build the ``make`` command line that drives the packaged eigenstrat.mk.

    Args:
        in_ped_file: value assigned to the ``INPUT_PED`` make variable.
        in_map_file: value assigned to the ``INPUT_MAP`` make variable.
        out_prj_dir: value assigned to the ``PRJ_DIR`` make variable.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    makefile_path = pkg_resources.resource_filename(
        'yaps2', 'resources/pca/eigenstrat.mk')
    params = merge_params({'script': makefile_path}, call_args)
    tokens = [
        "make -f {script}",
        "INPUT_PED={in_ped_file}",
        "INPUT_MAP={in_map_file}",
        "PRJ_DIR={out_prj_dir}",
    ]
    return " ".join(tokens).format(**params)
def annotation_vep_cadd(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command that runs ``vep-cadd-annotation.sh``.

    The main script receives ``in_vcf``, ``out_vcf`` and the path of the
    packaged ``merge-in-cadd.py`` helper; stdout and stderr are redirected to
    ``out_log``.

    Args:
        in_vcf: first positional argument given to the main script.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_vcf: second positional argument given to the main script.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'main_script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr/vep-cadd-annotation.sh'),
        'merge_script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr/merge-in-cadd.py'),
    }
    params = merge_params(defaults, call_args)
    tokens = [
        "{main_script}",
        "{in_vcf}",
        "{out_vcf}",
        "{merge_script}",
        ">{out_log}",
        "2>&1",
    ]
    return " ".join(tokens).format(**params)
def calculate_sample_missingness(in_json, out_stats, out_log):
    """Build the shell command for ``calculate-overall-sample-missingness.py``.

    The script is launched with the current interpreter (``sys.executable``)
    by default, given ``--out=<out_stats>`` and ``in_json``; stdout and stderr
    are redirected to ``out_log``.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'script': pkg_resources.resource_filename(
            'yaps2',
            'resources/postvqsr38/calculate-overall-sample-missingness.py'),
        'python': sys.executable,
    }
    params = merge_params(defaults, call_args)
    tokens = [
        "{python}",
        "{script}",
        "--out={out_stats}",
        "{in_json}",
        ">{out_log}",
        "2>&1",
    ]
    return " ".join(tokens).format(**params)
def plink_pipeline(in_vcf, in_trio_fam, out_dir, **kwargs):
    """Build the ``make`` command line that drives the packaged mie/plink.mk.

    Args:
        in_vcf: value assigned to the ``INPUT_VCF`` make variable.
        in_trio_fam: value assigned to the ``TRIO_FAM`` make variable.
        out_dir: value assigned to the ``PRJ_DIR`` make variable.
        **kwargs: extra keyword arguments; they reach ``merge_params`` under
            the ``'kwargs'`` key and are not referenced by the template.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    makefile_path = pkg_resources.resource_filename(
        'yaps2', 'resources/mie/plink.mk')
    params = merge_params({'script': makefile_path}, call_args)
    tokens = [
        "make -f {script}",
        "INPUT_VCF={in_vcf}",
        "TRIO_FAM={in_trio_fam}",
        "PRJ_DIR={out_dir}",
    ]
    return " ".join(tokens).format(**params)
def exec_speedseq(output_prefix, tmpdir, input_bams, **kwargs):
    """Build the command line for the packaged ``speedseq-realign.sh`` script.

    The script receives, in order, ``output_prefix``, ``tmpdir``, the
    reference FASTA path and ``input_bams``.

    Args:
        output_prefix: first positional argument given to the script.
        tmpdir: second positional argument given to the script.
        input_bams: fourth positional argument; it is interpolated as-is.
        **kwargs: extra keyword arguments; they reach ``merge_params`` under
            the ``'kwargs'`` key and are not referenced by the template.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/b38/speedseq-realign.sh')
    # build 38
    reference_fasta = os.path.join(
        '/gscmnt/gc2802/halllab/ccdg_resources/genomes',
        'human/GRCh38DH/bwa/0_7_12/all_sequences.fa',
    )
    defaults = {'script': script_path, 'reference': reference_fasta}
    params = merge_params(defaults, call_args)
    tokens = [
        "{script}",
        "{output_prefix}",
        "{tmpdir}",
        "{reference}",
        "{input_bams}",
    ]
    return " ".join(tokens).format(**params)
def aggregate_mie_statistics(in_category, in_method, in_dir, out_file):
    """Build the command line for ``aggregate-mie-statistics.py``.

    Args:
        in_category: value of the script's ``--category`` option.
        in_method: value of the script's ``--method`` option.
        in_dir: value of the script's ``--input-dir`` option.
        out_file: value of the script's ``--output-file`` option.

    Returns:
        The formatted command string; the script path itself is the
        executable (no interpreter is prefixed).
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/mie/aggregate-mie-statistics.py')
    params = merge_params({'script': script_path}, call_args)
    tokens = [
        "{script}",
        "--input-dir={in_dir}",
        "--output-file={out_file}",
        "--category={in_category}",
        "--method={in_method}",
    ]
    return " ".join(tokens).format(**params)
def annotation_LINSIGHT(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command that runs ``annotate-w-LINSIGHT.sh``.

    The main script receives ``in_vcf``, ``out_vcf`` and the path of the
    packaged ``integrate-b37-annotations-to-b38.py`` helper; stdout and
    stderr are redirected to ``out_log``.

    Args:
        in_vcf: first positional argument given to the main script.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_vcf: second positional argument given to the main script.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'main_script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr38/annotate-w-LINSIGHT.sh'),
        'b37_to_b38_integration_script': pkg_resources.resource_filename(
            'yaps2',
            'resources/postvqsr38/integrate-b37-annotations-to-b38.py'),
    }
    params = merge_params(defaults, call_args)
    tokens = [
        "{main_script}",
        "{in_vcf}",
        "{out_vcf}",
        "{b37_to_b38_integration_script}",
        ">{out_log} 2>&1",
    ]
    return " ".join(tokens).format(**params)
def vcf_partition(in_vcf, out_vcf, in_min_vqslod, in_max_vqslod, in_samples,
                  in_type, in_method, in_chrom, in_label):
    """Build the command line for the packaged ``vcf-partition.sh`` script.

    The script receives, in order: ``in_vcf``, ``out_vcf``,
    ``in_min_vqslod``, ``in_max_vqslod``, ``in_samples``, ``in_type`` and
    ``in_method``. ``in_chrom`` and ``in_label`` are not referenced by the
    template; they are still handed to ``merge_params`` with the rest.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    script_path = pkg_resources.resource_filename(
        'yaps2', 'resources/mie/vcf-partition.sh')
    params = merge_params({'script': script_path}, call_args)
    tokens = [
        "{script}",
        "{in_vcf}",
        "{out_vcf}",
        "{in_min_vqslod}",
        "{in_max_vqslod}",
        "{in_samples}",
        "{in_type}",
        "{in_method}",
    ]
    return " ".join(tokens).format(**params)
def annotate_allele_balances(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command that runs ``allele-balance-annotation.sh``.

    The wrapper script receives, in order: the Python interpreter
    (``sys.executable`` by default), the packaged
    ``annotate-allele-balances.py`` path, ``in_vcf``, ``out_vcf`` and
    ``in_chrom``. Stdout and stderr are redirected to ``out_log``.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr/allele-balance-annotation.sh'),
        'python_script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr/annotate-allele-balances.py'),
        'python_executable': sys.executable,
    }
    params = merge_params(defaults, call_args)
    tokens = [
        "{script}",
        "{python_executable} {python_script}",
        "{in_vcf} {out_vcf} {in_chrom}",
        ">{out_log} 2>&1",
    ]
    return " ".join(tokens).format(**params)
def bcftools_stats(in_vcf, in_chrom, out_stats):
    """Build the shell command that runs ``bcftools stats`` on one VCF.

    The command passes ``--split-by-ID``, ``-F <reference>``, ``-s -`` and
    ``-f '.,PASS'``, reads ``in_vcf`` and redirects stdout to ``out_stats``.

    Args:
        in_vcf: VCF path given to ``bcftools stats``.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_stats: file that receives the command's stdout.

    Returns:
        The formatted command string.
    """
    # NOTE(review): a second ``bcftools_stats`` definition with a different
    # default reference is visible in this source -- confirm which is live.
    call_args = locals()
    defaults = {
        'bcftools': '/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4',
        'reference': '/gscmnt/ams1102/info/model_data/2869585698/build106942997/all_sequences.fa',
    }
    params = merge_params(defaults, call_args)
    tokens = [
        "{bcftools} stats",
        "--split-by-ID",
        "-F {reference}",
        "-s -",
        "-f '.,PASS'",
        "{in_vcf}",
        ">{out_stats}",
    ]
    return " ".join(tokens).format(**params)
def bcftools_stats(in_vcf, in_chrom, out_stats):
    """Build the shell command that runs ``bcftools stats`` on one VCF.

    The command passes ``--split-by-ID``, ``-F <reference>`` (a GRCh38DH
    path by default), ``-s -`` and ``-f '.,PASS'``, reads ``in_vcf`` and
    redirects stdout to ``out_stats``.

    Args:
        in_vcf: VCF path given to ``bcftools stats``.
        in_chrom: not referenced by the template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_stats: file that receives the command's stdout.

    Returns:
        The formatted command string.
    """
    # NOTE(review): another ``bcftools_stats`` definition with a different
    # default reference is visible in this source -- confirm which is live.
    call_args = locals()
    defaults = {
        'bcftools': '/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4',
        'reference': '/gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.fa',
    }
    params = merge_params(defaults, call_args)
    tokens = [
        "{bcftools} stats",
        "--split-by-ID",
        "-F {reference}",
        "-s -",
        "-f '.,PASS'",
        "{in_vcf}",
        ">{out_stats}",
    ]
    return " ".join(tokens).format(**params)
def gatk_variant_eval(in_chrom, in_vcf, out_stats, out_log):
    """Build the GATK ``VariantEval`` command line for one interval.

    The command runs with ``-nt 8``, uses the default ``dbsnp`` VCF for
    ``-D``, stratifies by ``Sample`` only (``-ST Sample -noST``), enables the
    listed evaluators only (``-EV ... -noEV``), restricts processing to
    ``in_chrom`` via ``-L``, evaluates ``in_vcf`` and writes ``out_stats``.
    Stdout and stderr are redirected to ``out_log``.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'java': '/gapp/x64linux/opt/java/jre/jre1.8.0_31/bin/java',
        'jar': '/gscmnt/gc2802/halllab/idas/jira/BIO-1662/vendor/local/jars/GenomeAnalysisTK-3.5-idas-experimental-293f64d-2016.02.19.jar',
        # NOTE(review): 'java_opts' is defined but the template below never
        # references it -- confirm whether it should be passed to java.
        'java_opts': "-Xmx4096m",
        'reference': '/gscmnt/gc2719/halllab/genomes/human/GRCh37/1kg_phase1/human_g1k_v37.fasta',
        'dbsnp': '/gscmnt/gc2802/halllab/idas/jira/BIO-1662/data/derived/FinnMetSeq-WGS/10-decompose-normalize-1000G-variant-ref-v1/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.decompose.normalize.vcf.gz',
    }
    params = merge_params(defaults, call_args)

    evaluators = [
        "TiTvVariantEvaluator",
        "CountVariants",
        "CompOverlap",
        "IndelSummary",
        "MultiallelicSummary",
    ]
    pieces = [
        "{java} -jar {jar}",
        "-nt 8",
        "-T VariantEval",
        "-D {dbsnp}",
        "-R {reference}",
        "-ST Sample",
        "-noST",
    ]
    pieces.extend("-EV " + name for name in evaluators)
    pieces.extend([
        "-noEV",
        "-L {in_chrom}",
        "-eval {in_vcf}",
        "-o {out_stats}",
        ">{out_log}",
        "2>&1",
    ])
    return " ".join(pieces).format(**params)
def filter_variant_missingness(in_vcf, in_chrom, out_vcf, out_log):
    """Build the shell command for the site-missingness filtering step.

    When ``in_chrom`` starts with ``Y`` or ``y`` no filtering script is run:
    the command copies every path matching ``<in_vcf>*`` into the directory
    that contains ``out_vcf``. For any other ``in_chrom`` the packaged
    ``filter-missingness-sites.sh`` wrapper is invoked with the Python
    interpreter, the ``filter-site-missingness.py`` path, ``in_vcf``,
    ``out_vcf`` and ``in_chrom``. Stdout and stderr go to ``out_log``.

    Returns:
        The formatted command string.
    """
    call_args = locals()
    defaults = {
        'script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr/filter-missingness-sites.sh'),
        'python_script': pkg_resources.resource_filename(
            'yaps2', 'resources/postvqsr/filter-site-missingness.py'),
        'python_executable': sys.executable,
    }
    params = merge_params(defaults, call_args)

    if not in_chrom.startswith(('Y', 'y')):
        tokens = [
            "{script}",
            "{python_executable} {python_script}",
            "{in_vcf} {out_vcf} {in_chrom}",
            ">{out_log} 2>&1",
        ]
        return " ".join(tokens).format(**params)

    # The copy target is the output file's directory, not the file itself,
    # because the glob on in_vcf may match more than one path.
    params['out_vcf'] = os.path.dirname(out_vcf)
    copy_template = "/bin/cp -v {in_vcf}* {out_vcf} >{out_log} 2>&1"
    return copy_template.format(**params)
def concatenate_vcfs(in_vcfs, in_chrom, out_vcf, out_log):
    """Build the shell command that combines partitioned VCFs with bcftools.

    With exactly one input the command is a plain ``cp -v``; otherwise
    ``bcftools concat -a`` is run, with ``-O z -o <out_vcf>``, on every input
    for which ``empty_gzipped_vcf`` is false.

    Args:
        in_vcfs: sequence of input VCF paths.
        in_chrom: not referenced by either template; it is still handed to
            ``merge_params`` together with the other arguments.
        out_vcf: destination VCF path.
        out_log: file that receives stdout and stderr of the command.

    Returns:
        The formatted command string.
    """
    # NOTE(review): another ``concatenate_vcfs`` definition (script based)
    # is visible in this source -- confirm which one is meant to be live.
    args = locals()
    default = {
        'bcftools': '/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4',
    }
    cmd_args = merge_params(default, args)

    if len(cmd_args['in_vcfs']) == 1:
        # Nothing to concatenate: copy the single input to the output path.
        cmd_args['in_vcfs'] = cmd_args['in_vcfs'][0]
        # A space now separates {out_vcf} from the redirection; without it
        # the output path was glued to '>' and a purely numeric path would
        # be parsed by the shell as a file-descriptor number.
        cmd = ("cp -v {in_vcfs} {out_vcf} "
               ">{out_log} "
               "2>&1").format(**cmd_args)
    else:
        cmd_args['in_vcfs'] = ' '.join(
            [x for x in in_vcfs if not empty_gzipped_vcf(x)])
        # Same separator fix as above, after the -o argument.
        cmd = ("{bcftools} concat "
               "-a "
               "{in_vcfs} "
               "-O z -o {out_vcf} "
               ">{out_log} "
               "2>&1").format(**cmd_args)
    return cmd