# Post-process the prediction table `res` for one child: tag it with the
# model name, save it, collapse it to one row per variant, export it for
# annotation and summarize the annotated result.

# Replace the last dot-separated component of the model name with a
# test-level tag.
m_name = '.'.join(m_name.split('.')[:-1]) + '_tstlvl' + str(lvl)
res['method'] = m_name

# Drop rows whose allele string contains 'nan'. The explicit copy makes `res`
# an independent frame, so adding `var_id` below does not write into a slice
# of the unfiltered table (pandas SettingWithCopyWarning).
res = res[~res.test_var_alleles.str.contains('nan')].copy()
res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False)

# Keep only the first row for every variant id. reset_index() returns a new
# frame and, as before, keeps the old index as a regular column.
res['var_id'] = res['test_var_id']
res_u = res[~res.var_id.duplicated()].reset_index()

# `DataFrame.ix` was deprecated in pandas 0.20 and removed in 1.0; plain
# column assignment is the label-based equivalent of `.ix[:, 'pred_labels']`.
res_u['pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int)

# NOTE: every call is exported here, not only rows with pred_labels == 1.
outp_tsv = os.path.join(output_dir, child_id + '.tsv')
func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP)

# vcf2table.sh is expected to sit next to the script that was launched.
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
cmd = ' '.join([
    os.path.join(script_dir, 'vcf2table.sh'),
    outp_tsv,
    script_dir,
    child_id,
])
print(cmd)
func.runInShell(cmd)

# Presumably '<child_id>-ann-onePline.tsv' is produced by vcf2table.sh --
# TODO confirm. The summary used to be run as the standalone script
# work/summarizeMutations.py with the same three arguments.
summarizeVariants.summarizeMutations(
    os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
    os.path.join(output_dir, 'denovo'),
    config_file)
# Read scored calls from `input_file`, keep those above `prob_cutoff`,
# export them for annotation in a temporary directory and summarize the
# annotated result into `output_dir`.
dnvo = pandas.read_csv(input_file)

# `DataFrame.ix` was deprecated in pandas 0.20 and removed in 1.0; plain
# column assignment is the label-based equivalent of `.ix[:, 'pred_labels']`.
dnvo['pred_labels'] = (dnvo['pred_prob'] > prob_cutoff).astype(int)

# Keep positive calls only. reset_index() returns a new frame (the old index
# is kept as a column, as before) instead of mutating a filtered slice in
# place, which avoids pandas' SettingWithCopyWarning.
dnvo = dnvo[dnvo.pred_labels == 1].reset_index()
if dnvo.empty:
    sys.exit('No mutations at score %s' % prob_cutoff)

# All intermediate files live in a fresh temporary directory.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv')
print(outp_tsv)
func.writePredAsVcf(dnvo, outp_tsv, min_DP=min_DP)

# vcf2table.sh is expected to sit next to the script that was launched.
script_name = os.path.join(
    os.path.dirname(os.path.realpath(sys.argv[0])), 'vcf2table.sh')
cmd = ' '.join([
    script_name,
    outp_tsv,
    os.path.dirname(script_name),
    input_file_bn,
    targ_bed,
    incl_make,
])
print(cmd)
func.runInShell(cmd)

# Presumably both annotated tables are produced by vcf2table.sh -- TODO
# confirm.
vn = summarizeVariants.summarizeMutations(
    os.path.join(tmp_dir, input_file_bn + '-ann.vcf.onePline.tsv'),
    os.path.join(tmp_dir, input_file_bn + '-vep.tsv'),
    input_file_bn,
    output_dir,
    config_file)

# The temporary directory is removed only on explicit request, so it can be
# kept around for inspection.
if rm_tmp == 'yes':
    cmd = 'rm -rf %s' % tmp_dir
    func.runInShell(cmd)
# Export the selected calls in `dnvo` for annotation inside a temporary
# directory, run the packaged vcf2table.sh on them and summarize the
# annotated table into `output_dir`.
dnvo.reset_index(inplace=True)
if dnvo.empty:
    sys.exit('No mutations at score %s' % prob_cutoff)

# Scratch directory for every intermediate file.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)

# Input file name without directory and extension; used as the prefix of the
# files written below.
input_file_bn = os.path.basename(input_file)
input_file_bn = os.path.splitext(input_file_bn)[0]

outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv')
print(outp_tsv)
func.writePredAsVcf(dnvo, outp_tsv, min_DP=min_DP)

# The helper script is looked up inside the 'variants' package rather than
# next to the launched script.
script_name = pkg_resources.resource_filename('variants', 'vcf2table.sh')
script_name = os.path.abspath(script_name)

# Command line: script, input table, script directory, file prefix and
# target BED.
cmd = ' '.join((
    script_name,
    outp_tsv,
    os.path.dirname(script_name),
    input_file_bn,
    targ_bed,
))
print(cmd)
func.runInShell(cmd)

# Presumably '<prefix>-ann-onePline.tsv' is produced by vcf2table.sh -- TODO
# confirm.
vn = summarizeVariants.summarizeMutations(
    os.path.join(tmp_dir, input_file_bn + '-ann-onePline.tsv'),
    input_file_bn,
    output_dir,
    config_file,
)

# The scratch directory is removed only on explicit request.
if rm_tmp == 'yes':
    cmd = 'rm -rf ' + tmp_dir
    func.runInShell(cmd)
# Save the prediction table `res`, collapse it to one row per variant,
# export it for annotation and summarize the annotated result.
res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False)

# Keep only the first row for every variant id. reset_index() returns a new
# frame (the old index is kept as a column, as before) instead of mutating a
# filtered slice in place, which avoids pandas' SettingWithCopyWarning.
res['var_id'] = res['test_var_id']
res_u = res[~res.var_id.duplicated()].reset_index()

# `DataFrame.ix` was deprecated in pandas 0.20 and removed in 1.0; plain
# column assignment is the label-based equivalent of `.ix[:, 'pred_labels']`.
res_u['pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int)

# NOTE: every call is exported here, not only rows with pred_labels == 1.
outp_tsv = os.path.join(output_dir, child_id + '.tsv')
func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP)

# vcf2table.sh is expected to sit next to the script that was launched.
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
cmd = ' '.join([
    os.path.join(script_dir, 'vcf2table.sh'),
    outp_tsv,
    script_dir,
    child_id,
])
print(cmd)
func.runInShell(cmd)

# Presumably '<child_id>-ann-onePline.tsv' is produced by vcf2table.sh --
# TODO confirm. The summary used to be run as the standalone script
# work/summarizeMutations.py with the same three arguments.
summarizeVariants.summarizeMutations(
    os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
    os.path.join(output_dir, 'denovo'),
    config_file)