def isFiltered(row, clr):
    """Look a variant up in the MIS/SYN/LOF SNP class files of caller `clr`.

    The three per-class CSV files found under ``sf_calls_dir/<clr>/`` are
    concatenated with ``cat`` and piped through three successive ``grep``
    calls: individual id, chromosome, position.

    Args:
        row: mapping-like record providing 'ind_id', 'CHROM' and 'POS'.
        clr: caller name; used as sub-directory and as file-name prefix.

    Returns:
        ``not bool(res)``, where ``res`` is whatever ``func.runInShell``
        returns for the pipeline. Presumably that is the exit status of the
        last ``grep`` (falsy on a match) -- confirm against func.runInShell.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in `pipes`
        from pipes import quote

    csv_paths = [
        os.path.join(sf_calls_dir, clr,
                     clr + '-SNP-class_' + cls + '_SNP__.csv')
        for cls in ('MIS', 'SYN', 'LOF')
    ]
    # Coerce every search term to str (CHROM/ind_id may be parsed as numbers)
    # and quote everything so spaces or shell metacharacters cannot break or
    # hijack the command line.
    grep_terms = (row['ind_id'], row['CHROM'], row['POS'])
    # NOTE(review): grep matches each term as an unanchored regex anywhere in
    # the line, so POS 123 also hits a line containing 1234 -- confirm that
    # this looseness is acceptable for the CSV layout.
    cmd = ' '.join(
        ['cat'] +
        [quote(path) for path in csv_paths] +
        ['| grep ' + quote(str(term)) for term in grep_terms])
    res = func.runInShell(cmd)
    return not bool(res)
def isPossibleDeNovo(row, clr):
    """Look a variant up in the per-individual call file of caller `clr`.

    The file ``sf_calls_dir/<snp|indels>/<clr>/<ind_id>`` is piped through
    two successive ``grep`` calls: chromosome, then position. The variant is
    treated as a SNP when REF and ALT have the same length, otherwise as an
    indel.

    Args:
        row: mapping-like record providing 'REF', 'ALT', 'ind_id', 'CHROM'
            and 'POS'.
        clr: caller name; used as a sub-directory.

    Returns:
        ``not bool(res)``, where ``res`` is whatever ``func.runInShell``
        returns for the pipeline. Presumably that is the exit status of the
        last ``grep`` (falsy on a match) -- confirm against func.runInShell.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in `pipes`
        from pipes import quote

    v_t = 'snp' if len(row['REF']) == len(row['ALT']) else 'indels'
    calls_path = os.path.join(sf_calls_dir, v_t, clr, str(row['ind_id']))
    # Coerce search terms to str (CHROM may be parsed as a number) and quote
    # every argument so odd characters cannot break the command line.
    # NOTE(review): grep matches unanchored, so POS 123 also hits 1234.
    cmd = ' '.join(
        ['cat', quote(calls_path)] +
        ['| grep ' + quote(str(term))
         for term in (row['CHROM'], row['POS'])])
    res = func.runInShell(cmd)
    return not bool(res)
def isDeNovo(row, clr):
    """Look a variant up in the per-individual class file of caller `clr`.

    The file
    ``sf_calls_dir/<snp|indels>/<clr>/<batch>/<ind_id>-<SNP|INDEL>-class.csv``
    is piped through three successive ``grep`` calls: individual id,
    chromosome, position. The variant is treated as a SNP when REF and ALT
    have the same length, otherwise as an indel.

    Args:
        row: mapping-like record providing 'REF', 'ALT', 'batch', 'ind_id',
            'CHROM' and 'POS'.
        clr: caller name; used as a sub-directory.

    Returns:
        ``not bool(res)``, where ``res`` is whatever ``func.runInShell``
        returns for the pipeline. Presumably that is the exit status of the
        last ``grep`` (falsy on a match) -- confirm against func.runInShell.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in `pipes`
        from pipes import quote

    if len(row['REF']) == len(row['ALT']):
        v_t, v_tt = 'snp', 'SNP'
    else:
        v_t, v_tt = 'indels', 'INDEL'

    ind_id = str(row['ind_id'])
    class_path = os.path.join(
        sf_calls_dir, v_t, clr, str(row['batch']),
        ind_id + '-' + v_tt + '-class.csv')
    # Coerce search terms to str (CHROM may be parsed as a number) and quote
    # every argument so odd characters cannot break the command line.
    # NOTE(review): grep matches unanchored, so POS 123 also hits 1234.
    cmd = ' '.join(
        ['cat', quote(class_path)] +
        ['| grep ' + quote(str(term))
         for term in (ind_id, row['CHROM'], row['POS'])])
    res = func.runInShell(cmd)
    return not bool(res)
def whyNotDeNovo(row, clr):
    """Fetch the trailing ``sf_dn_c_columns`` fields of a variant's CSV line.

    For rows where ``row['is_dn']`` is truthy and ``row['is_filt']`` is falsy,
    the file ``sf_calls_dir/<clr>/<clr>-SNP-class_ALL_SNP__.csv`` is piped
    through three successive ``grep`` calls (individual id, chromosome,
    position). The first line of the output is split on commas and its last
    ``len(sf_dn_c_columns)`` fields become the result values.

    Every other row, and any row whose lookup yields no usable line (empty
    output, or fewer fields than columns), gets a None-filled Series instead
    of a pandas length-mismatch error.

    Args:
        row: mapping-like record providing 'ind_id', 'CHROM', 'POS', 'is_dn'
            and 'is_filt'.
        clr: caller name; used as sub-directory and as file-name prefix.

    Returns:
        pandas.Series indexed by ``sf_dn_c_columns``, built with ``dtype=str``.
    """
    n_cols = len(sf_dn_c_columns)
    outp = [None] * n_cols

    # Only pay for the shell round-trip when its output is actually used.
    if row['is_dn'] and not row['is_filt']:
        try:
            from shlex import quote
        except ImportError:  # Python 2 keeps the same helper in `pipes`
            from pipes import quote

        all_snp_csv = os.path.join(
            sf_calls_dir, clr, clr + '-SNP-class_ALL_SNP__.csv')
        # Coerce search terms to str and quote every argument so odd
        # characters cannot break the command line.
        # NOTE(review): grep matches unanchored, so POS 123 also hits 1234.
        cmd = ' '.join(
            ['cat', quote(all_snp_csv)] +
            ['| grep ' + quote(str(term))
             for term in (row['ind_id'], row['CHROM'], row['POS'])])
        # The second argument presumably asks runInShell to return the
        # command's output as text -- confirm against func.runInShell.
        res = func.runInShell(cmd, True)
        first_line = res.split('\n')[0] if res else ''
        fields = first_line.split(',')[-n_cols:]
        if len(fields) == n_cols:
            outp = fields

    return pandas.Series(outp, sf_dn_c_columns, dtype=str)
# Export the predicted de novo calls, annotate them with vcf2table.sh and
# summarise the annotated table.
dnvo.reset_index(inplace=True)
if dnvo.empty:
    sys.exit('No mutations at score %s' % prob_cutoff)

# All intermediate files live in a fresh scratch directory.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv')
print(outp_tsv)
func.writePredAsVcf(dnvo, outp_tsv, min_DP=min_DP)

# The annotation script ships inside the `variants` package; it is called
# with the exported file, its own directory, the base name and the target BED.
script_name = os.path.abspath(
    pkg_resources.resource_filename('variants', 'vcf2table.sh'))
cmd = ' '.join([
    script_name,
    outp_tsv,
    os.path.dirname(script_name),
    input_file_bn,
    targ_bed,
])
print(cmd)
func.runInShell(cmd)

# The summariser reads '<base name>-ann-onePline.tsv' from the scratch dir.
vn = summarizeVariants.summarizeMutations(
    os.path.join(tmp_dir, input_file_bn + '-ann-onePline.tsv'),
    input_file_bn,
    output_dir,
    config_file)

if rm_tmp == 'yes':
    # Remove the scratch directory in-process instead of shelling out to
    # `rm -rf` with an unquoted path; errors are ignored, as `-f` did.
    import shutil
    shutil.rmtree(tmp_dir, ignore_errors=True)
# Pick the make include file for the configured genome build, restrict the
# input VCF to the target regions and run the VEP annotation makefile.
genome_build = int(cfg['genome_build'])
vep_refseq = cfg['vep_refseq']
if genome_build in (19, 37):
    incl_make = '/mnt/xfs1/home/asalomatov/projects/pipeline/ppln/include.mk'
elif genome_build == 38:
    incl_make = '/mnt/xfs1/home/asalomatov/projects/pipeline/ppln/include_hg38.mk'
else:
    sys.exit('Only builds 19, 37, 38 are supported')

# All intermediate files live in a fresh scratch directory.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
# NOTE(review): the name looks like a typo of `input_file_dir` and is not
# used in this section; kept unchanged in case later code reads it.
input_lile_dir = os.path.dirname(input_file)
# Only the directory of this resource is used below.
# NOTE(review): 'vcf2tablee.sh' looks like a typo of 'vcf2table.sh' -- confirm.
script_name = os.path.abspath(
    pkg_resources.resource_filename('variants', 'vcf2tablee.sh'))
script_dir = os.path.dirname(script_name)

# Shell script template; the %(name)s fields are filled from the variables
# defined above via locals().
cmd = """
vcfintersect -b %(targ_bed)s %(input_file)s > %(tmp_dir)s/%(input_file_bn)s.vcf
echo 'running VEP'
make -f %(script_dir)s/annVEP.mk INCLMK=%(incl_make)s VEPREFSEQ=%(vep_refseq)s PREFIX=%(input_file_bn)s SUFFIX=.vcf INDIR=%(tmp_dir)s OUTDIR=%(tmp_dir)s
"""
# Render once so the printed text is exactly what gets executed.
rendered_cmd = cmd % locals()
print(rendered_cmd)
func.runInShell(rendered_cmd)

if rm_tmp == 'yes':
    # Remove the scratch directory in-process instead of shelling out to
    # `rm -rf` with an unquoted path; errors are ignored, as `-f` did.
    import shutil
    shutil.rmtree(tmp_dir, ignore_errors=True)
print(tmp_dir) input_file_bn = os.path.splitext(os.path.basename(input_file))[0] outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv') print(outp_tsv) func.writeTableAsVcf(dnvo, outp_tsv) # script_name = os.path.basename(os.path.realpath(sys.argv[0])) script_name = os.path.abspath( pkg_resources.resource_filename('variants', 'vcf2table.sh')) # 'vcf2table_notarg.sh')) cmd = ' '.join([ script_name, outp_tsv, os.path.dirname(script_name), input_file_bn, targ_bed, incl_make, vep_refseq ]) print(cmd) func.runInShell(cmd) vn = summarizeOtherVariants.summarizeMutations( os.path.join(tmp_dir, input_file_bn + '-ann.vcf.onePline.tsv'), os.path.join(tmp_dir, input_file_bn + '-vep.tsv'), input_file_bn, output_dir, config_file) if rm_tmp == 'yes': cmd = 'rm -rf %s' % tmp_dir func.runInShell(cmd) sys.exit('stop') def get_spID(x, lab2sp_dict): if x[:2] == 'SP': return x else:
sys.exit(1) model = os.path.join(model_dir, model[0]) print(model) m_pkl = joblib.load(model) list_of_features = m_pkl['features'] # hardcode lvl, this is intended for external use only lvl = 0 is_keras = bool(int(m_pkl['is_keras'])) if lvl == 0: m_pkl['extra_col_names'] = [] m_pkl['y_name'] = [] # create output dirs func.runInShell('mkdir -p ' + output_dir) if known_vars: output_dir_known = os.path.join(output_dir, 'known') func.runInShell('mkdir -p ' + output_dir_known) # populate ped DF myped = ped.Ped(ped_file_extended, ['bam', 'vcf']) f = features.Features(myped, known_vars) # trio has to be complete with no files missing if not f.initTrioFor(child_id): sys.stderr.write('\nfailed to initialize trio for ' + child_id) sys.exit(1) else: sys.stdout.write('\ninitialized trio for ' + child_id) sys.stdout.write('\n')