def isFiltered(row, clr):
    """Return True when the shell lookup for *row* yields a falsy result.

    The MIS, SYN and LOF SNP class CSV files found under
    ``sf_calls_dir/<clr>/`` are concatenated with ``cat`` and the stream is
    narrowed by three chained ``grep`` calls: on ``row['ind_id']``, on
    ``row['CHROM']`` and on ``str(row['POS'])``.

    row -- mapping that provides 'ind_id', 'CHROM' and 'POS'
    clr -- name used both as sub-directory and as file-name prefix

    NOTE(review): what ``func.runInShell`` returns (output or status) is
    not visible here; only its truthiness is used -- confirm.
    """
    class_files = [
        os.path.join(sf_calls_dir, clr, clr + suffix)
        for suffix in ('-SNP-class_MIS_SNP__.csv',
                       '-SNP-class_SYN_SNP__.csv',
                       '-SNP-class_LOF_SNP__.csv')
    ]
    grep_chain = [
        '|', 'grep', row['ind_id'],
        '|', 'grep', row['CHROM'],
        '|', 'grep', str(row['POS']),
    ]
    shell_result = func.runInShell(' '.join(['cat'] + class_files + grep_chain))
    if shell_result:
        return False
    return True
# Example #2
0
def isPossibleDeNovo(row, clr):
    """Return True when the shell lookup for *row* yields a falsy result.

    The file ``sf_calls_dir/<snp|indels>/<clr>/<ind_id>`` is piped through
    ``grep row['CHROM']`` and ``grep str(row['POS'])``.  The 'snp'
    sub-directory is chosen when REF and ALT have the same length,
    'indels' otherwise.

    row -- mapping that provides 'REF', 'ALT', 'ind_id', 'CHROM', 'POS'
    clr -- sub-directory name below the variant-type directory

    NOTE(review): what ``func.runInShell`` returns is not visible here;
    only its truthiness is used -- confirm.
    """
    same_length = len(row['REF']) == len(row['ALT'])
    variant_dir = 'snp' if same_length else 'indels'
    calls_file = os.path.join(sf_calls_dir, variant_dir, clr, row['ind_id'])
    pieces = ['cat', calls_file]
    pieces += ['|', 'grep', row['CHROM']]
    pieces += ['|', 'grep', str(row['POS'])]
    return not func.runInShell(' '.join(pieces))
# Example #3
0
def isDeNovo(row, clr):
    """Return True when the shell lookup for *row* yields a falsy result.

    The per-individual class file
    ``sf_calls_dir/<snp|indels>/<clr>/<batch>/<ind_id>-<SNP|INDEL>-class.csv``
    is piped through three ``grep`` calls: on ``row['ind_id']``, on
    ``row['CHROM']`` and on ``str(row['POS'])``.  The snp/SNP pair is used
    when REF and ALT have the same length, indels/INDEL otherwise.

    row -- mapping that provides 'REF', 'ALT', 'batch', 'ind_id', 'CHROM'
           and 'POS'
    clr -- sub-directory name below the variant-type directory

    NOTE(review): what ``func.runInShell`` returns is not visible here;
    only its truthiness is used -- confirm.
    """
    if len(row['REF']) != len(row['ALT']):
        variant_dir, variant_tag = 'indels', 'INDEL'
    else:
        variant_dir, variant_tag = 'snp', 'SNP'

    class_file = os.path.join(
        sf_calls_dir, variant_dir, clr, row['batch'],
        row['ind_id'] + '-' + variant_tag + '-class.csv')
    pieces = ['cat', class_file]
    for pattern in (row['ind_id'], row['CHROM'], str(row['POS'])):
        pieces.extend(['|', 'grep', pattern])
    shell_result = func.runInShell(' '.join(pieces))
    return not shell_result
def whyNotDeNovo(row, clr):
    """Build a string Series indexed by ``sf_dn_c_columns`` for *row*.

    ``sf_calls_dir/<clr>/<clr>-SNP-class_ALL_SNP__.csv`` is piped through
    three ``grep`` calls (``row['ind_id']``, ``row['CHROM']``,
    ``str(row['POS'])``).  The shell command is issued for every row, even
    when its output ends up unused.

    When ``row['is_dn']`` is truthy and ``row['is_filt']`` is falsy, the
    first line of the shell output is split on commas and its trailing
    ``len(sf_dn_c_columns)`` fields become the values; otherwise every
    value is None.

    row -- mapping that provides 'ind_id', 'CHROM', 'POS', 'is_dn' and
           'is_filt'
    clr -- name used both as sub-directory and as file-name prefix
    """
    all_snp_csv = os.path.join(
        sf_calls_dir, clr, clr + '-SNP-class_ALL_SNP__.csv')
    pipeline = ' '.join([
        'cat', all_snp_csv,
        '|', 'grep', row['ind_id'],
        '|', 'grep', row['CHROM'],
        '|', 'grep', str(row['POS']),
    ])
    shell_out = func.runInShell(pipeline, True)

    if not row['is_dn'] or row['is_filt']:
        values = [None] * len(sf_dn_c_columns)
    else:
        first_line = shell_out.split('\n')[0]
        fields = first_line.split(',')
        values = fields[-len(sf_dn_c_columns):]

    return pandas.Series(values, index=sf_dn_c_columns, dtype=str)
# Example #5
0
# Script fragment: dump `dnvo` to a temporary file, run the packaged
# vcf2table.sh on it, summarize the result, then optionally clean up.
# `dnvo`, `prob_cutoff`, `input_file`, `min_DP`, `targ_bed`, `output_dir`,
# `config_file` and `rm_tmp` are defined outside this fragment.

# Reset the index in place and stop with a message when nothing is left.
dnvo.reset_index(inplace=True)
if dnvo.empty:
    sys.exit('No mutations at score %s' % prob_cutoff)

# All intermediate files go into a freshly created temporary directory.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)
# Base name of the input file: directory and last extension stripped.
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv')
print(outp_tsv)
# NOTE(review): the helper name says "Vcf" while the target path ends in
# '.tsv' -- confirm the actual format written.
func.writePredAsVcf(dnvo, outp_tsv, min_DP=min_DP)
# script_name = os.path.basename(os.path.realpath(sys.argv[0]))
# Absolute path of vcf2table.sh as shipped with the 'variants' package.
script_name = os.path.abspath(pkg_resources.resource_filename('variants',
                                                              'vcf2table.sh'))
# Space-joined shell command: script, input table, script directory,
# base name, target BED.  Arguments are not shell-quoted.
cmd = ' '.join([script_name,
                outp_tsv,
                os.path.dirname(script_name),
                input_file_bn,
                targ_bed])
print(cmd)
func.runInShell(cmd)
# Reads '<base name>-ann-onePline.tsv' from the temporary directory;
# presumably produced by the shell script above -- TODO confirm.
vn = summarizeVariants.summarizeMutations(
    os.path.join(tmp_dir,
                 input_file_bn +
                 '-ann-onePline.tsv'),
    input_file_bn,
    output_dir,
    config_file)
# Remove the temporary directory only when explicitly requested with the
# string 'yes'; the path is interpolated unquoted into `rm -rf`.
if rm_tmp == 'yes':
    cmd = 'rm -rf %s' % tmp_dir
    func.runInShell(cmd)
# Example #6
0
# Script fragment: intersect the input VCF with a target BED and run the
# annVEP.mk makefile on the result inside a temporary directory.
# `cfg`, `input_file`, `targ_bed` and `rm_tmp` are defined outside this
# fragment.

genome_build = int(cfg['genome_build'])
vep_refseq = cfg['vep_refseq']
# Pick the make include file for the genome build; the paths are
# hard-coded absolute locations.
if genome_build == 19 or genome_build == 37:
    incl_make = '/mnt/xfs1/home/asalomatov/projects/pipeline/ppln/include.mk'
elif int(genome_build) == 38:  # genome_build is already an int here
    incl_make = '/mnt/xfs1/home/asalomatov/projects/pipeline/ppln/include_hg38.mk'
else:
    sys.exit('Only builds 19, 37, 38 are supported')

# All intermediate files go into a freshly created temporary directory.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)
# Base name of the input file: directory and last extension stripped.
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
# NOTE(review): `input_lile_dir` is not used in the visible code and looks
# like a misspelling of "input_file_dir" -- confirm before renaming.
input_lile_dir = os.path.dirname(input_file)
# Only the directory of this resource path is used below.
# NOTE(review): the file name is spelled 'vcf2tablee.sh' (double 'e') --
# confirm the spelling is intended.
script_name = os.path.abspath(
    pkg_resources.resource_filename('variants', 'vcf2tablee.sh'))
script_dir = os.path.dirname(script_name)

# Shell template; every %(name)s placeholder is filled from locals()
# below, so the placeholder names must match the variable names above.
cmd = """
vcfintersect -b %(targ_bed)s %(input_file)s > %(tmp_dir)s/%(input_file_bn)s.vcf
echo 'running VEP'
make -f %(script_dir)s/annVEP.mk INCLMK=%(incl_make)s VEPREFSEQ=%(vep_refseq)s PREFIX=%(input_file_bn)s SUFFIX=.vcf INDIR=%(tmp_dir)s OUTDIR=%(tmp_dir)s
"""

print(cmd % locals())

func.runInShell(cmd % locals())

# Remove the temporary directory only when explicitly requested with the
# string 'yes'; the path is interpolated unquoted into `rm -rf`.
if rm_tmp == 'yes':
    cmd = 'rm -rf %s' % tmp_dir
    func.runInShell(cmd)
# Example #7
0
# Script fragment: dump `dnvo` to a file in `tmp_dir`, run the packaged
# vcf2table.sh on it, summarize the result, optionally clean up, then exit.
# `tmp_dir`, `input_file`, `dnvo`, `targ_bed`, `incl_make`, `vep_refseq`,
# `output_dir`, `config_file` and `rm_tmp` are defined outside this fragment.
print(tmp_dir)
# Base name of the input file: directory and last extension stripped.
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv')
print(outp_tsv)
# NOTE(review): the helper name says "Vcf" while the target path ends in
# '.tsv' -- confirm the actual format written.
func.writeTableAsVcf(dnvo, outp_tsv)
# script_name = os.path.basename(os.path.realpath(sys.argv[0]))
# Absolute path of vcf2table.sh as shipped with the 'variants' package.
script_name = os.path.abspath(
    pkg_resources.resource_filename('variants', 'vcf2table.sh'))
#    'vcf2table_notarg.sh'))
# Space-joined shell command: script, input table, script directory,
# base name, target BED, make include file, VEP refseq setting.
# Arguments are not shell-quoted.
cmd = ' '.join([
    script_name, outp_tsv,
    os.path.dirname(script_name), input_file_bn, targ_bed, incl_make,
    vep_refseq
])
print(cmd)
func.runInShell(cmd)
# Reads '<base name>-ann.vcf.onePline.tsv' and '<base name>-vep.tsv' from
# the temporary directory; presumably produced by the shell script above
# -- TODO confirm.
vn = summarizeOtherVariants.summarizeMutations(
    os.path.join(tmp_dir, input_file_bn + '-ann.vcf.onePline.tsv'),
    os.path.join(tmp_dir, input_file_bn + '-vep.tsv'), input_file_bn,
    output_dir, config_file)
# Remove the temporary directory only when explicitly requested with the
# string 'yes'; the path is interpolated unquoted into `rm -rf`.
if rm_tmp == 'yes':
    cmd = 'rm -rf %s' % tmp_dir
    func.runInShell(cmd)

# Unconditional exit with the message 'stop'; no statement after this line
# runs.  NOTE(review): looks like a leftover debugging stop -- confirm.
sys.exit('stop')


def get_spID(x, lab2sp_dict):
    if x[:2] == 'SP':
        return x
    else:
# Example #8
0
        sys.exit(1)
    model = os.path.join(model_dir, model[0])
    print(model)

# Script fragment: load the pickled model description, create the output
# directories and initialize the trio for `child_id`.
# `model`, `output_dir`, `known_vars`, `ped_file_extended` and `child_id`
# are defined outside this fragment.

# The loaded object is indexed by key; 'features' and 'is_keras' are read.
m_pkl = joblib.load(model)
list_of_features = m_pkl['features']
# hardcode lvl, this is intended for external use only
lvl = 0
# 'is_keras' is converted via int() first, then to bool.
is_keras = bool(int(m_pkl['is_keras']))

# Always true here because lvl was just set to 0: both entries are
# replaced with empty lists.
if lvl == 0:
    m_pkl['extra_col_names'] = []
    m_pkl['y_name'] = []

# create output dirs
# The directory names are concatenated unquoted into the `mkdir -p` command.
func.runInShell('mkdir -p ' + output_dir)
if known_vars:
    # Extra 'known' sub-directory, created only when known_vars is truthy.
    output_dir_known = os.path.join(output_dir, 'known')
    func.runInShell('mkdir -p ' + output_dir_known)

# populate ped DF
myped = ped.Ped(ped_file_extended, ['bam', 'vcf'])
f = features.Features(myped, known_vars)

# trio has to be complete with no files missing
# A falsy result from initTrioFor is reported on stderr and ends the
# script with exit status 1; otherwise a confirmation goes to stdout.
if not f.initTrioFor(child_id):
    sys.stderr.write('\nfailed to initialize trio for ' + child_id)
    sys.exit(1)
else:
    sys.stdout.write('\ninitialized trio for ' + child_id)
    sys.stdout.write('\n')