Пример #1
0
# Console display settings for interactive work.
# NOTE(review): `np` is not imported anywhere in the visible part of this
# file -- confirm that `import numpy as np` exists above this line.
# Assuming `np` is numpy: wrap printed arrays at 200 characters, show 5 digits
# of floating-point precision, and suppress scientific notation for small values.
np.set_printoptions(linewidth=200, precision=5, suppress=True)
import pandas as pd;

# Truncate printed frames longer than 20 rows, and keep wide frames on one
# line block instead of wrapping their columns across several blocks.
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = False
import seaborn as sns
import pylab as plt;
import matplotlib as mpl
import os;

# The current user's home directory with a trailing '/', so that relative
# paths can be appended by plain string concatenation.
home = os.path.expanduser('~') + '/'
import popgen.Util as utl
import popgen.Estimate as est
import popgen.Kyrgys.Utils as kutl
# kutl.scan1000('LCT')
# kutl.scan1000('EDAR')

# Print (do not execute) a shell one-liner that changes into the project's
# data directory, drops every VCF line containing '#', and writes the first
# two tab-separated fields of the remaining lines to a `.vcf.pos` file.
print 'cd {} && grep -v "#" Kyrgyz_merged_all34_NoChr_filter1_info.vcf  | cut -f1,2 > Kyrgyz_merged_all34_NoChr_filter1_info.vcf.pos'.format(kutl.path+'data/')
# Reload popgen.Util so source changes are picked up without restarting the
# interpreter (Python 2 builtin `reload`).
reload(utl)
# utl.createAnnotation(kutl.path+'data/Kyrgyz_merged_all34_NoChr_filter1_info.vcf',db='hg38')
# NOTE(review): presumably builds the `data/map.df` coordinate map that is read
# later in this file -- confirm in popgen.Kyrgys.Utils.
kutl.createMap()

def createCADD(nrows=10000):
    """Merge the CADD annotation table with the position map, per chromosome.

    Reads ``data/CADD.hg19.tsv`` and ``data/map.df`` from ``kutl.path`` and,
    for every chromosome present in the CADD table, joins the map's column
    ``19`` against the CADD ``POS`` column.

    The CADD input is expected to have been prepared beforehand with::

        less 1000G_phase3_inclAnno.tsv.gz | cut -f1,2  > coord.hg19.tsv
        bedtools intersect -sorted -a Kyrgyz.hg19.tsv -wb -b ../CADD/1000G_phase3_inclAnno.tsv > CADD.hg19.tsv

    Parameters
    ----------
    nrows : int or None, optional
        Number of leading rows of the sorted CADD table to process. Defaults
        to 10000, the cap that was previously hard-coded; ``None`` processes
        every row.

    Returns
    -------
    pandas.DataFrame
        The per-chromosome merge results as combined by ``groupby().apply()``.
        Earlier versions computed this value but discarded it and returned
        ``None``.
    """
    data_dir = kutl.path + 'data/'

    # The first three columns are dropped (presumably the `-a` interval
    # emitted by `bedtools intersect -wb` -- confirm); columns 3 and 4 of the
    # raw file become CHROM and POS.
    cad = (pd.read_csv(data_dir + 'CADD.hg19.tsv', sep='\t', header=None)
           .iloc[:, 3:]
           .rename(columns={3: 'CHROM', 4: 'POS'})
           .sort_values(['CHROM', 'POS'])
           .set_index('CHROM'))

    # Keep only fully mapped rows, cast everything to int, and expose column
    # 38 as a Series named POShg38 indexed by (original index, column 19).
    # Columns 19 and 38 are presumably the hg19 and hg38 positions -- confirm
    # against kutl.createMap.
    coor = (pd.read_pickle(data_dir + 'map.df')
            .dropna()
            .apply(lambda col: col.astype(int))
            .set_index(19, append=True)[38]
            .rename('POShg38'))

    def merge_chrom(group):
        # The map is keyed by the chromosome name as a string, while the CADD
        # group key may have been parsed as an integer.
        positions = coor.loc[str(group.name)].sort_index().reset_index()
        merged = pd.merge(positions, group, left_on=19, right_on='POS')
        # NOTE(review): the first two merged columns are dropped, as in the
        # original; with the map frame on the left these appear to be `19`
        # and `POShg38` -- confirm that discarding POShg38 is intended.
        return merged.iloc[:, 2:]

    return cad.iloc[:nrows].groupby(level=0).apply(merge_chrom)