示例#1
0
def lmem(args, start, stop, output):
    """Fit a linear mixed model per SNP and phenotype for one genotype slice.

    Reads the phenotype table and the kinship matrix, fits one null model per
    phenotype column via ``lmem_func.get_delta`` (using only samples whose
    phenotype is not NaN), then streams genotype batches from
    ``iolib.genReader`` and writes one tab-separated row per SNP.

    Each output row is ``rsid1, pos, rsid2`` followed, for every phenotype in
    column order, by ``beta_null, sigma_g_null, loglik_null`` from the null
    model, the flattened per-SNP entry of the first ``fitlmm`` result, and
    the per-SNP entries of its second, third and fourth results.

    Args:
        args: parsed options; the attributes read here are ``phenotype``,
            ``header``, ``genfile``, ``linebuffer`` and ``kinship``.
        start: passed through to ``iolib.genReader`` (first SNP of the slice).
        stop: passed through to ``iolib.genReader`` (end of the slice).
        output: path handed to ``iolib.wopen`` for the result table.

    Returns:
        None. Results are written to ``output``; progress goes to stdout.
    """
    # Single-argument print(...) emits the same text under Python 2 and is
    # also valid Python 3 syntax.
    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")

    # Builtin ``float`` is exactly what the ``np.float`` alias referred to;
    # the alias itself no longer exists in current NumPy releases.
    Y = np.genfromtxt(iolib.ropen(args.phenotype),
                      missing_values="NA",
                      dtype=float,
                      skip_header=args.header)
    if len(Y.shape) == 1:
        # A single phenotype is parsed as a vector; promote it to one column
        # so the rest of the code can always index Y[:, mpheno].
        column = np.empty((len(Y), 1), float)
        column[:, 0] = Y
        Y = column
    nsample, npheno = Y.shape

    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer, start,
                              stop)
    outf = csv.writer(iolib.wopen(output), delimiter="\t")
    K = np.genfromtxt(iolib.ropen(args.kinship), dtype=float)
    print("K.shape = %s" % (K.shape,))
    print("Y.shape = %s" % (Y.shape,))
    print("%s samples and %s phenotypes" % (nsample, npheno))

    # observed[m] is a boolean mask of samples with a non-NaN phenotype m;
    # null_model[m] is the dict returned by get_delta for that phenotype.
    observed = []
    null_model = []

    print("Calculating rotation matrices...")
    for mpheno in range(npheno):
        keep = np.logical_not(np.isnan(Y[:, mpheno]))
        observed.append(keep)
        # Restrict the kinship matrix to the observed samples on both axes.
        null_model.append(
            lmem_func.get_delta(K[keep][:, keep], Y[keep, mpheno]))
    print("done")

    print("Fitting LMMs...")
    time0 = time.time()
    processed = 0
    for rsid1, rsid2, pos, G in genfile:
        nsnp = G.shape[1]

        # One fit per phenotype for this batch. Kept in its own name so the
        # ``output`` path parameter is not rebound.
        fits = []
        for mpheno in range(npheno):
            null = null_model[mpheno]
            fits.append(
                lmem_func.fitlmm(null['offset'], null['Ut'], null['denom'],
                                 G[observed[mpheno]], null['uty']))

        for i in range(nsnp):
            row = [rsid1[i], pos[i], rsid2[i]]
            # Extend in place instead of sum(list_of_lists, []), which
            # re-copies the growing row for every phenotype.
            for mpheno in range(npheno):
                null = null_model[mpheno]
                fit = fits[mpheno]
                row += [null['beta_null'], null['sigma_g_null'],
                        null['loglik_null']]
                row += fit[0][i].tolist()
                row += [fit[1][i], fit[2][i], fit[3][i]]
            outf.writerow(row)

        processed += nsnp
        print("%s loci processed" % (processed,))
    print("Took %s seconds" % (time.time() - time0,))
示例#2
0
文件: lmem.py 项目: jaredo/gwas
def lmem(args,start,stop,output):
    """Fit a linear mixed model per SNP and phenotype for one genotype slice.

    Loads the phenotype table and kinship matrix, builds one null model per
    phenotype column with ``lmem_func.get_delta`` (restricted to samples
    whose phenotype is not NaN), then iterates over the genotype batches
    produced by ``iolib.genReader`` and writes one tab-separated row per SNP.

    Row layout: ``rsid1, pos, rsid2`` and then, for each phenotype in column
    order, ``beta_null, sigma_g_null, loglik_null`` of the null model, the
    flattened per-SNP entry of the first ``fitlmm`` result, and the per-SNP
    entries of its second, third and fourth results.

    Args:
        args: parsed options; this function reads ``phenotype``, ``header``,
            ``genfile``, ``linebuffer`` and ``kinship``.
        start: forwarded to ``iolib.genReader`` (first SNP of the slice).
        stop: forwarded to ``iolib.genReader`` (end of the slice).
        output: path given to ``iolib.wopen`` for the result table.

    Returns:
        None. Rows are written to ``output``; progress is printed to stdout.
    """
    # A parenthesised single string prints identically on Python 2 and also
    # parses on Python 3.
    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")

    # ``np.float`` was only an alias of builtin ``float`` and has been
    # removed from NumPy; use the builtin directly.
    Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA",
                      dtype=float, skip_header=args.header)
    if len(Y.shape) == 1:
        # Single-phenotype input comes back 1-D; make it an explicit column.
        as_column = np.empty((len(Y), 1), float)
        as_column[:, 0] = Y
        Y = as_column
    nsample, npheno = Y.shape

    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer, start, stop)
    outf = csv.writer(iolib.wopen(output), delimiter="\t")
    K = np.genfromtxt(iolib.ropen(args.kinship), dtype=float)
    print("K.shape = %s" % (K.shape,))
    print("Y.shape = %s" % (Y.shape,))
    print("%s samples and %s phenotypes" % (nsample, npheno))

    # present[m]: boolean mask of samples with a non-NaN value for phenotype m.
    # null_model[m]: dict returned by get_delta for phenotype m.
    present = []
    null_model = []

    print("Calculating rotation matrices...")
    for mpheno in range(npheno):
        mask = np.logical_not(np.isnan(Y[:, mpheno]))
        present.append(mask)
        # Subset the kinship matrix to the retained samples on rows and columns.
        null_model.append(lmem_func.get_delta(K[mask][:, mask], Y[mask, mpheno]))
    print("done")

    print("Fitting LMMs...")
    time0 = time.time()
    processed = 0
    for rsid1, rsid2, pos, G in genfile:
        nsnp = G.shape[1]

        # Per-phenotype fits for this batch; a separate name avoids rebinding
        # the ``output`` path parameter.
        batch_fits = []
        for mpheno in range(npheno):
            nm = null_model[mpheno]
            batch_fits.append(lmem_func.fitlmm(nm['offset'], nm['Ut'], nm['denom'],
                                               G[present[mpheno]], nm['uty']))

        for i in range(nsnp):
            row = [rsid1[i], pos[i], rsid2[i]]
            # Grow the row in place; sum(lists, []) copies it on every step.
            for mpheno in range(npheno):
                nm = null_model[mpheno]
                res = batch_fits[mpheno]
                row += [nm['beta_null'], nm['sigma_g_null'], nm['loglik_null']]
                row += res[0][i].tolist()
                row += [res[1][i], res[2][i], res[3][i]]
            outf.writerow(row)

        processed += nsnp
        print("%s loci processed" % (processed,))
    print("Took %s seconds" % (time.time() - time0,))
示例#3
0
#!/usr/bin/python -O

import sys,time,os,glob,pickle,cPickle, csv,gzip,numpy as np,resource,argparse,string
import numpy as np
import iolib

def _str2bool(text):
    """Parse a command-line token into a bool for argparse's ``type=``.

    ``type=bool`` is a trap: ``bool("False")`` is True because any non-empty
    string is truthy. This converter maps the usual spellings explicitly and
    rejects anything else with a normal argparse usage error. The empty
    string still maps to False, as it did with ``bool``.
    """
    token = text.strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y'):
        return True
    if token in ('0', 'false', 'f', 'no', 'n', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (text,))


# Command-line interface for computing the RRM from a plink binary file set.
parser = argparse.ArgumentParser(description='calculates the RRM on plink binary file set')
parser.add_argument('plinkfile', metavar='plinkfile', type=str, help='a binary plink file set')
parser.add_argument('-snps', metavar='snps', type=str, default='', help='list of snp ids to use for rrm calculation')
parser.add_argument('-out', metavar='out', type=str, default='', help='outfile')
# metavar was 'out' (copied from the option above); it only affects --help.
parser.add_argument('--ibs', metavar='bool', type=_str2bool, default=False, help='use kinship coefficient rather than realised relationship')

args = parser.parse_args()

# Without -out, the result is written next to the plink file set.
outfile = csv.writer(iolib.wopen((args.out or args.plinkfile) + ".rrm.gz"), delimiter="\t")

if args.snps != '':
    snps = iolib.scan(args.snps)
    # Single-argument print(...) gives the same text on Python 2 and 3.
    print("Calculating RRM from subset of %s SNPs" % (len(snps),))
else:
    snps = None


infile = iolib.plinkReader(args.plinkfile, snps=snps)

n = infile.nsample

# Accumulators sized by the number of samples; builtin ``float`` replaces the
# removed ``np.float`` alias (same dtype).
rrm = np.zeros((n, n), float)
rrm_diag = np.zeros(n, float)

print("Calculating RRM...")
示例#4
0
        processed += nsnp
        print processed, "loci processed"
    print "Took", time.time() - time0, "seconds"


if __name__ == '__main__':
    # Uncomment to get multiprocessing debug logging on stderr:
    # logger = mp.log_to_stderr()
    # logger.setLevel(logging.INFO)

    # Explicit check instead of ``assert``, which is stripped under ``-O``.
    if args.header < 0:
        raise ValueError("header must be >= 0, got %r" % (args.header,))
    iolib.checkfile(args.kinship)
    iolib.checkfile(args.genfile)

    # Strip a trailing ".gz" so the suffix is appended exactly once below.
    if args.output.endswith(".gz"):
        args.output = args.output[:-3]

    outf = csv.writer(iolib.wopen(args.output + ".gz"), delimiter="\t")

    nl = iolib.nlines(iolib.ropen(args.genfile))
    print("%s SNPs" % (nl,))
    # Ceiling division done in integers. The previous
    # ``math.ceil(nl / args.nprocess)`` divided two ints first (floor under
    # Python 2), which produced more chunks than processes and a step of 0
    # (ValueError from range) whenever nl < nprocess. The max() keeps the
    # step valid for an empty genotype file as well.
    neach = max(1, (nl + args.nprocess - 1) // args.nprocess)
    # Chunk boundaries: every neach-th SNP index plus the end sentinel nl.
    # list(...) keeps the concatenation valid where range() is lazy.
    chunks = list(range(0, nl, neach)) + [nl]
    print(chunks)

    pool = mp.Pool(processes=args.nprocess)

    # Builtin ``float`` replaces the removed ``np.float`` alias (same dtype).
    Y = np.genfromtxt(iolib.ropen(args.phenotype),
                      missing_values="NA",
                      dtype=float,
                      skip_header=args.header)

    if len(Y.shape) == 1:
示例#5
0
文件: lmem.py 项目: jaredo/gwas
        processed+=nsnp
        print processed,"loci processed"
    print "Took",time.time() - time0,"seconds"


if __name__ == '__main__':
    # Uncomment to get multiprocessing debug logging on stderr:
    # logger = mp.log_to_stderr()
    # logger.setLevel(logging.INFO)

    # Validate with a real exception; ``assert`` disappears under ``-O``.
    if args.header < 0:
        raise ValueError("header must be >= 0, got %r" % (args.header,))
    iolib.checkfile(args.kinship)
    iolib.checkfile(args.genfile)

    # Drop a trailing ".gz" so it is added exactly once when opening below.
    if args.output.endswith(".gz"):
        args.output = args.output[:-3]

    outf = csv.writer(iolib.wopen(args.output + ".gz"), delimiter="\t")

    nl = iolib.nlines(iolib.ropen(args.genfile))
    print("%s SNPs" % (nl,))
    # Integer ceiling division: ``math.ceil(nl/args.nprocess)`` floored the
    # quotient before ceil under Python 2, giving too many chunks and a zero
    # step (ValueError in range) when nl < nprocess. max() guards nl == 0.
    neach = max(1, (nl + args.nprocess - 1) // args.nprocess)
    # Start index of every chunk followed by the end sentinel nl.
    chunks = list(range(0, nl, neach)) + [nl]
    print(chunks)

    pool = mp.Pool(processes=args.nprocess)

    # Builtin ``float`` is the dtype the removed ``np.float`` alias named.
    Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA", dtype=float, skip_header=args.header)

    if len(Y.shape)==1:
        nsample = Y.shape[0]
        npheno = 1
    else: