Exemplo n.º 1
0
def loadData(bfile,
             extractSim,
             phenoFile,
             missingPhenotype='-9',
             loadSNPs=False,
             standardize=True):
    """Open a Bed dataset, optionally filter its SNPs and attach phenotypes.

    Args:
        bfile: Value handed straight to ``Bed(...)`` to open the dataset.
        extractSim: Path of a CSV file whose first column lists the SNP ids
            to keep, or ``None`` to keep every SNP.
        phenoFile: Phenotype file forwarded to ``loadPheno`` (defined
            elsewhere), or ``None`` to skip phenotype loading.
        missingPhenotype: Forwarded unchanged to ``loadPheno``.
        loadSNPs: When true, ``bed.read()`` is called on the (possibly
            filtered) reader before returning.
        standardize: Only consulted when ``loadSNPs`` is true; when true the
            read data is passed through ``.standardize()``.

    Returns:
        A ``(bed, phe)`` tuple. ``phe`` is ``None`` when ``phenoFile`` is
        ``None``; otherwise both values are whatever ``loadPheno`` returned
        (``bed`` may then be further read/standardized).
    """
    bed = Bed(bfile)

    if extractSim is not None:
        # The context manager closes the file even if csv parsing raises.
        with open(extractSim) as f:
            # csv yields [] for blank lines; skip them instead of failing
            # with IndexError on row[0].
            extractSnpsSet = {row[0] for row in csv.reader(f) if row}
        # enumerate keeps the positional indices and avoids the
        # Python-2-only xrange.
        keepSnpsInds = [
            i for i, sid in enumerate(bed.sid) if sid in extractSnpsSet
        ]
        bed = bed[:, keepSnpsInds]

    phe = None
    if phenoFile is not None:
        bed, phe = loadPheno(bed, phenoFile, missingPhenotype)

    if loadSNPs:
        bed = bed.read()
        if standardize:
            bed = bed.standardize()

    return bed, phe
Exemplo n.º 2
0
def load_data(snp_file, pheno_file, covar_file):
    """Load SNPs, phenotype and optional covariates, then build a kernel.

    The SNP reader, the phenotype table and (when ``covar_file`` is given)
    the covariate table are passed together through
    ``srutil.intersect_apply`` before the SNP data is read and standardized.

    Args:
        snp_file: Value passed to ``Bed(...)``.
        pheno_file: Value passed to ``pysnptools.util.pheno.loadPhen``.
        covar_file: Covariate file for ``loadPhen``, or ``None`` for no
            covariates.

    Returns:
        ``(snp_data, pheno, covar, X, Y, K)`` where ``covar`` is the
        covariate table's ``'vals'`` entry (or ``None``), ``Y`` is
        ``pheno['vals']`` after mean-centering and scaling by its standard
        deviation along axis 0 via augmented assignment, ``X`` is
        ``snp_data.val`` rescaled by a single global factor, and ``K`` is
        ``X`` times its transpose.

    Raises:
        AssertionError: If ``pheno['iid']`` and ``snp_data.iid`` differ.
    """
    reader = Bed(snp_file)
    pheno = pysnptools.util.pheno.loadPhen(pheno_file)

    if covar_file is None:
        reader, pheno = srutil.intersect_apply([reader, pheno])
        covar = None
    else:
        covar_table = pysnptools.util.pheno.loadPhen(covar_file)
        reader, pheno, covar_table = srutil.intersect_apply(
            [reader, pheno, covar_table])
        covar = covar_table['vals']

    snp_data = reader.read().standardize()

    # Augmented assignment operates on pheno['vals'] itself, so the
    # returned pheno carries the normalized values as well.
    Y = pheno['vals']
    Y -= Y.mean(0)
    Y /= Y.std(0)

    # One global scale factor: sum of squared entries divided by the
    # number of individuals.
    mean_square = (snp_data.val**2).sum() / float(snp_data.iid_count)
    X = 1. / np.sqrt(mean_square) * snp_data.val
    K = np.dot(X, X.T)  # TODO use symmetric dot to speed this up

    assert np.all(pheno['iid'] == snp_data.iid), "the samples are not sorted"

    return snp_data, pheno, covar, X, Y, K
Exemplo n.º 3
0
def loadData(bfile, extractSim, phenoFile, missingPhenotype='-9', loadSNPs=False, standardize=True):
	"""Open a Bed dataset, optionally filter its SNPs and attach phenotypes.

	Args:
		bfile: Value handed straight to ``Bed(...)`` to open the dataset.
		extractSim: Path of a CSV file whose first column lists the SNP ids
			to keep, or ``None`` to keep every SNP.
		phenoFile: Phenotype file forwarded to ``loadPheno`` (defined
			elsewhere), or ``None`` to skip phenotype loading.
		missingPhenotype: Forwarded unchanged to ``loadPheno``.
		loadSNPs: When true, ``bed.read()`` is called on the (possibly
			filtered) reader before returning.
		standardize: Only consulted when ``loadSNPs`` is true; when true the
			read data is passed through ``.standardize()``.

	Returns:
		A ``(bed, phe)`` tuple. ``phe`` is ``None`` when ``phenoFile`` is
		``None``; otherwise both values are whatever ``loadPheno`` returned
		(``bed`` may then be further read/standardized).
	"""
	bed = Bed(bfile)

	if extractSim is not None:
		# The context manager closes the file even if csv parsing raises.
		with open(extractSim) as f:
			# csv yields [] for blank lines; skip them instead of failing
			# with IndexError on row[0].
			extractSnpsSet = {row[0] for row in csv.reader(f) if row}
		# enumerate keeps the positional indices and avoids the
		# Python-2-only xrange.
		keepSnpsInds = [i for i, sid in enumerate(bed.sid) if sid in extractSnpsSet]
		bed = bed[:, keepSnpsInds]

	phe = None
	if phenoFile is not None:
		bed, phe = loadPheno(bed, phenoFile, missingPhenotype)

	if loadSNPs:
		bed = bed.read()
		if standardize:
			bed = bed.standardize()

	return bed, phe
Exemplo n.º 4
0
def getChromosome(bfile, chrom):
    """Return the read, standardized SNPs whose chromosome equals ``chrom``.

    The chromosome of each SNP is taken from column 0 of ``bed.pos``; the
    resulting boolean mask selects the SNP (second) axis of the reader.
    """
    reader = Bed(bfile)
    on_chrom = (reader.pos[:, 0] == chrom)
    selected = reader[:, on_chrom]
    snp_data = selected.read()
    return snp_data.standardize()
Exemplo n.º 5
0
def getChromosome(bfile, chrom):
	"""Return the read, standardized SNPs whose chromosome equals ``chrom``.

	The chromosome of each SNP is taken from column 0 of ``bed.pos``; the
	resulting boolean mask selects the SNP (second) axis of the reader.
	"""
	reader = Bed(bfile)
	on_chrom = (reader.pos[:, 0] == chrom)
	selected = reader[:, on_chrom]
	snp_data = selected.read()
	return snp_data.standardize()
Exemplo n.º 6
0
def getExcludedChromosome(bfile, chrom):
    """Return the read, standardized SNPs that are NOT on ``chrom``.

    Column 0 of ``bed.pos`` is compared against ``chrom``; every SNP whose
    value differs is kept.
    """
    reader = Bed(bfile)
    off_chrom = (reader.pos[:, 0] != chrom)
    remaining = reader[:, off_chrom]
    snp_data = remaining.read()
    return snp_data.standardize()
Exemplo n.º 7
0
def getExcludedChromosome(bfile, chrom):
    """Return the read, standardized SNPs that are NOT on ``chrom``.

    The dataset is opened with ``count_A1=True``. Column 0 of ``bed.pos`` is
    compared against ``chrom``; every SNP whose value differs is kept.
    """
    reader = Bed(bfile, count_A1=True)
    off_chrom = reader.pos[:, 0] != chrom
    remaining = reader[:, off_chrom]
    snp_data = remaining.read()
    return snp_data.standardize()