Пример #1
0
    def setUpClass(self):
        """Load the mouse data set and cache the test fixtures on ``self``.

        Attributes set:
            snp_fn, pheno_fn: input paths, built relative to this file.
            G: unit-standardized SNP values, flagged read-only.
            y: first column of the phenotype values, flagged read-only.
            G_cov: a single column of ones with one row per entry of ``y``,
                flagged read-only (no covariate file is loaded).
        """
        here = os.path.dirname(os.path.realpath(__file__))
        self.snp_fn = here + "/../../tests/datasets/mouse/alldata"
        self.pheno_fn = here + "/../../tests/datasets/mouse/pheno_10_causals.txt"

        # Open both inputs and intersect their sample ids.
        bed, phen = pysnptools.util.intersect_apply(
            [Bed(self.snp_fn), pstpheno.loadOnePhen(self.pheno_fn)])

        # Genotypes: read in C order, standardize, then freeze.
        self.G = stdizer.Unit().standardize(bed.read(order='C').val)
        self.G.flags.writeable = False

        # Phenotype: only the first column is used.
        self.y = phen['vals'][:, 0]
        self.y.flags.writeable = False

        # Stand-in for covariates: one all-ones column.
        self.G_cov = np.ones((len(self.y), 1))
        self.G_cov.flags.writeable = False
Пример #2
0
def main():
    """
    Example that compares output to fastlmmc.

    Steps:
      1. Load the toy SNP and phenotype files and intersect their sample ids.
      2. Standardize the SNPs and run ``WindowingGwas`` with a fixed ``delta``.
      3. Run ``GwasTest`` on the same files with the same ``delta``.
      4. Plot the log p-values of both runs against each other, assert that
         they agree to 3 decimals, and draw a Manhattan plot.

    Raises:
        AssertionError: if the two sets of log p-values differ (raised by
            ``np.testing.assert_array_almost_equal``).
    """

    # set up data
    phen_fn = "../feature_selection/examples/toydata.phe"
    snp_fn = "../feature_selection/examples/toydata.5chrom.bed"

    # load data
    ###################################################################
    snp_reader = Bed(snp_fn)
    pheno = pstpheno.loadOnePhen(phen_fn)

    # No covariates are used in this example; None is still passed through
    # intersect_apply so the call keeps its three-way shape.
    cov = None

    snp_reader, pheno, cov = intersect_apply([snp_reader, pheno, cov])

    G = snp_reader.read(order='C').val
    G = stdizer.Unit().standardize(G)
    G.flags.writeable = False
    y = pheno['vals'][:, 0]
    # Freeze y as well. The original line was a bare attribute access
    # (`y.flags.writeable`) that had no effect.
    y.flags.writeable = False

    delta = 2.0
    gwas = WindowingGwas(G, y, delta=delta)
    pv = gwas.run_gwas()

    # Reference run on the same files, using every SNP id for both the
    # similarity and the test set.
    from fastlmm.association.tests.test_gwas import GwasTest
    REML = False
    snp_pos_sim = snp_reader.sid
    snp_pos_test = snp_reader.sid
    os.environ["FastLmmUseAnyMklLib"] = "1"
    gwas_c = GwasTest(snp_fn,
                      phen_fn,
                      snp_pos_sim,
                      snp_pos_test,
                      delta,
                      REML=REML,
                      excludeByPosition=0)
    gwas_c.run_gwas()

    # Scatter the two sets of log p-values against the diagonal.
    import pylab
    pylab.plot(np.log(pv), np.log(gwas_c.p_values), "+")
    pylab.plot(np.arange(-18, 0), np.arange(-18, 0), "-k")
    pylab.show()

    np.testing.assert_array_almost_equal(np.log(pv),
                                         np.log(gwas_c.p_values),
                                         decimal=3)

    simple_manhattan_plot(pv)
Пример #3
0
def load_intersect(snp_reader, pheno_fn_or_none, snp_set=AllSnps()):
    """
    load SNPs and (optionally) a phenotype, check that their ids line up
    ----------------------------------------------------------------------
    Input:
    snp_reader       : SnpReader object (e.g. BedReader)
    pheno_fn_or_none : str, file name of phenotype file, or None to skip
                       loading a phenotype
    snp_set          : SNP selection passed on to snp_reader.read
                       (default: AllSnps())
    ----------------------------------------------------------------------
    Output:
    G         : numpy array containing the unit-standardized SNP data
    y         : numpy (1d) containing phenotype, or None when no phenotype
                file was given
    snp_names : the 'rs' entry of the data returned by snp_reader.read
    chr_ids   : first column of the 'pos' entry of that data
    ----------------------------------------------------------------------
    Raises:
    AssertionError : if the intersected phenotype and SNP indices are not
                     identical (the code assumes both files list the
                     individuals in the same order)
    ----------------------------------------------------------------------
    """

    geno = snp_reader.read(order='C', snp_set=snp_set)
    G = stdizer.Unit().standardize(geno['snps'])

    snp_names = geno['rs']
    chr_ids = geno['pos'][:, 0]

    if pheno_fn_or_none is None:
        return G, None, snp_names, chr_ids

    # load phenotype
    pheno = pstpheno.loadOnePhen(pheno_fn_or_none, 0)
    y = pheno['vals'][:, 0]

    # intersect ids
    import warnings
    warnings.warn(
        "This intersect_ids is deprecated. Pysnptools includes newer versions of intersect_ids",
        DeprecationWarning)
    indarr = util.intersect_ids([pheno['iid'], snp_reader.original_iids])

    if not (indarr[:, 0] == indarr[:, 1]).all():
        # Raised explicitly rather than via `assert False`, so the check is
        # not stripped when Python runs with -O.
        raise AssertionError(
            "ERROR: this code assumes the same order for snp and phen file")

    return G, y, snp_names, chr_ids
Пример #4
0
def standardize(snps,
                blocksize=None,
                standardizer=stdizer.Unit(),
                force_python_only=False):
    '''Does in-place standardization.
            Will use C++ if possible (for single and double, unit and beta, order="F" and order="C")

    snps              : 2-d array of SNP values; snps.shape[1] is treated as
                        the number of SNPs
    blocksize         : optional block size forwarded to the standardizer; it
                        is reset to None when it is >= the number of SNPs
    standardizer      : standardizer object, or a str naming one
    force_python_only : forwarded unchanged to the standardizer

    Returns whatever standardizer.standardize(...) returns.
    '''
    #!!warnings.warn("This standardizer is deprecated. Pysnptools includes newer versions of standardization", DeprecationWarning)
    if isinstance(standardizer, str):
        # The original called `.factor` on the str itself, which always
        # raised AttributeError.
        # NOTE(review): assumes the stdizer module exposes factory(name) -- confirm.
        standardizer = stdizer.factory(standardizer)

    # If blocksize is at least the # of snps, blocking is pointless: set it to None
    if blocksize is not None and blocksize >= snps.shape[1]:
        blocksize = None

    return standardizer.standardize(snps,
                                    blocksize=blocksize,
                                    force_python_only=force_python_only)
Пример #5
0
def load_GWAS_data(genotype_file,
                   phenotype_file,
                   covariates_file=None,
                   exms=-1,
                   permute=True):
    """Load a design matrix and a phenotype vector for GWAS.

    Args:
        genotype_file: path handed to ``Bed`` to open the genotypes.
        phenotype_file: path handed to ``plink.loadOnePhen``.
        covariates_file: optional path handed to ``plink.loadPhen``; when
            given, the covariate values are prepended as columns to the SNPs.
        exms: when > 0, keep only this many individuals.
        permute: when subsetting, pick the individuals from a random
            permutation (the chosen indices are written to ``inds.txt``)
            instead of taking the first ``exms``.

    Returns:
        ``(X, y)``: ``X`` is the standardized matrix (flagged read-only) and
        ``y`` the first column of the phenotype values.
    """
    import pysnptools.pysnptools.util.util
    from pysnptools.pysnptools.snpreader.bed import Bed
    import fastlmm.util.standardizer as stdizer
    import fastlmm.pyplink.plink as plink

    snp_reader = Bed(genotype_file)
    phenotype = plink.loadOnePhen(phenotype_file)
    if covariates_file is not None:
        covariates = plink.loadPhen(covariates_file)
    else:
        covariates = None
    snp_reader, phenotype, covariates = pysnptools.pysnptools.util.util.intersect_apply(
        [snp_reader, phenotype, covariates])

    if exms > 0:  #subset number individuals
        if permute:
            print(snp_reader.iid_count)
            perms = np.random.permutation(range(snp_reader.iid_count))
            np.savetxt('inds.txt', perms[0:exms])
            snp_reader = snp_reader[perms[0:exms], :]
        else:
            snp_reader = snp_reader[0:exms, :]
    #read the SNPs
    snp_data = snp_reader.read(order='C')

    X = snp_data.val
    if covariates is not None:
        # Stack the covariate value matrix, not the container around it.
        # NOTE(review): assumes loadPhen returns the same 'vals' layout that
        # loadOnePhen does (see phenotype['vals'] below) -- confirm.
        X = np.hstack((covariates['vals'], X))
    stdizer.Unit().standardize(X)
    X.flags.writeable = False
    y = phenotype['vals'][:, 0]

    print("done reading")
    return (X, y)