Пример #1
0
    def standardize(self, snpreader):
        """
        Check that every standardization code path yields the same values.

        For each floating-point dtype the SNP values are read once (Fortran
        order, ``force_python_only=True``) and then standardized several ways.
        ``Unit`` is run with ``force_python_only=True`` (the reference), with
        ``force_python_only=True`` plus ``block_size=100``, and with default
        arguments on Fortran-ordered and C-ordered arrays. ``Beta(1, 25)`` is
        run with ``force_python_only=True`` (the reference) and with default
        arguments on Fortran-ordered and C-ordered arrays. Every variant must
        match its reference in shape and, within ``rtol=atol=1e-05``, in value.

        :param snpreader: reader whose ``read(order=, force_python_only=,
            dtype=)`` result exposes the SNP values as ``.val``.
        """
        # Name the NumPy dtypes directly; the ``scipy.float64`` /
        # ``scipy.float32`` spellings are deprecated aliases of these.
        for dtype in [np.float64, np.float32]:

            snps = snpreader.read(order='F',
                                  force_python_only=True,
                                  dtype=dtype).val
            self.assertEqual(dtype, snps.dtype)

            # Each call is handed its own freshly built array, as in the
            # original test (presumably because standardize works in place --
            # confirm against the standardizer implementation).
            unit_reference = Unit().standardize(snps.copy(),
                                                force_python_only=True)
            unit_variants = [
                Unit().standardize(snps.copy(),
                                   block_size=100,
                                   force_python_only=True),
                Unit().standardize(np.array(snps, dtype=dtype, order="F")),
                Unit().standardize(np.array(snps, dtype=dtype, order="C")),
            ]

            beta_reference = Beta(1, 25).standardize(snps.copy(),
                                                     force_python_only=True)
            beta_variants = [
                Beta(1, 25).standardize(
                    np.array(snps, dtype=dtype, order="F")),
                Beta(1, 25).standardize(
                    np.array(snps, dtype=dtype, order="C")),
            ]

            comparisons = [(unit_reference, unit_variants),
                           (beta_reference, beta_variants)]
            for reference, variants in comparisons:
                # Shapes first, so a dimension mismatch is reported as such
                # rather than as a value mismatch.
                for variant in variants:
                    self.assertEqual(reference.shape[0], variant.shape[0])
                    self.assertEqual(reference.shape[1], variant.shape[1])

                for variant in variants:
                    self.assertTrue(
                        np.allclose(reference, variant, rtol=1e-05,
                                    atol=1e-05))
Пример #2
0
    def test_some_std(self):
        """
        Smoke-test kernel reading and the string form of several standardizers.

        Checks that:

        * two ``read_kernel(standardizer=Unit())`` calls on ``self.snpdata``
          agree to 10 decimals;
        * a ``SnpData`` built from the same iid/sid/pos and a copy of the
          values can be converted to ``str`` before and after
          ``standardize()``;
        * a kernel read from the toy Bed file with ``block_size=500`` matches
          the first kernel to 10 decimals;
        * ``str()`` works on ``Unit``, ``Beta(1, 25)``, ``Identity`` and
          ``DiagKtoN`` instances.
        """
        k0 = self.snpdata.read_kernel(standardizer=Unit()).val
        # Not referenced below; kept so the import itself is still executed.
        from pysnptools.kernelreader import SnpKernel
        k1 = self.snpdata.read_kernel(standardizer=Unit())
        np.testing.assert_array_almost_equal(k0, k1.val, decimal=10)

        from pysnptools.snpreader import SnpData
        snpdata2 = SnpData(iid=self.snpdata.iid,
                           sid=self.snpdata.sid,
                           pos=self.snpdata.pos,
                           val=np.array(self.snpdata.val))
        # The string conversions only need to succeed; the text is not checked.
        str(snpdata2)
        snpdata2.standardize()
        str(snpdata2)

        snpreader = Bed(self.currentFolder + "/examples/toydata",
                        count_A1=False)
        k2 = snpreader.read_kernel(standardizer=Unit(), block_size=500).val
        np.testing.assert_array_almost_equal(k0, k2, decimal=10)

        from pysnptools.standardizer.identity import Identity
        from pysnptools.standardizer.diag_K_to_N import DiagKtoN
        # Name the NumPy dtypes directly; the ``scipy.float64`` /
        # ``scipy.float32`` spellings are deprecated aliases of these.
        for dtype in [np.float64, np.float32]:
            for std in [Unit(), Beta(1, 25), Identity(), DiagKtoN()]:
                str(std)
                np.random.seed(0)
                x = np.array(np.random.randint(3, size=[60, 100]), dtype=dtype)
                # x2 is a strided view of x and x2b a fresh copy of it. They
                # are the inputs for the disabled comparison below and are
                # otherwise unused.
                x2 = x[:, ::2]
                x2b = np.array(x2)
                #LATER what's this about? It doesn't do non-contiguous?
                #assert not x2.flags['C_CONTIGUOUS'] and not x2.flags['F_CONTIGUOUS'] #set up to test non contiguous
                #assert x2b.flags['C_CONTIGUOUS'] or x2b.flags['F_CONTIGUOUS'] #set up to test non contiguous
                #a,b = std.standardize(x2b),std.standardize(x2)
                #np.testing.assert_array_almost_equal(a,b)
        logging.info("done")
Пример #3
0
    def factory_iterator():
        """
        Yield one ``NaNCNCTestCases`` per combination of test settings.

        The combinations are the product of: three iid index lists (all, first
        half, every second index counting down), three SNP index lists built
        the same way over at most the first 100 SNPs, two standardizers
        (``Unit()``, ``Beta(1, 25)``), four reader factories (Bed, SNP-major
        HDF5, iid-major HDF5, Dat), two dtypes, two memory orders and both
        values of ``force_python_only``.

        For every (iid list, SNP list, standardizer) triple a reference result
        is first computed from the Bed reader with float64 / C order and passed
        to each yielded test case.

        While the generator runs, the working directory is the directory of
        this file, so the relative ``examples/...`` paths resolve. The previous
        working directory is restored when the generator finishes, is closed
        early, or raises.
        """
        snp_reader_factory_bed = lambda: Bed("examples/toydata",
                                             count_A1=False)
        snp_reader_factory_snpmajor_hdf5 = lambda: SnpHdf5(
            "examples/toydata.snpmajor.snp.hdf5")
        snp_reader_factory_iidmajor_hdf5 = lambda: SnpHdf5(
            "examples/toydata.iidmajor.snp.hdf5")
        snp_reader_factory_dat = lambda: Dat("examples/toydata.dat")

        previous_wd = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            snpreader0 = snp_reader_factory_bed()
            S_original = snpreader0.sid_count
            N_original = snpreader0.iid_count

            snps_to_read_count = min(S_original, 100)

            # Floor division keeps the range() arguments integral on both
            # Python 2 and Python 3 (true division would produce a float).
            iid_index_lists = [
                list(range(N_original)),
                list(range(N_original // 2)),
                list(range(N_original - 1, 0, -2)),
            ]
            snp_index_lists = [
                list(range(snps_to_read_count)),
                list(range(snps_to_read_count // 2)),
                list(range(snps_to_read_count - 1, 0, -2)),
            ]
            reader_factories = [
                snp_reader_factory_bed,
                snp_reader_factory_snpmajor_hdf5,
                snp_reader_factory_iidmajor_hdf5,
                snp_reader_factory_dat,
            ]

            for iid_index_list in iid_index_lists:
                for snp_index_list in snp_index_lists:
                    for standardizer in [Unit(), Beta(1, 25)]:
                        # NOTE(review): the force_python_only argument here is
                        # the *string* "False", which is truthy. A boolean was
                        # presumably intended; it is left unchanged because it
                        # determines how the reference data is produced.
                        reference_snps, reference_dtype = NaNCNCTestCases(
                            iid_index_list, snp_index_list, standardizer,
                            snp_reader_factory_bed(), sp.float64, "C", "False",
                            None, None).read_and_standardize()
                        for snpreader_factory in reader_factories:
                            for dtype in [sp.float64, sp.float32]:
                                for order in ["C", "F"]:
                                    for force_python_only in [False, True]:
                                        # A fresh reader per test case.
                                        snpreader = snpreader_factory()
                                        test_case = NaNCNCTestCases(
                                            iid_index_list, snp_index_list,
                                            standardizer, snpreader, dtype,
                                            order, force_python_only,
                                            reference_snps, reference_dtype)
                                        yield test_case
        finally:
            # Runs on normal exhaustion, on generator close, and on error.
            os.chdir(previous_wd)
Пример #4
0
def factory(s):
    """
    Build a standardizer from its textual name.

    Matching is case-insensitive. Recognised spellings, each with or without a
    trailing ``()``, are ``Unit``, ``Identity``, ``BySqrtSidCount`` and
    ``BySidCount``. ``Beta`` alone gives ``Beta()``; a string starting with
    ``Beta(`` is evaluated as a Python expression so arguments can be given,
    e.g. ``"Beta(1,25)"``.

    :param s: name of the standardizer.
    :return: the standardizer instance, or ``None`` when *s* is not recognised.
    """
    # Match on a fully lower-cased key. The previous code compared
    # ``s.capitalize()`` (first letter upper, all others lower) against
    # mixed-case literals such as "BySqrtSidCount", which can never be equal,
    # so those standardizers were unreachable.
    key = s.lower()

    if key in ("unit", "unit()"):
        return Unit()

    if key in ("identity", "identity()"):
        return Identity()

    if key in ("bysqrtsidcount", "bysqrtsidcount()"):
        return BySqrtSidCount()

    if key in ("bysidcount", "bysidcount()"):
        return BySidCount()

    if key == "beta":
        return Beta()

    if key.startswith("beta("):
        # SECURITY: eval() executes arbitrary code. This is only safe when *s*
        # comes from a trusted source (e.g. the developer's own command line);
        # do not feed it untrusted input. The capitalized form is evaluated,
        # exactly as before, so that "beta(...)" resolves to the Beta class.
        standardizer = eval(s.capitalize())
        return standardizer

    # Unknown name: callers receive None, as they always have.
    return None
Пример #5
0
# Read the SNP data from a Bed file into memory and standardize it.
# print(x) with a single argument behaves the same on Python 2 and Python 3.
snpreader = Bed("all.bed")
snpdata = snpreader.read()
snpdata = snpdata.standardize()  # In place AND returns self
print(snpdata.val)
#[[ 0.30156099  0.2481353  -0.50673344 ...,  0.92208184 -0.1266665   0.55601103]
# [ 0.30156099  0.2481353  -0.50673344 ...,  0.92208184 -1.5034763   0.55601103]
#...

# In one line:
snpdata = Bed("all.bed").read().standardize()

# Beta standardization
from pysnptools.standardizer import Beta

snpdataB = Bed("all.bed").read().standardize(Beta(1, 25))
print(snpdataB.val)
#[[  7.40112054e-01   7.15532756e-01  -5.02003205e-04 ...,   4.40649336e-03   -1.13331663e-06   1.87525732e-01]
# [  7.40112054e-01   7.15532756e-01  -5.02003205e-04 ...,   4.40649336e-03   -1.34519756e-05   1.87525732e-01]
# ...

# To create a kernel (the relatedness of each iid pair as the dot product of their standardized SNP values)
from pysnptools.standardizer import Unit

kerneldata = Bed("all.bed").read_kernel(standardizer=Unit())
print(kerneldata.val)
#array([[ 5081.6121922 ,   253.32922313,   165.9842232 , ...,  -130.76998392,  -298.66392286,  -287.66887036],
#       [  253.32922313,  5061.87849635,   384.04149913, ...,  -334.33599388,  -127.02308706,  -291.41483161]
#
#...