示例#1
0
    def test_c_reader_pheno(self):
        """Round-trip a phenotype file through ``Pheno`` and check alternate inputs.

        Reads ``examples/toydata.phe``, injects a missing value, writes the data
        to ``tempdir/snpreader/toydata.phe`` and reads it back. It then checks
        that the dictionaries returned by ``pstpheno.loadOnePhen`` (with and
        without ``vectorize=True``) and the ``toydata.id.phe`` /
        ``toydata.fid.phe`` files yield the same values as the original file,
        and that ``Pheno(None, iid_if_none=...)`` keeps the rows but has no
        columns.
        """
        phe_file = self.currentFolder + "/examples/toydata.phe"
        snpdata1 = Pheno(phe_file).read()

        self.assertEqual(np.float64, snpdata1.val.dtype)

        # Inject a missing value to test writing and reading missing values.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        snpdata1.val[1, 0] = np.nan
        output = "tempdir/snpreader/toydata.phe"
        create_directory_if_necessary(output)
        Pheno.write(output, snpdata1)
        snpreader = Pheno(output)
        _fortesting_JustCheckExists().input(snpreader)
        str(snpreader)  # only checks that rendering the reader does not raise
        snpdata2 = snpreader.read()
        np.testing.assert_array_almost_equal(snpdata1.val, snpdata2.val, decimal=10)

        # Re-read the example file so the comparisons below use data without
        # the NaN injected above.
        snpdata1 = Pheno(phe_file).read()
        import pysnptools.util.pheno as pstpheno
        pheno_dict = pstpheno.loadOnePhen(phe_file, missing="")
        snpdata3 = Pheno(pheno_dict).read()
        np.testing.assert_array_almost_equal(snpdata1.val, snpdata3.val, decimal=10)

        pheno_dict = pstpheno.loadOnePhen(phe_file, missing="", vectorize=True)
        assert len(pheno_dict['vals'].shape) == 1, "test 1-d array of values"
        snpdata3 = Pheno(pheno_dict).read()
        np.testing.assert_array_almost_equal(snpdata1.val, snpdata3.val, decimal=10)

        snpdata4 = Pheno(None, iid_if_none=snpdata1.iid)
        assert (snpdata4.row == snpdata1.row).all() and snpdata4.col_count == 0

        snpdata5 = Pheno(self.currentFolder + "/examples/toydata.id.phe").read()
        np.testing.assert_array_almost_equal(snpdata1.val, snpdata5.val, decimal=10)
        snpdata6 = Pheno(self.currentFolder + "/examples/toydata.fid.phe").read()
        np.testing.assert_array_almost_equal(snpdata1.val, snpdata6.val, decimal=10)
示例#2
0
 def test_snp_dist2(self):
     """Smoke-test viewing a ``Bed`` reader as a distribution reader.

     Builds the ``as_dist(max_weight=2)`` view of the
     ``toydata.5chrom.bed`` example file, renders it with ``str`` and hands
     it to the ``_fortesting_JustCheckExists`` checker.
     """
     logging.info("in test_snp_dist2")
     bed_path = self.currentFolder + "/../examples/toydata.5chrom.bed"
     dist_view = Bed(bed_path, count_A1=False).as_dist(max_weight=2)
     str(dist_view)  # only checks that rendering the view does not raise
     _fortesting_JustCheckExists().input(dist_view)
示例#3
0
文件: test.py 项目: hyacz/PySnpTools
    def test_c_reader_dat(self):
        """Read a column subset of the toy Dat file and round-trip it via ``Dat.write``.

        Every 100th column of ``examples/toydata.dat`` is read, checked for a
        float64 dtype and compared against ``self.snps``. The data, with one
        injected missing value, is then written and re-read; the same is done
        for a selection that has no columns.
        """
        snpreader = Dat(self.currentFolder + "/examples/toydata.dat")[:, ::100]
        _fortesting_JustCheckExists().input(snpreader)

        snpdata1 = snpreader.read()
        self.assertEqual(np.float64, snpdata1.val.dtype)
        self.assertTrue(
            np.allclose(self.snps[:, ::100], snpdata1.val, rtol=1e-05, atol=1e-05))

        # Inject a missing value to test writing and reading missing values.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        snpdata1.val[1, 2] = np.nan
        output = "tempdir/snpreader/toydata.dat"
        create_directory_if_necessary(output)
        Dat.write(output, snpdata1)
        snpdata2 = Dat(output).read()
        np.testing.assert_array_almost_equal(snpdata1.val, snpdata2.val, decimal=10)

        snpdata3 = snpdata1[:, 0:0].read()  # create snpdata with no sids
        output = "tempdir/snpreader/toydata3.dat"
        Dat.write(output, snpdata3)
        snpdata4 = Dat(output).read()
        assert snpdata3 == snpdata4
示例#4
0
 def test_dist_snp2(self):
     """Smoke-test viewing a ``DistNpz`` reader as a SNP reader.

     Builds the ``as_snp(max_weight=33)`` view of the ``toydata.dist.npz``
     example file, renders it with ``str`` and hands it to the
     ``_fortesting_JustCheckExists`` checker.
     """
     logging.info("in test_dist_snp2")
     npz_path = self.currentFolder + "/../examples/toydata.dist.npz"
     snp_view = DistNpz(npz_path).as_snp(max_weight=33)
     str(snp_view)  # only checks that rendering the view does not raise
     _fortesting_JustCheckExists().input(snp_view)
示例#5
0
 def cmktest_writes(self):
     """Round-trip ``PstData`` through ``PstNpz`` and ``PstHdf5`` for many id/property shapes.

     For every combination of row count, column count, row/column id
     generator and property generator, the data is written with each class,
     read back both whole and through a ``[::2, ::3]`` selection, and compared
     with the in-memory original. Each file is removed after it is checked.
     """
     #===================================
     #    Defining sub functions
     #===================================
     def _oned_int(c):
         return range(c)
     def _oned_str(c):
         return [str(i).encode('ascii') for i in range(c)]
     def _twooned_int(c):
         return [[i] for i in range(c)]
     def _twooned_str(c):
         return [[str(i).encode('ascii')] for i in range(c)]
     def _twotwod_int(c):
         return [[i,i] for i in range(c)]
     def _twotwod_str(c):
         return [[str(i).encode('ascii'),b"hello"] for i in range(c)]
     def _none(c):
         return None
     def _zero(c):
         return np.empty([c,0])
     #===================================
     #    Starting main function
     #===================================
     logging.info("starting 'test_writes'")
     np.random.seed(0)
     output_template = "tempdir/pstreader/writes.{0}.{1}"
     create_directory_if_necessary(output_template.format(0,"npz"))
     # Loop-invariant tables, built once instead of on every iteration.
     id_gen_list = [_oned_int,_oned_str,_twooned_int,_twooned_str,_twotwod_int,_twotwod_str]
     prop_gen_list = id_gen_list + [_none,_zero]
     class_and_suffix_list = [(PstNpz,"npz"),(PstHdf5,"hdf5")]
     # np.s_ replaces sp.s_: the SciPy top-level re-exports of NumPy names are deprecated.
     subsetter_list = [None, np.s_[::2,::3]]
     i = 0
     for row_count in [5,2,1,0]:
         for col_count in [4,2,1,0]:
             val = np.random.normal(.5,2,size=(row_count,col_count))
             for row_or_col_gen in id_gen_list:
                 row = row_or_col_gen(row_count)
                 col = row_or_col_gen(col_count)
                 for prop_gen in prop_gen_list:
                     row_prop = prop_gen(row_count)
                     col_prop = prop_gen(col_count)
                     pstdata = PstData(row,col,val,row_prop,col_prop,str(i))
                     for the_class,suffix in class_and_suffix_list:
                         filename = output_template.format(i,suffix)
                         logging.info(filename)
                         i += 1
                         the_class.write(filename,pstdata)
                         for subsetter in subsetter_list:
                             reader = the_class(filename)
                             _fortesting_JustCheckExists().input(reader)
                             subreader = reader if subsetter is None else reader[subsetter[0],subsetter[1]]
                             readdata = subreader.read(order='C')
                             expected = pstdata if subsetter is None else pstdata[subsetter[0],subsetter[1]].read()
                             assert np.array_equal(readdata.val,expected.val)
                             assert np.array_equal(readdata.row,expected.row)
                             assert np.array_equal(readdata.col,expected.col)
                             assert np.array_equal(readdata.row_property,expected.row_property)
                             assert np.array_equal(readdata.col_property,expected.col_property)
                         # Best-effort cleanup: a file that cannot be removed
                         # must not fail the test, but other errors still surface.
                         try:
                             os.remove(filename)
                         except OSError:
                             pass
     logging.info("done with 'test_writes'")
示例#6
0
 def test_writes(self):
     """Round-trip ``PstData`` through ``PstNpz`` and ``PstHdf5`` for many id/property shapes.

     For every combination of row count, column count, row/column id
     generator and property generator, the data is written with each class,
     read back both whole and through a ``[::2, ::3]`` selection, and compared
     with the in-memory original. Each file is removed after it is checked.
     """
     #===================================
     #    Defining sub functions
     #===================================
     def _oned_int(c):
         return list(range(c))
     def _oned_str(c):
         return [str(i) for i in range(c)]
     def _twooned_int(c):
         return [[i] for i in range(c)]
     def _twooned_str(c):
         return [[str(i)] for i in range(c)]
     def _twotwod_int(c):
         return [[i,i] for i in range(c)]
     def _twotwod_str(c):
         return [[str(i),"hello"] for i in range(c)]
     def _none(c):
         return None
     def _zero(c):
         return np.empty([c,0])
     #===================================
     #    Starting main function
     #===================================
     logging.info("starting 'test_writes'")
     np.random.seed(0)
     output_template = "tempdir/pstreader/writes.{0}.{1}"
     create_directory_if_necessary(output_template.format(0,"npz"))
     # Loop-invariant tables, built once instead of on every iteration.
     id_gen_list = [_oned_int,_oned_str,_twooned_int,_twooned_str,_twotwod_int,_twotwod_str]
     prop_gen_list = id_gen_list + [_none,_zero]
     class_and_suffix_list = [(PstNpz,"npz"),(PstHdf5,"hdf5")]
     # np.s_ replaces sp.s_: the SciPy top-level re-exports of NumPy names are deprecated.
     subsetter_list = [None, np.s_[::2,::3]]
     i = 0
     for row_count in [5,2,1,0]:
         for col_count in [4,2,1,0]:
             val = np.random.normal(.5,2,size=(row_count,col_count))
             for row_or_col_gen in id_gen_list:
                 row = row_or_col_gen(row_count)
                 col = row_or_col_gen(col_count)
                 for prop_gen in prop_gen_list:
                     row_prop = prop_gen(row_count)
                     col_prop = prop_gen(col_count)
                     pstdata = PstData(row,col,val,row_prop,col_prop,str(i))
                     for the_class,suffix in class_and_suffix_list:
                         filename = output_template.format(i,suffix)
                         logging.info(filename)
                         i += 1
                         the_class.write(filename,pstdata)
                         for subsetter in subsetter_list:
                             reader = the_class(filename)
                             _fortesting_JustCheckExists().input(reader)
                             subreader = reader if subsetter is None else reader[subsetter[0],subsetter[1]]
                             readdata = subreader.read(order='C')
                             expected = pstdata if subsetter is None else pstdata[subsetter[0],subsetter[1]].read()
                             assert np.array_equal(readdata.val,expected.val)
                             assert np.array_equal(readdata.row,expected.row)
                             assert np.array_equal(readdata.col,expected.col)
                             assert np.array_equal(readdata.row_property,expected.row_property)
                             assert np.array_equal(readdata.col_property,expected.col_property)
                         # Best-effort cleanup: a file that cannot be removed
                         # must not fail the test, but other errors still surface.
                         try:
                             os.remove(filename)
                         except OSError:
                             pass
     logging.info("done with 'test_writes'")
示例#7
0
 def test_c_reader_dense(self):
     """Round-trip a column subset of ``self.snpdata`` through the ``Dense`` format.

     Every 100th column is read, one missing value is injected, and the data
     is written to ``tempdir/snpreader/toydata.dense.txt`` and read back for
     comparison.
     """
     snpdata1 = self.snpdata[:, ::100].read()
     # Inject a missing value to test writing and reading missing values.
     # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
     snpdata1.val[1, 2] = np.nan
     output = "tempdir/snpreader/toydata.dense.txt"
     create_directory_if_necessary(output)
     Dense.write(output, snpdata1)
     snpreader = Dense(output)
     _fortesting_JustCheckExists().input(snpreader)
     snpdata2 = snpreader.read()
     np.testing.assert_array_almost_equal(snpdata1.val, snpdata2.val, decimal=10)
示例#8
0
文件: test.py 项目: hyacz/PySnpTools
    def test_c_reader_pheno(self):
        """Round-trip a phenotype file through ``Pheno`` and check alternate inputs.

        Reads ``examples/toydata.phe``, injects a missing value, writes the data
        to ``tempdir/snpreader/toydata.phe`` and reads it back. It then checks
        that the dictionaries returned by ``pstpheno.loadOnePhen`` (with and
        without ``vectorize=True``) and the ``toydata.id.phe`` /
        ``toydata.fid.phe`` files yield the same values as the original file,
        and that ``Pheno(None, iid_if_none=...)`` keeps the rows but has no
        columns.
        """
        phe_file = self.currentFolder + "/examples/toydata.phe"
        snpdata1 = Pheno(phe_file).read()

        self.assertEqual(np.float64, snpdata1.val.dtype)

        # Inject a missing value to test writing and reading missing values.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        snpdata1.val[1, 0] = np.nan
        output = "tempdir/snpreader/toydata.phe"
        create_directory_if_necessary(output)
        Pheno.write(output, snpdata1)
        snpreader = Pheno(output)
        _fortesting_JustCheckExists().input(snpreader)
        str(snpreader)  # only checks that rendering the reader does not raise
        snpdata2 = snpreader.read()
        np.testing.assert_array_almost_equal(snpdata1.val,
                                             snpdata2.val,
                                             decimal=10)

        # Re-read the example file so the comparisons below use data without
        # the NaN injected above.
        snpdata1 = Pheno(phe_file).read()
        import pysnptools.util.pheno as pstpheno
        pheno_dict = pstpheno.loadOnePhen(phe_file, missing="")
        snpdata3 = Pheno(pheno_dict).read()
        np.testing.assert_array_almost_equal(snpdata1.val,
                                             snpdata3.val,
                                             decimal=10)

        pheno_dict = pstpheno.loadOnePhen(phe_file,
                                          missing="",
                                          vectorize=True)
        assert len(pheno_dict['vals'].shape) == 1, "test 1-d array of values"
        snpdata3 = Pheno(pheno_dict).read()
        np.testing.assert_array_almost_equal(snpdata1.val,
                                             snpdata3.val,
                                             decimal=10)

        snpdata4 = Pheno(None, iid_if_none=snpdata1.iid)
        assert (snpdata4.row == snpdata1.row).all() and snpdata4.col_count == 0

        snpdata5 = Pheno(self.currentFolder +
                         "/examples/toydata.id.phe").read()
        np.testing.assert_array_almost_equal(snpdata1.val,
                                             snpdata5.val,
                                             decimal=10)
        snpdata6 = Pheno(self.currentFolder +
                         "/examples/toydata.fid.phe").read()
        np.testing.assert_array_almost_equal(snpdata1.val,
                                             snpdata6.val,
                                             decimal=10)
示例#9
0
文件: test.py 项目: hyacz/PySnpTools
 def test_c_reader_dense(self):
     """Round-trip a column subset of ``self.snpdata`` through the ``Dense`` format.

     Every 100th column is read, one missing value is injected, and the data
     is written to ``tempdir/snpreader/toydata.dense.txt`` and read back for
     comparison. The reader is also rendered with ``str`` as a smoke check.
     """
     snpdata1 = self.snpdata[:, ::100].read()
     # Inject a missing value to test writing and reading missing values.
     # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
     snpdata1.val[1, 2] = np.nan
     output = "tempdir/snpreader/toydata.dense.txt"
     create_directory_if_necessary(output)
     Dense.write(output, snpdata1)
     snpreader = Dense(output)
     _fortesting_JustCheckExists().input(snpreader)
     str(snpreader)  # only checks that rendering the reader does not raise
     snpdata2 = snpreader.read()
     np.testing.assert_array_almost_equal(snpdata1.val,
                                          snpdata2.val,
                                          decimal=10)
示例#10
0
    def test_read(self):
        """Exercise lookup, slicing and ``read`` options on an in-memory ``PstData``.

        Builds a 3x2 ``PstData`` with random values, checks row/column lookup
        and property slicing, then reads the top-left 2x2 selection under a
        range of ``order`` / ``dtype`` / ``force_python_only`` settings and
        compares each result with the matching slice of ``pstdata.val``.
        """
        np.random.seed(0)
        row_property = np.array([[1.0, 2, 2.5], [3, 4, 4.5], [5, 6, 6.5]])
        col_property = np.array([[1.0, 2, 2.5, 1], [3, 4, 4.5, 3]])
        pstdata = PstData(row=np.array([[1.0, 2], [3, 4], [5, 6]]),
                          col=np.array([["A", "a"], ["B", "b"]]),
                          val=np.random.normal(.5, 2, size=(3, 2)),
                          row_property=row_property,
                          col_property=col_property,
                          name="test_read")

        assert pstdata.row_to_index([np.array([3.0, 4])])[0] == 1
        assert pstdata.col_to_index([np.array(["A", "a"])])[0] == 0
        lower_rows = pstdata[1:, :2]
        assert np.array_equal(lower_rows.row_property, row_property[1:])
        assert np.array_equal(pstdata[1:, :2].col_property, col_property[:2])

        pstdata2 = pstdata[:2, :2].read()
        from pysnptools.kernelreader.test import _fortesting_JustCheckExists
        for candidate in (pstdata, pstdata2):
            _fortesting_JustCheckExists().input(candidate)

        np.testing.assert_array_almost_equal(pstdata2.val, pstdata.val[:2, :2], decimal=10)
        pstdata3 = pstdata[[], :].read()
        assert pstdata3.val.shape[0] == 0 and pstdata3.val.shape[1] == 2

        # Switch the in-memory values to Fortran order before the remaining reads.
        pstdata.val = pstdata.val.copy(order='F')

        # Each entry: (keyword arguments for read, dtype to cast the expectation to or None).
        read_cases = [
            ({}, None),
            ({'order': 'F'}, None),
            ({'order': 'A'}, None),
            ({'force_python_only': True, 'dtype': None, 'order': 'C'}, None),
            ({'force_python_only': True, 'dtype': 'float32', 'order': 'C'}, 'float32'),
            ({'force_python_only': True, 'dtype': 'float32', 'order': None}, 'float32'),
            ({'force_python_only': True, 'dtype': None, 'order': 'F'}, None),
        ]
        for read_kwargs, cast_to in read_cases:
            pstdata2 = pstdata[:2, :2].read(**read_kwargs)
            wanted = pstdata.val[:2, :2]
            if cast_to is not None:
                wanted = wanted.astype(dtype=cast_to)
            np.testing.assert_array_almost_equal(pstdata2.val, wanted, decimal=10)

        pstdata4 = pstdata[::, ::].read(force_python_only=True)
        np.testing.assert_array_almost_equal(pstdata4.val, pstdata.val, decimal=10)

        logging.info("done with test")
示例#11
0
    def test_read(self):
        """Exercise lookup, slicing and ``read`` options on an in-memory ``PstData``.

        Builds a 3x2 ``PstData`` with random values, checks row/column lookup
        and property slicing, then reads the top-left 2x2 selection under a
        range of ``order`` / ``dtype`` / ``force_python_only`` settings and
        compares each result with the matching slice of ``pstdata.val``.
        """
        np.random.seed(0)
        row_property = np.array([[1.0, 2, 2.5], [3, 4, 4.5], [5, 6, 6.5]])
        col_property = np.array([[1.0, 2, 2.5, 1], [3, 4, 4.5, 3]])
        pstdata = PstData(row=np.array([[1.0, 2], [3, 4], [5, 6]]),
                          col=np.array([["A", "a"], ["B", "b"]]),
                          val=np.random.normal(.5, 2, size=(3, 2)),
                          row_property=row_property,
                          col_property=col_property,
                          name="test_read")

        assert pstdata.row_to_index([np.array([3.0, 4])])[0] == 1
        assert pstdata.col_to_index([np.array(["A", "a"])])[0] == 0
        lower_rows = pstdata[1:, :2]
        assert np.array_equal(lower_rows.row_property, row_property[1:])
        assert np.array_equal(pstdata[1:, :2].col_property, col_property[:2])

        pstdata2 = pstdata[:2, :2].read()
        from pysnptools.kernelreader.test import _fortesting_JustCheckExists
        for candidate in (pstdata, pstdata2):
            _fortesting_JustCheckExists().input(candidate)

        np.testing.assert_array_almost_equal(pstdata2.val, pstdata.val[:2, :2], decimal=10)
        pstdata3 = pstdata[[], :].read()
        assert pstdata3.val.shape[0] == 0 and pstdata3.val.shape[1] == 2

        # Switch the in-memory values to Fortran order before the remaining reads.
        pstdata.val = pstdata.val.copy(order='F')

        # Each entry: (keyword arguments for read, dtype to cast the expectation to or None).
        read_cases = [
            ({}, None),
            ({'order': 'F'}, None),
            ({'order': 'A'}, None),
            ({'force_python_only': True, 'dtype': None, 'order': 'C'}, None),
            ({'force_python_only': True, 'dtype': 'float32', 'order': 'C'}, 'float32'),
            ({'force_python_only': True, 'dtype': 'float32', 'order': None}, 'float32'),
            ({'force_python_only': True, 'dtype': None, 'order': 'F'}, None),
        ]
        for read_kwargs, cast_to in read_cases:
            pstdata2 = pstdata[:2, :2].read(**read_kwargs)
            wanted = pstdata.val[:2, :2]
            if cast_to is not None:
                wanted = wanted.astype(dtype=cast_to)
            np.testing.assert_array_almost_equal(pstdata2.val, wanted, decimal=10)

        pstdata4 = pstdata[::, ::].read(force_python_only=True)
        np.testing.assert_array_almost_equal(pstdata4.val, pstdata.val, decimal=10)

        logging.info("done with test")
示例#12
0
    def test_c_reader_ped(self):
        """Round-trip a subset of ``self.snpdata`` through the ``Ped`` format.

        The branch that reads ``examples/toydata.ped`` directly is disabled
        because it is too slow for routine testing; the subset is taken from
        ``self.snpdata`` instead. One missing value is injected, the data is
        written and re-read, and the two are compared with
        ``TestPySnpTools.assert_match_012_210``.
        """
        if False: #Too slow for routine testing
            snpdata1 = Ped(self.currentFolder + "/examples/toydata.ped")[::25,::1000].read()
            self.assertEqual(np.float64, snpdata1.val.dtype)
            TestPySnpTools.assert_match_012_210(self.snpdata[::25,::1000].read(),snpdata1)
        else:
            snpdata1 = self.snpdata[::25,::1000].read()

        output = "tempdir/snpreader/toydata.ped"
        create_directory_if_necessary(output)

        # Inject a missing value to test writing and reading missing values.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        snpdata1.val[1,2] = np.nan
        Ped.write(output, snpdata1)
        snpreader = Ped(output)
        _fortesting_JustCheckExists().input(snpreader)
        str(snpreader)  # only checks that rendering the reader does not raise
        snpdata2 = snpreader.read()
        TestPySnpTools.assert_match_012_210(snpdata1,snpdata2)
示例#13
0
    def test_write_distnpz_f64cpp_5(self):
        """Write the first ``iid_index`` rows of the toy DistNpz file and read them back.

        The rows are read as Fortran-ordered float64, one value is set to NaN
        when any rows exist, and the written file must reproduce the values
        to 10 decimal places.
        """
        source_reader = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")

        _fortesting_JustCheckExists().input(source_reader)

        iid_index = 5
        logging.info("iid={0}".format(iid_index))
        # NOTE: an earlier, now disabled, step trimmed the reader whenever its
        # iid_count was divisible by 4 ("divisible by 4 isn't a good test").
        head_rows = source_reader[0:iid_index,:]
        original = head_rows.read(order='F',dtype=np.float64)
        if original.iid_count > 0:
            # Plant a missing value so NaN handling is covered by the round trip.
            original.val[-1,0] = float("NAN")
        output = "tempdir/toydata.F64cpp.{0}.dist.npz".format(iid_index)
        create_directory_if_necessary(output)
        DistNpz.write(output, original)
        reread = DistNpz(output).read()
        np.testing.assert_array_almost_equal(original.val, reread.val, decimal=10)
示例#14
0
    def test_write_bed_f64cpp_5(self):
        """Write the first ``iid_index`` rows of the toy Bed file and read them back.

        The rows are read as Fortran-ordered float64, one value is set to NaN
        when any rows exist, and the written file must reproduce the values
        to 10 decimal places. All Bed calls pass ``count_A1=False``.
        """
        source_reader = Bed(self.currentFolder + "/examples/toydata",count_A1=False)

        from pysnptools.kernelreader.test import _fortesting_JustCheckExists
        _fortesting_JustCheckExists().input(source_reader)

        iid_index = 5
        logging.info("iid={0}".format(iid_index))
        # NOTE: an earlier, now disabled, step trimmed the reader whenever its
        # iid_count was divisible by 4 ("divisible by 4 isn't a good test").
        head_rows = source_reader[0:iid_index,:]
        original = head_rows.read(order='F',dtype=np.float64)
        if original.iid_count > 0:
            # Plant a missing value so NaN handling is covered by the round trip.
            original.val[-1,0] = float("NAN")
        output = "tempdir/toydata.F64cpp.{0}".format(iid_index)
        create_directory_if_necessary(output)
        Bed.write(output, original, count_A1=False)
        reread = Bed(output,count_A1=False).read()
        np.testing.assert_array_almost_equal(original.val, reread.val, decimal=10)
示例#15
0
    def test_c_reader_dat(self):
        """Read a column subset of the toy Dat file and round-trip it via ``Dat.write``.

        Every 100th column of ``examples/toydata.dat`` is read, checked for a
        float64 dtype and compared against ``self.snps``. The data, with one
        injected missing value, is then written and re-read; the same is done
        for a selection that has no columns.
        """
        snpreader = Dat(self.currentFolder + "/examples/toydata.dat")[:,::100]
        _fortesting_JustCheckExists().input(snpreader)

        snpdata1 = snpreader.read()
        self.assertEqual(np.float64, snpdata1.val.dtype)
        self.assertTrue(np.allclose(self.snps[:,::100], snpdata1.val, rtol=1e-05, atol=1e-05))

        # Inject a missing value to test writing and reading missing values.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        snpdata1.val[1,2] = np.nan
        output = "tempdir/snpreader/toydata.dat"
        create_directory_if_necessary(output)
        Dat.write(output,snpdata1)
        snpdata2 = Dat(output).read()
        np.testing.assert_array_almost_equal(snpdata1.val, snpdata2.val, decimal=10)

        snpdata3 = snpdata1[:,0:0].read() #create snpdata with no sids
        output = "tempdir/snpreader/toydata3.dat"
        Dat.write(output,snpdata3)
        snpdata4 = Dat(output).read()
        assert snpdata3 == snpdata4
示例#16
0
文件: test.py 项目: hyacz/PySnpTools
    def test_c_reader_ped(self):
        """Round-trip a subset of ``self.snpdata`` through the ``Ped`` format.

        The branch that reads ``examples/toydata.ped`` directly is disabled
        because it is too slow for routine testing; the subset is taken from
        ``self.snpdata`` instead. One missing value is injected, the data is
        written and re-read, and the two are compared with
        ``TestLoader.assert_match_012_210``.
        """
        if False:  #Too slow for routine testing
            snpdata1 = Ped(self.currentFolder +
                           "/examples/toydata.ped")[::25, ::1000].read()
            self.assertEqual(np.float64, snpdata1.val.dtype)
            TestLoader.assert_match_012_210(self.snpdata[::25, ::1000].read(),
                                            snpdata1)
        else:
            snpdata1 = self.snpdata[::25, ::1000].read()

        output = "tempdir/snpreader/toydata.ped"
        create_directory_if_necessary(output)

        # Inject a missing value to test writing and reading missing values.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        snpdata1.val[1, 2] = np.nan
        Ped.write(output, snpdata1)
        snpreader = Ped(output)
        _fortesting_JustCheckExists().input(snpreader)
        str(snpreader)  # only checks that rendering the reader does not raise
        snpdata2 = snpreader.read()
        TestLoader.assert_match_012_210(snpdata1, snpdata2)
示例#17
0
文件: test.py 项目: hyacz/PySnpTools
    def test_write_bed_f64cpp_5(self):
        """Write the first ``iid_index`` rows of the toy Bed file and read them back.

        The rows are read as Fortran-ordered float64, one value is set to NaN
        when any rows exist, and the written file must reproduce the values
        to 10 decimal places. All Bed calls pass ``count_A1=False``.
        """
        source_reader = Bed(self.currentFolder + "/examples/toydata",
                            count_A1=False)

        from pysnptools.kernelreader.test import _fortesting_JustCheckExists
        _fortesting_JustCheckExists().input(source_reader)

        iid_index = 5
        logging.info("iid={0}".format(iid_index))
        # NOTE: an earlier, now disabled, step trimmed the reader whenever its
        # iid_count was divisible by 4 ("divisible by 4 isn't a good test").
        head_rows = source_reader[0:iid_index, :]
        original = head_rows.read(order='F', dtype=np.float64)
        if original.iid_count > 0:
            # Plant a missing value so NaN handling is covered by the round trip.
            original.val[-1, 0] = float("NAN")
        output = "tempdir/toydata.F64cpp.{0}".format(iid_index)
        create_directory_if_necessary(output)
        Bed.write(output, original, count_A1=False)
        reread = Bed(output, count_A1=False).read()
        np.testing.assert_array_almost_equal(original.val,
                                             reread.val,
                                             decimal=10)
示例#18
0
    def test_writes(self):
        """Round-trip ``DistData`` through every distribution file format.

        For each row count, column count and choice of ``pos`` (``None`` or
        integer triples), a ``DistData`` is written with ``DistNpz``, ``Bgen``
        (32 bits), ``DistHdf5`` and ``DistMemMap``, read back whole and through
        a ``[::2, ::3]`` selection, and compared with the in-memory data. The
        per-format sets below list which combinations are skipped and which
        comparisons are relaxed for a given suffix.
        """
        from pysnptools.distreader import DistData, DistHdf5, DistNpz, DistMemMap, Bgen
        from pysnptools.kernelreader.test import _fortesting_JustCheckExists

        # Each entry: (class, suffix, optional constructor, optional writer).
        the_class_and_suffix_list = [(DistNpz,"npz",None,None),
                                     (Bgen,"bgen",None,lambda filename,distdata: Bgen.write(filename,distdata,bits=32)),
                                     (DistHdf5,"hdf5",None,None),
                                     (DistMemMap,"memmap",None,None)]
        cant_do_col_prop_none_set = {'bgen'}
        cant_do_col_len_0_set = {'bgen'}
        cant_do_row_count_zero_set = {'bgen'}
        # Empty sets are spelled set(): a bare {} literal is an empty dict.
        can_swap_0_2_set = set()
        can_change_col_names_set = set()
        ignore_fam_id_set = set()
        ignore_pos1_set = {'bgen'}
        ignore_pos_set = set()
        erase_any_write_dir = set()

        #===================================
        #    Starting main function
        #===================================
        logging.info("starting 'test_writes'")
        np.random.seed(0)
        output_template = "tempdir/distreader/writes.{0}.{1}"
        create_directory_if_necessary(output_template.format(0,"npz"))
        i = 0
        for row_count in [0,5,2,1]:
            for col_count in [4,2,1,0]:
                val=np.random.random(size=[row_count,col_count,3])
                val /= val.sum(axis=2,keepdims=True)  #make probabilities sum to 1

                # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
                val[val==3]=np.nan
                row = [('0','0'),('1','1'),('2','2'),('3','3'),('4','4')][:row_count]
                col = ['s0','s1','s2','s3','s4'][:col_count]
                for is_none in [True,False]:
                    col_prop = None if is_none else [(x,x,x) for x in range(5)][:col_count]
                    distdata = DistData(iid=row,sid=col,val=val,pos=col_prop,name=str(i))
                    for the_class,suffix,constructor,writer in the_class_and_suffix_list:
                        # Fall back to the class itself and its write method
                        # when no custom constructor or writer is supplied.
                        constructor = constructor or the_class
                        writer = writer or the_class.write
                        if col_count == 0 and suffix in cant_do_col_len_0_set:
                            continue
                        if col_prop is None and suffix in cant_do_col_prop_none_set:
                            continue
                        if row_count==0 and suffix in cant_do_row_count_zero_set:
                            continue
                        filename = output_template.format(i,suffix)
                        logging.info(filename)
                        i += 1
                        if suffix in erase_any_write_dir and os.path.exists(filename):
                            shutil.rmtree(filename)
                        ret = writer(filename,distdata)
                        assert ret is not None
                        for subsetter in [None, np.s_[::2,::3]]:
                            reader = constructor(filename)
                            _fortesting_JustCheckExists().input(reader)
                            subreader = reader if subsetter is None else reader[subsetter[0],subsetter[1]]
                            readdata = subreader.read(order='C')
                            expected = distdata if subsetter is None else distdata[subsetter[0],subsetter[1]].read()
                            if suffix not in can_swap_0_2_set:
                                assert np.allclose(readdata.val,expected.val,equal_nan=True)
                            else:
                                # Accept each column either as written or mapped through x -> 2 - x.
                                for col_index in range(readdata.col_count):
                                    assert (np.allclose(readdata.val[:,col_index],expected.val[:,col_index],equal_nan=True) or
                                            np.allclose(readdata.val[:,col_index]*-1+2,expected.val[:,col_index],equal_nan=True))
                            if suffix not in ignore_fam_id_set:
                                assert np.array_equal(readdata.row,expected.row)
                            else:
                                assert np.array_equal(readdata.row[:,1],expected.row[:,1])
                            if suffix not in can_change_col_names_set:
                                assert np.array_equal(readdata.col,expected.col)
                            else:
                                assert readdata.col_count==expected.col_count
                            assert np.array_equal(readdata.row_property,expected.row_property) or (readdata.row_property.shape[1]==0 and expected.row_property.shape[1]==0)

                            if suffix in ignore_pos1_set:
                                # Only columns 0 and 2 of the column property are compared for these formats.
                                assert np.allclose(readdata.col_property[:,[0,2]],expected.col_property[:,[0,2]],equal_nan=True) or (readdata.col_property.shape[1]==0 and expected.col_property.shape[1]==0)
                            elif suffix not in ignore_pos_set:
                                assert np.allclose(readdata.col_property,expected.col_property,equal_nan=True) or (readdata.col_property.shape[1]==0 and expected.col_property.shape[1]==0)
                            else:
                                assert len(readdata.col_property)==len(expected.col_property)
                        # Best-effort cleanup: a file that cannot be removed
                        # must not fail the test, but other errors still surface.
                        try:
                            os.remove(filename)
                        except OSError:
                            pass
        logging.info("done with 'test_writes'")
示例#19
0
    def test_writes(self):
        """Round-trip ``PstData`` through ``PstMemMap``, ``PstHdf5`` and ``PstNpz``.

        For every combination of row count, column count, value shape (2-D, or
        3-D with a last axis of 3 or 1), id generator and property generator,
        the data is written with each class and read back whole and through a
        ``[::2, ::3]`` selection, for each ``order`` and ``force_python_only``
        setting. All files live in a temporary directory that is removed even
        when an assertion fails.
        """
        #===================================
        #    Defining sub functions
        #===================================
        def _oned_int(c):
            return range(c)

        def _oned_str(c):
            return [str(i).encode('ascii') for i in range(c)]

        def _twooned_int(c):
            return [[i] for i in range(c)]

        def _twooned_str(c):
            return [[str(i).encode('ascii')] for i in range(c)]

        def _twotwod_int(c):
            return [[i, i] for i in range(c)]

        def _twotwod_str(c):
            return [[str(i).encode('ascii'), b"hello"] for i in range(c)]

        def _none(c):
            return None

        def _zero(c):
            return np.empty([c, 0], dtype='S')

        def _props_match(actual, wanted):
            # Properties match when equal, or when both have zero columns.
            return np.array_equal(actual, wanted) or (
                actual.shape[1] == 0 and wanted.shape[1] == 0)

        #===================================
        #    Starting main function
        #===================================
        logging.info("starting 'test_writes'")
        np.random.seed(0)
        # Unicode ids/properties are not generated here: per the original
        # notes they cannot be round-tripped through hdf5.
        id_gen_list = [
            _oned_int, _oned_str, _twooned_int, _twooned_str,
            _twotwod_int, _twotwod_str
        ]
        prop_gen_list = [
            _none, _oned_str, _oned_int, _twooned_int,
            _twooned_str, _twotwod_int, _twotwod_str, _zero
        ]
        class_and_suffix_list = [(PstMemMap, "memmap"),
                                 (PstHdf5, "hdf5"),
                                 (PstNpz, "npz")]
        # The context manager guarantees the directory is cleaned up even if
        # an assertion below raises.
        with tempfile.TemporaryDirectory("pstreader") as temp_dir_name:
            output_template = temp_dir_name + '/writes.{0}.{1}'
            i = 0
            for row_count in [5, 2, 1, 0]:
                for col_count in [4, 2, 1, 0]:
                    for val_shape in [3, None, 1]:
                        if val_shape is None:
                            val = np.random.normal(.5, 2, size=(row_count, col_count))
                        else:
                            val = np.random.normal(
                                .5, 2, size=(row_count, col_count, val_shape))
                        for row_or_col_gen in id_gen_list:
                            row = row_or_col_gen(row_count)
                            col = row_or_col_gen(col_count)
                            for prop_gen in prop_gen_list:
                                row_prop = prop_gen(row_count)
                                col_prop = prop_gen(col_count)
                                pstdata = PstData(row, col, val, row_prop,
                                                  col_prop, str(i))
                                for the_class, suffix in class_and_suffix_list:
                                    filename = output_template.format(i, suffix)
                                    logging.info(filename)
                                    i += 1
                                    the_class.write(filename, pstdata)
                                    # The hdf5 reader is opened with an explicit block_size.
                                    if suffix != 'hdf5':
                                        reader = the_class(filename)
                                    else:
                                        reader = the_class(filename, block_size=3)
                                    _fortesting_JustCheckExists().input(reader)
                                    for subsetter in [None, np.s_[::2, ::3]]:
                                        if subsetter is None:
                                            subreader = reader
                                            expected = pstdata
                                        else:
                                            subreader = reader[subsetter[0], subsetter[1]]
                                            expected = pstdata[subsetter[0], subsetter[1]].read()
                                        for order in ['C', 'F', 'A']:
                                            for force_python_only in [True, False]:
                                                readdata = subreader.read(
                                                    order=order,
                                                    force_python_only=force_python_only)
                                                assert np.array_equal(readdata.val, expected.val)
                                                assert np.array_equal(readdata.row, expected.row)
                                                assert np.array_equal(readdata.col, expected.col)
                                                assert _props_match(readdata.row_property,
                                                                    expected.row_property)
                                                assert _props_match(readdata.col_property,
                                                                    expected.col_property)
                                    # memmap and hdf5 readers are flushed before the file is deleted.
                                    if suffix in {'memmap', 'hdf5'}:
                                        reader.flush()
                                    os.remove(filename)
        logging.info("done with 'test_writes'")