Exemplo n.º 1
0
    def __init__(self, iid, test=None):  #!!! add docs and test for test
        """Set up the reader's row ids (``iid``) and column ids (``test``).

        :param iid: ids for the rows; normalized with ``PstData._fixup_input``
            using a ``str`` dtype.
        :param test: optional ids for the columns. When omitted (or when it is
            the very same object as ``iid``) the normalized ``iid`` array is
            reused, so rows and columns share one array.

        Empty id arrays are replaced by ``self._empty``.
        """
        super(Identity, self).__init__()

        def _as_id_array(raw_ids):
            # One normalization recipe for both the row and the column ids.
            return PstData._fixup_input(
                raw_ids,
                empty_creator=lambda ignore: np.empty([0, 2], dtype='str'),
                dtype='str')

        # Decide on sharing before 'iid' is rebound to its normalized form.
        columns_share_rows = test is None or test is iid

        iid = _as_id_array(iid)
        test = iid if columns_share_rows else _as_id_array(test)

        self._row0 = iid if len(iid) > 0 else self._empty
        self._row1 = test if len(test) > 0 else self._empty
Exemplo n.º 2
0
    def __init__(self,
                 filename,
                 count_A1=None,
                 iid=None,
                 sid=None,
                 pos=None,
                 skip_format_check=False
                 ):  #!!!document these new optionals. they are here
        """Record the settings for reading a Bed file.

        :param filename: stored as ``self.filename``.
        :param count_A1: stored as ``self.count_A1``. When left as ``None`` a
            ``FutureWarning`` is issued and ``False`` is used.
        :param iid: optional row ids; when given, normalized into ``self._row``.
        :param sid: optional column ids; when given, normalized into ``self._col``.
        :param pos: optional column properties; when given, normalized into
            ``self._col_property`` with one row per entry of ``self._col``.
        :param skip_format_check: stored as ``self.skip_format_check``.
        """
        super(Bed, self).__init__()

        self._ran_once = False
        self._file_pointer = None

        self.filename = filename
        if count_A1 is None:
            warnings.warn(
                "'count_A1' was not set. For now it will default to 'False', but in the future it will default to 'True'",
                FutureWarning)
            count_A1 = False
        self.count_A1 = count_A1
        self.skip_format_check = skip_format_check
        if iid is not None:
            self._row = PstData._fixup_input(
                iid, empty_creator=lambda ignore: np.empty([0, 2], dtype=str))
        if sid is not None:
            self._col = PstData._fixup_input(
                sid, empty_creator=lambda ignore: np.empty([0], dtype=str))
        if pos is not None:
            # NOTE(review): this reads self._col, so 'pos' without 'sid' relies
            # on self._col being available from elsewhere -- confirm.
            self._col_property = PstData._fixup_input(
                pos,
                count=len(self._col),
                # np.full keeps the (count, 3) shape even for count == 0; the
                # previous list-multiplication form collapsed to shape (0,).
                empty_creator=lambda count: np.full([count, 3], np.nan))
Exemplo n.º 3
0
 def cmktest_repr_test(self):
     """Build a small PstData with row/column properties, look up a column and render it as a string."""
     np.random.seed(0)
     pstdata = PstData(
         row=np.array([[1.0,2],[3,4],[5,6]]),
         col=np.array([("A","a"),("B","b")]),
         val=np.random.normal(.5,2,size=(3,2)),
         row_property=np.array([[1.0,2,2.5],[3,4,4.5],[5,6,6.5]]),
         col_property=np.array([[1.0,2,2.5,1],[3,4,4.5,3]]))
     found = pstdata.col_to_index([("B","b")])
     assert found[0] == 1
     # The string itself is not inspected; the conversion only has to succeed.
     str(pstdata)
Exemplo n.º 4
0
 def _run_once(self):
     """Initialize the PstData part of this object unless ``self._ran_once`` is already set.

     The pieces come from ``self._run_once_inner()``.
     NOTE(review): ``_ran_once`` is not set here; presumably
     ``_run_once_inner`` does that -- confirm.
     """
     if not self._ran_once:
         row, col, val, row_property, col_property = self._run_once_inner()
         display_name = "np.memmap('{0}')".format(self._filename)
         PstData.__init__(self, row, col, val, row_property, col_property,
                          name=display_name)
Exemplo n.º 5
0
 def __init__(self, iid, sid, val, pos=None, name=None, parent_string=None, copyinputs_function=None):
     """Create the object from ids, values and optional positions.

     :param iid: row ids, normalized into ``self._row`` (fallback: empty ``(0, 2)`` str array).
     :param sid: column ids, normalized into ``self._col`` (fallback: empty ``(0,)`` str array).
     :param val: values, normalized into ``self.val`` (fallback: uninitialized float64
         array of shape ``(row_count, col_count)``).
     :param pos: optional column properties (fallback: NaN-filled ``(col_count, 3)`` array).
     :param name: stored as ``self._name``.
     :param parent_string: deprecated alias for ``name``; triggers a ``DeprecationWarning``.
     :param copyinputs_function: accepted but not used in this method.

     The fallbacks are the ``empty_creator`` callables handed to the PstData
     fix-up helpers (presumably used when the input is missing or empty -- confirm).
     """
     if parent_string is not None:
         warnings.warn("'parent_string' is deprecated. Use 'name'", DeprecationWarning)
     self._row = PstData._fixup_input(iid,empty_creator=lambda ignore:np.empty([0,2],dtype=str))
     self._col = PstData._fixup_input(sid,empty_creator=lambda ignore:np.empty([0],dtype=str))
     self._row_property = PstData._fixup_input(None,count=len(self._row),empty_creator=lambda count:np.empty([count,0],dtype=str))
     # np.full keeps the (count, 3) shape even for count == 0; the previous
     # list-multiplication form collapsed to shape (0,) in that case.
     self._col_property = PstData._fixup_input(pos,count=len(self._col),empty_creator=lambda count:np.full([count, 3], np.nan))
     self.val = PstData._fixup_input_val(val,row_count=len(self._row),col_count=len(self._col),empty_creator=lambda row_count,col_count:np.empty([row_count,col_count],dtype=np.float64))
     self._assert_iid_sid_pos()
     self._name = name or parent_string or ""
     self._std_string_list = []
Exemplo n.º 6
0
 def test_repr_test(self):
     """Build a small PstData with row/column properties, look up a column and render it as a string."""
     np.random.seed(0)
     pstdata = PstData(
         row=np.array([[1.0,2],[3,4],[5,6]]),
         col=np.array([("A","a"),("B","b")]),
         val=np.random.normal(.5,2,size=(3,2)),
         row_property=np.array([[1.0,2,2.5],[3,4,4.5],[5,6,6.5]]),
         col_property=np.array([[1.0,2,2.5,1],[3,4,4.5,3]]))
     found = pstdata.col_to_index([("B","b")])
     assert found[0] == 1
     # The string itself is not inspected; the conversion only has to succeed.
     str(pstdata)
Exemplo n.º 7
0
    def __init__(self, iid, sid, val, pos=None, name=None, copyinputs_function=None):
        """Create the object from ids, values and optional positions.

        :param iid: row ids, normalized into ``self._row`` as ``str``.
        :param sid: column ids, normalized into ``self._col`` as ``str``.
        :param val: values, normalized into ``self._val``; the fallback creator
            makes an uninitialized float64 array of shape ``(row_count, col_count, 3)``.
        :param pos: optional column properties; the fallback creator makes a
            NaN-filled ``(col_count, 3)`` array.
        :param name: stored as ``self._name`` (empty string when falsy).
        :param copyinputs_function: accepted but not used in this method.
        """
        # No 'super(DistData, self).__init__()' call here: DistData takes full
        # responsibility for initializing both of its superclasses.

        def _no_rows(ignore):
            return np.empty([0,2],dtype='str')

        def _no_cols(ignore):
            return np.empty([0],dtype='str')

        def _no_row_properties(count):
            return np.empty([count,0],dtype='str')

        def _unknown_positions(count):
            return np.full([count, 3], np.nan)

        def _blank_values(row_count, col_count):
            #!!!Replace empty with my FillNA method?
            return np.empty([row_count,col_count,3],dtype=np.float64)

        self._val = None

        self._row = PstData._fixup_input(iid, empty_creator=_no_rows, dtype='str')
        self._col = PstData._fixup_input(sid, empty_creator=_no_cols, dtype='str')
        self._row_property = PstData._fixup_input(
            None, count=len(self._row), empty_creator=_no_row_properties, dtype='str')
        self._col_property = PstData._fixup_input(
            pos, count=len(self._col), empty_creator=_unknown_positions)
        self._val = PstData._fixup_input_val(
            val,
            row_count=len(self._row),
            col_count=len(self._col),
            empty_creator=_blank_values)
        self._assert_iid_sid_pos(check_val=True)
        self._name = name or ""
        self._std_string_list = []
Exemplo n.º 8
0
    def test_inputs3(self):
        """PstData built from list inputs with three rows and an empty column set."""
        from pysnptools.pstreader import PstData
        np.random.seed(0)
        pstdata = PstData(row=[[1.0,2.0],[3,4],[6,7]],
                          col=np.array([]),
                          val=[[],[],[]],
                          row_property=None,
                          col_property=None,
                          name="test_read")

        row_hits = pstdata.row_to_index([[3,4]])
        assert row_hits[0] == 1
        # A subset's properties must equal the same slice of the parent's properties.
        for attr, kept in (("row_property", slice(1, None)),
                           ("col_property", slice(None, 2))):
            assert np.array_equal(getattr(pstdata[1:,:2], attr),
                                  getattr(pstdata, attr)[kept])
        logging.info("done with test")
Exemplo n.º 9
0
    def __init__(self, iid, sid, val, pos=None, name=None, parent_string=None, copyinputs_function=None):
        """Create the object from ids, values and optional positions.

        :param iid: row ids, normalized into ``self._row`` (fallback: empty ``(0, 2)`` str array).
        :param sid: column ids, normalized into ``self._col`` (fallback: empty ``(0,)`` str array).
        :param val: values, normalized into ``self.val`` (fallback: uninitialized float64
            array of shape ``(row_count, col_count)``).
        :param pos: optional column properties (fallback: NaN-filled ``(col_count, 3)`` array).
        :param name: stored as ``self._name``.
        :param parent_string: deprecated alias for ``name``; triggers a ``DeprecationWarning``.
        :param copyinputs_function: accepted but not used in this method.

        The fallbacks are the ``empty_creator`` callables handed to the PstData
        fix-up helpers (presumably used when the input is missing or empty -- confirm).
        """

        #We don't have a 'super(SnpData, self).__init__()' here because SnpData takes full responsibility for initializing both its superclasses

        self.val = None

        if parent_string is not None:
            warnings.warn("'parent_string' is deprecated. Use 'name'", DeprecationWarning)
        self._row = PstData._fixup_input(iid,empty_creator=lambda ignore:np.empty([0,2],dtype=str))
        self._col = PstData._fixup_input(sid,empty_creator=lambda ignore:np.empty([0],dtype=str))
        self._row_property = PstData._fixup_input(None,count=len(self._row),empty_creator=lambda count:np.empty([count,0],dtype=str))
        # np.full keeps the (count, 3) shape even for count == 0; the previous
        # list-multiplication form collapsed to shape (0,) in that case.
        self._col_property = PstData._fixup_input(pos,count=len(self._col),empty_creator=lambda count:np.full([count, 3], np.nan))
        self.val = PstData._fixup_input_val(val,row_count=len(self._row),col_count=len(self._col),empty_creator=lambda row_count,col_count:np.empty([row_count,col_count],dtype=np.float64))
        self._assert_iid_sid_pos()
        self._name = name or parent_string or ""
        self._std_string_list = []
Exemplo n.º 10
0
    def assert_approx_equal(distdata0, distdata1, atol):
        """Assert that two data objects match in ids and properties and nearly match in values.

        ``row``, ``col``, ``row_property`` and ``col_property`` are compared with
        ``PstData._allclose`` (NaNs equal); ``val`` is compared with
        ``np.testing.assert_allclose`` using the absolute tolerance ``atol``.
        """
        from pysnptools.pstreader import PstData

        for attr in ("row", "col", "row_property", "col_property"):
            left = getattr(distdata0, attr)
            right = getattr(distdata1, attr)
            assert PstData._allclose(left, right, equal_nan=True)

        np.testing.assert_allclose(distdata0.val,
                                   distdata1.val,
                                   atol=atol,
                                   equal_nan=True,
                                   verbose=True)
Exemplo n.º 11
0
 def test_every_read(self):
     """Read a reversed subset for every combination of source/target order and dtype,
     2-D and 3-D values, and both ``force_python_only`` settings; check dtype,
     memory layout and contents of the result."""
     orders = ['F', 'C']
     dtypes = [np.float32, np.float64]
     for order_from in orders:
         for order_to in orders:
             for dtype_from in dtypes:
                 for dtype_to in dtypes:
                     for val_shape in [None, 1, 3]:
                         for force_python_only in [True, False]:
                             np.random.seed(0)
                             # None means plain 2-D values; otherwise add a third axis.
                             if val_shape is None:
                                 size = (3, 2)
                             else:
                                 size = (3, 2, val_shape)
                             val = np.array(np.random.normal(.5, 2, size=size),
                                            order=order_from,
                                            dtype=dtype_from)
                             pstdata = PstData(val=val,
                                               row=list(range(3)),
                                               col=list(range(2)))
                             expected = np.array(val[::-2, :][:, ::-1],
                                                 order=order_to,
                                                 dtype=dtype_to)
                             subset = pstdata[::-2, ::-1]
                             result = subset.read(
                                 order=order_to,
                                 dtype=dtype_to,
                                 force_python_only=force_python_only)
                             assert result.val.dtype == dtype_to
                             # order_to is always 'F' or 'C', so it names the flag directly.
                             assert result.val.flags[order_to + '_CONTIGUOUS']
                             assert np.array_equal(result.val, expected)
Exemplo n.º 12
0
    def cmktest_inputs3(self):
        """PstData built from list inputs with three rows and an empty column set."""
        from pysnptools.pstreader import PstData
        np.random.seed(0)
        pstdata = PstData(row=[[1.0,2.0],[3,4],[6,7]],
                          col=np.array([]),
                          val=[[],[],[]],
                          row_property=None,
                          col_property=None,
                          name="test_read")

        row_hits = pstdata.row_to_index([[3,4]])
        assert row_hits[0] == 1
        # A subset's properties must equal the same slice of the parent's properties.
        for attr, kept in (("row_property", slice(1, None)),
                           ("col_property", slice(None, 2))):
            assert np.array_equal(getattr(pstdata[1:,:2], attr),
                                  getattr(pstdata, attr)[kept])
        logging.info("done with test")
Exemplo n.º 13
0
    def __init__(self, iid, sid, val, pos=None, name=None, parent_string=None, copyinputs_function=None):
        """Create the object from ids, values and optional positions.

        :param iid: row ids, normalized into ``self._row`` with dtype ``'S'``.
        :param sid: column ids, normalized into ``self._col`` with dtype ``'S'``.
        :param val: values, normalized into ``self.val`` (fallback: uninitialized float64
            array of shape ``(row_count, col_count)``).
        :param pos: optional column properties (fallback: NaN-filled ``(col_count, 3)`` array).
        :param name: stored as ``self._name``.
        :param parent_string: deprecated alias for ``name``; triggers a ``DeprecationWarning``.
        :param copyinputs_function: accepted but not used in this method.

        The fallbacks are the ``empty_creator`` callables handed to the PstData
        fix-up helpers (presumably used when the input is missing or empty -- confirm).
        """

        #We don't have a 'super(SnpData, self).__init__()' here because SnpData takes full responsibility for initializing both its superclasses

        self.val = None

        if parent_string is not None:
            warnings.warn("'parent_string' is deprecated. Use 'name'", DeprecationWarning)
        self._row = PstData._fixup_input(iid,empty_creator=lambda ignore:np.empty([0,2],dtype='S'),dtype='S')
        self._col = PstData._fixup_input(sid,empty_creator=lambda ignore:np.empty([0],dtype='S'),dtype='S')
        self._row_property = PstData._fixup_input(None,count=len(self._row),empty_creator=lambda count:np.empty([count,0],dtype='S'),dtype='S')
        # np.full keeps the (count, 3) shape even for count == 0; the previous
        # list-multiplication form collapsed to shape (0,) in that case.
        self._col_property = PstData._fixup_input(pos,count=len(self._col),empty_creator=lambda count:np.full([count, 3], np.nan))
        self.val = PstData._fixup_input_val(val,row_count=len(self._row),col_count=len(self._col),empty_creator=lambda row_count,col_count:np.empty([row_count,col_count],dtype=np.float64))
        self._assert_iid_sid_pos()
        self._name = name or parent_string or ""
        self._std_string_list = []
Exemplo n.º 14
0
 def test_writes(self):
     """Round-trip PstData objects through PstNpz and PstHdf5 files.

     Every combination of row/column counts, id layouts and property layouts
     is written, read back (whole and as a ``[::2, ::3]`` subset) and compared
     with the in-memory original. Each file is removed afterwards on a
     best-effort basis.
     """
     #===================================
     #    Defining sub functions
     #===================================
     def _oned_int(c):
         return list(range(c))
     def _oned_str(c):
         return [str(i) for i in range(c)]
     def _twooned_int(c):
         return [[i] for i in range(c)]
     def _twooned_str(c):
         return [[str(i)] for i in range(c)]
     def _twotwod_int(c):
         return [[i,i] for i in range(c)]
     def _twotwod_str(c):
         return [[str(i),"hello"] for i in range(c)]
     def _none(c):
         return None
     def _zero(c):
         return np.empty([c,0])
     #===================================
     #    Starting main function
     #===================================
     logging.info("starting 'test_writes'")
     np.random.seed(0)
     output_template = "tempdir/pstreader/writes.{0}.{1}"
     create_directory_if_necessary(output_template.format(0,"npz"))
     i = 0
     for row_count in [5,2,1,0]:
         for col_count in [4,2,1,0]:
             val = np.random.normal(.5,2,size=(row_count,col_count))
             for row_or_col_gen in [_oned_int,_oned_str,_twooned_int,_twooned_str,_twotwod_int,_twotwod_str]:
                 row = row_or_col_gen(row_count)
                 col = row_or_col_gen(col_count)
                 for prop_gen in [_oned_int,_oned_str,_twooned_int,_twooned_str,_twotwod_int,_twotwod_str,_none,_zero]:
                     row_prop = prop_gen(row_count)
                     col_prop = prop_gen(col_count)
                     pstdata = PstData(row,col,val,row_prop,col_prop,str(i))
                     for the_class,suffix in [(PstNpz,"npz"),(PstHdf5,"hdf5")]:
                         filename = output_template.format(i,suffix)
                         logging.info(filename)
                         i += 1
                         the_class.write(filename,pstdata)
                         # np.s_ builds the same tuple of slices as the deprecated
                         # scipy re-export ('sp.s_') used previously.
                         for subsetter in [None, np.s_[::2,::3]]:
                             reader = the_class(filename)
                             _fortesting_JustCheckExists().input(reader)
                             subreader = reader if subsetter is None else reader[subsetter[0],subsetter[1]]
                             readdata = subreader.read(order='C')
                             expected = pstdata if subsetter is None else pstdata[subsetter[0],subsetter[1]].read()
                             assert np.array_equal(readdata.val,expected.val)
                             assert np.array_equal(readdata.row,expected.row)
                             assert np.array_equal(readdata.col,expected.col)
                             assert np.array_equal(readdata.row_property,expected.row_property)
                             assert np.array_equal(readdata.col_property,expected.col_property)
                         try:
                             os.remove(filename)
                         except OSError:
                             # Cleanup is best effort (e.g. the file may still be
                             # held open); unrelated exceptions are no longer hidden.
                             pass
     logging.info("done with 'test_writes'")
Exemplo n.º 15
0
    def __init__(self,
                 iid,
                 sid,
                 val,
                 pos=None,
                 name=None,
                 parent_string=None,
                 copyinputs_function=None,
                 xp=None,
                 _require_float32_64=True):
        """Create the object from ids, values and optional positions.

        :param iid: row ids, normalized into ``self._row`` as ``str``.
        :param sid: column ids, normalized into ``self._col`` as ``str``.
        :param val: values, normalized into ``self._val``; the fallback creator
            makes an uninitialized float64 array of shape ``(row_count, col_count)``.
        :param pos: optional column properties; the fallback creator makes a
            NaN-filled ``(col_count, 3)`` array.
        :param name: stored as ``self._name``.
        :param parent_string: deprecated alias for ``name``; triggers a ``DeprecationWarning``.
        :param copyinputs_function: accepted but not used in this method.
        :param xp: resolved through ``pstutil.array_module`` and stored as ``self._xp``.
        :param _require_float32_64: forwarded to ``PstData._fixup_input_val``.
        """
        # No 'super(SnpData, self).__init__()' call here: SnpData takes full
        # responsibility for initializing both of its superclasses.

        def _no_rows(ignore):
            return np.empty([0, 2], dtype='str')

        def _no_cols(ignore):
            return np.empty([0], dtype='str')

        def _no_row_properties(count):
            return np.empty([count, 0], dtype='str')

        def _unknown_positions(count):
            return np.full([count, 3], np.nan)

        def _blank_values(row_count, col_count):
            return np.empty([row_count, col_count], dtype=np.float64)

        xp = pstutil.array_module(xp)
        self._val = None

        if parent_string is not None:
            warnings.warn("'parent_string' is deprecated. Use 'name'",
                          DeprecationWarning)

        self._row = PstData._fixup_input(iid, empty_creator=_no_rows, dtype='str')
        self._col = PstData._fixup_input(sid, empty_creator=_no_cols, dtype='str')
        self._row_property = PstData._fixup_input(
            None, count=len(self._row), empty_creator=_no_row_properties, dtype='str')
        self._col_property = PstData._fixup_input(
            pos, count=len(self._col), empty_creator=_unknown_positions)
        self._val = PstData._fixup_input_val(
            val,
            row_count=len(self._row),
            col_count=len(self._col),
            empty_creator=_blank_values,
            _require_float32_64=_require_float32_64,
            xp=xp)
        self._assert_iid_sid_pos(check_val=True)
        self._name = name or parent_string or ""
        self._std_string_list = []
        self._xp = xp
Exemplo n.º 16
0
 def val(self, new_value):
     """Replace the stored values with ``new_value`` after normalization, then re-validate.

     The fallback creator handed to ``PstData._fixup_input_val`` makes an
     uninitialized float64 array of shape ``(row_count, col_count)``.
     """
     def _blank_values(row_count, col_count):
         return np.empty([row_count, col_count], dtype=np.float64)

     n_rows = len(self._row)
     n_cols = len(self._col)
     self._val = PstData._fixup_input_val(new_value,
                                          row_count=n_rows,
                                          col_count=n_cols,
                                          empty_creator=_blank_values)
     self._assert_iid0_iid1(check_val=True)
Exemplo n.º 17
0
    def cmktest_inputs2(self):
        """PstData built from one-dimensional row and column id arrays."""
        from pysnptools.pstreader import PstData
        np.random.seed(0)
        pstdata = PstData(row=np.array([1.0,3,6]),
                          col=np.array(["Aa","Bb"]),
                          val=np.random.normal(.5,2,size=(3,2)),
                          row_property=None,
                          col_property=None,
                          name="test_read")

        row_hits = pstdata.row_to_index([3])
        assert row_hits[0] == 1
        col_hits = pstdata.col_to_index(["Aa"])
        assert col_hits[0] == 0
        # A subset's properties must equal the same slice of the parent's properties.
        for attr, kept in (("row_property", slice(1, None)),
                           ("col_property", slice(None, 2))):
            assert np.array_equal(getattr(pstdata[1:,:2], attr),
                                  getattr(pstdata, attr)[kept])
        logging.info("done with test")
Exemplo n.º 18
0
    def test_inputs2(self):
        """PstData built from one-dimensional row and column id arrays."""
        from pysnptools.pstreader import PstData
        np.random.seed(0)
        pstdata = PstData(row=np.array([1.0,3,6]),
                          col=np.array(["Aa","Bb"]),
                          val=np.random.normal(.5,2,size=(3,2)),
                          row_property=None,
                          col_property=None,
                          name="test_read")

        row_hits = pstdata.row_to_index([3])
        assert row_hits[0] == 1
        col_hits = pstdata.col_to_index(["Aa"])
        assert col_hits[0] == 0
        # A subset's properties must equal the same slice of the parent's properties.
        for attr, kept in (("row_property", slice(1, None)),
                           ("col_property", slice(None, 2))):
            assert np.array_equal(getattr(pstdata[1:,:2], attr),
                                  getattr(pstdata, attr)[kept])
        logging.info("done with test")
Exemplo n.º 19
0
    def test_read(self):
        """Check index lookups, subset properties and ``read()`` under several order/dtype options."""
        np.random.seed(0)
        row_property=np.array([[1.0,2,2.5],[3,4,4.5],[5,6,6.5]])
        col_property=np.array([[1.0,2,2.5,1],[3,4,4.5,3]])
        pstdata = PstData(row=np.array([[1.0,2],[3,4],[5,6]]),
                          col=np.array([["A","a"],["B","b"]]),
                          val = np.random.normal(.5,2,size=(3,2)),
                          row_property=row_property,
                          col_property=col_property,
                          name="test_read")

        assert pstdata.row_to_index([np.array([3.0,4])])[0] == 1
        assert pstdata.col_to_index([np.array(["A","a"])])[0] == 0
        assert np.array_equal(pstdata[1:,:2].row_property,row_property[1:])
        assert np.array_equal(pstdata[1:,:2].col_property,col_property[:2])

        pstdata2 = pstdata[:2,:2].read()
        from pysnptools.kernelreader.test import _fortesting_JustCheckExists
        for candidate in (pstdata, pstdata2):
            _fortesting_JustCheckExists().input(candidate)

        np.testing.assert_array_almost_equal(pstdata2.val, pstdata.val[:2,:2], decimal=10)
        pstdata3 = pstdata[[],:].read()
        assert pstdata3.val.shape[0] == 0 and pstdata3.val.shape[1]==2

        # From here on the source values are Fortran-ordered.
        pstdata.val = pstdata.val.copy(order='F')

        # (keyword arguments for read(), dtype the expected values are cast to or None)
        read_variants = [
            ({}, None),
            ({'order': 'F'}, None),
            ({'order': 'A'}, None),
            ({'force_python_only': True, 'dtype': None, 'order': 'C'}, None),
            ({'force_python_only': True, 'dtype': 'float32', 'order': 'C'}, 'float32'),
            ({'force_python_only': True, 'dtype': 'float32', 'order': None}, 'float32'),
            ({'force_python_only': True, 'dtype': None, 'order': 'F'}, None),
        ]
        for read_kwargs, cast_to in read_variants:
            actual = pstdata[:2,:2].read(**read_kwargs)
            wanted = pstdata.val[:2,:2]
            if cast_to is not None:
                wanted = wanted.astype(dtype=cast_to)
            np.testing.assert_array_almost_equal(actual.val, wanted, decimal=10)

        pstdata4 = pstdata[::,::].read(force_python_only=True)
        np.testing.assert_array_almost_equal(pstdata4.val, pstdata.val, decimal=10)

        logging.info("done with test")
Exemplo n.º 20
0
    def test_read(self):
        """Check index lookups, subset properties and ``read()`` under several order/dtype options."""
        np.random.seed(0)
        row_property=np.array([[1.0,2,2.5],[3,4,4.5],[5,6,6.5]])
        col_property=np.array([[1.0,2,2.5,1],[3,4,4.5,3]])
        pstdata = PstData(row=np.array([[1.0,2],[3,4],[5,6]]),
                          col=np.array([["A","a"],["B","b"]]),
                          val = np.random.normal(.5,2,size=(3,2)),
                          row_property=row_property,
                          col_property=col_property,
                          name="test_read")

        assert pstdata.row_to_index([np.array([3.0,4])])[0] == 1
        assert pstdata.col_to_index([np.array(["A","a"])])[0] == 0
        assert np.array_equal(pstdata[1:,:2].row_property,row_property[1:])
        assert np.array_equal(pstdata[1:,:2].col_property,col_property[:2])

        pstdata2 = pstdata[:2,:2].read()
        from pysnptools.kernelreader.test import _fortesting_JustCheckExists
        for candidate in (pstdata, pstdata2):
            _fortesting_JustCheckExists().input(candidate)

        np.testing.assert_array_almost_equal(pstdata2.val, pstdata.val[:2,:2], decimal=10)
        pstdata3 = pstdata[[],:].read()
        assert pstdata3.val.shape[0] == 0 and pstdata3.val.shape[1]==2

        # From here on the source values are Fortran-ordered.
        pstdata.val = pstdata.val.copy(order='F')

        # (keyword arguments for read(), dtype the expected values are cast to or None)
        read_variants = [
            ({}, None),
            ({'order': 'F'}, None),
            ({'order': 'A'}, None),
            ({'force_python_only': True, 'dtype': None, 'order': 'C'}, None),
            ({'force_python_only': True, 'dtype': 'float32', 'order': 'C'}, 'float32'),
            ({'force_python_only': True, 'dtype': 'float32', 'order': None}, 'float32'),
            ({'force_python_only': True, 'dtype': None, 'order': 'F'}, None),
        ]
        for read_kwargs, cast_to in read_variants:
            actual = pstdata[:2,:2].read(**read_kwargs)
            wanted = pstdata.val[:2,:2]
            if cast_to is not None:
                wanted = wanted.astype(dtype=cast_to)
            np.testing.assert_array_almost_equal(actual.val, wanted, decimal=10)

        pstdata4 = pstdata[::,::].read(force_python_only=True)
        np.testing.assert_array_almost_equal(pstdata4.val, pstdata.val, decimal=10)

        logging.info("done with test")
Exemplo n.º 21
0
    def __init__(self, filename, count_A1=None, iid=None, sid=None, pos=None, skip_format_check=False): #!!!document these new optionals. they are here
        """Record the settings for reading a Bed file.

        :param filename: stored as ``self.filename``.
        :param count_A1: stored as ``self.count_A1``. When left as ``None`` a
            ``FutureWarning`` is issued and ``False`` is used.
        :param iid: optional row ids; when given, normalized into ``self._row`` with dtype ``'S'``.
        :param sid: optional column ids; when given, normalized into ``self._col`` with dtype ``'S'``.
        :param pos: optional column properties; when given, normalized into
            ``self._col_property`` with one row per entry of ``self._col``.
        :param skip_format_check: stored as ``self.skip_format_check``.
        """
        super(Bed, self).__init__()

        self._ran_once = False
        self._file_pointer = None

        self.filename = filename
        if count_A1 is None:
            warnings.warn("'count_A1' was not set. For now it will default to 'False', but in the future it will default to 'True'", FutureWarning)
            count_A1 = False
        self.count_A1 = count_A1
        self.skip_format_check = skip_format_check
        if iid is not None:
            self._row = PstData._fixup_input(iid,empty_creator=lambda ignore:np.empty([0,2],dtype='S'),dtype='S')
        if sid is not None:
            self._col = PstData._fixup_input(sid,empty_creator=lambda ignore:np.empty([0],dtype='S'),dtype='S')
        if pos is not None:
            # NOTE(review): this reads self._col, so 'pos' without 'sid' relies
            # on self._col being available from elsewhere -- confirm.
            # np.full keeps the (count, 3) shape even for count == 0; the
            # previous list-multiplication form collapsed to shape (0,).
            self._col_property = PstData._fixup_input(pos,count=len(self._col),empty_creator=lambda count:np.full([count, 3], np.nan))
Exemplo n.º 22
0
 def val(self, new_value):
     """Replace the stored values with ``new_value`` after normalization, then re-validate.

     The fallback creator handed to ``PstData._fixup_input_val`` makes an
     uninitialized float64 array of shape ``(row_count, col_count, 3)``.
     """
     def _blank_values(row_count, col_count):
         #!!!Replace empty with my FillNA method?
         return np.empty([row_count, col_count, 3], dtype=np.float64)

     n_rows = len(self._row)
     n_cols = len(self._col)
     self._val = PstData._fixup_input_val(new_value,
                                          row_count=n_rows,
                                          col_count=n_cols,
                                          empty_creator=_blank_values)
     self._assert_iid_sid_pos(check_val=True)
Exemplo n.º 23
0
    def test_big_npz(self):
        """Write a 999 x 1001 PstData to an npz file and read it back.

        Reading with ``order='A'`` is expected to give C-contiguous values for
        a C-ordered source and F-contiguous values for an F-ordered source.
        """
        logging.info("in test_big_npz")
        n = 1000
        pstdata = PstData(row=range(n-1),col=range(n+1),val=np.zeros([n-1,n+1]))
        output = "tempdir/pstreader/big.npz"
        create_directory_if_necessary(output)
        PstNpz.write(output,pstdata)
        pstnpz = PstNpz(output)
        # The subset read only has to succeed; its result is not inspected.
        pstnpz[::2,::4].read()
        pstdata2 = pstnpz.read(order='A')
        assert pstdata2.val.flags['C_CONTIGUOUS']

        pstdata = PstData(row=range(n-1),col=range(n+1),val=np.zeros([n-1,n+1],order='F'))
        PstNpz.write(output,pstdata)
        pstnpz = PstNpz(output)
        pstdata2 = pstnpz.read(order='A')
        # This flag was previously evaluated without 'assert', so nothing was checked.
        assert pstdata2.val.flags['F_CONTIGUOUS']

        print("done")
Exemplo n.º 24
0
    def test_inputs4(self):
        """A PstData built entirely from ``None`` inputs must be empty in every dimension."""
        from pysnptools.pstreader import PstData
        pstdata = PstData(row=None,
                          col=None,
                          val=None,
                          row_property=None,
                          col_property=None,
                          name="test_read")

        # Same checks as one combined condition, in the same order.
        assert pstdata.row_count == 0
        assert pstdata.col_count == 0
        assert pstdata.val.shape[0] == 0
        assert pstdata.val.shape[1] == 0
        assert len(pstdata.row_property) == 0
        assert len(pstdata.col_property) == 0
        logging.info("done with test")
Exemplo n.º 25
0
    def _empty_inner(self, row, col, filename, row_property, col_property, order, dtype, val_shape):
        """Write the header records to ``filename`` and map the value area into memory.

        The header is a series of ``np.save`` records: magic number, format
        name, format version, row, col, row_property, col_property, dtype,
        order and val_shape. The file position after the header is stored in
        ``self._offset`` and used as the offset of the ``np.memmap`` that
        holds the values. Finally the PstData part of the object is initialized.
        """
        self._ran_once = True
        self._dtype = np.dtype(dtype)
        self._order = order

        row = PstData._fixup_input(row)
        col = PstData._fixup_input(col)
        row_property = PstData._fixup_input(row_property,count=len(row))
        col_property = PstData._fixup_input(col_property,count=len(col))

        with open(filename,'wb') as fp:
            header_records = [
                np.array([_magic_number]),
                np.array(["pstmemmap"]),  # name of file format
                np.array([2]),  # file format version
                row,
                col,
                row_property,
                col_property,
                np.array([self._dtype]),
                np.array([self._order]),
                np.array([val_shape]),
            ]
            for record in header_records:
                np.save(fp, record)
            # The value area starts right after the header.
            self._offset = fp.tell()

        logging.info("About to start allocating memmap '{0}'".format(filename))
        if val_shape is None:
            shape = (len(row),len(col))
        else:
            shape = (len(row),len(col),val_shape)
        val = np.memmap(filename, offset=self._offset, dtype=dtype, mode="r+", order=order, shape=shape)
        allocated_bytes = os.path.getsize(filename)
        logging.info("Finished allocating memmap '{0}'. Size is {1:,}".format(filename,allocated_bytes))
        display_name = "np.memmap('{0}')".format(filename)
        PstData.__init__(self,row,col,val,row_property,col_property,name=display_name)
Exemplo n.º 26
0
    def __init__(self,
                 iid=None,
                 iid0=None,
                 iid1=None,
                 val=None,
                 name=None,
                 parent_string=None,
                 xp=None
                 ):  #!!!autodoc doesn't generate good doc for this constructor
        """Create the object from ids and values.

        :param iid: ids used for both rows and columns (one shared array).
        :param iid0: row ids, used when ``iid`` is not given.
        :param iid1: column ids, used when ``iid`` is not given.
        :param val: values, normalized into ``self._val``; the fallback creator
            makes an uninitialized float64 array of shape ``(row_count, col_count)``
            with the ``xp`` array module.
        :param name: stored as ``self._name``.
        :param parent_string: deprecated alias for ``name``; triggers a
            ``DeprecationWarning``. Must not be combined with ``name``.
        :param xp: resolved through ``pstutil.array_module`` and stored as ``self._xp``.
        """
        # No 'super(KernelData, self).__init__()' call here: KernelData takes
        # full responsibility for initializing both of its superclasses.

        def _no_ids(ignore):
            return np.empty([0, 2], dtype='str')

        def _no_properties(count):
            return np.empty([count, 0], dtype='str')

        xp = pstutil.array_module(xp)
        self._val = None

        def _blank_values(row_count, col_count):
            return xp.empty([row_count, col_count], dtype=xp.float64)

        #!!why does SnpData __init__ have a copy_inputs, but KernelData doesn't?
        only_iid = iid is not None
        neither_iid0_nor_iid1 = iid0 is None and iid1 is None
        assert only_iid == neither_iid0_nor_iid1, "Either 'iid' or both 'iid0' 'iid1' must be provided."
        assert name is None or parent_string is None, "Can't set both 'name' and the deprecated 'parent_string'"
        if parent_string is not None:
            warnings.warn("'parent_string' is deprecated. Use 'name'",
                          DeprecationWarning)

        if iid is None:
            self._row = PstData._fixup_input(iid0, empty_creator=_no_ids, dtype='str')
            self._col = PstData._fixup_input(iid1, empty_creator=_no_ids, dtype='str')
        else:
            self._row = PstData._fixup_input(iid, empty_creator=_no_ids, dtype='str')
            # Rows and columns share the very same array.
            self._col = self._row

        self._row_property = PstData._fixup_input(
            None, count=len(self._row), empty_creator=_no_properties, dtype='str')
        self._col_property = PstData._fixup_input(
            None, count=len(self._col), empty_creator=_no_properties, dtype='str')
        self._val = PstData._fixup_input_val(
            val,
            row_count=len(self._row),
            col_count=len(self._col),
            empty_creator=_blank_values,
            xp=xp)
        self._assert_iid0_iid1(check_val=True)
        self._name = name or parent_string or ""
        self._std_string_list = []
        self._xp = xp
Exemplo n.º 27
0
    def test2(self):
        """In-place arithmetic on a PstMemMap's ``val`` must keep the same data buffer.

        The test runs with this file's directory as the working directory and
        restores the previous working directory afterwards, even on failure.
        """
        old_dir = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            filename = "tempdir/x.pst.memmap"
            pstutil.create_directory_if_necessary(filename)

            a = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=filename,row_property=['A','B','C'],order="F",dtype=np.float64)
            b = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C'])
            # First item of the array interface 'data' entry is the buffer address.
            pointer1 = a.val.__array_interface__['data'][0]
            a.val+=1
            a.val+=b.val
            pointer2 = a.val.__array_interface__['data'][0]
            assert pointer1==pointer2
        finally:
            # Without this a failing assertion left the process in the wrong
            # directory for whatever runs next.
            os.chdir(old_dir)
Exemplo n.º 28
0
    def allclose(self, value, equal_nan=True):
        '''
        Compare this object with ``value`` by delegating to ``PstData.allclose``.

        :param value: Other object with which to compare.
        :type value: :class:`SnpData`
        :param equal_nan: (Default: True) Tells if NaN in :attr:`SnpData.val` should be treated as regular values when testing equality.
        :type equal_nan: bool
        :rtype: bool

        >>> import numpy as np
        >>> snpdata5 = SnpData(iid=[['fam0','iid0'],['fam0','iid1']], sid=['snp334','snp349','snp921'], val=[[0.,2.,0.],[0.,1.,np.nan]], pos=[[0,0,0],[0,0,0],[0,0,0]])
        >>> snpdata6 = SnpData(iid=[['fam0','iid0'],['fam0','iid1']], sid=['snp334','snp349','snp921'], val=[[0.,2.,0.],[0.,1.,np.nan]], pos=[[0,0,0],[0,0,0],[0,0,0]])
        >>> print(snpdata5.allclose(snpdata6)) #True, if we consider the NaN as regular values, all the arrays have the same values.
        True
        >>> print(snpdata5.allclose(snpdata6,equal_nan=False)) #False, if we consider the NaN as special values, all the arrays are not equal.
        False

        '''
        # All comparison logic lives in PstData.allclose; this method only forwards to it.
        is_close = PstData.allclose(self, value, equal_nan=equal_nan)
        return is_close
Exemplo n.º 29
0
    def _read_pstdata(self):
        """Parse the text file at ``self.filename`` into an in-memory ``PstData``.

        File layout, as read by this method:

        * The first line is a header. Its whitespace-separated fields after the
          first one are each passed to ``self.extract_iid_function`` to build the
          row ids.
        * Every following non-blank line starts with an sid string, optionally
          followed by a second field holding one character per iid. Each
          character is converted with ``float``; ``?`` marks a missing value and
          is stored as NaN. Any further fields on the line are ignored.

        ``self.extract_sid_pos_function`` is applied to each sid string and must
        return an indexable with at least four items: item 1 becomes the column
        id and items 0, 2 and 3 become that column's (float64) property row.

        :rtype: PstData
        :raises AssertionError: if a line does not hold exactly one value per iid.
        """
        missing_char = "?"
        bim_list = []
        val_list_list = []
        with open(self.filename, "r") as fp:
            header = fp.readline()
            iid = np.array(
                [self.extract_iid_function(iid_string)
                 for iid_string in header.strip().split()[1:]],
                dtype='str')
            for line_index, line in enumerate(fp):
                if line_index % 1000 == 0:
                    logging.info(
                        "reading sid and iid info from line %s of file '%s'",
                        line_index, self.filename)
                fields = line.split()
                if not fields:
                    # A blank line (e.g. trailing newline) carries no sid; skip it
                    # instead of failing with an IndexError.
                    continue
                sid_string = fields[0]
                # The values are packed as a single string, one character per iid.
                rest = fields[1] if len(fields) > 1 else []
                assert len(rest) == len(iid), (
                    "line {0} of file '{1}' has {2} values but the header lists {3} iids"
                    .format(line_index, self.filename, len(rest), len(iid)))
                bim_list.append(self.extract_sid_pos_function(sid_string))
                # np.nan (not the np.NaN alias, which NumPy 2.0 removed).
                val_list_list.append(np.array(
                    [np.nan if ch == missing_char else float(ch) for ch in rest],
                    dtype=np.float64))

        col = np.array([bim[1] for bim in bim_list], dtype='str')
        col_property = np.array([[bim[0], bim[2], bim[3]] for bim in bim_list],
                                dtype=np.float64)

        # Each parsed line is one column of the iid-by-sid matrix.
        val = np.zeros((len(iid), len(col)))
        for col_index, col_values in enumerate(val_list_list):
            val[:, col_index] = col_values

        return PstData(iid,
                       col,
                       val,
                       col_property=col_property,
                       name=self.filename)
Exemplo n.º 30
0
    def test_subset(self):
        """Check that subsetting a ``PstData`` and reading it matches indexing ``val`` directly.

        Covers a negative-step slice, negative scalar indices, integer lists,
        boolean lists, plain scalar indices and strided slices.
        """
        np.random.seed(0)
        val = np.random.normal(.5, 2, size=(3, 2))
        pstdata = PstData(
            row=np.array([[1.0, 2], [3, 4], [5, 6]]),
            col=np.array([["A", "a"], ["B", "b"]]),
            val=val,
            row_property=np.array([[1.0, 2, 2.5], [3, 4, 4.5], [5, 6, 6.5]]),
            col_property=np.array([[1.0, 2, 2.5, 1], [3, 4, 4.5, 3]]),
            name="test_read")

        def subset_val(row_index, col_index):
            # Values obtained by subsetting the reader and then reading it.
            return pstdata[row_index, col_index].read().val

        # Negative-step slice over rows, all columns.
        reversed_rows = subset_val(slice(-1, 0, -1), slice(None))
        assert np.array_equal(reversed_rows, val[-1:0:-1, :])

        # Negative scalar indices still produce a 2-D result.
        assert subset_val(-1, -1)[0, 0] == val[-1, -1]

        # Integer lists select rows and columns independently.
        picked = subset_val([-1, 0], [-1, 0])
        assert np.array_equal(picked, val[[-1, 0], :][:, [-1, 0]])

        # Boolean lists.
        masked = subset_val([True, False, True], [False, True])
        expected_masked = val[[True, False, True], [False, True]].reshape(2, 1)
        assert np.array_equal(masked, expected_masked)

        assert subset_val(0, 0)[0, 0] == val[0, 0]

        # Strided slices on both axes.
        strided = subset_val(slice(1, None, 2), slice(1, None, 2))
        assert np.array_equal(strided, val[1::2, 1::2])

        logging.info("done with test")
Exemplo n.º 31
0
    def __init__(self, iid=None, iid0=None, iid1=None, val=None, name=None, parent_string=None): #!!!autodoc doesn't generate good doc for this constructor
        """Build the in-memory object from ids and values.

        :param iid: ids used for both the rows and the columns. Give either this
            or ``iid0``/``iid1``, not both.
        :param iid0: row ids (used when ``iid`` is not given).
        :param iid1: column ids (used when ``iid`` is not given).
        :param val: matrix values; normalized by ``PstData._fixup_input_val``.
        :param name: name for this object; defaults to ``""``.
        :param parent_string: deprecated alias of ``name``; emits a ``DeprecationWarning``.
        """
        #!!why does SnpData __init__ have a copy_inputs, but KernelData doesn't?
        iid_given = iid is not None
        pair_given = not (iid0 is None and iid1 is None)
        assert iid_given != pair_given, "Either 'iid' or both 'iid0' 'iid1' must be provided."
        assert not (name is not None and parent_string is not None), "Can't set both 'name' and the deprecated 'parent_string'"
        if parent_string is not None:
            warnings.warn("'parent_string' is deprecated. Use 'name'", DeprecationWarning)

        # Fallback factories handed to the PstData fix-up helpers.
        def _empty_ids(ignore):
            return np.empty([0, 2], dtype=str)

        def _empty_property(count):
            return np.empty([count, 0], dtype=str)

        def _empty_val(row_count, col_count):
            return np.empty([row_count, col_count], dtype=np.float64)

        if iid is None:
            self._row = PstData._fixup_input(iid0, empty_creator=_empty_ids)
            self._col = PstData._fixup_input(iid1, empty_creator=_empty_ids)
        else:
            # A single id list: rows and columns share the very same array.
            self._row = PstData._fixup_input(iid, empty_creator=_empty_ids)
            self._col = self._row

        n_row = len(self._row)
        self._row_property = PstData._fixup_input(None, count=n_row, empty_creator=_empty_property)
        n_col = len(self._col)
        self._col_property = PstData._fixup_input(None, count=n_col, empty_creator=_empty_property)
        self.val = PstData._fixup_input_val(val, row_count=len(self._row), col_count=len(self._col), empty_creator=_empty_val)

        self._assert_iid0_iid1()
        self._name = name or parent_string or ""
        self._std_string_list = []
Exemplo n.º 32
0
        fn = '../examples/tiny.pst.memmap'
        os.getcwd()
        print((os.path.exists(fn)))
        pst_mem_map = PstMemMap(fn)
        print((pst_mem_map.val[0,1]))


    # Disabled scratch code (never runs because of `if False`): prints the data
    # pointer of a plain ndarray, and of a PstMemMap's `val`, before and after
    # in-place arithmetic so the two pointers can be compared by eye.
    if False:
        a=np.ndarray([2,3])
        pointer, read_only_flag = a.__array_interface__['data']
        print(pointer)
        a*=2
        pointer, read_only_flag = a.__array_interface__['data']
        print(pointer)
        a = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=r'c:\deldir\a.memmap',row_property=['A','B','C'],order="F",dtype=np.float64)
        b = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C'])
        pointer, read_only_flag = a.val.__array_interface__['data']
        print(pointer)
        a.val+=1
        a.val+=b.val
        pointer, read_only_flag = a.val.__array_interface__['data']
        print(pointer)


    # Run the unit-test suite, stopping at the first failure (failfast=True),
    # and fail loudly if any test did not pass.
    suites = getTestSuite()
    r = unittest.TextTestRunner(failfast=True)
    ret = r.run(suites)
    assert ret.wasSuccessful()

    # Then run this module's doctests; ELLIPSIS lets `...` in expected output
    # match arbitrary text.
    result = doctest.testmod(optionflags=doctest.ELLIPSIS)
    assert result.failed == 0, "failed doc test: " + __file__
Exemplo n.º 33
0
    def read(self,
             order='F',
             dtype=np.float64,
             force_python_only=False,
             view_ok=False,
             num_threads=None):
        """Read the matrix values and return them as a :class:`.PstData` (whose :attr:`PstData.val` property holds a new ndarray of the matrix values).

        :param order: {'F' (default), 'C', 'A'}, optional -- The memory order of the ndarray. With 'F' (default)
            the array is F-contiguous (row-index varies the fastest). With 'C' the array is
            C-contiguous (col-index varies the fastest). With 'A' the :attr:`PstData.val`
            ndarray may be in any order (either C- or Fortran-contiguous).
        :type order: string or None

        :param dtype: {numpy.float64 (default), numpy.float32}, optional -- The data-type for the :attr:`PstData.val` ndarray.
        :type dtype: data-type

        :param force_python_only: optional -- If False (default), may use outside library code. If True, requests that the read
            be done without outside library code.
        :type force_python_only: bool


        :param view_ok: optional -- If False (default), allocates new memory for the :attr:`PstData.val`'s ndarray. If True,
            if practical and reading from a :class:`PstData`, will return a new
            :class:`PstData` with an ndarray that shares memory with the original :class:`PstData`.
            Typically, you'll also wish to use "order='A'" to increase the chance that sharing will be possible.
            Use these parameters with care because any change to either ndarray will affect
            the other. Also keep in mind that :meth:`read` relies on ndarray's mechanisms to decide whether to actually
            share memory, so it may ignore your suggestion and allocate a new ndarray anyway.
        :type view_ok: bool

        :param num_threads: optional -- The number of threads with which to read data. Defaults to all available
            processors. Can also be set with these environment variables (listed in priority order):
            'PST_NUM_THREADS', 'NUM_THREADS', 'MKL_NUM_THREADS'.
        :type num_threads: None or int


        :rtype: :class:`.PstData`

        Each call re-reads the matrix values and creates a new in-memory :class:`.PstData` with a new ndarray of matrix values.

        If you request the values for only a subset of the sids or iids, (to the degree practical) only that subset will be read from disk.

        :Example:

        >>> from pysnptools.pstreader import PstHdf5
        >>> from pysnptools.util import example_file # Download and return local file name
        >>> hdf5_file = example_file('pysnptools/examples/toydata.iidmajor.snp.hdf5')
        >>> on_disk = PstHdf5(hdf5_file) # Specify matrix data on disk
        >>> pstdata1 = on_disk.read() # Read all the matrix data returning a PstData instance
        >>> print(type(pstdata1.val).__name__) # The PstData instance contains a ndarray of the data.
        ndarray
        >>> subset_pstdata = on_disk[:,::2].read() # From the disk, read matrix values for every other sid
        >>> print(subset_pstdata.val[0,0]) # Print the first matrix value in the subset
        1.0
        >>> subsub_pstdata = subset_pstdata[:10,:].read(order='A',view_ok=True) # Create an in-memory subset of the subset with matrix values for the first ten iids. Share memory if practical.
        >>> import numpy as np
        >>> # print(np.may_share_memory(subset_snpdata.val, subsub_snpdata.val)) # Do the two ndarray's share memory? They could. Currently they won't.       
        """
        # Normalize whatever was passed (type object, string, ...) into a numpy dtype.
        resolved_dtype = np.dtype(dtype)
        # The two leading None arguments are passed through to _read unchanged.
        matrix = self._read(None, None, order, resolved_dtype,
                            force_python_only, view_ok, num_threads)

        # Imported here, inside the method, rather than at module level.
        from pysnptools.pstreader import PstData
        return PstData(self.row,
                       self.col,
                       matrix,
                       row_property=self.row_property,
                       col_property=self.col_property,
                       name=str(self))
Exemplo n.º 34
0
    def test_writes(self):
        """Round-trip ``PstData`` through the ``PstMemMap``, ``PstHdf5`` and ``PstNpz`` file formats.

        For many combinations of row/col counts, value shapes, id layouts and
        property layouts, the data is written with each class, re-opened, and
        read back (whole and as a ``[::2, ::3]`` subset, in every order, with and
        without ``force_python_only``). What is read must equal what was written.
        """
        #===================================
        #    Generators for ids and properties
        #===================================
        def _ascii(k):
            return str(k).encode('ascii')

        def _oned_int(c):
            return range(c)

        def _oned_str(c):
            return [_ascii(k) for k in range(c)]

        def _twooned_int(c):
            return [[k] for k in range(c)]

        def _twooned_str(c):
            return [[_ascii(k)] for k in range(c)]

        def _twotwod_int(c):
            return [[k, k] for k in range(c)]

        def _twotwod_str(c):
            return [[_ascii(k), b"hello"] for k in range(c)]

        #def _twotwod_U(c):
        #    return [[str(i).encode('UTF-8'),u"hello"] for i in range(c)]
        def _none(c):
            return None

        def _zero(c):
            return np.empty([c, 0], dtype='S')

        #!!!,_twotwod_U can't roundtrop Unicode in hdf5
        id_generators = (_oned_int, _oned_str, _twooned_int, _twooned_str,
                         _twotwod_int, _twotwod_str)
        #!!!_twotwod_U can't round trip Unicode because Hdf5 doesn't like it.
        property_generators = (_none, _oned_str, _oned_int, _twooned_int,
                               _twooned_str, _twotwod_int, _twotwod_str, _zero)
        backends = ((PstMemMap, "memmap"), (PstHdf5, "hdf5"), (PstNpz, "npz"))

        #===================================
        #    Comparison helpers
        #===================================
        def _props_match(actual, wanted):
            # Two zero-width property arrays count as equal even if array_equal says otherwise.
            return np.array_equal(actual, wanted) or (
                actual.shape[1] == 0 and wanted.shape[1] == 0)

        def _assert_reads_match(subreader, expected):
            for order in ['C', 'F', 'A']:
                for force_python_only in [True, False]:
                    readdata = subreader.read(
                        order=order, force_python_only=force_python_only)
                    assert np.array_equal(readdata.val, expected.val)
                    assert np.array_equal(readdata.row, expected.row)
                    assert np.array_equal(readdata.col, expected.col)
                    assert _props_match(readdata.row_property,
                                        expected.row_property)
                    assert _props_match(readdata.col_property,
                                        expected.col_property)

        #===================================
        #    Starting main function
        #===================================
        logging.info("starting 'test_writes'")
        np.random.seed(0)
        temp_dir = tempfile.TemporaryDirectory("pstreader")
        output_template = temp_dir.name + '/writes.{0}.{1}'
        file_index = 0
        for row_count in [5, 2, 1, 0]:
            for col_count in [4, 2, 1, 0]:
                for val_shape in [3, None, 1]:
                    # None means a plain 2-D matrix; otherwise add a third axis of that length.
                    if val_shape is None:
                        size = (row_count, col_count)
                    else:
                        size = (row_count, col_count, val_shape)
                    val = np.random.normal(.5, 2, size=size)
                    for make_ids in id_generators:
                        row = make_ids(row_count)
                        col = make_ids(col_count)
                        for make_props in property_generators:
                            row_prop = make_props(row_count)
                            col_prop = make_props(col_count)
                            pstdata = PstData(row, col, val, row_prop,
                                              col_prop, str(file_index))
                            for the_class, suffix in backends:
                                filename = output_template.format(
                                    file_index, suffix)
                                logging.info(filename)
                                file_index += 1
                                the_class.write(filename, pstdata)
                                if suffix == 'hdf5':
                                    reader = the_class(filename, block_size=3)
                                else:
                                    reader = the_class(filename)
                                _fortesting_JustCheckExists().input(reader)
                                # First the whole matrix, then a strided subset.
                                _assert_reads_match(reader, pstdata)
                                _assert_reads_match(reader[::2, ::3],
                                                    pstdata[::2, ::3].read())
                                if suffix in ('memmap', 'hdf5'):
                                    reader.flush()
                                os.remove(filename)
        temp_dir.cleanup()
        logging.info("done with 'test_writes'")