Пример #1
0
 def test05(self):
     """Testing `where()` iterator using `skip`

     The values yielded by ``where(mask, skip=2)`` must equal the values
     selected with plain NumPy once the first two matches are dropped.
     """
     a = np.arange(1, 11)
     b = blz.barray(a)
     # Reference result: select with Python/NumPy, then drop two matches.
     wt = [v for v in a if v <= 5][2:]
     cwt = list(b.where(blz.barray(a <= 5), skip=2))
     # assertEqual replaces the deprecated `assert_` alias and shows both
     # lists when the comparison fails.
     self.assertEqual(wt, cwt, "where() does not work correctly")
Пример #2
0
 def test06(self):
     """Testing `where()` iterator (using array bool in fancy indexing)

     Indexing a barray with a boolean barray must select the same
     elements as indexing the NumPy array with the boolean mask.
     """
     values = np.arange(1, 110)
     packed = blz.barray(values, chunklen=10)
     mask = (values < 5) | (values > 9)
     expected = values[mask]
     selected = packed[blz.barray(mask)]
     assert_array_equal(expected, selected,
                        "where() does not work correctly")
Пример #3
0
 def test03(self):
     """Testing `where()` iterator (using a boolean array)

     Iterating ``where(mask)`` must yield exactly the values for which
     the boolean mask is true.
     """
     a = np.arange(1, 11)
     b = blz.barray(a)
     # Reference result computed without blz.
     wt = [v for v in a if v <= 5]
     cwt = list(b.where(blz.barray(a <= 5)))
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(wt, cwt, "where() does not work correctly")
Пример #4
0
 def test06(self):
     """Testing `where()` iterator using `limit` and `skip`

     ``where(mask, limit=3, skip=1)`` must match the ``[1:4]`` slice of
     the values selected with plain NumPy.
     """
     a = np.arange(1, 11)
     b = blz.barray(a)
     # Reference result: skip one match, then keep at most three.
     wt = [v for v in a if v <= 5][1:4]
     cwt = list(b.where(blz.barray(a <= 5), limit=3, skip=1))
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(wt, cwt, "where() does not work correctly")
Пример #5
0
 def test04(self):
     """Testing fancy indexing with __setitem__ (bool barray)

     Assigning through a boolean barray must modify the same positions
     as assigning through the equivalent NumPy boolean mask.
     """
     reference = np.arange(1, 1e2)
     packed = blz.barray(reference, chunklen=10)
     mask = (reference > 5) & (reference < 40)
     packed_mask = blz.barray(mask)
     # Apply the same masked assignment to both containers.
     packed[packed_mask] = 3.
     reference[mask] = 3.
     assert_array_equal(packed[:], reference,
                        "fancy indexing does not work correctly")
Пример #6
0
 def test07(self):
     """Testing `where()` iterator using `limit` and `skip` (long array)

     Uses a 10000-element array and a chunked boolean mask;
     ``limit=1010, skip=1010`` must match the ``[1010:2020]`` slice of
     the values selected with plain NumPy.
     """
     a = np.arange(10000)
     b = blz.barray(a)
     # Reference result computed without blz.
     wt = [v for v in a if v <= 5000][1010:2020]
     cwt = list(b.where(blz.barray(a <= 5000, chunklen=100),
                        limit=1010, skip=1010))
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(wt, cwt, "where() does not work correctly")
Пример #7
0
 def test01d(self):
     """Testing `__getitem__()` with a single negative index

     The on-disk barray must return the same element as NumPy.
     """
     expected = np.arange(1e4)
     stored = blz.barray(expected, rootdir=self.rootdir)
     index = -2   # second last element
     assert_array_equal(expected[index], stored[index],
                        "Arrays are not equal")
Пример #8
0
 def test04(self):
     """Testing `iter()` method with large zero arrays

     Iterating over a barray of zeros and rebuilding it with
     ``blz.fromiter`` must reproduce the original array.
     """
     # The shape must be an integer: current NumPy rejects float shapes
     # such as 1e4.
     a = np.zeros(10000, dtype='f8')
     b = blz.barray(a, chunklen=100, rootdir=self.rootdir)
     c = blz.fromiter((v for v in b), dtype='f8', count=len(a))
     assert_array_equal(a, c[:], "iterator fails on zeros")
Пример #9
0
 def test03a(self):
     """Testing `iter()` method with only step

     The sum over ``iter(step=4)`` must equal the sum of the NumPy
     slice ``a[::4]``.
     """
     a = np.arange(101)
     b = blz.barray(a, chunklen=2, rootdir=self.rootdir)
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both sums on failure.
     self.assertEqual(sum(a[::4]), sum(b.iter(step=4)),
                      "Sums are not equal")
Пример #10
0
 def test03d(self):
     """Testing `__getitem__()` with ranges and steps (IV)

     The slice step (3000) is larger than the 1000-element array.
     """
     expected = np.arange(1e3)
     stored = blz.barray(expected, chunklen=10, rootdir=self.rootdir)
     selector = slice(4, 80, 3000)
     assert_array_equal(expected[selector], stored[selector],
                        "Arrays are not equal")
Пример #11
0
 def test02b(self):
     """Testing `__getitem__()` with ranges (negative bound)

     ``slice(-3)`` selects everything but the last three elements.
     """
     expected = np.arange(1e2)
     stored = blz.barray(expected, chunklen=10, rootdir=self.rootdir)
     selector = slice(-3)
     assert_array_equal(expected[selector], stored[selector],
                        "Arrays are not equal")
Пример #12
0
def open(persist, **kwargs):
    """Open an existing persistent array.

    Parameters
    ----------
    persist : a Storage instance
        The Storage instance specifies, among other things, URI of
        where the array is stored.
    kwargs : a dictionary
        Put here different parameters depending on the format.  They are
        forwarded unchanged to the format-specific constructor.

    Returns
    -------
    out: a concrete blaze array.

    Raises
    ------
    ValueError
        If the storage format is not one of the supported formats.

    Notes
    -----
    Only BLZ, HDF5, CSV and JSON formats are supported currently.

    """
    persist = _persist_convert(persist)
    fmt = persist.format
    if fmt == 'blz':
        # BLZ needs the barray opened first; the descriptor wraps it.
        dd = BLZDataDescriptor(blz.barray(rootdir=persist.path, **kwargs))
    elif fmt == 'csv':
        dd = CSVDataDescriptor(persist.path, **kwargs)
    elif fmt == 'json':
        dd = JSONDataDescriptor(persist.path, **kwargs)
    elif fmt == 'hdf5':
        dd = HDF5DataDescriptor(persist.path, **kwargs)
    else:
        # Previously this fell through and failed with an unbound `dd`.
        raise ValueError("unsupported storage format: %r" % (fmt,))
    return Array(dd)
Пример #13
0
def open(persist, **kwargs):
    """Open an existing persistent array.

    Parameters
    ----------
    persist : a Storage instance
        The Storage instance specifies, among other things, URI of
        where the array is stored.
    kwargs : a dictionary
        Put here different parameters depending on the format.  They are
        forwarded unchanged to the format-specific constructor.

    Returns
    -------
    out: a concrete blaze array.

    Raises
    ------
    ValueError
        If the storage format is not one of the supported formats.

    Notes
    -----
    Only BLZ, HDF5, CSV and JSON formats are supported currently.

    """
    persist = _persist_convert(persist)
    fmt = persist.format
    if fmt == 'blz':
        # BLZ needs the barray opened first; the descriptor wraps it.
        dd = BLZDataDescriptor(blz.barray(rootdir=persist.path, **kwargs))
    elif fmt == 'csv':
        dd = CSVDataDescriptor(persist.path, **kwargs)
    elif fmt == 'json':
        dd = JSONDataDescriptor(persist.path, **kwargs)
    elif fmt == 'hdf5':
        dd = HDF5DataDescriptor(persist.path, **kwargs)
    else:
        # Previously this fell through and failed with an unbound `dd`.
        raise ValueError("unsupported storage format: %r" % (fmt,))
    return Array(dd)
Пример #14
0
 def select_and_apply(self, apply_func = lambda xx:np.dot(xx,np.ones(xx.shape[1]) / xx.shape[1]),
                      select_format = (None,1),
                      combine_fun = lambda xx:np.concatenate(xx,1),
                      limit_n_per_slice = 2000000):
     """Apply a function slice by slice and collect the results in a barray.

     Parameters
     ----------
     apply_func : callable
         Called on each combined array; its result is stored.  The
         default computes the mean along axis 1.
     select_format : tuple
         Forwarded to `generate_axis_dividing_slice_selectors`.
     combine_fun : callable
         Forwarded to `select_all_barrays`.  The default concatenates
         along axis 1.
     limit_n_per_slice : int
         Forwarded to `generate_axis_dividing_slice_selectors`.

     Returns
     -------
     A `blz.barray` created from the result for the first group of slice
     selectors, with the results for the remaining groups appended.
     """
     selected_slices = self.generate_axis_dividing_slice_selectors(
         select_format=select_format,
         limit_n_per_slice=limit_n_per_slice)

     # print() with a single pre-formatted string gives the same output
     # on Python 2 and Python 3.
     print("selected_slices =  %s" % (selected_slices,))

     def _apply_to(slice_selectors):
         # Gather the selected data, then reduce it with `apply_func`.
         combined_arr = self.select_all_barrays(slice_selectors,
                                                combine_fun=combine_fun)
         return apply_func(combined_arr)

     # The first group creates the output container ...
     output_barray = blz.barray(_apply_to(selected_slices[0]))

     # ... and every following group is appended to it.
     for one_selected_slices in selected_slices[1:]:
         print("applying one_selected_slices =  %s" % (one_selected_slices,))
         output_barray.append(_apply_to(one_selected_slices))

     return output_barray
Пример #15
0
 def test03(self):
     """Testing copy() with no shuffle

     A copy made with ``shuffle=False`` is expected to take more
     compressed bytes than the original.
     """
     # `num` must be an integer: current NumPy rejects a float such as 1e4.
     a = np.linspace(-1., 1., 10000)
     b = blz.barray(a, rootdir=self.rootdir)
     c = b.copy(bparams=blz.bparams(shuffle=False))
     # assertLess replaces the deprecated `assert_` alias and reports
     # both sizes on failure.
     self.assertLess(b.cbytes, c.cbytes, "shuffle not changed")
Пример #16
0
 def test02c(self):
     """Testing `iter()` method with positive start, negative stop

     The sum over ``iter(24, -3)`` must equal the sum of ``a[24:-3]``.
     """
     a = np.arange(101)
     b = blz.barray(a, chunklen=2, rootdir=self.rootdir)
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(sum(a[24:-3]), sum(b.iter(24, -3)),
                      "Sums are not equal")
Пример #17
0
 def test04a(self):
     """Testing `__getitem__()` with long ranges

     The slice stop (8000) lies beyond the end of the 1000-element array.
     """
     expected = np.arange(1e3)
     stored = blz.barray(expected, chunklen=100, rootdir=self.rootdir)
     selector = slice(1, 8000)
     assert_array_equal(expected[selector], stored[selector],
                        "Arrays are not equal")
Пример #18
0
 def test03b(self):
     """Testing `iter()` method with start, stop, step

     The sum over ``iter(3, 24, 4)`` must equal the sum of ``a[3:24:4]``.
     """
     a = np.arange(101)
     b = blz.barray(a, chunklen=2, rootdir=self.rootdir)
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(sum(a[3:24:4]), sum(b.iter(3, 24, 4)),
                      "Sums are not equal")
Пример #19
0
 def test04d(self):
     """Testing `__getitem__()` with no start and no stop

     Only a step of 2 is given in the slice.
     """
     expected = np.arange(1e3)
     stored = blz.barray(expected, chunklen=100, rootdir=self.rootdir)
     selector = slice(None, None, 2)
     assert_array_equal(expected[selector], stored[selector],
                        "Arrays are not equal")
Пример #20
0
 def test05(self):
     """Testing `__getitem__()` with negative steps

     A negative slice step is expected to raise NotImplementedError.
     """
     source = np.arange(1e3)
     stored = blz.barray(source, chunklen=10, rootdir=self.rootdir)
     reverse_step = slice(None, None, -3)
     with self.assertRaises(NotImplementedError):
         stored[reverse_step]
Пример #21
0
 def test02(self):
     """Testing copy() with lesser compression

     A copy made with ``clevel=1`` is expected to take more compressed
     bytes than the original.
     """
     # `num` must be an integer: current NumPy rejects a float such as 1e4.
     a = np.linspace(-1., 1., 10000)
     b = blz.barray(a, rootdir=self.rootdir)
     c = b.copy(bparams=blz.bparams(clevel=1))
     # assertLess replaces the deprecated `assert_` alias and reports
     # both sizes on failure.
     self.assertLess(b.cbytes, c.cbytes, "clevel not changed")
Пример #22
0
 def test02(self):
     """Testing fancy indexing (empty list)

     Indexing with an empty list must give the same result as NumPy.
     """
     source = np.arange(101)
     packed = blz.barray(source)
     no_indices = []
     got = packed[no_indices]
     expected = source[no_indices]
     assert_array_equal(got, expected,
                        "fancy indexing does not work correctly")
Пример #23
0
 def test00(self):
     """Testing fancy indexing (short list)

     Indexing with a short list of positions must match NumPy.
     """
     source = np.arange(1, 111)
     packed = blz.barray(source)
     positions = [3, 1]
     got = packed[positions]
     expected = source[positions]
     assert_array_equal(got, expected,
                        "fancy indexing does not work correctly")
Пример #24
0
 def test03(self):
     """Testing fancy indexing (list of floats)

     Indexing with a list of floats must give the same result as NumPy.
     """
     source = np.arange(1, 101)
     packed = blz.barray(source)
     # NOTE(review): recent NumPy releases reject float indices; confirm
     # which NumPy versions this test is meant to run against.
     float_positions = [1.1, 3.3]
     got = packed[float_positions]
     expected = source[float_positions]
     assert_array_equal(got, expected,
                        "fancy indexing does not work correctly")
Пример #25
0
 def test01(self):
     """Testing fancy indexing (large list, numpy)

     Indexing with a NumPy array of 1000 random positions must match
     NumPy.
     """
     source = np.arange(1, 1e4)
     packed = blz.barray(source)
     positions = np.random.randint(1000, size=1000)
     got = packed[positions]
     expected = source[positions]
     assert_array_equal(got, expected,
                        "fancy indexing does not work correctly")
Пример #26
0
 def test02c(self):
     """Testing `append()` method (large chunklen III)

     Appending an array to a barray built from that same array must give
     the array concatenated with itself.
     """
     source = np.arange(1000 * 1000)
     grown = blz.barray(source, chunklen=100 * 1000 - 1,
                        rootdir=self.rootdir)
     grown.append(source)
     doubled = np.concatenate((source, source))
     assert_array_equal(doubled, grown[:], "Arrays are not equal")
Пример #27
0
 def test01(self):
     """Testing __sizeof__() (big arrays)

     For a big array the size reported by ``sys.getsizeof`` must be
     smaller than the uncompressed size ``nbytes``.
     """
     a = np.arange(2e5)
     b = blz.barray(a, rootdir=self.rootdir)
     # assertLess replaces the deprecated `assert_` alias and reports
     # both sizes on failure.
     self.assertLess(sys.getsizeof(b), b.nbytes,
                     "barray does not seem to compress at all")
Пример #28
0
 def test02(self):
     """Testing __sizeof__() (small arrays)

     For a small array the size reported by ``sys.getsizeof`` must be
     larger than the uncompressed size ``nbytes``.
     """
     a = np.arange(111)
     b = blz.barray(a)
     # assertGreater replaces the deprecated `assert_` alias and reports
     # both sizes on failure.
     self.assertGreater(sys.getsizeof(b), b.nbytes,
                        "barray compressed too much??")
Пример #29
0
 def test07(self):
     """Testing `iter()` method with `limit` and `skip`

     ``iter(limit=1010, skip=1010)`` must yield the same values as the
     NumPy slice ``a[1010:2020]``.
     """
     a = np.arange(1e4, dtype='f8')
     b = blz.barray(a, chunklen=100, rootdir=self.rootdir)
     c = blz.fromiter((v for v in b.iter(limit=1010, skip=1010)),
                      dtype='f8', count=1010)
     # The failure message used to mention zeros, which this test does
     # not use; it now names the feature under test.
     assert_array_equal(a[1010:2020], c,
                        "iterator fails with limit and skip")
Пример #30
0
 def test00(self):
     """Testing unicode types (creation)

     A barray built from a 2-d "U4" array must keep the dtype (as the
     base of its own dtype) and the contents.
     """
     a = np.array([[u"aŀle", u"eñe"], [u"açò", u"áèâë"]], dtype="U4")
     b = blz.barray(a)
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both dtypes on failure.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Пример #31
0
 def test00(self):
     """Testing string types (creation)

     A barray built from a 2-d "S4" array must keep the dtype (as the
     base of its own dtype) and the contents.
     """
     a = np.array([["ale", "ene"], ["aco", "ieie"]], dtype="S4")
     b = blz.barray(a)
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both dtypes on failure.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Пример #32
0
 def test00(self):
     """Testing string types (creation)

     A barray built from a 2-d "S4" array must keep the dtype (as the
     base of its own dtype) and the contents.
     """
     a = np.array([["ale", "ene"], ["aco", "ieie"]], dtype="S4")
     b = blz.barray(a)
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both dtypes on failure.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Пример #33
0
 def test00(self):
     """Testing unicode types (creation)

     A barray built from a 2-d "U4" array must keep the dtype (as the
     base of its own dtype) and the contents.
     """
     a = np.array([[u"aŀle", u"eñe"], [u"açò", u"áèâë"]], dtype="U4")
     b = blz.barray(a)
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both dtypes on failure.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Пример #34
0
    def testImplicitDtype(self):
        """Testing barray construction keeping dimensions (implicit dtype)

        When ``self.open`` is set, the array is re-opened from
        ``self.rootdir`` before it is compared.
        """
        identity = np.eye(6)  # 2d
        stored = blz.barray(identity, rootdir=self.rootdir)
        if self.open:
            stored = blz.open(rootdir=self.rootdir)

        # array equality implies having the same shape
        assert_array_equal(identity, stored, "Arrays are not equal")
Пример #35
0
 def test01(self):
     """Testing unicode types (append)

     Appending a (300, 4) "U4" array to an empty (0, 4) barray must keep
     the dtype and give back the appended contents.
     """
     a = np.ones((300, 4), dtype="U4")
     b = blz.barray([], dtype="U4").reshape((0, 4))
     b.append(a)
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both dtypes on failure.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Пример #36
0
 def test_barray_record_as_object(self):
     """Testing barray built from a record array with an explicit 'O' dtype

     The result must be one-dimensional, and both fields of every element
     must match the source record.
     """
     records = np.empty((10,), dtype=np.dtype('u1,O'))
     records[:] = [(n, 's' * n) for n in range(10)]
     objects = blz.barray(records, dtype=np.dtype('O'))
     self.assertEqual(len(objects.shape), 1)
     self.assertEqual(len(records), objects.shape[0])
     for pos in range(len(objects)):
         for field in (0, 1):
             self.assertEqual(objects[pos][field], records[pos][field])
Пример #37
0
    def test_barray_1d_source(self):
        """Testing barray of objects, 1d source

        A list of strings stored with dtype 'O' must give a
        one-dimensional barray holding the same elements.
        """
        src_data = ['s'*i for i in range(10)]
        carr = blz.barray(src_data, dtype=np.dtype('O'))

        self.assertEqual(len(carr.shape), 1)
        self.assertEqual(len(src_data), carr.shape[0])
        for i in range(len(carr)):
            # A second, identical assertion used to follow; it added
            # nothing and was removed.
            self.assertEqual(carr[i], src_data[i])
Пример #38
0
 def test00(self):
     """Testing sum().

     The sum of a 2-d barray must match NumPy's sum in dtype and value.
     """
     # reshape() needs integer dimensions: current NumPy rejects 1e4.
     a = np.arange(1e5).reshape(10, 10000)
     sa = a.sum()
     ac = blz.barray(a)
     sac = ac.sum()
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both operands on failure.
     self.assertEqual(sa.dtype, sac.dtype, "sum() is not working correctly.")
     self.assertEqual(sa, sac, "sum() is not working correctly.")
Пример #39
0
 def test04c(self):
     """Testing `__getitem__()` method with shape reduction (III)

     Mixing integer indices with a slice must give the same shape and
     values as NumPy.  When ``self.open`` is set, the array is re-opened
     from ``self.rootdir`` first.
     """
     a = np.arange(6000).reshape((50, 40, 3))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         b = blz.open(rootdir=self.rootdir)
     sl = (1, slice(1, 4, 2), 2)
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both shapes on failure.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
Пример #40
0
    def testExplicitDtype(self):
        """Testing barray construction keeping dimensions (explicit dtype)

        When ``self.open`` is set, the array is re-opened from
        ``self.rootdir`` before it is compared.
        """
        float64 = np.dtype(np.float64)
        identity = np.eye(6, dtype=float64)
        stored = blz.barray(identity, dtype=float64, rootdir=self.rootdir)
        if self.open:
            stored = blz.open(rootdir=self.rootdir)

        # array equality implies having the same shape
        assert_array_equal(identity, stored, "Arrays are not equal")
Пример #41
0
 def test03c(self):
     """Testing `__getitem__()` method with several slices (III)

     A tuple of stepped slices on a 4-d array must give the same shape
     and values as NumPy.  When ``self.open`` is set, the array is
     re-opened from ``self.rootdir`` first.
     """
     a = np.arange(120 * 1000).reshape((5 * 1000, 4, 3, 2))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         b = blz.open(rootdir=self.rootdir)
     sl = (slice(None, None, 3), slice(1, 3, 2), slice(1, 4, 2))
     # assertEqual replaces the deprecated `assert_` alias and reports
     # both shapes on failure.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
Пример #42
0
 def test04c(self):
     """Testing `__setitem__()` method with shape reduction (III)

     The same scalar is assigned through the same index tuple to the
     NumPy array and to the barray.  When ``self.open`` is set, the barray
     is flushed and re-opened before the comparison.
     """
     reference = np.arange(24).reshape((4, 3, 2))
     stored = blz.barray(reference, rootdir=self.rootdir)
     target = (1, 2, slice(None, None, None))
     reference[target] = 2
     stored[target] = 2
     if self.open:
         stored.flush()
         stored = blz.open(rootdir=self.rootdir)
     assert_array_equal(reference[target], stored[target],
                        "Arrays are not equal")
Пример #43
0
 def test03d(self):
     """Testing `__setitem__()` method with several slices (IV)

     The same scalar is assigned through a 4-slice tuple to the NumPy
     array and to the barray, and the whole arrays are compared.  When
     ``self.open`` is set, the barray is flushed and re-opened first.
     """
     reference = np.arange(120).reshape((5, 4, 3, 2))
     stored = blz.barray(reference, rootdir=self.rootdir)
     target = (slice(1, 3), slice(1, 3, 1), slice(1, None, 2), slice(1))
     reference[target] = 2
     stored[target] = 2
     if self.open:
         stored.flush()
         stored = blz.open(rootdir=self.rootdir)
     assert_array_equal(reference[:], stored[:], "Arrays are not equal")
    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, compressing plain NumPy arrays.

        Values whose type is exactly ``np.ndarray`` are converted to a
        ``blz.barray`` before being stored; every other value is stored
        unchanged.  The ``caching`` attribute of the configuration selects
        whether the compressed data lives in memory or in a temporary
        folder on disk.

        Raises
        ------
        AssertionError
            If the configured caching location is neither 'disk' nor
            'memory' (including when it is missing).
        """
        # Read the configuration once; a missing attribute counts as an
        # invalid location.
        location = getattr(_config(), 'caching', None)
        assert location in ('disk', 'memory'), (
            "config 'caching' must be 'disk' or 'memory', got %r"
            % (location,))

        def compress(v):
            # The temporary folder is created only when something is
            # actually compressed to disk.  Before, every assignment
            # created one, even for values that were never compressed.
            save_folder = tempfile.mkdtemp() if location == 'disk' else None
            return blz.barray(v, rootdir=save_folder)

        self.compress = compress

        # Exact type check on purpose: ndarray subclasses are stored as is.
        if type(value) is np.ndarray:
            value = self.compress(value)
        return super(CompressedCache, self).__setitem__(key, value)
Пример #45
0
def modelPredictor(modelsPath_modelIndex_dataPath_colNames_tuple):
    """
    Predict with one pickled model over on-disk columns and store the result.

    Input: A tuple, with following four attributes (with order):
            modelsPath: string, the path to the trained models. (pickle file)
            modelIndex: integer, the index of the model to predict.
            dataPath: string, the path to the data.
            colNames: a list of strings, column names of the output table. It should be like ["Id", "V1", ...]
    Output: A btable with two columns, "Id" and "Predict", stored on disk.

    Notes:
    modelPredictor will create following directories for you if they do not exist.
            1. Model_No{modelIndex}_predicted_array: it will be under the dataPath.
    An existing directory with that name is removed first.
    """
    import shutil  # local import: only needed to drop a stale result directory

    # Number of observations predicted per batch.
    divideN = 300000
    modelsPath, modelIndex, dataPath, colNames = modelsPath_modelIndex_dataPath_colNames_tuple
    # NOTE(review): pickle.load can execute arbitrary code; modelsPath must
    # come from a trusted source.
    with open(modelsPath, "rb") as rf:
        models = pickle.load(rf)
    model = models[modelIndex]
    del models  # only the selected model is needed from here on

    # The first column holds the ids; its length is the number of rows.
    Id = blz.open(os.path.join(dataPath, colNames[0]))
    totalN = len(Id)
    # Batch boundaries: 0, divideN, 2*divideN, ..., totalN.
    nodes_list = list(range(0, totalN, divideN)) + [totalN]
    nodes_pair_list = zip(nodes_list[:-1], nodes_list[1:])

    # Open every feature column once instead of once per batch.
    feature_columns = [blz.open(os.path.join(dataPath, colname))
                       for colname in colNames[1:]]

    # Prediction.
    y_predict = np.zeros(totalN)
    print("[Model No.{modelIndex}] Prediction process begins.".format(modelIndex = modelIndex))
    for begin, end in nodes_pair_list:
        print("[Model No.{modelIndex}] Processing {begin} ~ {end} observations.".format(modelIndex=modelIndex, begin = begin + 1, end = end))
        X = np.column_stack([column[begin:end] for column in feature_columns])
        y_predict[begin:end] = model.predict(X)

    columns = [Id, blz.barray(y_predict)]
    data_rootdir = os.path.join(dataPath, "Model_No{modelIndex}_predicted_array".format(modelIndex = modelIndex))
    # The old check compared the joined path against the bare names from
    # os.listdir(), so a stale directory was never found.
    if os.path.isdir(data_rootdir):
        print("Removing Old result_table directory for new btable.")
        shutil.rmtree(data_rootdir)
    final_table = blz.btable(columns = columns, names = ["Id", "Predict"], rootdir = data_rootdir)
    print("The result_table btable rootdir is under {path}".format(path=data_rootdir))
    return final_table
Пример #46
0
    def test_barray_2d_source(self):
        """Testing barray of objects, 2d source

        A list of 2-tuples stored with dtype 'O' is expected to give a
        1d barray whose elements are containers holding the inner
        dimension.
        """
        pairs = [(n, 's' * n) for n in range(10)]
        objects = blz.barray(pairs, dtype=np.dtype('O'))
        # note that barray should always create a 1 dimensional
        # array of objects.
        self.assertEqual(len(objects.shape), 1)
        self.assertEqual(len(pairs), objects.shape[0])
        for pos in range(len(objects)):
            for field in (0, 1):
                self.assertEqual(objects[pos][field], pairs[pos][field])
Пример #47
0
    def test_barray_tuple_source(self):
        """Testing a barray of objects that are tuples

        The source is a NumPy object array holding tuples; the elements
        read back from the barray must still be tuples with the same
        contents.
        """
        tuples = np.empty((10,), dtype=np.dtype('O'))
        tuples[:] = [(n, 's' * n) for n in range(tuples.shape[0])]
        objects = blz.barray(tuples)
        self.assertEqual(len(objects.shape), 1)
        self.assertEqual(len(tuples), objects.shape[0])
        first_type = type(objects[0])
        self.assertEqual(first_type, tuple)
        self.assertEqual(type(objects[0]), type(tuples[0]))
        for pos in range(len(objects)):
            for field in (0, 1):
                self.assertEqual(objects[pos][field], tuples[pos][field])
Пример #48
0
    def _test_barray_record_inferred_opt2(self):
        """Testing barray handling of inferred record dtypes
        containing objects.

        No dtype is passed to the barray constructor; the result is
        expected to be one dimensional, as if it had been created with
        dtype 'O'.
        """
        records = np.empty((10,), dtype=np.dtype('u1,O'))
        records[:] = [(n, 's' * n) for n in range(10)]

        objects = blz.barray(records)
        # note: this is similar as if it was created with dtype='O'
        self.assertEqual(len(objects.shape), 1)
        self.assertEqual(len(records), objects.shape[0])
        for pos in range(len(objects)):
            for field in (0, 1):
                self.assertEqual(objects[pos][field], records[pos][field])
Пример #49
0
## Benchmark to check the creation of an array of length > 2**32 (5e9)

import blz
from time import time

# Number of elements, as an integer so it can be used directly as a length
# and compared against len().
N = int(5e9)

# Create the array on disk and time it.
t0 = time()
# In-memory alternative:
#cn = blz.zeros(N, dtype="i1")
cn = blz.zeros(N, dtype="i1", rootdir='ondisk_barray', mode='w')
# print() with one pre-formatted string behaves the same on Python 2 and 3.
print("Creation time: %s" % round(time() - t0, 3))
assert len(cn) == N

# Re-open the on-disk array in append mode and time it.
t0 = time()
cn = blz.barray(rootdir='ondisk_barray', mode='a')
print("Re-open time: %s" % round(time() - t0, 3))
print("len(cn) %s" % len(cn))
assert len(cn) == N

# Now check some accesses
cn[1] = 1
assert cn[1] == 1
cn[int(2e9)] = 2
assert cn[int(2e9)] == 2
# int() replaces the Python-2-only long(); it still yields a big integer.
cn[int(3e9)] = 3
assert cn[int(3e9)] == 3
cn[-1] = 4
assert cn[-1] == 4

# The four values written above add up to 10; everything else is zero.
t0 = time()
assert cn.sum() == 10
print("Sum time: %s" % round(time() - t0, 3))
def array(obj, dshape=None, ddesc=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If neither `obj` nor `ddesc` is given, if `ddesc` is given while
        `obj` is already a DDesc, if `obj` is given together with a DyND
        `ddesc`, or if `obj` cannot be converted into a dynd array.

    """
    dshape = _normalize_dshape(dshape)

    # With an explicit dshape, coerce concrete (non-generator) input now.
    if ((obj is not None) and (not inspect.isgenerator(obj))
            and (dshape is not None)):
        dt = ndt.type(str(dshape))
        if dt.ndim > 0:
            obj = nd.array(obj, type=dt, access='rw')
        else:
            obj = nd.array(obj, dtype=dt, access='rw')

    if obj is None and ddesc is None:
        raise ValueError('you need to specify at least `obj` or `ddesc`')

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, DDesc):
        if ddesc is not None:
            raise ValueError(('you cannot specify `ddesc` when `obj` '
                              'is already a DDesc instance'))
        return Array(obj)

    if ddesc is None:
        # Use a dynd ddesc by default
        try:
            dynd_arr = nd.asarray(obj, access='rw')
        except Exception:
            # `except Exception` lets KeyboardInterrupt/SystemExit through.
            # The 1-tuple keeps the formatting valid when obj is a tuple.
            raise ValueError(('failed to construct a dynd array from '
                              'object %r') % (obj,))
        return Array(DyND_DDesc(dynd_arr))

    # The DDesc has been specified
    if isinstance(ddesc, DyND_DDesc):
        if obj is not None:
            raise ValueError(('you cannot specify simultaneously '
                              '`obj` and a DyND `ddesc`'))
        return Array(ddesc)
    elif isinstance(ddesc, BLZ_DDesc):
        if inspect.isgenerator(obj):
            dt = None if dshape is None else to_numpy_dtype(dshape)
            # TODO: Generator logic could go inside barray
            ddesc.blzarr = blz.fromiter(obj,
                                        dtype=dt,
                                        count=-1,
                                        rootdir=ddesc.path,
                                        mode=ddesc.mode,
                                        **ddesc.kwargs)
        else:
            if isinstance(obj, nd.array):
                obj = nd.as_numpy(obj)
            # Record measures are stored as a btable, the rest as a barray.
            if dshape and isinstance(dshape.measure, datashape.Record):
                ddesc.blzarr = blz.btable(obj,
                                          rootdir=ddesc.path,
                                          mode=ddesc.mode,
                                          **ddesc.kwargs)
            else:
                ddesc.blzarr = blz.barray(obj,
                                          rootdir=ddesc.path,
                                          mode=ddesc.mode,
                                          **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        if isinstance(obj, nd.array):
            obj = nd.as_numpy(obj)
        with tb.open_file(ddesc.path, mode=ddesc.mode) as f:
            where, name = split_path(ddesc.datapath)
            if dshape and isinstance(dshape.measure, datashape.Record):
                # Convert the structured array to unaligned dtype
                # We need that because PyTables only accepts unaligned types,
                # which are the default in NumPy
                obj = np.array(obj, datashape.to_numpy_dtype(dshape.measure))
                f.create_table(where, name, filters=ddesc.filters, obj=obj)
            else:
                f.create_earray(where, name, filters=ddesc.filters, obj=obj)
        ddesc.mode = 'a'  # change into 'a'ppend mode for further operations

    return Array(ddesc)
Пример #51
0
# Benchmark: time the evaluation of a boolean expression on a btable.
import numpy as np
import blz
from time import time

N = 1e8  # the number of elements in x
clevel = 5  # the compression level
sexpr = "(x-1) < 10."  # the expression to compute
#sexpr = "((x-1) % 1000) == 0."  # the expression to compute
#sexpr = "(2*x**3+.3*y**2+z+1)<0"  # the expression to compute

bparams = blz.bparams(clevel)

# print() with one pre-formatted string behaves the same on Python 2 and 3.
print("Creating inputs with %d elements..." % N)

x = np.arange(N)
cx = blz.barray(x, bparams=bparams)
# Only build the y and z columns when the expression refers to them.
if 'y' not in sexpr:
    ct = blz.btable((cx, ), names=['x'])
else:
    y = np.arange(N)
    z = np.arange(N)
    cy = blz.barray(y, bparams=bparams)
    cz = blz.barray(z, bparams=bparams)
    ct = blz.btable((cx, cy, cz), names=['x', 'y', 'z'])

print("Evaluating... %s" % sexpr)
t0 = time()
cbout = ct.eval(sexpr)
print("Time for evaluation--> %.3f" % (time() - t0, ))
print("Converting to numy arrays")
bout = cbout[:]
Пример #52
0
# Benchmark: time barray creation in memory and on disk, a NumPy copy, and
# re-opening the on-disk barray.
import numpy as np
import blz
from time import time

N = 100 * 1000 * 1000
CLEVEL = 5

a = np.linspace(0, 1, N)

# In-memory barray.
t0 = time()
ac = blz.barray(a, bparams=blz.bparams(clevel=CLEVEL))
# print() with one pre-formatted string behaves the same on Python 2 and 3.
print("time creation (memory) -> %s" % round(time()-t0, 3))
print("data (memory): %s" % repr(ac))

# On-disk barray; the flush is part of the timed section.
t0 = time()
b = blz.barray(a, bparams=blz.bparams(clevel=CLEVEL),
               rootdir='myarray', mode='w')
b.flush()
print("time creation (disk) -> %s" % round(time()-t0, 3))
#print("meta (disk): %s" % b.read_meta())

# Plain NumPy copy, for comparison.
t0 = time()
an = np.array(a)
print("time creation (numpy) -> %s" % round(time()-t0, 3))

# Re-open the array written above.
t0 = time()
c = blz.barray(rootdir='myarray')
print("time open (disk) -> %s" % round(time()-t0, 3))
#print("meta (disk): %s" % c.read_meta())
print("data (disk): %s" % repr(c))
Пример #53
0
    #print "cout-->", repr(cout)


if __name__=="__main__":
    # Time a numexpr evaluation of `sexpr`, then optionally profile
    # compute_barray() on the same expression.

    N = 1e8       # the number of elements in x
    clevel = 5    # the compression level
    # Alternative expressions (the first used to be assigned and then
    # immediately overwritten):
    #sexpr = "(x+1)<0"
    sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)<0"
    #sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)"
    doprofile = True

    print("Creating inputs...")
    # `x` is the variable the expression string refers to.
    x = np.arange(N)
    #x = np.linspace(0,100,N)
    cx = blz.barray(x, bparams=blz.bparams(clevel))

    print("Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N))))

    t0 = time()
    cout = ne.evaluate(sexpr)
    # Uses print() like the calls above, so it parses on Python 3 too.
    print("Time for numexpr --> %.3f" % (time()-t0,))

    if doprofile:
        import pstats
        import cProfile as prof
        # Profile into 'eval.prof'; the commented line selects the python
        # kernel instead.
        prof.run('compute_barray(sexpr, clevel=clevel, kernel="numexpr")',
        #prof.run('compute_barray(sexpr, clevel=clevel, kernel="python")',
                 'eval.prof')
        stats = pstats.Stats('eval.prof')
        stats.strip_dirs()
Пример #54
0
def append(data, clevel, cname):
    """Build one barray from a sequence of arrays.

    The first element of `data` creates the barray, using compression
    level `clevel` and compressor `cname`; every further element is
    appended to it.  Returns the resulting barray.
    """
    params = blz.bparams(clevel, cname=cname)
    merged = blz.barray(data[0], bparams=params)
    for chunk in data[1:]:
        merged.append(chunk)
    return merged
Пример #55
0
# Benchmark: compare sum() on a NumPy boolean array and on its barray.
import numpy as np
import blz
from time import time

# Integer on purpose: np.random.randint() requires an integral `size`.
N = int(1e8)
#a = np.arange(N, dtype='f8')
a = np.random.randint(0, 10, N).astype('bool')

t0 = time()
sa = a.sum()
# print() with one pre-formatted string behaves the same on Python 2 and 3.
print("Time sum() numpy --> %.3f" % (time() - t0))

t0 = time()
ac = blz.barray(a, bparams=blz.bparams(9))
print("Time barry conv --> %.3f" % (time() - t0))
# %r replaces the Python-2-only backtick repr syntax.
print("ac--> %r" % (ac,))

t0 = time()
sac = ac.sum()
#sac = ac.sum(dtype=np.dtype('i8'))
print("Time sum() barray --> %.3f" % (time() - t0))

# t0 = time()
# sac = sum(i for i in ac)
# print("Time sum() carray (iter) --> %.3f" % (time()-t0))

print("sa, sac--> %s %s %s %s" % (sa, sac, type(sa), type(sac)))
assert (sa == sac)
Пример #56
0
# Benchmark: time single-element access on a NumPy array (the barray part
# follows).
import numpy as np  # was missing although np.zeros() is used below
import blz
from time import time

# Integer on purpose: np.zeros() requires an integral shape.
N = int(1e7)  # the number of elements in x
M = 100000  # the elements to get
clevel = 1  # the compression level

# print() with one pre-formatted string behaves the same on Python 2 and 3.
print("Creating inputs with %d elements..." % N)

bparams = blz.bparams(clevel)

#x = np.arange(N)
x = np.zeros(N, dtype="f8")
y = x.copy()
z = x.copy()
cx = blz.barray(x, bparams=bparams)
cy = cx.copy()
cz = cx.copy()
ct = blz.btable((cx, cy, cz), names=['x', 'y', 'z'])
t = ct[:]

print("Starting benchmark now for getting %d elements..." % M)
# Retrieve from a ndarray
t0 = time()
# range() replaces the Python-2-only xrange().
vals = [x[i] for i in range(0, M, 3)]
print("Time for array--> %.3f" % (time() - t0, ))
print("vals--> %s" % len(vals))

#blz.set_num_threads(blz.ncores//2)

# Retrieve from a barray
Пример #57
0
def array(obj, dshape=None, caps=None, storage=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array. An existing ``Array`` is returned
        unchanged; an ``IDataDescriptor`` is wrapped directly.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, it is handed to the underlying constructor (BLZ or
        DyND) as the requested type.

    caps : capabilities dictionary, optional
        A dictionary containing the desired capabilities of the array.
        Defaults to ``{'efficient-write': True}``. Only the
        'efficient-write' and 'compress' keys are consulted, in that
        order, and a key only takes effect when its value is exactly
        ``True``.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.
        When it is not None after conversion, the data goes into a
        barray rooted at ``storage.path`` and `caps` is not consulted.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no storage or capability applies and `obj` is not a NumPy
        array, a DyND array or a barray.

    Notes
    -----
    The explicit dshape is ignored when `obj` is already an ``Array`` or
    an ``IDataDescriptor``, and in the final fallback branches for
    ndarray / nd.array / barray inputs. Coercion failures are not
    handled here: whatever the underlying constructor raises propagates.

    """
    if caps is None:
        # Built per call so that callers never share one mutable default.
        caps = {'efficient-write': True}

    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: but if it is already an IDataDescriptor I wonder
        # if `caps` should be ignored.  Hmm, probably not...
        #
        # Note by Oscar: Maybe we shouldn't accept a datadescriptor at
        #   all at this level. If you've got a DataDescriptor you are
        #   playing with internal datastructures anyways, go to the
        #   Array constructor directly. If you want to transform to
        #   another datadescriptor... convert it yourself (you are
        #   playing with internal datastructures, remember? you should
        #   be able to do it in your own.
        dd = obj
    elif storage is not None:
        # Persistent array: storage takes precedence over `caps`.
        dt = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            # NOTE(review): the in-memory 'compress' branch below uses
            # blz.fromiter for generators; confirm that blz.barray
            # accepts `count` here.
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, count=-1, rootdir=storage.path))
        else:
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, rootdir=storage.path))
    elif caps.get('efficient-write') is True:
        # In-Memory array
        if dshape is None:
            dd = DyNDDataDescriptor(nd.asarray(obj, access='rw'))
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dt = ndt.type(str(dshape))
            if dt.ndim > 0:
                dd = DyNDDataDescriptor(nd.array(obj, type=dt, access='rw'))
            else:
                dd = DyNDDataDescriptor(nd.array(obj, dtype=dt, access='rw'))
    elif caps.get('compress') is True:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1))
        else:
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt))

    # No storage and no recognised capability: wrap objects that already
    # are one of the supported array types without converting them.
    elif isinstance(obj, np.ndarray):
        dd = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        dd = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        dd = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                         'object of type %r') % type(obj))
    return Array(dd)
Пример #58
0
# Benchmark to compare times for iterators in generator contexts by
# using barrays vs plain numpy arrays.

import numpy as np
import blz
from time import time

N = 1e7

a = np.arange(N)
b = blz.barray(a)

# Both timings sum the values below 10 among every third element starting
# at index 2, iterating element by element through a generator expression.
t0 = time()
#sum1 = sum(a)
sum1 = sum(v for v in a[2::3] if v < 10)
t1 = time() - t0
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Summing using numpy iterator: %.3f" % t1)

t0 = time()
#sum2 = sum(b)
sum2 = sum(v for v in b.iter(2, None, 3) if v < 10)
t2 = time() - t0
print("Summing using barray iterator: %.3f  speedup: %.2f" % (t2, t1/t2))

assert sum1 == sum2, "Summations are not equal!"
Пример #59
0
        kernel,
        time() - t0,
    ),
    print ", cratio (out): %.1f" % (cout.nbytes / float(cout.cbytes))
    #print "cout-->", repr(cout)


if __name__ == "__main__":

    print "Creating inputs..."

    bparams = blz.bparams(clevel)

    y = x.copy()
    z = x.copy()
    cx = blz.barray(x, bparams=bparams)
    cy = blz.barray(y, bparams=bparams)
    cz = blz.barray(z, bparams=bparams)

    for sexpr in sexprs:
        print "Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N)))
        compute_ref(sexpr)
        for kernel in "python", "numexpr":
            compute_blz(sexpr, clevel=0, kernel=kernel)
        if doprofile:
            import pstats
            import cProfile as prof
            #prof.run('compute_blz(sexpr, clevel=clevel, kernel="numexpr")',
            prof.run(
                'compute_blz(sexpr, clevel=0, kernel="numexpr")',
                #prof.run('compute_blz(sexpr, clevel=clevel, kernel="python")',