def into(a, b, **kwargs):
    """Build a ``bcolz.ctable`` from the named fields of the dynd array ``b``.

    ``a`` is not read here. Any extra keyword arguments are forwarded
    unchanged to ``bcolz.ctable``.
    """
    field_names = dshape(nd.dshape_of(b))[1].names

    def _to_column(field):
        # Fields whose numpy dtype is ``object`` are converted through
        # plain Python objects; every other field is converted by calling
        # ``into`` again with an ndarray target.
        if to_numpy_dtype(dshape(nd.dshape_of(field))) == np.dtype('O'):
            return np.asarray(nd.as_py(field))
        return into(np.ndarray(0), field)

    # Fetch every field first, then convert, as two separate passes.
    raw_fields = [getattr(b, name) for name in field_names]
    converted = [_to_column(field) for field in raw_fields]
    return bcolz.ctable(converted, names=field_names, **kwargs)
def into(a, b, **kwargs):
    """Convert the dynd array ``b`` into a ``bcolz.ctable``, one column per field.

    ``a`` is not read here. Extra keyword arguments are passed straight
    through to ``bcolz.ctable``.
    """
    names = dshape(nd.dshape_of(b))[1].names
    fields = [getattr(b, name) for name in names]

    columns = []
    for field in fields:
        field_dtype = to_numpy_dtype(dshape(nd.dshape_of(field)))
        if field_dtype == np.dtype('O'):
            # Object-typed fields are materialised via Python objects.
            column = np.asarray(nd.as_py(field))
        else:
            # Everything else is converted by ``into`` with an ndarray target.
            column = into(np.ndarray(0), field)
        columns.append(column)

    return bcolz.ctable(columns, names=names, **kwargs)
def groupby(self, json_cmd):
    """Run a group-by on a session array and record both results as deferred arrays.

    ``json_cmd`` is JSON text. It must contain ``'fields'`` and may contain
    ``'input'``, the URL of the array to group; when absent the URL defaults
    to ``self.base_url + self.array_name``.

    Returns a ``(content_type, body)`` tuple where ``body`` is JSON text with
    the keys ``session``, ``output_gb``, ``dshape_gb``, ``output_groups`` and
    ``dshape_groups``.

    Raises ``RuntimeError`` when the input URL does not start with
    ``self.base_url``.
    """
    print('GroupBy operation')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    fields = cmd['fields']

    arr = self.get_session_array(array_name)[...].ddesc.dynd_arr()

    # Do the groupby, get its groups, then
    # evaluate it because deferred operations
    # through the groupby won't work well yet.
    res = nd.groupby(arr, nd.fields(arr, *fields))
    groups = res.groups
    res = res.eval()

    # Write out the groupby result
    defarr_gb = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groupby_', array(res))
    # Element 0 is used as a writable handle; close it even when
    # describing or serialising the result fails, so it is never leaked.
    try:
        dshape_gb = nd.dshape_of(res)
        defarr_gb[0].write(json.dumps({
            'dshape': dshape_gb,
            'command': 'groupby',
            'params': {
                'fields': fields
            }
        }))
    finally:
        defarr_gb[0].close()

    # Write out the groups
    defarr_groups = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groups_', groups)
    try:
        dshape_groups = nd.dshape_of(groups)
        defarr_groups[0].write(json.dumps({
            'dshape': dshape_groups,
            'command': 'groupby.groups',
            'params': {
                'fields': fields
            }
        }))
    finally:
        defarr_groups[0].close()

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output_gb': self.base_url + defarr_gb[1],
        'dshape_gb': dshape_gb,
        'output_groups': self.base_url + defarr_groups[1],
        'dshape_groups': dshape_groups
    })
    return (content_type, body)
def groupby(self, json_cmd):
    """Group a session array by the requested fields and publish the results.

    ``json_cmd`` is JSON text with a required ``'fields'`` entry and an
    optional ``'input'`` URL (default: ``self.base_url + self.array_name``).

    Two deferred arrays are created through ``self.array_provider``: one for
    the evaluated group-by result and one for its groups. Each receives a
    JSON description holding the datashape, the command name and the fields.

    Returns ``(content_type, body)``; ``body`` is JSON text with the keys
    ``session``, ``output_gb``, ``dshape_gb``, ``output_groups`` and
    ``dshape_groups``.

    Raises ``RuntimeError`` if the input URL is not under ``self.base_url``.
    """
    print('GroupBy operation')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    fields = cmd['fields']

    arr = self.get_session_array(array_name)[...].ddesc.dynd_arr()

    # Do the groupby, get its groups, then
    # evaluate it because deferred operations
    # through the groupby won't work well yet.
    res = nd.groupby(arr, nd.fields(arr, *fields))
    groups = res.groups
    res = res.eval()

    # Write out the groupby result
    defarr_gb = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groupby_', array(res))
    try:
        dshape_gb = nd.dshape_of(res)
        defarr_gb[0].write(json.dumps({
            'dshape': dshape_gb,
            'command': 'groupby',
            'params': {
                'fields': fields
            }
        }))
    finally:
        # Always release the handle, including when the write fails.
        defarr_gb[0].close()

    # Write out the groups
    defarr_groups = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groups_', groups)
    try:
        dshape_groups = nd.dshape_of(groups)
        defarr_groups[0].write(json.dumps({
            'dshape': dshape_groups,
            'command': 'groupby.groups',
            'params': {
                'fields': fields
            }
        }))
    finally:
        # Always release the handle, including when the write fails.
        defarr_groups[0].close()

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output_gb': self.base_url + defarr_gb[1],
        'dshape_gb': dshape_gb,
        'output_groups': self.base_url + defarr_groups[1],
        'dshape_groups': dshape_groups
    })
    return (content_type, body)
def make_computed_fields(self, json_cmd):
    """Add computed fields to a session array and record the result as a deferred array.

    ``json_cmd`` is JSON text with:

    * ``'fields'`` (required), passed to ``nd.make_computed_fields``;
    * ``'input'`` (optional), the array URL, defaulting to
      ``self.base_url + self.array_name``;
    * ``'replace_undim'`` (optional, default ``0``);
    * ``'fnname'`` (optional, default ``None``).

    Returns ``(content_type, body)`` where ``body`` is JSON text with the
    keys ``session``, ``output`` and ``dshape``.

    Raises ``RuntimeError`` when the input URL does not start with
    ``self.base_url``.
    """
    print('Adding computed fields')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    fields = cmd['fields']
    replace_undim = cmd.get('replace_undim', 0)
    fnname = cmd.get('fnname', None)

    arr = self.get_session_array(array_name).ddesc.dynd_arr()

    res = nd.make_computed_fields(arr, replace_undim, fields, fnname)
    defarr = self.array_provider.create_deferred_array_filename(
        self.session_name, 'computed_fields_', array(res))
    # Element 0 is used as a writable handle; close it even when
    # describing or serialising the result fails, so it is never leaked.
    try:
        dshape = nd.dshape_of(res)
        defarr[0].write(json.dumps({
            'dshape': dshape,
            'command': 'make_computed_fields',
            'params': {
                'fields': fields,
                'replace_undim': replace_undim,
                'fnname': fnname
            }
        }))
    finally:
        defarr[0].close()

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output': self.base_url + defarr[1],
        'dshape': dshape
    })
    return (content_type, body)
def sort(self, json_cmd):
    """Sort a session array by one field and record the result as a deferred array.

    ``json_cmd`` is JSON text with a required ``'field'`` entry naming the
    sort key and an optional ``'input'`` URL (default:
    ``self.base_url + self.array_name``).

    The array is converted with ``as_numpy``, ordered with ``np.argsort`` on
    the chosen field, and wrapped again with ``nd.ndobject``.

    Returns ``(content_type, body)`` where ``body`` is JSON text with the
    keys ``session``, ``output`` and ``dshape``.

    Raises ``RuntimeError`` when the input URL does not start with
    ``self.base_url``.
    """
    import numpy as np

    print('sorting')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    field = cmd['field']

    arr = self.get_session_array(array_name)
    nparr = as_numpy(arr)
    idxs = np.argsort(nparr[field])
    res = nd.ndobject(nparr[idxs])

    defarr = self.array_provider.create_deferred_array_filename(
        self.session_name, 'sort_', res)
    # Element 0 is used as a writable handle; close it even when
    # describing or serialising the result fails, so it is never leaked.
    try:
        dshape = nd.dshape_of(res)
        defarr[0].write(json.dumps({
            'dshape': dshape,
            'command': 'sort',
            'params': {
                'field': field,
            }
        }))
    finally:
        defarr[0].close()

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output': self.base_url + defarr[1],
        'dshape': dshape
    })
    return (content_type, body)
def make_computed_fields(self, json_cmd):
    """Compute extra fields for a session array and publish the result.

    ``json_cmd`` is JSON text. ``'fields'`` is required; ``'input'``
    (default ``self.base_url + self.array_name``), ``'replace_undim'``
    (default ``0``) and ``'fnname'`` (default ``None``) are optional. The
    last three values are forwarded to ``nd.make_computed_fields`` together
    with the session array.

    Returns ``(content_type, body)``; ``body`` is JSON text with the keys
    ``session``, ``output`` and ``dshape``.

    Raises ``RuntimeError`` if the input URL is not under ``self.base_url``.
    """
    print('Adding computed fields')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    fields = cmd['fields']
    replace_undim = cmd.get('replace_undim', 0)
    fnname = cmd.get('fnname', None)

    arr = self.get_session_array(array_name).ddesc.dynd_arr()

    res = nd.make_computed_fields(arr, replace_undim, fields, fnname)
    defarr = self.array_provider.create_deferred_array_filename(
        self.session_name, 'computed_fields_', array(res))
    try:
        dshape = nd.dshape_of(res)
        defarr[0].write(json.dumps({
            'dshape': dshape,
            'command': 'make_computed_fields',
            'params': {
                'fields': fields,
                'replace_undim': replace_undim,
                'fnname': fnname
            }
        }))
    finally:
        # Always release the handle, including when the write fails.
        defarr[0].close()

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output': self.base_url + defarr[1],
        'dshape': dshape
    })
    return (content_type, body)
def sort(self, json_cmd):
    """Order a session array by a single field and publish the sorted copy.

    ``json_cmd`` is JSON text; ``'field'`` (required) names the sort key and
    ``'input'`` (optional) is the array URL, defaulting to
    ``self.base_url + self.array_name``.

    Returns ``(content_type, body)``; ``body`` is JSON text with the keys
    ``session``, ``output`` and ``dshape``.

    Raises ``RuntimeError`` if the input URL is not under ``self.base_url``.
    """
    import numpy as np

    print('sorting')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    field = cmd['field']

    arr = self.get_session_array(array_name)
    nparr = as_numpy(arr)
    # argsort yields the row order; indexing with it produces the sorted rows.
    idxs = np.argsort(nparr[field])
    res = nd.ndobject(nparr[idxs])

    defarr = self.array_provider.create_deferred_array_filename(
        self.session_name, 'sort_', res)
    try:
        dshape = nd.dshape_of(res)
        defarr[0].write(json.dumps({
            'dshape': dshape,
            'command': 'sort',
            'params': {
                'field': field,
            }
        }))
    finally:
        # Always release the handle, including when the write fails.
        defarr[0].close()

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output': self.base_url + defarr[1],
        'dshape': dshape
    })
    return (content_type, body)
def __init__(self, dyndarr, nindex):
    """Store ``dyndarr`` together with the datashape left after ``nindex`` indices.

    Raises ``IndexError`` when ``nindex`` exceeds the number of dimensions
    reported by ``nd.ndim_of``.
    """
    ndim = nd.ndim_of(dyndarr)
    if nindex > ndim:
        raise IndexError('Cannot have more indices than dimensions')

    self._nindex = nindex
    full_dshape = datashape.dshape(nd.dshape_of(dyndarr))
    self._dshape = full_dshape.subarray(nindex)
    # The dynd type is built from the string form of the remaining datashape.
    dshape_text = str(self._dshape)
    self._c_dtype = ndt.type(dshape_text)
    self._dyndarr = dyndarr
def test_array_from_ptr(self):
    """Check ``_lowlevel.array_from_ptr`` over a ctypes int32 buffer."""
    # cfixed_dim arrmeta is redundant so this is ok
    buf = (ctypes.c_int32 * 3)(3, 6, 9)
    addr = ctypes.addressof(buf)

    # Readwrite version using cfixed
    view = _lowlevel.array_from_ptr('cfixed[3] * int32', addr,
                                    buf, 'readwrite')
    self.assertEqual(_lowlevel.data_address_of(view), ctypes.addressof(buf))
    self.assertEqual(nd.dshape_of(view), '3 * int32')
    self.assertEqual(nd.as_py(view), [3, 6, 9])
    # A write through the dynd view must land in the ctypes buffer.
    view[1] = 10
    self.assertEqual(buf[1], 10)

    # Readonly version using cfixed
    view = _lowlevel.array_from_ptr('cfixed[3] * int32', addr,
                                    buf, 'readonly')
    self.assertEqual(nd.as_py(view), [3, 10, 9])

    def assign_to(target):
        target[1] = 100

    self.assertRaises(RuntimeError, assign_to, view)

    # Using a fixed dim default-constructs the arrmeta, so works too
    view = _lowlevel.array_from_ptr('3 * int32', addr,
                                    buf, 'readonly')
    self.assertEqual(nd.as_py(view), [3, 10, 9])

    # Should get an error if we try strided, because the size is unknown
    def make_strided():
        return _lowlevel.array_from_ptr('strided * int32',
                                        ctypes.addressof(buf),
                                        buf, 'readonly')

    self.assertRaises(RuntimeError, make_strided)
def __init__(self, dyndarr):
    """Set up iteration state over the leading dimension of ``dyndarr``.

    Raises ``IndexError`` when ``nd.ndim_of`` reports no dimensions.
    """
    ndim = nd.ndim_of(dyndarr)
    if ndim <= 0:
        raise IndexError('Need at least one dimension for iteration')

    self._index = 0
    self._len = len(dyndarr)
    # Datashape of one element, i.e. with the first dimension removed.
    element_dshape = datashape.dshape(nd.dshape_of(dyndarr)).subarray(1)
    self._dshape = element_dshape
    self._c_dtype = ndt.type(str(element_dshape))
    self._dyndarr = dyndarr
def into(a, b):
    """Convert the dynd array ``b`` into a ``pd.DataFrame``.

    Column names are chosen in this order:

    1. the columns of ``a`` when it has any;
    2. the field names of the last element of ``b``'s datashape when that
       element is a ``Record``;
    3. otherwise no explicit names are passed to ``pd.DataFrame``.
    """
    ds = dshape(nd.dshape_of(b))
    if len(a.columns) > 0:
        names = a.columns
    elif isinstance(ds[-1], Record):
        names = ds[-1].names
    else:
        names = None

    data = nd.as_py(b)
    # ``names`` may be a pandas Index here; testing its truth value directly
    # raises ValueError, so check for None and emptiness explicitly instead.
    if names is not None and len(names) > 0:
        return pd.DataFrame(data, columns=names)
    return pd.DataFrame(data)
def __init__(self, dyndarr):
    """Set up buffered iteration state over the leading dimension of ``dyndarr``.

    Raises ``IndexError`` when ``nd.ndim_of`` reports no dimensions.
    """
    ndim = nd.ndim_of(dyndarr)
    if ndim <= 0:
        raise IndexError('Need at least one dimension for iteration')

    self._index = 0
    self._len = len(dyndarr)

    full_dshape = datashape.dshape(nd.dshape_of(dyndarr))
    # Datashape of one element, i.e. with the first dimension removed.
    self._dshape = full_dshape.subarray(1)
    self._c_dtype = ndt.type(str(self._dshape))

    # A buffer is flagged as needed when the dynd type rebuilt from the
    # datashape string differs from the array's actual dynd type.
    rebuilt_type = ndt.type(str(full_dshape))
    actual_type = nd.type_of(dyndarr)
    self._usebuffer = (rebuilt_type != actual_type)
    self._buffer = None
    self._buffer_index = -1

    self._dyndarr = dyndarr
def test_array_from_ptr(self):
    """Check ``_lowlevel.array_from_ptr`` over a ctypes int32 buffer."""
    buf = (ctypes.c_int32 * 3)(3, 6, 9)
    addr = ctypes.addressof(buf)

    # Readwrite version
    view = _lowlevel.array_from_ptr(ndt.type('3 * int32'), addr,
                                    buf, 'readwrite')
    self.assertEqual(_lowlevel.data_address_of(view), ctypes.addressof(buf))
    self.assertEqual(nd.dshape_of(view), '3 * int32')
    self.assertEqual(nd.as_py(view), [3, 6, 9])
    # A write through the dynd view must land in the ctypes buffer.
    view[1] = 10
    self.assertEqual(buf[1], 10)

    # Readonly version
    view = _lowlevel.array_from_ptr(ndt.type('3 * int32'), addr,
                                    buf, 'readonly')
    self.assertEqual(nd.as_py(view), [3, 10, 9])

    def assign_to(target):
        target[1] = 100

    self.assertRaises(RuntimeError, assign_to, view)
def handle_array_query(self, environ, start_response):
    """Serve one WSGI request addressed at a Blaze array.

    A ``GET`` with an empty query string returns the HTML view produced by
    ``self.html_array``. Any other ``GET``, or a ``POST``, must carry an
    ``r`` parameter selecting one of: ``data.json``, ``datashape``,
    ``dyndtype``, ``dynddebug`` or ``create_session``.

    Responses:

    * ``200 OK`` with the selected content on success;
    * ``400 Bad Request`` when ``r`` is missing or not recognised;
    * ``404 Not Found`` for other request methods, or when handling the
      request raises an exception (the traceback is included in the body).

    Returns the response body as a single-element list, after calling
    ``start_response`` with the status and headers.
    """
    print('Handling array query')
    try:
        array_name, indexers = split_array_base(environ['PATH_INFO'])
        arr = self.get_array(array_name, indexers)
        base_url = wsgi_reconstruct_base_url(environ)
        request_method = environ['REQUEST_METHOD']
        if request_method == 'GET' and environ['QUERY_STRING'] == '':
            # This version of the array information is for human consumption
            content_type = 'text/html; charset=utf-8'
            body = self.html_array(arr, base_url, array_name, indexers)
        else:
            if request_method == 'GET':
                q = parse_qs(environ['QUERY_STRING'])
            elif request_method == 'POST':
                # the environment variable CONTENT_LENGTH may be empty or missing
                try:
                    request_body_size = int(environ.get('CONTENT_LENGTH', 0))
                except ValueError:
                    request_body_size = 0
                request_body = environ['wsgi.input'].read(request_body_size)
                q = parse_qs(request_body)
            else:
                status = '404 Not Found'
                response_headers = [('content-type', 'text/plain')]
                start_response(status, response_headers)
                return ['Unsupported request method']
            # Call form of print, matching the call at the top of the function.
            print(q)
            if 'r' not in q:
                status = '400 Bad Request'
                response_headers = [('content-type', 'text/plain')]
                # No error handler is active here, so exc_info is not passed.
                start_response(status, response_headers)
                return ['Blaze server request requires the ?r= query request type']
            q_req = q['r'][0]
            if q_req == 'data.json':
                content_type = 'application/json; charset=utf-8'
                body = nd.as_py(nd.format_json(arr).view_scalars(ndt.bytes))
            elif q_req == 'datashape':
                content_type = 'text/plain; charset=utf-8'
                body = nd.dshape_of(arr)
            elif q_req == 'dyndtype':
                content_type = 'application/json; charset=utf-8'
                body = str(nd.type_of(arr))
            elif q_req == 'dynddebug':
                content_type = 'text/plain; charset=utf-8'
                body = str(nd.debug_repr(arr))
            elif q_req == 'create_session':
                session = compute_session(self.array_provider, base_url,
                                          add_indexers_to_url(array_name, indexers))
                self.sessions[session.session_name] = session
                content_type, body = session.creation_response()
            else:
                status = '400 Bad Request'
                response_headers = [('content-type', 'text/plain')]
                # No error handler is active here, so exc_info is not passed.
                start_response(status, response_headers)
                return ['Unknown Blaze server request ?r=%s' % q['r'][0]]
    except Exception:
        # Request boundary: report any failure to the client, but let
        # SystemExit and KeyboardInterrupt propagate.
        traceback.print_exc()
        status = '404 Not Found'
        response_headers = [('content-type', 'text/plain')]
        start_response(status, response_headers, sys.exc_info())
        return ['Error getting Blaze Array\n\n' + traceback.format_exc()]

    status = '200 OK'
    response_headers = [
        ('content-type', content_type),
        ('content-length', str(len(body)))
    ]
    start_response(status, response_headers)
    return [body]
def discover(arr):
    """Return the datashape of ``arr`` as produced by ``dshape``."""
    described = nd.dshape_of(arr)
    return dshape(described)
def __init__(self, dyndarr):
    """Wrap a dynd array and cache its datashape.

    Raises ``TypeError`` when ``dyndarr`` is not an ``nd.array``.
    """
    if isinstance(dyndarr, nd.array):
        self._dyndarr = dyndarr
        self._dshape = dshape(nd.dshape_of(dyndarr))
        return

    message = 'object is not a dynd array, has type %s' % type(dyndarr)
    raise TypeError(message)
def test_var_dshape(self):
    """Check the datashape reported for an array declared with var dims."""
    # Getting the dshape can see into leading var dims
    nested = [[[1], [2, 3]]]
    arr = nd.array(nested, type='var * var * var * int32')
    reported = nd.dshape_of(arr)
    self.assertEqual(reported, '1 * 2 * var * int32')
def _dshape(self):
    """Return ``nd.dshape_of`` applied to the wrapped ``self.arr``."""
    wrapped = self.arr
    return nd.dshape_of(wrapped)