def open(rootdir, mode='a'):
    # ----------------------------------------------------------------------
    # https://github.com/Blosc/bcolz/blob/master/bcolz/toplevel.py#L104-L132
    # ----------------------------------------------------------------------
    """
    open(rootdir, mode='a')

    Open a disk-based carray/ctable as a bquery object.

    The kind of object is chosen by looking for the `ROOTDIRS` file inside
    `rootdir`: when it exists the directory is opened as a `bquery.ctable`,
    otherwise it is opened as a `bquery.carray`.

    Parameters
    ----------
    rootdir : pathname (string)
        The directory hosting the carray/ctable object.
    mode : the open mode (string)
        Passed through unchanged to the bquery constructor. The documented
        values are:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

    Returns
    -------
    out : a bquery carray/ctable object
        Any error raised by the underlying constructor (for instance when
        no object is found under `rootdir`) propagates to the caller.

    """
    # A ctable directory is recognised by its ROOTDIRS marker file; anything
    # else is handed to the carray constructor.
    marker_path = os.path.join(rootdir, ROOTDIRS)
    if not os.path.exists(marker_path):
        return bquery.carray(rootdir=rootdir, mode=mode)
    return bquery.ctable(rootdir=rootdir, mode=mode)
def test_where_terms00(self):
    """
    test_where_terms00: get terms in one column bigger than a certain value
    """
    n_rows = 20000
    threshold = 10000

    # reference mask: True exactly for the values above the threshold
    expected_mask = np.fromiter(
        ((value > threshold) for value in range(n_rows)), dtype='bool')
    expected = bquery.carray(expected_mask)

    # two identical integer columns holding 0 .. n_rows - 1
    rows = ((value, value) for value in range(n_rows))
    table_data = np.fromiter(rows, dtype='i8,i8')
    table = bquery.ctable(table_data, rootdir=self.rootdir)

    # 'f0' is the first field of the 'i8,i8' structured dtype
    actual = table.where_terms([('f0', '>', threshold)])

    assert_array_equal(actual, expected)
def where_terms(self, term_list):
    """
    Build a boolean mask of the rows for which every term in `term_list`
    holds (terms are AND-ed together).

    TEMPORARY WORKAROUND TILL NUMEXPR WORKS WITH IN

    A terms list has a [(col, operator, value), ..] construction.
    Eg. [('sales', '>', 2), ('state', 'in', ['IL', 'AR'])]

    Each term is routed to one of two evaluators:

    * operators found in `opMap`, and 'in' / 'not in' with more than one
      value, are collected and applied one by one on a numpy mask;
    * every other operator (and 'in' / 'not in' with exactly one value,
      rewritten to '==' / '!=') is appended to a single expression string
      that is evaluated with `self.eval`.

    The postgres-style operators '=' and '<>' are translated to '==' and
    '!=' first.

    :param term_list: list, set or tuple of (col, operator, value) terms
    :return: the boolean mask. It is a `bquery.carray` when at least one
        collected term was applied, the raw result of `self.eval` when
        only expression terms were given, and a numpy array of ones when
        `term_list` is empty.
    :raise ValueError: if `term_list` is not a list, set or tuple, or if an
        'in' / 'not in' value is not a non-empty list, set or tuple
    """
    if not isinstance(term_list, (list, set, tuple)):
        raise ValueError("Only term lists are supported")

    list_operators = ('in', 'not in')
    eval_parts = []   # sub-expressions for self.eval, AND-ed together
    eval_list = []    # (col, operator, value) terms applied on the mask

    for term in term_list:
        filter_col = term[0]
        filter_operator = term[1].lower()
        filter_value = term[2]

        # convert postgres eq and ne to python equivalents
        if filter_operator == '=':
            filter_operator = '=='
        elif filter_operator == '<>':
            filter_operator = '!='

        if filter_operator in opMap:
            eval_list.append((filter_col, filter_operator, filter_value))
        elif filter_operator in list_operators:
            if not isinstance(filter_value, (list, set, tuple)):
                raise ValueError("In selections need lists, sets or tuples")
            if len(filter_value) < 1:
                raise ValueError("A value list needs to have values")
            if len(filter_value) == 1:
                # a single candidate is a plain (in)equality: handle as eval
                if filter_operator == 'not in':
                    comparison = '!='
                else:
                    comparison = '=='
                # next(iter(..)) also works for sets, which cannot be indexed
                single_value = next(iter(filter_value))
                if isinstance(single_value, str):
                    single_value = '"' + single_value + '"'
                else:
                    single_value = str(single_value)
                eval_parts.append(
                    '(' + filter_col + ' ' + comparison + ' '
                    + single_value + ')')
            else:
                if isinstance(filter_value, (list, tuple)):
                    filter_value = set(filter_value)
                eval_list.append((filter_col, filter_operator, filter_value))
        else:
            # direct filters should be added to the eval expression
            # NOTE(review): string values are not quoted here, unlike the
            # single-value 'in' path above -- confirm what callers expect.
            eval_parts.append(
                '(' + filter_col + ' ' + filter_operator + ' '
                + str(filter_value) + ')')

    # (1) Evaluate terms in eval
    if eval_parts:
        boolarr = self.eval(' & '.join(eval_parts))
        if eval_list:
            # convert to numpy array for carray_is_in / in-place updates
            boolarr = boolarr[:]
    else:
        boolarr = np.ones(self.size, dtype=bool)

    # (2) Evaluate other terms like 'in' or 'not in' ...
    for name, operator, value in eval_list:
        col = self.cols[name]
        if operator in list_operators:
            reverse = (operator == 'not in')
            ctable_ext.carray_is_in(col, set(value), boolarr, reverse)
        elif operator in opMap:
            op_func = getOperatorFunction(operator)
            # compare every row value (not the column name) with the term
            # value and clear the mask where the comparison fails
            for i, row in enumerate(col.iter()):
                if not op_func(row, value):
                    boolarr[i] = False

    if eval_list:
        # convert boolarr back to carray
        boolarr = bquery.carray(boolarr)

    return boolarr
def where_terms(self, term_list):
    """
    Build a boolean mask of the rows for which every term in `term_list`
    holds (terms are AND-ed together).

    TEMPORARY WORKAROUND TILL NUMEXPR WORKS WITH IN

    A terms list has a [(col, operator, value), ..] construction.
    Eg. [('sales', '>', 2), ('state', 'in', ['IL', 'AR'])]

    Each term is routed to one of two evaluators:

    * operators found in `opMap`, and 'in' / 'not in' with more than one
      value, are collected and applied one by one on a numpy mask;
    * every other operator (and 'in' / 'not in' with exactly one value,
      rewritten to '==' / '!=') is appended to a single expression string
      that is evaluated with `self.eval`.

    The postgres-style operators '=' and '<>' are translated to '==' and
    '!=' first.

    :param term_list: list, set or tuple of (col, operator, value) terms
    :return: the boolean mask. It is a `bquery.carray` when at least one
        collected term was applied, the raw result of `self.eval` when
        only expression terms were given, and a numpy array of ones when
        `term_list` is empty.
    :raise ValueError: if `term_list` is not a list, set or tuple, or if an
        'in' / 'not in' value is not a non-empty list, set or tuple
    """
    if not isinstance(term_list, (list, set, tuple)):
        raise ValueError("Only term lists are supported")

    list_operators = ('in', 'not in')
    eval_parts = []   # sub-expressions for self.eval, AND-ed together
    eval_list = []    # (col, operator, value) terms applied on the mask

    for term in term_list:
        filter_col = term[0]
        filter_operator = term[1].lower()
        filter_value = term[2]

        # convert postgres eq and ne to python equivalents
        if filter_operator == '=':
            filter_operator = '=='
        elif filter_operator == '<>':
            filter_operator = '!='

        if filter_operator in opMap:
            eval_list.append((filter_col, filter_operator, filter_value))
        elif filter_operator in list_operators:
            if not isinstance(filter_value, (list, set, tuple)):
                raise ValueError("In selections need lists, sets or tuples")
            if len(filter_value) < 1:
                raise ValueError("A value list needs to have values")
            if len(filter_value) == 1:
                # a single candidate is a plain (in)equality: handle as eval
                if filter_operator == 'not in':
                    comparison = '!='
                else:
                    comparison = '=='
                # next(iter(..)) also works for sets, which cannot be indexed
                single_value = next(iter(filter_value))
                if isinstance(single_value, str):
                    single_value = '"' + single_value + '"'
                else:
                    single_value = str(single_value)
                eval_parts.append(
                    '(' + filter_col + ' ' + comparison + ' '
                    + single_value + ')')
            else:
                if isinstance(filter_value, (list, tuple)):
                    filter_value = set(filter_value)
                eval_list.append((filter_col, filter_operator, filter_value))
        else:
            # direct filters should be added to the eval expression
            # NOTE(review): string values are not quoted here, unlike the
            # single-value 'in' path above -- confirm what callers expect.
            eval_parts.append(
                '(' + filter_col + ' ' + filter_operator + ' '
                + str(filter_value) + ')')

    # (1) Evaluate terms in eval
    if eval_parts:
        boolarr = self.eval(' & '.join(eval_parts))
        if eval_list:
            # convert to numpy array for carray_is_in / in-place updates
            boolarr = boolarr[:]
    else:
        boolarr = np.ones(self.size, dtype=bool)

    # (2) Evaluate other terms like 'in' or 'not in' ...
    for name, operator, value in eval_list:
        col = self.cols[name]
        if operator in list_operators:
            reverse = (operator == 'not in')
            ctable_ext.carray_is_in(col, set(value), boolarr, reverse)
        elif operator in opMap:
            op_func = getOperatorFunction(operator)
            # compare every row value (not the column name) with the term
            # value and clear the mask where the comparison fails
            for i, row in enumerate(col.iter()):
                if not op_func(row, value):
                    boolarr[i] = False

    if eval_list:
        # convert boolarr back to carray
        boolarr = bquery.carray(boolarr)

    return boolarr