def _Dynamic_RunQuery(self, query, query_result):
    """Run a datastore query against Riak and fill in ``query_result``.

    Each query filter is handed to ``__filter_to_index_query`` on its own
    thread; the key sets those threads put on a shared queue are intersected
    and used as the inputs of a Riak MapReduce job.  Sort orders and the
    offset/limit slice are appended to the job as reduce phases.  Without
    any filter the job is started from ``client.add(entity_bucket_name)``.

    No cursor is registered for the query: every entity produced by the job
    is appended to ``query_result`` directly.

    Args:
        query: the ``datastore_pb.Query`` to execute.
        query_result: result object that receives the entity protocol
            buffers and, if ``query.compile()`` is set, the compiled query.
    """
    client = self._GetRiakClient()
    kind = query.kind()
    keys_only = query.keys_only()
    filters = query.filter_list()
    orders = query.order_list()
    offset = query.offset()
    limit = query.limit()
    namespace = query.name_space()
    logging.debug('offset: %d limit: %d', offset, limit)

    # Query history: count how often each query (hint removed) is run.
    clone = datastore_pb.Query()
    clone.CopyFrom(query)
    clone.clear_hint()
    # FIXME: use a hashable object for history
    clone.__hash__ = lambda: hash(str(clone))
    if clone in self.__query_history:
        self.__query_history[clone] += 1
    else:
        self.__query_history[clone] = 1

    entity_bucket_name = '%s_%s_%s' % (self.__app_id, namespace, kind)
    # NOTE(review): entity_bucket is never read below; the call is kept in
    # case client.bucket() has a side effect -- confirm and remove.
    entity_bucket = client.bucket(entity_bucket_name)
    binary_bucket_name = entity_bucket_name + _BINARY_BUCKET_SUFFIX
    binary_bucket = client.bucket(binary_bucket_name)

    operators = {
        datastore_pb.Query_Filter.LESS_THAN: '<',
        datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: '<=',
        datastore_pb.Query_Filter.GREATER_THAN: '>',
        datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: '>=',
        datastore_pb.Query_Filter.EQUAL: '==',
    }

    # Spawn one index-query thread per filter; the threads report back
    # through the shared queue.
    key_set_queue = Queue.Queue()
    workers = []
    for filt in filters:
        assert filt.op() != datastore_pb.Query_Filter.IN

        prop = filt.property(0).name().decode('utf-8')
        op = operators[filt.op()]
        filter_val_list = [datastore_types.FromPropertyPb(filter_prop)
                           for filter_prop in filt.property_list()]
        filter_val = self.__get_filter_value_for_query(filter_val_list[0])

        worker = threading.Thread(
            target=self.__filter_to_index_query,
            args=(entity_bucket_name, prop, op, filter_val, key_set_queue))
        workers.append(worker)
        worker.start()

    # Wait for the index query threads to finish.
    for worker in workers:
        worker.join()

    # Collect the index key sets from the shared queue.
    index_key_sets = []
    while not key_set_queue.empty():
        index_key_sets.append(key_set_queue.get())

    # An entity must satisfy every filter, hence the intersection.
    if index_key_sets:
        mapreduce_inputs = reduce(lambda acc, keys: acc.intersection(keys),
                                  index_key_sets)
    else:
        mapreduce_inputs = set()
    logging.info('mapreduce input: %d keys', len(mapreduce_inputs))

    results = []
    # With filters but no matching key there is nothing to fetch, so the
    # MapReduce job is skipped entirely.
    if not filters or mapreduce_inputs:
        # Key inputs to the MapReduce job.
        if mapreduce_inputs:
            riak_query = riak.RiakMapReduce(client)
            for entity_key in mapreduce_inputs:
                riak_query.add(entity_bucket_name, entity_key)
        else:
            riak_query = client.add(entity_bucket_name)

        riak_query.map(_JS_MAP_FUNCTION)

        for order in orders:
            prop = order.property().decode('utf-8')
            # Compare by value: DESCENDING is an integer constant and
            # identity comparison only works by interpreter accident.
            if order.direction() == datastore_pb.Query_Order.DESCENDING:
                direction = 'desc'
            else:
                direction = 'asc'
            logging.debug('sort(%s, %s)', prop, direction)
            riak_query.reduce(_JS_REDUCE_SORT_FUNCTION,
                              {'arg': {'by': prop, 'order': direction}})

        if limit:
            # Reduce phase for applying offset and limit.
            start = offset
            end = offset + limit
            # NOTE(review): the intent of forcing end to 0 is not evident
            # from this code -- confirm against Riak.reduceSlice semantics.
            if end > len(mapreduce_inputs) and filters:
                end = 0
            logging.debug('slice(start: %d, end:%d)', start, end)
            riak_query.reduce('Riak.reduceSlice', {'arg': [start, end]})

        for phase in riak_query._phases:
            logging.debug(phase.to_array())

        for metadata, riak_entity in riak_query.run():
            key = datastore_types.Key(encoded=metadata['X-Riak-Meta-Key'])
            entity = datastore.Entity(kind=kind,
                                      parent=key.parent(),
                                      name=key.name(),
                                      id=key.id())
            for property_name, property_value in riak_entity.iteritems():
                if property_name == '__key__':
                    continue
                # The type name is looked up under the capitalized property
                # name first, then under the name as stored.
                try:
                    property_type_name = metadata[
                        'X-Riak-Meta-%s' % property_name.capitalize()]
                except KeyError:
                    property_type_name = metadata[
                        'X-Riak-Meta-%s' % property_name]
                entity[property_name] = self.__create_value_for_riak_value(
                    property_type_name, property_value, binary_bucket)
            results.append(entity)

    query.set_app(self.__app_id)
    datastore_types.SetNamespace(query, namespace)
    # NOTE(review): `encoded` is never read; kept because it is unclear
    # from here whether the call is relied on for validation.
    encoded = datastore_types.EncodeAppIdNamespace(self.__app_id, namespace)

    query_result.result_list().extend(r._ToPb() for r in results)
    # NOTE(review): this reports the number of *returned* entities as the
    # number of skipped results -- confirm that this is intended.
    query_result.set_skipped_results(len(results))
    query_result.set_keys_only(keys_only)

    if query.compile():
        compiled_query = query_result.mutable_compiled_query()
        compiled_query.set_keys_only(query.keys_only())
        compiled_query.mutable_primaryscan().set_index_name(query.Encode())
def _Dynamic_RunQuery(self, query, query_result):
    """Run a datastore query by scanning Hypertable through Thrift.

    The ``entity`` column of the kind's table is scanned, every
    ``entity:proto`` cell is decoded into an entity, and filters and sort
    orders are then applied in memory.  A ``_Cursor`` over the sorted
    entities is registered in ``self.__queries`` and used to populate
    ``query_result``.

    The Thrift client is closed on every exit path, and an opened scanner
    is closed before the client is.

    Args:
        query: the ``datastore_pb.Query`` to execute.
        query_result: result object populated through the cursor.

    Returns:
        None.  If the scan raises ``ClientException`` a warning is logged
        and ``query_result`` is left untouched.
    """
    client = self._GetThriftClient()
    try:
        kind = query.kind()
        keys_only = query.keys_only()
        filters = query.filter_list()
        orders = query.order_list()
        offset = query.offset()
        limit = query.limit()
        namespace = query.name_space()

        # Filtering and ordering happen in memory, so they need every row.
        # The same holds when the query has no limit: capping the scan at
        # `offset + limit` would then (limit() presumably being 0) leave
        # nothing once the offset has been skipped.  0 is used as "no row
        # limit", as the filters/orders case already relied on.
        if filters or orders or not query.has_limit():
            row_limit = 0
        else:
            row_limit = offset + limit

        scanner_id = None
        total_cells = []
        try:
            ns = client.open_namespace('%s/%s' % (self.__app_id, namespace))
            scanner_id = client.open_scanner(
                ns, kind,
                ScanSpec(columns=['entity'], row_limit=row_limit, revs=1,
                         keys_only=keys_only),
                True)
            while True:
                cells = client.next_cells(scanner_id)
                if not cells:
                    break
                total_cells += cells
        except ClientException:
            log.warning('No data for %s' % kind)
            return
        finally:
            # Runs before the client is closed by the outer ``finally``.
            # ``is not None`` so that a scanner id of 0 is closed as well.
            if scanner_id is not None:
                client.close_scanner(scanner_id)

        # Group the cells by row key.
        key_cell_dict = {}
        for cell in total_cells:
            key_cell_dict.setdefault(cell.key.row, []).append(cell)

        # Decode the serialized entity of every row; the key is taken from
        # the row key rather than from the stored protocol buffer.
        pb_entities = []
        for key in key_cell_dict:
            key_pb = datastore_types.Key(encoded=key)._ToPb()
            for cell in key_cell_dict[key]:
                if (cell.key.column_family == 'entity' and
                        cell.key.column_qualifier == 'proto'):
                    entity_proto = entity_pb.EntityProto(str(cell.value))
                    entity_proto.mutable_key().CopyFrom(key_pb)
                    pb_entities.append(entity_proto)

        results = [datastore.Entity.FromPb(pb) for pb in pb_entities]

        query.set_app(self.__app_id)
        datastore_types.SetNamespace(query, namespace)
        # NOTE(review): `encoded` is never read; kept because it is unclear
        # from here whether the call is relied on for validation.
        encoded = datastore_types.EncodeAppIdNamespace(self.__app_id,
                                                       namespace)

        operators = {
            datastore_pb.Query_Filter.LESS_THAN: '<',
            datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: '<=',
            datastore_pb.Query_Filter.GREATER_THAN: '>',
            datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: '>=',
            datastore_pb.Query_Filter.EQUAL: '==',
        }

        def has_prop_indexed(entity, prop):
            """Returns True if prop is in the entity and is indexed."""
            if prop in datastore_types._SPECIAL_PROPERTIES:
                return True
            if prop in entity.unindexed_properties():
                return False

            values = entity.get(prop, [])
            if not isinstance(values, (tuple, list)):
                values = [values]

            for value in values:
                if type(value) not in datastore_types._RAW_PROPERTY_TYPES:
                    return True
            return False

        def passes_filter(entity, prop, op, filter_val_list):
            """Returns True if any value of prop satisfies the filter.

            Values of the same property type are compared directly; values
            of different types are compared by their type tags, except for
            equality, which never matches across types.
            """
            if not has_prop_indexed(entity, prop):
                return False

            try:
                entity_vals = datastore._GetPropertyValue(entity, prop)
            except KeyError:
                entity_vals = []
            if not isinstance(entity_vals, list):
                entity_vals = [entity_vals]

            for entity_val in entity_vals:
                entity_type = self._PROPERTY_TYPE_TAGS.get(
                    entity_val.__class__)
                for filter_val in filter_val_list:
                    filter_type = self._PROPERTY_TYPE_TAGS.get(
                        filter_val.__class__)
                    if entity_type == filter_type:
                        comp = u'%r %s %r' % (entity_val, op, filter_val)
                    elif op != '==':
                        comp = '%r %s %r' % (entity_type, op, filter_type)
                    else:
                        continue

                    logging.log(logging.DEBUG - 1,
                                'Evaling filter expression "%s"', comp)

                    # SECURITY(review): eval() runs the repr() of stored
                    # and query-supplied values as Python source; this is
                    # only acceptable for trusted, local data.
                    try:
                        ret = eval(comp)
                        # NOTE(review): `NotImplementedError` is probably
                        # meant to be `NotImplemented` -- left unchanged.
                        if ret and ret != NotImplementedError:
                            return True
                    except TypeError:
                        # Values that cannot be compared never match.
                        pass

            return False

        for filt in filters:
            assert filt.op() != datastore_pb.Query_Filter.IN

            prop = filt.property(0).name().decode('utf-8')
            op = operators[filt.op()]
            filter_val_list = [datastore_types.FromPropertyPb(filter_prop)
                               for filter_prop in filt.property_list()]

            results = [entity for entity in results
                       if passes_filter(entity, prop, op, filter_val_list)]

        # Entities lacking an indexed value for a sort property are not
        # part of an ordered result.
        for order in orders:
            prop = order.property().decode('utf-8')
            results = [entity for entity in results
                       if has_prop_indexed(entity, prop)]

        def order_compare_properties(x, y):
            """Return a negative, zero or positive number depending on
            whether property value x is considered smaller than, equal to,
            or larger than property value y.  If x and y are different
            types, they are compared by the type tags found in
            ``self._PROPERTY_TYPE_TAGS``.
            """
            if isinstance(x, datetime.datetime):
                x = datastore_types.DatetimeToTimestamp(x)
            if isinstance(y, datetime.datetime):
                y = datastore_types.DatetimeToTimestamp(y)

            x_type = self._PROPERTY_TYPE_TAGS.get(x.__class__)
            y_type = self._PROPERTY_TYPE_TAGS.get(y.__class__)

            if x_type != y_type:
                return cmp(x_type, y_type)
            try:
                return cmp(x, y)
            except TypeError:
                return 0

        def order_compare_entities(a, b):
            """Return a negative, zero or positive number depending on
            whether entity a is considered smaller than, equal to, or
            larger than b, according to the query's orderings.  Entities
            that tie on every ordering are ordered by key.
            """
            for o in orders:
                prop = o.property().decode('utf-8')
                # Compared by value; DESCENDING is an integer constant.
                descending = (
                    o.direction() == datastore_pb.Query_Order.DESCENDING)

                # A multi-valued property sorts by its first value in the
                # requested direction.
                a_val = datastore._GetPropertyValue(a, prop)
                if isinstance(a_val, list):
                    a_val = sorted(a_val, order_compare_properties,
                                   reverse=descending)[0]

                b_val = datastore._GetPropertyValue(b, prop)
                if isinstance(b_val, list):
                    b_val = sorted(b_val, order_compare_properties,
                                   reverse=descending)[0]

                cmped = order_compare_properties(a_val, b_val)
                if descending:
                    cmped = -cmped
                if cmped != 0:
                    return cmped

            return cmp(a.key(), b.key())

        results.sort(order_compare_entities)

        cursor = _Cursor(query, results, order_compare_entities)
        self.__queries[cursor.cursor] = cursor

        if query.has_count():
            count = query.count()
        elif query.has_limit():
            count = query.limit()
        else:
            count = _BATCH_SIZE

        cursor.PopulateQueryResult(query_result, count, query.offset(),
                                   compile=query.compile())

        if query.compile():
            compiled_query = query_result.mutable_compiled_query()
            compiled_query.set_keys_only(query.keys_only())
            compiled_query.mutable_primaryscan().set_index_name(
                query.Encode())
    finally:
        client.close()
def _Dynamic_RunQuery(self, query, query_result):
    """Run a datastore query by scanning a Hypertable table.

    The table ``<app_id>_<kind>`` is scanned, the ``props`` cells of every
    row are unpickled into an entity, and filters and sort orders are then
    applied in memory.  A ``_Cursor`` over the sorted entities is
    registered in ``self.__queries`` and used to populate ``query_result``.

    Args:
        query: the ``datastore_pb.Query`` to execute.
        query_result: result object populated through the cursor.
    """
    kind = query.kind()
    keys_only = query.keys_only()
    filters = query.filter_list()
    orders = query.order_list()
    offset = query.offset()
    limit = query.limit()
    namespace = query.name_space()

    # NOTE(review): the table name does not contain the namespace, so all
    # namespaces of an app share one table per kind -- confirm intended.
    table_name = str('%s_%s' % (self._app_id, kind))
    table = self._client.open_table(table_name)

    scan_spec_builder = ht.ScanSpecBuilder()
    scan_spec_builder.set_max_versions(1)
    # Filtering and ordering happen in memory, so they need every row.  The
    # same holds when the query has no limit: capping the scan at
    # `offset + limit` would then (limit() presumably being 0) leave
    # nothing once the offset has been skipped.  0 is used as "no row
    # limit", as the filters/orders case already relied on.
    if filters or orders or not query.has_limit():
        scan_spec_builder.set_row_limit(0)
    else:
        scan_spec_builder.set_row_limit(offset + limit)

    # Fetch the Hypertable cells and group them by row key.
    key_cell_dict = {}
    for cell in table.create_scanner(scan_spec_builder):
        key_cell_dict.setdefault(cell.row_key, []).append(cell)

    results = []
    for key in key_cell_dict:
        key_obj = datastore_types.Key(encoded=key)
        entity = datastore.Entity(kind,
                                  _app=self._app_id,
                                  name=key_obj.name(),
                                  id=key_obj.id())
        for cell in key_cell_dict[key]:
            if cell.column_family == 'props':
                # SECURITY(review): pickle.loads can execute arbitrary
                # code; safe only while the table is written exclusively
                # by trusted code.
                entity[cell.column_qualifier] = pickle.loads(cell.value)
        results.append(entity)

    query.set_app(self._app_id)
    datastore_types.SetNamespace(query, namespace)
    # NOTE(review): `encoded` is never read; kept because it is unclear
    # from here whether the call is relied on for validation.
    encoded = datastore_types.EncodeAppIdNamespace(self._app_id, namespace)

    operators = {
        datastore_pb.Query_Filter.LESS_THAN: '<',
        datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: '<=',
        datastore_pb.Query_Filter.GREATER_THAN: '>',
        datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: '>=',
        datastore_pb.Query_Filter.EQUAL: '==',
    }

    def has_prop_indexed(entity, prop):
        """Returns True if prop is in the entity and is indexed."""
        if prop in datastore_types._SPECIAL_PROPERTIES:
            return True
        if prop in entity.unindexed_properties():
            return False

        values = entity.get(prop, [])
        if not isinstance(values, (tuple, list)):
            values = [values]

        for value in values:
            if type(value) not in datastore_types._RAW_PROPERTY_TYPES:
                return True
        return False

    def passes_filter(entity, prop, op, filter_val_list):
        """Returns True if any value of prop satisfies the filter.

        Values of the same property type are compared directly; values of
        different types are compared by their type tags, except for
        equality, which never matches across types.
        """
        log.debug('filter check for entity: %r' % entity)

        if not has_prop_indexed(entity, prop):
            return False

        try:
            entity_vals = datastore._GetPropertyValue(entity, prop)
        except KeyError:
            entity_vals = []
        if not isinstance(entity_vals, list):
            entity_vals = [entity_vals]

        for entity_val in entity_vals:
            entity_type = self._PROPERTY_TYPE_TAGS.get(entity_val.__class__)
            for filter_val in filter_val_list:
                filter_type = self._PROPERTY_TYPE_TAGS.get(
                    filter_val.__class__)
                if entity_type == filter_type:
                    comp = u'%r %s %r' % (entity_val, op, filter_val)
                elif op != '==':
                    comp = '%r %s %r' % (entity_type, op, filter_type)
                else:
                    continue

                logging.log(logging.DEBUG - 1,
                            'Evaling filter expression "%s"', comp)

                # SECURITY(review): eval() runs the repr() of stored and
                # query-supplied values as Python source; this is only
                # acceptable for trusted, local data.
                try:
                    ret = eval(comp)
                    # NOTE(review): `NotImplementedError` is probably meant
                    # to be `NotImplemented` -- left unchanged.
                    if ret and ret != NotImplementedError:
                        return True
                except TypeError:
                    # Values that cannot be compared never match.
                    pass

        return False

    for filt in filters:
        assert filt.op() != datastore_pb.Query_Filter.IN

        prop = filt.property(0).name().decode('utf-8')
        op = operators[filt.op()]
        filter_val_list = [datastore_types.FromPropertyPb(filter_prop)
                           for filter_prop in filt.property_list()]

        results = [entity for entity in results
                   if passes_filter(entity, prop, op, filter_val_list)]
        log.debug('entity list after filter operation: %r' % results)

    # Entities lacking an indexed value for a sort property are not part of
    # an ordered result.
    for order in orders:
        prop = order.property().decode('utf-8')
        results = [entity for entity in results
                   if has_prop_indexed(entity, prop)]

    def order_compare_properties(x, y):
        """Return a negative, zero or positive number depending on whether
        property value x is considered smaller than, equal to, or larger
        than property value y.  If x and y are different types, they are
        compared by the type tags found in ``self._PROPERTY_TYPE_TAGS``.
        """
        if isinstance(x, datetime.datetime):
            x = datastore_types.DatetimeToTimestamp(x)
        if isinstance(y, datetime.datetime):
            y = datastore_types.DatetimeToTimestamp(y)

        x_type = self._PROPERTY_TYPE_TAGS.get(x.__class__)
        y_type = self._PROPERTY_TYPE_TAGS.get(y.__class__)

        if x_type != y_type:
            return cmp(x_type, y_type)
        try:
            return cmp(x, y)
        except TypeError:
            return 0

    def order_compare_entities(a, b):
        """Return a negative, zero or positive number depending on whether
        entity a is considered smaller than, equal to, or larger than b,
        according to the query's orderings.  Entities that tie on every
        ordering are ordered by key.
        """
        for o in orders:
            prop = o.property().decode('utf-8')
            # Compared by value; DESCENDING is an integer constant.
            descending = (
                o.direction() == datastore_pb.Query_Order.DESCENDING)

            # A multi-valued property sorts by its first value in the
            # requested direction.
            a_val = datastore._GetPropertyValue(a, prop)
            if isinstance(a_val, list):
                a_val = sorted(a_val, order_compare_properties,
                               reverse=descending)[0]

            b_val = datastore._GetPropertyValue(b, prop)
            if isinstance(b_val, list):
                b_val = sorted(b_val, order_compare_properties,
                               reverse=descending)[0]

            cmped = order_compare_properties(a_val, b_val)
            if descending:
                cmped = -cmped
            if cmped != 0:
                return cmped

        return cmp(a.key(), b.key())

    results.sort(order_compare_entities)

    cursor = _Cursor(query, results, order_compare_entities)
    self.__queries[cursor.cursor] = cursor

    if query.has_count():
        count = query.count()
    elif query.has_limit():
        count = query.limit()
    else:
        count = _BATCH_SIZE

    cursor.PopulateQueryResult(query_result, count, query.offset(),
                               compile=query.compile())

    if query.compile():
        compiled_query = query_result.mutable_compiled_query()
        compiled_query.set_keys_only(query.keys_only())
        compiled_query.mutable_primaryscan().set_index_name(query.Encode())