def _rank_aggregate(self, query):
    """(QueryObject) -> list of json object

    Fetch the index entries that satisfy the query's match statement and
    hand them to ``_groupbyDoc`` to be combined into the ranked results
    (the total rank score of each object associated with the query).
    """
    ##############
    ## matching ##
    ##############
    # Approximate matching is answered from the term/document index.
    termdoc_index = self.mongo_connection["index"]["termdoc"]

    # list() drains the cursor inside the profiled section, so the whole
    # database round trip is what gets timed.
    with Profiler("index_DB_access_for_approx_match"):
        matched_entries = list(termdoc_index.find(query.match_statement))

    return self._groupbyDoc(query, matched_entries)
def execute(self, query):
    """(QueryObject) -> list of json objects

    Execute the local aggregator function (similar to A.local() in echo
    context).

    The work happens in two stages:

    1. Matching + projection. For an approximate query
       (``query.parameters['isApprox']``) the index is ranked through
       ``_rank_aggregate``, the best ``limit`` entries are kept, the
       corresponding real objects are loaded and annotated with a
       ``'total_score'`` field, and the result is ordered by descending
       score. For an exact query the object collection is queried
       directly with the match statement.
    2. Aggregation. A single ``('unique', attr)`` aggregation returns
       ``[{attr: [distinct values]}]``. Otherwise the objects are
       optionally grouped by ``query.group_attribute_list`` and passed to
       ``_aggr_perform``. With no aggregation the matched objects are
       returned unchanged.

    A falsy limit (``None`` or ``0``) means "no limit" on both paths.

    Side effect: when the query carries no projection,
    ``query.projection_statement`` is set to ``{"content": 0}``.
    """
    #########################
    # matching + projection #
    #########################
    object_collection = self.mongo_connection["sensor"]["objects"]
    limit = query.parameters['limit']

    # Filter out the 'content' field when the caller gave no projection.
    # TODO: should have else: filter "_id":0
    if not query.projection_statement:
        query.projection_statement = {"content": 0}  # ,"_id":0}

    by_score = operator.itemgetter('total_score')

    if query.parameters['isApprox']:
        # Approx. match: rank the index objects, best score first.
        ranked = sorted(self._rank_aggregate(query), key=by_score, reverse=True)
        if limit:
            # Only truncate for a real limit; slicing with 0 would discard
            # everything, whereas the exact path treats 0 as "no limit".
            ranked = ranked[:limit]

        # A concrete list (not a lazy map object) so that the emptiness
        # test and the '$in' operand behave the same on Python 2 and 3.
        oid_list = [entry['document'] for entry in ranked]

        with Profiler("object_DB_access_for_approx_match"):
            if not oid_list:
                return []
            real_objects = list(object_collection.find(
                {"_id": {'$in': oid_list}}, query.projection_statement))

        # Copy 'total_score' from each index entry onto its real object.
        # NOTE(review): presumably a caller-supplied projection that
        # excludes '_id' would make this lookup raise KeyError - confirm.
        objects_by_id = {}
        for real_object in real_objects:
            objects_by_id[real_object['_id']] = real_object
        for entry in ranked:
            real_object = objects_by_id.get(entry['document'])
            if real_object is None:
                # The index references an object the object store did
                # not return; report it and keep going.
                logging.warning("mismatch index object and real object : %s",
                                entry['document'])
                continue
            real_object['total_score'] = entry['total_score']

        sorted_results = sorted(objects_by_id.values(), key=by_score,
                                reverse=True)
    else:
        # Exact match: the database applies the limit itself.
        with Profiler("object_DB_access_for_exact_match"):
            results_cursor = object_collection.find(
                query.match_statement, query.projection_statement,
                limit=limit if limit else 0)
            sorted_results = list(results_cursor)

    ########################
    # Aggregation function #
    ########################
    aggregations = query.aggregation_function_list
    group_attributes = query.group_attribute_list

    ## unique
    if aggregations and len(aggregations) == 1 and 'unique' in aggregations[0]:
        func_name, attr_name = aggregations[0]
        # Membership alone also matches an *attribute* named 'unique'
        # (e.g. ('count', 'unique')); only the function name decides.
        if func_name == 'unique':
            # A dict is used as the "seen" record; objects lacking the
            # attribute are ignored.
            distinct_values = {}
            for obj in sorted_results:
                if attr_name in obj:
                    distinct_values[obj[attr_name]] = 1
            return [{attr_name: list(distinct_values)}]

    # perform group by
    if group_attributes:
        group_key = operator.itemgetter(*group_attributes)

        # Filter out the objects that don't have every group attribute,
        # then sort on the group key: itertools.groupby only merges
        # adjacent items.
        groupable = [obj for obj in sorted_results
                     if all(attr in obj for attr in group_attributes)]
        groupable.sort(key=group_key)

        # itemgetter returns a bare value for one attribute and a tuple
        # for several; decide by attribute count, not by value type.
        single_attribute = len(group_attributes) == 1

        grouped_results = []
        for key, members in itertools.groupby(groupable, key=group_key):
            # perform aggregate on the members of this group
            aggregated = self._aggr_perform(aggregations, members)
            key_values = (key,) if single_attribute else key
            # add the group attributes to the result object
            for attr, value in zip(group_attributes, key_values):
                aggregated[attr] = value
            grouped_results.append(aggregated)
        return grouped_results

    ## normal aggregation case
    if aggregations:
        return [self._aggr_perform(aggregations, sorted_results)]

    ## don't perform an aggregation
    return sorted_results