def process(self, query, *args, **kwargs):
    """Break ``query`` into keyed sub-queries.

    Each resulting query is paired with a distinct integer key (starting
    at 1 and increasing by one) that is used to group the query splits.

    Args:
        query: The query to split. It must expose ``HasField``
            (presumably a Datastore query protobuf -- confirm).
        *args: Accepted but unused.
        **kwargs: Accepted but unused.

    Returns:
        A list of ``(key, query)`` tuples. The list holds the single
        entry ``(1, query)`` when the query carries a ``limit`` field or
        when splitting raises.
    """
    first_key = 1

    # A query with a user-set limit cannot be split; emit it untouched.
    if query.HasField('limit'):
        return [(first_key, query)]

    # Only estimate the number of splits when the user left it at 0.
    split_count = self._num_splits
    if split_count == 0:
        # NOTE(review): the estimate is computed from self._query rather
        # than the ``query`` argument -- confirm this is intended.
        split_count = ReadFromDatastore.get_estimated_num_splits(
            self._project,
            self._datastore_namespace,
            self._query,
            self._datastore,
        )

    logging.info("Splitting the query into %d splits", split_count)
    try:
        pieces = query_splitter.get_splits(
            self._datastore,
            query,
            split_count,
            helper.make_partition(self._project, self._datastore_namespace),
        )
    except Exception:
        # Best effort: any failure while splitting falls back to running
        # the original query as a single, unsplit piece.
        logging.warning(
            "Unable to parallelize the given query: %s", query, exc_info=True
        )
        pieces = [query]

    # Number the pieces consecutively, beginning with the first key.
    return list(enumerate(pieces, start=first_key))