def __init__(self, partition_key_target_range, client, collection_link,
             query, document_producer_comp, options):
        """
        Constructor.

        :param dict partition_key_target_range:
            The partition key range this producer draws results from.
        :param client: Document client used to issue the feed query.
        :param str collection_link: Link to the collection being queried.
        :param (str or dict) query: The query to execute.
        :param document_producer_comp: Comparator for ordering producers.
        :param dict options: Request options forwarded to the execution context.
        """
        self._client = client
        self._options = options
        self._partition_key_target_range = partition_key_target_range
        self._doc_producer_comp = document_producer_comp
        self._buffer = deque()

        self._cur_item = None
        self._has_started = False
        self._is_finished = False

        # Resolve the documents feed path and collection id once, up front,
        # then build the execution context that drives the actual fetches.
        docs_path = base.GetPathFromLink(collection_link, 'docs')
        collection_id = base.GetResourceIdOrFullNameFromLink(collection_link)

        # The fetch function always targets this producer's own range id.
        def fetch_fn(options):
            return self._client.QueryFeed(docs_path, collection_id, query,
                                          options,
                                          partition_key_target_range['id'])

        self._ex_context = _DefaultQueryExecutionContext(
            client, self._options, fetch_fn)
    def get_overlapping_ranges(self, collection_link, partition_key_ranges):
        """
        Given a partition key range and a collection,
        returns the list of overlapping partition key ranges.

        :param str collection_link:
            The name of the collection.
        :param list partition_key_ranges:
            List of partition key ranges.

        :return:
            List of overlapping partition key ranges.
        :rtype: list
        """
        cl = self._documentClient

        collection_id = base.GetResourceIdOrFullNameFromLink(collection_link)

        # Serve from the per-collection cache when possible; build and cache
        # the routing map on a miss.
        routing_map = self._collection_routing_map_by_item.get(collection_id)
        if routing_map is None:
            pk_ranges = list(cl._ReadPartitionKeyRanges(collection_link))
            # For large collections, a split may complete between the read
            # partition key ranges query page responses, causing the ranges to
            # contain both the children and their parents. Discard the parent
            # ranges to keep the routing map valid.
            pk_ranges = _PartitionKeyRangeCache._discard_parent_ranges(
                pk_ranges)
            routing_map = _CollectionRoutingMap.CompleteRoutingMap(
                [(rng, True) for rng in pk_ranges], collection_id)
            self._collection_routing_map_by_item[collection_id] = routing_map

        return routing_map.get_overlapping_ranges(partition_key_ranges)
# Exemplo n.º 3 (scraped example separator; vote count: 0)
    def find_docs_by_partition_key_range_id(self):
        """Read every document, grouped by the partition key range serving it.

        Issues one feed query per partition key range of the collection.

        :return:
            Mapping from partition key range id to the list of documents
            fetched from that range.
        :rtype: dict
        """
        query = {'query': 'SELECT * FROM root r'}

        partition_key_range = list(
            self.client.client_connection._ReadPartitionKeyRanges(
                self.collection_link))

        # Loop-invariant: the docs path and collection id do not depend on
        # the range, so compute them once rather than on every iteration.
        path = base.GetPathFromLink(self.collection_link, 'docs')
        collection_id = base.GetResourceIdOrFullNameFromLink(
            self.collection_link)

        docs_by_partition_key_range_id = {}
        for r in partition_key_range:
            options = {}

            # Bind the range id as a default argument so the closure does not
            # late-bind the loop variable `r`.
            def fetch_fn(options, _range_id=r['id']):
                return self.client.client_connection.QueryFeed(
                    path, collection_id, query, options, _range_id)

            doc_results_iterable = query_iterable.QueryIterable(
                self.client.client_connection, query, options, fetch_fn,
                self.collection_link)

            docs = list(doc_results_iterable)
            # Each range id must only be produced once by the feed.
            self.assertNotIn(r['id'], docs_by_partition_key_range_id)
            docs_by_partition_key_range_id[r['id']] = docs
        return docs_by_partition_key_range_id
# Exemplo n.º 4 (scraped example separator; vote count: 0)
    def __init__(self, client, options, database_link, query, partition_key):
        """
        Constructor
        :param CosmosClient client:
        :param dict options:
            The request options for the request.
        :param str database_link: database self link or ID based link
        :param (str or dict) query:
        :param str partition_key: partition key for the query

        :raises ValueError:
            If no partition resolver is registered for the database, or if
            it resolves to no collection links.
        """
        super(_MultiCollectionQueryExecutionContext,
              self).__init__(client, options)

        self._current_collection_index = 0
        self._collection_links = []
        self._collection_links_length = 0

        self._query = query
        self._client = client

        partition_resolver = client.GetPartitionResolver(database_link)

        if partition_resolver is None:
            raise ValueError(client.PartitionResolverErrorMessage)

        self._collection_links = partition_resolver.ResolveForRead(
            partition_key)

        # Validate before calling len(): the original checked for None after
        # taking the length, which would raise TypeError instead of the
        # intended ValueError when the resolver returns None.
        if self._collection_links is None:
            raise ValueError("_collection_links is None.")

        self._collection_links_length = len(self._collection_links)

        if self._collection_links_length <= 0:
            raise ValueError("_collection_links_length is not greater than 0.")

        # Creating the QueryFeed for the first collection
        path = base.GetPathFromLink(
            self._collection_links[self._current_collection_index], 'docs')
        collection_id = base.GetResourceIdOrFullNameFromLink(
            self._collection_links[self._current_collection_index])

        self._current_collection_index += 1

        def fetch_fn(options):
            return client.QueryFeed(path, collection_id, query, options)

        self._fetch_function = fetch_fn
# Exemplo n.º 5 (scraped example separator; vote count: 0)
    def _fetch_next_block(self):
        """Fetches the next block of query results.

        Fetches the next block of results from the current collection link.
        Once the current collection's results are exhausted, it moves on to
        the next collection link.

        :return:
            List of fetched items.
        :rtype: list
        """
        # Execute the query against the current document collection.
        fetched_items = self._fetch_items_helper_with_retries(
            self._fetch_function)

        # When partitioning spreads the query over several collections, keep
        # advancing through the remaining collection links, building a fresh
        # feed query for each one, until something is fetched or we run out.
        while not fetched_items:
            has_more_collections = (
                self._collection_links
                and self._current_collection_index <
                self._collection_links_length)
            if not has_more_collections:
                break

            link = self._collection_links[self._current_collection_index]
            path = base.GetPathFromLink(link, 'docs')
            collection_id = base.GetResourceIdOrFullNameFromLink(link)

            # Reset paging state before targeting the next collection.
            self._continuation = None
            self._has_started = False

            def fetch_fn(options):
                return self._client.QueryFeed(path, collection_id,
                                              self._query, options)

            self._fetch_function = fetch_fn

            fetched_items = self._fetch_items_helper_with_retries(
                self._fetch_function)
            self._current_collection_index += 1

        return fetched_items
# Exemplo n.º 6 (scraped example separator; vote count: 0)
    def _test_default_execution_context(self, options, query,
                                        expected_number_of_results):
        """Exercise _DefaultQueryExecutionContext via next() and
        fetch_next_block() and validate the result count and page sizes.

        :param dict options:
            Request options; must contain 'maxItemCount' (the page size).
        :param (str or dict) query: The query to execute.
        :param int expected_number_of_results:
            Total number of results the query is expected to yield.
        """
        page_size = options['maxItemCount']
        collection_link = self.GetDocumentCollectionLink(
            self.created_db, self.created_collection)
        path = base.GetPathFromLink(collection_link, 'docs')
        collection_id = base.GetResourceIdOrFullNameFromLink(collection_link)

        def fetch_fn(options):
            return self.client.client_connection.QueryFeed(
                path, collection_id, query, options)

        ######################################
        # test next() behavior
        ######################################
        ex = base_execution_context._DefaultQueryExecutionContext(
            self.client.client_connection, options, fetch_fn)

        it = ex.__iter__()

        def invokeNext():
            return next(it)

        results = {}
        # validate that invocations of next() produce the expected results
        # (range, not the Python-2-only xrange, so this runs on Python 3)
        for _ in range(expected_number_of_results):
            item = invokeNext()
            results[item['id']] = item

        self.assertEqual(len(results), expected_number_of_results)

        # after the result set is exhausted, invoking next must raise a StopIteration exception
        self.assertRaises(StopIteration, invokeNext)

        ######################################
        # test fetch_next_block() behavior
        ######################################
        ex = base_execution_context._DefaultQueryExecutionContext(
            self.client.client_connection, options, fetch_fn)

        results = {}
        cnt = 0
        while True:
            fetched_res = ex.fetch_next_block()
            fetched_size = len(fetched_res)

            for item in fetched_res:
                results[item['id']] = item
            cnt += fetched_size

            if cnt < expected_number_of_results:
                # backend may not necessarily return exactly page_size of results
                self.assertEqual(fetched_size, page_size, "page size")
            elif cnt == expected_number_of_results:
                self.assertLessEqual(fetched_size, page_size,
                                     "last page size")
                break
            else:
                # cnt > expected_number_of_results
                self.fail("more results than expected")

        # validate the number of collected results
        self.assertEqual(len(results), expected_number_of_results)

        # no more results will be returned
        self.assertEqual(ex.fetch_next_block(), [])