Example #1
def model_datastoreio():
  """Using a Read and Write transform to read/write to Cloud Datastore."""

  import uuid
  from google.cloud.proto.datastore.v1 import entity_pb2
  from google.cloud.proto.datastore.v1 import query_pb2
  import googledatastore
  import apache_beam as beam
  from apache_beam.options.pipeline_options import PipelineOptions
  from apache_beam.io.gcp.datastore.v1.datastoreio import ReadFromDatastore
  from apache_beam.io.gcp.datastore.v1.datastoreio import WriteToDatastore

  project = 'my_project'
  kind = 'my_kind'
  query = query_pb2.Query()
  query.kind.add().name = kind

  # [START model_datastoreio_read]
  p = beam.Pipeline(options=PipelineOptions())
  entities = p | 'Read From Datastore' >> ReadFromDatastore(project, query)
  # [END model_datastoreio_read]

  # [START model_datastoreio_write]
  p = beam.Pipeline(options=PipelineOptions())
  musicians = p | 'Musicians' >> beam.Create(
      ['Mozart', 'Chopin', 'Beethoven', 'Vivaldi'])

  def to_entity(content):
    entity = entity_pb2.Entity()
    googledatastore.helper.add_key_path(entity.key, kind, str(uuid.uuid4()))
    googledatastore.helper.add_properties(entity, {'content': unicode(content)})
    return entity

  entities = musicians | 'To Entity' >> beam.Map(to_entity)
  entities | 'Write To Datastore' >> WriteToDatastore(project)
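
A minimal way to execute either snippet above is to run the pipeline and block until it finishes (a sketch assuming valid Google Cloud credentials and an existing 'my_project'):

  result = p.run()
  result.wait_until_finish()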
    def test_get_splits_query_with_unsupported_filter(self):
        query = query_pb2.Query()
        query.kind.add()
        test_filter = query.filter.composite_filter.filters.add()
        test_filter.property_filter.op = PropertyFilter.GREATER_THAN
        self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                          2)
    def test_get_splits_query_with_order(self):
        query = query_pb2.Query()
        query.kind.add()
        query.order.add()

        self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                          3)
Example #4
    def test__build_protobuf_all_values(self):
        from google.cloud.proto.datastore.v1 import query_pb2
        from google.cloud.datastore.query import Query

        client = _Client(None, None)
        query = Query(client)
        limit = 15
        offset = 9
        start_bytes = b'i\xb7\x1d'
        start_cursor = 'abcd'
        end_bytes = b'\xc3\x1c\xb3'
        end_cursor = 'wxyz'
        iterator = self._make_one(
            query, client, limit=limit, offset=offset,
            start_cursor=start_cursor, end_cursor=end_cursor)
        self.assertEqual(iterator.max_results, limit)
        iterator.num_results = 4
        iterator._skipped_results = 1

        pb = iterator._build_protobuf()
        expected_pb = query_pb2.Query(
            start_cursor=start_bytes,
            end_cursor=end_bytes,
            offset=offset - iterator._skipped_results,
        )
        expected_pb.limit.value = limit - iterator.num_results
        self.assertEqual(pb, expected_pb)
Example #5
    def _next_page_helper(self, txn_id=None):
        from google.cloud.iterator import Page
        from google.cloud.proto.datastore.v1 import datastore_pb2
        from google.cloud.proto.datastore.v1 import entity_pb2
        from google.cloud.proto.datastore.v1 import query_pb2
        from google.cloud.datastore.query import Query

        more_enum = query_pb2.QueryResultBatch.NOT_FINISHED
        result = _make_query_response([], b'', more_enum, 0)
        project = 'prujekt'
        ds_api = _make_datastore_api(result)
        if txn_id is None:
            client = _Client(project, datastore_api=ds_api)
        else:
            transaction = mock.Mock(id=txn_id, spec=['id'])
            client = _Client(
                project, datastore_api=ds_api, transaction=transaction)

        query = Query(client)
        iterator = self._make_one(query, client)

        page = iterator._next_page()
        self.assertIsInstance(page, Page)
        self.assertIs(page._parent, iterator)

        partition_id = entity_pb2.PartitionId(project_id=project)
        if txn_id is None:
            read_options = datastore_pb2.ReadOptions()
        else:
            read_options = datastore_pb2.ReadOptions(transaction=txn_id)
        empty_query = query_pb2.Query()
        ds_api.run_query.assert_called_once_with(
            project, partition_id, read_options, query=empty_query)
Example #6
    def get_namespaces(self):
        # Skip auth-ing to db in test operations
        if not self.argv:
            return ['4952435991248896_1']

        query_pb = query_pb2.Query()
        helper.set_kind(query_pb, "__namespace__")
        client = apache_helper.get_datastore(PROJECT)
        namespace_entities = apache_helper.fetch_entities(
            PROJECT, '', query_pb, client)

        namespaces = []
        for n in namespace_entities:
            # Get namespace name or id
            key_path = n.key.path[-1]
            if key_path.HasField('id'):
                name_or_id = key_path.id
            else:
                name_or_id = key_path.name

            # Avoid duplicates and test namespaces
            if len(str(name_or_id)) > 1 and name_or_id not in namespaces:
                namespaces.append(name_or_id)

        return namespaces
Example #7
def _create_split(last_key, next_key, query):
    """Create a new {@link Query} given the query and range..

  Args:
    last_key: the previous key. If null then assumed to be the beginning.
    next_key: the next key. If null then assumed to be the end.
    query: the desired query.

  Returns:
    A split query with fetches entities in the range [last_key, next_key)
  """
    if not (last_key or next_key):
        return query

    split_query = query_pb2.Query()
    split_query.CopyFrom(query)
    composite_filter = split_query.filter.composite_filter
    composite_filter.op = CompositeFilter.AND

    if query.HasField('filter'):
        composite_filter.filters.add().CopyFrom(query.filter)

    if last_key:
        lower_bound = composite_filter.filters.add()
        lower_bound.property_filter.property.name = KEY_PROPERTY_NAME
        lower_bound.property_filter.op = PropertyFilter.GREATER_THAN_OR_EQUAL
        lower_bound.property_filter.value.key_value.CopyFrom(last_key)

    if next_key:
        upper_bound = composite_filter.filters.add()
        upper_bound.property_filter.property.name = KEY_PROPERTY_NAME
        upper_bound.property_filter.op = PropertyFilter.LESS_THAN
        upper_bound.property_filter.value.key_value.CopyFrom(next_key)

    return split_query
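
For illustration only, a hedged sketch of chaining _create_split over an ordered list of scatter keys to cover the whole key space (split_keys and user_query are assumed names, not part of the code above):

    splits = []
    last_key = None
    for next_key in split_keys:  # scatter keys in ascending __key__ order
        splits.append(_create_split(last_key, next_key, user_query))
        last_key = next_key
    # The final range is unbounded on the right: [last_key, end).
    splits.append(_create_split(last_key, None, user_query))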
Example #8
    def split_query(self, query, num_splits):
        """Generate dummy query splits."""
        split_queries = []
        for _ in range(0, num_splits):
            q = query_pb2.Query()
            q.CopyFrom(query)
            split_queries.append(q)
        return split_queries
    def test_get_splits_with_two_splits(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 2
        num_entities = 97
        batch_size = 9

        self.check_get_splits(query, num_splits, num_entities, batch_size)
    def test_get_splits_with_multiple_splits(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 4
        num_entities = 369
        batch_size = 12

        self.check_get_splits(query, num_splits, num_entities, batch_size)
    def test_get_splits_with_large_num_splits(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 10
        num_entities = 4
        batch_size = 10

        self.check_get_splits(query, num_splits, num_entities, batch_size)
    def test_get_splits_with_batch_size_exact_multiple(self):
        """Test get_splits when num scatter keys is a multiple of batch size."""
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 4
        num_entities = 400
        batch_size = 32

        self.check_get_splits(query, num_splits, num_entities, batch_size)
    def test_get_splits_with_large_batch_size(self):
        """Test get_splits when all scatter keys are retured in a single req."""
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 4
        num_entities = 400
        batch_size = 500

        self.check_get_splits(query, num_splits, num_entities, batch_size)
Example #14
    def test__build_protobuf_empty(self):
        from google.cloud.proto.datastore.v1 import query_pb2
        from google.cloud.datastore.query import Query

        client = _Client(None, None)
        query = Query(client)
        iterator = self._make_one(query, client)

        pb = iterator._build_protobuf()
        expected_pb = query_pb2.Query()
        self.assertEqual(pb, expected_pb)
Example #15
def make_latest_timestamp_query(namespace):
    """Make a Query to fetch the latest timestamp statistics."""
    query = query_pb2.Query()
    if namespace is None:
        query.kind.add().name = '__Stat_Total__'
    else:
        query.kind.add().name = '__Stat_Ns_Total__'

    # Descending order of `timestamp`
    datastore_helper.add_property_orders(query, "-timestamp")
    # Only get the latest entity
    query.limit.value = 1
    return query
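
A hedged sketch of consuming this query with the fetch_entities helper shown in Example #6 (PROJECT, apache_helper and the 'timestamp' property name are assumptions about the surrounding module):

    query = make_latest_timestamp_query(namespace=None)
    datastore = apache_helper.get_datastore(PROJECT)
    # limit.value = 1 means at most one __Stat_Total__ entity is returned.
    for entity in apache_helper.fetch_entities(PROJECT, '', query, datastore):
        latest = entity.properties['timestamp'].timestamp_value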
    def test_create_scatter_query(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 10
        scatter_query = query_splitter._create_scatter_query(query, num_splits)
        self.assertEqual(scatter_query.kind[0], kind)
        self.assertEqual(scatter_query.limit.value,
                         (num_splits - 1) * query_splitter.KEYS_PER_SPLIT)
        self.assertEqual(scatter_query.order[0].direction,
                         query_pb2.PropertyOrder.ASCENDING)
        self.assertEqual(scatter_query.projection[0].property.name,
                         query_splitter.KEY_PROPERTY_NAME)
Example #17
def make_ancestor_query(kind, namespace, ancestor):
  """Creates a Cloud Datastore ancestor query."""
  ancestor_key = entity_pb2.Key()
  datastore_helper.add_key_path(ancestor_key, kind, ancestor)
  if namespace is not None:
    ancestor_key.partition_id.namespace_id = namespace

  query = query_pb2.Query()
  query.kind.add().name = kind

  datastore_helper.set_property_filter(
      query.filter, '__key__', PropertyFilter.HAS_ANCESTOR, ancestor_key)

  return query
Example #18
 def create_query(self, kinds=(), order=False, limit=None, offset=None,
                  inequality_filter=False):
   query = query_pb2.Query()
   for kind in kinds:
     query.kind.add().name = kind
   if order:
     query.order.add()
   if limit is not None:
     query.limit.value = limit
   if offset is not None:
     query.offset = offset
   if inequality_filter:
     test_filter = query.filter.composite_filter.filters.add()
     test_filter.property_filter.op = PropertyFilter.GREATER_THAN
   return query
Example #19
def make_query(kind):
    """Creates a Cloud Datastore query to retrieve all entities with a
  'created_at' date > N days ago.
  """
    days = 4
    now = datetime.datetime.now()
    earlier = now - datetime.timedelta(days=days)

    query = query_pb2.Query()
    query.kind.add().name = kind

    datastore_helper.set_property_filter(query.filter, 'created_at',
                                         PropertyFilter.GREATER_THAN, earlier)

    return query
Example #20
    def query(self):
        # Instantiate a filter protobuf
        # You MUST instantiate the filter before the query, then instantiate
        # the query with the filter.
        filter_pb = query_pb2.Filter()

        # Get all non-deleted model instances
        helper.set_property_filter(filter_pb, 'deleted',
                                   query_pb2.PropertyFilter.EQUAL, False)

        # Instantiate a query protobuf
        query_pb = query_pb2.Query(filter=filter_pb)
        helper.set_kind(query_pb, self.model)

        return query_pb
Example #21
    def setUp(self):
        self._mock_datastore = MagicMock()
        self._query = query_pb2.Query()
        self._query.kind.add().name = 'dummy_kind'
        patch_retry(self, helper)
        self._retriable_errors = [
            RPCError("dummy", code_pb2.INTERNAL, "failed"),
            SocketError(errno.ECONNRESET, "Connection Reset"),
            SocketError(errno.ETIMEDOUT, "Timed out")
        ]

        self._non_retriable_errors = [
            RPCError("dummy", code_pb2.UNAUTHENTICATED, "failed"),
            SocketError(errno.EADDRNOTAVAIL, "Address not available")
        ]
Example #22
def make_ancestor_query(kind, namespace, ancestor):
  """Creates a Cloud Datastore ancestor query.
  The returned query will fetch all the entities that have the parent key name
  set to the given `ancestor`.
  """
  ancestor_key = entity_pb2.Key()
  datastore_helper.add_key_path(ancestor_key, kind, ancestor)
  if namespace is not None:
    ancestor_key.partition_id.namespace_id = namespace

  query = query_pb2.Query()
  query.kind.add().name = kind

  datastore_helper.set_property_filter(
      query.filter, '__key__', PropertyFilter.HAS_ANCESTOR, ancestor_key)

  return query
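
For context, a hedged sketch of an entity key that the HAS_ANCESTOR filter above would select (the child id 42 is an arbitrary illustration; add_key_path takes alternating kind and name-or-id arguments):

  child_key = entity_pb2.Key()
  # Parent path element (kind, ancestor) followed by the child element.
  datastore_helper.add_key_path(child_key, kind, ancestor, kind, 42)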
Example #23
    def expand(self, pcoll):
        query = query_pb2.Query()
        query.kind.add().name = 'Tweet'
        now = datetime.datetime.now()
        # The 'earlier' var will be set to a static value on template creation.
        # That is, because of the way that templates work, the value is defined
        # at template compile time, not runtime.
        # But defining a filter based on this value will still serve to make the
        # query more efficient than if we didn't filter at all.
        earlier = now - datetime.timedelta(days=self.days)
        datastore_helper.set_property_filter(query.filter, 'created_at',
                                             PropertyFilter.GREATER_THAN,
                                             earlier)

        return (pcoll
                | 'read from datastore' >> ReadFromDatastore(
                    self.project, query, None))
Example #24
def make_kind_stats_query(namespace, kind, latest_timestamp):
  """Make a Query to fetch the latest kind statistics."""
  kind_stat_query = query_pb2.Query()
  if namespace is None:
    kind_stat_query.kind.add().name = '__Stat_Kind__'
  else:
    kind_stat_query.kind.add().name = '__Stat_Ns_Kind__'

  kind_filter = datastore_helper.set_property_filter(
      query_pb2.Filter(), 'kind_name', PropertyFilter.EQUAL, unicode(kind))
  timestamp_filter = datastore_helper.set_property_filter(
      query_pb2.Filter(), 'timestamp', PropertyFilter.EQUAL,
      latest_timestamp)

  datastore_helper.set_composite_filter(kind_stat_query.filter,
                                        CompositeFilter.AND, kind_filter,
                                        timestamp_filter)
  return kind_stat_query
Example #25
def _create_scatter_query(query, num_splits):
    """Creates a scatter query from the given user query."""

    scatter_query = query_pb2.Query()
    for kind in query.kind:
        scatter_kind = scatter_query.kind.add()
        scatter_kind.CopyFrom(kind)

    # ascending order
    datastore_helper.add_property_orders(scatter_query, SCATTER_PROPERTY_NAME)

    # There is a split containing entities before and after each scatter entity:
    # ||---*------*------*------*------*------*------*---||  * = scatter entity
    # If we represent each split as a region before a scatter entity, there is an
    # extra region following the last scatter point. Thus, we do not need the
    # scatter entity for the last region.
    scatter_query.limit.value = (num_splits - 1) * KEYS_PER_SPLIT
    datastore_helper.add_projection(scatter_query, KEY_PROPERTY_NAME)

    return scatter_query
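
A quick sanity check of the limit arithmetic above, mirroring the assertion in Example #15 (the value 32 for KEYS_PER_SPLIT is an assumption; use the module constant):

    query = query_pb2.Query()
    query.kind.add().name = 'my_kind'  # placeholder kind
    scatter = _create_scatter_query(query, num_splits=4)
    # One batch of scatter keys per split boundary: (4 - 1) * KEYS_PER_SPLIT,
    # i.e. a limit of 96 if KEYS_PER_SPLIT is 32.
    assert scatter.limit.value == (4 - 1) * KEYS_PER_SPLIT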
Example #26
    def test__next_page(self):
        from google.cloud.iterator import Page
        from google.cloud.proto.datastore.v1 import query_pb2
        from google.cloud.datastore.query import Query

        connection = _Connection()
        more_enum = query_pb2.QueryResultBatch.NOT_FINISHED
        result = _make_query_response([], b'', more_enum, 0)
        connection._results = [result]
        project = 'prujekt'
        client = _Client(project, connection)
        query = Query(client)
        iterator = self._make_one(query, client)

        page = iterator._next_page()
        self.assertIsInstance(page, Page)
        self.assertIs(page._parent, iterator)

        self.assertEqual(connection._called_with, [{
            'query_pb': query_pb2.Query(),
            'project': project,
            'namespace': None,
            'transaction_id': None,
        }])
Example #27
def run(argv=None):
    """Main entry point."""

    parser = argparse.ArgumentParser()

    parser.add_argument('--kind',
                        dest='kind',
                        default='writereadtest',
                        help='Datastore Kind')
    parser.add_argument('--num_entities',
                        dest='num_entities',
                        type=int,
                        required=True,
                        help='Number of entities to write')
    parser.add_argument('--limit',
                        dest='limit',
                        type=int,
                        help='Limit of number of entities to write')

    known_args, pipeline_args = parser.parse_known_args(argv)
    pipeline_options = PipelineOptions(pipeline_args)
    pipeline_options.view_as(SetupOptions).save_main_session = True
    gcloud_options = pipeline_options.view_as(GoogleCloudOptions)
    job_name = gcloud_options.job_name
    kind = known_args.kind
    num_entities = known_args.num_entities
    project = gcloud_options.project
    # A random ancestor key.
    ancestor = str(uuid.uuid4())
    query = make_ancestor_query(kind, None, ancestor)

    # Pipeline 1: Create and write the specified number of Entities to the
    # Cloud Datastore.
    logging.info('Writing %s entities to %s', num_entities, project)
    p = new_pipeline_with_job_name(pipeline_options, job_name, '-write')

    # pylint: disable=expression-not-assigned
    (p
     | 'Input' >> beam.Create(list(range(known_args.num_entities)))
     | 'To String' >> beam.Map(str)
     |
     'To Entity' >> beam.Map(EntityWrapper(kind, None, ancestor).make_entity)
     | 'Write to Datastore' >> WriteToDatastore(project))

    p.run()

    # Optional Pipeline 2: if a read limit was provided, read with that limit
    # and confirm that the expected number of entities was read.
    if known_args.limit is not None:
        logging.info(
            'Querying a limited set of %s entities and verifying count.',
            known_args.limit)
        p = new_pipeline_with_job_name(pipeline_options, job_name,
                                       '-verify-limit')
        query_with_limit = query_pb2.Query()
        query_with_limit.CopyFrom(query)
        query_with_limit.limit.value = known_args.limit
        entities = p | 'read from datastore' >> ReadFromDatastore(
            project, query_with_limit)
        assert_that(entities | beam.combiners.Count.Globally(),
                    equal_to([known_args.limit]))

        p.run()

    # Pipeline 3: Query the written Entities and verify result.
    logging.info('Querying entities, asserting they match.')
    p = new_pipeline_with_job_name(pipeline_options, job_name, '-verify')
    entities = p | 'read from datastore' >> ReadFromDatastore(project, query)

    assert_that(entities | beam.combiners.Count.Globally(),
                equal_to([num_entities]))

    p.run()

    # Pipeline 4: Delete Entities.
    logging.info('Deleting entities.')
    p = new_pipeline_with_job_name(pipeline_options, job_name, '-delete')
    entities = p | 'read from datastore' >> ReadFromDatastore(project, query)
    # pylint: disable=expression-not-assigned
    (entities
     | 'To Keys' >> beam.Map(lambda entity: entity.key)
     | 'Delete keys' >> DeleteFromDatastore(project))

    p.run()

    # Pipeline 5: Query the written Entities, verify no results.
    logging.info(
        'Querying for the entities to make sure there are none present.')
    p = new_pipeline_with_job_name(pipeline_options, job_name,
                                   '-verify-deleted')
    entities = p | 'read from datastore' >> ReadFromDatastore(project, query)

    assert_that(entities | beam.combiners.Count.Globally(), equal_to([0]))

    p.run()
Example #28
 def setUp(self):
     self._mock_datastore = MagicMock()
     self._query = query_pb2.Query()
     self._query.kind.add().name = self._KIND
 def test_get_splits_query_with_offset(self):
     query = query_pb2.Query()
     query.kind.add()
     query.offset = 10
     self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                       2)
 def test_get_splits_query_with_multiple_kinds(self):
     query = query_pb2.Query()
     query.kind.add()
     query.kind.add()
     self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                       4)