def DbGetValue(self, v, p):
    """Decode a stored property value into a model instance.

    Args:
        v: Property value message; only its string value is read.
        p: Property message; a meaning equal to _MEANING_COMPRESSED marks
            the string value as zlib-compressed.

    Returns:
        A new ``self._modelclass()`` instance populated from the decoded
        entity protobuf, with its ``key`` reset to None, or None when
        ``v`` carries no string value.
    """
    if not v.has_stringvalue():
        return None

    payload = v.stringvalue()
    is_compressed = p.has_meaning() and p.meaning() == _MEANING_COMPRESSED
    if is_compressed:
        payload = zlib.decompress(payload)

    proto = entity_pb.EntityProto(payload)
    instance = self._modelclass()
    instance.FromPb(proto)
    instance.key = None
    return instance
def get_alternatives(self, experiment_name):
    """Return the deserialized alternative models for an experiment.

    Models are decoded from ``self.alternatives`` on the first request and
    memoized in ``self.alternative_models``.

    Args:
        experiment_name: Name of the experiment to look up.

    Returns:
        A list of models; an empty list when the experiment is unknown or
        has no alternatives.
    """
    if (experiment_name not in self.alternative_models
            and experiment_name in self.alternatives):
        serialized = self.alternatives[experiment_name]
        # Build the complete list before caching it, so a decode failure
        # part-way through cannot leave a truncated list memoized.
        self.alternative_models[experiment_name] = [
            db.model_from_protobuf(
                entity_pb.EntityProto(serialized[alternative_number]))
            for alternative_number in serialized
        ]
    return self.alternative_models.get(experiment_name) or []
def get_filter_models(self, bridge_name):
    """Return the deserialized filter models for a bridge, caching them.

    Callers must hold the lock returned by get_lock() when calling this
    method for thread-safe access.

    Args:
        bridge_name: Name of the bridge whose filters are wanted.

    Returns:
        The list of filter models, or None when the bridge has neither a
        cached entry nor serialized filters.
    """
    cache = self.filter_models
    if bridge_name in cache:
        return cache[bridge_name]
    if bridge_name not in self.filters:
        return None

    decoded = []
    for encoded in self.filters[bridge_name]:
        decoded.append(db.model_from_protobuf(entity_pb.EntityProto(encoded)))
    cache[bridge_name] = decoded
    return decoded
def map(self, record):
    """Restore entity map handler.

    Args:
        record: A serialized entity_pb.EntityProto.

    Yields:
        An op.db.Put operation for the entity decoded from the record.
    """
    proto = entity_pb.EntityProto(contents=record)
    restored = datastore.Entity._FromPb(proto)
    yield op.db.Put(restored)
def _Dynamic_Put(self, put_request, put_response):
    """Store copies of the request's entities and report their keys.

    Every entity is cloned before it is stored. User-valued properties get
    an obfuscated Gaia id derived from the MD5 digest of the lower-cased
    e-mail address. A clone whose final path element has id 0 and no name
    is given the next sequential id and an entity group consisting of its
    first path element.

    Args:
        put_request: Request message providing entity_list() and an
            optional transaction.
        put_response: Response message whose key_list() is extended with
            the key of every stored clone, in request order.
    """
    if put_request.has_transaction():
        self.__ValidateTransaction(put_request.transaction())

    clones = []
    for entity in put_request.entity_list():
        self.__ValidateKey(entity.key())

        clone = entity_pb.EntityProto()
        clone.CopyFrom(entity)

        for prop in clone.property_list() + clone.raw_property_list():
            if prop.value().has_uservalue():
                uid = md5.new(
                    prop.value().uservalue().email().lower()).digest()
                uid = '1' + ''.join(['%02d' % ord(x) for x in uid])[:20]
                prop.mutable_value().mutable_uservalue(
                    ).set_obfuscated_gaiaid(uid)

        clones.append(clone)

        assert clone.has_key()
        assert clone.key().path().element_size() > 0

        last_path = clone.key().path().element_list()[-1]
        if last_path.id() == 0 and not last_path.has_name():
            # Release the id lock even if allocation fails, mirroring how
            # the entities lock is handled below.
            self.__id_lock.acquire()
            try:
                last_path.set_id(self.__next_id)
                self.__next_id += 1
            finally:
                self.__id_lock.release()

            assert clone.entity_group().element_size() == 0
            group = clone.mutable_entity_group()
            root = clone.key().path().element(0)
            group.add_element().CopyFrom(root)
        else:
            assert (clone.has_entity_group() and
                    clone.entity_group().element_size() > 0)

    self.__entities_lock.acquire()
    try:
        for clone in clones:
            self._StoreEntity(clone)
    finally:
        self.__entities_lock.release()

    # Writes made inside a transaction are not flushed here.
    if not put_request.has_transaction():
        self.__WriteDatastore()

    put_response.key_list().extend([c.key() for c in clones])
def readfile(f, otype, outs):
    """Read one record file of a kind and write out its known entities.

    The file ``DATA_DIRECTORY/kind_<otype>/<f>`` is read record by record;
    each record is decoded as an entity protobuf, and entities whose kind
    (the type of the last key path element) is listed in ``objects`` are
    passed to ``write_object``.

    Args:
        f: File name inside the kind's directory.
        otype: Kind name; selects the directory and the ``count`` bucket.
        outs: Output handle(s) forwarded unchanged to ``write_object``.
    """
    path = '{0}/kind_{1}/{2}'.format(DATA_DIRECTORY, otype, f)
    # Record files hold binary data; 'rb' avoids newline translation, and
    # the with-block closes the handle even when decoding fails.
    with open(path, 'rb') as raw:
        for record in records.RecordsReader(raw):
            entity_proto = entity_pb.EntityProto(contents=record)
            entity = datastore.Entity.FromPb(entity_proto)
            the_type = entity_proto.key().path().element_list()[-1].type()
            if the_type in objects:
                write_object(outs, the_type, entity)
                # NOTE(review): counts written objects only; confirm this
                # was not meant to count every record read.
                count[otype] += 1
def _Next(self): """Fetches the next unique result from the result set. Returns: A datastore_pb.EntityProto instance. """ if self._HasNext(): self.__seen.add(self.__next_result[0]) entity = entity_pb.EntityProto(self.__next_result[1]) datastore_stub_util.PrepareSpecialPropertiesForLoad(entity) self.__next_result = None, None return entity return None
def fn():
    """Deserialize ``serialized`` lazily and touch prop_0 .. prop_9.

    Taking len() of each property forces the lazy entity to materialize
    it; the lengths themselves are discarded.
    """
    lazy_entity = datastore_lazy.LazyEntity(entity_pb.EntityProto(serialized))
    for index in range(10):
        len(getattr(lazy_entity, 'prop_%d' % index))
def get_bridge_model(self, bridge_name):
    """Return the deserialized model for a bridge, caching it.

    Callers must hold the lock returned by get_lock() when calling this
    method for thread-safe access.

    Args:
        bridge_name: Name of the bridge to look up.

    Returns:
        The bridge model, or None when the bridge has neither a cached
        model nor serialized contents.
    """
    cache = self.bridge_models
    if bridge_name in cache:
        return cache[bridge_name]
    if bridge_name not in self.bridges:
        return None

    proto = entity_pb.EntityProto(self.bridges[bridge_name])
    decoded = db.model_from_protobuf(proto)
    cache[bridge_name] = decoded
    return decoded
def _Dynamic_Put(self, put_request, put_response):
    """Store copies of the request's entities and report their keys.

    Every entity is cloned before it is stored. Each property of the clone
    is passed to datastore_stub_util.FillUser. A clone whose final path
    element has id 0 and no name is given the next sequential id and an
    entity group consisting of its first path element.

    Args:
        put_request: Request message providing entity_list() and an
            optional transaction.
        put_response: Response message whose key_list() is extended with
            the key of every stored clone, in request order.
    """
    if put_request.has_transaction():
        self.__ValidateTransaction(put_request.transaction())

    clones = []
    for entity in put_request.entity_list():
        self.__ValidateKey(entity.key())

        clone = entity_pb.EntityProto()
        clone.CopyFrom(entity)

        for prop in clone.property_list() + clone.raw_property_list():
            datastore_stub_util.FillUser(prop)

        clones.append(clone)

        assert clone.has_key()
        assert clone.key().path().element_size() > 0

        last_path = clone.key().path().element_list()[-1]
        if last_path.id() == 0 and not last_path.has_name():
            # Release the id lock even if allocation fails, mirroring how
            # the entities lock is handled below.
            self.__id_lock.acquire()
            try:
                last_path.set_id(self.__next_id)
                self.__next_id += 1
            finally:
                self.__id_lock.release()

            assert clone.entity_group().element_size() == 0
            group = clone.mutable_entity_group()
            root = clone.key().path().element(0)
            group.add_element().CopyFrom(root)
        else:
            assert (clone.has_entity_group() and
                    clone.entity_group().element_size() > 0)

        datastore_stub_util.PrepareSpecialPropertiesForStore(clone)

    self.__entities_lock.acquire()
    try:
        for clone in clones:
            self._StoreEntity(clone)
    finally:
        self.__entities_lock.release()

    # Writes made inside a transaction are not flushed here.
    if not put_request.has_transaction():
        self.__WriteDatastore()

    put_response.key_list().extend([c.key() for c in clones])
def _Dynamic_GetSchema(self, app_str, schema):
    """Build one schema entity per stored kind of an application.

    For every (app, kind) pair in the entity store that belongs to the
    resolved application, an EntityProto is created that names the kind
    and carries one property per property name seen on the stored
    entities. Property values are replaced by placeholder values so only
    the value type remains.

    Args:
        app_str: Message whose value() is the application id to resolve.
        schema: Response message whose kind_list() is extended with the
            generated kind entities.
    """
    # Placeholder written into integer-valued fields.
    minint = -sys.maxint - 1
    app_str = self.ResolveAppId(app_str.value())
    kinds = []
    for app, kind in self.__entities:
        if app == app_str:
            # Skeleton entity describing this kind: empty app, a single
            # path element carrying the kind name, and an entity group.
            kind_pb = entity_pb.EntityProto()
            kind_pb.mutable_key().set_app('')
            kind_pb.mutable_key().mutable_path().add_element().set_type(
                kind)
            kind_pb.mutable_entity_group()
            kinds.append(kind_pb)

            # Merge the values seen for each property name across all
            # stored entities of this kind.
            props = {}
            for entity in self.__entities[(app, kind)].values():
                for prop in entity.property_list():
                    if prop.name() not in props:
                        props[prop.name()] = entity_pb.PropertyValue()
                    props[prop.name()].MergeFrom(prop.value())

            # Overwrite every populated field with a fixed placeholder.
            for value_pb in props.values():
                if value_pb.has_int64value():
                    value_pb.set_int64value(minint)
                if value_pb.has_booleanvalue():
                    value_pb.set_booleanvalue(False)
                if value_pb.has_stringvalue():
                    value_pb.set_stringvalue('')
                if value_pb.has_doublevalue():
                    value_pb.set_doublevalue(float('-inf'))
                if value_pb.has_pointvalue():
                    value_pb.mutable_pointvalue().set_x(float('-inf'))
                    value_pb.mutable_pointvalue().set_y(float('-inf'))
                if value_pb.has_uservalue():
                    value_pb.mutable_uservalue().set_gaiaid(minint)
                    value_pb.mutable_uservalue().set_email('')
                    value_pb.mutable_uservalue().set_auth_domain('')
                    value_pb.mutable_uservalue().set_nickname('')
                # Note the elif: a reference value is only reset when the
                # property has no user value.
                elif value_pb.has_referencevalue():
                    value_pb.clear_referencevalue()
                    value_pb.mutable_referencevalue().set_app('')

            # Attach the placeholder-valued properties to the kind entity.
            for name, value_pb in props.items():
                prop_pb = kind_pb.add_property()
                prop_pb.set_name(name)
                prop_pb.mutable_value().CopyFrom(value_pb)

    schema.kind_list().extend(kinds)
def test_index_deletions(self):
    """Check which index rows index_deletions() schedules for removal.

    Covers an unchanged entity, a changed property, a matching composite
    index, and a composite index defined for a different entity type.
    """
    old_entity = self.get_new_entity_proto(*self.BASIC_ENTITY)

    # No deletions should occur when the entity doesn't change.
    db_batch = flexmock()
    db_batch.should_receive('valid_data_version').and_return(True)
    # NOTE(review): dd is not used below; kept in case construction is
    # required for setup -- confirm before removing.
    dd = DatastoreDistributed(db_batch, None)
    self.assertListEqual([], index_deletions(old_entity, old_entity))

    # When a property changes, the previous index entries should be deleted.
    new_entity = entity_pb.EntityProto()
    new_entity.MergeFrom(old_entity)
    new_entity.property_list()[0].value().set_stringvalue('updated content')

    deletions = index_deletions(old_entity, new_entity)
    self.assertEqual(len(deletions), 2)
    self.assertEqual(deletions[0]['table'], dbconstants.ASC_PROPERTY_TABLE)
    self.assertEqual(deletions[1]['table'], dbconstants.DSC_PROPERTY_TABLE)

    # Give both entities the same extra 'author' property.
    prop = old_entity.add_property()
    prop.set_name('author')
    value = prop.mutable_value()
    value.set_stringvalue('author1')

    prop = new_entity.add_property()
    prop.set_name('author')
    value = prop.mutable_value()
    value.set_stringvalue('author1')

    # When given an index, an entry should be removed from the composite table.
    composite_index = entity_pb.CompositeIndex()
    composite_index.set_id(123)
    composite_index.set_app_id('guestbook')
    definition = composite_index.mutable_definition()
    definition.set_entity_type('Greeting')
    prop1 = definition.add_property()
    prop1.set_name('content')
    prop1.set_direction(datastore_pb.Query_Order.ASCENDING)
    prop2 = definition.add_property()
    prop2.set_name('author')
    # The direction belongs on prop2; the original set prop1's twice.
    prop2.set_direction(datastore_pb.Query_Order.ASCENDING)

    deletions = index_deletions(old_entity, new_entity, (composite_index,))
    self.assertEqual(len(deletions), 3)
    self.assertEqual(deletions[0]['table'], dbconstants.ASC_PROPERTY_TABLE)
    self.assertEqual(deletions[1]['table'], dbconstants.DSC_PROPERTY_TABLE)
    self.assertEqual(deletions[2]['table'], dbconstants.COMPOSITE_TABLE)

    # No composite deletions should occur when the entity type differs.
    definition.set_entity_type('TestEntity')
    deletions = index_deletions(old_entity, new_entity, (composite_index,))
    self.assertEqual(len(deletions), 2)
def pb_to_dict(pb, parent=None):
    """Convert a serialized entity protocol buffer to a JSON-ready dict.

    Args:
        pb: Serialized entity protobuf.
        parent: When truthy, the entity's parent key (if any) is stored
            under 'parent'.

    Returns:
        The result of apply_transform() on a dict holding the entity's
        properties plus 'key' and, optionally, 'parent' as strings.
    """
    proto = entity_pb.EntityProto(pb)
    entity = datastore.Entity._FromPb(proto)

    # Create a json serializable dictionary from entity
    document = dict(entity)
    pre_process_entity_dict(entity.kind(), document)
    document['key'] = str(entity.key())
    if parent:
        if entity.parent():
            document['parent'] = str(entity.parent())
    return apply_transform(document)
def _Get(self, key):
    """Load the entity stored under ``key`` from the Entities table.

    Args:
        key: Entity key; its path is encoded to match the ``__path__``
            column, and it selects the table prefix.

    Returns:
        The result of datastore_stub_util.LoadEntity() for the stored
        entity, or None when no row matches.
    """
    conn = self._GetConnection()
    try:
        table_prefix = self._GetTablePrefix(key)
        query = 'SELECT entity FROM "%s!Entities" WHERE __path__ = ?' % (
            table_prefix,)
        cursor = conn.execute(query, (self.__EncodeIndexPB(key.path()),))
        row = cursor.fetchone()
        if not row:
            return None
        stored = entity_pb.EntityProto()
        stored.ParseFromString(row[0])
        return datastore_stub_util.LoadEntity(stored)
    finally:
        # The connection is handed back on every exit path.
        self._ReleaseConnection(conn)
def load_pbufs_to_db(config, mongo, entity_list, start_dt, end_dt, kind=None):
    """Load serialized entity protocol buffers into mongo.

    Args:
        config: Configuration dict; forwarded to put_document() and read
            for 'coordinator_cfg'.
        mongo: Mongo handle forwarded to put_document() and the progress
            recorder.
        entity_list: List of serialized entity protobufs.
        start_dt: Start of the download window (used for logging and
            progress).
        end_dt: End of the download window (used for logging and
            progress).
        kind: Kind name; when falsy it is taken from the first entity, or
            'unknown' for an empty list.
    """
    if not kind:
        kind = 'unknown'
        if len(entity_list) > 0:
            first = datastore.Entity._FromPb(
                entity_pb.EntityProto(entity_list[0]))
            kind = first.key().kind()

    for loaded, pb in enumerate(entity_list, 1):
        entity = datastore.Entity._FromPb(entity_pb.EntityProto(pb))
        put_document(entity, config, mongo)
        # Progress line every 100000 documents.
        if (loaded % 100000) == 0:
            g_logger.info(
                "Writing to db for %s from %s to %s. # rows: %d loaded"
                % (kind, start_dt, end_dt, loaded))

    # assume all entities are from the same kind
    g_logger.info("Writing to db for %s from %s to %s. # rows: %d finishes"
                  % (kind, start_dt, end_dt, len(entity_list)))
    kdc.record_progress(mongo, config['coordinator_cfg'], kind, start_dt,
                        end_dt, kdc.DownloadStatus.LOADED)
def v4_value_to_v3_property_value(self, v4_value, v3_value):
    """Converts a v4 Value to a v3 PropertyValue.

    The first populated v4 field, checked in the order below, decides
    which v3 field is written. When no field is populated, v3_value is
    left cleared.

    Args:
        v4_value: an entity_v4_pb.Value
        v3_value: an entity_pb.PropertyValue to populate
    """
    v3_value.Clear()

    if v4_value.has_boolean_value():
        v3_value.set_booleanvalue(v4_value.boolean_value())
        return
    if v4_value.has_integer_value():
        v3_value.set_int64value(v4_value.integer_value())
        return
    if v4_value.has_double_value():
        v3_value.set_doublevalue(v4_value.double_value())
        return
    if v4_value.has_timestamp_microseconds_value():
        v3_value.set_int64value(v4_value.timestamp_microseconds_value())
        return
    if v4_value.has_key_value():
        v3_ref = entity_pb.Reference()
        self.v4_to_v3_reference(v4_value.key_value(), v3_ref)
        self.v3_reference_to_v3_property_value(v3_ref, v3_value)
        return
    if v4_value.has_blob_key_value():
        v3_value.set_stringvalue(v4_value.blob_key_value())
        return
    if v4_value.has_string_value():
        v3_value.set_stringvalue(v4_value.string_value())
        return
    if v4_value.has_blob_value():
        v3_value.set_stringvalue(v4_value.blob_value())
        return
    if v4_value.has_entity_value():
        v4_entity_value = v4_value.entity_value()
        v4_meaning = v4_value.meaning()
        if v4_meaning in (MEANING_GEORSS_POINT,
                          MEANING_PREDEFINED_ENTITY_POINT):
            self.__v4_to_v3_point_value(v4_entity_value,
                                        v3_value.mutable_pointvalue())
        elif v4_meaning == MEANING_PREDEFINED_ENTITY_USER:
            self.v4_entity_to_v3_user_value(v4_entity_value,
                                            v3_value.mutable_uservalue())
        else:
            # Any other embedded entity is stored as its serialized bytes.
            v3_entity_value = entity_pb.EntityProto()
            self.v4_to_v3_entity(v4_entity_value, v3_entity_value)
            v3_value.set_stringvalue(
                v3_entity_value.SerializePartialToString())
        return
    if v4_value.has_geo_point_value():
        point_value = v3_value.mutable_pointvalue()
        point_value.set_x(v4_value.geo_point_value().latitude())
        point_value.set_y(v4_value.geo_point_value().longitude())
def Read(self):
    """Reads the datastore and history files into memory.

    The in-memory query history is cleared, but the datastore is *not*
    cleared; the entities in the files are merged into the entities in
    memory. If you want them to overwrite the in-memory datastore, call
    Clear() before calling Read().

    If the datastore file contains an entity with the same app name, kind,
    and key as an entity already in the datastore, the entity from the
    file overwrites the entity in the datastore.

    Also sets __next_id to one greater than the highest id allocated so
    far.

    Raises:
        datastore_errors.InternalError: if an entity or query in the files
            cannot be decoded.
    """
    # Exceptions treated as "file is corrupt or from another version".
    pb_exceptions = (ProtocolBuffer.ProtocolBufferDecodeError, LookupError,
                     TypeError, ValueError)
    error_msg = ('Data in %s is corrupt or a different version. '
                 'Try running with the --clear_datastore flag.\n%r')

    # Nothing is read when no datastore file is set or it is /dev/null.
    if self.__datastore_file and self.__datastore_file != '/dev/null':
        for encoded_entity in self.__ReadPickled(self.__datastore_file):
            try:
                entity = entity_pb.EntityProto(encoded_entity)
            except pb_exceptions, e:
                raise datastore_errors.InternalError(
                    error_msg % (self.__datastore_file, e))

            # Entities are grouped by (app, kind) and keyed by entity key,
            # so a file entity replaces an in-memory one with the same key.
            last_path = entity.key().path().element_list()[-1]
            app_kind = (entity.key().app(), last_path.type())
            kind_dict = self.__entities.setdefault(app_kind, {})
            kind_dict[entity.key()] = entity

            # Keep the id counter ahead of every id seen in the file.
            if last_path.has_id() and last_path.id() >= self.__next_id:
                self.__next_id = last_path.id() + 1

        # NOTE(review): the history section is placed inside the
        # datastore-file check; confirm this nesting against the upstream
        # file.
        self.__query_history = {}
        for encoded_query, count in self.__ReadPickled(
                self.__history_file):
            try:
                query_pb = datastore_pb.Query(encoded_query)
            except pb_exceptions, e:
                raise datastore_errors.InternalError(
                    error_msg % (self.__history_file, e))

            # Counts for identical queries are summed.
            if query_pb in self.__query_history:
                self.__query_history[query_pb] += count
            else:
                self.__query_history[query_pb] = count
def get(path):
    """Returns the StaticContent object for the provided path.

    memcache is consulted first; on a miss the datastore is queried by key
    name and a found entity is written back to memcache in encoded form.

    Args:
        path: The path to retrieve StaticContent for.

    Returns:
        A StaticContent object, or None if no content exists for this path.
    """
    cached = memcache.get(path)
    if cached:
        return db.model_from_protobuf(entity_pb.EntityProto(cached))

    content = StaticContent.get_by_key_name(path)
    if content:
        memcache.set(path, db.model_to_protobuf(content).Encode())
    return content
def FixPropertyList(property_list):
    """Apply key fixing to every property in ``property_list``, in place.

    Reference values are passed to FixKey(). Properties whose meaning is
    ENTITY_PROTO are parsed as an embedded entity, passed to FixKeys() and
    re-serialized; an embedded entity that fails to parse is logged and
    left untouched.

    Args:
        property_list: Iterable of property messages to update.
    """
    for prop in property_list:
        value = prop.mutable_value()
        if value.has_referencevalue():
            FixKey(value.mutable_referencevalue())
            continue
        if prop.meaning() != entity_pb.Property.ENTITY_PROTO:
            continue

        embedded = entity_pb.EntityProto()
        try:
            embedded.ParsePartialFromString(value.stringvalue())
        except Exception:
            logging.exception('Failed to fix-keys for property %s of %s',
                              prop.name(), entity_proto.key())
            continue

        FixKeys(embedded, app_id)
        value.set_stringvalue(embedded.SerializePartialToString())
def process_statistics(self, key, entity, version):
    """Processes an entity and adds to the global statistics.

    Entities of protected or private kinds, and entities belonging to
    AppScale's own applications, are accepted but not counted.

    Args:
        key: The key to the entity table.
        entity: The entity in string serialized form.
        version: The version of the entity in the datastore.

    Returns:
        True on success, False when the entity has no kind or no app id.
    """
    proto = entity_pb.EntityProto()
    proto.ParseFromString(entity)

    kind = datastore_server.DatastoreDistributed.get_entity_kind(proto.key())
    namespace = proto.key().name_space()

    if not kind:
        logging.warning("Entity did not have a kind {0}".format(entity))
        return False

    if re.match(self.PROTECTED_KINDS, kind) or re.match(self.PRIVATE_KINDS,
                                                       kind):
        return True

    app_id = proto.key().app()
    if not app_id:
        logging.warning(
            "Entity of kind {0} did not have an app id".format(kind))
        return False

    # Do not generate statistics for applications which are internal to
    # AppScale.
    if app_id in self.APPSCALE_APPLICATIONS:
        return True

    self.initialize_kind(app_id, kind)
    self.initialize_namespace(app_id, namespace)

    entity_size = len(entity)
    namespace_totals = self.namespace_info[app_id][namespace]
    namespace_totals['size'] += entity_size
    namespace_totals['number'] += 1
    kind_totals = self.stats[app_id][kind]
    kind_totals['size'] += entity_size
    kind_totals['number'] += 1
    return True
def map(self, record):
    """Restore entity map handler.

    When ``self.app_id`` is set, keys in the record are rewritten with
    utils.FixKeys() before decoding. Entities outside a non-empty
    ``self.kind_filter`` yield nothing.

    Args:
        record: A serialized entity_pb.EntityProto.

    Yields:
        An op.db.Put for the mapped entity, followed by a
        utils.AllocateMaxId operation when ``self.app_id`` is set.
    """
    self.initialize()
    proto = entity_pb.EntityProto(contents=record)
    if self.app_id:
        utils.FixKeys(proto, self.app_id)
    entity = datastore.Entity.FromPb(proto)

    if self.kind_filter and entity.kind() not in self.kind_filter:
        return

    yield op.db.Put(entity)
    if self.app_id:
        yield utils.AllocateMaxId(entity.key(), self.app_id)
def _Dynamic_Put(self, put_request, put_response):
    """Store copies of the request's entities and report their keys.

    Every entity is cloned and its key's app id is replaced by the
    resolved app id. A clone whose final path element has id 0 and no name
    is given the next sequential id and an entity group consisting of its
    first path element. Clones are stored per (app, kind), keyed by entity
    key.

    Args:
        put_request: Request message providing entity_list() and an
            optional transaction.
        put_response: Response message whose key_list() is extended with
            the key of every stored clone, in request order.
    """
    clones = []
    for entity in put_request.entity_list():
        clone = entity_pb.EntityProto()
        clone.CopyFrom(entity)
        clones.append(clone)

        assert clone.has_key()
        assert clone.key().path().element_size() > 0

        app = self.ResolveAppId(clone.key().app())
        clone.mutable_key().set_app(app)

        last_path = clone.key().path().element_list()[-1]
        if last_path.id() == 0 and not last_path.has_name():
            # Release the id lock even if allocation fails, mirroring how
            # the entities lock is handled below.
            self.__id_lock.acquire()
            try:
                last_path.set_id(self.__next_id)
                self.__next_id += 1
            finally:
                self.__id_lock.release()

            assert clone.entity_group().element_size() == 0
            group = clone.mutable_entity_group()
            root = clone.key().path().element(0)
            group.add_element().CopyFrom(root)
        else:
            assert (clone.has_entity_group() and
                    clone.entity_group().element_size() > 0)

    self.__entities_lock.acquire()
    try:
        for clone in clones:
            last_path = clone.key().path().element_list()[-1]
            # Use each clone's own (already resolved) app id. The original
            # reused the app of the last entity in the request for every
            # clone, filing entities of other apps under the wrong bucket.
            app_kind = (clone.key().app(), last_path.type())
            kind_dict = self.__entities.setdefault(app_kind, {})
            kind_dict[clone.key()] = clone
    finally:
        self.__entities_lock.release()

    # Writes made inside a transaction are not flushed here.
    if not put_request.has_transaction():
        self.__WriteDatastore()

    put_response.key_list().extend([c.key() for c in clones])
def run():
    """Import every record file in a local folder into the datastore.

    Each record is decoded as an entity protobuf, its key's app id is
    overwritten with ``appname``, and the resulting model is saved with
    db.put() in batches of 100. A file that raises
    ProtocolBufferDecodeError is abandoned and the next file is tried.
    """
    # Set your downloaded folder's path here (must be readable by dev_appserver)
    mypath = '/local_target'
    # Set your app's name here
    appname = "dev~yourappnamehere"

    # Do the harlem shake
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    # NOTE(review): ec is never used; the call is kept in case obtaining
    # the converter has a required side effect -- confirm before removing.
    ec = datastore_pbs.get_entity_converter()

    for filename in onlyfiles:
        i = 0
        try:
            # Record files are binary; 'rb' avoids newline translation.
            raw = open(mypath + "/" + filename, 'rb')
            try:
                reader = records.RecordsReader(raw)
                to_put = list()
                for record in reader:
                    entity_proto = entity_pb.EntityProto(contents=record)
                    entity_proto.key_.app_ = appname
                    a = db.model_from_protobuf(entity_proto)
                    # Re-point db.Key values found on "_<name>" attributes
                    # at the target app as well.
                    for pp in dir(a):
                        try:
                            ppp = getattr(a, "_" + pp)
                            if isinstance(ppp, db.Key):
                                ppp._Key__reference.set_app(appname)
                        except AttributeError:
                            # Not every name has a "_"-prefixed twin.
                            pass

                    to_put.append(a)
                    i += 1
                    if i % 100 == 0:
                        print("Saved %d %ss" % (i, a.kind()))
                        db.put(to_put)
                        to_put = list()

                # Flush whatever is left of the final partial batch.
                db.put(to_put)
                to_put = list()
                print("Saved %d" % i)
            finally:
                # Close the handle even when decoding or saving fails.
                raw.close()
        except ProtocolBufferDecodeError:
            # Deliberate best-effort: skip files that do not decode.
            pass
def Read(self):
    """Reads the datastore file into memory.

    The datastore is *not* cleared; the entities in the file are merged
    into the entities in memory. If you want them to overwrite the
    in-memory datastore, call Clear() before calling Read().

    If the datastore file contains an entity with the same app name, kind,
    and key as an entity already in the datastore, the entity from the
    file overwrites the entity in the datastore.

    Also sets each ID counter to one greater than the highest ID allocated
    so far in that counter's ID space.

    NOTE(review): earlier wording said the in-memory query history is
    cleared; this method does not touch any history.

    Raises:
        apiproxy_errors.ApplicationError: with INTERNAL_ERROR when a stored
            entity cannot be decoded.
    """
    # Nothing is read when no datastore file is set or it is /dev/null.
    if self.__datastore_file and self.__datastore_file != '/dev/null':
        entities = self.__ReadPickled(self.__datastore_file)
        # A trailing integer is not an entity: it is popped off and stored
        # as the commit timestamp.
        if entities and isinstance(entities[-1], (int, long)):
            self._commit_timestamp = int(entities.pop())

        for encoded_entity in entities:
            try:
                entity = entity_pb.EntityProto(encoded_entity)
                record = datastore_stub_util._FromStorageEntity(entity)
            except self.READ_PB_EXCEPTIONS, e:
                raise apiproxy_errors.ApplicationError(
                    datastore_pb.Error.INTERNAL_ERROR,
                    self.READ_ERROR_MSG % (self.__datastore_file, e))
            except struct.error, e:
                # Only this specific struct failure on Python 2.5.0 is
                # converted into an application error; any other
                # struct.error propagates unchanged.
                if (sys.version_info[0:3] == (2, 5, 0) and
                        e.message.startswith(
                            'unpack requires a string argument')):
                    raise apiproxy_errors.ApplicationError(
                        datastore_pb.Error.INTERNAL_ERROR,
                        self.READ_PY250_MSG + self.READ_ERROR_MSG %
                        (self.__datastore_file, e))
                else:
                    raise

            self._StoreRecord(record)

            # Advance the id counter past any id found in the file.
            last_path = _FinalElement(entity.key())
            if last_path.id():
                self._SetMaxId(last_path.id())
def __init__(self, query, binary_results, last_entity):
    """Initializes an UnprocessedQueryCursor object.

    Args:
        query: A query protocol buffer object.
        binary_results: A list of strings that contain encoded protocol
            buffer results.
        last_entity: A string that contains the last entity. It is used to
            generate the cursor, and it can be defined even if there are
            no results.
    """
    self.__binary_results = binary_results
    self.__query = query
    self.__last_ent = last_entity

    # _EncodeCompiledCursor just uses the last entity, so only the final
    # binary result is decoded for the parent class.
    results = []
    if len(binary_results) > 0:
        results.append(entity_pb.EntityProto(binary_results[-1]))

    super(UnprocessedQueryCursor, self).__init__(query, results, last_entity)
def fetch_journal_entry(db_access, key):
    """Fetches the given key from the journal.

    Args:
        db_access: A datastore accessor.
        key: A str, the key to fetch.

    Returns:
        The entity (an entity_pb.EntityProto) fetched from the datastore,
        or None if it was not found or was deleted.
    """
    result = db_access.batch_get_entity(JOURNAL_TABLE, [key], JOURNAL_SCHEMA)
    if not result:
        return None

    # Only one key was requested, so inspect the first (only) row.
    # NOTE(review): assumes batch_get_entity returns
    # {row_key: {column: value}} -- confirm against the accessor.
    row = result[next(iter(result))]
    if JOURNAL_SCHEMA[0] not in row:
        return None

    ent_string = row[JOURNAL_SCHEMA[0]]
    if ent_string == datastore_server.TOMBSTONE:
        return None

    # ParseFromString() fills the message in place; return the message
    # itself rather than the call's result.
    entity = entity_pb.EntityProto()
    entity.ParseFromString(ent_string)
    return entity
def process_entity(self, entity):
    """Decode one stored entity row and feed it to process_statistics().

    Args:
        entity: A dict holding one row: its first key is the row key, and
            the row's first APP_ENTITY_SCHEMA column holds the serialized
            entity.

    Returns:
        True, always; the result of process_statistics() is not inspected.
    """
    logging.debug("Process entity {0}".format(str(entity)))
    row_key = entity.keys()[0]
    serialized = entity[row_key][dbconstants.APP_ENTITY_SCHEMA[0]]
    logging.debug("Entity value: {0}".format(entity))

    proto = entity_pb.EntityProto()
    proto.ParseFromString(serialized)

    self.process_statistics(row_key, proto, len(serialized))
    return True
def prop_result(self):
    """Build an entity protobuf carrying this record's index values.

    The entity gets ``self.key`` and ``self.group`` merged in, plus one
    INDEX_VALUE property per value in ``self.properties``. A list value
    produces one property per element, each flagged as multiple.

    Returns:
        The populated entity_pb.EntityProto.
    """
    result = entity_pb.EntityProto()
    result.mutable_key().MergeFrom(self.key)
    result.mutable_entity_group().MergeFrom(self.group)

    for prop_name, value in self.properties:
        is_multiple = isinstance(value, list)
        for item in (value if is_multiple else [value]):
            prop = result.add_property()
            prop.set_name(prop_name)
            prop.set_meaning(entity_pb.Property.INDEX_VALUE)
            prop.set_multiple(is_multiple)
            prop.mutable_value().MergeFrom(item)

    return result
def ToPb(self):
    """Serialize this model instance into an entity protobuf.

    Without a key, an incomplete key for the model's kind is written
    straight into the protobuf. The entity group is populated only when
    the first path element has an id or a name. Properties are serialized
    in sorted name order.

    Returns:
        The populated entity_pb.EntityProto.
    """
    pb = entity_pb.EntityProto()

    # TODO: Move the key stuff into ModelAdapter.entity_to_pb()?
    entity_key = self._key
    if entity_key is None:
        pairs = [(self.GetKind(), None)]
        ref = ndb.key._ReferenceFromPairs(pairs, reference=pb.mutable_key())
    else:
        ref = entity_key._reference()  # Don't copy
        pb.mutable_key().CopyFrom(ref)

    group = pb.mutable_entity_group()
    root = ref.path().element(0)
    if root.id() or root.name():
        group.add_element().CopyFrom(root)

    for _, prop in sorted(self._properties.iteritems()):
        prop.Serialize(self, pb)
    return pb
def _EncodeCompiledCursor(self, compiled_cursor):
    """Converts the current state of the cursor into a compiled_cursor.

    The cursor position is derived from the last processed result or, when
    there are no results, from the stored serialized last entity.

    Args:
        compiled_cursor: an empty datastore_pb.CompiledCursor
    """
    last_result = None
    if self.__results:
        last_result = self.__results[-1]
    elif self.__last_ent:
        last_result = entity_pb.EntityProto()
        last_result.ParseFromString(self.__last_ent)

    position = compiled_cursor.add_position()
    query_info = self._MinimalQueryInfo(self.__query)
    entity_info = self._MinimalEntityInfo(last_result, self.__query)
    encoded_parts = (query_info.Encode(), entity_info.Encode())
    start_key = _CURSOR_CONCAT_STR.join(encoded_parts)
    position.set_start_key(str(start_key))
    position.set_start_inclusive(False)