def _to_map_job_config(cls, mr_spec, queue_name):
  """Converts model.MapreduceSpec back to JobConfig.

  This method allows our internal methods to use JobConfig directly.
  This method also allows us to expose JobConfig as an API during
  execution, despite that it is not saved into datastore.

  Args:
    mr_spec: model.MapreduceSpec.
    queue_name: queue name.

  Returns:
    The JobConfig object for this job.
  """
  mapper_spec = mr_spec.mapper
  params = mr_spec.params
  # Jobs started through the legacy API carry no "api_version" entry;
  # those are reconstructed in lenient mode.
  api_version = params.get("api_version", 0)
  legacy_job = api_version == 0
  # Assemble the constructor arguments in one mapping so the spec-field to
  # JobConfig-field correspondence is visible in a single place.
  kwargs = {
      "_lenient": legacy_job,
      "job_name": mr_spec.name,
      "job_id": mr_spec.mapreduce_id,
      "mapper": util.for_name(mapper_spec.handler_spec),
      "input_reader_cls": mapper_spec.input_reader_class(),
      "input_reader_params": input_readers._get_params(mapper_spec),
      "output_writer_cls": mapper_spec.output_writer_class(),
      "output_writer_params": output_writers._get_params(mapper_spec),
      "shard_count": mapper_spec.shard_count,
      "queue_name": queue_name,
      "user_params": params.get("user_params"),
      "shard_max_attempts": params.get("shard_max_attempts"),
      "done_callback_url": params.get("done_callback"),
      "_force_writes": params.get("force_writes"),
      "_base_path": params["base_path"],
      "_task_max_attempts": params.get("task_max_attempts"),
      "_task_max_data_processing_attempts":
          params.get("task_max_data_processing_attempts"),
      "_hooks_cls": util.for_name(mr_spec.hooks_class_name),
      "_app": params.get("app_id"),
      "_api_version": api_version,
  }
  return cls(**kwargs)
def __iter__(self):
  """Create a generator for model instances for entities.

  Iterating through entities moves query range past the consumed entities.

  Yields:
    next model instance.
  """
  while True:
    # _current_key_range is None once _advance_key_range() runs out of
    # ranges to process.
    if self._current_key_range is None:
      break
    while True:
      # Re-issue the query for each batch: the key range is advanced after
      # every yielded entity, so a fresh query resumes exactly past the
      # last consumed entity (making the iterator restartable).
      query = self._current_key_range.make_ascending_query(
          util.for_name(self._entity_kind))
      results = query.fetch(limit=self._batch_size)
      if not results:
        # Current key range is exhausted; move to the next one (if any).
        self._advance_key_range()
        break
      for model_instance in results:
        key = model_instance.key()
        # Shrink the range past this entity so work is not repeated if the
        # iterator is checkpointed and resumed.
        self._current_key_range.advance(key)
        yield model_instance
def _get_params(self, validator_parameter, name_prefix): """Retrieves additional user-supplied params for the job and validates them. Args: validator_parameter: name of the request parameter which supplies validator for this parameter set. name_prefix: common prefix for all parameter names in the request. Raises: Any exception raised by the 'params_validator' request parameter if the params fail to validate. """ params_validator = self.request.get(validator_parameter) user_params = {} for key in self.request.arguments(): if key.startswith(name_prefix): values = self.request.get_all(key) adjusted_key = key[len(name_prefix):] if len(values) == 1: user_params[adjusted_key] = values[0] else: user_params[adjusted_key] = values if params_validator: resolved_validator = util.for_name(params_validator) resolved_validator(user_params) return user_params
def input_reader_class(self):
  """Resolves the input reader class named by this spec.

  Returns:
    input reader class object.
  """
  # input_reader_spec holds the dotted path of the reader class.
  reader_class = util.for_name(self.input_reader_spec)
  return reader_class
def output_writer_class(self):
  """Resolves the output writer class named by this spec.

  Returns:
    output writer class object, or the spec's falsy value unchanged when
    no output writer was configured.
  """
  if not self.output_writer_spec:
    # Preserve the original falsy value (None or empty string) exactly,
    # matching the `spec and for_name(spec)` idiom this replaces.
    return self.output_writer_spec
  return util.for_name(self.output_writer_spec)
def _get_raw_entity_kind(cls, model_classpath):
  # Resolves the datastore kind string for the given model class path.
  entity_type = util.for_name(model_classpath)
  # NOTE(review): these are isinstance checks on the resolved object, not
  # issubclass checks. For ndb this works because model classes are
  # instances of the ndb.MetaModel metaclass (second branch). Whether the
  # first branch can ever match (it requires a db.Model *instance*)
  # depends on what util.for_name returns here — confirm against callers.
  if isinstance(entity_type, db.Model):
    return entity_type.kind()
  elif isinstance(entity_type, (ndb.Model, ndb.MetaModel)):
    # ndb's supported hook for the kind name; may be overridden by models.
    return entity_type._get_kind()
  else:
    # Fallback: the bare class name, which is the default datastore kind.
    return util.get_short_name(model_classpath)
def validate(cls, job_config):
  """Inherit docs.

  Validates that the configured entity kind resolves to an importable
  model class.

  Args:
    job_config: the job configuration whose input_reader_params are
      validated.

  Raises:
    errors.BadReaderParamsError: if the entity kind cannot be imported.
  """
  super(ModelDatastoreInputReader, cls).validate(job_config)
  params = job_config.input_reader_params
  entity_kind = params[cls.ENTITY_KIND_PARAM]
  try:
    # Resolving the dotted path verifies the model class is importable;
    # the resolved class itself is not needed here.
    util.for_name(entity_kind)
  except ImportError as e:
    # `except ... as e` replaces the deprecated `except ..., e` form and
    # matches the style used elsewhere in this file.
    raise errors.BadReaderParamsError("Bad entity kind: %s" % e)
def validate(cls, mapper_spec):
  """Validates mapper spec and all mapper parameters.

  Args:
    mapper_spec: The MapperSpec for this InputReader.

  Raises:
    BadReaderParamsError: required parameters are missing or invalid.
  """
  super(DatastoreInputReader, cls).validate(mapper_spec)
  params = mapper_spec.params
  keys_only = util.parse_bool(params.get(cls.KEYS_ONLY_PARAM, False))
  if keys_only:
    raise BadReaderParamsError("The keys_only parameter is obsolete. "
                               "Use DatastoreKeyInputReader instead.")

  entity_kind_name = params[cls.ENTITY_KIND_PARAM]
  try:
    # Resolving the dotted path verifies the model class is importable.
    util.for_name(entity_kind_name)
  except ImportError as e:
    # `except ... as e` replaces the deprecated `except ..., e` form and
    # matches the style used elsewhere in this file.
    raise BadReaderParamsError("Bad entity kind: %s" % e)
def get_hooks(self):
  """Returns a hooks.Hooks class or None if no hooks class has been set."""
  # Fast path: already resolved, or nothing configured to resolve.
  if self.__hooks is not None or self.hooks_class_name is None:
    return self.__hooks

  resolved = util.for_name(self.hooks_class_name)
  if not isinstance(resolved, type):
    raise ValueError("hooks_class_name must refer to a class, got %s" %
                     type(resolved).__name__)
  if not issubclass(resolved, hooks.Hooks):
    raise ValueError(
        "hooks_class_name must refer to a hooks.Hooks subclass")

  # Cache the instance so resolution happens at most once.
  self.__hooks = resolved()
  return self.__hooks
def __init__(self, filters, model_class_path):
  """Init.

  Args:
    filters: user supplied filters. Each filter should be a list or tuple of
      format (<property_name_as_str>, <query_operator_as_str>,
      <value_of_certain_type>). Value type should satisfy the property's type.
    model_class_path: full path to the model class in str.
  """
  self.filters = filters
  self.model_class_path = model_class_path
  # Resolve the model class once up front; the range computation needs it.
  self.model_class = util.for_name(model_class_path)
  prop, start, end = self._get_range_from_filters(filters, self.model_class)
  self.prop = prop
  self.start = start
  self.end = end
def get_hooks(self):
  """Returns a hooks.Hooks class or None if no hooks class has been set."""
  if self.__hooks is None and self.hooks_class_name is not None:
    candidate = util.for_name(self.hooks_class_name)
    if not isinstance(candidate, type):
      raise ValueError("hooks_class_name must refer to a class, got %s" %
                       type(candidate).__name__)
    if not issubclass(candidate, hooks.Hooks):
      raise ValueError(
          "hooks_class_name must refer to a hooks.Hooks subclass")
    # Unlike the parameterless variant elsewhere in this codebase, this
    # constructor receives the owning object. Cache the instance so the
    # dotted-name resolution happens at most once.
    self.__hooks = candidate(self)
  return self.__hooks
def __init__(self, filters, model_class_path):
  """Init.

  Args:
    filters: user supplied filters. Each filter should be a list or tuple of
      format (<property_name_as_str>, <query_operator_as_str>,
      <value_of_certain_type>). Value type should satisfy the property's type.
    model_class_path: full path to the model class in str.
  """
  self.filters = filters
  self.model_class_path = model_class_path
  # Resolve the model class from its dotted path before deriving the range.
  self.model_class = util.for_name(self.model_class_path)
  range_info = self._get_range_from_filters(self.filters, self.model_class)
  self.prop, self.start, self.end = range_info
def _iter_key_range(self, k_range):
  """Yields (key, entity) pairs within k_range in ascending key order.

  Fetches entities in batches of self._batch_size, resuming each batch
  from the previous query's cursor.

  Args:
    k_range: a key_range.KeyRange to scan.

  Yields:
    (key, model_instance) tuples for every entity in the range.
  """
  # Resolve the model class once: the dotted-name lookup is invariant
  # across batches, so there is no need to repeat it on every iteration.
  entity_class = util.for_name(self._entity_kind)
  cursor = None
  while True:
    query = k_range.make_ascending_query(entity_class)
    if cursor:
      # Resume where the previous batch left off.
      query.with_cursor(cursor)
    results = query.fetch(limit=self._batch_size)
    if not results:
      break
    for model_instance in results:
      key = model_instance.key()
      yield key, model_instance
    cursor = query.cursor()
def get_handler(self):
  """Get mapper handler instance.

  The handler named by self.handler_spec is resolved lazily on first call
  and cached for subsequent calls.

  Returns:
    cached handler instance as callable.
  """
  if self.__handler is None:
    resolved_spec = util.for_name(self.handler_spec)
    if isinstance(resolved_spec, type):
      # A class was referenced: instantiate it (no-argument constructor).
      self.__handler = resolved_spec()
    elif isinstance(resolved_spec, types.MethodType):
      # Python 2 unbound method: instantiate its class (im_class) and
      # fetch the method bound to that fresh instance.
      self.__handler = getattr(resolved_spec.im_class(), resolved_spec.__name__)
    else:
      # Plain function or other callable: use it as-is.
      self.__handler = resolved_spec
  return self.__handler
def _iter_key_range(self, k_range):
  """Yields (key, entity) pairs within k_range in ascending key order.

  Entities are fetched in batches of self._batch_size; each batch resumes
  from the cursor left by the previous query.

  Args:
    k_range: a key_range.KeyRange to scan.

  Yields:
    (key, model_instance) tuples for every entity in the range.
  """
  # Hoisted out of the loop: resolving the entity kind's dotted name is
  # deterministic and loop-invariant, so do it once per call.
  model_class = util.for_name(self._entity_kind)
  cursor = None
  while True:
    query = k_range.make_ascending_query(model_class)
    if cursor:
      # Continue from where the previous batch stopped.
      query.with_cursor(cursor)
    results = query.fetch(limit=self._batch_size)
    if not results:
      break
    for model_instance in results:
      key = model_instance.key()
      yield key, model_instance
    cursor = query.cursor()
def validate(cls, job_config):
  """Inherit docs."""
  super(ModelDatastoreInputReader, cls).validate(job_config)
  params = job_config.input_reader_params
  entity_kind = params[cls.ENTITY_KIND_PARAM]
  try:
    model_class = util.for_name(entity_kind)
  except ImportError as e:
    raise errors.BadReaderParamsError("Bad entity kind: %s" % e)

  # Filters are optional; nothing further to check without them.
  if cls.FILTERS_PARAM not in params:
    return

  filters = params[cls.FILTERS_PARAM]
  # db and ndb models have different property metadata, so each gets its
  # own filter validator.
  if issubclass(model_class, db.Model):
    cls._validate_filters(filters, model_class)
  else:
    cls._validate_filters_ndb(filters, model_class)
  # Constructing the range validates that the filters define a usable
  # property range (raises on bad input).
  property_range.PropertyRange(filters, entity_kind)
def __iter__(self):
  """Yields model instances (or keys) within the assigned key range.

  Builds an ascending query for the configured model class, then iterates
  it via the db or ndb API depending on the query type, resuming from
  self._cursor when one was previously saved.
  """
  self._query = self._key_range.make_ascending_query(
      util.for_name(self._query_spec.model_class_path),
      filters=self._query_spec.filters)

  if isinstance(self._query, db.Query):
    # db path: the cursor is attached to the query before running it.
    if self._cursor:
      self._query.with_cursor(self._cursor)
    for model_instance in self._query.run(
        batch_size=self._query_spec.batch_size,
        keys_only=self._query_spec.keys_only):
      yield model_instance
  else:
    # ndb path: replace the query with an iterator that produces cursors,
    # so progress can be checkpointed by whoever reads self._query later.
    self._query = self._query.iter(batch_size=self._query_spec.batch_size,
                                   keys_only=self._query_spec.keys_only,
                                   start_cursor=self._cursor,
                                   produce_cursors=True)
    for model_instance in self._query:
      yield model_instance
def tx():
  """Transactionally records the completion of one mapreduce job.

  Closure over operation_key, mapreduce_id, mapreduce_state,
  mapreduce_params and db_config from the enclosing scope. Decrements the
  operation's active-job count, marks the operation completed when the
  last job finishes, and invokes the optional done-callback handler.
  """
  operation = DatastoreAdminOperation.get(operation_key)
  if mapreduce_id in operation.active_job_ids:
    # Only count each job once, even if this callback is retried.
    operation.active_jobs -= 1
    operation.completed_jobs += 1
    operation.active_job_ids.remove(mapreduce_id)
  if not operation.active_jobs:
    if operation.status == DatastoreAdminOperation.STATUS_ACTIVE:
      operation.status = DatastoreAdminOperation.STATUS_COMPLETED
    # All jobs done: clean up the per-job child entities.
    db.delete(DatastoreAdminOperationJob.all().ancestor(operation),
              config=db_config)
  operation.put(config=db_config)
  if 'done_callback_handler' in mapreduce_params:
    done_callback_handler = util.for_name(
        mapreduce_params['done_callback_handler'])
    if done_callback_handler:
      done_callback_handler(operation, mapreduce_id, mapreduce_state)
    else:
      # Fixed: the message previously misspelled the parameter name as
      # 'done_callbackup_handler'.
      logging.error('done_callback_handler %s was not found',
                    mapreduce_params['done_callback_handler'])