def handle(self):
    """Handles start request."""
    # Required request parameters; the helper raises if any is missing.
    job_name = self._get_required_param("name")
    input_reader_spec = self._get_required_param("mapper_input_reader")
    handler_spec = self._get_required_param("mapper_handler")
    # The output writer is optional; an empty value means no writer.
    output_writer_spec = self.request.get("mapper_output_writer")

    # Validated parameter dictionaries for the mapper and the job.
    mapper_params = self._get_params(
        "mapper_params_validator", "mapper_params.")
    job_params = self._get_params("params_validator", "params.")

    # Normalize the processing rate to an int, falling back to the
    # library default when unset or falsy.
    rate = mapper_params.get("processing_rate")
    mapper_params["processing_rate"] = int(
        rate or model._DEFAULT_PROCESSING_RATE_PER_SEC)

    # Default the queue and write the normalized value back onto the
    # mapper params so downstream consumers see the same queue.
    queue_name = mapper_params.get("queue_name", "default")
    mapper_params["queue_name"] = queue_name

    spec = model.MapperSpec(
        handler_spec,
        input_reader_spec,
        mapper_params,
        int(mapper_params.get("shard_count", model._DEFAULT_SHARD_COUNT)),
        output_writer_spec=output_writer_spec)

    mapreduce_id = type(self)._start_map(
        job_name,
        spec,
        job_params,
        base_path=self.base_path(),
        queue_name=queue_name,
        _app=mapper_params.get("_app"))
    self.json_response["mapreduce_id"] = mapreduce_id
def start_map(name,
              handler_spec,
              reader_spec,
              mapper_parameters,
              shard_count=_DEFAULT_SHARD_COUNT,
              output_writer_spec=None,
              mapreduce_parameters=None,
              base_path=_DEFAULT_BASE_PATH,
              queue_name="default",
              eta=None,
              countdown=None,
              hooks_class_name=None,
              _app=None,
              transactional=False):
    """Start a new, mapper-only mapreduce.

    Args:
      name: mapreduce name. Used only for display purposes.
      handler_spec: fully qualified name of mapper handler function/class
        to call.
      reader_spec: fully qualified name of mapper reader to use.
      mapper_parameters: dictionary of parameters to pass to mapper. These
        are mapper-specific and also used for reader initialization.
      shard_count: number of shards to create.
      output_writer_spec: fully qualified name of the output writer class
        to use, or None for no output writer.
      mapreduce_parameters: dictionary of mapreduce parameters relevant to
        the whole job.
      base_path: base path of mapreduce library handler specified in
        app.yaml. "/mapreduce" by default.
      queue_name: executor queue name to be used for mapreduce tasks.
      eta: Absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
      countdown: Time in seconds into the future that this MR should
        execute. Defaults to zero.
      hooks_class_name: fully qualified name of a hooks.Hooks subclass.
      _app: app id to run the job under (internal use only).
      transactional: Specifies if job should be started as a part of
        already opened transaction.

    Returns:
      mapreduce id as string.
    """
    # Describe the mapper stage, then hand the job off to the shared
    # start handler which enqueues the actual kickoff task.
    spec = model.MapperSpec(handler_spec,
                            reader_spec,
                            mapper_parameters,
                            shard_count,
                            output_writer_spec=output_writer_spec)
    job_params = mapreduce_parameters or {}
    return handlers.StartJobHandler._start_map(
        name,
        spec,
        job_params,
        base_path=base_path,
        queue_name=queue_name,
        eta=eta,
        countdown=countdown,
        hooks_class_name=hooks_class_name,
        _app=_app,
        transactional=transactional)
def _get_mapper_spec(self):
    """Builds a model.MapperSpec describing this job's mapper stage."""
    # Imported lazily to keep the dependency local to this method.
    from google.appengine.ext.mapreduce import model

    # All class references are serialized as fully qualified paths so the
    # spec can be stored and later re-resolved.
    to_path = util._obj_to_path
    return model.MapperSpec(
        handler_spec=to_path(self.mapper),
        input_reader_spec=to_path(self.input_reader_cls),
        params=self._get_mapper_params(),
        shard_count=self.shard_count,
        output_writer_spec=to_path(self.output_writer_cls))
def start_map(name,
              handler_spec,
              reader_spec,
              mapper_parameters,
              shard_count=None,
              output_writer_spec=None,
              mapreduce_parameters=None,
              base_path=None,
              queue_name=None,
              eta=None,
              countdown=None,
              hooks_class_name=None,
              _app=None,
              in_xg_transaction=False):
    """Start a new, mapper-only mapreduce.

    Args:
      name: mapreduce name. Used only for display purposes.
      handler_spec: fully qualified name of mapper handler function/class
        to call.
      reader_spec: fully qualified name of mapper reader to use.
      mapper_parameters: dictionary of parameters to pass to mapper. These
        are mapper-specific and also used for reader initialization.
      shard_count: number of shards to create. Defaults to
        parameters.config.SHARD_COUNT when unspecified.
      output_writer_spec: fully qualified name of the output writer class
        to use, or None for no output writer.
      mapreduce_parameters: dictionary of mapreduce parameters relevant to
        the whole job.
      base_path: base path of mapreduce library handler specified in
        app.yaml. Defaults to parameters.config.BASE_PATH.
      queue_name: taskqueue queue name to be used for mapreduce tasks. See
        util.get_queue_name.
      eta: absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
      countdown: time in seconds into the future that this MR should
        execute. Defaults to zero.
      hooks_class_name: fully qualified name of a hooks.Hooks subclass.
      _app: app id to run the job under (internal use only).
      in_xg_transaction: controls what transaction scope to use to start
        this MR job. If True, there has to be an already opened
        cross-group transaction scope. MR will use one entity group from
        it. If False, MR will create an independent transaction to start
        the job regardless of any existing transaction scopes.

    Returns:
      mapreduce id as string.
    """
    # Resolve configurable defaults lazily so runtime configuration wins.
    if shard_count is None:
        shard_count = parameters.config.SHARD_COUNT
    if base_path is None:
        base_path = parameters.config.BASE_PATH

    # Copy caller-supplied dicts so local mutation does not leak back out.
    if mapper_parameters:
        mapper_parameters = dict(mapper_parameters)
    if mapreduce_parameters:
        mapreduce_parameters = dict(mapreduce_parameters)
        if "base_path" not in mapreduce_parameters:
            mapreduce_parameters["base_path"] = base_path
    else:
        mapreduce_parameters = {"base_path": base_path}

    mapper_spec = model.MapperSpec(handler_spec,
                                   reader_spec,
                                   mapper_parameters,
                                   shard_count,
                                   output_writer_spec=output_writer_spec)

    # BUGFIX: the warning previously referred to a "transactional" flag
    # left over from the older API; this version's parameter is
    # in_xg_transaction.
    if in_xg_transaction and not db.is_in_transaction():
        logging.warning("Expects an opened xg transaction to start mapreduce "
                        "when in_xg_transaction is True.")

    # mapreduce_parameters always holds at least "base_path" by now.
    return handlers.StartJobHandler._start_map(
        name,
        mapper_spec,
        mapreduce_parameters,
        base_path=base_path,
        queue_name=util.get_queue_name(queue_name),
        eta=eta,
        countdown=countdown,
        hooks_class_name=hooks_class_name,
        _app=_app,
        in_xg_transaction=in_xg_transaction)
def start_map(name,
              handler_spec,
              reader_spec,
              mapper_parameters,
              shard_count=_DEFAULT_SHARD_COUNT,
              output_writer_spec=None,
              mapreduce_parameters=None,
              base_path=None,
              queue_name=None,
              eta=None,
              countdown=None,
              hooks_class_name=None,
              _app=None,
              transactional=False,
              transactional_parent=None):
    """Start a new, mapper-only mapreduce.

    Args:
      name: mapreduce name. Used only for display purposes.
      handler_spec: fully qualified name of mapper handler function/class
        to call.
      reader_spec: fully qualified name of mapper reader to use.
      mapper_parameters: dictionary of parameters to pass to mapper. These
        are mapper-specific and also used for reader initialization.
      shard_count: number of shards to create.
      output_writer_spec: fully qualified name of the output writer class
        to use, or None for no output writer.
      mapreduce_parameters: dictionary of mapreduce parameters relevant to
        the whole job.
      base_path: base path of mapreduce library handler specified in
        app.yaml. "/mapreduce" by default.
      queue_name: executor queue name to be used for mapreduce tasks. If
        unspecified it will be the "default" queue or inherit the queue of
        the currently running request.
      eta: absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
      countdown: time in seconds into the future that this MR should
        execute. Defaults to zero.
      hooks_class_name: fully qualified name of a hooks.Hooks subclass.
      _app: app id to run the job under (internal use only).
      transactional: specifies if job should be started as a part of
        already opened transaction.
      transactional_parent: specifies the entity which is already a part
        of transaction. Child entity will be used to store task payload if
        mapreduce specification is too big.

    Returns:
      mapreduce id as string.
    """
    # A falsy shard count (None or 0) falls back to the library default.
    if not shard_count:
        shard_count = _DEFAULT_SHARD_COUNT
    if base_path is None:
        base_path = base_handler._DEFAULT_BASE_PATH

    # Copy caller-supplied dicts so local mutation does not leak back out.
    if mapper_parameters:
        mapper_parameters = dict(mapper_parameters)
    if mapreduce_parameters:
        mapreduce_parameters = dict(mapreduce_parameters)

    mapper_spec = model.MapperSpec(handler_spec,
                                   reader_spec,
                                   mapper_parameters,
                                   shard_count,
                                   output_writer_spec=output_writer_spec)

    if transactional and not transactional_parent:
        # BUGFIX: the two message fragments previously joined without a
        # separating space ("...starts.Your job...").
        logging.error(
            "transactional_parent should be specified for transactional "
            "starts. Your job will fail to start if mapreduce "
            "specification is too big.")

    return handlers.StartJobHandler._start_map(
        name,
        mapper_spec,
        mapreduce_parameters or {},
        base_path=base_path,
        queue_name=queue_name,
        eta=eta,
        countdown=countdown,
        hooks_class_name=hooks_class_name,
        _app=_app,
        transactional=transactional,
        parent_entity=transactional_parent)