Пример #1
0
    def setup(self):
        """Parse the arguments and prepare the controller and watchdog.

        Stores the parsed options and the partial-fit flag on the instance,
        builds a ChunkedController from them, and creates a watchdog from the
        controller's 'max_fit_time' and 'max_memory_usage_mb' resource
        limits.

        Returns:
            (dict): getinfo response holding the command type ('events') and
                the fields required by the controller
        """
        options, partial_fit = self.handle_arguments(self.getinfo)
        self.controller_options = options
        self.partial_fit = partial_fit
        self.controller = ChunkedController(self.getinfo,
                                            self.controller_options)

        limits = self.controller.resource_limits
        time_limit = limits['max_fit_time']
        memory_limit = limits['max_memory_usage_mb']
        # Third watchdog argument: the 'finalize' path in the dispatch dir.
        finalize_path = os.path.join(
            self.getinfo['searchinfo']['dispatch_dir'], 'finalize')
        self.watchdog = command_util.get_watchdog(time_limit, memory_limit,
                                                  finalize_path)

        response = {'type': 'events'}
        response['required_fields'] = self.controller.get_required_fields()
        return response
Пример #2
0
    def setup(self):
        """Parse the arguments and build the controller for this command.

        The parsed options are kept on the instance and handed to a new
        ChunkedController.

        Returns:
            (dict): getinfo response declaring a 'reporting' command that is
                generating
        """
        options = self.handle_arguments(self.getinfo)  # pylint: disable=W1111
        self.controller_options = options
        self.controller = ChunkedController(self.getinfo,
                                            self.controller_options)

        response = {'type': 'reporting'}
        response['generating'] = True
        return response
Пример #3
0
    def setup(self):
        """Build the controller and watchdog from the getinfo metadata.

        The options returned by handle_arguments are passed to a new
        ChunkedController; the watchdog is created from the controller's
        'max_score_time' and 'max_memory_usage_mb' resource limits.

        Returns:
            (dict): getinfo response (command type and required fields). It
                is sent back to the CEXC process on the getinfo exchange
                (first chunk) to establish the execution type and the
                required fields.
        """
        options = self.handle_arguments(self.getinfo)
        self.controller = ChunkedController(self.getinfo, options)

        limits = self.controller.resource_limits
        time_limit = limits['max_score_time']
        memory_limit = limits['max_memory_usage_mb']
        # Third watchdog argument: the 'finalize' path in the dispatch dir.
        finalize_path = os.path.join(
            self.getinfo['searchinfo']['dispatch_dir'], 'finalize')
        self.watchdog = command_util.get_watchdog(time_limit, memory_limit,
                                                  finalize_path)

        response = {'type': 'events'}
        response['required_fields'] = self.controller.get_required_fields()
        return response
Пример #4
0
    def setup(self):
        """Build the controller and watchdog, then pick the execution type.

        The watchdog is created with a time limit of -1 and the controller's
        'max_memory_usage_mb' limit. The execution type is 'streaming' when
        the 'streaming_apply' resource limit is truthy and 'stateful'
        otherwise (including when the limit is absent).

        Returns:
            (dict): getinfo response (command type and required fields). It
                is sent back to the CEXC process on the getinfo exchange
                (first chunk) to establish the execution type and the
                required fields.
        """
        options = self.handle_arguments(self.getinfo)
        self.controller = ChunkedController(self.getinfo, options)

        limits = self.controller.resource_limits
        self.watchdog = command_util.get_watchdog(
            time_limit=-1,
            memory_limit=limits['max_memory_usage_mb'])

        if limits.get('streaming_apply', False):
            exec_type = 'streaming'
        else:
            exec_type = 'stateful'

        response = {'type': exec_type}
        response['required_fields'] = self.controller.get_required_fields()
        return response
Пример #5
0
class ScoreCommand(cexc.BaseChunkHandler):
    """Chunk handler that scores field(s) through a ChunkedController."""

    @staticmethod
    def handle_arguments(getinfo):
        """Turn the getinfo metadata into controller options.

        Args:
            getinfo (dict): getinfo metadata from first chunk

        Returns:
            controller_options (dict): options to be passed to controller

        Raises:
            RuntimeError: if no raw arguments were supplied
        """
        searchinfo = getinfo['searchinfo']
        raw_args = searchinfo['raw_args']
        if len(raw_args) == 0:
            raise RuntimeError('First argument must be a scoring method')

        # Everything after the first raw argument is parsed as options.
        parsed = parse_args(raw_args[1:])
        options = ScoreCommand.handle_raw_options(parsed)
        options['scoring_name'] = searchinfo['args'][0]
        return options

    @staticmethod
    def handle_raw_options(controller_options):
        """Add the score-specific entries to the options (in place).

        Sets the processor to 'ScoreProcessor' and moves any
        'feature_variables' entry to 'variables' (an empty list when absent).

        Args:
            controller_options (dict): options from handle_arguments
        Returns:
            controller_options (dict): dict of controller options
        """
        variables = controller_options.pop('feature_variables', [])
        controller_options.update(processor='ScoreProcessor',
                                  variables=variables)
        return controller_options

    def setup(self):
        """Build the controller and watchdog from the getinfo metadata.

        The watchdog is created from the controller's 'max_score_time' and
        'max_memory_usage_mb' resource limits.

        Returns:
            (dict): getinfo response (command type and required fields). It
                is sent back to the CEXC process on the getinfo exchange
                (first chunk) to establish the execution type and the
                required fields.
        """
        options = self.handle_arguments(self.getinfo)
        self.controller = ChunkedController(self.getinfo, options)

        limits = self.controller.resource_limits
        time_limit = limits['max_score_time']
        memory_limit = limits['max_memory_usage_mb']
        # Third watchdog argument: the 'finalize' path in the dispatch dir.
        finalize_path = os.path.join(
            self.getinfo['searchinfo']['dispatch_dir'], 'finalize')
        self.watchdog = command_util.get_watchdog(time_limit, memory_limit,
                                                  finalize_path)

        response = {'type': 'events'}
        response['required_fields'] = self.controller.get_required_fields()
        return response

    def handler(self, metadata, body):
        """Process one chunk; overrides the BaseChunkHandler handler.

        Every data chunk is loaded into the controller. Scoring and result
        output only happen on the chunk flagged as finished, after which the
        watchdog is joined if it was started.

        Args:
            metadata (dict): metadata information
            body (str): data payload from CEXC

        Returns:
            (dict): metadata to be sent back to CEXC
            output_body (str): data payload to be sent back to CEXC (None
                until the finished chunk). Invalid chunks and the getinfo
                chunk return only a metadata dict.
        """
        if command_util.is_invalid_chunk(metadata):
            logger.debug('Not running without session key.')
            return {'finished': True}

        if command_util.is_getinfo_chunk(metadata):
            return self.setup()

        is_last_chunk = metadata.get('finished', False)

        if not self.watchdog.started:
            self.watchdog.start()

        self.controller.load_data(body)

        results = None
        if is_last_chunk:
            # Scoring runs once, on the final chunk.
            self.controller.execute()
            results = self.controller.output_results()
            if self.watchdog.started:
                self.watchdog.join()

        return ({'finished': is_last_chunk}, results)
Пример #6
0
class FitCommand(cexc.BaseChunkHandler):
    """FitCommand uses ChunkedController & one of two processors to fit models.

    The processor is 'FitBatchProcessor' by default and is switched to
    'FitPartialProcessor' when the partial_fit parameter is truthy.
    """
    @staticmethod
    def handle_arguments(getinfo):
        """Take the getinfo metadata and return controller_options.

        Args:
            getinfo (dict): getinfo metadata from first chunk

        Returns:
            controller_options (dict): options to be passed to controller
            partial_fit (bool): boolean flag to indicate partial fit

        Raises:
            RuntimeError: if no raw arguments were supplied, or if
                handle_raw_options rejects the options
        """
        if len(getinfo['searchinfo']['raw_args']) == 0:
            raise RuntimeError('First argument must be an "algorithm"')

        # Everything after the first raw argument is parsed as options.
        raw_options = parse_args(getinfo['searchinfo']['raw_args'][1:])
        controller_options, partial_fit = FitCommand.handle_raw_options(
            raw_options)
        controller_options['algo_name'] = getinfo['searchinfo']['args'][0]
        return controller_options, partial_fit

    @staticmethod
    def handle_raw_options(controller_options):
        """Load command specific options.

        Selects the processor and, when a 'params' entry is present, moves
        the 'apply' and 'partial_fit' flags out of it. The passed dict is
        modified in place.

        Args:
            controller_options (dict): options from handle_arguments

        Returns:
            controller_options (dict): dict of controller options
            partial_fit (bool): boolean flag for partial fit

        Raises:
            RuntimeError: if convert_params raises ValueError, or if
                'apply' is false and no 'model_name' option was given
        """
        controller_options['processor'] = 'FitBatchProcessor'
        partial_fit = False

        if 'params' in controller_options:
            try:
                fit_params = convert_params(
                    params=controller_options['params'],
                    ignore_extra=True,
                    bools=['apply', 'partial_fit'])
            except ValueError as e:
                raise RuntimeError(str(e))

            if 'apply' in fit_params:
                # 'apply' becomes a top-level option and is removed from
                # the params dict.
                controller_options['apply'] = fit_params['apply']
                del controller_options['params']['apply']

                if 'model_name' not in controller_options and not fit_params[
                        'apply']:
                    raise RuntimeError(
                        'You must save a model if you are not applying it.')

            if 'partial_fit' in fit_params:
                # 'partial_fit' is returned separately and removed from the
                # params dict.
                partial_fit = fit_params['partial_fit']
                del controller_options['params']['partial_fit']

        if partial_fit:
            controller_options['processor'] = 'FitPartialProcessor'

        return controller_options, partial_fit

    def setup(self):
        """Get options, create controller & watchdog, return command type.

        The watchdog is created from the controller's 'max_fit_time' and
        'max_memory_usage_mb' resource limits; it is started later, by
        handler.

        Returns:
            (dict): get info response (command type) and required fields
        """
        self.controller_options, self.partial_fit = self.handle_arguments(
            self.getinfo)
        self.controller = ChunkedController(self.getinfo,
                                            self.controller_options)

        self.watchdog = command_util.get_watchdog(
            self.controller.resource_limits['max_fit_time'],
            self.controller.resource_limits['max_memory_usage_mb'],
            os.path.join(self.getinfo['searchinfo']['dispatch_dir'],
                         'finalize'))

        required_fields = self.controller.get_required_fields()
        return {'type': 'events', 'required_fields': required_fields}

    def get_output_body(self):
        """Collect output body from controller.

        Returns:
            (str): body
        """
        return self.controller.output_results()

    def handler(self, metadata, body):
        """Main handler we override from BaseChunkHandler.

        Every data chunk is loaded into the controller. With partial fit the
        controller executes on every chunk; otherwise only on the chunk
        flagged as finished. On the finished chunk the controller is
        finalized and the watchdog is joined if it was started.

        Args:
            metadata (dict): metadata information
            body (str): data payload from CEXC

        Returns:
            (dict): metadata to be sent back to CEXC
            output_body (str): data payload to be sent back to CEXC (None
                when the controller did not execute on this chunk). Invalid
                chunks, the getinfo chunk and preview runs return only a
                metadata dict.
        """
        if command_util.is_invalid_chunk(metadata):
            logger.debug('Not running without session key.')
            return {'finished': True}

        if command_util.is_getinfo_chunk(metadata):
            return self.setup()

        if self.getinfo.get('preview', False):
            logger.debug('Not running in preview.')
            return {'finished': True}

        if not self.watchdog.started:
            self.watchdog.start()

        finished_flag = metadata.get('finished', False)

        self.controller.load_data(body)

        # Partial fit should *always* execute on every chunk.
        # Non partial fit will execute on the last chunk.
        if self.partial_fit or finished_flag:
            self.controller.execute()
            output_body = self.get_output_body()
        else:
            output_body = None

        if finished_flag:
            self.controller.finalize()
            # Gracefully terminate watchdog
            if self.watchdog.started:
                self.watchdog.join()

        # Our final farewell
        self.log_performance_timers()
        return ({'finished': finished_flag}, output_body)

    def log_performance_timers(self):
        """Log the handler's and controller's timing counters at debug level."""
        # The values are passed as logger arguments instead of being
        # %-formatted up front, so (assuming a standard logging.Logger) the
        # message is only built when debug logging is enabled. This method
        # runs once per chunk.
        logger.debug(
            "command=fit, read_time=%f, handle_time=%f, write_time=%f, "
            "csv_parse_time=%f, csv_render_time=%f",
            self._read_time, self._handle_time, self._write_time,
            self.controller._csv_parse_time,
            self.controller._csv_render_time)
Пример #7
0
class ApplyCommand(BaseChunkHandler):
    """Chunk handler that makes predictions with the ChunkedController and
    the ApplyProcessor."""

    @staticmethod
    def handle_arguments(getinfo):
        """Turn the getinfo metadata into controller options.

        Args:
            getinfo (dict): getinfo metadata

        Returns:
            controller_options (dict): options to be sent to controller

        Raises:
            RuntimeError: if no arguments were supplied, or if
                handle_raw_options rejects the options
        """
        searchinfo = getinfo['searchinfo']
        if len(searchinfo['args']) == 0:
            raise RuntimeError('First argument must be a saved model.')

        # Everything after the first raw argument is parsed as options.
        parsed = parse_args(searchinfo['raw_args'][1:])
        options = ApplyCommand.handle_raw_options(parsed)

        namespace, model_name = parse_namespace_model_name(
            searchinfo['args'][0])
        options['namespace'] = namespace
        options['model_name'] = model_name
        return options

    @staticmethod
    def handle_raw_options(raw_options):
        """Add the apply-specific entries to the options (in place).

        Args:
            raw_options (dict): raw options

        Raises:
            RuntimeError: if the options contain an 'args' entry

        Returns:
            raw_options (dict): modified raw_options
        """
        # The processor is set before validation, so the dict is already
        # modified when the error below is raised.
        raw_options['processor'] = 'ApplyProcessor'

        has_positional = 'args' in raw_options
        if has_positional:
            raise RuntimeError('Apply does not accept positional arguments.')
        return raw_options

    def setup(self):
        """Build the controller and watchdog, then pick the execution type.

        The watchdog is created with a time limit of -1 and the controller's
        'max_memory_usage_mb' limit. The execution type is 'streaming' when
        the 'streaming_apply' mlspl property is truthy (default 'f') and
        'stateful' otherwise.

        Returns:
            (dict): getinfo response (command type and required fields). It
                is sent back to the CEXC process on the getinfo exchange
                (first chunk) to establish the execution type and the
                required fields.
        """
        options = self.handle_arguments(self.getinfo)
        self.controller_options = options
        self.controller = ChunkedController(self.getinfo,
                                            self.controller_options)

        limits = self.controller.resource_limits
        self.watchdog = command_util.get_watchdog(
            time_limit=-1,
            memory_limit=limits['max_memory_usage_mb'])

        streaming_prop = conf.get_mlspl_prop('streaming_apply', default='f')
        if is_truthy(streaming_prop):
            exec_type = 'streaming'
        else:
            exec_type = 'stateful'

        response = {'type': exec_type}
        response['required_fields'] = self.controller.get_required_fields()
        return response

    def handler(self, metadata, body):
        """Process one chunk; overrides the BaseChunkHandler handler.

        Each non-empty chunk is loaded, executed and its results returned.
        The watchdog is joined on the chunk flagged as finished, if it was
        started.

        Args:
            metadata (dict): metadata information
            body (str): data payload from CEXC

        Returns:
            (dict): metadata to be sent back to CEXC
            output_body (str): data payload to be sent back to CEXC. The
                getinfo chunk and empty chunks return only a metadata dict.
        """
        # The getinfo exchange initializes the controller and watchdog.
        if command_util.is_getinfo_chunk(metadata):
            return self.setup()

        is_last_chunk = metadata.get('finished', False)

        if not self.watchdog.started:
            self.watchdog.start()

        # Nothing to predict on an empty chunk: reply with empty metadata.
        if len(body) == 0:
            return {}

        controller = self.controller
        controller.load_data(body)
        controller.execute()
        results = controller.output_results()

        # Gracefully terminate the watchdog once the last chunk is done.
        if is_last_chunk and self.watchdog.started:
            self.watchdog.join()

        return ({'finished': is_last_chunk}, results)