Example #1
    def load_resource_limits(algo_name):
        """Load algorithm specific resource limits.

        Args:
            algo_name (str): algorithm name

        Returns:
            resource_limits (dict): dictionary of resource limits including
                max_fit_time, max_memory_usage_mb, and max_model_size_mb
        """
        resource_limits = {}
        resource_limits['max_memory_usage_mb'] = int(conf.get_mlspl_prop('max_memory_usage_mb', algo_name, -1))
        resource_limits['max_fit_time'] = int(conf.get_mlspl_prop('max_fit_time', algo_name, -1))
        resource_limits['max_model_size_mb'] = int(conf.get_mlspl_prop('max_model_size_mb', algo_name, -1))
        return resource_limits
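For illustration only, here is a minimal sketch (not part of the source) of how the returned dictionary might be consumed; check_model_size and the byte-length check are assumptions:

def check_model_size(serialized_model, resource_limits):
    """Reject models larger than max_model_size_mb; a value of -1 disables the check."""
    max_mb = resource_limits['max_model_size_mb']
    if max_mb > 0 and len(serialized_model) > max_mb * 1024 * 1024:
        raise RuntimeError('Model exceeds max_model_size_mb=%d' % max_mb)

limits = {'max_fit_time': -1, 'max_memory_usage_mb': 1024, 'max_model_size_mb': 30}
check_model_size(b'\x00' * 1024, limits)  # passes: a 1 KB model is well under 30 MB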
Example #2
def tree_summary(algo, options=None):
    """Create summary for tree based models.

    Args:
        algo (object): a fitted algo object wrapping a tree-based model
        options (dict): options, possibly containing 'params' with 'limit' and 'json'

    Returns:
        (dataframe): dataframe representation of the tree summary
    """
    return_json = None
    depth_limit = None
    if options:
        out_params = convert_params(options.get('params', {}),
                                    ints=["limit"],
                                    bools=["json"])
        return_json = out_params.get("json")
        depth_limit = out_params.get("limit")

    if return_json is None:
        return_json = is_truthy(
            conf.get_mlspl_prop('summary_return_json', algo.__class__.__name__,
                                'f'))
    if depth_limit is None:
        depth_limit = int(
            conf.get_mlspl_prop('summary_depth_limit', algo.__class__.__name__,
                                -1))
    if depth_limit <= 0:
        raise ValueError(
            'Limit = %d. Value for limit should be greater than 0.' %
            depth_limit)

    root = 0
    depth = 0
    if return_json:
        output_data = [
            json.dumps(tree_summary_dict(algo, depth_limit, root, depth),
                       sort_keys=True)
        ]
    else:
        output_data = tree_summary_str(algo, depth_limit, root, depth)
    return pd.DataFrame({'Decision Tree Summary': output_data})
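tree_summary delegates to tree_summary_dict and tree_summary_str, which are not shown in this excerpt. Below is a hypothetical sketch, not the project's implementation, of what the recursive dict builder could look like, assuming algo wraps a fitted scikit-learn decision tree exposed as algo.estimator:

def tree_summary_dict(algo, depth_limit, root, depth):
    """Hypothetical recursive summary of a fitted sklearn tree (sketch only)."""
    t = algo.estimator.tree_
    node = {'node id': root}
    left, right = t.children_left[root], t.children_right[root]
    # treat a node as terminal when it has no children or the depth limit is reached
    if left == -1 or depth + 1 > depth_limit:
        node['value'] = t.value[root].tolist()
    else:
        node['split'] = 'feature %d <= %f' % (t.feature[root], t.threshold[root])
        node['left child'] = tree_summary_dict(algo, depth_limit, left, depth + 1)
        node['right child'] = tree_summary_dict(algo, depth_limit, right, depth + 1)
    return node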
Example #3
    def load_resource_limits(algo_name):
        """Load algorithm-specific limits.

        Args:
            algo_name (str): algorithm name

        Returns:
            resource_limits (dict): dictionary of resource limits
        """
        resource_limits = {}
        resource_limits['max_memory_usage_mb'] = int(
            conf.get_mlspl_prop('max_memory_usage_mb', algo_name, -1))
        return resource_limits
Example #4
    def load_sampler_limits(process_options, algo_name):
        """Read sampling limits from conf file and decide sample count.

        Args:
            process_options (dict): process options
            algo_name (str): algorithm name

        Returns:
            sampler_limits (dict): dictionary of sampler limits including
                use_sampling, sample_count, and sample_seed
        """
        sampler_limits = {}

        # decide the sample count: cap any user-requested count at max_inputs
        sampler_limits['use_sampling'] = is_truthy(str(conf.get_mlspl_prop('use_sampling', algo_name, 'yes')))
        max_inputs = int(conf.get_mlspl_prop('max_inputs', algo_name, -1))
        if process_options['sample_count']:
            sampler_limits['sample_count'] = min(process_options['sample_count'], max_inputs)
        else:
            sampler_limits['sample_count'] = max_inputs

        # pass the sample seed through unchanged
        sampler_limits['sample_seed'] = process_options['sample_seed']
        return sampler_limits
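As a downstream illustration, a minimal sketch (maybe_sample is a hypothetical helper, not part of the project) of how these limits could be applied to a pandas DataFrame:

import pandas as pd

def maybe_sample(df, sampler_limits):
    """Downsample only when sampling is enabled and the input exceeds sample_count."""
    if sampler_limits['use_sampling'] and len(df) > sampler_limits['sample_count']:
        return df.sample(n=sampler_limits['sample_count'],
                         random_state=sampler_limits['sample_seed'])
    return df

df = pd.DataFrame({'x': range(10)})
limits = {'use_sampling': True, 'sample_count': 5, 'sample_seed': 42}
print(len(maybe_sample(df, limits)))  # 5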
Example #5
File: df_util.py  Project: TPLink32/spnk1
def get_unseen_value_behavior(options):
    """Load options for handling new values in categorical fields.

    Args:
        options (dict): options

    Returns:
        handle_new_cat (str): the chosen behavior for handling unseen values
    """
    handle_new_cat = conf.get_mlspl_prop('handle_new_cat',
                                         stanza='default',
                                         default='default')

    if 'params' in options and options['params'].get('unseen_value'):
        handle_new_cat = options['params'].pop('unseen_value')

    return handle_new_cat
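The function only returns a policy string; acting on it happens elsewhere. A hedged sketch of how apply-time code might use the choice (encode_value and the 'stop' value are assumptions, not taken from df_util.py):

def encode_value(value, known_categories, handle_new_cat):
    """Map a categorical value to an index, handling unseen values per policy (sketch)."""
    if value in known_categories:
        return known_categories.index(value)
    if handle_new_cat == 'stop':
        raise ValueError('Unseen categorical value: %r' % value)
    # any other policy maps unseen values to a sentinel index
    return -1

print(encode_value('green', ['red', 'blue'], 'default'))  # -1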
Example #6
    def setup(self):
        """Parse search string, choose processor, initialize controller.

        Returns:
            (dict): getinfo response containing the command type and required fields. This
                response will be sent back to the CEXC process on the getinfo
                exchange (first chunk) to establish our execution type and
                required fields.
        """
        self.controller_options = self.handle_arguments(self.getinfo)
        self.controller = ChunkedController(self.getinfo,
                                            self.controller_options)

        self.watchdog = command_util.get_watchdog(
            time_limit=-1,
            memory_limit=self.controller.resource_limits['max_memory_usage_mb']
        )

        streaming_apply = is_truthy(
            conf.get_mlspl_prop('streaming_apply', default='f'))
        exec_type = 'streaming' if streaming_apply else 'stateful'

        required_fields = self.controller.get_required_fields()
        return {'type': exec_type, 'required_fields': required_fields}
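For reference, a hedged example of the dict setup() returns on the getinfo exchange; the field names come from the return statement above, while the values are purely illustrative:

getinfo_response = {
    'type': 'stateful',  # or 'streaming' when the streaming_apply property is truthy
    'required_fields': ['petal_length', 'petal_width'],
}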
Example #7
        # Our final farewell
        self.log_performance_timers()
        return ({'finished': finished_flag}, output_body)

    def log_performance_timers(self):
        logger.debug(
            "command=fit, read_time=%f, handle_time=%f, write_time=%f, csv_parse_time=%f, csv_render_time=%f"
            % (self._read_time, self._handle_time, self._write_time,
               self.controller._csv_parse_time,
               self.controller._csv_render_time))


if __name__ == "__main__":
    logger.debug("Starting fit.py.")
    do_profile = is_truthy(conf.get_mlspl_prop('profile', 'default', 'n'))

    if do_profile:
        import cProfile
        import pstats

        pr = cProfile.Profile()
        pr.enable()

    FitCommand(handler_data=BaseChunkHandler.DATA_RAW).run()

    if do_profile:
        pr.disable()
        s = StringIO()
        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
        ps.print_stats(10)
        logger.debug(s.getvalue())  # emit the captured profile so it is not discarded
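All of the examples above depend on conf.get_mlspl_prop, which reads properties from the toolkit's configuration. To experiment with the snippets outside that environment, a self-contained stand-in (an assumption, not the project's implementation) can mimic the call signature used here:

class FakeConf(object):
    """Stand-in for the conf module used above (sketch only)."""

    def __init__(self, props=None):
        # props maps (property_name, stanza) -> string value
        self.props = props or {}

    def get_mlspl_prop(self, prop, stanza='default', default=None):
        return self.props.get((prop, stanza), default)

conf = FakeConf({('max_memory_usage_mb', 'DecisionTreeClassifier'): '2048'})
print(conf.get_mlspl_prop('max_memory_usage_mb', 'DecisionTreeClassifier', -1))  # '2048'
print(conf.get_mlspl_prop('max_fit_time', 'DecisionTreeClassifier', -1))         # -1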