Пример #1
0
    def __init__(self, ensemble, model_fns_dir, api=None):
        """Set up the predictor from an ensemble and its exported models.

        Args:
            ensemble: Ensemble reference handed to
                ``self.get_ensemble_resource``. The value returned by that
                helper is indexed as a resource dictionary with an
                ``'object'`` key.
            model_fns_dir: Directory where the models' predict functions
                are stored (see the ``bigmler export`` command). It is
                passed to ``self.get_model_fns``.
            api: Connection used to retrieve the ensemble when the given
                resource lacks information. Defaults to
                ``BigML(storage=STORAGE)``.

        Raises:
            ValueError: If ``model_fns_dir`` is empty or ``None``.
        """
        self.api = BigML(storage=STORAGE) if api is None else api
        self.resource_id = None
        # to be deprecated
        self.ensemble_id = None
        self.objective_id = None
        self.distributions = None
        self.distribution = None
        self.models_splits = []
        self.multi_model = None
        self.boosting = None
        self.boosting_offsets = None
        self.regression = False
        self.fields = None
        # Always defined, even for ensembles without the fields structure.
        self.input_fields = None
        self.class_names = None
        self.importance = {}
        self.predict_functions = []

        ensemble = self.get_ensemble_resource(ensemble)
        self.resource_id = get_ensemble_id(ensemble)
        self.ensemble_id = self.resource_id

        if lacks_info(ensemble, inner_key="ensemble"):
            # avoid checking fields because of old ensembles
            ensemble = retrieve_resource(self.api,
                                         self.resource_id,
                                         no_check_fields=True)
        ensemble_info = ensemble['object']
        if ensemble_info.get('type') == BOOSTING:
            self.boosting = ensemble_info.get('boosting')
        self.model_ids = ensemble_info['models']
        self.distributions = ensemble_info.get('distributions', [])
        self.importance = ensemble_info.get('importance', [])
        # new ensembles have the fields structure
        if ensemble_info.get('ensemble'):
            self.fields = ensemble_info['ensemble'].get("fields")
            self.objective_id = ensemble_info.get("objective_field")
            self.input_fields = ensemble_info.get("input_fields")

        if not model_fns_dir:
            raise ValueError("The EnsemblePredictor object expects as"
                             " argument the directory where the models"
                             " predict functions are stored. To generate "
                             " them, please check the 'bigmler export'"
                             " command.")
        self.get_model_fns(model_fns_dir)

        # Ensembles without the fields structure leave ``self.fields`` as
        # None; in that case there is no objective field description.
        objective_field = None
        if self.fields:
            objective_field = self.fields[self.objective_id]
            summary = objective_field['summary']
            # First summary key present wins: bins, then counts, then
            # categories.
            for summary_key in ('bins', 'counts', 'categories'):
                if summary_key in summary:
                    self.distribution = summary[summary_key]
                    break
            else:
                self.distribution = []
            self.regression = objective_field.get('optype') == 'numeric'
        # Without field information the objective type cannot be read, so
        # ``self.regression`` keeps its default (False) instead of failing
        # on a ``None`` lookup.

        if self.boosting:
            if self.regression:
                self.boosting_offsets = ensemble_info.get('initial_offset', 0)
            else:
                self.boosting_offsets = dict(
                    ensemble_info.get('initial_offsets', []))

        if not self.regression and self.boosting is None:
            try:
                categories = objective_field['summary']['categories']
                classes = [category[0] for category in categories]
            except (AttributeError, KeyError, TypeError):
                # TypeError covers a missing objective field (None), so the
                # classes are collected from the training distributions.
                classes = set()
                for model_distribution in self.distributions:
                    training = model_distribution['training']
                    for category in training['categories']:
                        classes.add(category[0])

            self.class_names = sorted(classes)
Пример #2
0
    def __init__(self, ensemble, model_fns_dir, api=None):
        """Set up the predictor from an ensemble and its exported models.

        Args:
            ensemble: Ensemble reference handed to
                ``self.get_ensemble_resource``. The value returned by that
                helper is indexed as a resource dictionary with an
                ``'object'`` key.
            model_fns_dir: Directory where the models' predict functions
                are stored (see the ``bigmler export`` command). It is
                passed to ``self.get_model_fns``.
            api: Connection used to retrieve the ensemble when the given
                resource lacks information. Defaults to
                ``BigML(storage=STORAGE)``.

        Raises:
            ValueError: If ``model_fns_dir`` is empty or ``None``.
        """
        self.api = BigML(storage=STORAGE) if api is None else api
        self.resource_id = None
        # to be deprecated
        self.ensemble_id = None
        self.objective_id = None
        self.distributions = None
        self.distribution = None
        self.models_splits = []
        self.multi_model = None
        self.boosting = None
        self.boosting_offsets = None
        self.regression = False
        self.fields = None
        # Always defined, even for ensembles without the fields structure.
        self.input_fields = None
        self.class_names = None
        self.importance = {}
        self.predict_functions = []

        ensemble = self.get_ensemble_resource(ensemble)
        self.resource_id = get_ensemble_id(ensemble)
        self.ensemble_id = self.resource_id

        if lacks_info(ensemble, inner_key="ensemble"):
            # avoid checking fields because of old ensembles
            ensemble = retrieve_resource(self.api, self.resource_id,
                                         no_check_fields=True)
        ensemble_info = ensemble['object']
        if ensemble_info.get('type') == BOOSTING:
            self.boosting = ensemble_info.get('boosting')
        self.model_ids = ensemble_info['models']
        self.distributions = ensemble_info.get('distributions', [])
        self.importance = ensemble_info.get('importance', [])
        # new ensembles have the fields structure
        if ensemble_info.get('ensemble'):
            self.fields = ensemble_info['ensemble'].get("fields")
            self.objective_id = ensemble_info.get("objective_field")
            self.input_fields = ensemble_info.get("input_fields")

        if not model_fns_dir:
            raise ValueError("The EnsemblePredictor object expects as"
                             " argument the directory where the models"
                             " predict functions are stored. To generate "
                             " them, please check the 'bigmler export'"
                             " command.")
        self.get_model_fns(model_fns_dir)

        # Ensembles without the fields structure leave ``self.fields`` as
        # None; in that case there is no objective field description.
        objective_field = None
        if self.fields:
            objective_field = self.fields[self.objective_id]
            summary = objective_field['summary']
            # First summary key present wins: bins, then counts, then
            # categories.
            for summary_key in ('bins', 'counts', 'categories'):
                if summary_key in summary:
                    self.distribution = summary[summary_key]
                    break
            else:
                self.distribution = []
            self.regression = objective_field.get('optype') == 'numeric'
        # Without field information the objective type cannot be read, so
        # ``self.regression`` keeps its default (False) instead of failing
        # on a ``None`` lookup.

        if self.boosting:
            if self.regression:
                self.boosting_offsets = ensemble_info.get('initial_offset', 0)
            else:
                self.boosting_offsets = dict(
                    ensemble_info.get('initial_offsets', []))

        if not self.regression and self.boosting is None:
            try:
                categories = objective_field['summary']['categories']
                classes = [category[0] for category in categories]
            except (AttributeError, KeyError, TypeError):
                # TypeError covers a missing objective field (None), so the
                # classes are collected from the training distributions.
                classes = set()
                for model_distribution in self.distributions:
                    training = model_distribution['training']
                    for category in training['categories']:
                        classes.add(category[0])

            self.class_names = sorted(classes)