def __init__(self, ensemble, model_fns_dir, api=None):
    """Set up a local predictor for an ensemble resource.

    The ensemble is resolved through ``self.get_ensemble_resource`` and,
    when it lacks the inner "ensemble" information, retrieved again
    through ``retrieve_resource``. The models' predict functions are then
    loaded from ``model_fns_dir`` via ``self.get_model_fns``.

    Args:
        ensemble: Ensemble reference handed to
            ``self.get_ensemble_resource``.
        model_fns_dir: Directory where the models' predict functions are
            stored. Must be truthy.
        api: Connection object used to retrieve the resource. When
            ``None``, a ``BigML(storage=STORAGE)`` connection is created.

    Raises:
        ValueError: If ``model_fns_dir`` is empty or ``None``.
    """
    self.api = BigML(storage=STORAGE) if api is None else api

    self.resource_id = None
    # to be deprecated
    self.ensemble_id = None
    self.objective_id = None
    self.distributions = None
    self.distribution = None
    self.models_splits = []
    self.multi_model = None
    self.boosting = None
    self.boosting_offsets = None
    self.regression = False
    self.fields = None
    # Only new ensembles provide input fields; default to None so the
    # attribute always exists.
    self.input_fields = None
    self.class_names = None
    self.importance = {}
    self.predict_functions = []

    ensemble = self.get_ensemble_resource(ensemble)
    self.resource_id = get_ensemble_id(ensemble)
    self.ensemble_id = self.resource_id

    if lacks_info(ensemble, inner_key="ensemble"):
        # avoid checking fields because of old ensembles
        ensemble = retrieve_resource(self.api, self.resource_id,
                                     no_check_fields=True)

    ensemble_info = ensemble['object']
    if ensemble_info.get('type') == BOOSTING:
        self.boosting = ensemble_info.get('boosting')
    self.model_ids = ensemble_info['models']
    self.distributions = ensemble_info.get('distributions', [])
    # NOTE(review): the fallback here is a list while the attribute is
    # initialised as a dict above -- confirm which type callers expect.
    self.importance = ensemble_info.get('importance', [])

    # new ensembles have the fields structure
    if ensemble_info.get('ensemble'):
        self.fields = ensemble_info.get('ensemble', {}).get("fields")
        self.objective_id = ensemble_info.get("objective_field")
        self.input_fields = ensemble_info.get("input_fields")

    if not model_fns_dir:
        raise ValueError("The EnsemblePredictor object expects as"
                         " argument the directory where the models"
                         " predict functions are stored. To generate "
                         " them, please check the 'bigmler export'"
                         " command.")
    self.get_model_fns(model_fns_dir)

    if self.fields:
        objective_info = self.fields[self.objective_id]
        summary = objective_info['summary']
        # Use the first summary section available, in this priority order.
        for section in ('bins', 'counts', 'categories'):
            if section in summary:
                self.distribution = summary[section]
                break
        else:
            self.distribution = []
        # Computed only when fields are available, so a missing fields
        # structure leaves the default (False) instead of failing on a
        # None subscript.
        self.regression = objective_info.get('optype') == 'numeric'

    if self.boosting:
        if self.regression:
            self.boosting_offsets = ensemble_info.get('initial_offset', 0)
        else:
            self.boosting_offsets = dict(
                ensemble_info.get('initial_offsets', []))

    if not self.regression and self.boosting is None:
        try:
            objective_field = self.fields[self.objective_id]
            categories = objective_field['summary']['categories']
            classes = [category[0] for category in categories]
        except (AttributeError, KeyError, TypeError):
            # TypeError covers ensembles without a fields structure
            # (self.fields is None): subscripting None raises TypeError,
            # which previously escaped and skipped this fallback.
            classes = set()
            for model_distribution in self.distributions:
                for category in model_distribution['training']['categories']:
                    classes.add(category[0])
        self.class_names = sorted(classes)