def __init__(self, model, api=None):
    """Sets up the local model and builds its prediction tree.

    ``model`` is used as given when it is a dict whose 'resource' key is
    not None. Any other value is turned into a model id by
    ``get_model_id`` and downloaded with ``retrieve_model``, using ``api``
    or a new ``BigML(storage=STORAGE)`` connection when ``api`` is None.

    Raises Exception when no model id can be extracted, when the resource
    has no 'model' dict or when its status code is not FINISHED.
    """
    given_as_resource = (isinstance(model, dict)
                         and 'resource' in model
                         and model['resource'] is not None)
    if not given_as_resource:
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(error_message(model,
                                          resource_type='model',
                                          method='get'))
        model = retrieve_model(api, self.resource_id,
                               query_string=ONLY_MODEL)

    BaseModel.__init__(self, model, api=api)

    # the model information can be wrapped in an 'object' key
    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    self.tree = Tree(model['model']['root'],
                     self.fields,
                     self.objective_field)
def __init__(self, model, api=None):
    """Sets up the local model and builds its prediction tree.

    ``model`` is used as given when it is a dict whose 'resource' key is
    not None. Any other value is turned into a model id by
    ``get_model_id`` and downloaded with ``retrieve_model``, using ``api``
    or a new ``BigML(storage=STORAGE)`` connection when ``api`` is None.

    For trees flagged as regression, ``self.regression_ready`` records
    whether numpy and scipy can be imported.

    Raises Exception when no model id can be extracted, when the resource
    has no 'model' dict or when its status code is not FINISHED.
    """
    given_as_resource = (isinstance(model, dict)
                         and 'resource' in model
                         and model['resource'] is not None)
    if not given_as_resource:
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(api.error_message(model,
                                              resource_type='model',
                                              method='get'))
        model = retrieve_model(api, self.resource_id,
                               query_string=ONLY_MODEL)

    BaseModel.__init__(self, model, api=api)

    # the model information can be wrapped in an 'object' key
    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    training_distribution = model['model']['distribution']['training']
    self.ids_map = {}
    self.tree = Tree(model['model']['root'],
                     self.fields,
                     objective_field=self.objective_field,
                     root_distribution=training_distribution,
                     parent_id=None,
                     ids_map=self.ids_map)
    self.terms = {}

    if self.tree.regression:
        # the imports are only a probe for the availability of the libraries
        try:
            import numpy
            import scipy
        except ImportError:
            self.regression_ready = False
        else:
            self.regression_ready = True
def __init__(self, model, api=None):
    """Creates the local model from a model resource or a model id.

    A dict holding a non-None 'resource' key is taken as the model
    resource itself. Otherwise ``get_model_id`` extracts the id from
    ``model`` and ``retrieve_model`` fetches the resource through ``api``
    (a ``BigML(storage=STORAGE)`` connection is created if none is given).

    When the tree is a regression one, ``self.regression_ready`` tells
    whether numpy and scipy could be imported.

    Raises Exception if the id cannot be extracted, if the 'model' dict
    is missing from the resource or if the status code is not FINISHED.
    """
    needs_retrieval = (not isinstance(model, dict)
                       or 'resource' not in model
                       or model['resource'] is None)
    if needs_retrieval:
        if api is None:
            api = BigML(storage=STORAGE)
        model_id = get_model_id(model)
        self.resource_id = model_id
        if model_id is None:
            message = api.error_message(model,
                                        resource_type='model',
                                        method='get')
            raise Exception(message)
        model = retrieve_model(api, model_id, query_string=ONLY_MODEL)

    BaseModel.__init__(self, model, api=api)

    # unwraps the resource when it is stored under the 'object' key
    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if 'model' not in model or not isinstance(model['model'], dict):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if 'code' not in status or status['code'] != FINISHED:
        raise Exception("The model isn't finished yet")

    root_distribution = model['model']['distribution']['training']
    self.ids_map = {}
    self.tree = Tree(
        model['model']['root'],
        self.fields,
        objective_field=self.objective_field,
        root_distribution=root_distribution,
        parent_id=None,
        ids_map=self.ids_map)
    self.terms = {}

    if not self.tree.regression:
        return
    # importing is just a check that both libraries are installed
    try:
        import numpy
        import scipy
    except ImportError:
        self.regression_ready = False
    else:
        self.regression_ready = True
def field_importance_data(self):
    """Computes field importance based on the field importance information
    of the individual models in the ensemble.

    Returns a tuple with:
        - a list of [field_id, importance] pairs sorted by decreasing
          importance
        - a dict that maps each of those field ids to {'name': <name>},
          the name being read from ``self.fields``

    ``self.importance`` is used as is when it is not empty. Otherwise the
    importances of the individual models are added per field and divided
    by the number of ids in ``self.model_ids``. They are read from
    ``self.distributions`` when every item there has an 'importance' key,
    or from a ``BaseModel`` built for each model id if not.
    """
    def ranking(importance_by_field):
        """Lists the [field_id, importance] pairs, highest first."""
        return [list(pair) for pair in
                sorted(importance_by_field.items(),
                       key=lambda pair: pair[1], reverse=True)]

    if self.importance:
        field_names = {field_id: {'name': self.fields[field_id]['name']}
                       for field_id in self.importance}
        return ranking(self.importance), field_names

    if (isinstance(self.distributions, list) and
            all('importance' in item for item in self.distributions)):
        # Extracts importance from ensemble information
        importances = [model_info['importance']
                       for model_info in self.distributions]
    else:
        # Old ensembles, extracts importance from model information.
        # The generator builds each local model only when it is consumed.
        importances = (BaseModel(model_id, api=self.api).field_importance
                       for model_id in self.model_ids)

    field_importance = {}
    field_names = {}
    for model_importance in importances:
        for field_info in model_importance:
            field_id = field_info[0]
            if field_id not in field_importance:
                field_importance[field_id] = 0.0
                field_names[field_id] = {
                    'name': self.fields[field_id]['name']}
            field_importance[field_id] += field_info[1]

    # the accumulated values are floats, so this is a true division
    number_of_models = len(self.model_ids)
    for field_id in field_importance:
        field_importance[field_id] /= number_of_models

    return ranking(field_importance), field_names
def __init__(self, model, api=None):
    """The Model constructor can be given as first argument:
        - a model structure
        - a model id
        - a path to a JSON file containing a model structure

    A string is first tried as a path to a JSON file and then as a model
    id. Anything that is not a dict with a non-None 'resource' key is
    retrieved through ``api`` (a ``BigML(storage=STORAGE)`` connection is
    created when ``api`` is None).

    Raises:
        ValueError: the file is not valid JSON or no model id can be
                    extracted from its contents
        IOError: the string is neither a readable file nor a model id
        Exception: the string contains 'model/' but no model id can be
                   extracted from it, the resource has no 'model' dict or
                   its status code is not FINISHED
    """
    # always defined, so that the retrieval below never hits a missing
    # attribute
    self.resource_id = None

    # the string can be a path to a JSON file
    if isinstance(model, basestring):
        source = model
        try:
            with open(source) as model_file:
                model = json.load(model_file)
        except IOError:
            # if it is not a path, it can be a model id
            self.resource_id = get_model_id(source)
            if self.resource_id is None:
                if source.find('model/') > -1:
                    # the message is built by the connection object
                    if api is None:
                        api = BigML(storage=STORAGE)
                    raise Exception(
                        api.error_message(source,
                                          resource_type='model',
                                          method='get'))
                raise IOError("Failed to open the expected JSON file"
                              " at %s" % source)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % source)
        else:
            # checked outside the try block: the handler above would
            # replace this message otherwise
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML model"
                                 " representation.")

    if not (isinstance(model, dict) and 'resource' in model and
            model['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        model = retrieve_resource(api, self.resource_id,
                                  query_string=ONLY_MODEL)
    else:
        self.resource_id = get_model_id(model)

    BaseModel.__init__(self, model, api=api)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    distribution = model['model']['distribution']['training']
    self.ids_map = {}
    self.tree = Tree(model['model']['root'],
                     self.fields,
                     objective_field=self.objective_id,
                     root_distribution=distribution,
                     parent_id=None,
                     ids_map=self.ids_map)
    self.terms = {}

    if self.tree.regression:
        # the imports only check that both libraries are available
        try:
            import numpy
            import scipy
            self.regression_ready = True
        except ImportError:
            self.regression_ready = False
def __init__(self, model, api=None):
    """The Model constructor can be given as first argument:
        - a model structure
        - a model id
        - a path to a JSON file containing a model structure

    A string is first tried as a path to a JSON file and then as a model
    id. Anything that is not a dict with a non-None 'resource' key is
    retrieved through ``api`` (a ``BigML(storage=STORAGE)`` connection is
    created when ``api`` is None).

    Raises:
        ValueError: the file is not valid JSON or no model id can be
                    extracted from its contents
        IOError: the string is neither a readable file nor a model id
        Exception: the string contains 'model/' but no model id can be
                   extracted from it, the resource has no 'model' dict or
                   its status code is not FINISHED
    """
    self.resource_id = None
    self.ids_map = {}
    self.terms = {}

    # the string can be a path to a JSON file
    if isinstance(model, basestring):
        source = model
        try:
            with open(source) as model_file:
                model = json.load(model_file)
        except IOError:
            # if it is not a path, it can be a model id
            self.resource_id = get_model_id(source)
            if self.resource_id is None:
                if source.find('model/') > -1:
                    # the message is built by the connection object
                    if api is None:
                        api = BigML(storage=STORAGE)
                    raise Exception(
                        api.error_message(source,
                                          resource_type='model',
                                          method='get'))
                raise IOError("Failed to open the expected JSON file"
                              " at %s" % source)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % source)
        else:
            # checked outside the try block: the handler above would
            # replace this message otherwise
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML model"
                                 " representation.")

    if not (isinstance(model, dict) and 'resource' in model and
            model['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        model = retrieve_resource(api, self.resource_id,
                                  query_string=ONLY_MODEL)
    else:
        self.resource_id = get_model_id(model)

    BaseModel.__init__(self, model, api=api)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    distribution = model['model']['distribution']['training']
    self.tree = Tree(
        model['model']['root'],
        self.fields,
        objective_field=self.objective_id,
        root_distribution=distribution,
        parent_id=None,
        ids_map=self.ids_map)

    if self.tree.regression:
        # the imports only check that both libraries are available
        try:
            import numpy
            import scipy
            self.regression_ready = True
        except ImportError:
            self.regression_ready = False
def __init__(self, model, api=None, fields=None, cache_get=None):
    """The Model constructor can be given as first argument:
        - a model structure
        - a model id
        - a path to a JSON file containing a model structure

    When ``use_cache(cache_get)`` holds, the instance attributes are
    loaded from the cache and nothing else is done. Otherwise the model
    resource is obtained with ``get_resource_dict`` and a boosting,
    regression or classification tree is built from its root.

    Raises Exception when the resource has no 'model' dict or when its
    status code is not FINISHED.
    """
    if use_cache(cache_get):
        # the cache stores the attributes of the model
        self.__dict__ = load(get_model_id(model), cache_get)
        return

    self.resource_id = None
    self.ids_map = {}
    self.terms = {}
    self.regression = False
    self.boosting = None
    self.class_names = None

    api = get_api_connection(api)
    # retrieves the model id and the model information
    self.resource_id, model = get_resource_dict(
        model, "model", api=api, no_check_fields=fields is not None)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:"
                        "\n\n%s" % model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("Cannot create the Model instance."
                        " Only correctly finished models can be"
                        " used. The model status is currently:"
                        " %s\n" % STATUSES[status['code']])

    # the boosting info is filled in before BaseModel.__init__ is called
    if model.get("boosted_ensemble"):
        self.boosting = model.get('boosting', False)
    if self.boosting == {}:
        self.boosting = False

    self.input_fields = model["input_fields"]
    BaseModel.__init__(self, model, api=api, fields=fields)

    root = model['model']['root']
    self.weighted = "weighted_objective_summary" in root

    if self.boosting:
        # boosted tree
        self.tree = b.build_boosting_tree(model['model']['root'])
    elif self.regression:
        # regression tree
        self.root_distribution = \
            model['model']['distribution']['training']
        self.tree = r.build_regression_tree(
            root,
            distribution=self.root_distribution,
            weighted=self.weighted)
    else:
        # classification tree
        self.root_distribution = \
            model['model']['distribution']['training']
        self.laplacian_term = laplacian_term(
            extract_distribution(self.root_distribution)[1],
            self.weighted)
        self.tree = c.build_classification_tree(
            model['model']['root'],
            distribution=self.root_distribution,
            weighted=self.weighted)
        self.class_names = sorted(
            [category[0]
             for category in self.root_distribution["categories"]])
        objective_summary = self.fields[self.objective_id]["summary"]
        self.objective_categories = [
            category for category, _ in objective_summary["categories"]]

    if self.boosting:
        self.tree_type = BOOSTING
        self.offsets = b.OFFSETS
    elif self.regression:
        self.tree_type = REGRESSION
        self.offsets = r.OFFSETS[str(self.weighted)]
    else:
        self.tree_type = CLASSIFICATION
        self.offsets = c.OFFSETS[str(self.weighted)]
def __init__(self, model, api=None, fields=None):
    """The Model constructor can be given as first argument:
        - a model structure
        - a model id
        - a path to a JSON file containing a model structure

    A string is first tried as a path to a JSON file and then as a model
    id. A dict that does not pass ``check_model_fields`` (when no
    ``fields`` are given) is replaced by its model id. Anything that is
    not a dict with a non-None 'resource' key is retrieved through ``api``
    (a ``BigML(storage=STORAGE)`` connection is created when ``api`` is
    None).

    Raises:
        ValueError: the file is not valid JSON or no model id can be
                    extracted from its contents
        IOError: the string is neither a readable file nor a model id
        Exception: the string contains 'model/' but no model id can be
                   extracted from it, the resource has no 'model' dict or
                   its status code is not FINISHED
    """
    self.resource_id = None
    self.ids_map = {}
    self.terms = {}
    self.regression = False
    self.boosting = None
    self.class_names = None
    if not hasattr(self, 'tree_class'):
        self.tree_class = Tree

    # the string can be a path to a JSON file
    if isinstance(model, basestring):
        source = model
        try:
            with open(source) as model_file:
                model = json.load(model_file)
        except IOError:
            # if it is not a path, it can be a model id
            self.resource_id = get_model_id(source)
            if self.resource_id is None:
                if source.find('model/') > -1:
                    # the message is built by the connection object
                    if api is None:
                        api = BigML(storage=STORAGE)
                    raise Exception(
                        api.error_message(source,
                                          resource_type='model',
                                          method='get'))
                raise IOError("Failed to open the expected JSON file"
                              " at %s" % source)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % source)
        else:
            # checked outside the try block: the handler above would
            # replace this message otherwise
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML model"
                                 " representation.")

    # checks whether the information needed for local predictions is in
    # the first argument
    if isinstance(model, dict) and \
            not fields and \
            not check_model_fields(model):
        # if the fields used by the model are not
        # available, use only ID to retrieve it again
        model = get_model_id(model)
        self.resource_id = model

    if not (isinstance(model, dict) and 'resource' in model and
            model['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        if isinstance(fields, dict):
            query_string = EXCLUDE_FIELDS
        else:
            query_string = ONLY_MODEL
        model = retrieve_resource(api, self.resource_id,
                                  query_string=query_string)
    else:
        self.resource_id = get_model_id(model)

    BaseModel.__init__(self, model, api=api, fields=fields)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    # boosting models are to be handled using the BoostedTree
    # class
    if model.get("boosted_ensemble"):
        self.boosting = model.get('boosting', False)
    if self.boosting == {}:
        self.boosting = False

    # always a boolean: the former and/or chain could yield None
    if self.boosting:
        self.regression = self.boosting.get("objective_class") is None
    else:
        self.regression = \
            self.fields[self.objective_id]['optype'] == 'numeric'

    if self.boosting:
        self.tree = BoostedTree(
            model['model']['root'],
            self.fields,
            objective_field=self.objective_id)
        return

    distribution = model['model']['distribution']['training']
    # will store global information in the tree: regression and
    # max_bins number
    tree_info = {'max_bins': 0}
    self.tree = self.tree_class(
        model['model']['root'],
        self.fields,
        objective_field=self.objective_id,
        root_distribution=distribution,
        parent_id=None,
        ids_map=self.ids_map,
        tree_info=tree_info)
    self.tree.regression = tree_info['regression']

    if self.tree.regression:
        # the imports only check that both libraries are available
        try:
            import numpy
            import scipy
            self._max_bins = tree_info['max_bins']
            self.regression_ready = True
        except ImportError:
            self.regression_ready = False
    else:
        root_dist = self.tree.distribution
        self.class_names = sorted([category[0] for category in root_dist])
def __init__(self, model, api=None):
    """The Model constructor can be given as first argument:
        - a model structure
        - a model id
        - a path to a JSON file containing a model structure

    A string is first tried as a path to a JSON file and then as a model
    id. A dict that does not pass ``check_model_fields`` is replaced by
    its model id. Anything that is not a dict with a non-None 'resource'
    key is retrieved through ``api`` (a ``BigML(storage=STORAGE)``
    connection is created when ``api`` is None).

    Raises:
        ValueError: the file is not valid JSON or no model id can be
                    extracted from its contents
        IOError: the string is neither a readable file nor a model id
        Exception: the string contains 'model/' but no model id can be
                   extracted from it, the resource has no 'model' dict or
                   its status code is not FINISHED
    """
    self.resource_id = None
    self.ids_map = {}
    self.terms = {}

    # the string can be a path to a JSON file
    if isinstance(model, basestring):
        source = model
        try:
            with open(source) as model_file:
                model = json.load(model_file)
        except IOError:
            # if it is not a path, it can be a model id
            self.resource_id = get_model_id(source)
            if self.resource_id is None:
                if source.find("model/") > -1:
                    # the message is built by the connection object
                    if api is None:
                        api = BigML(storage=STORAGE)
                    raise Exception(
                        api.error_message(source,
                                          resource_type="model",
                                          method="get"))
                raise IOError("Failed to open the expected JSON file"
                              " at %s" % source)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % source)
        else:
            # checked outside the try block: the handler above would
            # replace this message otherwise
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise ValueError(
                    "The JSON file does not seem"
                    " to contain a valid BigML model"
                    " representation."
                )

    # checks whether the information needed for local predictions is in
    # the first argument
    if isinstance(model, dict) and not check_model_fields(model):
        # if the fields used by the model are not
        # available, use only ID to retrieve it again
        model = get_model_id(model)
        self.resource_id = model

    if not (isinstance(model, dict) and "resource" in model and
            model["resource"] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        model = retrieve_resource(api, self.resource_id,
                                  query_string=ONLY_MODEL)
    else:
        self.resource_id = get_model_id(model)

    BaseModel.__init__(self, model, api=api)

    if "object" in model and isinstance(model["object"], dict):
        model = model["object"]

    if not ("model" in model and isinstance(model["model"], dict)):
        raise Exception(
            "Cannot create the Model instance. Could not"
            " find the 'model' key in the resource:\n\n%s" % model
        )

    status = get_status(model)
    if not ("code" in status and status["code"] == FINISHED):
        raise Exception("The model isn't finished yet")

    distribution = model["model"]["distribution"]["training"]
    # will store global information in the tree: regression and
    # max_bins number
    tree_info = {"max_bins": 0}
    self.tree = Tree(
        model["model"]["root"],
        self.fields,
        objective_field=self.objective_id,
        root_distribution=distribution,
        parent_id=None,
        ids_map=self.ids_map,
        tree_info=tree_info,
    )
    self.tree.regression = tree_info["regression"]

    if self.tree.regression:
        self._max_bins = tree_info["max_bins"]
        # the imports only check that both libraries are available
        try:
            import numpy
            import scipy
            self.regression_ready = True
        except ImportError:
            self.regression_ready = False
def __init__(self, model, api=None, fields=None):
    """The Model constructor can be given as first argument:
        - a model structure
        - a model id
        - a path to a JSON file containing a model structure

    A string is first tried as a path to a JSON file and then as a model
    id. A dict that does not pass ``check_model_fields`` (when no
    ``fields`` are given) is replaced by its model id. Anything that is
    not a dict with a non-None 'resource' key is retrieved through ``api``
    (a ``BigML(storage=STORAGE)`` connection is created when ``api`` is
    None).

    Raises:
        ValueError: the file is not valid JSON or no model id can be
                    extracted from its contents
        IOError: the string is neither a readable file nor a model id
        Exception: the string contains 'model/' but no model id can be
                   extracted from it, the resource has no 'model' dict or
                   its status code is not FINISHED
    """
    self.resource_id = None
    self.ids_map = {}
    self.terms = {}
    self.regression = False
    self.boosting = None
    self.class_names = None
    if not hasattr(self, 'tree_class'):
        self.tree_class = Tree

    # the string can be a path to a JSON file
    if isinstance(model, basestring):
        model_path = model
        try:
            with open(model_path) as model_file:
                model = json.load(model_file)
        except IOError:
            # if it is not a path, it can be a model id
            self.resource_id = get_model_id(model_path)
            if self.resource_id is None:
                if model_path.find('model/') > -1:
                    # the error message is produced by the connection
                    if api is None:
                        api = BigML(storage=STORAGE)
                    raise Exception(
                        api.error_message(model_path,
                                          resource_type='model',
                                          method='get'))
                raise IOError("Failed to open the expected JSON file"
                              " at %s" % model_path)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % model_path)
        else:
            # kept out of the try block so that the handler above does
            # not overwrite this message
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML model"
                                 " representation.")

    # checks whether the information needed for local predictions is in
    # the first argument
    if isinstance(model, dict) and \
            not fields and \
            not check_model_fields(model):
        # if the fields used by the model are not
        # available, use only ID to retrieve it again
        model = get_model_id(model)
        self.resource_id = model

    if not (isinstance(model, dict) and 'resource' in model and
            model['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        if isinstance(fields, dict):
            query_string = EXCLUDE_FIELDS
        else:
            query_string = ONLY_MODEL
        model = retrieve_resource(api, self.resource_id,
                                  query_string=query_string)
    else:
        self.resource_id = get_model_id(model)

    BaseModel.__init__(self, model, api=api, fields=fields)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    # boosting models are to be handled using the BoostedTree
    # class
    if model.get("boosted_ensemble"):
        self.boosting = model.get('boosting', False)
    if self.boosting == {}:
        self.boosting = False

    # always a boolean: the former and/or chain could yield None
    if self.boosting:
        self.regression = self.boosting.get("objective_class") is None
    else:
        self.regression = \
            self.fields[self.objective_id]['optype'] == 'numeric'

    if self.boosting:
        self.tree = BoostedTree(model['model']['root'],
                                self.fields,
                                objective_field=self.objective_id)
        return

    distribution = model['model']['distribution']['training']
    # will store global information in the tree: regression and
    # max_bins number
    tree_info = {'max_bins': 0}
    self.tree = self.tree_class(
        model['model']['root'],
        self.fields,
        objective_field=self.objective_id,
        root_distribution=distribution,
        parent_id=None,
        ids_map=self.ids_map,
        tree_info=tree_info)
    self.tree.regression = tree_info['regression']

    if self.tree.regression:
        # the imports only check that both libraries are available
        try:
            import numpy
            import scipy
            self._max_bins = tree_info['max_bins']
            self.regression_ready = True
        except ImportError:
            self.regression_ready = False
    else:
        root_dist = self.tree.distribution
        self.class_names = sorted(
            [category[0] for category in root_dist])