Пример #1
0
    def __init__(self, model, api=None):

        if not (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(error_message(model,
                                              resource_type='model',
                                              method='get'))
            query_string = ONLY_MODEL
            model = retrieve_model(api, self.resource_id,
                                   query_string=query_string)
        BaseModel.__init__(self, model, api=api)
        if ('object' in model and isinstance(model['object'], dict)):
            model = model['object']

        if ('model' in model and isinstance(model['model'], dict)):
            status = get_status(model)
            if ('code' in status and status['code'] == FINISHED):
                self.tree = Tree(
                    model['model']['root'],
                    self.fields,
                    self.objective_field)
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
Пример #2
0
    def __init__(self, model, api=None):

        if not (isinstance(model, dict) and 'resource' in model
                and model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(
                    api.error_message(model,
                                      resource_type='model',
                                      method='get'))
            query_string = ONLY_MODEL
            model = retrieve_model(api,
                                   self.resource_id,
                                   query_string=query_string)
        BaseModel.__init__(self, model, api=api)
        if ('object' in model and isinstance(model['object'], dict)):
            model = model['object']

        if ('model' in model and isinstance(model['model'], dict)):
            status = get_status(model)
            if ('code' in status and status['code'] == FINISHED):
                distribution = model['model']['distribution']['training']
                self.ids_map = {}
                self.tree = Tree(model['model']['root'],
                                 self.fields,
                                 objective_field=self.objective_field,
                                 root_distribution=distribution,
                                 parent_id=None,
                                 ids_map=self.ids_map)
                self.terms = {}
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        if self.tree.regression:
            try:
                import numpy
                import scipy
                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
Пример #3
0
    def __init__(self, model, api=None):

        if not (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(api.error_message(model,
                                                  resource_type='model',
                                                  method='get'))
            query_string = ONLY_MODEL
            model = retrieve_model(api, self.resource_id,
                                   query_string=query_string)
        BaseModel.__init__(self, model, api=api)
        if ('object' in model and isinstance(model['object'], dict)):
            model = model['object']

        if ('model' in model and isinstance(model['model'], dict)):
            status = get_status(model)
            if ('code' in status and status['code'] == FINISHED):
                distribution = model['model']['distribution']['training']
                self.ids_map = {}
                self.tree = Tree(
                    model['model']['root'],
                    self.fields,
                    objective_field=self.objective_field,
                    root_distribution=distribution,
                    parent_id=None,
                    ids_map=self.ids_map)
                self.terms = {}
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        if self.tree.regression:
            try:
                import numpy
                import scipy
                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
Пример #4
0
    def field_importance_data(self):
        """Computes field importance based on the field importance information
           of the individual models in the ensemble.

        """
        field_importance = {}
        field_names = {}
        if self.importance:
            field_importance = self.importance
            field_names = {field_id: {'name': self.fields[field_id]["name"]} \
                           for field_id in field_importance.keys()}
            return [list(importance) for importance in \
                sorted(field_importance.items(), key=lambda x: x[1],
                       reverse=True)], field_names

        if (self.distributions is not None
                and isinstance(self.distributions, list)
                and all('importance' in item for item in self.distributions)):
            # Extracts importance from ensemble information
            importances = [
                model_info['importance'] for model_info in self.distributions
            ]
            for index in range(0, len(importances)):
                model_info = importances[index]
                for field_info in model_info:
                    field_id = field_info[0]
                    if field_id not in field_importance:
                        field_importance[field_id] = 0.0
                        name = self.fields[field_id]['name']
                        field_names[field_id] = {'name': name}
                    field_importance[field_id] += field_info[1]
        else:
            # Old ensembles, extracts importance from model information
            for model_id in self.model_ids:
                local_model = BaseModel(model_id, api=self.api)
                for field_info in local_model.field_importance:
                    field_id = field_info[0]
                    if field_info[0] not in field_importance:
                        field_importance[field_id] = 0.0
                        name = self.fields[field_id]['name']
                        field_names[field_id] = {'name': name}
                    field_importance[field_id] += field_info[1]

        number_of_models = len(self.model_ids)
        for field_id in field_importance:
            field_importance[field_id] /= number_of_models
        return [list(importance) for importance in \
            sorted(field_importance.items(), key=lambda x: x[1],
                   reverse=True)], field_names
Пример #5
0
    def __init__(self, model, api=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        """
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            try:
                with open(model) as model_file:
                    model = json.load(model_file)
                    self.resource_id = get_model_id(model)
                    if self.resource_id is None:
                        raise ValueError("The JSON file does not seem"
                                         " to contain a valid BigML model"
                                         " representation.")
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    if model.find('model/') > -1:
                        raise Exception(
                            api.error_message(model,
                                              resource_type='model',
                                              method='get'))
                    else:
                        raise IOError("Failed to open the expected JSON file"
                                      " at %s" % model)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected.")

        if not (isinstance(model, dict) and 'resource' in model
                and model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            query_string = ONLY_MODEL
            model = retrieve_resource(api,
                                      self.resource_id,
                                      query_string=query_string)
        BaseModel.__init__(self, model, api=api)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:
                distribution = model['model']['distribution']['training']
                self.ids_map = {}
                self.tree = Tree(model['model']['root'],
                                 self.fields,
                                 objective_field=self.objective_id,
                                 root_distribution=distribution,
                                 parent_id=None,
                                 ids_map=self.ids_map)
                self.terms = {}
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        if self.tree.regression:
            try:
                import numpy
                import scipy
                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
Пример #6
0
    def __init__(self, model, api=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            try:
                with open(model) as model_file:
                    model = json.load(model_file)
                    self.resource_id = get_model_id(model)
                    if self.resource_id is None:
                        raise ValueError("The JSON file does not seem"
                                         " to contain a valid BigML model"
                                         " representation.")
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    if model.find('model/') > -1:
                        raise Exception(
                            api.error_message(model,
                                              resource_type='model',
                                              method='get'))
                    else:
                        raise IOError("Failed to open the expected JSON file"
                                      " at %s" % model)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected.")

        if not (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id,
                                      query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:
                distribution = model['model']['distribution']['training']
                self.tree = Tree(
                    model['model']['root'],
                    self.fields,
                    objective_field=self.objective_id,
                    root_distribution=distribution,
                    parent_id=None,
                    ids_map=self.ids_map)
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        if self.tree.regression:
            try:
                import numpy
                import scipy
                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
Пример #7
0
    def __init__(self, model, api=None, fields=None, cache_get=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        """

        if use_cache(cache_get):
            # using a cache to store the model attributes
            self.__dict__ = load(get_model_id(model), cache_get)
            return

        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        self.regression = False
        self.boosting = None
        self.class_names = None
        api = get_api_connection(api)
        # retrieving model information from
        self.resource_id, model = get_resource_dict( \
            model, "model", api=api, no_check_fields=fields is not None)

        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:
                # fill boosting info before creating modelfields
                if model.get("boosted_ensemble"):
                    self.boosting = model.get('boosting', False)
                if self.boosting == {}:
                    self.boosting = False

                self.input_fields = model["input_fields"]
                BaseModel.__init__(self, model, api=api, fields=fields)

                root = model['model']['root']
                self.weighted = "weighted_objective_summary" in root

                if self.boosting:
                    # build boosted tree
                    self.tree = b.build_boosting_tree( \
                        model['model']['root'])
                elif self.regression:
                    self.root_distribution = model['model'][ \
                        'distribution']['training']
                    # build regression tree
                    self.tree = r.build_regression_tree(root, \
                        distribution=self.root_distribution, \
                        weighted=self.weighted)
                else:
                    # build classification tree
                    self.root_distribution = model['model'][\
                        'distribution']['training']
                    self.laplacian_term = laplacian_term( \
                        extract_distribution(self.root_distribution)[1],
                        self.weighted)
                    self.tree = c.build_classification_tree( \
                        model['model']['root'], \
                        distribution=self.root_distribution, \
                        weighted=self.weighted)
                    self.class_names = sorted( \
                        [category[0] for category in \
                        self.root_distribution["categories"]])
                    self.objective_categories = [category for \
                        category, _ in self.fields[self.objective_id][ \
                       "summary"]["categories"]]

                if self.boosting:
                    self.tree_type = BOOSTING
                    self.offsets = b.OFFSETS
                elif self.regression:
                    self.tree_type = REGRESSION
                    self.offsets = r.OFFSETS[str(self.weighted)]
                else:
                    self.tree_type = CLASSIFICATION
                    self.offsets = c.OFFSETS[str(self.weighted)]

            else:
                raise Exception("Cannot create the Model instance."
                                " Only correctly finished models can be"
                                " used. The model status is currently:"
                                " %s\n" % STATUSES[status['code']])
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:"
                            "\n\n%s" % model)
Пример #8
0
    def __init__(self, model, api=None, fields=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        self.regression = False
        self.boosting = None
        self.class_names = None
        if not hasattr(self, 'tree_class'):
            self.tree_class = Tree
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            try:
                with open(model) as model_file:
                    model = json.load(model_file)
                    self.resource_id = get_model_id(model)
                    if self.resource_id is None:
                        raise ValueError("The JSON file does not seem"
                                         " to contain a valid BigML model"
                                         " representation.")
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    if model.find('model/') > -1:
                        raise Exception(
                            api.error_message(model,
                                              resource_type='model',
                                              method='get'))
                    else:
                        raise IOError("Failed to open the expected JSON file"
                                      " at %s" % model)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected.")

        # checks whether the information needed for local predictions is in
        # the first argument
        if isinstance(model, dict) and \
                not fields and \
                not check_model_fields(model):
            # if the fields used by the model are not
            # available, use only ID to retrieve it again
            model = get_model_id(model)
            self.resource_id = model

        if not (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            if fields is not None and isinstance(fields, dict):
                query_string = EXCLUDE_FIELDS
            else:
                query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id,
                                      query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api, fields=fields)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:

                # boosting models are to be handled using the BoostedTree
                # class
                if model.get("boosted_ensemble"):
                    self.boosting = model.get('boosting', False)
                if self.boosting == {}:
                    self.boosting = False

                self.regression = \
                    not self.boosting and \
                    self.fields[self.objective_id]['optype'] == 'numeric' \
                    or (self.boosting and \
                    self.boosting.get("objective_class") is None)

                if self.boosting:
                    self.tree = BoostedTree(
                        model['model']['root'],
                        self.fields,
                        objective_field=self.objective_id)
                else:
                    distribution = model['model']['distribution']['training']
                    # will store global information in the tree: regression and
                    # max_bins number
                    tree_info = {'max_bins': 0}
                    self.tree = self.tree_class(
                        model['model']['root'],
                        self.fields,
                        objective_field=self.objective_id,
                        root_distribution=distribution,
                        parent_id=None,
                        ids_map=self.ids_map,
                        tree_info=tree_info)

                    self.tree.regression = tree_info['regression']

                    if self.tree.regression:
                        try:
                            import numpy
                            import scipy
                            self._max_bins = tree_info['max_bins']
                            self.regression_ready = True
                        except ImportError:
                            self.regression_ready = False
                    else:
                        root_dist = self.tree.distribution
                        self.class_names = sorted([category[0]
                                                   for category in root_dist])
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
Пример #9
0
    def __init__(self, model, api=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            try:
                with open(model) as model_file:
                    model = json.load(model_file)
                    self.resource_id = get_model_id(model)
                    if self.resource_id is None:
                        raise ValueError(
                            "The JSON file does not seem" " to contain a valid BigML model" " representation."
                        )
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    if model.find("model/") > -1:
                        raise Exception(api.error_message(model, resource_type="model", method="get"))
                    else:
                        raise IOError("Failed to open the expected JSON file" " at %s" % model)
            except ValueError:
                raise ValueError("Failed to interpret %s." " JSON file expected.")

        # checks whether the information needed for local predictions is in
        # the first argument
        if isinstance(model, dict) and not check_model_fields(model):
            # if the fields used by the model are not
            # available, use only ID to retrieve it again
            model = get_model_id(model)
            self.resource_id = model

        if not (isinstance(model, dict) and "resource" in model and model["resource"] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id, query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api)
        if "object" in model and isinstance(model["object"], dict):
            model = model["object"]

        if "model" in model and isinstance(model["model"], dict):
            status = get_status(model)
            if "code" in status and status["code"] == FINISHED:
                distribution = model["model"]["distribution"]["training"]
                # will store global information in the tree: regression and
                # max_bins number
                tree_info = {"max_bins": 0}
                self.tree = Tree(
                    model["model"]["root"],
                    self.fields,
                    objective_field=self.objective_id,
                    root_distribution=distribution,
                    parent_id=None,
                    ids_map=self.ids_map,
                    tree_info=tree_info,
                )
                self.tree.regression = tree_info["regression"]
                if self.tree.regression:
                    self._max_bins = tree_info["max_bins"]
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception(
                "Cannot create the Model instance. Could not" " find the 'model' key in the resource:\n\n%s" % model
            )
        if self.tree.regression:
            try:
                import numpy
                import scipy

                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
Пример #10
0
    def __init__(self, model, api=None, fields=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        self.regression = False
        self.boosting = None
        self.class_names = None
        if not hasattr(self, 'tree_class'):
            self.tree_class = Tree
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            try:
                with open(model) as model_file:
                    model = json.load(model_file)
                    self.resource_id = get_model_id(model)
                    if self.resource_id is None:
                        raise ValueError("The JSON file does not seem"
                                         " to contain a valid BigML model"
                                         " representation.")
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    if model.find('model/') > -1:
                        raise Exception(
                            api.error_message(model,
                                              resource_type='model',
                                              method='get'))
                    else:
                        raise IOError("Failed to open the expected JSON file"
                                      " at %s" % model)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected.")

        # checks whether the information needed for local predictions is in
        # the first argument
        if isinstance(model, dict) and \
                not fields and \
                not check_model_fields(model):
            # if the fields used by the model are not
            # available, use only ID to retrieve it again
            model = get_model_id(model)
            self.resource_id = model

        if not (isinstance(model, dict) and 'resource' in model
                and model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            if fields is not None and isinstance(fields, dict):
                query_string = EXCLUDE_FIELDS
            else:
                query_string = ONLY_MODEL
            model = retrieve_resource(api,
                                      self.resource_id,
                                      query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api, fields=fields)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:

                # boosting models are to be handled using the BoostedTree
                # class
                if model.get("boosted_ensemble"):
                    self.boosting = model.get('boosting', False)
                if self.boosting == {}:
                    self.boosting = False

                self.regression = \
                    not self.boosting and \
                    self.fields[self.objective_id]['optype'] == 'numeric' \
                    or (self.boosting and \
                    self.boosting.get("objective_class") is None)

                if self.boosting:
                    self.tree = BoostedTree(model['model']['root'],
                                            self.fields,
                                            objective_field=self.objective_id)
                else:
                    distribution = model['model']['distribution']['training']
                    # will store global information in the tree: regression and
                    # max_bins number
                    tree_info = {'max_bins': 0}
                    self.tree = self.tree_class(
                        model['model']['root'],
                        self.fields,
                        objective_field=self.objective_id,
                        root_distribution=distribution,
                        parent_id=None,
                        ids_map=self.ids_map,
                        tree_info=tree_info)

                    self.tree.regression = tree_info['regression']

                    if self.tree.regression:
                        try:
                            import numpy
                            import scipy
                            self._max_bins = tree_info['max_bins']
                            self.regression_ready = True
                        except ImportError:
                            self.regression_ready = False
                    else:
                        root_dist = self.tree.distribution
                        self.class_names = sorted(
                            [category[0] for category in root_dist])
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)