示例#1
0
文件: model.py 项目: alyssaq/python
    def __init__(self, model, api=None):
        """Build the local model and its prediction tree.

        `model` is used as given when it is a dict holding a non-None
        'resource' key. Otherwise a model id is extracted from it with
        `get_model_id` and the model is fetched with `retrieve_model`,
        creating a `BigML(storage=STORAGE)` connection when `api` is None.

        Raises Exception when no model id can be extracted, when the
        resource has no 'model' dict, or when the model status code is
        not FINISHED.
        """
        has_resource = (isinstance(model, dict) and
                        model.get('resource') is not None)
        if not has_resource:
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                message = error_message(model,
                                        resource_type='model',
                                        method='get')
                raise Exception(message)
            model = retrieve_model(api, self.resource_id,
                                   query_string=ONLY_MODEL)
        BaseModel.__init__(self, model, api=api)

        # work on the inner 'object' dict when the resource is wrapped
        if 'object' in model:
            wrapped = model['object']
            if isinstance(wrapped, dict):
                model = wrapped

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")
        self.tree = Tree(model['model']['root'],
                         self.fields,
                         self.objective_field)
示例#2
0
    def __init__(self, ensemble, api=None, max_models=None):
        """Set up the ensemble from a list of models or an ensemble.

        When `ensemble` is a list, every element is turned into a model
        id with `get_model_id`. Otherwise the ensemble is resolved with
        `check_resource` and its model ids and optional distributions
        are read from the resource 'object'.

        The model ids are split in chunks of `max_models` (a single chunk
        when `max_models` is None). Only when there is exactly one chunk
        are the models retrieved and wrapped in a `MultiModel`.

        Raises ValueError when the list of models cannot be verified.
        """
        self.api = BigML(storage=STORAGE) if api is None else api
        self.ensemble_id = None
        if isinstance(ensemble, list):
            try:
                models = [get_model_id(item) for item in ensemble]
            except ValueError:
                raise ValueError('Failed to verify the list of models. Check '
                                 'your model id values.')
            self.distributions = None
        else:
            self.ensemble_id = get_ensemble_id(ensemble)
            ensemble = check_resource(ensemble, self.api.get_ensemble)
            ensemble_info = ensemble['object']
            models = ensemble_info['models']
            self.distributions = ensemble_info.get('distributions', None)
        self.model_ids = models
        self.fields = self.all_model_fields()

        total = len(models)
        if max_models is None:
            self.models_splits = [models]
        else:
            self.models_splits = [
                models[start:(start + max_models)]
                for start in range(0, total, max_models)]
        if len(self.models_splits) == 1:
            only_split = self.models_splits[0]
            local_models = [retrieve_resource(self.api, model_id,
                                              query_string=ONLY_MODEL)
                            for model_id in only_split]
            self.multi_model = MultiModel(local_models, self.api)
示例#3
0
    def __init__(self, ensemble, api=None, max_models=None):
        """Initialize the ensemble from model ids or an ensemble resource.

        A list argument is read as a list of models whose ids are
        extracted with `get_model_id`; any other argument is resolved
        through `get_ensemble_id` and `check_resource`, and the model ids
        and 'distributions' are taken from the resource 'object'.

        `max_models` sets the size of the chunks the model ids are split
        into; None keeps them in a single chunk. A `MultiModel` is built
        only when the split yields exactly one chunk.

        Raises ValueError when the list of models cannot be verified.
        """
        if api is not None:
            self.api = api
        else:
            self.api = BigML(storage=STORAGE)
        self.ensemble_id = None

        given_as_list = isinstance(ensemble, list)
        if not given_as_list:
            self.ensemble_id = get_ensemble_id(ensemble)
            ensemble = check_resource(ensemble, self.api.get_ensemble)
            models = ensemble['object']['models']
            self.distributions = ensemble['object'].get('distributions',
                                                        None)
        else:
            try:
                models = [get_model_id(entry) for entry in ensemble]
            except ValueError:
                raise ValueError('Failed to verify the list of models. Check '
                                 'your model id values.')
            self.distributions = None
        self.model_ids = models
        self.fields = self.all_model_fields()

        number_of_models = len(models)
        if max_models is not None:
            offsets = range(0, number_of_models, max_models)
            self.models_splits = [models[offset:(offset + max_models)]
                                  for offset in offsets]
        else:
            self.models_splits = [models]

        if len(self.models_splits) == 1:
            retrieved = []
            for model_id in self.models_splits[0]:
                retrieved.append(retrieve_resource(self.api, model_id,
                                                   query_string=ONLY_MODEL))
            self.multi_model = MultiModel(retrieved, self.api)
示例#4
0
    def __init__(self, model, api=None):
        """Build the local model, its field maps and its prediction tree.

        Args:
            model: either a model resource dict holding a non-None
                'resource' key, or a value from which `get_model_id` can
                extract a model id; in the latter case the model is
                fetched with `retrieve_model`.
            api: connection used to fetch the model. When it is None and
                a fetch is needed, `BigML(storage=STORAGE)` is created.

        Raises:
            Exception: if no model id can be extracted, if the resource
                has no 'model' dict, if the model status code is not
                FINISHED, or if some 'model_fields' key is absent from
                the model 'fields'.
        """
        if (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            self.resource_id = model['resource']
        else:
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(error_message(model,
                                              resource_type='model',
                                              method='get'))
            model = retrieve_model(api, self.resource_id)

        # work on the inner 'object' dict when the resource is wrapped
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        tree_info = model['model']
        if 'model_fields' in tree_info:
            fields = tree_info['model_fields']
            # pagination or exclusion might cause a field not to
            # be in available fields dict
            if not all(key in tree_info['fields'] for key in fields):
                raise Exception("Some fields are missing"
                                " to generate a local model."
                                " Please, provide a model with"
                                " the complete list of fields.")
            for field_id in fields:
                field_info = tree_info['fields'][field_id]
                # a field may come without a summary: copy it only when
                # present instead of failing with a KeyError
                if 'summary' in field_info:
                    fields[field_id]['summary'] = field_info['summary']
                fields[field_id]['name'] = field_info['name']
        else:
            fields = tree_info['fields']
        self.inverted_fields = invert_dictionary(fields)
        self.all_inverted_fields = invert_dictionary(tree_info['fields'])
        self.tree = Tree(
            tree_info['root'],
            fields,
            model['objective_fields'])
        self.description = model['description']
        self.field_importance = tree_info.get('importance', None)
        if self.field_importance:
            # keep only the importance entries of fields the model uses
            self.field_importance = [element for element
                                     in self.field_importance
                                     if element[0] in fields]
        self.locale = model.get('locale', DEFAULT_LOCALE)
示例#5
0
    def __init__(self, model, api=None):
        """Create the local model from a model resource or a model id.

        Args:
            model: a model resource dict with a non-None 'resource' key,
                or a value `get_model_id` can turn into a model id (the
                model is then downloaded with `retrieve_model`).
            api: connection used for the download. A
                `BigML(storage=STORAGE)` connection is created when it is
                None and a download is needed.

        Raises:
            Exception: when no model id can be extracted, when the
                resource lacks a 'model' dict, when the model status code
                is not FINISHED, or when a key of 'model_fields' is not
                in the model 'fields'.
        """
        if (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            self.resource_id = model['resource']
        else:
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(error_message(model,
                                              resource_type='model',
                                              method='get'))
            model = retrieve_model(api, self.resource_id)

        # the resource may wrap the model information in 'object'
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        model_info = model['model']
        if 'model_fields' in model_info:
            fields = model_info['model_fields']
            # pagination or exclusion might cause a field not to
            # be in available fields dict
            if not all(key in model_info['fields'] for key in fields):
                raise Exception("Some fields are missing"
                                " to generate a local model."
                                " Please, provide a model with"
                                " the complete list of fields.")
            for field_id in fields:
                field_info = model_info['fields'][field_id]
                # the summary is optional: only copy it when the field
                # has one, so its absence does not raise a KeyError
                if 'summary' in field_info:
                    fields[field_id]['summary'] = field_info['summary']
                fields[field_id]['name'] = field_info['name']
        else:
            fields = model_info['fields']
        self.inverted_fields = invert_dictionary(fields)
        self.all_inverted_fields = invert_dictionary(model_info['fields'])
        self.tree = Tree(
            model_info['root'],
            fields,
            model['objective_fields'])
        self.description = model['description']
        self.field_importance = model_info.get('importance', None)
        if self.field_importance:
            # drop importance entries for fields the model does not use
            self.field_importance = [element for element
                                     in self.field_importance
                                     if element[0] in fields]
        self.locale = model.get('locale', DEFAULT_LOCALE)
示例#6
0
    def __init__(self, model, api=None):
        """Build the basic model information from a resource or an id.

        Args:
            model: a model structure accepted by `check_model_structure`,
                or a value from which `get_model_id` can extract a model
                id. In the latter case the model is retrieved with
                `retrieve_resource`, and fetched again with
                `api.get_model` if the retrieved copy does not pass
                `check_model_structure`.
            api: connection used to retrieve the model; `BigML()` is
                created when it is None and a retrieval is needed.

        Raises:
            Exception: when no model id can be extracted, when the
                resource has no "model" dict, when the model status code
                is not FINISHED, or when the field information needed to
                build the model is missing.
        """
        if check_model_structure(model):
            self.resource_id = model["resource"]
        else:
            # If only the model id is provided, the short version of the model
            # resource is used to build a basic summary of the model
            if api is None:
                api = BigML()
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(api.error_message(model, resource_type="model", method="get"))
            query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id, query_string=query_string)
            # Stored copies of the model structure might lack some necessary
            # keys
            if not check_model_structure(model):
                model = api.get_model(self.resource_id, query_string=query_string)

        if "object" in model and isinstance(model["object"], dict):
            model = model["object"]

        if not ("model" in model and isinstance(model["model"], dict)):
            raise Exception(
                "Cannot create the BaseModel instance. Could not" " find the 'model' key in the resource:\n\n%s" % model
            )
        status = get_status(model)
        if not ("code" in status and status["code"] == FINISHED):
            raise Exception("The model isn't finished yet")

        model_info = model["model"]
        # "model_fields" is preferred; "fields" is the fallback. When neither
        # is available `fields` is None and the error below is raised instead
        # of failing later on an unbound name.
        fields = model_info.get("model_fields", model_info.get("fields"))
        all_fields = model_info.get("fields", {})
        # pagination or exclusion might cause a field not to
        # be in available fields dict
        if fields is None or not all(key in all_fields for key in fields):
            raise Exception(
                "Some fields are missing"
                " to generate a local model."
                " Please, provide a model with"
                " the complete list of fields."
            )
        for field in fields:
            field_info = all_fields[field]
            if "summary" in field_info:
                fields[field]["summary"] = field_info["summary"]
            fields[field]["name"] = field_info["name"]
        objective_field = model["objective_fields"]
        ModelFields.__init__(self, fields, objective_id=extract_objective(objective_field))
        self.description = model["description"]
        self.field_importance = model_info.get("importance", None)
        if self.field_importance:
            # keep only the importance entries of fields the model uses
            self.field_importance = [element for element in self.field_importance if element[0] in fields]
        self.locale = model.get("locale", DEFAULT_LOCALE)
示例#7
0
    def __init__(self, model, api=None):
        """Build the local model and its prediction tree.

        `model` is used as given when it is a dict with a non-None
        'resource' key; otherwise its id is extracted with `get_model_id`
        and the model is fetched with `retrieve_model`, creating a
        `BigML(storage=STORAGE)` connection when `api` is None.

        Sets `regression_ready` only when the tree is a regression one:
        True if both numpy and scipy can be imported, False otherwise.

        Raises Exception when no model id can be extracted, when the
        resource has no 'model' dict, or when the model status code is
        not FINISHED.
        """
        is_resource = (isinstance(model, dict) and 'resource' in model
                       and model['resource'] is not None)
        if not is_resource:
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                message = api.error_message(model,
                                            resource_type='model',
                                            method='get')
                raise Exception(message)
            model = retrieve_model(api,
                                   self.resource_id,
                                   query_string=ONLY_MODEL)
        BaseModel.__init__(self, model, api=api)

        # work on the inner 'object' dict when the resource is wrapped
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        tree_info = model['model']
        distribution = tree_info['distribution']['training']
        self.ids_map = {}
        self.tree = Tree(tree_info['root'],
                         self.fields,
                         objective_field=self.objective_field,
                         root_distribution=distribution,
                         parent_id=None,
                         ids_map=self.ids_map)
        self.terms = {}

        if self.tree.regression:
            # the imports are only an availability check
            try:
                import numpy
                import scipy
            except ImportError:
                self.regression_ready = False
            else:
                self.regression_ready = True
示例#8
0
    def __init__(self, model, api=None):
        """Create the local model and the tree used for predictions.

        A dict holding a non-None 'resource' key is taken as the model
        resource. Any other value is resolved to a model id with
        `get_model_id` and fetched with `retrieve_model`; the connection
        defaults to `BigML(storage=STORAGE)` when `api` is None.

        For regression trees, `regression_ready` records whether numpy
        and scipy could both be imported.

        Raises Exception when no model id can be extracted, when the
        resource has no 'model' dict, or when the model status code is
        not FINISHED.
        """
        needs_retrieval = not (isinstance(model, dict) and
                               'resource' in model and
                               model['resource'] is not None)
        if needs_retrieval:
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(
                    api.error_message(model,
                                      resource_type='model',
                                      method='get'))
            model = retrieve_model(api, self.resource_id,
                                   query_string=ONLY_MODEL)
        BaseModel.__init__(self, model, api=api)

        # the resource may wrap the model information in 'object'
        if 'object' in model:
            inner = model['object']
            if isinstance(inner, dict):
                model = inner

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        distribution = model['model']['distribution']['training']
        self.ids_map = {}
        tree_options = {'objective_field': self.objective_field,
                        'root_distribution': distribution,
                        'parent_id': None,
                        'ids_map': self.ids_map}
        self.tree = Tree(model['model']['root'], self.fields, **tree_options)
        self.terms = {}

        if self.tree.regression:
            # importing is only a check that both packages are available
            try:
                import numpy
                import scipy
            except ImportError:
                self.regression_ready = False
            else:
                self.regression_ready = True
示例#9
0
def are_models_created(path, number_of_models, api):
    """Check the `models` file in the `path` directory.

    Every line of the file is stripped and converted with
    `api.get_model_id`. Returns a `(complete, model_ids)` tuple:
    `complete` is True only when the file could be read, every line was
    converted and the number of ids equals `number_of_models`.
    `model_ids` holds the ids collected up to the point where reading
    stopped (a ValueError from `api.get_model_id` or an IOError).

    """
    collected = []
    models_path = "%s%smodels" % (path, os.sep)
    try:
        with open(models_path) as handle:
            for raw_line in handle:
                try:
                    collected.append(api.get_model_id(raw_line.strip()))
                except ValueError:
                    return False, collected
    except IOError:
        return False, collected
    return len(collected) == number_of_models, collected
示例#10
0
    def __init__(self, model, api=None):
        """Build the basic model information from a resource or an id.

        Args:
            model: a model structure accepted by `check_model_structure`,
                or a value from which `get_model_id` can extract a model
                id. In the latter case the model is retrieved with
                `retrieve_resource`, and fetched again with
                `api.get_model` if the retrieved copy does not pass
                `check_model_structure`.
            api: connection used to retrieve the model; `BigML()` is
                created when it is None and a retrieval is needed.

        Raises:
            Exception: when no model id can be extracted, when the
                resource has no 'model' dict, when the model status code
                is not FINISHED, or when the field information needed to
                build the model is missing.
        """
        if check_model_structure(model):
            self.resource_id = model['resource']
        else:
            # If only the model id is provided, the short version of the model
            # resource is used to build a basic summary of the model
            if api is None:
                api = BigML()
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(api.error_message(model,
                                                  resource_type='model',
                                                  method='get'))
            query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id,
                                      query_string=query_string)
            # Stored copies of the model structure might lack some necessary
            # keys
            if not check_model_structure(model):
                model = api.get_model(self.resource_id,
                                      query_string=query_string)

        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the BaseModel instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        model_info = model['model']
        # 'model_fields' is preferred; 'fields' is the fallback. When
        # neither is available `fields` is None and the error below is
        # raised instead of failing later on an unbound name.
        fields = model_info.get('model_fields', model_info.get('fields'))
        all_fields = model_info.get('fields', {})
        # pagination or exclusion might cause a field not to
        # be in available fields dict
        if fields is None or not all(key in all_fields for key in fields):
            raise Exception("Some fields are missing"
                            " to generate a local model."
                            " Please, provide a model with"
                            " the complete list of fields.")
        for field in fields:
            field_info = all_fields[field]
            if 'summary' in field_info:
                fields[field]['summary'] = field_info['summary']
            fields[field]['name'] = field_info['name']
        objective_field = model['objective_fields']
        ModelFields.__init__(
            self, fields,
            objective_id=extract_objective(objective_field))
        self.description = model['description']
        self.field_importance = model_info.get('importance', None)
        if self.field_importance:
            # keep only the importance entries of fields the model uses
            self.field_importance = [element for element
                                     in self.field_importance
                                     if element[0] in fields]
        self.locale = model.get('locale', DEFAULT_LOCALE)
示例#11
0
文件: model.py 项目: araymund/python
    def __init__(self, model, api=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        A string argument is first opened as a JSON file; if it cannot be
        opened it is interpreted as a model id. A model that is not a
        dict with a non-None 'resource' key is retrieved with
        `retrieve_resource`, using `BigML(storage=STORAGE)` when `api`
        is None.

        Raises:
            ValueError: when the file is not valid JSON or its contents
                yield no model id.
            IOError: when the string is neither a readable file nor
                looks like a model id.
            Exception: when a 'model/' string yields no model id, when
                the resource has no 'model' dict or when the model
                status code is not FINISHED.

        """
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            # keep the original string: `model` is rebound to the file
            # contents and the error messages must show the path
            path = model
            try:
                with open(path) as model_file:
                    model = json.load(model_file)
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    if model.find('model/') > -1:
                        # the connection is needed to build the message
                        if api is None:
                            api = BigML(storage=STORAGE)
                        raise Exception(
                            api.error_message(model,
                                              resource_type='model',
                                              method='get'))
                    else:
                        raise IOError("Failed to open the expected JSON file"
                                      " at %s" % model)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected." % path)
            else:
                # checked outside the `try` so that this more specific
                # error is not replaced by the generic one above
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    raise ValueError("The JSON file does not seem"
                                     " to contain a valid BigML model"
                                     " representation.")

        if not (isinstance(model, dict) and 'resource' in model
                and model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            query_string = ONLY_MODEL
            model = retrieve_resource(api,
                                      self.resource_id,
                                      query_string=query_string)
        BaseModel.__init__(self, model, api=api)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        distribution = model['model']['distribution']['training']
        self.ids_map = {}
        self.tree = Tree(model['model']['root'],
                         self.fields,
                         objective_field=self.objective_id,
                         root_distribution=distribution,
                         parent_id=None,
                         ids_map=self.ids_map)
        self.terms = {}

        if self.tree.regression:
            # the imports are only an availability check
            try:
                import numpy
                import scipy
                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
示例#12
0
    def __init__(self, model, api=None):
        """Build the basic model information from a resource or an id.

        A dict with a non-None 'resource' key is used as given. Any other
        value is resolved to a model id with `get_model_id` and retrieved
        with `retrieve_model` using a query string built from ONLY_MODEL
        and EXCLUDE_ROOT; `BigML()` is created when `api` is None.

        Raises Exception when no model id can be extracted, when the
        resource has no 'model' dict, when the model status code is not
        FINISHED, or when a key of 'model_fields' is not in the model
        'fields'.
        """
        given_resource = (isinstance(model, dict) and 'resource' in model
                          and model['resource'] is not None)
        if given_resource:
            self.resource_id = model['resource']
        else:
            # If only the model id is provided, the short version of the model
            # resource is used to build a basic summary of the model
            if api is None:
                api = BigML()
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                message = error_message(model,
                                        resource_type='model',
                                        method='get')
                raise Exception(message)
            query_string = '%s;%s' % (ONLY_MODEL, EXCLUDE_ROOT)
            model = retrieve_model(api, self.resource_id,
                                   query_string=query_string)

        # work on the inner 'object' dict when the resource is wrapped
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the BaseModel instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        model_info = model['model']
        if 'model_fields' not in model_info:
            fields = model_info['fields']
        else:
            fields = model_info['model_fields']
            # pagination or exclusion might cause a field not to
            # be in available fields dict
            if any(key not in model_info['fields']
                   for key in fields.keys()):
                raise Exception("Some fields are missing"
                                " to generate a local model."
                                " Please, provide a model with"
                                " the complete list of fields.")
            for field_id in fields:
                field_info = model_info['fields'][field_id]
                if 'summary' in field_info:
                    fields[field_id]['summary'] = field_info['summary']
                fields[field_id]['name'] = field_info['name']

        self.objective_field = extract_objective(model['objective_fields'])
        self.uniquify_varnames(fields)
        self.inverted_fields = invert_dictionary(fields)
        self.all_inverted_fields = invert_dictionary(model_info['fields'])
        self.fields = fields
        self.description = model['description']

        importance = model_info.get('importance', None)
        if importance:
            # keep only the importance entries of fields the model uses
            importance = [element for element in importance
                          if element[0] in fields]
        self.field_importance = importance
        self.locale = model.get('locale', DEFAULT_LOCALE)
示例#13
0
文件: model.py 项目: rmmx/python
    def __init__(self, model, api=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        A string argument is first opened as a JSON file; if it cannot be
        opened it is interpreted as a model id. A model that is not a
        dict with a non-None 'resource' key is retrieved with
        `retrieve_resource`, using `BigML(storage=STORAGE)` when `api`
        is None.

        Raises:
            ValueError: when the file is not valid JSON or its contents
                yield no model id.
            IOError: when the string is neither a readable file nor
                looks like a model id.
            Exception: when a 'model/' string yields no model id, when
                the resource has no 'model' dict or when the model
                status code is not FINISHED.

        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            # keep the original string: `model` is rebound to the file
            # contents and the error messages must show the path
            path = model
            try:
                with open(path) as model_file:
                    model = json.load(model_file)
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    if model.find('model/') > -1:
                        # the connection is needed to build the message
                        if api is None:
                            api = BigML(storage=STORAGE)
                        raise Exception(
                            api.error_message(model,
                                              resource_type='model',
                                              method='get'))
                    else:
                        raise IOError("Failed to open the expected JSON file"
                                      " at %s" % model)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected." % path)
            else:
                # checked outside the `try` so that this more specific
                # error is not replaced by the generic one above
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    raise ValueError("The JSON file does not seem"
                                     " to contain a valid BigML model"
                                     " representation.")

        if not (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id,
                                      query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        distribution = model['model']['distribution']['training']
        self.tree = Tree(
            model['model']['root'],
            self.fields,
            objective_field=self.objective_id,
            root_distribution=distribution,
            parent_id=None,
            ids_map=self.ids_map)

        if self.tree.regression:
            # the imports are only an availability check
            try:
                import numpy
                import scipy
                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
示例#14
0
文件: model.py 项目: javinp/python
    def __init__(self, model, api=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        When the structure has to be retrieved and no `api` connection is
        given, a `BigML(storage=STORAGE)` connection is created.

        Raises:
            ValueError: the file could be opened but is not valid JSON, or
                no model id could be extracted from its contents.
            IOError: the string is neither a readable file nor a model id.
            Exception: the string contains 'model/' but no model id could
                be extracted from it, the model is not finished, or the
                resource lacks the 'model' key.
        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            # `model` is rebound to the parsed JSON below, so keep the
            # original string around for the id lookup and error messages
            source = model
            try:
                with open(source) as model_file:
                    model = json.load(model_file)
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(source)
                if self.resource_id is None:
                    if "model/" in source:
                        # the error message is built by the connection
                        # object, so one is needed even if none was given
                        if api is None:
                            api = BigML(storage=STORAGE)
                        raise Exception(api.error_message(source, resource_type="model", method="get"))
                    raise IOError("Failed to open the expected JSON file at %s" % source)
            except ValueError:
                raise ValueError("Failed to interpret %s. JSON file expected." % source)
            else:
                # checked outside the `try` so that this more specific
                # error is not replaced by the JSON parsing one
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    raise ValueError(
                        "The JSON file does not seem to contain a valid BigML model representation."
                    )

        # checks whether the information needed for local predictions is in
        # the first argument
        if isinstance(model, dict) and not check_model_fields(model):
            # if the fields used by the model are not
            # available, use only ID to retrieve it again
            model = get_model_id(model)
            self.resource_id = model

        if not (isinstance(model, dict) and "resource" in model and model["resource"] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id, query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api)
        if "object" in model and isinstance(model["object"], dict):
            model = model["object"]

        if "model" in model and isinstance(model["model"], dict):
            status = get_status(model)
            if "code" in status and status["code"] == FINISHED:
                distribution = model["model"]["distribution"]["training"]
                # will store global information in the tree: regression and
                # max_bins number
                tree_info = {"max_bins": 0}
                self.tree = Tree(
                    model["model"]["root"],
                    self.fields,
                    objective_field=self.objective_id,
                    root_distribution=distribution,
                    parent_id=None,
                    ids_map=self.ids_map,
                    tree_info=tree_info,
                )
                # the "regression" key is expected to be filled in by Tree
                self.tree.regression = tree_info["regression"]
                if self.tree.regression:
                    self._max_bins = tree_info["max_bins"]
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception(
                "Cannot create the Model instance. Could not" " find the 'model' key in the resource:\n\n%s" % model
            )
        if self.tree.regression:
            # the imports are only a probe for the optional numeric
            # libraries; the result is recorded in `regression_ready`
            try:
                import numpy
                import scipy

                self.regression_ready = True
            except ImportError:
                self.regression_ready = False
示例#15
0
    def __init__(self, model, api=None, fields=None):
        """Sets up the basic model information from a structure or an id.

        `model` is used as is when it passes `check_model_structure`;
        otherwise it is handled as a model id and the resource is
        retrieved through `api` (a new `BigML()` connection when none is
        given). `fields`, when provided, is used instead of the fields
        stored in the model.

        Raises an Exception when no model id can be extracted, when the
        model is not finished, when the 'model' key is missing or when
        some of the model fields are not available.
        """
        if not check_model_structure(model):
            # Only the model id is available, so the short version of the
            # model resource is used to build a basic summary of the model
            if api is None:
                api = BigML()
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(api.error_message(model,
                                                  resource_type='model',
                                                  method='get'))
            query_string = (EXCLUDE_FIELDS if isinstance(fields, dict)
                            else ONLY_MODEL)
            model = retrieve_resource(api, self.resource_id,
                                      query_string=query_string)
            # A stored copy of the model structure can lack some of the
            # necessary keys: ask the API for it in that case
            if not check_model_structure(model):
                model = api.get_model(self.resource_id,
                                      query_string=query_string)
        else:
            self.resource_id = model['resource']

        # unwrap the resource envelope when there is one
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the BaseModel instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)

        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        model_info = model['model']
        if fields is None and ('model_fields' in model_info or
                               'fields' in model_info):
            fields = model_info.get('model_fields',
                                    model_info.get('fields', []))
            # pagination or exclusion might leave a field out of the
            # available fields dict
            for field_id in fields.keys():
                if field_id not in model_info['fields']:
                    raise Exception("Some fields are missing"
                                    " to generate a local model."
                                    " Please, provide a model with"
                                    " the complete list of fields.")
            # copy the name and summary kept in the complete fields dict
            for field_id in fields:
                details = model_info['fields'][field_id]
                if 'summary' in details:
                    fields[field_id]['summary'] = details['summary']
                fields[field_id]['name'] = details['name']

        objective_id = extract_objective(model['objective_fields'])
        ModelFields.__init__(self, fields, objective_id=objective_id)
        self.description = model['description']
        self.field_importance = model_info.get('importance')
        if self.field_importance:
            # keep only the importance of the fields that are in use
            self.field_importance = [
                entry for entry in self.field_importance
                if entry[0] in fields]
        self.locale = model.get('locale', DEFAULT_LOCALE)
示例#16
0
    def __init__(self, model, api=None, fields=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        `fields`, when given, is handed to `BaseModel.__init__` and makes
        the retrieval use the EXCLUDE_FIELDS query string. When the
        structure has to be retrieved and no `api` connection is given,
        a `BigML(storage=STORAGE)` connection is created.

        Raises:
            ValueError: the file could be opened but is not valid JSON, or
                no model id could be extracted from its contents.
            IOError: the string is neither a readable file nor a model id.
            Exception: the string contains 'model/' but no model id could
                be extracted from it, the model is not finished, or the
                resource lacks the 'model' key.
        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        self.regression = False
        self.boosting = None
        self.class_names = None
        # subclasses may set their own tree class beforehand
        if not hasattr(self, 'tree_class'):
            self.tree_class = Tree
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            # `model` is rebound to the parsed JSON below, so keep the
            # original string around for the id lookup and error messages
            source = model
            try:
                with open(source) as model_file:
                    model = json.load(model_file)
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(source)
                if self.resource_id is None:
                    if 'model/' in source:
                        # the error message is built by the connection
                        # object, so one is needed even if none was given
                        if api is None:
                            api = BigML(storage=STORAGE)
                        raise Exception(
                            api.error_message(source,
                                              resource_type='model',
                                              method='get'))
                    raise IOError("Failed to open the expected JSON file"
                                  " at %s" % source)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected." % source)
            else:
                # checked outside the `try` so that this more specific
                # error is not replaced by the JSON parsing one
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    raise ValueError("The JSON file does not seem"
                                     " to contain a valid BigML model"
                                     " representation.")

        # checks whether the information needed for local predictions is in
        # the first argument
        if isinstance(model, dict) and \
                not fields and \
                not check_model_fields(model):
            # if the fields used by the model are not
            # available, use only ID to retrieve it again
            model = get_model_id(model)
            self.resource_id = model

        if not (isinstance(model, dict) and 'resource' in model and
                model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            if fields is not None and isinstance(fields, dict):
                query_string = EXCLUDE_FIELDS
            else:
                query_string = ONLY_MODEL
            model = retrieve_resource(api, self.resource_id,
                                      query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api, fields=fields)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:

                # boosting models are to be handled using the BoostedTree
                # class
                if model.get("boosted_ensemble"):
                    self.boosting = model.get('boosting', False)
                if self.boosting == {}:
                    self.boosting = False

                # a non-boosted model is a regression when its objective
                # field is numeric; a boosted one when it has no objective
                # class. bool() keeps the attribute a real boolean: the
                # bare and/or chain could evaluate to None
                self.regression = bool(
                    (not self.boosting and
                     self.fields[self.objective_id]['optype'] == 'numeric')
                    or (self.boosting and
                        self.boosting.get("objective_class") is None))

                if self.boosting:
                    self.tree = BoostedTree(
                        model['model']['root'],
                        self.fields,
                        objective_field=self.objective_id)
                else:
                    distribution = model['model']['distribution']['training']
                    # will store global information in the tree: regression and
                    # max_bins number
                    tree_info = {'max_bins': 0}
                    self.tree = self.tree_class(
                        model['model']['root'],
                        self.fields,
                        objective_field=self.objective_id,
                        root_distribution=distribution,
                        parent_id=None,
                        ids_map=self.ids_map,
                        tree_info=tree_info)

                    # the 'regression' key is expected to be filled in by
                    # the tree constructor
                    self.tree.regression = tree_info['regression']

                    if self.tree.regression:
                        # the imports are only a probe for the optional
                        # numeric libraries
                        try:
                            import numpy
                            import scipy
                            self._max_bins = tree_info['max_bins']
                            self.regression_ready = True
                        except ImportError:
                            self.regression_ready = False
                    else:
                        root_dist = self.tree.distribution
                        self.class_names = sorted([category[0]
                                                   for category in root_dist])
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
示例#17
0
    def __init__(self, model, api=None, fields=None, checked=True):
        """Sets up the basic model information from a structure or an id.

        `model` is used as is when it is a dict and either `checked` is
        true or it passes the local information check; otherwise it is
        handled as a model id and the resource is retrieved through the
        connection returned by `get_api_connection(api)`. `fields`, when
        provided, is used instead of the fields stored in the model.

        Raises an Exception when no model id can be extracted, when the
        model is not finished, when the 'model' key is missing or when
        some of the model fields are not available.
        """
        check_fn = check_local_but_fields if fields is not None else \
            check_local_info
        if isinstance(model, dict) and (checked or check_fn(model)):
            self.resource_id = model['resource']
        else:
            # If only the model id is provided, the short version of the model
            # resource is used to build a basic summary of the model
            self.api = get_api_connection(api)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(self.api.error_message(model,
                                                       resource_type='model',
                                                       method='get'))
            if fields is not None and isinstance(fields, dict):
                query_string = EXCLUDE_FIELDS
            else:
                query_string = ONLY_MODEL
            # use the resolved connection: the raw `api` argument is None
            # whenever the caller did not provide one
            model = retrieve_resource(self.api, self.resource_id,
                                      query_string=query_string,
                                      no_check_fields=fields is not None)

        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:
                if (fields is None and ('model_fields' in model['model'] or
                                        'fields' in model['model'])):
                    fields = model['model'].get('model_fields',
                                                model['model'].get('fields',
                                                                   []))
                    # model_fields doesn't contain the datetime fields
                    fields.update(datetime_fields(model['model'].get('fields',
                                                                     {})))
                    # pagination or exclusion might cause a field not to
                    # be in available fields dict
                    if not all(key in model['model']['fields']
                               for key in fields):
                        raise Exception("Some fields are missing"
                                        " to generate a local model."
                                        " Please, provide a model with"
                                        " the complete list of fields.")
                    # copy the name and summary kept in the complete
                    # fields dict
                    for field in fields:
                        field_info = model['model']['fields'][field]
                        if 'summary' in field_info:
                            fields[field]['summary'] = field_info['summary']
                        fields[field]['name'] = field_info['name']
                objective_field = model['objective_fields']
                missing_tokens = model['model'].get('missing_tokens')

                ModelFields.__init__(
                    self, fields, objective_id=extract_objective(objective_field),
                    missing_tokens=missing_tokens)
                self.description = model['description']
                self.field_importance = model['model'].get('importance',
                                                           None)
                if self.field_importance:
                    # keep only the importance of the fields that are in use
                    self.field_importance = [element for element
                                             in self.field_importance
                                             if element[0] in fields]
                self.locale = model.get('locale', DEFAULT_LOCALE)
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the BaseModel instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
示例#18
0
    def __init__(self, model, api=None, fields=None):
        """The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        `fields`, when given, is handed to `BaseModel.__init__` and makes
        the retrieval use the EXCLUDE_FIELDS query string. When the
        structure has to be retrieved and no `api` connection is given,
        a `BigML(storage=STORAGE)` connection is created.

        Raises:
            ValueError: the file could be opened but is not valid JSON, or
                no model id could be extracted from its contents.
            IOError: the string is neither a readable file nor a model id.
            Exception: the string contains 'model/' but no model id could
                be extracted from it, the model is not finished, or the
                resource lacks the 'model' key.
        """
        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        self.regression = False
        self.boosting = None
        self.class_names = None
        # subclasses may set their own tree class beforehand
        if not hasattr(self, 'tree_class'):
            self.tree_class = Tree
        # the string can be a path to a JSON file
        if isinstance(model, basestring):
            # `model` is rebound to the parsed JSON below, so keep the
            # original string around for the id lookup and error messages
            source = model
            try:
                with open(source) as model_file:
                    model = json.load(model_file)
            except IOError:
                # if it is not a path, it can be a model id
                self.resource_id = get_model_id(source)
                if self.resource_id is None:
                    if 'model/' in source:
                        # the error message is built by the connection
                        # object, so one is needed even if none was given
                        if api is None:
                            api = BigML(storage=STORAGE)
                        raise Exception(
                            api.error_message(source,
                                              resource_type='model',
                                              method='get'))
                    raise IOError("Failed to open the expected JSON file"
                                  " at %s" % source)
            except ValueError:
                raise ValueError("Failed to interpret %s."
                                 " JSON file expected." % source)
            else:
                # checked outside the `try` so that this more specific
                # error is not replaced by the JSON parsing one
                self.resource_id = get_model_id(model)
                if self.resource_id is None:
                    raise ValueError("The JSON file does not seem"
                                     " to contain a valid BigML model"
                                     " representation.")

        # checks whether the information needed for local predictions is in
        # the first argument
        if isinstance(model, dict) and \
                not fields and \
                not check_model_fields(model):
            # if the fields used by the model are not
            # available, use only ID to retrieve it again
            model = get_model_id(model)
            self.resource_id = model

        if not (isinstance(model, dict) and 'resource' in model
                and model['resource'] is not None):
            if api is None:
                api = BigML(storage=STORAGE)
            if fields is not None and isinstance(fields, dict):
                query_string = EXCLUDE_FIELDS
            else:
                query_string = ONLY_MODEL
            model = retrieve_resource(api,
                                      self.resource_id,
                                      query_string=query_string)
        else:
            self.resource_id = get_model_id(model)
        BaseModel.__init__(self, model, api=api, fields=fields)
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' in model and isinstance(model['model'], dict):
            status = get_status(model)
            if 'code' in status and status['code'] == FINISHED:

                # boosting models are to be handled using the BoostedTree
                # class
                if model.get("boosted_ensemble"):
                    self.boosting = model.get('boosting', False)
                if self.boosting == {}:
                    self.boosting = False

                # a non-boosted model is a regression when its objective
                # field is numeric; a boosted one when it has no objective
                # class. bool() keeps the attribute a real boolean: the
                # bare and/or chain could evaluate to None
                self.regression = bool(
                    (not self.boosting and
                     self.fields[self.objective_id]['optype'] == 'numeric')
                    or (self.boosting and
                        self.boosting.get("objective_class") is None))

                if self.boosting:
                    self.tree = BoostedTree(model['model']['root'],
                                            self.fields,
                                            objective_field=self.objective_id)
                else:
                    distribution = model['model']['distribution']['training']
                    # will store global information in the tree: regression and
                    # max_bins number
                    tree_info = {'max_bins': 0}
                    self.tree = self.tree_class(
                        model['model']['root'],
                        self.fields,
                        objective_field=self.objective_id,
                        root_distribution=distribution,
                        parent_id=None,
                        ids_map=self.ids_map,
                        tree_info=tree_info)

                    # the 'regression' key is expected to be filled in by
                    # the tree constructor
                    self.tree.regression = tree_info['regression']

                    if self.tree.regression:
                        # the imports are only a probe for the optional
                        # numeric libraries
                        try:
                            import numpy
                            import scipy
                            self._max_bins = tree_info['max_bins']
                            self.regression_ready = True
                        except ImportError:
                            self.regression_ready = False
                    else:
                        root_dist = self.tree.distribution
                        self.class_names = sorted(
                            [category[0] for category in root_dist])
            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
示例#19
0
    def __init__(self, ensemble, api=None, max_models=None, cache_get=None):
        """Builds a local ensemble from its models.

        `ensemble` can be a list of `Model` instances, a list of values
        that `get_model_id` accepts, or anything that
        `self.get_ensemble_resource` can turn into an ensemble resource.
        `api` is resolved through `get_api_connection`. `max_models`,
        when set, splits the list of models in chunks of that size; a
        `MultiModel` is only built here when there is a single chunk.
        `cache_get`, when `use_cache` accepts it, is used to load the
        previously stored attributes instead of building them.

        Raises ValueError when the list of model ids cannot be verified
        and Exception when building a `Model` from the cache fails.
        """

        # NOTE(review): every other reference in this method uses
        # `models_splits`; `model_splits` may be a typo - confirm
        self.model_splits = []
        self.multi_model = None
        self.api = get_api_connection(api)
        self.fields = None
        self.class_names = None
        if use_cache(cache_get):
            # using a cache to store the model attributes
            self.__dict__ = load(get_ensemble_id(ensemble), cache_get)
            # replacing __dict__ drops the attributes set above, so the
            # connection is assigned again
            self.api = get_api_connection(api)
            if len(self.models_splits) == 1:
                # retrieve the models from a cache get function
                try:
                    models = [
                        Model(model_id, cache_get=cache_get)
                        for model_id in self.models_splits[0]
                    ]
                except Exception as exc:
                    raise Exception('Error while calling the user-given'
                                    ' function %s: %s' %
                                    (cache_get.__name__, str(exc)))
                self.multi_model = MultiModel(models,
                                              self.api,
                                              fields=self.fields,
                                              class_names=self.class_names,
                                              cache_get=cache_get)
            return

        self.resource_id = None
        self.objective_id = None
        self.distributions = None
        self.distribution = None
        self.boosting = None
        self.boosting_offsets = None
        self.cache_get = None
        self.regression = False
        self.importance = {}
        query_string = ONLY_MODEL
        no_check_fields = False
        self.input_fields = []
        if isinstance(ensemble, list):
            # the ensemble is given as a list of models
            if all([isinstance(model, Model) for model in ensemble]):
                models = ensemble
                self.model_ids = [
                    local_model.resource_id for local_model in models
                ]
            else:
                try:
                    models = [get_model_id(model) for model in ensemble]
                    self.model_ids = models
                except ValueError as exc:
                    raise ValueError('Failed to verify the list of models.'
                                     ' Check your model id values: %s' %
                                     str(exc))

        else:
            # the ensemble is given as a resource (or a reference to it)
            ensemble = self.get_ensemble_resource(ensemble)
            self.resource_id = get_ensemble_id(ensemble)
            if not check_local_but_fields(ensemble):
                # avoid checking fields because of old ensembles
                ensemble = retrieve_resource(self.api,
                                             self.resource_id,
                                             no_check_fields=True)

            if ensemble['object'].get('type') == BOOSTING:
                self.boosting = ensemble['object'].get('boosting')
            models = ensemble['object']['models']
            self.distributions = ensemble['object'].get('distributions', [])
            self.importance = ensemble['object'].get('importance', [])
            self.model_ids = models
            # new ensembles have the fields structure
            if ensemble['object'].get('ensemble'):
                self.fields = ensemble['object'].get( \
                    'ensemble', {}).get("fields")
                self.objective_id = ensemble['object'].get("objective_field")
                query_string = EXCLUDE_FIELDS
                no_check_fields = True
            self.input_fields = ensemble['object'].get('input_fields')

        # split the models in chunks of `max_models` (a single chunk when
        # no limit is given)
        number_of_models = len(models)
        if max_models is None:
            self.models_splits = [models]
        else:
            self.models_splits = [
                models[index:(index + max_models)]
                for index in range(0, number_of_models, max_models)
            ]
        if len(self.models_splits) == 1:
            # single chunk: every model is made available
            if not isinstance(models[0], Model):
                # NOTE(review): this point is only reached when the
                # `use_cache(cache_get)` check at the top was falsy, so
                # this branch looks unreachable unless `use_cache` can
                # change its answer between calls - confirm
                if use_cache(cache_get):
                    # retrieve the models from a cache get function
                    try:
                        models = [
                            Model(model_id, cache_get=cache_get)
                            for model_id in self.models_splits[0]
                        ]
                        self.cache_get = cache_get
                    except Exception as exc:
                        raise Exception('Error while calling the user-given'
                                        ' function %s: %s' %
                                        (cache_get.__name__, str(exc)))
                else:
                    models = [retrieve_resource( \
                        self.api,
                        model_id,
                        query_string=query_string,
                        no_check_fields=no_check_fields)
                              for model_id in self.models_splits[0]]
            model = models[0]

        else:
            # only retrieving first model
            self.cache_get = cache_get
            # NOTE(review): when `models` already holds Model instances
            # nothing binds `model` in this branch; the later use relies
            # on the `for model in models` loop below having run - confirm
            if not isinstance(models[0], Model):
                # NOTE(review): same reachability doubt as in the single
                # chunk branch above
                if use_cache(cache_get):
                    # retrieve the models from a cache get function
                    try:
                        model = Model(self.models_splits[0][0],
                                      cache_get=cache_get)
                        self.cache_get = cache_get
                    except Exception as exc:
                        raise Exception('Error while calling the user-given'
                                        ' function %s: %s' %
                                        (cache_get.__name__, str(exc)))
                else:
                    model = retrieve_resource( \
                        self.api,
                        self.models_splits[0][0],
                        query_string=query_string,
                        no_check_fields=no_check_fields)

                models = [model]

        if self.distributions is None:
            # no distributions were provided: take them from the models,
            # either from Model instances (attribute) or from model
            # resources (dict lookup)
            try:
                self.distributions = []
                # this loop rebinds `model`, which is used further down
                for model in models:
                    self.distributions.append(
                        {'training': model.root_distribution})
            except AttributeError:
                self.distributions = [
                    model['object']['model']['distribution']
                    for model in models
                ]

        if self.boosting is None:
            self._add_models_attrs(model, max_models)

        if self.fields is None:
            self.fields, self.objective_id = self.all_model_fields(
                max_models=max_models)

        if self.fields:
            add_distribution(self)
        # NOTE(review): `self.fields` is indexed here even when the
        # check above found it falsy - confirm it can never be empty
        self.regression = \
            self.fields[self.objective_id].get('optype') == NUMERIC
        if self.boosting:
            # `self.boosting` is only set when an ensemble resource was
            # given, so `ensemble` is a resource dict at this point
            self.boosting_offsets = ensemble['object'].get('initial_offset',
                                                           0) \
                if self.regression else dict(ensemble['object'].get( \
                    'initial_offsets', []))
        if not self.regression:
            # class names come from the objective field summary or, if
            # that is missing, from the training distributions
            try:
                objective_field = self.fields[self.objective_id]
                categories = objective_field['summary']['categories']
                classes = [category[0] for category in categories]
            except (AttributeError, KeyError):
                classes = set()
                for distribution in self.distributions:
                    for category in distribution['training']['categories']:
                        classes.add(category[0])

            self.class_names = sorted(classes)
            # NOTE(review): this repeats the summary lookup guarded by
            # the try/except above, but outside of it: a missing key
            # would raise KeyError here - confirm
            self.objective_categories = [category for \
                category, _ in self.fields[self.objective_id][ \
               "summary"]["categories"]]

        ModelFields.__init__( \
            self, self.fields,
            objective_id=self.objective_id)

        if len(self.models_splits) == 1:
            self.multi_model = MultiModel(models,
                                          self.api,
                                          fields=self.fields,
                                          class_names=self.class_names)
示例#20
0
    def __init__(self, model, api=None, fields=None, cache_get=None):
        """Build a local Model from a finished model resource.

        The Model constructor can be given as first argument:
            - a model structure
            - a model id
            - a path to a JSON file containing a model structure

        :param model: the model reference, in any of the forms above
        :param api: connection passed through `get_api_connection`; the
                    resulting connection is used to retrieve the resource
        :param fields: optional fields structure forwarded to
                       `BaseModel.__init__`; when it is not None the
                       resource is retrieved with `no_check_fields=True`
        :param cache_get: optional cache accessor; when `use_cache`
                          accepts it, the instance attributes are loaded
                          from the cache and no other work is done
        :raises Exception: if the resource has no 'model' dictionary or
                           if its status code is not FINISHED
        """

        if use_cache(cache_get):
            # using a cache to store the model attributes
            self.__dict__ = load(get_model_id(model), cache_get)
            return

        self.resource_id = None
        self.ids_map = {}
        self.terms = {}
        self.regression = False
        self.boosting = None
        self.class_names = None
        api = get_api_connection(api)
        # retrieving the model information from the given reference
        self.resource_id, model = get_resource_dict(
            model, "model", api=api, no_check_fields=fields is not None)

        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:"
                            "\n\n%s" % model)

        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            # This path is also reached when the status has no 'code' or
            # carries a code unknown to STATUSES, so the label lookup must
            # not fail and hide the real error message.
            code = status['code'] if 'code' in status else None
            try:
                status_name = STATUSES[code]
            except (KeyError, IndexError, TypeError):
                status_name = code
            raise Exception("Cannot create the Model instance."
                            " Only correctly finished models can be"
                            " used. The model status is currently:"
                            " %s\n" % (status_name,))

        # fill boosting info before creating modelfields
        if model.get("boosted_ensemble"):
            self.boosting = model.get('boosting', False)
        if self.boosting == {}:
            self.boosting = False

        self.input_fields = model["input_fields"]
        # NOTE(review): BaseModel.__init__ presumably sets self.fields,
        # self.objective_id and may update self.regression, all of which are
        # read below -- confirm against BaseModel.
        BaseModel.__init__(self, model, api=api, fields=fields)

        root = model['model']['root']
        self.weighted = "weighted_objective_summary" in root

        if self.boosting:
            # build boosted tree
            self.tree = b.build_boosting_tree(root)
            self.tree_type = BOOSTING
            self.offsets = b.OFFSETS
        elif self.regression:
            distribution = model['model']['distribution']
            self.root_distribution = distribution['training']
            # build regression tree
            self.tree = r.build_regression_tree(
                root,
                distribution=self.root_distribution,
                weighted=self.weighted)
            self.tree_type = REGRESSION
            self.offsets = r.OFFSETS[str(self.weighted)]
        else:
            # build classification tree
            distribution = model['model']['distribution']
            self.root_distribution = distribution['training']
            self.laplacian_term = laplacian_term(
                extract_distribution(self.root_distribution)[1],
                self.weighted)
            self.tree = c.build_classification_tree(
                root,
                distribution=self.root_distribution,
                weighted=self.weighted)
            self.class_names = sorted(
                category[0]
                for category in self.root_distribution["categories"])
            objective_summary = self.fields[self.objective_id]["summary"]
            self.objective_categories = [
                category for category, _ in objective_summary["categories"]]
            self.tree_type = CLASSIFICATION
            self.offsets = c.OFFSETS[str(self.weighted)]