Пример #1
0
    def __init__(self,
                 fields,
                 missing_tokens=None,
                 data_locale=DEFAULT_LOCALE,
                 verbose=False,
                 objective_field=None,
                 objective_field_present=False,
                 include=None):
        """Stores the fields structure and the lookups derived from it.

        fields: fields structure. It is indexed through
                `invert_dictionary` by 'name' and by 'column_number'
                and its keys are used as field ids.
        missing_tokens: list stored in `self.missing_tokens`. When it is
                        omitted (or None) a new `['']` list is used.
        data_locale, verbose: handed over to `find_locale`.
        objective_field, objective_field_present: handed over to
                `self.update_objective_field`.
        include: ids of the fields kept in `self.filtered_fields`; every
                 field id is kept when it is None.
        """

        find_locale(data_locale, verbose)

        # A list literal used as the default in the signature would be one
        # single object shared by every instance, so it is built per call.
        if missing_tokens is None:
            missing_tokens = ['']

        self.fields = fields
        self.fields_by_name = invert_dictionary(fields, 'name')
        self.fields_by_column_number = invert_dictionary(
            fields, 'column_number')
        self.missing_tokens = missing_tokens
        self.fields_columns = sorted(self.fields_by_column_number.keys())
        # Ids of the fields to be included
        self.filtered_fields = (self.fields.keys()
                                if include is None else include)
        # To be updated in update_objective_field
        self.row_ids = None
        self.headers = None
        self.objective_field = None
        self.objective_field_present = None
        self.filtered_indexes = None
        self.update_objective_field(objective_field, objective_field_present)
Пример #2
0
 def __init__(self, model):
     """Builds the local tree out of a finished model resource.

     model: either a dict that wraps the resource under the 'object' key
            or the resource dict itself (a dict holding a 'model' dict).

     Sets `self.inverted_fields` and `self.tree`.

     Raises Exception("Invalid model structure") when `model` matches
     neither layout and Exception("The model isn't finished yet") when
     the status code is missing or differs from FINISHED.
     """
     if not isinstance(model, dict):
         raise Exception("Invalid model structure")

     # Both accepted layouts carry the same keys once unwrapped
     if 'object' in model and isinstance(model['object'], dict):
         resource = model['object']
     elif 'model' in model and isinstance(model['model'], dict):
         resource = model
     else:
         raise Exception("Invalid model structure")

     # A resource without a status code cannot be confirmed as finished.
     # Returning silently here would leave the instance without `tree`
     # and `inverted_fields`, so it is reported like an unfinished model.
     finished = ('status' in resource and
                 'code' in resource['status'] and
                 resource['status']['code'] == FINISHED)
     if not finished:
         raise Exception("The model isn't finished yet")

     fields = resource['model']['fields']
     self.inverted_fields = invert_dictionary(fields)
     self.tree = Tree(
         resource['model']['root'],
         fields,
         resource['objective_fields'])
Пример #3
0
    def __init__(
        self,
        fields,
        missing_tokens=None,
        data_locale=DEFAULT_LOCALE,
        verbose=False,
        objective_field=None,
        objective_field_present=False,
        include=None,
    ):
        """Stores the fields structure and the lookups derived from it.

        fields: fields structure. It is indexed through
                `invert_dictionary` by "name" and by "column_number"
                and its keys are used as field ids.
        missing_tokens: list stored in `self.missing_tokens`. When it is
                        omitted (or None) a new `[""]` list is used.
        data_locale, verbose: handed over to `find_locale`.
        objective_field, objective_field_present: handed over to
                `self.update_objective_field`.
        include: ids of the fields kept in `self.filtered_fields`; every
                 field id is kept when it is None.
        """

        find_locale(data_locale, verbose)

        # A list literal used as the default in the signature would be one
        # single object shared by every instance, so it is built per call.
        if missing_tokens is None:
            missing_tokens = [""]

        self.fields = fields
        self.fields_by_name = invert_dictionary(fields, "name")
        self.fields_by_column_number = invert_dictionary(fields, "column_number")
        self.missing_tokens = missing_tokens
        self.fields_columns = sorted(self.fields_by_column_number.keys())
        # Ids of the fields to be included
        self.filtered_fields = self.fields.keys() if include is None else include
        # To be updated in update_objective_field
        self.row_ids = None
        self.headers = None
        self.objective_field = None
        self.objective_field_present = None
        self.filtered_indexes = None
        self.update_objective_field(objective_field, objective_field_present)
Пример #4
0
    def __init__(self, model, api=None):
        """Builds a local model from a model resource or a model reference.

        model: a dict with a non-None 'resource' key is used as given.
               Anything else is resolved with `get_model_id` and fetched
               with `retrieve_model`.
        api: connection used for the retrieval; `BigML(storage=STORAGE)`
             is created when it is None and a retrieval is needed.

        Sets resource_id, inverted_fields, all_inverted_fields, tree,
        description, field_importance and locale.

        Raises Exception when the id cannot be resolved, when the resource
        has no 'model' dict, when the status code is not FINISHED or when
        some of the 'model_fields' ids are not in the 'fields' dict.
        """
        resource_given = (isinstance(model, dict) and
                          'resource' in model and
                          model['resource'] is not None)
        if resource_given:
            self.resource_id = model['resource']
        else:
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(error_message(model,
                                              resource_type='model',
                                              method='get'))
            model = retrieve_model(api, self.resource_id)

        # Unwrap the resource when it comes inside an 'object' envelope
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)

        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        model_info = model['model']
        if 'model_fields' in model_info:
            fields = model_info['model_fields']
            # pagination or exclusion might cause a field not to
            # be in available fields dict
            if any(field_id not in model_info['fields']
                   for field_id in fields):
                raise Exception("Some fields are missing"
                                " to generate a local model."
                                " Please, provide a model with"
                                " the complete list of fields.")
            # The 'model_fields' entries are completed in place with the
            # summary and name found in the 'fields' dict
            for field_id in fields:
                full_info = model_info['fields'][field_id]
                fields[field_id]['summary'] = full_info['summary']
                fields[field_id]['name'] = full_info['name']
        else:
            fields = model_info['fields']

        self.inverted_fields = invert_dictionary(fields)
        self.all_inverted_fields = invert_dictionary(model_info['fields'])
        self.tree = Tree(model_info['root'], fields,
                         model['objective_fields'])
        self.description = model['description']

        importance = model_info.get('importance', None)
        if importance:
            # Keep only the entries whose first element is a used field
            importance = [entry for entry in importance
                          if entry[0] in fields]
        self.field_importance = importance
        self.locale = model.get('locale', DEFAULT_LOCALE)
Пример #5
0
    def __init__(self, model, api=None):
        """Creates the local model for a model resource, id or reference.

        model: used directly when it is a dict whose 'resource' key is not
               None; otherwise its id is obtained with `get_model_id` and
               the resource is downloaded with `retrieve_model`.
        api: connection for the download. When None, and only if a
             download is required, `BigML(storage=STORAGE)` is built.

        Attributes set: resource_id, inverted_fields, all_inverted_fields,
        tree, description, field_importance, locale.

        Raises Exception if no id can be found, if the resource lacks a
        'model' dict, if its status code is not FINISHED or if any id in
        'model_fields' is absent from 'fields'.
        """
        if (isinstance(model, dict) and 'resource' in model
                and model['resource'] is not None):
            self.resource_id = model['resource']
        else:
            if api is None:
                api = BigML(storage=STORAGE)
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(error_message(model,
                                              resource_type='model',
                                              method='get'))
            model = retrieve_model(api, self.resource_id)

        # The resource may be wrapped under the 'object' key
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if 'model' not in model or not isinstance(model['model'], dict):
            raise Exception("Cannot create the Model instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)

        status = get_status(model)
        if 'code' not in status or not (status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        inner = model['model']
        if 'model_fields' in inner:
            fields = inner['model_fields']
            # pagination or exclusion might cause a field not to
            # be in available fields dict
            missing = any(key not in inner['fields'] for key in fields)
            if missing:
                raise Exception("Some fields are missing"
                                " to generate a local model."
                                " Please, provide a model with"
                                " the complete list of fields.")
            # Copy summary and name into each 'model_fields' entry
            for key in fields:
                source = inner['fields'][key]
                fields[key]['summary'] = source['summary']
                fields[key]['name'] = source['name']
        else:
            fields = inner['fields']

        self.inverted_fields = invert_dictionary(fields)
        self.all_inverted_fields = invert_dictionary(inner['fields'])
        self.tree = Tree(inner['root'], fields, model['objective_fields'])
        self.description = model['description']

        self.field_importance = inner.get('importance', None)
        if self.field_importance:
            # Drop the importance entries of fields that are not used
            self.field_importance = [
                item for item in self.field_importance
                if item[0] in fields]
        self.locale = model.get('locale', DEFAULT_LOCALE)
Пример #6
0
    def __init__(self,
                 resource_or_fields,
                 missing_tokens=None,
                 data_locale=None,
                 verbose=False,
                 objective_field=None,
                 objective_field_present=False,
                 include=None,
                 errors=None):
        """Stores the fields structure and the lookups derived from it.

        resource_or_fields: a resource accepted by `get_fields_structure`
                or, when that call raises ValueError, the fields structure
                itself.
        missing_tokens: tokens stored in `self.missing_tokens`. When None,
                the ones reported by the resource are used and, failing
                that, DEFAULT_MISSING_TOKENS.
        data_locale: locale handed over to `find_locale`. When None, the
                resource locale is used (DEFAULT_LOCALE for a plain
                fields structure).
        verbose: handed over to `find_locale`.
        objective_field, objective_field_present: handed over to
                `self.update_objective_field`. When no objective field is
                given, the objective column found in the resource is used
                and flagged as present.
        include: ids of the fields kept in `self.filtered_fields`; every
                field id is kept when it is None.
        errors: stored in `self.field_errors`; defaults to the errors
                reported by the resource.

        Raises ValueError when no fields structure is found.
        """

        # The constructor can be instantiated with resources or a fields
        # structure. The structure is checked and fields structure is returned
        # if a resource type is matched.
        try:
            resource_info = get_fields_structure(resource_or_fields, True)
            (self.fields, resource_locale, resource_missing_tokens,
             objective_column, resource_errors) = resource_info
            if data_locale is None:
                data_locale = resource_locale
            if missing_tokens is None and resource_missing_tokens:
                missing_tokens = resource_missing_tokens
            if errors is None:
                errors = resource_errors
        except ValueError:
            # If the resource structure is not in the expected set, fields
            # structure is assumed
            self.fields = resource_or_fields
            if data_locale is None:
                data_locale = DEFAULT_LOCALE
            objective_column = None
        # Applied on both paths: a resource that reports no missing tokens
        # used to leave `self.missing_tokens` as None.
        if missing_tokens is None:
            missing_tokens = DEFAULT_MISSING_TOKENS
        if self.fields is None:
            raise ValueError("No fields structure was found.")
        self.fields_by_name = invert_dictionary(self.fields, 'name')
        self.fields_by_column_number = invert_dictionary(
            self.fields, 'column_number')
        find_locale(data_locale, verbose)
        self.missing_tokens = missing_tokens
        self.fields_columns = sorted(self.fields_by_column_number.keys())
        # Ids of the fields to be included
        self.filtered_fields = (self.fields.keys()
                                if include is None else include)
        # To be updated in update_objective_field
        self.row_ids = None
        self.headers = None
        self.objective_field = None
        self.objective_field_present = None
        self.filtered_indexes = None
        self.field_errors = errors
        # if the objective field is not set by the user
        # use the one extracted from the resource info
        if objective_field is None and objective_column is not None:
            objective_field = objective_column
            objective_field_present = True
        self.update_objective_field(objective_field, objective_field_present)
Пример #7
0
    def __init__(self, fields, missing_tokens=None,
                 data_locale=DEFAULT_LOCALE, verbose=False):
        """Stores the fields structure and its inverted lookups.

        fields: fields structure, indexed through `invert_dictionary` by
                'name' and by 'column_number'.
        missing_tokens: list stored in `self.missing_tokens`. When it is
                        omitted (or None) a new `['']` list is used.
        data_locale, verbose: handed over to `find_locale`.
        """

        find_locale(data_locale, verbose)

        # A list literal used as the default in the signature would be one
        # single object shared by every instance, so it is built per call.
        if missing_tokens is None:
            missing_tokens = ['']

        self.fields = fields
        self.fields_by_name = invert_dictionary(fields, 'name')
        self.fields_by_column_number = invert_dictionary(fields,
                                                         'column_number')
        self.missing_tokens = missing_tokens
Пример #8
0
    def __init__(self, resource_or_fields, missing_tokens=None,
                 data_locale=None, verbose=False,
                 objective_field=None, objective_field_present=False,
                 include=None, errors=None):
        """Stores the fields structure and the lookups derived from it.

        resource_or_fields: a resource accepted by `get_fields_structure`
                or, when that call raises ValueError, the fields structure
                itself.
        missing_tokens: tokens stored in `self.missing_tokens`. When None,
                the ones reported by the resource are used and, failing
                that, DEFAULT_MISSING_TOKENS.
        data_locale: locale handed over to `find_locale`. When None, the
                resource locale is used (DEFAULT_LOCALE for a plain
                fields structure).
        verbose: handed over to `find_locale`.
        objective_field, objective_field_present: handed over to
                `self.update_objective_field`. When no objective field is
                given, the objective column found in the resource is used
                and flagged as present.
        include: ids of the fields kept in `self.filtered_fields`; every
                field id is kept when it is None.
        errors: stored in `self.field_errors`; defaults to the errors
                reported by the resource.

        Raises ValueError when no fields structure is found.
        """

        # The constructor can be instantiated with resources or a fields
        # structure. The structure is checked and fields structure is returned
        # if a resource type is matched.
        try:
            resource_info = get_fields_structure(resource_or_fields, True)
            (self.fields,
             resource_locale,
             resource_missing_tokens,
             objective_column,
             resource_errors) = resource_info
            if data_locale is None:
                data_locale = resource_locale
            if missing_tokens is None and resource_missing_tokens:
                missing_tokens = resource_missing_tokens
            if errors is None:
                errors = resource_errors
        except ValueError:
            # If the resource structure is not in the expected set, fields
            # structure is assumed
            self.fields = resource_or_fields
            if data_locale is None:
                data_locale = DEFAULT_LOCALE
            objective_column = None
        # Applied on both paths: a resource that reports no missing tokens
        # used to leave `self.missing_tokens` as None.
        if missing_tokens is None:
            missing_tokens = DEFAULT_MISSING_TOKENS
        if self.fields is None:
            raise ValueError("No fields structure was found.")
        self.fields_by_name = invert_dictionary(self.fields, 'name')
        self.fields_by_column_number = invert_dictionary(self.fields,
                                                         'column_number')
        find_locale(data_locale, verbose)
        self.missing_tokens = missing_tokens
        self.fields_columns = sorted(self.fields_by_column_number.keys())
        # Ids of the fields to be included
        self.filtered_fields = (self.fields.keys() if include is None
                                else include)
        # To be updated in update_objective_field
        self.row_ids = None
        self.headers = None
        self.objective_field = None
        self.objective_field_present = None
        self.filtered_indexes = None
        self.field_errors = errors
        # if the objective field is not set by the user
        # use the one extracted from the resource info
        if objective_field is None and objective_column is not None:
            objective_field = objective_column
            objective_field_present = True
        self.update_objective_field(objective_field, objective_field_present)
Пример #9
0
    def __init__(self, model):
        """Builds a local model from a finished model resource.

        model: dict with a 'resource' key. The resource information is
               read from its 'object' dict when there is one, and from
               the dict itself otherwise.

        Sets resource_id, inverted_fields, all_inverted_fields, tree,
        description, field_importance and locale.

        Raises Exception("Invalid model structure") when `model` is not a
        dict with a 'resource' key or has no 'model' dict,
        Exception("The model isn't finished yet") when the status code is
        missing or differs from FINISHED, and an Exception when some of
        the 'model_fields' ids are not in the 'fields' dict.
        """

        if not (isinstance(model, dict) and 'resource' in model):
            raise Exception("Invalid model structure")
        self.resource_id = model['resource']

        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Invalid model structure")

        # A resource without a status code cannot be confirmed as finished.
        # Returning silently here would leave the instance without its
        # tree and fields, so it is reported like an unfinished model.
        finished = ('status' in model and
                    'code' in model['status'] and
                    model['status']['code'] == FINISHED)
        if not finished:
            raise Exception("The model isn't finished yet")

        model_info = model['model']
        if 'model_fields' in model_info:
            fields = model_info['model_fields']
            # pagination or exclusion might cause a field not to
            # be in available fields dict
            if not all(key in model_info['fields'] for key in fields):
                raise Exception("Some fields are missing"
                                " to generate a local model."
                                " Please, provide a model with"
                                " the complete list of fields.")
            # The 'model_fields' entries are completed in place with the
            # summary and name found in the 'fields' dict
            for field in fields:
                field_info = model_info['fields'][field]
                fields[field]['summary'] = field_info['summary']
                fields[field]['name'] = field_info['name']
        else:
            fields = model_info['fields']
        self.inverted_fields = invert_dictionary(fields)
        self.all_inverted_fields = invert_dictionary(model_info['fields'])
        self.tree = Tree(
            model_info['root'],
            fields,
            model['objective_fields'])
        self.description = model['description']
        self.field_importance = model_info.get('importance', None)
        if self.field_importance:
            # Keep only the entries whose first element is a used field
            self.field_importance = [element for element
                                     in self.field_importance
                                     if element[0] in fields]
        self.locale = model.get('locale', DEFAULT_LOCALE)
Пример #10
0
    def create_prediction(self, model, input_data=None, args=None,
                          wait_time=3):
        """Creates a new prediction.

        model: model reference, resolved with `get_model_id`.
        input_data: dict keyed by field name. Its keys are translated
                    with the inverted fields of the model; if any key is
                    unknown the error is logged and the data is sent as
                    given.
        args: extra arguments for the request body. The dict is copied,
              so the caller's object is never modified.
        wait_time: seconds slept between `model_is_ready` checks; no
                   waiting is done when it is not positive.

        Returns the result of `self._create`, or None when no model id
        is found.
        """
        model_id = get_model_id(model)
        if not model_id:
            return None

        if wait_time > 0:
            while not self.model_is_ready(model_id):
                time.sleep(wait_time)

        if input_data is None:
            input_data = {}
        else:
            fields = self.get_fields(model_id)
            inverted_fields = invert_dictionary(fields)
            try:
                input_data = dict(
                    (inverted_fields[key], value)
                    for key, value in input_data.items())
            # `except KeyError, field` is Python 2-only syntax; the `as`
            # form is accepted by Python 2.6+ and Python 3.
            except KeyError as field:
                LOGGER.error("Wrong field name %s", field)

        # Work on a copy so that the caller's dict is not polluted with
        # the "model" and "input_data" keys.
        request_args = {} if args is None else dict(args)
        request_args.update({
            "model": model_id,
            "input_data": input_data})
        body = json.dumps(request_args)
        return self._create(self.prediction_url, body)
Пример #11
0
    def __init__(self, fields, objective_id=None, data_locale=None,
                 missing_tokens=None, terms=False, categories=False,
                 numerics=False):
        """Stores a copy of the fields and the settings used to parse data.

        Nothing is done when `fields` is not a dict.

        fields: fields dict. It is handed to `self.uniquify_varnames` and
                `invert_dictionary` and copied into `self.fields`.
        objective_id: stored in `self.objective_id`.
        data_locale: stored in `self.data_locale` (DEFAULT_LOCALE if None).
        missing_tokens: stored in `self.missing_tokens`
                        (DEFAULT_MISSING_TOKENS if None).
        terms: when true, the empty term and item containers are created.
        categories: when true, `self.categories` is created empty.
        numerics: with the two flags above, decides whether
                  `self.add_terms(categories, numerics)` is called.

        Raises Exception("Wrong field structure.") on any KeyError.
        """
        if not isinstance(fields, dict):
            return
        try:
            self.objective_id = objective_id
            self.uniquify_varnames(fields)
            self.inverted_fields = invert_dictionary(fields)
            self.fields = dict(fields)
            self.data_locale = (DEFAULT_LOCALE if data_locale is None
                                else data_locale)
            self.missing_tokens = (DEFAULT_MISSING_TOKENS
                                   if missing_tokens is None
                                   else missing_tokens)
            if terms:
                # adding text and items information to handle terms
                # expansion
                for attribute in ("term_forms", "tag_clouds",
                                  "term_analysis", "items",
                                  "item_analysis"):
                    setattr(self, attribute, {})
            if categories:
                self.categories = {}
            if any((terms, categories, numerics)):
                self.add_terms(categories, numerics)
        except KeyError:
            raise Exception("Wrong field structure.")
Пример #12
0
    def create_prediction(self, model, input_data=None, by_name=True,
                          args=None, wait_time=3):
        """Creates a new prediction.

        model: model reference, resolved with `get_model_id`.
        input_data: dict with the input values.
        by_name: when true, the keys of `input_data` are field names and
                 are translated with the inverted fields of the model.
                 Unknown names are logged and dropped.
        args: extra arguments for the request body. The dict is copied,
              so the caller's object is never modified.
        wait_time: seconds slept between `model_is_ready` checks; no
                   waiting is done when it is not positive.

        Returns the result of `self._create`, or None when no model id
        is found.
        """
        model_id = get_model_id(model)
        if not model_id:
            return None

        if wait_time > 0:
            while not self.model_is_ready(model_id):
                time.sleep(wait_time)

        if input_data is None:
            input_data = {}
        elif by_name:
            fields = self.get_fields(model_id)
            inverted_fields = invert_dictionary(fields)
            wrong_keys = [key for key in input_data.keys()
                          if key not in inverted_fields]
            if wrong_keys:
                LOGGER.error(("Some input fields are"
                              " not used in the model: %s") %
                             ", ".join(wrong_keys))
            input_data = dict(
                (inverted_fields[key], value)
                for key, value in input_data.items()
                if key in inverted_fields)

        # Work on a copy so that the caller's dict is not polluted with
        # the "model" and "input_data" keys.
        request_args = {} if args is None else dict(args)
        request_args.update({
            "model": model_id,
            "input_data": input_data})
        body = json.dumps(request_args)
        return self._create(self.prediction_url, body)
Пример #13
0
    def __init__(self, fields, objective_id=None, data_locale=None,
                 missing_tokens=None, terms=False, categories=False,
                 numerics=False):
        """Keeps a copy of the fields plus the data parsing settings.

        The call is a no-op when `fields` is not a dict.

        fields: fields dict; passed to `self.uniquify_varnames` and
                `invert_dictionary`, then copied into `self.fields`.
        objective_id: kept in `self.objective_id`.
        data_locale: kept in `self.data_locale`; DEFAULT_LOCALE if None.
        missing_tokens: kept in `self.missing_tokens`;
                        DEFAULT_MISSING_TOKENS if None.
        terms: if true, the term and item containers are created empty.
        categories: if true, `self.categories` is created empty.
        numerics: if this or any of the two flags above is true,
                  `self.add_terms(categories, numerics)` is called.

        Raises Exception("Wrong field structure.") on any KeyError.
        """
        if not isinstance(fields, dict):
            return
        try:
            self.objective_id = objective_id
            self.uniquify_varnames(fields)
            self.inverted_fields = invert_dictionary(fields)
            self.fields = dict(fields)

            if data_locale is None:
                data_locale = DEFAULT_LOCALE
            if missing_tokens is None:
                missing_tokens = DEFAULT_MISSING_TOKENS
            self.data_locale = data_locale
            self.missing_tokens = missing_tokens

            if terms:
                # adding text and items information to handle terms
                # expansion
                self.term_forms, self.tag_clouds = {}, {}
                self.term_analysis = {}
                self.items, self.item_analysis = {}, {}
            if categories:
                self.categories = {}
            needs_terms = terms or categories or numerics
            if needs_terms:
                self.add_terms(categories, numerics)
        except KeyError:
            raise Exception("Wrong field structure.")
Пример #14
0
 def __init__(self, fields, objective_id=None):
     """Stores a copy of the fields and their inverted lookup.

     Nothing is done when `fields` is not a dict.

     fields: fields dict; handed to `self.uniquify_varnames` and
             `invert_dictionary`, then copied into `self.fields`.
     objective_id: stored in `self.objective_id`.

     Raises Exception("Wrong field structure.") on any KeyError.
     """
     if not isinstance(fields, dict):
         return
     try:
         self.objective_id = objective_id
         self.uniquify_varnames(fields)
         self.inverted_fields = invert_dictionary(fields)
         self.fields = dict(fields)
     except KeyError:
         raise Exception("Wrong field structure.")
Пример #15
0
    def __init__(self,
                 fields,
                 objective_id=None,
                 data_locale=None,
                 missing_tokens=None,
                 terms=False,
                 categories=False,
                 numerics=False):
        """Stores the fields and derives the input and model fields.

        Nothing is done when `fields` is not a dict.

        fields: fields dict; handed to `self.uniquify_varnames` and
                `invert_dictionary`, then copied into `self.fields`.
        objective_id: stored in `self.objective_id` and left out of the
                      computed input fields.
        data_locale: stored in `self.data_locale` (DEFAULT_LOCALE if None).
        missing_tokens: stored in `self.missing_tokens`
                        (DEFAULT_MISSING_TOKENS if None).
        terms: when true, the empty term and item containers are created.
        categories: when true, `self.categories` is created empty.
        numerics: with the two flags above, decides whether
                  `self.add_terms(categories, numerics)` is called.

        `self.input_fields` is only computed when the instance has no
        non-empty `input_fields` yet: field ids sorted by the
        "column_number" of each field.

        Raises Exception("Wrong field structure.") on any KeyError.
        """
        if not isinstance(fields, dict):
            return
        try:
            self.objective_id = objective_id
            self.uniquify_varnames(fields)
            self.inverted_fields = invert_dictionary(fields)
            self.fields = {}
            self.fields.update(fields)
            if not (hasattr(self, "input_fields") and self.input_fields):
                # `lambda(x): ...` is Python 2-only syntax; the plain
                # form works on both Python 2 and Python 3.
                sorted_fields = sorted(
                    self.fields.items(),
                    key=lambda item: item[1].get("column_number"))
                self.input_fields = [
                    field_id for field_id, _ in sorted_fields
                    if not self.objective_id or
                    field_id != self.objective_id]
            self.model_fields = {}
            self.datetime_parents = []
            self.model_fields.update(
                dict((field_id, field)
                     for field_id, field in self.fields.items()
                     if field_id in self.input_fields and
                     field.get("preferred", True)))
            # if any of the model fields is a generated datetime field
            # we need to add the parent datetime field
            self.model_fields = self.add_datetime_parents()
            self.data_locale = data_locale
            self.missing_tokens = missing_tokens
            if self.data_locale is None:
                self.data_locale = DEFAULT_LOCALE
            if self.missing_tokens is None:
                self.missing_tokens = DEFAULT_MISSING_TOKENS
            if terms:
                # adding text and items information to handle terms
                # expansion
                self.term_forms = {}
                self.tag_clouds = {}
                self.term_analysis = {}
                self.items = {}
                self.item_analysis = {}
            if categories:
                self.categories = {}
            if terms or categories or numerics:
                self.add_terms(categories, numerics)

        except KeyError:
            raise Exception("Wrong field structure.")
Пример #16
0
 def __init__(self, fields, objective_id=None, data_locale=None, missing_tokens=None):
     """Stores a copy of the fields and the settings used to parse data.

     Nothing is done when `fields` is not a dict.

     fields: fields dict; handed to `self.uniquify_varnames` and
             `invert_dictionary`, then copied into `self.fields`.
     objective_id: stored in `self.objective_id`.
     data_locale: stored in `self.data_locale` (DEFAULT_LOCALE if None).
     missing_tokens: stored in `self.missing_tokens`
                     (DEFAULT_MISSING_TOKENS if None).

     Raises Exception("Wrong field structure.") on any KeyError.
     """
     if not isinstance(fields, dict):
         return
     try:
         self.objective_id = objective_id
         self.uniquify_varnames(fields)
         self.inverted_fields = invert_dictionary(fields)
         self.fields = dict(fields)
         self.data_locale = (DEFAULT_LOCALE if data_locale is None
                             else data_locale)
         self.missing_tokens = (DEFAULT_MISSING_TOKENS
                                if missing_tokens is None
                                else missing_tokens)
     except KeyError:
         raise Exception("Wrong field structure.")
Пример #17
0
 def __init__(self, fields, objective_id=None, data_locale=None,
              missing_tokens=None):
     """Keeps a copy of the fields plus the data parsing settings.

     The call is a no-op when `fields` is not a dict.

     fields: fields dict; passed to `self.uniquify_varnames` and
             `invert_dictionary`, then copied into `self.fields`.
     objective_id: kept in `self.objective_id`.
     data_locale: kept in `self.data_locale`; DEFAULT_LOCALE if None.
     missing_tokens: kept in `self.missing_tokens`;
                     DEFAULT_MISSING_TOKENS if None.

     Raises Exception("Wrong field structure.") on any KeyError.
     """
     if not isinstance(fields, dict):
         return
     try:
         self.objective_id = objective_id
         self.uniquify_varnames(fields)
         self.inverted_fields = invert_dictionary(fields)
         self.fields = dict(fields)

         if data_locale is None:
             data_locale = DEFAULT_LOCALE
         if missing_tokens is None:
             missing_tokens = DEFAULT_MISSING_TOKENS
         self.data_locale = data_locale
         self.missing_tokens = missing_tokens
     except KeyError:
         raise Exception("Wrong field structure.")
Пример #18
0
    def create_prediction(self, model_or_ensemble, input_data=None,
                          by_name=True, args=None, wait_time=3, retries=10):
        """Creates a new prediction.

        model_or_ensemble: reference first tried with `get_ensemble_id`;
                when that raises ValueError it is resolved with
                `get_model_id`. For an ensemble, its first model is the
                one used to read the fields.
        input_data: dict with the input values.
        by_name: when true, the keys of `input_data` are field names and
                 are translated with the inverted fields of the model.
                 Unknown names are logged and dropped.
        args: extra arguments for the request body. The dict is copied,
              so the caller's object is never modified.
        wait_time: seconds slept between readiness checks; no waiting is
                   done when it is not positive.
        retries: maximum number of readiness checks that are slept on.

        Returns the result of `self._create`, or None when no model id
        is available.
        """
        ensemble_id = None
        model_id = None
        try:
            ensemble_id = get_ensemble_id(model_or_ensemble)
            if ensemble_id is not None:
                if wait_time > 0:
                    count = 0
                    while (not self.ensemble_is_ready(ensemble_id) and
                           count < retries):
                        time.sleep(wait_time)
                        count += 1
                try:
                    ensemble = self.get_ensemble(ensemble_id)
                    model_id = ensemble['object']['models'][0]
                # `except (...), exc` is Python 2-only syntax; the `as`
                # form is accepted by Python 2.6+ and Python 3.
                except (KeyError, IndexError) as exc:
                    LOGGER.error("The ensemble has no valid model"
                                 " information: %s" % str(exc))
                    model_id = None
        except ValueError:
            model_id = get_model_id(model_or_ensemble)

        if model_id is None:
            return None

        # The ensemble, if any, has already been waited for above
        if ensemble_id is None and wait_time > 0:
            count = 0
            while (not self.model_is_ready(model_id) and
                   count < retries):
                time.sleep(wait_time)
                count += 1

        if input_data is None:
            input_data = {}
        elif by_name:
            fields = self.get_fields(model_id)
            inverted_fields = invert_dictionary(fields)
            wrong_keys = [key for key in input_data.keys()
                          if key not in inverted_fields]
            if wrong_keys:
                LOGGER.error(("Some input fields are"
                              " not used in the model: %s") %
                             ", ".join(wrong_keys))
            input_data = dict(
                (inverted_fields[key], value)
                for key, value in input_data.items()
                if key in inverted_fields)

        # Work on a copy so that the caller's dict is not polluted with
        # the request keys.
        request_args = {} if args is None else dict(args)
        request_args.update({
            "input_data": input_data})
        if ensemble_id is None:
            request_args.update({
                "model": model_id})
        else:
            request_args.update({
                "ensemble": ensemble_id})

        body = json.dumps(request_args)
        return self._create(self.prediction_url, body)
Пример #19
0
    def __init__(self,
                 fields,
                 objective_id=None,
                 data_locale=None,
                 missing_tokens=None,
                 terms=False,
                 categories=False,
                 numerics=False):
        """Stores the fields and derives input fields, model fields and type.

        Nothing is done when `fields` is not a dict.

        fields: fields dict; handed to `self.uniquify_varnames` and
                `invert_dictionary`, then copied into `self.fields`.
        objective_id: stored in `self.objective_id` and left out of the
                      computed input fields.
        data_locale: stored in `self.data_locale` (DEFAULT_LOCALE if None).
        missing_tokens: stored in `self.missing_tokens`
                        (DEFAULT_MISSING_TOKENS if None).
        terms: when true, the empty term and item containers are created.
        categories: when true, `self.categories` is created empty.
        numerics: with the two flags above, decides whether
                  `self.add_terms(categories, numerics)` is called.

        `self.input_fields` is only computed when the instance has no
        non-empty `input_fields` yet. `self.regression` is only set when
        there is an objective id and a non-empty `resource_id` whose
        resource type is not ENSEMBLE_PATH.

        Raises Exception("Wrong field structure.") on any KeyError.
        """
        if not isinstance(fields, dict):
            return
        try:
            self.objective_id = objective_id
            self.uniquify_varnames(fields)
            self.inverted_fields = invert_dictionary(fields)
            self.fields = dict(fields)

            if not getattr(self, "input_fields", None):
                # Field ids ordered by column number, objective excluded
                by_column = sorted(
                    self.fields.items(),
                    key=lambda pair: pair[1].get("column_number"))
                self.input_fields = [
                    field_id for field_id, _ in by_column
                    if not self.objective_id or
                    field_id != self.objective_id]

            self.model_fields = {}
            self.datetime_parents = []
            self.model_fields.update({
                field_id: field
                for field_id, field in self.fields.items()
                if field_id in self.input_fields and
                field.get("preferred", True)})
            # if any of the model fields is a generated datetime field
            # we need to add the parent datetime field
            self.model_fields = self.add_datetime_parents()

            self.data_locale = (DEFAULT_LOCALE if data_locale is None
                                else data_locale)
            self.missing_tokens = (DEFAULT_MISSING_TOKENS
                                   if missing_tokens is None
                                   else missing_tokens)
            if terms:
                # adding text and items information to handle terms
                # expansion
                for attribute in ("term_forms", "tag_clouds",
                                  "term_analysis", "items",
                                  "item_analysis"):
                    setattr(self, attribute, {})
            if categories:
                self.categories = {}
            if any((terms, categories, numerics)):
                self.add_terms(categories, numerics)

            single_model = (
                self.objective_id is not None and
                getattr(self, "resource_id", None) and
                get_resource_type(self.resource_id) != ENSEMBLE_PATH)
            if single_model:
                # Only for models. Ensembles need their own logic
                boosting = getattr(self, "boosting", False)
                if boosting:
                    # With boosting info, regression means that there is
                    # no objective class
                    self.regression = \
                        boosting.get("objective_class") is None
                else:
                    # The optype is only read when there is no boosting
                    # info, so a KeyError can only come from this path
                    numeric_objective = \
                        self.fields[self.objective_id]['optype'] == NUMERIC
                    self.regression = numeric_objective or boosting

        except KeyError:
            raise Exception("Wrong field structure.")
Пример #20
0
    def __init__(self, model, api=None):
        """Fills the basic model attributes from a model resource or id.

        :param model: either a dictionary holding a non-None 'resource' key
                      or anything `get_model_id` can turn into a model id;
                      in the second case the model is retrieved through
                      `retrieve_model`
        :param api: connection used to retrieve the model; a default
                    `BigML()` one is created when it is needed and None
        :raises Exception: when no model id can be extracted, when the
                           resource has no 'model' dictionary, when the
                           model is not finished or when some of the model
                           fields are not described in the fields structure
        """
        has_resource = (isinstance(model, dict) and
                        model.get('resource') is not None)
        if has_resource:
            self.resource_id = model['resource']
        else:
            # If only the model id is provided, the short version of the model
            # resource is used to build a basic summary of the model
            if api is None:
                api = BigML()
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(error_message(model,
                                              resource_type='model',
                                              method='get'))
            model = retrieve_model(
                api, self.resource_id,
                query_string='%s;%s' % (ONLY_MODEL, EXCLUDE_ROOT))

        # the useful information may be wrapped in the 'object' attribute
        if 'object' in model and isinstance(model['object'], dict):
            model = model['object']

        if not ('model' in model and isinstance(model['model'], dict)):
            raise Exception("Cannot create the BaseModel instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)

        status = get_status(model)
        if not ('code' in status and status['code'] == FINISHED):
            raise Exception("The model isn't finished yet")

        model_info = model['model']
        if 'model_fields' in model_info:
            fields = model_info['model_fields']
            # pagination or exclusion might cause a field not to
            # be in available fields dict
            if any(field_id not in model_info['fields']
                   for field_id in fields.keys()):
                raise Exception("Some fields are missing"
                                " to generate a local model."
                                " Please, provide a model with"
                                " the complete list of fields.")
            # the summary and name are copied from the complete fields
            # structure into the model fields
            for field_id, field in fields.items():
                field_info = model_info['fields'][field_id]
                if 'summary' in field_info:
                    field['summary'] = field_info['summary']
                field['name'] = field_info['name']
        else:
            fields = model_info['fields']

        self.objective_field = extract_objective(model['objective_fields'])
        self.uniquify_varnames(fields)
        self.inverted_fields = invert_dictionary(fields)
        self.all_inverted_fields = invert_dictionary(model_info['fields'])
        self.fields = fields
        self.description = model['description']

        # importance entries for fields outside `fields` are dropped
        importance = model_info.get('importance')
        if importance:
            importance = [element for element in importance
                          if element[0] in fields]
        self.field_importance = importance
        self.locale = model.get('locale', DEFAULT_LOCALE)
Пример #21
0
    def __init__(self, model, api=None):
        """Fills the basic model attributes from a model resource or id.

        :param model: a model structure accepted by `check_model_structure`
                      or anything `get_model_id` can turn into a model id;
                      in the second case the model is retrieved through
                      `retrieve_model`
        :param api: connection used to retrieve the model; a default
                    `BigML()` one is created when it is needed and None
        :raises Exception: when no model id can be extracted, when the
                           resource has no 'model' dictionary, when the
                           model is not finished or when some of the model
                           fields are not described in the fields structure
        """

        if check_model_structure(model):
            self.resource_id = model['resource']
        else:
            # If only the model id is provided, the short version of the model
            # resource is used to build a basic summary of the model
            if api is None:
                api = BigML()
            self.resource_id = get_model_id(model)
            if self.resource_id is None:
                raise Exception(
                    api.error_message(model,
                                      resource_type='model',
                                      method='get'))
            query_string = ONLY_MODEL
            model = retrieve_model(api,
                                   self.resource_id,
                                   query_string=query_string)
            # Stored copies of the model structure might lack some necessary
            # keys
            if not check_model_structure(model):
                model = api.get_model(self.resource_id,
                                      query_string=query_string)

        if ('object' in model and isinstance(model['object'], dict)):
            model = model['object']

        if ('model' in model and isinstance(model['model'], dict)):
            status = get_status(model)
            if ('code' in status and status['code'] == FINISHED):
                if 'model_fields' in model['model']:
                    fields = model['model']['model_fields']
                    # pagination or exclusion might cause a field not to
                    # be in available fields dict
                    if not all(key in model['model']['fields']
                               for key in fields.keys()):
                        raise Exception("Some fields are missing"
                                        " to generate a local model."
                                        " Please, provide a model with"
                                        " the complete list of fields.")
                    for field in fields:
                        field_info = model['model']['fields'][field]
                        if 'summary' in field_info:
                            fields[field]['summary'] = field_info['summary']
                        fields[field]['name'] = field_info['name']
                else:
                    # Without 'model_fields' the complete fields structure
                    # is used; otherwise `fields` would be unbound below.
                    fields = model['model']['fields']
                objective_field = model['objective_fields']
                self.objective_field = extract_objective(objective_field)
                self.uniquify_varnames(fields)
                self.inverted_fields = invert_dictionary(fields)
                self.fields = fields
                self.description = model['description']
                self.field_importance = model['model'].get('importance', None)
                if self.field_importance:
                    # importance entries for fields outside `fields` are
                    # dropped
                    self.field_importance = [
                        element for element in self.field_importance
                        if element[0] in fields
                    ]
                self.locale = model.get('locale', DEFAULT_LOCALE)

            else:
                raise Exception("The model isn't finished yet")
        else:
            raise Exception("Cannot create the BaseModel instance. Could not"
                            " find the 'model' key in the resource:\n\n%s" %
                            model)
Пример #22
0
    def create_prediction(self, model_or_ensemble, input_data=None,
                          by_name=True, args=None, wait_time=3, retries=10):
        """Creates a new prediction.

        :param model_or_ensemble: model or ensemble reference; it is first
                                  tried as an ensemble and, when
                                  `get_ensemble_id` raises ValueError, as a
                                  model
        :param input_data: dictionary of input values; an empty dictionary
                           is used when None
        :param by_name: when truthy, the keys of `input_data` are treated as
                        field names and translated to field ids; keys that
                        are not found are logged and dropped
        :param args: additional arguments for the creation request; the
                     dictionary is copied, never modified
        :param wait_time: seconds slept between readiness checks; no waiting
                          is done when it is not positive
        :param retries: maximum number of readiness checks
        :returns: the result of the creation request, or None when no model
                  id could be determined
        """
        ensemble_id = None
        model_id = None
        try:
            ensemble_id = get_ensemble_id(model_or_ensemble)
            if ensemble_id is not None:
                if wait_time > 0:
                    count = 0
                    while (not self.ensemble_is_ready(ensemble_id) and
                           count < retries):
                        time.sleep(wait_time)
                        count += 1
                try:
                    # the first model of the ensemble is the one used to
                    # retrieve the fields structure below
                    ensemble = self.get_ensemble(ensemble_id)
                    model_id = ensemble['object']['models'][0]
                except (KeyError, IndexError) as exc:
                    LOGGER.error("The ensemble has no valid model"
                                 " information: %s", exc)
                    model_id = None
        except ValueError:
            model_id = get_model_id(model_or_ensemble)

        if model_id is None:
            return None

        if ensemble_id is None and wait_time > 0:
            count = 0
            while (not self.model_is_ready(model_id) and
                   count < retries):
                time.sleep(wait_time)
                count += 1

        if input_data is None:
            input_data = {}
        elif by_name:
            fields = self.get_fields(model_id)
            inverted_fields = invert_dictionary(fields)
            wrong_keys = [key for key in input_data
                          if key not in inverted_fields]
            if wrong_keys:
                LOGGER.error("Some input fields are"
                             " not used in the model: %s",
                             ", ".join(wrong_keys))
            input_data = dict(
                (inverted_fields[key], value)
                for key, value in input_data.items()
                if key in inverted_fields)

        # Work on a copy so that the caller's dictionary is left untouched
        create_args = {} if args is None else dict(args)
        create_args["input_data"] = input_data
        if ensemble_id is None:
            create_args["model"] = model_id
        else:
            create_args["ensemble"] = ensemble_id

        body = json.dumps(create_args)
        return self._create(self.prediction_url, body)