Exemplo n.º 1
0
def get_input_fields(resource, referrer=None):
    """New list of input fields

    Compares, by name, the input fields of `resource` with the fields of
    `referrer` to decide whether an explicit list of input fields is needed.

    :param resource: resource dict. Its 'input_fields' and 'resource' keys
                     are read, plus 'objective_field' when it is a model.
    :param referrer: dict of the resource this one was built from, or None.
    :return: `[]` when the sorted input field names are empty, equal to the
             referrer field names, or (for models) equal to the referrer
             field names without the objective field. Otherwise the
             referrer field ids. When there is no referrer, the resource's
             own 'input_fields' list (possibly empty).

    """
    if referrer is None:
        referrer = {}
    input_fields_ids = resource.get('input_fields', [])
    if not referrer:
        # Without a referrer there is nothing to compare against. The
        # previous code fell through to `referrer_fields`, which is only
        # bound when a referrer exists, and raised UnboundLocalError.
        return input_fields_ids

    # lists of names considered "default": an empty list always qualifies
    referrer_input_fields = [[]]
    # compare fields by name
    resource_fields = Fields({
        'resource': resource['resource'],
        'object': resource
    })
    referrer_fields = Fields({
        'resource': referrer['resource'],
        'object': referrer
    })
    input_fields = sorted(
        resource_fields.field_name(field_id)
        for field_id in input_fields_ids)
    referrer_type = get_resource_type(referrer)
    if referrer_type == 'dataset':
        # for datasets only the preferred fields are taken into account
        referrer_fields = Fields(referrer_fields.preferred_fields())
        referrer_fields_names = sorted(
            field['name'] for field in referrer_fields.fields.values())
    else:
        referrer_fields_names = sorted(
            referrer_fields.fields_by_name.keys())
    # check referrer input fields to see if they are equal
    referrer_input_fields.append(referrer_fields_names)
    # check whether the resource has an objective field not included in
    # the input fields list
    resource_type = get_resource_type(resource)
    if resource_type == 'model':
        objective_id = resource.get('objective_field')
        try:
            # the objective can be given as a dict holding the id
            objective_id = objective_id.get('id')
        except AttributeError:
            pass
        referrer_objective = resource_fields.field_name(objective_id)
        referrer_input_fields.append([
            name for name in referrer_fields_names
            if name != referrer_objective
        ])
    if input_fields in referrer_input_fields:
        return []
    # NOTE(review): the referrer field ids (not `input_fields_ids`) are
    # returned here, as in the original code -- confirm this is intended.
    return referrer_fields.fields.keys()
Exemplo n.º 2
0
    def create_execution(self,
                         origin_resource,
                         args=None,
                         wait_time=3,
                         retries=10):
        """Creates an execution from a `script` or a list of `scripts`.

        `origin_resource` may be a single script (string id or resource
        dict) or an iterable of them. Every script is passed through
        `check_resource` before the creation request is sent.
        Raises Exception when the ids cannot be extracted or when any of
        them is not of the script type.

        """
        payload = {}
        if args is not None:
            payload.update(args)

        # a lone script is handled as a one-element list
        is_single = isinstance(origin_resource, (basestring, dict))
        scripts = [origin_resource] if is_single else origin_resource
        try:
            script_ids = [get_script_id(item) for item in scripts]
        except TypeError:
            raise Exception("A script id or a list of them is needed to create"
                            " a script execution. %s found." %
                            get_resource_type(origin_resource))

        # the type of every id is computed before any of them is judged
        id_types = [get_resource_type(script_id) for script_id in script_ids]
        if any(id_type != SCRIPT_PATH for id_type in id_types):
            raise Exception("A script id or a list of them is needed to create"
                            " a script execution. %s found." %
                            get_resource_type(origin_resource))
        for item in scripts:
            check_resource(item,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time,
                           retries=retries,
                           raise_on_error=True,
                           api=self)

        # several scripts go under "scripts", a single one under "script"
        if len(scripts) > 1:
            payload["scripts"] = script_ids
        else:
            payload["script"] = script_ids[0]

        request_body = json.dumps(payload)
        return self._create(self.execution_url, request_body)
Exemplo n.º 3
0
    def create_projection(self, pca, input_data=None,
                          args=None, wait_time=3, retries=10):
        """Creates a new projection.

        `pca` can be a PCA resource or its id. Raises Exception when
        its resource type is not the PCA one. `args` (if given) are sent
        along with `input_data` and the PCA id.

        """
        kind = get_resource_type(pca)
        if kind != PCA_PATH:
            raise Exception("A PCA resource id is needed"
                            " to create a projection. %s found." %
                            kind)

        pca_id = get_resource_id(pca)
        has_id = pca_id is not None
        if has_id:
            check_resource(pca_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        # the "pca" key is only sent when an id could be extracted
        if has_id:
            payload["pca"] = pca_id

        request_body = json.dumps(payload)
        return self._create(self.projection_url, request_body,
                            verify=self.verify)
Exemplo n.º 4
0
def get_input_fields(resource, referrer=None):
    """New list of input fields

    Compares the input field ids of `resource` (plus its objective field
    when it is a model) with the field ids of `referrer`.

    :param resource: resource dict. Its 'input_fields' key is read, plus
                     'objective_field' when it is a model.
    :param referrer: dict of the resource this one was built from, or None.
    :return: `[]` when both sets of ids match, otherwise the list of input
             field ids (including the model objective id when it was
             missing from them). Without referrer, a copy of the
             resource's 'input_fields'.

    """
    if referrer is None:
        referrer = {}
    # work on a copy: adding the objective field below must not modify the
    # list stored in the caller's resource
    input_fields_ids = list(resource.get('input_fields', []))
    if referrer:
        referrer_fields = Fields(
            {'resource': referrer['resource'], 'object': referrer})
        referrer_fields_ids = referrer_fields.fields.keys()
        # case where objective field is not in input fields
        # check whether the resource has an objective field not included in
        # the input fields list
        resource_type = get_resource_type(resource)
        if resource_type == 'model':
            objective_id = resource.get('objective_field')
            try:
                # the objective can be given as a dict holding the id
                objective_id = objective_id.get('id')
            except AttributeError:
                pass
            if objective_id is not None and \
                    objective_id not in input_fields_ids:
                input_fields_ids.append(objective_id)
        # `list.sort()` returns None, so comparing the results of two
        # `.sort()` calls was always True; `sorted` compares the contents
        if sorted(input_fields_ids) == sorted(referrer_fields_ids):
            return []
    return input_fields_ids
Exemplo n.º 5
0
def check_model_fields(model):
    """Tells whether the model structure holds the fields information
    that is required

    Returns False when the structure check fails, when the input fields
    are missing (except for fusions) or when some field listed in
    `model_fields` is not described in the fields structure.

    """
    inner_key = FIELDS_PARENT.get(get_resource_type(model), 'model')
    if not check_model_structure(model, inner_key):
        return False

    model = model.get('object', model)
    nested_fields = model.get(inner_key, {}).get('fields')
    fields = model.get("fields", nested_fields)
    input_fields = model.get("input_fields")
    # models only need model_fields to work. The rest of resources will
    # need all fields to work
    model_fields = model.get(inner_key, {}).get('model_fields', {}).keys()
    # fusions don't have input fields
    if inner_key != "fusion" and input_fields is None:
        return False

    if model_fields:
        if fields is None:
            return False
        available_ids = fields.keys()
        for field_id in model_fields:
            if field_id not in available_ids:
                return False
        return True

    nested_meta = model.get(inner_key, {}).get('fields_meta', {})
    fields_meta = model.get('fields_meta', nested_meta)
    try:
        return fields_meta['count'] == fields_meta['total']
    except KeyError:
        # stored old models will not have the fields_meta info, so
        # we return True to avoid failing in this case
        return True
Exemplo n.º 6
0
def check_model_fields(model):
    """Tells whether the model structure holds the fields information
    that is required

    Returns False when the structure check fails or when some field
    listed in `model_fields` is not described in the fields structure.

    """
    inner_key = FIELDS_PARENT.get(get_resource_type(model), 'model')
    if not check_model_structure(model, inner_key):
        return False

    model = model.get('object', model)
    nested_fields = model.get(inner_key, {}).get('fields')
    fields = model.get("fields", nested_fields)
    # models only need model_fields to work. The rest of resources will
    # need all fields to work
    model_fields = model.get(inner_key, {}).get('model_fields', {}).keys()

    if model_fields:
        if fields is None:
            return False
        available_ids = fields.keys()
        for field_id in model_fields:
            if field_id not in available_ids:
                return False
        return True

    nested_meta = model.get(inner_key, {}).get('fields_meta', {})
    fields_meta = model.get('fields_meta', nested_meta)
    try:
        return fields_meta['count'] == fields_meta['total']
    except KeyError:
        # stored old models will not have the fields_meta info, so
        # we return True to avoid failing in this case
        return True
Exemplo n.º 7
0
    def reify_python(self, alias=None):
        """REST call command line in python. See ``reify`` method.

        Builds the text of an ``api.<action>_<resource>(...)`` call followed
        by the ``api.ok(...)`` line for the same variable.

        """

        def _with_block(current, value):
            """Appends the pretty-printed `value` as a continuation line"""
            pretty = pprint.pformat(value).replace("\n", "\n%s" % INDENT)
            return "%s, \\\n%s%s" % (current, INDENT, pretty)

        resource_type = get_resource_type(self.resource_id)
        variable = resource_alias(self.resource_id, alias)
        method_suffix = RENAMED_RESOURCES.get(resource_type, resource_type)

        call_args = ", ".join(
            [resource_alias(origin_id, alias) for origin_id in self.origins])
        if self.suffix:
            call_args = "%s%s" % (call_args, self.suffix)
        if self.input_data:
            call_args = _with_block(call_args, self.input_data)
        if self.args:
            sort_lists(self.args)
            call_args = _with_block(call_args, self.args)

        return "%s = api.%s_%s(%s)\napi.ok(%s)\n\n" % (
            variable, self.action, method_suffix, call_args, variable)
Exemplo n.º 8
0
    def create_centroid(self, cluster, input_data=None,
                        args=None, wait_time=3, retries=10):
        """Creates a new centroid.

        Raises Exception when `cluster` is not of the cluster resource
        type. `args` (if given) are sent along with `input_data` and the
        cluster id.

        """
        kind = get_resource_type(cluster)
        if kind != CLUSTER_PATH:
            raise Exception("A cluster id is needed to create a"
                            " centroid. %s found." % kind)

        cluster_id = get_cluster_id(cluster)
        check_resource(cluster_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["cluster"] = cluster_id

        request_body = json.dumps(payload)
        return self._create(self.centroid_url, request_body,
                            verify=self.verify)
Exemplo n.º 9
0
    def create_forecast(self, time_series, input_data=None,
                        args=None, wait_time=3, retries=10):
        """Creates a new forecast.

        Raises Exception when no time series id can be obtained from
        `time_series`. `args` (if given) are sent along with `input_data`
        and the time series id.

        """
        time_series_id = get_time_series_id(time_series)
        kind = get_resource_type(time_series_id)
        if kind != TIME_SERIES_PATH or time_series_id is None:
            raise Exception("A time series model id is needed to create a"
                            " forecast. %s found." % kind)

        check_resource(time_series_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        # the id cannot be None at this point (see the guard above)
        payload["timeseries"] = time_series_id

        request_body = json.dumps(payload)
        return self._create(self.forecast_url, request_body,
                            verify=self.verify_prediction)
Exemplo n.º 10
0
    def create_association_set(self, association, input_data=None,
                               args=None, wait_time=3, retries=10):
        """Creates a new association set.

        Raises Exception when `association` is not of the association
        resource type. `args` (if given) are sent along with `input_data`
        and the association id.

        """
        kind = get_resource_type(association)
        if kind != ASSOCIATION_PATH:
            raise Exception("A association id is needed to create an"
                            " association set. %s found." % kind)

        association_id = get_association_id(association)
        check_resource(association_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["association"] = association_id

        request_body = json.dumps(payload)
        return self._create(self.association_set_url, request_body,
                            verify=self.verify)
Exemplo n.º 11
0
    def reify_python(self, alias=None):
        """REST call command line in python. See ``reify`` method.

        Builds the text of an ``api.<action>_<resource>(...)`` call followed
        by the ``api.ok(...)`` line for the same variable.

        """

        def _with_block(current, value):
            """Appends the pretty-printed `value` as a continuation line"""
            pretty = pprint.pformat(value).replace("\n", "\n%s" % INDENT)
            return "%s, \\\n%s%s" % (current, INDENT, pretty)

        resource_type = get_resource_type(self.resource_id)
        variable = resource_alias(self.resource_id, alias)
        method_suffix = RENAMED_RESOURCES.get(resource_type, resource_type)

        call_args = ", ".join(
            [resource_alias(origin_id, alias) for origin_id in self.origins])
        if self.suffix:
            call_args = "%s%s" % (call_args, self.suffix)
        if self.input_data:
            call_args = _with_block(call_args, self.input_data)
        if self.args:
            sort_lists(self.args)
            call_args = _with_block(call_args, self.args)

        return "%s = api.%s_%s(%s)\napi.ok(%s)\n\n" % (
            variable, self.action, method_suffix, call_args, variable)
Exemplo n.º 12
0
    def create_anomaly_score(self, anomaly, input_data=None,
                             args=None, wait_time=3, retries=10):
        """Creates a new anomaly score.

        Raises Exception when `anomaly` is not of the anomaly detector
        resource type. `args` (if given) are sent along with `input_data`
        and the anomaly detector id.

        """
        kind = get_resource_type(anomaly)
        if kind != ANOMALY_PATH:
            raise Exception("An anomaly detector id is needed to create an"
                            " anomaly score. %s found." % kind)

        anomaly_id = get_anomaly_id(anomaly)
        check_resource(anomaly_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["anomaly"] = anomaly_id

        request_body = json.dumps(payload)
        return self._create(self.anomaly_score_url, request_body,
                            verify=self.verify)
Exemplo n.º 13
0
    def create_correlation(self, dataset, args=None, wait_time=3, retries=10):
        """Creates a correlation from a `dataset`.

        Raises Exception when `dataset` is not of the dataset resource
        type. `args` (if given) are sent along with the dataset id.

        """
        kind = get_resource_type(dataset)
        if kind != DATASET_PATH:
            raise Exception("A dataset id is needed to create a"
                            " correlation. %s found." % kind)

        dataset_id = get_dataset_id(dataset)
        check_resource(dataset_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time,
                       retries=retries,
                       raise_on_error=True,
                       api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["dataset"] = dataset_id

        request_body = json.dumps(payload)
        return self._create(self.correlation_url, request_body)
Exemplo n.º 14
0
    def create_anomaly_score(self,
                             anomaly,
                             input_data=None,
                             args=None,
                             wait_time=3,
                             retries=10):
        """Creates a new anomaly score.

        Raises Exception when `anomaly` is not of the anomaly detector
        resource type. `args` (if given) are sent along with `input_data`
        and the anomaly detector id.

        """
        kind = get_resource_type(anomaly)
        if kind != ANOMALY_PATH:
            raise Exception("An anomaly detector id is needed to create an"
                            " anomaly score. %s found." % kind)

        anomaly_id = get_anomaly_id(anomaly)
        check_resource(anomaly_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time,
                       retries=retries,
                       raise_on_error=True,
                       api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["anomaly"] = anomaly_id

        request_body = json.dumps(payload)
        return self._create(self.anomaly_score_url, request_body,
                            verify=self.verify)
Exemplo n.º 15
0
    def create_forecast(self, time_series, input_data=None,
                        args=None, wait_time=3, retries=10):
        """Creates a new forecast.

        Raises Exception when no time series id can be obtained from
        `time_series`. `args` (if given) are sent along with `input_data`
        and the time series id.

        """
        time_series_id = get_time_series_id(time_series)
        kind = get_resource_type(time_series_id)
        if kind != TIME_SERIES_PATH or time_series_id is None:
            raise Exception("A time series model id is needed to create a"
                            " forecast. %s found." % kind)

        check_resource(time_series_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        # the id cannot be None at this point (see the guard above)
        payload["timeseries"] = time_series_id

        request_body = json.dumps(payload)
        return self._create(self.forecast_url, request_body,
                            verify=self.verify_prediction)
Exemplo n.º 16
0
    def create_topic_distribution(self, topic_model, input_data=None,
                                  args=None, wait_time=3, retries=10):
        """Creates a new topic distribution.

        Raises Exception when no topic model id can be obtained from
        `topic_model`. `args` (if given) are sent along with `input_data`
        and the topic model id.

        """
        topic_model_id = get_topic_model_id(topic_model)
        if topic_model_id is None:
            # the type is only looked up to build the error message
            kind = get_resource_type(topic_model)
            raise Exception("A topic model id is needed to create a"
                            " topic distribution. %s found." % kind)

        check_resource(topic_model_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["topicmodel"] = topic_model_id

        request_body = json.dumps(payload)
        return self._create(self.topic_distribution_url, request_body,
                            verify=self.verify_prediction)
Exemplo n.º 17
0
    def retrieve_resource(self, resource_id, query_string=None,
                          check_local_fn=None, retries=None):
        """ Retrieves resource info either from the local repo or
            from the remote server

            When `self.storage` is set, the file named after the resource
            id (with "/" replaced by "_") in that directory is read first.
            Its contents are returned if `check_local_fn` is None or
            returns a true value for them. Otherwise (or when the file
            cannot be read) the resource is requested through
            `check_resource` using the getter registered for its type.

            Raises ValueError when the stored file does not contain JSON
            or when the credentials needed for the remote call are empty.

        """
        if query_string is None:
            query_string = ''
        if self.storage is not None:
            stored_resource = os.path.join(self.storage,
                                           resource_id.replace("/", "_"))
            try:
                with open(stored_resource) as resource_file:
                    resource = json.loads(resource_file.read())
                # we check that the stored resource has the information
                # needed (for instance, input_fields for predicting)
                if check_local_fn is None or check_local_fn(resource):
                    return resource
            except ValueError:
                # the file name was never interpolated in this message, so
                # users saw a literal "%s" instead of the offending path
                raise ValueError("The file %s contains no JSON" %
                                 stored_resource)
            except IOError:
                # no readable local copy: fall back to the remote server
                pass
        if self.auth == '?username=;api_key=;':
            raise ValueError("The credentials information is missing. This"
                             " information is needed to download resource %s"
                             " for the first time and store it locally for further"
                             " use. Please export BIGML_USERNAME"
                             " and BIGML_API_KEY."  % resource_id)
        api_getter = self.getters[get_resource_type(resource_id)]
        resource = check_resource(resource_id, api_getter, query_string,
                                  retries=retries)
        return resource
Exemplo n.º 18
0
def get_input_fields(resource, referrer=None):
    """New list of input fields

    Compares the input field ids of `resource` (plus its objective field
    when it is a model) with the field ids of `referrer`.

    :param resource: resource dict. Its 'input_fields' key is read, plus
                     'objective_field' when it is a model.
    :param referrer: dict of the resource this one was built from, or None.
    :return: `[]` when both sets of ids match, otherwise the list of input
             field ids (including the model objective id when it was
             missing from them). Without referrer, a copy of the
             resource's 'input_fields'.

    """
    if referrer is None:
        referrer = {}
    # work on a copy: adding the objective field below must not modify the
    # list stored in the caller's resource
    input_fields_ids = list(resource.get('input_fields', []))
    if referrer:
        referrer_fields = Fields({
            'resource': referrer['resource'],
            'object': referrer
        })
        referrer_fields_ids = referrer_fields.fields.keys()
        # case where objective field is not in input fields
        # check whether the resource has an objective field not included in
        # the input fields list
        resource_type = get_resource_type(resource)
        if resource_type == 'model':
            objective_id = resource.get('objective_field')
            try:
                # the objective can be given as a dict holding the id
                objective_id = objective_id.get('id')
            except AttributeError:
                pass
            if objective_id is not None and \
                    objective_id not in input_fields_ids:
                input_fields_ids.append(objective_id)
        # `list.sort()` returns None, so comparing the results of two
        # `.sort()` calls was always True; `sorted` compares the contents
        if sorted(input_fields_ids) == sorted(referrer_fields_ids):
            return []
    return input_fields_ids
Exemplo n.º 19
0
    def get_fields(self, resource):
        """Retrieve fields used by a resource.

        Returns a dictionary with the fields that uses
        the resource keyed by Id. When `resource` is neither a resource
        dict nor the id of a resource type that has fields, an error is
        logged and None is returned.

        """

        given_as_dict = isinstance(resource, dict) and 'resource' in resource
        if given_as_dict:
            resource_id = resource['resource']
        elif (isinstance(resource, basestring)
              and get_resource_type(resource) in RESOURCES_WITH_FIELDS):
            resource_id = resource
            resource = self.retrieve_resource(resource_id,
                                              query_string=ALL_FIELDS)
        else:
            LOGGER.error("Wrong resource id")
            return None

        # Tries to extract fields information from resource dict. If it fails,
        # a get remote call is used to retrieve the resource by id.
        try:
            return get_fields(resource)
        except KeyError:
            remote_resource = self._get("%s%s" % (self.url, resource_id))
            return get_fields(remote_resource)
Exemplo n.º 20
0
def get_fields_changes(resource,
                       referrer=None,
                       updatable_attrs=DEFAULT_UPDATABLE):
    """Changed field attributes

    Returns a dict keyed by field id whose values are dicts with the
    attributes to be reported for that field. Fields with nothing to
    report are left out.

    """
    if referrer is None:
        referrer = {}
    changes = {}

    own_fields = Fields({
        'resource': resource['resource'],
        'object': resource
    }).fields

    # for sources, extract all the updatable attributes
    if get_resource_type(resource) == 'source':
        for field_id, field in own_fields.items():
            kept = {}
            for attribute in SOURCE_UPDATABLE:
                value = field.get(attribute)
                if value:
                    kept[attribute] = value
            if kept:
                changes[field_id] = kept
        return changes

    # for the rest of resources, check which attributes changed
    if not referrer:
        return changes

    previous_fields = Fields({
        'resource': referrer['resource'],
        'object': referrer
    }).fields
    for field_id, field in own_fields.items():
        if field_id not in previous_fields:
            continue
        kept = {}
        for attribute in updatable_attrs:
            current = field.get(attribute, "")
            previous = previous_fields[field_id].get(attribute, "")
            # empty values and values equal to the referrer's are skipped
            if current not in ("", previous):
                kept[attribute] = current
        if kept:
            changes[field_id] = kept
    return changes
Exemplo n.º 21
0
def non_default_opts(resource, opts, call="create"):
    """Stores the options that are not constant defaults

    For each attribute registered in DEFAULTS for the resource type and
    the given `call`, `opts[call]` is updated in place with the result of
    `default_setting`.

    """
    kind = get_resource_type(resource)
    call_defaults = DEFAULTS[kind].get(call, {})
    for attribute in call_defaults:
        default_args = call_defaults[attribute]
        opts[call].update(default_setting(resource, attribute, *default_args))
Exemplo n.º 22
0
def non_default_opts(resource, opts, call="create"):
    """Stores the options that are not constant defaults

    For each attribute registered in DEFAULTS for the resource type and
    the given `call`, `opts[call]` is updated in place with the result of
    `default_setting`.

    """
    kind = get_resource_type(resource)
    call_defaults = DEFAULTS[kind].get(call, {})
    for attribute in call_defaults:
        default_args = call_defaults[attribute]
        opts[call].update(default_setting(resource, attribute, *default_args))
Exemplo n.º 23
0
    def create_prediction(self, model, input_data=None,
                          args=None, wait_time=3, retries=10, by_name=True):
        """Creates a new prediction.
           The model parameter can be:
            - a simple tree model
            - a simple logistic regression model
            - an ensemble
           Any other resource type raises an Exception.
           The by_name argument is now deprecated. It will be removed.

        """
        kind = get_resource_type(model)
        if kind == ENSEMBLE_PATH:
            origin_key = "ensemble"
            origin_id = get_ensemble_id(model)
            # ensembles are only checked when an id could be extracted
            needs_check = origin_id is not None
        elif kind == MODEL_PATH:
            origin_key = "model"
            origin_id = get_model_id(model)
            needs_check = True
        elif kind == LOGISTIC_REGRESSION_PATH:
            origin_key = "logisticregression"
            origin_id = get_logistic_regression_id(model)
            needs_check = True
        else:
            raise Exception("A model or ensemble id is needed to create a"
                            " prediction. %s found." % kind)

        if needs_check:
            check_resource(origin_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        # the origin key is only sent when its id is available
        if origin_id is not None:
            payload[origin_key] = origin_id

        request_body = json.dumps(payload)
        return self._create(self.prediction_url, request_body,
                            verify=self.verify_prediction)
Exemplo n.º 24
0
    def create_prediction(self, model, input_data=None,
                          args=None, wait_time=3, retries=10, by_name=True):
        """Creates a new prediction.
           The model parameter can be:
            - a simple tree model
            - a simple logistic regression model
            - an ensemble
           Any other resource type raises an Exception.
           The by_name argument is now deprecated. It will be removed.

        """
        kind = get_resource_type(model)
        if kind == ENSEMBLE_PATH:
            origin_key = "ensemble"
            origin_id = get_ensemble_id(model)
            # ensembles are only checked when an id could be extracted
            needs_check = origin_id is not None
        elif kind == MODEL_PATH:
            origin_key = "model"
            origin_id = get_model_id(model)
            needs_check = True
        elif kind == LOGISTIC_REGRESSION_PATH:
            origin_key = "logisticregression"
            origin_id = get_logistic_regression_id(model)
            needs_check = True
        else:
            raise Exception("A model or ensemble id is needed to create a"
                            " prediction. %s found." % kind)

        if needs_check:
            check_resource(origin_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        # the origin key is only sent when its id is available
        if origin_id is not None:
            payload[origin_key] = origin_id

        request_body = json.dumps(payload)
        return self._create(self.prediction_url, request_body,
                            verify=self.verify_prediction)
Exemplo n.º 25
0
def get_input_fields(resource, referrer=None):
    """New list of input fields

    Compares, by field name, the `input_fields` of `resource` with the
    fields found in `referrer`.

    :param resource: dict with the resource information. Its `input_fields`
                     list is read, and when a referrer is given also its
                     `resource` and `objective_field` attributes
    :param referrer: (optional) dict with the information of the resource
                     used as reference for the comparison
    :return: - the `input_fields` ids of `resource` when no referrer is
               given (there is nothing to compare against)
             - an empty list when the sorted input field names are empty,
               match the referrer field names or, for models, match the
               referrer field names once the objective field is excluded
             - the keys of the referrer fields structure otherwise
    """
    input_fields_ids = resource.get('input_fields', [])
    if not referrer:
        # without a referrer no `referrer_fields` can be built, so the
        # resource's own list is the only information available
        return input_fields_ids

    # compare fields by name
    resource_fields = Fields(
        {'resource': resource['resource'], 'object': resource})
    referrer_fields = Fields(
        {'resource': referrer['resource'], 'object': referrer})
    input_fields = sorted(
        [resource_fields.field_name(field_id)
         for field_id in input_fields_ids])
    if get_resource_type(referrer) == 'dataset':
        # datasets are compared using only their preferred fields
        referrer_fields = Fields(referrer_fields.preferred_fields())
        referrer_fields_names = sorted(
            [field['name'] for _, field in referrer_fields.fields.items()])
    else:
        referrer_fields_names = sorted(referrer_fields.fields_by_name.keys())

    # lists of names that mean "nothing to declare": no input fields at all
    # or exactly the referrer fields
    referrer_input_fields = [[], referrer_fields_names]

    # check whether the resource has an objective field not included in
    # the input fields list
    if get_resource_type(resource) == 'model':
        objective_id = resource.get('objective_field')
        try:
            # the objective field can be a dict that stores the id
            objective_id = objective_id.get('id')
        except AttributeError:
            pass
        referrer_objective = resource_fields.field_name(objective_id)
        referrer_input_fields.append(
            [name for name in referrer_fields_names
             if name != referrer_objective])

    if input_fields in referrer_input_fields:
        return []
    return referrer_fields.fields.keys()
Exemplo n.º 26
0
    def create_execution(self, origin_resource, args=None,
                         wait_time=3, retries=10):
        """Creates an execution from a `script` or a list of `scripts`.

        :param origin_resource: a single script (string or dict) or an
                                iterable of them
        :param args: (optional) extra arguments added to the request body
        :param wait_time: forwarded to `check_resource`
        :param retries: forwarded to `check_resource`
        :return: the result of `self._create` on the executions URL
        :raises Exception: when any of the given resources is not a script

        """

        create_args = dict(args) if args is not None else {}

        # a string or a dict is a single script; anything else is
        # handled as a collection of scripts
        if isinstance(origin_resource, (basestring, dict)):
            scripts = [origin_resource]
        else:
            scripts = origin_resource

        try:
            script_ids = [get_script_id(script) for script in scripts]
        except TypeError:
            raise Exception("A script id or a list of them is needed to create"
                            " a script execution. %s found." %
                            get_resource_type(origin_resource))

        found_types = [get_resource_type(script_id)
                       for script_id in script_ids]
        if any(found_type != SCRIPT_PATH for found_type in found_types):
            raise Exception("A script id or a list of them is needed to create"
                            " a script execution. %s found." %
                            get_resource_type(origin_resource))

        for script in scripts:
            check_resource(script,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)

        # several scripts go in the `scripts` argument, one in `script`
        if len(scripts) > 1:
            create_args["scripts"] = script_ids
        else:
            create_args["script"] = script_ids[0]

        return self._create(self.execution_url, json.dumps(create_args))
Exemplo n.º 27
0
    def create_model(self, origin_resource, args=None, wait_time=3, retries=10):
        """Creates a model from an origin_resource.

        Uses a remote resource to create a new model using the
        arguments in `args`.
        The allowed remote resources can be:
            - dataset
            - list of datasets
            - cluster
        In the case of using cluster id as origin_resource, a centroid must
        also be provided in the args argument. The first centroid is used
        otherwise.

        :param origin_resource: dataset, list of datasets or cluster
        :param args: (optional) dict of arguments added to the request body
        :param wait_time: forwarded to the resource checks
        :param retries: forwarded to the resource checks
        :return: the result of `self._create` on the models URL
        :raises KeyError: when the origin is a cluster, no centroid is
                          given and none can be read from the cluster
        :raises Exception: when the origin resource type is not supported

        """

        create_args = {}
        if args is not None:
            create_args.update(args)
        if isinstance(origin_resource, list):
            # multidatasets
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries)
        else:
            resource_type = get_resource_type(origin_resource)
            # model from cluster and centroid
            if resource_type == CLUSTER_PATH:
                cluster_id = get_cluster_id(origin_resource)
                cluster = check_resource(cluster_id,
                                         query_string=TINY_RESOURCE,
                                         wait_time=wait_time,
                                         retries=retries,
                                         raise_on_error=True, api=self)
                if 'centroid' not in create_args:
                    try:
                        # first key of the mapping; unlike `.keys()[0]`
                        # this also works when keys() is a view, and an
                        # empty mapping is reported like a missing one
                        centroid = next(
                            iter(cluster['object']['cluster_models']))
                    except (KeyError, StopIteration):
                        raise KeyError("Failed to generate the model. A "
                                       "centroid id is needed in the args "
                                       "argument to generate a model from "
                                       "a cluster.")
                    create_args.update({'centroid': centroid})
                create_args.update({'cluster': cluster_id})
            elif resource_type == DATASET_PATH:
                create_args = self._set_create_from_datasets_args(
                    origin_resource, args=create_args, wait_time=wait_time,
                    retries=retries)
            else:
                raise Exception("A dataset, list of dataset ids"
                                " or cluster id plus centroid id are needed"
                                " to create a"
                                " model. %s found." % resource_type)

        body = json.dumps(create_args)
        return self._create(self.model_url, body)
Exemplo n.º 28
0
    def create_model(self, origin_resource, args=None, wait_time=3, retries=10):
        """Creates a model from an origin_resource.

        Uses a remote resource to create a new model using the
        arguments in `args`.
        The allowed remote resources can be:
            - dataset
            - list of datasets
            - cluster
        In the case of using cluster id as origin_resource, a centroid must
        also be provided in the args argument. The first centroid is used
        otherwise.

        :param origin_resource: dataset, list of datasets or cluster
        :param args: (optional) dict of arguments added to the request body
        :param wait_time: forwarded to the resource checks
        :param retries: forwarded to the resource checks
        :return: the result of `self._create` on the models URL
        :raises KeyError: when the origin is a cluster, no centroid is
                          given and none can be read from the cluster
        :raises Exception: when the origin resource type is not supported

        """

        create_args = {}
        if args is not None:
            create_args.update(args)
        if isinstance(origin_resource, list):
            # multidatasets
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries)
        else:
            resource_type = get_resource_type(origin_resource)
            # model from cluster and centroid
            if resource_type == CLUSTER_PATH:
                cluster_id = get_cluster_id(origin_resource)
                cluster = check_resource(cluster_id,
                                         query_string=TINY_RESOURCE,
                                         wait_time=wait_time,
                                         retries=retries,
                                         raise_on_error=True, api=self)
                if 'centroid' not in create_args:
                    try:
                        # first key of the mapping; unlike `.keys()[0]`
                        # this also works when keys() is a view, and an
                        # empty mapping is reported like a missing one
                        centroid = next(
                            iter(cluster['object']['cluster_models']))
                    except (KeyError, StopIteration):
                        raise KeyError("Failed to generate the model. A "
                                       "centroid id is needed in the args "
                                       "argument to generate a model from "
                                       "a cluster.")
                    create_args.update({'centroid': centroid})
                create_args.update({'cluster': cluster_id})
            elif resource_type == DATASET_PATH:
                create_args = self._set_create_from_datasets_args(
                    origin_resource, args=create_args, wait_time=wait_time,
                    retries=retries)
            else:
                raise Exception("A dataset, list of dataset ids"
                                " or cluster id plus centroid id are needed"
                                " to create a"
                                " model. %s found." % resource_type)

        body = json.dumps(create_args)
        return self._create(self.model_url, body)
Exemplo n.º 29
0
def get_fields_changes(resource, referrer=None,
                       updatable_attrs=DEFAULT_UPDATABLE):
    """Changed field attributes

    :param resource: dict with the resource information (its `resource`
                     attribute is used to build the fields structure)
    :param referrer: (optional) dict with the resource that the field
                     attributes are compared against
    :param updatable_attrs: attributes to be checked. Ignored for sources,
                            where `SOURCE_UPDATABLE` is always used
    :return: dict keyed by field id whose values are dicts with the
             attributes to be set. For sources, every updatable attribute
             with a truthy value is included. For the rest of resources,
             only the non-empty attributes that differ from the ones in
             the referrer field with the same id are included, so the
             result is empty when no referrer is given
    """
    if referrer is None:
        referrer = {}
    fields_attributes = {}

    resource_fields = Fields(
        {'resource': resource['resource'], 'object': resource}).fields
    # for sources, extract all the updatable attributes
    if get_resource_type(resource) == 'source':
        for field_id, field in resource_fields.items():
            field_opts = {}
            for attribute in SOURCE_UPDATABLE:
                if field.get(attribute):
                    field_opts[attribute] = field[attribute]
            if field_opts:
                fields_attributes[field_id] = field_opts
        return fields_attributes
    # for the rest of resources, check which attributes changed
    if referrer:
        referrer_fields = Fields(
            {'resource': referrer['resource'], 'object': referrer}).fields
        for field_id, field in resource_fields.items():
            # fields that the referrer does not have cannot be compared
            if field_id not in referrer_fields:
                continue
            referrer_field = referrer_fields[field_id]
            field_opts = {}
            for attribute in updatable_attrs:
                # an empty value or the referrer's own value means that
                # the attribute was not changed
                ref_values = ["", referrer_field.get(attribute, "")]
                if field.get(attribute, "") not in ref_values:
                    field_opts[attribute] = field[attribute]
            if field_opts:
                fields_attributes[field_id] = field_opts
    return fields_attributes
Exemplo n.º 30
0
def fields_map_options(resource, referrer1, referrer2, opts, call="create"):
    """Adds the `fields_map` setting to the options of the given call

    The default map pairs every field with itself. Fields are read from
    the `model_fields` of `referrer1` when it is a model and from the
    `fields` of `referrer2` otherwise. `opts[call]` is updated in place
    with the result of `default_setting`.

    """
    # model to dataset mapping
    if get_resource_type(referrer1['resource']) == 'model':
        field_ids = referrer1['model']['model_fields']
    else:
        field_ids = referrer2['fields'].keys()
    identity_map = dict(zip(field_ids, field_ids))

    fields_map_setting = default_setting(resource, 'fields_map', identity_map)
    opts[call].update(fields_map_setting)
Exemplo n.º 31
0
    def create_script(self,
                      source_code=None,
                      args=None,
                      wait_time=3,
                      retries=10):
        """Creates a whizzml script.

        The `source_code` parameter can be:
            - the ID of an existing whizzml script, sent as `origin`
            - a URL, whose information is retrieved with
              `retrieve_script_args` (source code plus a JSON string of
              additional arguments)
            - the path to an existing file that contains the source code
            - a string that contains the source code itself

        :raises Exception: when `source_code` is None or is neither a
                           script nor a string
        :raises IOError: when reading the source code file fails

        """
        create_args = dict(args) if args is not None else {}

        if source_code is None:
            raise Exception('A valid code string'
                            ' or a script id must be provided.')

        resource_type = get_resource_type(source_code)
        if resource_type == SCRIPT_PATH:
            script_id = get_script_id(source_code)
            if script_id:
                check_resource(script_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time,
                               retries=retries,
                               raise_on_error=True,
                               api=self)
                create_args["origin"] = script_id
        elif not isinstance(source_code, basestring):
            raise Exception("A script id or a valid source code"
                            " is needed to create a"
                            " script. %s found." % resource_type)
        elif is_url(source_code):
            script_args = retrieve_script_args(source_code)
            source_code = script_args.get("source_code")
            create_args.update(json.loads(script_args.get("json")))
            # set last so that it prevails over the retrieved arguments
            create_args["source_code"] = source_code
        else:
            try:
                # strings that are not existing paths are used as code
                if os.path.exists(source_code):
                    with open(source_code) as code_file:
                        source_code = code_file.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
            create_args["source_code"] = source_code

        return self._create(self.script_url, json.dumps(create_args))
Exemplo n.º 32
0
def fields_map_options(resource, referrer1, referrer2, opts, call="create"):
    """Adds the `fields_map` setting to the options of the given call

    The default map pairs every field with itself. Fields are read from
    the `model_fields` of `referrer1` when it is a model and from the
    `fields` of `referrer2` otherwise. `opts[call]` is updated in place
    with the result of `default_setting`.

    """
    # model to dataset mapping
    if get_resource_type(referrer1['resource']) == 'model':
        field_ids = referrer1['model']['model_fields']
    else:
        field_ids = referrer2['fields'].keys()
    identity_map = dict(zip(field_ids, field_ids))

    fields_map_setting = default_setting(resource, 'fields_map', identity_map)
    opts[call].update(fields_map_setting)
Exemplo n.º 33
0
def get_resource_alias(resource_id, counts, alias):
    """Returns a human-friendly alias for the resource, creating it if needed

    An existing (truthy) entry in `alias` is returned as is. Otherwise the
    counter for the resource type in `counts` is increased, and the new
    alias, the resource type followed by that counter, is stored in
    `alias` and returned.

    """
    known_alias = alias.get(resource_id)
    if known_alias:
        return known_alias

    resource_type = get_resource_type(resource_id)
    counts[resource_type] = counts.get(resource_type, 0) + 1
    alias[resource_id] = "%s%s" % (resource_type, counts[resource_type])
    return alias[resource_id]
Exemplo n.º 34
0
def get_resource_alias(resource_id, counts, alias):
    """Returns a human-friendly alias for the resource, creating it if needed

    An existing (truthy) entry in `alias` is returned as is. Otherwise the
    counter for the resource type in `counts` is increased, and the new
    alias, the resource type followed by that counter, is stored in
    `alias` and returned.

    """
    known_alias = alias.get(resource_id)
    if known_alias:
        return known_alias

    resource_type = get_resource_type(resource_id)
    counts[resource_type] = counts.get(resource_type, 0) + 1
    alias[resource_id] = "%s%s" % (resource_type, counts[resource_type])
    return alias[resource_id]
Exemplo n.º 35
0
    def reify_resource(self, resource_id):
        """Dispatches to the `reify_<resource type>` method of this object

        Nothing is done when `get_resource_id` finds no valid id. After
        the handler is run, the stored resource is deleted if
        `self.delete` is set.

        """
        # first check if this is a valid id
        resource_id = get_resource_id(resource_id)
        if resource_id is None:
            return

        handler_name = 'reify_%s' % get_resource_type(resource_id)
        reify_handler = getattr(self, handler_name)
        self.logger("Analyzing %s.\n" % resource_id)
        reify_handler(resource_id)
        if self.delete:
            self.delete_stored_resource(resource_id)
Exemplo n.º 36
0
    def reify_resource(self, resource_id):
        """Dispatches to the `reify_<resource type>` method of this object

        Nothing is done when `get_resource_id` finds no valid id. After
        the handler is run, the stored resource is deleted if
        `self.delete` is set.

        """
        # first check if this is a valid id
        resource_id = get_resource_id(resource_id)
        if resource_id is None:
            return

        handler_name = 'reify_%s' % get_resource_type(resource_id)
        reify_handler = getattr(self, handler_name)
        self.logger("Analyzing %s.\n" % resource_id)
        reify_handler(resource_id)
        if self.delete:
            self.delete_stored_resource(resource_id)
Exemplo n.º 37
0
    def create_prediction(self,
                          model,
                          input_data=None,
                          args=None,
                          wait_time=3,
                          retries=10):
        """Creates a new prediction.
           The model parameter can be:
            - a simple tree model
            - a simple logistic regression model
            - an ensemble
            - a deepnet
            - a linear regression
            - a fusion
           Note that the old `by_name` argument has been deprecated.

           Raises an Exception when the type of `model` is not one of
           the `SUPERVISED_PATHS`. An empty dict is sent as input data
           when `input_data` is None.

        """
        resource_type = get_resource_type(model)
        if resource_type not in SUPERVISED_PATHS:
            raise Exception("A supervised model resource id is needed"
                            " to create a prediction. %s found." %
                            resource_type)

        model_id = get_resource_id(model)
        has_model_id = model_id is not None
        if has_model_id:
            check_resource(model_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time,
                           retries=retries,
                           raise_on_error=True,
                           api=self)

        create_args = dict(args) if args is not None else {}
        create_args["input_data"] = {} if input_data is None else input_data
        if has_model_id:
            create_args["model"] = model_id

        return self._create(self.prediction_url,
                            json.dumps(create_args),
                            verify=self.verify_prediction)
Exemplo n.º 38
0
    def create_library(self,
                       source_code=None,
                       args=None,
                       wait_time=3,
                       retries=10):
        """Creates a whizzml library.

        The `source_code` parameter can be:
            - the ID of an existing whizzml library, sent as `origin`
            - the path to an existing file that contains the source code
            - a string that contains the source code itself

        :raises Exception: when `source_code` is None or is neither a
                           library nor a string
        :raises IOError: when reading the source code file fails

        """
        create_args = dict(args) if args is not None else {}

        if source_code is None:
            raise Exception('A valid code string'
                            ' or a library id must be provided.')

        resource_type = get_resource_type(source_code)
        if resource_type == LIBRARY_PATH:
            library_id = get_library_id(source_code)
            if library_id:
                check_resource(library_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time,
                               retries=retries,
                               raise_on_error=True,
                               api=self)
                create_args["origin"] = library_id
        elif not isinstance(source_code, basestring):
            raise Exception("A library id or a valid source code"
                            " is needed to create a"
                            " library. %s found." % resource_type)
        else:
            try:
                # strings that are not existing paths are used as code
                if os.path.exists(source_code):
                    with open(source_code) as code_file:
                        source_code = code_file.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
            create_args["source_code"] = source_code

        return self._create(self.library_url, json.dumps(create_args))
Exemplo n.º 39
0
    def reify_python(self, alias=None):
        """REST call command line in python. See ``reify`` method.

        :param alias: (optional) dict that maps resource ids to the
                      variable names to be used in the generated code.
                      Ids with no alias are written as quoted strings
        :return: string with a `<name> = api.<action>_<type>(<arguments>)`
                 assignment followed by an `api.ok(<name>)` call

        """
        if alias is None:
            # the lookups below need a mapping: no alias, no renaming
            alias = {}

        def resource_alias(resource_id):
            """Returns the alias if found

            """
            if isinstance(resource_id, basestring):
                return alias.get(resource_id, '"%s"' % resource_id)
            elif isinstance(resource_id, list):
                alias_names = []
                for resource_id_id in resource_id:
                    alias_names.append(
                        alias.get(resource_id_id, '"%s"' % resource_id_id))
                return repr(alias_names)

        resource_type = get_resource_type(self.resource_id)
        resource_name = resource_alias(self.resource_id)
        # some resource types use a different name in the api methods
        resource_method_suffix = RENAMED_RESOURCES.get(
            resource_type, resource_type)
        origin_names = [resource_alias(resource_id) for resource_id
                        in self.origins]

        arguments = ", ".join(origin_names)
        if self.suffix:
            arguments = "%s%s" % (arguments, self.suffix)
        if self.input_data:
            # every pretty-printed line is indented in the generated code
            arguments = "%s, \\\n%s%s" % (
                arguments, INDENT,
                pprint.pformat(self.input_data).replace("\n", "\n%s" % INDENT))
        if self.args:
            sort_lists(self.args)
            arguments = "%s, \\\n%s%s" % (
                arguments, INDENT,
                pprint.pformat(self.args).replace("\n", "\n%s" % INDENT))
        out = "%s = api.%s_%s(%s)\napi.ok(%s)\n\n" % (
            resource_name,
            self.action,
            resource_method_suffix,
            arguments,
            resource_name)
        return out
Exemplo n.º 40
0
    def reify_python(self, alias=None):
        """REST call command line in python. See ``reify`` method.

        :param alias: (optional) dict that maps resource ids to the
                      variable names to be used in the generated code.
                      Ids with no alias are written as quoted strings
        :return: string with a `<name> = api.<action>_<type>(<arguments>)`
                 assignment followed by an `api.ok(<name>)` call

        """
        if alias is None:
            # the lookups below need a mapping: no alias, no renaming
            alias = {}

        def resource_alias(resource_id):
            """Returns the alias if found

            """
            if isinstance(resource_id, basestring):
                return alias.get(resource_id, '"%s"' % resource_id)
            elif isinstance(resource_id, list):
                alias_names = []
                for resource_id_id in resource_id:
                    alias_names.append(
                        alias.get(resource_id_id, '"%s"' % resource_id_id))
                return repr(alias_names)

        resource_type = get_resource_type(self.resource_id)
        resource_name = resource_alias(self.resource_id)
        # some resource types use a different name in the api methods
        resource_method_suffix = RENAMED_RESOURCES.get(
            resource_type, resource_type)
        origin_names = [resource_alias(resource_id) for resource_id
                        in self.origins]

        arguments = ", ".join(origin_names)
        if self.suffix:
            arguments = "%s%s" % (arguments, self.suffix)
        if self.input_data:
            # every pretty-printed line is indented in the generated code
            arguments = "%s, \\\n%s%s" % (
                arguments, INDENT,
                pprint.pformat(self.input_data).replace("\n", "\n%s" % INDENT))
        if self.args:
            sort_lists(self.args)
            arguments = "%s, \\\n%s%s" % (
                arguments, INDENT,
                pprint.pformat(self.args).replace("\n", "\n%s" % INDENT))
        out = "%s = api.%s_%s(%s)\napi.ok(%s)\n\n" % (
            resource_name,
            self.action,
            resource_method_suffix,
            arguments,
            resource_name)
        return out
Exemplo n.º 41
0
    def create_library(self, source_code=None, args=None,
                       wait_time=3, retries=10):
        """Creates a whizzml library.

        The `source_code` parameter can be:
            - the ID of an existing whizzml library, sent as `origin`
            - the path to an existing file that contains the source code
            - a string that contains the source code itself

        :raises Exception: when `source_code` is None or is neither a
                           library nor a string
        :raises IOError: when reading the source code file fails

        """
        create_args = dict(args) if args is not None else {}

        if source_code is None:
            raise Exception('A valid code string'
                            ' or a library id must be provided.')

        resource_type = get_resource_type(source_code)
        if resource_type == LIBRARY_PATH:
            library_id = get_library_id(source_code)
            if library_id:
                check_resource(library_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time, retries=retries,
                               raise_on_error=True, api=self)
                create_args["origin"] = library_id
        elif not isinstance(source_code, basestring):
            raise Exception("A library id or a valid source code"
                            " is needed to create a"
                            " library. %s found." % resource_type)
        else:
            try:
                # strings that are not existing paths are used as code
                if os.path.exists(source_code):
                    with open(source_code) as code_file:
                        source_code = code_file.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
            create_args["source_code"] = source_code

        return self._create(self.library_url, json.dumps(create_args))
Exemplo n.º 42
0
    def reify_source(self, resource_id):
        """Extracts the REST API arguments from the source JSON structure

        Builds the `create` and `update` options by checking the source
        attributes against their defaults with `u.default_setting`, and
        adds the resulting calls to this object with `self.add`.

        :param resource_id: id of the source to be analyzed

        """
        resource_type = get_resource_type(resource_id)
        child = self.get_resource(resource_id)

        opts = {"create": {}, "update": {}}

        # create options: a copy is used so that the shared DEFAULTS
        # structure is not modified by the changes below
        source_defaults = dict(DEFAULTS[resource_type].get("create", {}))
        source_defaults.update(COMMON_DEFAULTS.get("create", {}))
        # special case, sources can be named like uploaded files
        name_as_file = [child.get('file_name')]
        name_as_file.extend(source_defaults["name"])
        source_defaults["name"] = name_as_file

        for attribute, default_value in source_defaults.items():
            opts["create"].update(
                u.default_setting(child, attribute, *default_value))

        # data: remote URL, uploaded file name or a placeholder when
        # none of them is available
        if child.get('remote') is not None:
            data = child['remote']
        elif child.get('file_name') is not None:
            data = child['file_name']
        else:
            data = "UNKNOWN-INLINE-DATA"

        # update options
        source_defaults = DEFAULTS[resource_type].get("update", {})

        for attribute, default_value in source_defaults.items():
            opts["update"].update(
                u.default_setting(child, attribute, *default_value))

        # We add the information for the updatable fields only when requested.
        if self.add_fields:
            opts["update"].update({"fields": u.get_fields_changes(child)})

        calls = u.build_calls(resource_id, [data], opts)
        self.add(resource_id, calls)
Exemplo n.º 43
0
    def create_prediction(self, model, input_data=None,
                          args=None, wait_time=3, retries=10):
        """Creates a new prediction.
           The model parameter can be:
            - a simple tree model
            - a simple logistic regression model
            - an ensemble
            - a deepnet
            - a linear regression
            - a fusion
           Note that the old `by_name` argument has been deprecated.

           Raises an Exception when the type of `model` is not one of
           the `SUPERVISED_PATHS`. An empty dict is sent as input data
           when `input_data` is None.

        """
        resource_type = get_resource_type(model)
        if resource_type not in SUPERVISED_PATHS:
            raise Exception("A supervised model resource id is needed"
                            " to create a prediction. %s found." %
                            resource_type)

        model_id = get_resource_id(model)
        has_model_id = model_id is not None
        if has_model_id:
            check_resource(model_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)

        create_args = dict(args) if args is not None else {}
        create_args["input_data"] = {} if input_data is None else input_data
        if has_model_id:
            create_args["model"] = model_id

        return self._create(self.prediction_url, json.dumps(create_args),
                            verify=self.verify_prediction)
Exemplo n.º 44
0
    def reify_source(self, resource_id):
        """Extracts the REST API arguments from the source JSON structure

        Builds the `create` and `update` options by checking the source
        attributes against their defaults with `u.default_setting`, and
        adds the resulting calls to this object with `self.add`.

        :param resource_id: id of the source to be analyzed

        """
        resource_type = get_resource_type(resource_id)
        child = self.get_resource(resource_id)

        opts = {"create": {}, "update": {}}

        # create options: a copy is used so that the shared DEFAULTS
        # structure is not modified by the changes below
        source_defaults = dict(DEFAULTS[resource_type].get("create", {}))
        source_defaults.update(COMMON_DEFAULTS.get("create", {}))
        # special case, sources can be named like uploaded files
        name_as_file = [child.get('file_name')]
        name_as_file.extend(source_defaults["name"])
        source_defaults["name"] = name_as_file

        for attribute, default_value in source_defaults.items():
            opts["create"].update(
                u.default_setting(child, attribute, *default_value))

        # data: remote URL, uploaded file name or a placeholder when
        # none of them is available
        if child.get('remote') is not None:
            data = child['remote']
        elif child.get('file_name') is not None:
            data = child['file_name']
        else:
            data = "UNKNOWN-INLINE-DATA"

        # update options
        source_defaults = DEFAULTS[resource_type].get("update", {})

        for attribute, default_value in source_defaults.items():
            opts["update"].update(
                u.default_setting(child, attribute, *default_value))

        # We add the information for the updatable fields only when requested.
        if self.add_fields:
            opts["update"].update({"fields": u.get_fields_changes(child)})

        calls = u.build_calls(resource_id, [data], opts)
        self.add(resource_id, calls)
Exemplo n.º 45
0
def get_fields(resource):
    """Returns the field information in a resource dictionary structure

    :param resource: resource whose type can be found by
                     `get_resource_type`. For types in
                     `RESOURCES_WITH_FIELDS` it must be a dict
    :return: the fields structure of the resource, read from the
             attribute set in `FIELDS_PARENT` for its type or from the
             resource itself. For samples, the fields are returned as a
             dict keyed by the `id` of each field. None is returned for
             resource types not in `RESOURCES_WITH_FIELDS`
    :raises ValueError: when the resource type cannot be found

    """
    try:
        resource_type = get_resource_type(resource)
    except ValueError:
        raise ValueError("Unknown resource structure. Failed to find"
                         " a valid resource dictionary as argument.")

    if resource_type not in RESOURCES_WITH_FIELDS:
        # no fields information is available for this resource type
        return None

    resource = resource.get('object', resource)
    # fields structure
    if resource_type in FIELDS_PARENT:
        fields = resource[FIELDS_PARENT[resource_type]].get('fields', {})
    else:
        fields = resource.get('fields', {})

    if resource_type == SAMPLE_PATH:
        # sample fields are a sequence of field dicts that carry their id
        fields = dict((field['id'], field) for field in fields)
    return fields
Exemplo n.º 46
0
def get_origin_info(resource):
    """Key and value that stores the origin resource id

    For every group of attribute names that `ORIGINS` lists for the
    resource type, the first name with a truthy value in `resource` is
    kept (only the keys of the value are kept for `ranges`). A single
    (key, value) tuple is returned when only one is found, and the list of
    tuples otherwise. The program exits if none is found.

    """
    candidate_groups = ORIGINS.get(get_resource_type(resource), [])
    found_origins = []
    for candidates in candidate_groups:
        for origin in candidates:
            info = resource.get(origin)
            if not info:
                continue
            if origin == 'ranges':
                info = info.keys()
            found_origins.append((origin, info))
            # only the first match of each group is used
            break

    if not found_origins:
        sys.exit("Failed to find the complete origin information.")
    return found_origins[0] if len(found_origins) == 1 else found_origins
Exemplo n.º 47
0
def get_origin_info(resource):
    """Locates the attribute(s) of the resource that hold its origin

    The candidate attribute names come from ORIGINS, grouped in lists of
    alternatives for the type of the resource. One (attribute, value)
    pair is produced per group that has a truthy attribute. With exactly
    one pair, the pair itself is returned; with more, the list of pairs.
    When there is none the process exits.

    """
    def first_available(names):
        """First (name, value) pair whose value is truthy, else None"""
        for name in names:
            value = resource.get(name)
            if not value:
                continue
            # for ranges only the keys are kept
            if name == 'ranges':
                value = value.keys()
            return name, value
        return None

    resource_type = get_resource_type(resource)
    groups = ORIGINS.get(resource_type, [])
    pairs = (first_available(group) for group in groups)
    found = [pair for pair in pairs if pair is not None]

    if len(found) == 1:
        return found[0]
    if not found:
        sys.exit("Failed to find the complete origin information.")
    return found
Exemplo n.º 48
0
    def create_projection(self,
                          pca,
                          input_data=None,
                          args=None,
                          wait_time=3,
                          retries=10):
        """Creates a new projection.

        `pca` can be a PCA resource or its ID; any other resource type
        raises an Exception. `wait_time` and `retries` are handed to
        `check_resource` when checking the PCA. `args` provides extra
        creation arguments and `input_data` the data to be projected
        (an empty dictionary when omitted).

        """
        found_type = get_resource_type(pca)
        if found_type != PCA_PATH:
            raise Exception("A PCA resource id is needed"
                            " to create a projection. %s found." %
                            found_type)

        pca_id = get_resource_id(pca)
        has_pca_id = pca_id is not None
        if has_pca_id:
            check_resource(pca_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time,
                           retries=retries,
                           raise_on_error=True,
                           api=self)

        # user arguments go first, so the keys set below override them
        create_args = {} if args is None else dict(args)
        create_args["input_data"] = {} if input_data is None else input_data
        if has_pca_id:
            create_args["pca"] = pca_id

        payload = json.dumps(create_args)
        return self._create(self.projection_url, payload, verify=self.verify)
Exemplo n.º 49
0
    def create_statistical_test(self, dataset, args=None, wait_time=3, retries=10):
        """Creates a statistical test from a `dataset`.

        Anything that is not a dataset raises an Exception. `wait_time`
        and `retries` are handed to `check_resource` when checking the
        dataset, and `args` provides extra creation arguments.

        """
        resource_type = get_resource_type(dataset)
        if resource_type != DATASET_PATH:
            raise Exception("A dataset id is needed to create a"
                            " statistical test. %s found." % resource_type)

        dataset_id = get_dataset_id(dataset)
        check_resource(dataset_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time,
                       retries=retries,
                       raise_on_error=True,
                       api=self)

        # user arguments go first, so the dataset id always overrides them
        create_args = {} if args is None else dict(args)
        create_args["dataset"] = dataset_id

        payload = json.dumps(create_args)
        return self._create(self.statistical_test_url, payload)
    def create_topic_distribution(self,
                                  topic_model,
                                  input_data=None,
                                  args=None,
                                  wait_time=3,
                                  retries=10):
        """Creates a new topic distribution.

        An Exception is raised when no topic model id can be extracted
        from `topic_model`. `wait_time` and `retries` are handed to
        `check_resource` when checking the topic model. `args` provides
        extra creation arguments and `input_data` the input data (an
        empty dictionary when omitted).

        """
        topic_model_id = get_topic_model_id(topic_model)
        if topic_model_id is None:
            resource_type = get_resource_type(topic_model)
            raise Exception("A topic model id is needed to create a"
                            " topic distribution. %s found." % resource_type)

        check_resource(topic_model_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time,
                       retries=retries,
                       raise_on_error=True,
                       api=self)

        # user arguments go first, so the keys set below override them
        create_args = {} if args is None else dict(args)
        create_args["input_data"] = {} if input_data is None else input_data
        create_args["topicmodel"] = topic_model_id

        payload = json.dumps(create_args)
        return self._create(self.topic_distribution_url,
                            payload,
                            verify=self.verify_prediction)
Exemplo n.º 51
0
    def reify_dataset(self, resource_id):
        """Extracts the REST API arguments from the dataset JSON structure

        The dataset and its origin resources are retrieved and compared
        to build the `create` and `update` arguments. The calls built
        from them are registered with `self.add`.

        :param resource_id: id of the dataset to be reified

        """
        child = self.get_resource(resource_id)
        origin, parent_id = u.get_origin_info(child)
        parent = self.get_resource(parent_id)

        opts = {"create": {}, "update": {}, "get": {}}

        # as two-steps result from a cluster or batch prediction, centroid
        # or anomaly score
        grandparent = parent
        if origin in ['origin_batch_resource', 'cluster']:
            if origin == "cluster":
                opts['create'].update({"centroid": child['centroid']})
            grandparents = u.get_origin_info(parent)
            # batch resources have two parents, choose the dataset
            if origin == "origin_batch_resource" and \
                    isinstance(grandparents, list):
                # if no "dataset" origin is listed, the loop leaves the
                # last listed origin in `grandparent`
                for gp_origin, grandparent in grandparents:
                    if gp_origin == "dataset":
                        break
            else:
                _, grandparent = grandparents
            grandparent = self.get_resource(grandparent)

        # options common to all model types
        call = "update" if origin == "origin_batch_resource" else "create"
        u.common_dataset_opts(child, grandparent, opts, call=call)

        # update options
        dataset_defaults = DEFAULTS["dataset"].get("update", {})

        for attribute, default_value in dataset_defaults.items():
            opts["update"].update(
                u.default_setting(child, attribute, *default_value))
        # name, exclude automatic naming alternatives
        autonames = [u'']
        u.non_automatic_name(child, opts, autonames=autonames)

        # objective field
        resource_fields = Fields({
            'resource': child['resource'],
            'object': child
        })
        objective_id = child['objective_field']['id']
        preferred_fields = resource_fields.preferred_fields()
        # if there's no preferred fields, use the fields structure
        if not preferred_fields:
            preferred_fields = resource_fields.fields
        # the objective is only stored when it is not the last non-text
        # column
        max_column = sorted(
            [field['column_number'] for _, field in preferred_fields.items()
             if field['optype'] != "text"],
            reverse=True)[0]
        objective_column = resource_fields.fields[objective_id][
            'column_number']
        if objective_column != max_column:
            opts['create'].update({"objective_field": {"id": objective_id}})

        if origin != "origin_batch_resource":
            # resize
            if (child['size'] != grandparent['size']
                    and get_resource_type(parent) == 'source'):
                opts['create'].update({"size": child['size']})

            # generated fields
            if child.get('new_fields', None):
                # The 'generator' key is renamed to 'field' on copies.
                # Renaming in place altered the retrieved resource, so
                # processing the same dictionary again failed with a
                # KeyError on 'generator'.
                new_fields = []
                for new_field in child['new_fields']:
                    new_field = dict(new_field)
                    new_field['field'] = new_field.pop('generator')
                    new_fields.append(new_field)

                opts['create'].update({"new_fields": new_fields})

            u.range_opts(child, grandparent, opts)

        # for batch_predictions, batch_clusters, batch_anomalies generated
        # datasets, attributes cannot be set at creation time, so we
        # must update the resource instead
        suffix = None
        if origin == "origin_batch_resource":
            opts["update"].update(opts["create"])
            opts["create"] = {}
            suffix = "['object']['output_dataset_resource']"
        calls = u.build_calls(resource_id, [parent_id], opts, suffix=suffix)
        self.add(resource_id, calls)
Exemplo n.º 52
0
    def create_dataset(self, origin_resource, args=None,
                       wait_time=3, retries=10):
        """Creates a remote dataset.

        Uses a remote resource to create a new dataset using the
        arguments in `args`.
        The allowed remote resources can be:
            - source
            - dataset
            - list of datasets
            - cluster
        In the case of using cluster id as origin_resources, a centroid must
        also be provided in the args argument. The first centroid is used
        otherwise.
        If `wait_time` is higher than 0 then the dataset creation
        request is not sent until the `source` has been created successfully.

        :param origin_resource: source, dataset, list of datasets or
                                cluster used as origin
        :param args: dictionary of extra creation arguments
        :param wait_time: handed to the origin resource check
        :param retries: handed to the origin resource check
        :raises KeyError: when a cluster is used, no centroid is given in
                          `args` and none can be found in the cluster
        :raises Exception: when the origin resource type is not supported

        """
        create_args = {}
        if args is not None:
            create_args.update(args)

        if isinstance(origin_resource, list):
            # multidatasets
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries, key="origin_datasets")
        else:
            # dataset from source
            resource_type = get_resource_type(origin_resource)
            if resource_type == SOURCE_PATH:
                source_id = get_source_id(origin_resource)
                if source_id:
                    check_resource(source_id,
                                   query_string=TINY_RESOURCE,
                                   wait_time=wait_time,
                                   retries=retries,
                                   raise_on_error=True, api=self)
                    create_args.update({
                        "source": source_id})
            # dataset from dataset
            elif resource_type == DATASET_PATH:
                create_args = self._set_create_from_datasets_args(
                    origin_resource, args=create_args, wait_time=wait_time,
                    retries=retries, key="origin_dataset")
            # dataset from cluster and centroid
            elif resource_type == CLUSTER_PATH:
                cluster_id = get_cluster_id(origin_resource)
                cluster = check_resource(cluster_id,
                                         query_string=TINY_RESOURCE,
                                         wait_time=wait_time,
                                         retries=retries,
                                         raise_on_error=True, api=self)
                if 'centroid' not in create_args:
                    try:
                        # list() is required because in Python 3 keys()
                        # returns a view that cannot be indexed. An empty
                        # mapping (IndexError) is reported in the same
                        # way as a missing one.
                        centroid = list(cluster['object'][
                            'cluster_datasets_ids'].keys())[0]
                    except (KeyError, IndexError):
                        raise KeyError("Failed to generate the dataset. A "
                                       "centroid id is needed in the args "
                                       "argument to generate a dataset from "
                                       "a cluster.")
                    create_args.update({'centroid': centroid})
                create_args.update({'cluster': cluster_id})
            else:
                raise Exception("A source, dataset, list of dataset ids"
                                " or cluster id plus centroid id are needed"
                                " to create a"
                                " dataset. %s found." % resource_type)

        body = json.dumps(create_args)
        return self._create(self.dataset_url, body)
Exemplo n.º 53
0
    def reify_dataset(self, resource_id):
        """Extracts the REST API arguments from the dataset JSON structure

        The dataset and its origin resources are retrieved and compared
        to build the `create` and `update` arguments. The calls built
        from them are registered with `self.add`.

        :param resource_id: id of the dataset to be reified

        """
        child = self.get_resource(resource_id)
        origin, parent_id = u.get_origin_info(child)
        parent = self.get_resource(parent_id)

        opts = {"create": {}, "update": {}}

        # as two-steps result from a cluster or batch prediction, centroid
        # or anomaly score
        if origin in ["origin_batch_resource", "cluster"]:
            if origin == "cluster":
                opts["create"].update({"centroid": child["centroid"]})
            _, grandparent = u.get_origin_info(parent)
            grandparent = self.get_resource(grandparent)
        else:
            grandparent = parent

        # options common to all model types
        u.common_dataset_opts(child, grandparent, opts)

        # update options
        # The defaults are merged into a copy: updating the dictionary
        # stored in DEFAULTS would permanently add the common defaults to
        # the module-level dataset defaults.
        dataset_defaults = dict(DEFAULTS["dataset"].get("update", {}))
        dataset_defaults.update(COMMON_DEFAULTS.get("update", {}))

        for attribute, default_value in dataset_defaults.items():
            opts["update"].update(u.default_setting(child, attribute, *default_value))

        # name, exclude automatic naming alternatives
        autonames = [u""]
        suffixes = [
            u"filtered",
            u"sampled",
            u"dataset",
            u"extended",
            u"- batchprediction",
            u"- batchanomalyscore",
            u"- batchcentroid",
            u"- merged",
        ]
        autonames.extend([u"%s %s" % (grandparent.get("name", ""), suffix) for suffix in suffixes])
        # parent name without its last dot-separated component
        parent_basename = ".".join(parent["name"].split(".")[0:-1])
        autonames.append(u"%s's dataset" % parent_basename)
        autonames.append(u"%s' dataset" % parent_basename)
        autonames.append(u"Cluster %s - %s" % (int(child.get("centroid", "0"), base=16), parent["name"]))
        autonames.append(u"Dataset from %s model - segment" % parent["name"])
        u.non_automatic_name(child, opts, autonames=autonames)

        # objective field
        resource_fields = Fields({"resource": child["resource"], "object": child})
        objective_id = child["objective_field"]["id"]
        preferred_fields = resource_fields.preferred_fields()
        # if there's no preferred fields, use the fields structure instead
        # of failing with an IndexError on the empty column list
        if not preferred_fields:
            preferred_fields = resource_fields.fields
        max_column = sorted([field["column_number"] for _, field in preferred_fields.items()], reverse=True)[0]
        objective_column = resource_fields.fields[objective_id]["column_number"]
        if objective_column != max_column:
            opts["create"].update({"objective_field": {"id": objective_id}})

        # resize
        if child["size"] != grandparent["size"] and get_resource_type(parent) == "source":
            opts["create"].update({"size": child["size"]})

        # generated fields
        if child.get("new_fields", None):
            # The "generator" key is renamed to "field" on copies.
            # Renaming in place altered the retrieved resource, so
            # processing the same dictionary again failed with a KeyError
            # on "generator".
            new_fields = []
            for new_field in child["new_fields"]:
                new_field = dict(new_field)
                new_field["field"] = new_field.pop("generator")
                new_fields.append(new_field)

            opts["create"].update({"new_fields": new_fields})

        u.range_opts(child, grandparent, opts)

        calls = u.build_calls(resource_id, [parent_id], opts)
        self.add(resource_id, calls)
Exemplo n.º 54
0
    def create_dataset(self,
                       origin_resource,
                       args=None,
                       wait_time=3,
                       retries=10):
        """Creates a remote dataset.

        Uses a remote resource to create a new dataset using the
        arguments in `args`.
        The allowed remote resources can be:
            - source
            - dataset
            - list of datasets
            - cluster
        In the case of using cluster id as origin_resources, a centroid must
        also be provided in the args argument. The first centroid is used
        otherwise.
        If `wait_time` is higher than 0 then the dataset creation
        request is not sent until the `source` has been created successfully.

        :param origin_resource: source, dataset, list of datasets or
                                cluster used as origin
        :param args: dictionary of extra creation arguments
        :param wait_time: handed to the origin resource check
        :param retries: handed to the origin resource check
        :raises KeyError: when a cluster is used, no centroid is given in
                          `args` and none can be found in the cluster
        :raises Exception: when the origin resource type is not supported

        """
        create_args = {}
        if args is not None:
            create_args.update(args)

        if isinstance(origin_resource, list):
            # multidatasets
            create_args = self._set_create_from_datasets_args(
                origin_resource,
                args=create_args,
                wait_time=wait_time,
                retries=retries,
                key="origin_datasets")
        else:
            # dataset from source
            resource_type = get_resource_type(origin_resource)
            if resource_type == SOURCE_PATH:
                source_id = get_source_id(origin_resource)
                if source_id:
                    check_resource(source_id,
                                   query_string=TINY_RESOURCE,
                                   wait_time=wait_time,
                                   retries=retries,
                                   raise_on_error=True,
                                   api=self)
                    create_args.update({"source": source_id})
            # dataset from dataset
            elif resource_type == DATASET_PATH:
                create_args = self._set_create_from_datasets_args(
                    origin_resource,
                    args=create_args,
                    wait_time=wait_time,
                    retries=retries,
                    key="origin_dataset")
            # dataset from cluster and centroid
            elif resource_type == CLUSTER_PATH:
                cluster_id = get_cluster_id(origin_resource)
                cluster = check_resource(cluster_id,
                                         query_string=TINY_RESOURCE,
                                         wait_time=wait_time,
                                         retries=retries,
                                         raise_on_error=True,
                                         api=self)
                if 'centroid' not in create_args:
                    try:
                        # list() is required because in Python 3 keys()
                        # returns a view that cannot be indexed. An empty
                        # mapping (IndexError) is reported in the same
                        # way as a missing one.
                        centroid = list(cluster['object'][
                            'cluster_datasets_ids'].keys())[0]
                    except (KeyError, IndexError):
                        raise KeyError("Failed to generate the dataset. A "
                                       "centroid id is needed in the args "
                                       "argument to generate a dataset from "
                                       "a cluster.")
                    create_args.update({'centroid': centroid})
                create_args.update({'cluster': cluster_id})
            else:
                raise Exception("A source, dataset, list of dataset ids"
                                " or cluster id plus centroid id are needed"
                                " to create a"
                                " dataset. %s found." % resource_type)

        body = json.dumps(create_args)
        return self._create(self.dataset_url, body)
Exemplo n.º 55
0
    def reify_dataset(self, resource_id):
        """Extracts the REST API arguments from the dataset JSON structure

        The dataset and its origin resources are retrieved and compared
        to build the `create` and `update` arguments. The calls built
        from them are registered with `self.add`.

        :param resource_id: id of the dataset to be reified

        """
        child = self.get_resource(resource_id)
        origin, parent_id = u.get_origin_info(child)
        parent = self.get_resource(parent_id)

        opts = {"create": {}, "update": {}, "get": {}}

        # as two-steps result from a cluster or batch prediction, centroid
        # or anomaly score
        grandparent = parent
        if origin in ['origin_batch_resource', 'cluster']:
            if origin == "cluster":
                opts['create'].update({"centroid": child['centroid']})
            grandparents = u.get_origin_info(parent)
            # batch resources have two parents, choose the dataset
            if origin == "origin_batch_resource" and \
                    isinstance(grandparents, list):
                # if no "dataset" origin is listed, the loop leaves the
                # last listed origin in `grandparent`
                for gp_origin, grandparent in grandparents:
                    if gp_origin == "dataset":
                        break
            else:
                _, grandparent = grandparents
            grandparent = self.get_resource(grandparent)

        # options common to all model types
        call = "update" if origin == "origin_batch_resource" else "create"
        u.common_dataset_opts(child, grandparent, opts, call=call)

        # update options
        dataset_defaults = DEFAULTS["dataset"].get("update", {})

        for attribute, default_value in dataset_defaults.items():
            opts["update"].update(
                u.default_setting(child, attribute, *default_value))
        # name, exclude automatic naming alternatives
        autonames = [u'']
        u.non_automatic_name(child, opts, autonames=autonames)

        # objective field
        resource_fields = Fields(
            {'resource': child['resource'], 'object': child})
        objective_id = child['objective_field']['id']
        preferred_fields = resource_fields.preferred_fields()
        # if there's no preferred fields, use the fields structure
        if not preferred_fields:
            preferred_fields = resource_fields.fields
        # the objective is only stored when it is not the last non-text
        # column
        max_column = sorted([field['column_number']
                             for _, field in preferred_fields.items()
                             if field['optype'] != "text"],
                            reverse=True)[0]
        objective_column = resource_fields.fields[objective_id][
            'column_number']
        if objective_column != max_column:
            opts['create'].update({"objective_field": {"id": objective_id}})

        if origin != "origin_batch_resource":
            # resize
            if (child['size'] != grandparent['size'] and
                    get_resource_type(parent) == 'source'):
                opts['create'].update({"size": child['size']})

            # generated fields
            if child.get('new_fields', None):
                # The 'generator' key is renamed to 'field' on copies.
                # Renaming in place altered the retrieved resource, so
                # processing the same dictionary again failed with a
                # KeyError on 'generator'.
                new_fields = []
                for new_field in child['new_fields']:
                    new_field = dict(new_field)
                    new_field['field'] = new_field.pop('generator')
                    new_fields.append(new_field)

                opts['create'].update({"new_fields": new_fields})

            u.range_opts(child, grandparent, opts)

        # for batch_predictions, batch_clusters, batch_anomalies generated
        # datasets, attributes cannot be set at creation time, so we
        # must update the resource instead
        suffix = None
        if origin == "origin_batch_resource":
            opts["update"].update(opts["create"])
            opts["create"] = {}
            suffix = "['object']['output_dataset_resource']"
        calls = u.build_calls(resource_id, [parent_id], opts, suffix=suffix)
        self.add(resource_id, calls)