Example #1
def svm_prediction(model, kernel, model_id, predictors):
    '''@svm_prediction

    This method generates an svm prediction using the provided prediction
    feature input(s), and the corresponding model stored within the NoSQL
    datastore.

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    '''

    # get necessary model
    title = Cache_Hset().uncache(
        model + '_' + kernel + '_title',
        model_id
    )['result']
    clf = Cache_Model().uncache(
        model + '_' + kernel + '_model',
        model_id + '_' + title
    )

    # get encoded labels
    encoded_labels = Cache_Model().uncache(
        model + '_' + kernel + '_labels',
        model_id
    )

    # perform prediction, and return the result
    numeric_label = (clf.predict([predictors]))
    textual_label = list(encoded_labels.inverse_transform([numeric_label]))
    return {'result': textual_label[0][0], 'error': None}
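For reference, the prediction path above reduces to the following scikit-learn pattern (encode labels, fit, predict, decode), shown here as a minimal self-contained sketch; the NoSQL caching layer is omitted, and the training data and label names are purely illustrative.

from sklearn import preprocessing, svm

# illustrative training data: two features per observation
grouped_features = [[0.0, 0.1], [0.9, 1.0], [0.1, 0.0], [1.0, 0.8]]
observation_labels = ['dep-variable-1', 'dep-variable-2',
                      'dep-variable-1', 'dep-variable-2']

# encode textual labels into integers, then fit the classifier
label_encoder = preprocessing.LabelEncoder()
encoded_labels = label_encoder.fit_transform(observation_labels)
clf = svm.SVC(kernel='rbf')
clf.fit(grouped_features, encoded_labels)

# predict on new feature input(s), and decode back to the textual label
predictors = [0.05, 0.05]
numeric_label = clf.predict([predictors])
textual_label = label_encoder.inverse_transform(numeric_label)
print({'result': textual_label[0], 'error': None})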
Example #2
def sv_prediction(model, model_id, predictors):
    '''@sv_prediction

    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the corresponding model stored
    within the NoSQL datastore.

    @clf, decoded model, containing several methods (e.g. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    '''

    # local variables
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get necessary model
    title = Cache_Hset().uncache(
        model + '_title',
        model_id
    )['result']
    clf = Cache_Model().uncache(
        model + '_model',
        model_id + '_' + title
    )

    # svm model: get encoded labels
    if model == list_model_type[0]:
        encoded_labels = Cache_Model().uncache(
            model + '_labels',
            model_id
        )

    # perform prediction
    numeric_label = (clf.predict([predictors]))

    # result: svm model
    if model == list_model_type[0]:
        textual_label = list(encoded_labels.inverse_transform([numeric_label]))
        return {'result': textual_label[0][0], 'error': None}

    # result: svr model
    elif model == list_model_type[1]:
        return {'result': str(numeric_label[0]), 'error': None}
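The svm/svr branching above depends on current_app.config.get('MODEL_TYPE'); the sketch below assumes that list is ['svm', 'svr'] (an assumption, not confirmed by this snippet) and shows the two result shapes without the caching layer.

from sklearn import preprocessing, svm

# assumed stand-in for current_app.config.get('MODEL_TYPE')
list_model_type = ['svm', 'svr']

def fit_and_predict(model, X, y, predictors):
    # svm: encode labels, fit a classifier, decode the prediction
    if model == list_model_type[0]:
        encoder = preprocessing.LabelEncoder()
        clf = svm.SVC()
        clf.fit(X, encoder.fit_transform(y))
        numeric_label = clf.predict([predictors])
        return {'result': encoder.inverse_transform(numeric_label)[0],
                'error': None}

    # svr: fit a regressor, return the numeric prediction as a string
    elif model == list_model_type[1]:
        clf = svm.SVR()
        clf.fit(X, [float(v) for v in y])
        return {'result': str(clf.predict([predictors])[0]), 'error': None}

X = [[0.0, 0.1], [0.9, 1.0], [0.1, 0.0], [1.0, 0.8]]
print(fit_and_predict('svm', X, ['a', 'b', 'a', 'b'], [0.05, 0.05]))
print(fit_and_predict('svr', X, [0.1, 1.0, 0.0, 0.9], [0.05, 0.05]))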
Example #3
def retrieve_sv_features():
    '''@retrieve_sv_features

    This router function retrieves the generalized feature properties that can
    be expected for any given observation within the supplied dataset.

    @label_list, this value will be a json object, since it was originally
        cached into redis using 'json.dumps'.

    '''

    if request.method == 'POST':
        label_list = Cache_Hset().uncache('svm_rbf_feature_labels',
                                          request.get_json()['session_id'])

        # return all feature labels
        if label_list['result']:
            return json.dumps(label_list['result'])
        else:
            return json.dumps({'error': label_list['error']})
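A minimal Flask sketch of the same route, with a plain dictionary standing in for the Redis hash behind Cache_Hset; the route path and the cache contents are assumptions for illustration only.

import json
from flask import Flask, request

app = Flask(__name__)

# assumed in-memory stand-in for the Redis hash behind Cache_Hset
feature_label_cache = {
    'example-session': json.dumps(['feature-1', 'feature-2'])
}

@app.route('/retrieve-sv-features', methods=['POST'])
def retrieve_sv_features():
    # look up the feature labels cached for the supplied session id
    session_id = request.get_json()['session_id']
    cached = feature_label_cache.get(session_id)

    # return all feature labels, or an error message
    if cached:
        return cached
    return json.dumps({'error': 'no feature labels for ' + str(session_id)})

if __name__ == '__main__':
    app.run()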
Example #4
def retrieve_sv_features():
    '''

    This router function retrieves the generalized feature properties that can
    be expected for any given observation within the supplied dataset.

    @label_list, this value will be a json object, since it was originally
        cached into redis using 'json.dumps'.

    '''

    # get model type
    model_id = request.get_json()['model_id']
    model_type = M_Type().get_model_type(model_id)['result']

    # return all feature labels
    if request.method == 'POST':
        label_list = Cache_Hset().uncache(model_type + '_feature_labels',
                                          model_id)

        if label_list['result']:
            return json.dumps(label_list['result'])
        else:
            return json.dumps({'error': label_list['error']})
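Since the docstring notes the labels were cached into redis with 'json.dumps', the hash-key layout ('<model_type>_feature_labels' keyed by model id) can be sketched with redis-py; the connection details, and the assumption that Cache_Hset wraps a Redis hash, are illustrative.

import json
import redis

# assumed connection details for the Redis instance behind Cache_Hset
r = redis.StrictRedis(host='localhost', port=6379, db=0)

model_type = 'svm'
model_id = '1'
feature_labels = ['feature-1', 'feature-2']

# cache: labels stored under '<model_type>_feature_labels', keyed by model id
r.hset(model_type + '_feature_labels', model_id, json.dumps(feature_labels))

# uncache: the stored value is a json string, so decode it before use
label_list = json.loads(r.hget(model_type + '_feature_labels', model_id))
print(label_list)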
Example #5
def sv_prediction(model, model_id, predictors):
    '''

    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the corresponding model stored
    within the NoSQL datastore.

    Additionally, the following is returned for SVM predictions:

        - array of probability a given point (predictors) is one of the
          defined set of classifiers.
        - array of sum distances a given point (predictors) is to the set
          of associated hyperplanes.

    However, the following is returned for SVR predictions:

        - coefficient of determination (r^2).


    @clf, decoded model, containing several methods (e.g. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    '''

    # local variables
    probability = None
    decision_function = None
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get necessary model
    title = Cache_Hset().uncache(
        model + '_title',
        model_id
    )['result']
    clf = Cache_Model().uncache(
        model + '_model',
        model_id + '_' + title
    )

    # perform prediction, and return the result
    prediction = (clf.predict([predictors]))

    # case 1: return svm prediction, and confidence level
    if model == list_model_type[0]:
        encoded_labels = Cache_Model().uncache(
            model + '_labels',
            model_id
        )

        textual_label = encoded_labels.inverse_transform([prediction])
        probability = clf.predict_proba([predictors])
        decision_function = clf.decision_function([predictors])
        classes = encoded_labels.inverse_transform(clf.classes_)

        return {
            'result': textual_label[0][0],
            'model': model,
            'confidence': {
                'classes': list(classes),
                'probability': list(probability[0]),
                'decision_function': list(decision_function[0])
            },
            'error': None
        }

    # case 2: return svr prediction, and confidence level
    elif model == list_model_type[1]:
        r2 = Cache_Hset().uncache(
            model + '_r2',
            model_id
        )['result']

        return {
            'result': str(prediction[0]),
            'model': model,
            'confidence': {
                'score': r2
            },
            'error': None
        }
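The confidence payload in case 1 relies on SVC being fit with probability=True (as the model-generation code later on this page does); below is a self-contained sketch of that payload, with illustrative data and class names.

from sklearn import preprocessing, svm

# illustrative training data: three classes, two features per observation
X = [[0.0, 0.1], [0.1, 0.0], [0.2, 0.1], [0.1, 0.2], [0.0, 0.2],
     [0.9, 1.0], [1.0, 0.8], [0.8, 0.9], [0.9, 0.9], [1.0, 1.0],
     [0.5, 0.5], [0.4, 0.6], [0.6, 0.4], [0.5, 0.4], [0.6, 0.6]]
labels = ['low'] * 5 + ['high'] * 5 + ['mid'] * 5

encoder = preprocessing.LabelEncoder()
y = encoder.fit_transform(labels)

# probability=True is required for predict_proba
clf = svm.SVC(kernel='rbf', probability=True)
clf.fit(X, y)

predictors = [0.05, 0.05]
prediction = clf.predict([predictors])

# confidence payload: class probabilities, and decision-function distances
result = {
    'result': encoder.inverse_transform(prediction)[0],
    'confidence': {
        'classes': list(encoder.inverse_transform(clf.classes_)),
        'probability': list(clf.predict_proba([predictors])[0]),
        'decision_function': list(clf.decision_function([predictors])[0]),
    },
    'error': None,
}
print(result)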
Example #6
    def generate_model(self):
        """@generate_model

        This method generates an svm model, using a chosen dataset from the SQL
        database.  The resulting model is stored into a NoSQL datastore.

        @grouped_features, a matrix of observations, where each nested vector,
            or python list, is a collection of features within the containing
            observation.

        @encoded_labels, observation labels (dependent variable labels),
            encoded into a unique integer representation.

        """

        # local variables
        dataset = self.feature_request.get_dataset(self.session_id)
        feature_count = self.feature_request.get_count(self.session_id)
        label_encoder = preprocessing.LabelEncoder()

        # get dataset
        if dataset['error']:
            print(dataset['error'])
            self.list_error.append(dataset['error'])
            dataset = None
        else:
            dataset = numpy.asarray(dataset['result'])

        # get feature count
        if feature_count['error']:
            print(feature_count['error'])
            self.list_error.append(feature_count['error'])
            feature_count = None
        else:
            feature_count = feature_count['result'][0][0]

        # check dataset integrity, build model
        if (dataset is not None and feature_count and
                len(dataset) % feature_count == 0):
            features_list = dataset[:, [[0], [2], [1]]]
            current_features = []
            grouped_features = []
            observation_labels = []
            feature_labels = []

            # group features into observation instances, record labels
            for index, feature in enumerate(features_list):
                if not (index + 1) % feature_count == 0:
                    # observation labels
                    current_features.append(feature[1][0])

                    # general feature labels in every observation
                    if not len(feature_labels) == feature_count:
                        feature_labels.append(feature[2][0])
                else:
                    # general feature labels in every observation
                    if not len(feature_labels) == feature_count:
                        feature_labels.append(feature[2][0])

                    current_features.append(feature[1][0])
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # convert observation labels to a unique integer representation
            label_encoder = preprocessing.LabelEncoder()
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create svm model
            clf = svm.SVC()
            clf.fit(grouped_features, encoded_labels)

            # get svm title, and cache (model, encoded labels, title)
            entity = Retrieve_Entity()
            title = entity.get_title(self.session_id)['result'][0][0]
            Cache_Model(clf).cache('svm_rbf_model',
                                   str(self.session_id) + '_' + title)
            Cache_Model(label_encoder).cache('svm_rbf_labels', self.session_id)
            Cache_Hset().cache('svm_rbf_title', self.session_id, title)

            # cache svm feature labels, with respect to given session id
            Cache_Hset().cache('svm_rbf_feature_labels', str(self.session_id),
                               json.dumps(feature_labels))
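The grouping loop above packs a flat result set (one row per feature) into one observation every feature_count rows; a simplified, self-contained version of that logic, with made-up rows, is sketched below.

# a flat result set, as the SQL layer returns it: one row per feature, where
# each observation spans 'feature_count' consecutive rows
feature_count = 2
rows = [
    ('dep-variable-1', '0.0', 'feature-1'),
    ('dep-variable-1', '0.1', 'feature-2'),
    ('dep-variable-2', '0.9', 'feature-1'),
    ('dep-variable-2', '1.0', 'feature-2'),
]

grouped_features = []
observation_labels = []
feature_labels = []
current_features = []

for index, (label, value, name) in enumerate(rows):
    current_features.append(value)

    # record the general feature labels only once
    if len(feature_labels) < feature_count:
        feature_labels.append(name)

    # every 'feature_count' rows closes one observation
    if (index + 1) % feature_count == 0:
        grouped_features.append(current_features)
        observation_labels.append(label)
        current_features = []

print(grouped_features)    # [['0.0', '0.1'], ['0.9', '1.0']]
print(observation_labels)  # ['dep-variable-1', 'dep-variable-2']
print(feature_labels)      # ['feature-1', 'feature-2']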
Example #7
def sv_model(model, kernel_type, session_id, feature_request, list_error):
    '''@sv_model

    This method generates an sv (i.e. svm, or svr) model using feature data
    retrieved from the database. The generated model is then stored within
    the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # check dataset integrity, build model
    if (dataset is not None and feature_count and
            len(dataset) % feature_count == 0):
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels
            if model == list_model_type[0]:
                current_features.append(feature[1][0])

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer representation
            label_encoder = preprocessing.LabelEncoder()
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Cache_Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Cache_Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # get title
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Cache_Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Cache_Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Cache_Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

        # return error(s) if exists
        return {'error': list_error}
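For the svr branch (case 2), the cached confidence score is simply the coefficient of determination returned by clf.score on the training data; a minimal sketch with illustrative regression data follows.

from sklearn import svm

# illustrative regression data: two features per observation
grouped_features = [[0.0, 0.1], [0.2, 0.3], [0.5, 0.4], [0.9, 1.0]]
observation_labels = [0.1, 0.3, 0.5, 0.9]

# fit the svr model, as in case 2 above
clf = svm.SVR(kernel='rbf')
clf.fit(grouped_features, observation_labels)

# coefficient of determination (r^2) on the training data, later cached
r2 = clf.score(grouped_features, observation_labels)
print({'r2': r2})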
Example #8
def svm_model(kernel_type, session_id, feature_request, list_error):
    '''@svm_model

    This method generates an svm model using feature data retrieved from the
    database. The generated model is then stored within the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    '''

    # local variables
    dataset = feature_request.get_dataset(session_id)
    feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if feature_count['error']:
        logger.log(feature_count['error'])
        list_error.append(feature_count['error'])
        feature_count = None
    else:
        feature_count = feature_count['result'][0][0]

    # check dataset integrity, build model
    if (dataset is not None and feature_count and
            len(dataset) % feature_count == 0):
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            if not (index+1) % feature_count == 0:
                # observation labels
                current_features.append(feature[1][0])

                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])
            else:
                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])

                current_features.append(feature[1][0])
                grouped_features.append(current_features)
                observation_labels.append(feature[0][0])
                current_features = []

        # convert observation labels to a unique integer representation
        label_encoder = preprocessing.LabelEncoder()
        label_encoder.fit(dataset[:, 0])
        encoded_labels = label_encoder.transform(observation_labels)

        # create svm model
        clf = svm.SVC(kernel=kernel_type)
        clf.fit(grouped_features, encoded_labels)

        # get svm title, and cache (model, encoded labels, title)
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]
        Cache_Model(clf).cache(
            'svm_rbf_model',
            str(session_id) + '_' + title
        )
        Cache_Model(label_encoder).cache('svm_rbf_labels', session_id)
        Cache_Hset().cache('svm_rbf_title', session_id, title)

        # cache svm feature labels, with respect to given session id
        Cache_Hset().cache(
            'svm_rbf_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

        # return error(s) if exists
        return {'error': list_error}
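Cache_Model above persists the fitted classifier and its label encoder into the NoSQL datastore; one plausible way to do that, sketched here purely as an assumption (the actual serialization and key layout used by Cache_Model are not shown in this snippet), is pickling into Redis.

import pickle
import redis
from sklearn import preprocessing, svm

# fit a small classifier on illustrative data
label_encoder = preprocessing.LabelEncoder()
X = [[0.0, 0.1], [0.9, 1.0], [0.1, 0.0], [1.0, 0.8]]
y = label_encoder.fit_transform(['a', 'b', 'a', 'b'])
clf = svm.SVC(kernel='rbf')
clf.fit(X, y)

# assumed connection details, and an assumed key layout shaped like the
# ('svm_rbf_model', '<session_id>_<title>') pair used above
r = redis.StrictRedis(host='localhost', port=6379, db=0)
session_id = '1'
title = 'example-title'
r.set('svm_rbf_model_' + session_id + '_' + title, pickle.dumps(clf))
r.set('svm_rbf_labels_' + session_id, pickle.dumps(label_encoder))

# restore the model later, at prediction time
restored = pickle.loads(r.get('svm_rbf_model_' + session_id + '_' + title))
print(restored.predict([[0.05, 0.05]]))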