Example #1
0
def retrieve_sv_model():
    '''

    This router function retrieves all svm, and svr models stored in the
    hashed redis cache.

    Returns a JSON string: either the combined list of model titles, or a
    serialized error object. Only POST requests are serviced.

    '''

    if request.method == 'POST':
        # get all models
        svm_list = Model().get_all_titles('svm_model')
        svr_list = Model().get_all_titles('svr_model')
        svm_result = []
        svr_result = []
        error_result = []

        # get svm model(s)
        if svm_list['result']:
            svm_result = svm_list['result']
        elif svm_list['error']:
            error_result.extend(svm_list['error'])

        # get svr model(s)
        if svr_list['result']:
            svr_result = svr_list['result']
        elif svr_list['error']:
            error_result.extend(svr_list['error'])

        # return combined model(s)
        combined_result = svm_result + svr_result
        if combined_result:
            return json.dumps(combined_result)

        # fallback: the original implicitly returned None when both the
        # combined result, and the error list were empty, which raises a
        # server error within flask; always return a serialized response.
        return json.dumps({'error': error_result})
Example #2
0
def generate(model, kernel_type, collection, payload, list_error):
    '''

    This method generates an sv (i.e. svm, or svr) model using feature data,
    retrieved from the database. The generated model, is then stored within the
    NoSQL datastore.

    @model, the model type: either the svm, or svr entry of the application
        'MODEL_TYPE' configuration.
    @kernel_type, kernel passed to the underlying scikit-learn estimator
        (i.e. svm.SVC, or svm.SVR).
    @collection, name of the database collection containing feature data.
    @payload, aggregation query issued against the collection.
    @list_error, accumulated error(s), returned to the caller.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.
    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    '''

    # local variables
    sorted_labels = False
    list_model_type = current_app.config.get('MODEL_TYPE')
    collection_adjusted = collection.lower().replace(' ', '_')
    cursor = Collection()

    # get datasets
    datasets = cursor.query(collection_adjusted, 'aggregate', payload)

    # restructure dataset into arrays
    observation_labels = []
    grouped_features = []

    for dataset in datasets['result']:
        for observation in dataset['dataset']:
            indep_variables = observation['independent-variables']

            for features in indep_variables:
                # svm case: labels remain categorical
                if model == list_model_type[0]:
                    observation_labels.append(
                        observation['dependent-variable'])
                    sorted_features = [v for k, v in sorted(features.items())]

                # svr case: labels, and features cast to float
                elif model == list_model_type[1]:
                    observation_labels.append(
                        float(observation['dependent-variable']))
                    sorted_features = [
                        float(v) for k, v in sorted(features.items())
                    ]

                grouped_features.append(sorted_features)

                # record feature labels once, from the first observation
                if not sorted_labels:
                    sorted_labels = [k for k, v in sorted(features.items())]

    # generate svm model
    if model == list_model_type[0]:
        # convert observation labels to a unique integer representation
        label_encoder = preprocessing.LabelEncoder()
        label_encoder.fit(observation_labels)
        encoded_labels = label_encoder.transform(observation_labels)

        # create model
        clf = svm.SVC(kernel=kernel_type, probability=True)

        # cache encoded labels
        Model(label_encoder).cache(model + '_labels', collection_adjusted)

        # fit model
        clf.fit(grouped_features, encoded_labels)

    # generate svr model
    elif model == list_model_type[1]:
        # create model
        clf = svm.SVR(kernel=kernel_type)

        # fit model
        clf.fit(grouped_features, observation_labels)

        # compute, and cache coefficient of determination
        r2 = clf.score(grouped_features, observation_labels)
        Hset().cache(model + '_r2', collection_adjusted, r2)

    # cache model
    Model(clf).cache(model + '_model', collection_adjusted)

    # cache feature labels, with respect to given collection
    # NOTE(review): every other cache key here uses 'collection_adjusted',
    # while this one uses the raw 'collection' — confirm whether intentional.
    Hset().cache(model + '_feature_labels', collection,
                 json.dumps(sorted_labels))

    # return error(s) if exists
    return {'error': list_error}
Example #3
0
def predict(model, collection, predictors):
    '''

    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the stored corresponding model,
    within the NoSQL datastore.

    Additionally, the following is returned for SVM predictions:

        - array of probability a given point (predictors) is one of the
          defined set of classifiers.
        - array of sum distances a given point (predictors) is to the set
          of associated hyperplanes.

    However, the following is returned for SVR predictions:

        - coefficient of determination (r^2).


    @clf, decoded model, containing several methods (i.e. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    Returns a dict with 'result', 'model', 'confidence', and 'error' keys;
    implicitly returns None when 'model' matches neither configured type.

    '''

    # local variables
    collection_adjusted = collection.lower().replace(' ', '_')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get necessary model
    clf = Model().uncache(
        model + '_model',
        collection_adjusted
    )

    # case 1: return svm prediction, and confidence level
    if model == list_model_type[0]:
        # perform prediction as a single-sample matrix
        prediction = clf.predict([predictors])

        encoded_labels = Model().uncache(
            model + '_labels',
            collection_adjusted
        )

        textual_label = encoded_labels.inverse_transform(prediction)
        probability = clf.predict_proba([predictors])
        decision_function = clf.decision_function([predictors])

        # NOTE(review): 'inverse_transform' is invoked per scalar class;
        # newer scikit-learn requires an array argument — confirm the
        # pinned scikit-learn version accepts scalars.
        classes = [encoded_labels.inverse_transform(x) for x in clf.classes_]

        return {
            'result': textual_label[0],
            'model': model,
            'confidence': {
                'classes': list(classes),
                'probability': list(probability[0]),
                'decision_function': list(decision_function[0])
            },
            'error': None
        }

    # case 2: return svr prediction, and confidence level
    elif model == list_model_type[1]:
        # perform prediction, and return the result
        prediction = clf.predict([predictors])

        r2 = Hset().uncache(
            model + '_r2',
            collection_adjusted
        )['result']

        return {
            'result': str(prediction[0]),
            'model': model,
            'confidence': {
                'score': r2
            },
            'error': None
        }
Example #4
0
def generate(model, kernel_type, session_id, feature_request, list_error):
    '''

    This method generates an sv (i.e. svm, or svr) model using feature data,
    retrieved from the database. The generated model, is then stored within the
    NoSQL datastore.

    @model, the model type: either the svm, or svr entry of the application
        'MODEL_TYPE' configuration.
    @kernel_type, kernel passed to the underlying scikit-learn estimator
        (i.e. svm.SVC, or svm.SVR).
    @session_id, session identifier used to retrieve, and cache data.
    @feature_request, data-access object exposing 'get_dataset', and
        'get_count'.
    @list_error, accumulated error(s), returned to the caller.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.
    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # abort early: the original fell through to 'len(dataset)' with
    # dataset=None (TypeError), or a modulo by feature_count=None, when
    # either query failed
    if dataset is None or not feature_count:
        return {'error': list_error}

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels remain categorical
            if model == list_model_type[0]:
                current_features.append(feature[1][0])

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels cast to float
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer representation
            label_encoder = preprocessing.LabelEncoder()
            # NOTE(review): fitted on 'dataset[:, 0]', but applied to
            # 'observation_labels' — confirm both share the same label set.
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # get title
        entity = Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

        # return error(s) if exists
        return {'error': list_error}

    # dataset integrity failed: the original implicitly returned None here,
    # which is inconsistent with every other exit point
    list_error.append('dataset length is not a multiple of feature count')
    return {'error': list_error}
Example #5
0
def predict(model, model_id, predictors):
    '''

    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the stored corresponding model,
    within the NoSQL datastore.

    Additionally, the following is returned for SVM predictions:

        - array of probability a given point (predictors) is one of the
          defined set of classifiers.
        - array of sum distances a given point (predictors) is to the set
          of associated hyperplanes.

    However, the following is returned for SVR predictions:

        - coefficient of determination (r^2).


    @clf, decoded model, containing several methods (i.e. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    Returns a dict with 'result', 'model', 'confidence', and 'error' keys;
    implicitly returns None when 'model' matches neither configured type.

    '''

    # local variables
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get necessary model
    title = Hset().uncache(model + '_title', model_id)['result']
    clf = Model().uncache(model + '_model', model_id + '_' + title)

    # perform prediction as a single-sample matrix
    prediction = clf.predict([predictors])

    # case 1: return svm prediction, and confidence level
    if model == list_model_type[0]:
        encoded_labels = Model().uncache(model + '_labels', model_id)

        textual_label = encoded_labels.inverse_transform([prediction])

        # wrap 'predictors' as a single-sample matrix: the original passed
        # the bare 1-d vector, which errors on current scikit-learn, and
        # was inconsistent with the 'clf.predict([predictors])' call above
        probability = clf.predict_proba([predictors])
        decision_function = clf.decision_function([predictors])

        # NOTE(review): 'inverse_transform' is invoked per scalar class;
        # newer scikit-learn requires an array argument — confirm the
        # pinned scikit-learn version accepts scalars.
        classes = [encoded_labels.inverse_transform(x) for x in clf.classes_]

        return {
            'result': textual_label[0][0],
            'model': model,
            'confidence': {
                'classes': list(classes),
                'probability': list(probability[0]),
                'decision_function': list(decision_function[0])
            },
            'error': None
        }

    # case 2: return svr prediction, and confidence level
    elif model == list_model_type[1]:
        r2 = Hset().uncache(model + '_r2', model_id)['result']

        return {
            'result': str(prediction[0]),
            'model': model,
            'confidence': {
                'score': r2
            },
            'error': None
        }