def retrieve_sv_model():
    '''
    The router function retrieves all models stored in the hashed
    redis cache.

    Returns a json string: a combined list of svm and svr model
    titles when any exist, otherwise an {'error': [...]} payload.
    '''

    if request.method == 'POST':
        # get all cached model titles, for both model types
        svm_list = Model().get_all_titles('svm_model')
        svr_list = Model().get_all_titles('svr_model')

        svm_result = []
        svr_result = []
        error_result = []

        # get svm model(s), or collect corresponding error(s)
        if svm_list['result']:
            svm_result = svm_list['result']
        elif svm_list['error']:
            error_result.extend(svm_list['error'])

        # get svr model(s), or collect corresponding error(s)
        if svr_list['result']:
            svr_result = svr_list['result']
        elif svr_list['error']:
            error_result.extend(svr_list['error'])

        # return combined model(s)
        combined_result = svm_result + svr_result
        if combined_result:
            return json.dumps(combined_result)

        # fix: previously fell through to an implicit 'None' (an invalid
        # flask response) when both result lists were empty and no error
        # was recorded; always return the error payload instead
        return json.dumps({'error': error_result})

    # fix: non-POST requests previously returned 'None' implicitly,
    # which flask rejects at runtime
    return json.dumps({'error': ['invalid request method']})
def generate(model, kernel_type, collection, payload, list_error):
    '''
    This method generates an sv (i.e. svm, or svr) model using feature
    data, retrieved from the database. The generated model, is then
    stored within the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested
        vector, or python list, is a collection of features within
        the containing observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    Returns {'error': list_error}, echoing any accumulated error(s).
    '''

    # local variables
    sorted_labels = False
    list_model_type = current_app.config.get('MODEL_TYPE')
    collection_adjusted = collection.lower().replace(' ', '_')
    cursor = Collection()

    # get datasets
    datasets = cursor.query(collection_adjusted, 'aggregate', payload)

    # restructure dataset into arrays
    observation_labels = []
    grouped_features = []

    for dataset in datasets['result']:
        for observation in dataset['dataset']:
            indep_variables = observation['independent-variables']

            for features in indep_variables:
                # hoist: sort the feature dict once per feature set,
                # instead of re-sorting in every comprehension below
                sorted_items = sorted(features.items())

                # svm case: labels stay categorical
                if model == list_model_type[0]:
                    observation_labels.append(
                        observation['dependent-variable'])
                    sorted_features = [v for k, v in sorted_items]

                # svr case: labels, and features cast to float
                elif model == list_model_type[1]:
                    observation_labels.append(
                        float(observation['dependent-variable']))
                    sorted_features = [
                        float(v) for k, v in sorted_items
                    ]

                grouped_features.append(sorted_features)

                # record feature (key) labels once; every feature dict
                # is assumed to share the same keys — TODO confirm
                if not sorted_labels:
                    sorted_labels = [k for k, v in sorted_items]

    # generate svm model
    if model == list_model_type[0]:
        # convert observation labels to a unique integer representation
        label_encoder = preprocessing.LabelEncoder()
        label_encoder.fit(observation_labels)
        encoded_labels = label_encoder.transform(observation_labels)

        # create model
        clf = svm.SVC(kernel=kernel_type, probability=True)

        # cache encoded labels
        Model(label_encoder).cache(model + '_labels', collection_adjusted)

        # fit model
        clf.fit(grouped_features, encoded_labels)

    # generate svr model
    elif model == list_model_type[1]:
        # create model
        clf = svm.SVR(kernel=kernel_type)

        # fit model
        clf.fit(grouped_features, observation_labels)

        # compute, and cache coefficient of determination
        r2 = clf.score(grouped_features, observation_labels)
        Hset().cache(model + '_r2', collection_adjusted, r2)

    # cache model
    Model(clf).cache(model + '_model', collection_adjusted)

    # cache feature labels, with respect to given collection
    # NOTE(review): every other cache call keys on 'collection_adjusted',
    # but this one keys on the raw 'collection' — preserved as-is; verify
    # against whichever consumer uncaches '_feature_labels'
    Hset().cache(model + '_feature_labels', collection,
                 json.dumps(sorted_labels))

    # return error(s) if exists
    return {'error': list_error}
def predict(model, collection, predictors):
    '''
    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the stored corresponding
    model, within the NoSQL datastore.

    Additionally, the following is returned for SVM predictions:

        - array of probability a given point (predictors) is one of the
          defined set of classifiers.
        - array of sum distances a given point (predictors) is to the
          set of associated hyperplanes.

    However, the following is returned for SVR predictions:

        - coefficient of determination (r^2).

    @clf, decoded model, containing several methods (i.e. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.
    '''

    # local variables
    cache_key = collection.lower().replace(' ', '_')
    model_types = current_app.config.get('MODEL_TYPE')

    # restore the serialized classifier from the cache
    clf = Model().uncache(
        model + '_model',
        cache_key
    )

    # case 1: svm — textual label, probabilities, and decision function
    if model == model_types[0]:
        # single-sample prediction, decoded back to its textual label
        prediction = clf.predict([predictors])
        encoder = Model().uncache(
            model + '_labels',
            cache_key
        )
        label = encoder.inverse_transform(prediction)

        # per-class confidence measures for the same sample
        proba = clf.predict_proba([predictors])
        decisions = clf.decision_function([predictors])
        class_names = [encoder.inverse_transform(c) for c in clf.classes_]

        return {
            'result': label[0],
            'model': model,
            'confidence': {
                'classes': list(class_names),
                'probability': list(proba[0]),
                'decision_function': list(decisions[0])
            },
            'error': None
        }

    # case 2: svr — numeric prediction, plus the cached r^2 score
    elif model == model_types[1]:
        prediction = clf.predict([predictors])
        r2 = Hset().uncache(
            model + '_r2',
            cache_key
        )['result']

        return {
            'result': str(prediction[0]),
            'model': model,
            'confidence': {
                'score': r2
            },
            'error': None
        }
def generate(model, kernel_type, session_id, feature_request, list_error):
    '''
    This method generates an sv (i.e. svm, or svr) model using feature
    data, retrieved from the database. The generated model, is then
    stored within the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested
        vector, or python list, is a collection of features within
        the containing observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    Returns {'error': list_error}, echoing any accumulated error(s).
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # check dataset integrity, build model
    #
    # fix: previously 'len(dataset) % feature_count' raised a TypeError
    # whenever either lookup above failed (dataset or feature_count set
    # to None), instead of falling through to the error return below
    if (
        dataset is not None and
        feature_count and
        len(dataset) % feature_count == 0
    ):
        # reorder columns to (label, feature-label, feature-value)
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels stay categorical
            if model == list_model_type[0]:
                current_features.append(feature[1][0])

                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels, and features cast to float
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))

                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation (record once)
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer
            # representation
            label_encoder = preprocessing.LabelEncoder()
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # get title
        entity = Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return error(s) if exists
    return {'error': list_error}
def predict(model, model_id, predictors):
    '''
    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the stored corresponding
    model, within the NoSQL datastore.

    Additionally, the following is returned for SVM predictions:

        - array of probability a given point (predictors) is one of the
          defined set of classifiers.
        - array of sum distances a given point (predictors) is to the
          set of associated hyperplanes.

    However, the following is returned for SVR predictions:

        - coefficient of determination (r^2).

    @clf, decoded model, containing several methods (i.e. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.
    '''

    # local variables
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get necessary model: key is '<model_id>_<title>', matching the
    # key built when the model was cached
    title = Hset().uncache(model + '_title', model_id)['result']
    clf = Model().uncache(model + '_model', model_id + '_' + title)

    # perform prediction, and return the result
    prediction = clf.predict([predictors])

    # case 1: return svm prediction, and confidence level
    if model == list_model_type[0]:
        encoded_labels = Model().uncache(model + '_labels', model_id)
        textual_label = encoded_labels.inverse_transform([prediction])

        # fix: wrap 'predictors' as a single 2D sample, consistent with
        # the 'clf.predict' call above, and the sibling 'predict'
        # implementation; scikit-learn rejects a bare 1D feature list
        probability = clf.predict_proba([predictors])
        decision_function = clf.decision_function([predictors])

        classes = [
            encoded_labels.inverse_transform(x) for x in clf.classes_
        ]

        return {
            'result': textual_label[0][0],
            'model': model,
            'confidence': {
                'classes': list(classes),
                'probability': list(probability[0]),
                'decision_function': list(decision_function[0])
            },
            'error': None
        }

    # case 2: return svr prediction, and confidence level
    elif model == list_model_type[1]:
        r2 = Hset().uncache(model + '_r2', model_id)['result']

        return {
            'result': str(prediction[0]),
            'model': model,
            'confidence': {
                'score': r2
            },
            'error': None
        }