def predict(train):
    tr_train, tr_test = load_ml100k.get_train_test(train, random_state=34)
    tr_predicted0 = regression.predict(tr_train)
    tr_predicted1 = regression.predict(tr_train.T).T
    tr_predicted2 = corrneighbours.predict(tr_train)
    tr_predicted3 = corrneighbours.predict(tr_train.T).T
    tr_predicted4 = norm.predict(tr_train)
    tr_predicted5 = norm.predict(tr_train.T).T
    stack_tr = np.array([
        tr_predicted0[tr_test > 0],
        tr_predicted1[tr_test > 0],
        tr_predicted2[tr_test > 0],
        tr_predicted3[tr_test > 0],
        tr_predicted4[tr_test > 0],
        tr_predicted5[tr_test > 0],
    ]).T

    lr = linear_model.LinearRegression()
    lr.fit(stack_tr, tr_test[tr_test > 0])

    stack_te = np.array([
        tr_predicted0.ravel(),
        tr_predicted1.ravel(),
        tr_predicted2.ravel(),
        tr_predicted3.ravel(),
        tr_predicted4.ravel(),
        tr_predicted5.ravel(),
    ]).T
    return lr.predict(stack_te).reshape(train.shape)
def orderIn(self, stream_bucket, sample_path):
    f_list = self.wait_data(stream_bucket, sample_path)
    # regression
    for f in f_list:
        self.data_processor.copy_to_feature(f)
        self.upload_dir_s3(self.feature_path)
        regression.predict(self.bucket_name, self.feature_path, f,
                           self.output_path + "regression/", self.plot_path)
        self.upload_dir_s3(self.output_path)
def predict(train):
    predicted0 = regression.predict(train)
    predicted1 = regression.predict(train.T).T
    predicted2 = corrneighbours.predict(train)
    predicted3 = corrneighbours.predict(train.T).T
    predicted4 = norm.predict(train)
    predicted5 = norm.predict(train.T).T
    stack = np.array([
        predicted0,
        predicted1,
        predicted2,
        predicted3,
        predicted4,
        predicted5,
    ])
    return stack.mean(0)
def pred_():
    js = request.get_json()
    if 'email' not in js or 'data' not in js:
        return 'missing email or data'
    result = predict(js['email'], array_to_array_of_arrays([js['data']]))
    result = pd.Series(result).to_json(orient='values')
    return Response(json.dumps(result), mimetype='application/json')
def infer_angles(nnet_params, frames, sigmasq_states, inflate_vars=0.):
    frames = clean_frames(frames).reshape(frames.shape[0], -1)
    mus, log_sigmasqs = predict(frames, nnet_params)
    angles, _ = kalman_smoother(
        0., 100., 1., sigmasq_states, mus, np.exp(log_sigmasqs) + inflate_vars)
    return angles
def average_predict(train):
    # Averaging of predictions
    predicted0 = regression.predict(train)
    predicted1 = regression.predict(train.T).T
    predicted2 = corr_neighbours.predict(train)
    predicted3 = corr_neighbours.predict(train.T).T
    predicted4 = normalization.predict(train)
    predicted5 = normalization.predict(train.T).T
    stack = np.array([
        predicted0,
        predicted1,
        predicted2,
        predicted3,
        predicted4,
        predicted5,
    ])
    return stack.mean(0)
def stacked_predict(train_data):
    # Stacked prediction: when fitting hyperparameters we need two layers of
    # training/testing splits: a first, higher-level split, and then, inside
    # the training split, a second split so we can fit the stacked learner.
    tr_train, tr_test = load_ml100k.get_train_test(train_data, random_state=34)

    # Call all the methods we previously defined;
    # these have been implemented as functions:
    tr_prediction_0 = regression.predict(tr_train)
    tr_prediction_1 = regression.predict(tr_train.T).T
    tr_prediction_2 = corr_neighbours.predict(tr_train)
    tr_prediction_3 = corr_neighbours.predict(tr_train.T).T
    tr_prediction_4 = normalization.predict(tr_train)
    tr_prediction_5 = normalization.predict(tr_train.T).T

    # Now assemble these predictions into a single array:
    stacked_tr = np.array([
        tr_prediction_0[tr_test > 0],
        tr_prediction_1[tr_test > 0],
        tr_prediction_2[tr_test > 0],
        tr_prediction_3[tr_test > 0],
        tr_prediction_4[tr_test > 0],
        tr_prediction_5[tr_test > 0],
    ]).T

    # Fit a simple linear regression as the stacked learner
    linear_learner = linear_model.LinearRegression()
    linear_learner.fit(stacked_tr, tr_test[tr_test > 0])

    # Apply the whole process to the testing split and evaluate
    stacked_te = np.array([
        tr_prediction_0.ravel(),
        tr_prediction_1.ravel(),
        tr_prediction_2.ravel(),
        tr_prediction_3.ravel(),
        tr_prediction_4.ravel(),
        tr_prediction_5.ravel(),
    ]).T
    return linear_learner.predict(stacked_te).reshape(tr_train.shape)
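# --- Usage sketch (assumption, not part of the snippets above) ---
# To see how much stacking helps over plain averaging, one might evaluate
# stacked_predict() on a held-out split. This sketch reuses the
# load_ml100k.get_train_test interface shown above; `reviews` (the full
# user-by-item ratings matrix) and the choice of R^2 as the metric are
# illustrative assumptions only.
from sklearn import metrics

train, test = load_ml100k.get_train_test(reviews, random_state=12)
predicted = stacked_predict(train)
mask = test > 0  # score only entries actually rated in the held-out split
print('R2 (stacked): {:.2%}'.format(metrics.r2_score(test[mask], predicted[mask])))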
def predict():
    """/predict

    Returns predictions, in CSV format. The first column is an ISO-formatted
    UTC timestamp of the start of the prediction period and the second column
    is the predicted # of logins. For example, a few rows of predictions might
    look like:

        2012-05-01 00:00:00,19.1158177963
        2012-05-01 01:00:00,22.0997300016
        2012-05-01 02:00:00,26.1003343227

    Example usage:
        curl -X GET "http://127.0.0.1:5000/predict?start_date=2012-05-01&end_date=2012-05-15"
    """
    # Get the date arguments
    start_date = request.args.get('start_date')
    end_date = request.args.get('end_date')

    # Convert to (hour, weekday) tuples
    start_dt = dateutil.parser.parse(start_date)
    end_dt = dateutil.parser.parse(end_date)
    input_range = []
    for dt in rrule.rrule(rrule.HOURLY, dtstart=start_dt, until=end_dt):
        input_range.append([dt.hour, dt.weekday()])

    # Call predict
    try:
        prediction_array = regression.predict(input_range)
    except regression.UntrainedException:
        return "Error: please train the regressor first! Call the /train endpoint."

    csv = []
    for i, dt in enumerate(rrule.rrule(rrule.HOURLY, dtstart=start_dt, until=end_dt)):
        csv.append(",".join([str(dt), str(prediction_array[i])]))
    csv_response = make_response("\n".join(csv))
    csv_response.headers["content-type"] = "text/csv"
    return csv_response
def getDeliveryEstimate(delivery_city):
    weather = 1  # weather (default)
    transport_mode = 1  # transport (default)
    product_category_number = 1  # product category (default)
    product_delay_time = delay_time_dictionary[product_category_number]  # delay time (default)
    shipping_city = calc_shortest_path(delivery_city)  # nearest shipping city
    if shipping_city is None:
        return jsonify({'result': 'Unknown City', 'status': 'failed'})
    print('{} -> {}'.format(shipping_city, delivery_city))
    dist = city_matrix[shipping_city][delivery_city][0]
    X = [
        product_category_number, weather, transport_mode, product_delay_time,
        dist
    ]
    prediction = '{:.2f}'.format(predict(regressor, [X])[0])
    # print('Prediction: {}'.format(prediction))
    return prediction
from regression import best_fit
from regression import predict

import numpy as np

# An example showing how the functions are meant to be used.
# As usual, numpy arrays come in handy for the data crunching.
x = np.array([1, 5, 7, 9, 11, 12, 15], dtype=np.float64)
y = np.array([2, 4, 6, 9, 12, 14, 18], dtype=np.float64)

slope, intercept = best_fit(x, y)
print(slope)                          # 1.1985
print(predict(17, slope, intercept))  # 19.3872
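# --- Possible implementation (assumption) ---
# The regression module imported above is not shown in this snippet. A minimal
# ordinary-least-squares implementation consistent with the printed values
# (slope ~= 1.1985, predict(17) ~= 19.3872) could look like this; the bodies
# below are a sketch, not the module's actual source.
import numpy as np

def best_fit(x, y):
    # slope = cov(x, y) / var(x); intercept = mean(y) - slope * mean(x)
    x_mean, y_mean = x.mean(), y.mean()
    slope = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean) ** 2)
    intercept = y_mean - slope * x_mean
    return slope, intercept

def predict(x_new, slope, intercept):
    # Evaluate the fitted line at x_new
    return slope * x_new + intercept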
print "the average acc (train) is: %f" %(1-numpy.mean(mses_train)) print "the average acc (test) is: %f" %(1-numpy.mean(mses)) print "the confusion matrix is: (TP,FP,TN,FN)" print conf_m/K # end print "=============================================" print "" print "=================linear regression=================" h_X = housing_train[:,:n-1] h_y = housing_train[:,n-1] h_X_test = housing_test[:,:n-1] h_y_test = housing_test[:,n-1] print "training with housing training data..." h_X_norm,h_X_test_norm = regression.normalize(h_X,h_X_test) w = regression.caculateW(h_X_norm,h_y) mse_h_train = numpy.mean((regression.predict(h_X_norm,w) - h_y)**2) mse_h_test = numpy.mean((regression.predict(h_X_test_norm,w) - h_y_test)**2) print "for housing training data mse is %f" %mse_h_train print "for housing test data mse is %f" %mse_h_test mses = numpy.zeros(K) print "linear regression with spambase dataset with %d folds cross-validation..." %K for i in range(K): test = k_folds[i] train = numpy.vstack(numpy.delete(k_folds, i, axis=0)) s_X = train[:,:s_n-1] s_y = train[:,s_n-1] s_t_X = test[:,:s_n-1] s_t_y = test[:,s_n-1] s_X_norm,s_t_X_norm = regression.normalize(s_X,s_t_X)
def test_predict(self):
    dataset = regression.get_dataset(self.BODYFAT_FILE)
    prediction = regression.predict(dataset, cols=[1, 2], features=[1.0708, 23])
    np.testing.assert_almost_equal(prediction, 12.62245862957813)
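# --- Plausible shape of the function under test (assumption) ---
# The regression module exercised by this test is not included here. Given the
# call signature, predict(dataset, cols, features) presumably fits a linear
# model on the selected dataset columns and evaluates it at the supplied
# feature values. The sketch below guesses that column 0 holds the target; the
# data layout and least-squares details are assumptions, not the tested code.
import numpy as np

def predict(dataset, cols, features):
    y = dataset[:, 0]
    X = np.column_stack([np.ones(len(dataset))] + [dataset[:, c] for c in cols])
    betas, *_ = np.linalg.lstsq(X, y, rcond=None)  # ordinary least squares
    return betas[0] + np.dot(betas[1:], features)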
Xte_p = prepare_data_div(pd.DataFrame(Xte['DNA']), nm_char)
#Xte_p['Id'] = Xte['Id']
#Xtr_p = Xtr_p.sample(frac=1)

X_tr = pd.DataFrame.as_matrix(Xtr_p.iloc[:, :-1])
Y_tr = pd.DataFrame.as_matrix(Xtr_p['Bound']).astype(float).tolist()

print("training logistic regression..")
w = logistic_regression(X_tr, Y_tr, num_steps=50, learning_rate=5e-5, add_intercept=True)

print("predicting the test set..")
result_tmp = predict(Xte_p, w)

# filter the value
result = test_with_id(result_tmp, Xte['Id'])
result['Bound'][result['Bound'] == -1] = 0

s = ""
for index, row in result.iterrows():
    s = s + str(int(row['Id'])) + "," + str(int(row['Bound'])) + "\n"
f.write(s)
print("finish!")
f.close()
X_tr = pd.DataFrame.as_matrix(data_train.iloc[:, :-1])
Y_tr = pd.DataFrame.as_matrix(data_train['Bound']).astype(float).tolist()
X_te = pd.DataFrame.as_matrix(data_test.iloc[:, :-1])
Y_te = pd.DataFrame.as_matrix(data_test['Bound'])

# Train the logistic regression using X_tr = data_train = 70% of the entire dataset
Prob_Tr = logistic_regression(X_tr, Y_tr, num_steps=50, learning_rate=5e-5, add_intercept=True)

# Test using Prob_Tr from training, with X_te = data_test = 30% of the entire dataset
p_Te = predict(X_te, Prob_Tr)
Y_predicted_te = test(p_Te)
predicted_score_te = accuracy_score(Y_predicted_te, Y_te, normalize=False) / len(Y_predicted_te)

st_info = ("\n test on Xtr" + str(i) + ", Ytr" + str(i) +
           "\n number of character: " + str(k + 1))
if predicted_score_te > max_predic:
    max_predic = predicted_score_te
    max_info = "\n max_result_tr: " + str(predicted_score_te) + "\n"

f.write("---------------------------------------")
f.write(st_info)
def home(request):
    '''
    Creates a webpage view that validates form data input by a user, converts
    that data into an input dictionary, and uses that dictionary as the
    argument for the function that builds a table of results for the user.

    Inputs:
        request: request object for web interfacing

    Outputs:
        Rendered webpage table of results from the assigned function
    '''
    context = {}
    res = None
    if request.method == 'GET':
        # Create a form instance and populate it with data from the request:
        form = SearchForm(request.GET)
        # Check whether it's valid:
        if form.is_valid():
            # Convert form data to an args dictionary for regression prediction
            args = {}
            preference = form.cleaned_data["preference"]
            rating_bool = (preference == "Ratings")

            lang_num = 0
            language = form.cleaned_data["language"]
            if language == "No necessary in-game text":
                lang_num = 1
            if language == "Some necessary text - easily memorized or small crib sheet":
                lang_num = 2
            if language == "Moderate in-game text - needs crib sheet or paste ups":
                lang_num = 3
            if language == "Extensive use of text - massive conversion needed to be playable":
                lang_num = 4
            if language == "Unplayable in another language":
                lang_num = 5
            args["Language dependency"] = lang_num

            game_type1 = form.cleaned_data['game_type1']
            if game_type1:
                args['Type 1'] = game_type1
            game_type2 = form.cleaned_data['game_type2']
            if game_type2:
                args['Type 2'] = game_type2
            game_type3 = form.cleaned_data['game_type3']
            if game_type3:
                args['Type 3'] = game_type3
            mechanics = form.cleaned_data['game_mecs']
            if mechanics:
                args['Number of mechanics'] = mechanics

            if rating_bool:
                num_player = form.cleaned_data['players']
                if num_player:
                    args['Recommended number of players'] = num_player
                time = form.cleaned_data['time']
                if time:
                    args['Average playing time'] = time
                complexity = form.cleaned_data["complexity"]
                if complexity:
                    args["Complexity"] = complexity
            else:
                game_cats = form.cleaned_data['game_cats']
                if game_cats:
                    args['Number of categories'] = game_cats

            if form.cleaned_data['show_args']:
                context['args'] = 'args_to_ui = ' + json.dumps(args, indent=2)

            form = SearchForm()
            try:
                res = predict(args, rating_bool)
            except Exception as e:
                print('Exception caught')
                bt = traceback.format_exception(*sys.exc_info()[:3])
                context['err'] = """
                An exception was thrown in predict:
                <pre>{}
                {}</pre>
                """.format(e, '\n'.join(bt))
                res = None
    else:
        form = SearchForm()

    # Handle the different kinds of responses res can take
    if res is None:
        context['result'] = None
    elif isinstance(res, str):
        context['result'] = None
        context['err'] = res
        result = None
    elif not _valid_result(res):
        context['result'] = None
        context['err'] = ('Return of predict has the wrong data type. '
                          'Should be a tuple of length 4 with one string and '
                          'three lists.')
    else:
        columns, result = res
        # Wrap each row in a tuple if it is not one already
        if result and isinstance(result[0], str):
            result = [(r,) for r in result]
        context['result'] = result
        context['num_results'] = len(result)
        context['columns'] = [COLUMN_NAMES.get(col, col) for col in columns]

    context['form'] = form
    return render(request, 'index.html', context)
# Divide into testing and training data
train, test = sales.random_split(0.8, seed=0)

# A simple case with just one predictor:
(example_features, example_Y) = uf.get_numpy_data(sales, ['sqft_living'], 'price')
print 'example_features[:3, :]:'
print example_features[:3, :]
print 'example_Y[:3]:', example_Y[:3]

# Use predetermined weights (coefficients):
my_weights = np.array([1., 1.])
test_predictions = reg.predict(example_features, my_weights)
print 'test_predictions:', test_predictions

# Test
simple_features = ['sqft_living']
(simple_feature_matrix, Y) = uf.get_numpy_data(train, simple_features, 'price')
initial_w = np.array([-47000., 1.])
eta = 7e-12
tolerance = 2.5e7
mod1_w = reg.regression_gradient_descent(
    simple_feature_matrix, Y, initial_w, eta, tolerance)
print 'final weights:', mod1_w

# Use the optimized weights to predict values in the test set
(simple_features_matrix_test, Y_test) = uf.get_numpy_data(
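# --- Assumed behavior of reg.predict (sketch) ---
# The reg module is not part of this excerpt. Given how it is called, predict
# presumably returns the matrix-vector product of the feature matrix and the
# weight vector (y_hat = H w); the body below is an illustrative assumption,
# not the module's actual source.
import numpy as np

def predict(feature_matrix, weights):
    # feature_matrix: (n_observations, n_features); weights: (n_features,)
    return np.dot(feature_matrix, weights)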
def guesscost():
    models = regression.trainModels()
    predVals = regression.predict(models[0], models[1])
    return json.dumps(predVals)
def charge_calculator(age, sex, bmi, children, smoker, region):
    charges = predict(age, sex, bmi, children, smoker, region)
    charges_rounded = round(float(charges[0]), 2)
    charges_label['text'] = 'Your estimated charges are ' + str(charges_rounded)