def fit(self, x, y):
    """Tune, build and train the classifier on ``(x, y)``.

    Steps, in order: report label counts via
    ``Utilities.count_occurences``, run ``self.grid_search``, store the
    result of ``self.construct_classifier()`` on ``self.clf``, log a
    message, then fit the stored classifier.

    Returns:
        Whatever ``self.clf.fit(x, y)`` returns.
    """
    Utilities.count_occurences(y)
    self.grid_search(x, y)

    classifier = self.construct_classifier()
    self.clf = classifier

    logger.info("Training...")
    fitted = self.clf.fit(x, y)
    return fitted
def processing(price_source, stock, load_articles, full_articles, processing_type='SVM'):
    """Load data, train the selected model and report its performance.

    The data returned by ``get_data`` is split 75/25 into train and test
    sets with a fixed ``random_state`` of 42. ``processing_type == 'SVM'``
    routes to ``svm_prediction``; any other value is forwarded to
    ``lstm_prediction`` as the model type. Metrics are reported through
    ``Utilities.measure_performance`` and
    ``Utilities.measure_performance_auc``; nothing is returned.
    """
    features, labels = get_data(full_articles, load_articles, price_source, stock)
    split = train_test_split(features, labels, test_size=0.25, random_state=42)
    x_train, x_test, y_train, y_test = split

    if processing_type != 'SVM':
        predicted, predicted_prob = lstm_prediction(
            processing_type, x_train, x_test, y_train
        )
    else:
        predicted, predicted_prob = svm_prediction(x_train, x_test, y_train)

    Utilities.measure_performance(y_test, predicted)
    # NOTE(review): the hard predictions are passed twice and
    # `predicted_prob` is never used. If the third argument of
    # measure_performance_auc is meant to be a score/probability, this is
    # probably a typo -- confirm against the helper's signature before
    # changing it.
    Utilities.measure_performance_auc(y_test, predicted, predicted)
def lstm_prediction(model_type, x_train, x_test, y_train):
    """Train a model built by ``build_model`` and predict on ``x_test``.

    The training targets are converted with ``Utilities.make_dual(y_train, 2)``
    and both feature arrays are reshaped from ``(rows, cols)`` to
    ``(rows, cols, 1)`` before being passed to the model.

    Returns:
        A ``(y_result, y_result_prob)`` tuple: ``y_result_prob`` is the raw
        output of ``model.predict`` and ``y_result`` is that output passed
        through ``Utilities.make_single_dimension``.
    """
    dual_targets = Utilities.make_dual(y_train, 2)

    # Append a trailing axis of length 1 to each feature array.
    train_inputs = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    test_inputs = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    # Build the model architecture and print its summary.
    market_model = build_model(train_inputs, model_type)
    market_model.summary()

    # Train with early stopping on validation loss; the object returned by
    # fit() is intentionally not kept.
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=25)
    fit_options = {
        'batch_size': 1000,
        'callbacks': [early_stopping],
        'epochs': 50,
        'validation_split': 0.25,
        'shuffle': True,
    }
    market_model.fit(train_inputs, dual_targets, **fit_options)

    y_result_prob = market_model.predict(test_inputs)
    return Utilities.make_single_dimension(y_result_prob), y_result_prob
def compute_cost_function(self):
    """Return the row-normalised weighted likelihood of every sample.

    For sample ``i`` and component ``j`` the entry is
    ``theta[j] * Utilities.pdf_mnd(mu[j], D_j, x[i])`` where ``D_j`` is
    ``cov[j]`` with its off-diagonal entries zeroed. Each row is then divided
    by its own total.

    Returns:
        ``numpy.ndarray`` of shape ``(self.nrows, len(self.theta))``.
    """
    n_components = len(self.theta)
    weights = np.ones((self.nrows, n_components))

    for row in range(self.nrows):
        sample = self.x[row]
        row_total = 0
        for comp in range(n_components):
            # Keep only the diagonal of this component's covariance.
            diagonal_cov = np.diag(np.diag(self.cov[comp]))
            likelihood = Utilities.pdf_mnd(self.mu[comp], diagonal_cov, sample)
            weights[row, comp] = self.theta[comp] * likelihood
            # Accumulate the stored value so the total matches the row exactly.
            row_total += weights[row, comp]
        weights[row] /= row_total

    return weights
def update_vars(self, cost_function):
    """Re-estimate ``self.mu``, ``self.cov`` and ``self.nu`` from weights.

    Args:
        cost_function: a 2-tuple ``(e_hi, e_log_hi)``. ``e_hi[i]`` is the
            weight applied to sample ``self.x[i]``; ``e_log_hi`` is only
            forwarded to the line search. (Presumably these are the
            expectations E[h_i] and E[log h_i] from the preceding
            expectation step -- confirm against the caller.)

    Returns:
        The result of ``self.compute_cost_function()`` evaluated with the
        updated parameters.
    """
    e_hi, e_log_hi = cost_function
    # Shared normaliser for both the mean and the covariance.
    weight_total = sum(e_hi)

    # Weighted mean of the samples.
    self.mu = np.zeros(self.dimensions)
    for i in range(self.nrows):
        self.mu += e_hi[i] * self.x[i]
    self.mu /= weight_total

    # Weighted scatter matrix. Two fixes relative to the previous version:
    #  * iterate over every row (the old range stopped at nrows - 1 and
    #    silently dropped the last sample);
    #  * use the outer product. np.matmul of a 1-D vector with itself is
    #    the inner product, a scalar that was broadcast over the whole
    #    matrix, so every diagonal entry ended up with the same value.
    x_minus_mean = self.x - self.mu
    self.cov = np.zeros((self.dimensions, self.dimensions))
    for i in range(self.nrows):
        self.cov += e_hi[i] * np.outer(x_minus_mean[i], x_minus_mean[i])
    self.cov /= weight_total

    # Only the diagonal of the covariance is kept.
    self.cov = np.diag(np.diag(self.cov))

    # The numeric arguments are passed through unchanged; their meaning is
    # defined by Utilities.line_search_min (presumably search bounds and a
    # step/iteration count -- TODO confirm).
    self.nu = Utilities.line_search_min(
        Utilities.t_cost, 0.1, 1000, 10000, e_hi, e_log_hi
    )
    return self.compute_cost_function()
def __init__(self, type):
    """Fit two Gaussian class models, classify the test set and plot a ROC.

    Args:
        type: forwarded unchanged to ``self.load_model`` and
            ``self.load_test_data``. The name shadows the builtin but is
            kept because it is part of the constructor's interface.

    Side effects: prints the class-0 mean, the accuracy, the value returned
    by ``Utilities.performance_Metrics`` and the ROC points, then shows a
    matplotlib figure.
    """
    # Only the two image sets are used; the other values returned by
    # load_model are discarded (the means are recomputed below).
    _, image0, _, image1, _ = self.load_model(type)
    image0 = self.reduce_dimensions(image0)
    image1 = self.reduce_dimensions(image1)

    mu0 = np.mean(image0, axis=0)
    mu1 = np.mean(image1, axis=0)
    print(mu0)

    cov0 = np.cov(image0.T)
    cov1 = np.cov(image1.T)
    cov0_det = np.linalg.det(cov0)
    cov0_inv = np.linalg.inv(cov0)
    cov1_det = np.linalg.det(cov1)
    cov1_inv = np.linalg.inv(cov1)

    # Reset the labels before loading; presumably load_test_data fills
    # self.ground_truth as a side effect -- TODO confirm.
    self.ground_truth = list()
    test_images = self.load_test_data(type)
    test_reduced = self.reduce_dimensions(test_images)

    prediction = []
    correct_pred = 0
    for sample, truth in zip(test_reduced, self.ground_truth):
        likelihood0 = self.pdf_nd(mu0, cov0_det, cov0_inv, sample)
        likelihood1 = self.pdf_nd(mu1, cov1_det, cov1_inv, sample)
        posterior = self.compute_posterior(likelihood0, likelihood1)
        # Threshold the posterior into a hard 0/1 label.
        label = 1 if posterior >= 0.5 else 0
        if label == truth:
            correct_pred += 1
        prediction.append(label)

    accuracy = float(correct_pred) * 100 / len(prediction)
    print("Accuracy: " + repr(accuracy))
    print(Utilities.performance_Metrics(prediction, self.ground_truth))

    # ROC curve computed from the hard labels.
    fp, tp, _ = roc_curve(self.ground_truth, prediction)
    area_under_curve = auc(fp, tp)
    print((fp, tp))

    plt.plot(fp, tp, 'b', label='AUC = %0.2f' % area_under_curve)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.1])
    plt.ylim([-0.1, 1.1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()