def on_click(self):
    filename = self.openFileNameDialog()
    if filename:
        self.label.setText('Processing File: {}'.format(filename.split('/')[-1]))
        self.button.setEnabled(False)
        # Load DataFrame
        df = read_CSV(filename)
        # Scale DataFrame
        dfs_scaled = pd.DataFrame(self.scaler.transform(df.values),
                                  index=df.index, columns=df.columns)
        # Init hidden and cell state
        hidden = init_hidden(1, 1024, self.device)
        # Predict 30 laps, feeding each prediction back in as the next input
        target = self.scaler.inverse_transform(dfs_scaled.values)
        inp = torch.tensor(dfs_scaled.iloc[0:10].values, dtype=torch.float)
        inp = torch.reshape(inp, (1, inp.shape[0], inp.shape[1]))
        inp = inp.to(self.device)  # .to() returns a copy; it must be reassigned
        out1, hidden = predict(inp, self.predictor, hidden)
        inp = torch.reshape(out1, (1, out1.shape[0], out1.shape[1]))
        out2, hidden = predict(inp, self.predictor, hidden)
        inp = torch.reshape(out2, (1, out2.shape[0], out2.shape[1]))
        out3, hidden = predict(inp, self.predictor, hidden)
        out = torch.cat((out1, out2, out3), dim=0)
        out_np = out.cpu().numpy()
        out_np = self.scaler.inverse_transform(out_np)
        out_np = np.concatenate((target[0:10], out_np), axis=0)
        # Plotting
        out_np_time = [datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=time)
                       for time in out_np[:, -1]]
        plt.plot(out_np_time, label='prediction')
        target_time = [datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=time)
                       for time in target[:, -1]]
        plt.plot(target_time, label='original')
        plt.title('Estimated Lap Times')
        plt.ylabel("Time")
        plt.xlabel("Lap")
        plt.gca().yaxis.set_major_formatter(DateFormatter('%M:%S'))
        plt.tight_layout()
        plt.legend()
        plt.show()
        self.button.setEnabled(True)
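The init_hidden and predict helpers this handler relies on are not shown. Below is a minimal sketch of what they might look like, assuming a torch.nn.LSTM predictor with hidden size 1024 and batch-first input; the shapes are inferred from the call sites above, not confirmed by the source.

import torch

# Hypothetical helpers assumed by on_click(); shapes inferred from the calls above.
def init_hidden(num_layers, hidden_size, device, batch_size=1):
    # One (h_0, c_0) pair of zero tensors, as torch.nn.LSTM expects.
    h0 = torch.zeros(num_layers, batch_size, hidden_size, device=device)
    c0 = torch.zeros(num_layers, batch_size, hidden_size, device=device)
    return (h0, c0)

def predict(inp, model, hidden):
    # One forward pass without gradient tracking; returns a
    # (seq_len, features) tensor plus the updated hidden state.
    with torch.no_grad():
        out, hidden = model(inp, hidden)
    return out.squeeze(0), hidden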
def show_webcam(mirror=False):
    cam = cv2.VideoCapture(0)
    while True:
        time.sleep(1)
        ret_val, img = cam.read()
        if not ret_val:
            break  # camera returned no frame; img would be None
        if mirror:
            img = cv2.flip(img, 1)
        cv2.imshow('my webcam', img)
        print('img.shape', img.shape)
        predict(img)
        if cv2.waitKey(1) == 27:
            break  # esc to quit
    cv2.destroyAllWindows()
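A possible entry point for the loop above; mirroring the feed is the only assumption.

if __name__ == '__main__':
    show_webcam(mirror=True)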
def post_process(Arguments):
    '''
    This is simply a distributor for all functions involved in
    postprocessing POSE results from previous calculations, which is
    required to make sense of most POSE output.
    '''
    if Arguments.PostProcess == "Validate":
        validate(Arguments)
    elif Arguments.PostProcess == "Predict":
        predict(Arguments)
    return
def PredictTestData():
    token_vocab, label_vocab = load_predict_info()
    times = str(time.strftime("%d%m%Y%H%M%S"))
    filename = "PredictOutput_testing_" + times + ".xlsx"
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet()
    worksheet.write(0, 0, 'News Headlines')
    worksheet.write(0, 1, 'Polarity')
    worksheet.write(0, 2, 'Probability')
    rows = 1
    col = 0
    conn = psycopg2.connect(host="localhost", database="StockMarket",
                            user="******", password="******")
    cur = conn.cursor()
    postgreSQL_select_Query = """ select * from public."testdataset" """
    cur.execute(postgreSQL_select_Query)
    news = cur.fetchall()
    Predict = []
    for row in news:
        article = row[1]
        print(article)
        polarity, probability = predict(token_vocab, label_vocab, article)
        Predict.append(polarity)
        worksheet.write(rows, col, article)
        worksheet.write(rows, col + 1, polarity)
        worksheet.write(rows, col + 2, probability)
        rows += 1
    workbook.close()
    os.startfile(filename)
    return Predict
def upload():
    if request.method == 'POST':
        f = request.files['file']
        basepath = os.path.dirname(__file__)  # directory containing this file
        upload_path = os.path.join(basepath, 'static\\uploads', f.filename)
        # Note: the target folder must already exist, otherwise saving
        # fails with a missing-path error.
        f.save(upload_path)
        data = Image.open(upload_path)  # the prediction does not depend on the file name
        pred = predict(4, 'models/vgg.ckpt', data)
        pred = np.squeeze(pred)
        pred = pred.tolist()
        fenlei = ''
        index = pred.index(max(pred))  # take the class with the highest score
        if index == 0:
            fenlei = '草体'
        elif index == 2:
            fenlei = '篆体'
        elif index == 3:
            fenlei = '隶书'
        else:
            fenlei = ''
        return render_template('upload.html', result=fenlei, imgpath=upload_path)
    return render_template('upload.html')
def predict(self):
    headline = self.Entry1.get()
    if headline == "":
        messagebox.showinfo("Error", "Please insert news headline")
    else:
        conn = psycopg2.connect(host="localhost", database="StockMarket",
                                user="******", password="******")
        cur = conn.cursor()
        token_vocab, label_vocab = load_predict_info()
        polarity, probability = predict(token_vocab, label_vocab, headline)
        if polarity == 'POS':
            polarity = 'POSITIVE'
        if polarity == 'NEU':
            polarity = 'NEUTRAL'
        if polarity == 'NEG':
            polarity = 'NEGATIVE'
        self.Text1.delete('1.0', 'end')
        self.Text2.delete('1.0', 'end')
        self.Text1.insert('end', polarity)
        self.Text2.insert('end', probability)
        probability = str(probability)
        sql = """INSERT INTO public."testing_records"("test_news","test_polarity","test_probability") VALUES (%s, %s, %s);"""
        data = (headline, polarity, probability)
        cur.execute(sql, data)
        conn.commit()
def predictFile(self):
    file = self.Entry2.get()
    try:
        if file.lower().endswith(('.txt', '.csv')):
            conn = psycopg2.connect(host="localhost", database="StockMarket",
                                    user="******", password="******")
            cur = conn.cursor()
            times = str(time.strftime("%d%m%Y%H%M%S"))
            filename = "PredictOutput" + times + ".xlsx"
            workbook = xlsxwriter.Workbook(filename)
            worksheet = workbook.add_worksheet()
            worksheet.write(0, 0, 'News Headlines')
            worksheet.write(0, 1, 'Polarity')
            worksheet.write(0, 2, 'Probability')
            row = 1
            col = 0
            token_vocab, label_vocab = load_predict_info()
            with open(file, "r", encoding='utf-8') as f:
                for headlines in f:
                    headlines = headlines.rstrip('\n\r')
                    polarity, probability = predict(token_vocab, label_vocab, headlines)
                    if polarity == 'POS':
                        polarity = 'POSITIVE'
                    if polarity == 'NEU':
                        polarity = 'NEUTRAL'
                    if polarity == 'NEG':
                        polarity = 'NEGATIVE'
                    probability = str(probability)
                    sql = """INSERT INTO public."testing_records"("test_news","test_polarity","test_probability") VALUES (%s, %s, %s);"""
                    data = (headlines, polarity, probability)
                    cur.execute(sql, data)
                    print(headlines, polarity, probability)
                    worksheet.write(row, col, headlines)
                    worksheet.write(row, col + 1, polarity)
                    worksheet.write(row, col + 2, probability)
                    row += 1
            conn.commit()
            workbook.close()
            os.startfile(filename)
        else:
            messagebox.showinfo("Error", "Please insert the correct format file")
    except Exception:
        messagebox.showinfo(
            "Error", "The file inserted had some error, please try again.")
def predict(self):
    # Convert the image to PIL form first
    image = self.image
    self.label_4.setText('Predicting')  # set status before the prediction runs
    pred = predict(image)
    self.label_4.setText('Predicted')
    self.label_2.setText(str(pred.numpy().item()))
def predict(self):
    # Convert the image to PIL form first
    image = self.image
    self.label_4.setText('Predicting')  # set status before the prediction runs
    pred = predict(image)
    self.label_4.setText('Predicted')
    self.label_2.setText(CLASSES[pred])
def onclick(args):
    if args == 1:
        preprocess()
    elif args == 2:
        train()
    elif args == 3:
        test()
    elif args == 4:
        show_layer1_filters()
        show_layer2_filters()
        show_number_of_learned_parameters()
    elif args == 5:
        pick_sample_test_data()
        pick_sample_train_data()
    else:
        path = tk.filedialog.askopenfile(title="Select Image File",
                                         filetypes=(("jpeg files", "*.jpg"),
                                                    ("all files", "*.*")))
        print(path)
        try:
            predict(path.name)  # path is None if the dialog was cancelled
        except Exception:
            print("Error While Reading File")
def solve_expression():
    if request.method == "POST":
        try:
            file = request.files["image"]
            img_byte = file.read()
            img = cv2.imdecode(np.frombuffer(img_byte, np.uint8), -1)
            mask = predict(img)
            mask_path = "results/mask" + file.filename
            cv2.imwrite(mask_path, mask * 255)
            return {"filename": mask_path}
        except Exception:
            return handle_404()  # must be returned, or Flask gets a None response
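A sketch of how a client might call this endpoint. The route path and host are assumptions (the decorator is not shown), and the file name is a placeholder.

import requests

# Hypothetical usage; '/solve' and the file name are placeholders.
with open('expression.png', 'rb') as fh:
    resp = requests.post('http://localhost:5000/solve', files={'image': fh})
print(resp.json())  # e.g. {'filename': 'results/maskexpression.png'}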
def kernel_variation():
    accuracy_sum = 0
    model_count = 3
    accuracy = {}
    for trained_model in range(model_count):
        net = Net(kernel_size=7, conv_output_size=2)
        model = 'test' + str(trained_model)
        train(net, model_name=model)
        accuracy[trained_model] = predict(net, model_name=model)
        accuracy_sum += accuracy[trained_model]
    accuracy['average_accuracy'] = accuracy_sum / model_count
    print(accuracy)
def PredictTrainData():
    token_vocab, label_vocab = load_predict_info()
    conn = psycopg2.connect(host="localhost", database="StockMarket",
                            user="******", password="******")
    cur = conn.cursor()
    postgreSQL_select_Query = """ select * from public."traindataset" """
    cur.execute(postgreSQL_select_Query)
    news = cur.fetchall()
    Predict = []
    for row in news:
        article = row[1]
        polarity, probability = predict(token_vocab, label_vocab, article)
        Predict.append(polarity)
    return Predict
def handle_location(message):
    if message.text.strip() == '/place':
        bot.send_message(message.chat.id,
                         'okay, here is an example:\n"<b>\n/place Macdondals in centre of London </b>"',
                         parse_mode='HTML')
    else:
        file = open('data.txt', 'r')
        contents = file.read()  # read once; a second read() would return ''
        print(contents)
        if len(contents) > 0:
            result_message = ('<b>Sorry, dude.</b>\nFirst you should\n'
                              'share your location with me.\n📍 /set_location')
        else:
            result_message = '🔮 loading ... '
        bot.send_message(message.chat.id, result_message, parse_mode='HTML')
        keyword = message.text[7:]
        language = 'en'
        url = ('https://maps.googleapis.com/maps/api/place/textsearch/json'
               '?&query={0}&language={1}&key={2}').format(
                   keyword, language, config.gogole_map_api_key)
        req = requests.get(url)
        if req.json()['status'] == 'ZERO_RESULTS':
            bot.send_message(message.chat.id, '<b>Not found </b> 🤬', parse_mode='HTML')
        else:
            for result in req.json()['results'][::6]:
                vin = '<b>' + result['name'] + '</b>\n📍' + result['formatted_address']
                url = 'https://maps.googleapis.com/maps/api/place/details/json?placeid={0}&key={1}'.format(
                    result['place_id'], config.gogole_map_api_key)
                detail_req = requests.get(url)
                if detail_req.json()['status'] == 'OK':
                    if 'reviews' in detail_req.json()['result']:
                        reviews = []
                        for review in detail_req.json()['result']['reviews']:
                            if review['language'] == 'en':
                                reviews.append(review['text'])
                        predicteds = predict(reviews)
                        if len(predicteds) > 0:
                            vin += '\nThere are <b>' + str(len(predicteds)) + '</b> review(s).'
                            bot.send_message(message.chat.id, vin, parse_mode='HTML')
                            print('===========')
                            print(predicteds)
                            print('===========')
                    else:
                        vin += '\nSorry, this venue has no reviews.'
                        bot.send_message(message.chat.id, vin, parse_mode='HTML')
def kairec(upload_path):
    data = Image.open(upload_path)  # the prediction does not depend on the file name
    pred = predict(5, 'kaimodels/vgg.ckpt', data)
    pred = np.squeeze(pred)
    pred = pred.tolist()
    fenlei = ''
    index = pred.index(max(pred))  # take the class with the highest score
    if index == 0:
        fenlei = "楷体(" + "欧体" + ")"
    elif index == 1:
        fenlei = "楷体(" + "赵体" + ")"
    elif index == 2:
        fenlei = "楷体(" + "柳体" + ")"
    elif index == 3:
        fenlei = "楷体(" + "魏碑" + ")"
    elif index == 4:
        fenlei = "楷体(" + "颜体" + ")"
    return fenlei
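A minimal usage sketch; the image path is a placeholder and assumes the kaimodels/vgg.ckpt checkpoint exists.

# Hypothetical call; the image path is a placeholder.
style = kairec('static/uploads/sample.png')
print(style)  # e.g. 楷体(颜体)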
def uploadTestTemData():
    check_state = checkResponseFile(request.files)
    if check_state != 1:
        return check_state
    file = request.files['file']
    fileData = file.read()
    fileName = getFileMD5(fileData)
    originSavePath = os.path.join(TEM_SAVE_PATH, fileName)
    resizeOutputPath = os.path.join(TEM_LOW_SAVE_PATH, fileName)
    if not os.path.exists(originSavePath):
        writeFile(originSavePath, fileData)
    if resizeMovie(originSavePath, resizeOutputPath) != 0:
        removeFile(originSavePath)
        removeFile(resizeOutputPath)
        return jsonify({'code': -5, 'msg': 'Resize video failed'})
    result = predict([resizeOutputPath], utilDict)
    return jsonify({'code': 0, 'result': result, 'msg': 'Predict done'})
def fc_variation():
    print('Fully Connected Layer Size Variation')
    # Combinations of fully connected layer sizes
    fc1_size = [120, 10, 200]
    fc2_size = [84, 10, 120]
    # result dictionary
    accuracy = {}
    # Iterate through all the fully connected layer sizes
    for fc1 in fc1_size:
        for fc2 in fc2_size:
            # create an instance of the neural network
            net = Net(fc1_size=fc1, fc2_size=fc2)
            model = 'fc_variation' + str(fc1) + '_' + str(fc2)
            # train the model
            train(net, model_name=model)
            # measure model accuracy with testing dataset
            accuracy[model] = predict(net, model_name=model)
    print(accuracy)
def setCategoryEnglish(self, article):
    import sys
    sys.path.insert(0, "../EnglishTextCategorization/")
    from Predict import predict
    predictedCategory = predict(article)
    return predictedCategory
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
from model import create_model
from DataGeneration import dataGeneration
from Checkpoint import createCheckpoint
from Performance import plot, calculate
from training import train
from Predict import predict

loadPreviouslySavedWeights = 1

network = create_model()
dataSet = dataGeneration()
cp_callback = createCheckpoint()
train(network, dataSet, cp_callback, loadPreviouslySavedWeights)
calculate(network, dataSet)
predict(network, dataSet)
from python_to_mysql import get_categories, add_category, get_prints, set_print_categories
from python_to_mysql import mock_classifier  # classifier returning random results for quick tests
from Predict import predict

print("took", round(time.time() - t1), "s to load dependencies")

### Classify the articles
print("\nLoading articles from database...")
t1 = time.time()
x_ids, x_titles, x_booktitles, x_keywords = get_prints()
print("took", round(time.time() - t1), "s to load", len(x_ids), "prints")

print("\nPredicting article categories...", flush=True)
t1 = time.time()
prediction = predict(x_titles, x_keywords, x_booktitles)
print("took", round(time.time() - t1), "s to predict", len(prediction), "categories")

print("\nSaving predicted categories in database...", flush=True)
t1 = time.time()
set_print_categories(x_ids, prediction)
print("took", round(time.time() - t1), "s to save", len(prediction), "predictions", flush=True)

print("\nClassification done!\n")
def scrape(self, response):
    single_url = 'https://www.malaysiastock.biz/Corporate-Infomation.aspx?securityCode='
    # --- retrieve the tokens and labels
    token_vocab, label_vocab = load_predict_info()
    article = NewsItem()
    i = 0
    alldata = []
    imgsrc = 0
    StockCode = response.meta.get('code')
    completeurl = single_url + StockCode
    conn = psycopg2.connect(host="localhost", database="StockMarket",
                            user="******", password="******")
    cur = conn.cursor()
    # x = HtmlXPathSelector(response)
    r = requests.get(completeurl)
    # --- crawl the news
    all_links = response.css('.tablelist td span a')
    # iterate over links
    print(completeurl, '\n')
    for sel in all_links:
        try:
            html_as_string = r.text
            soup = BeautifulSoup(html_as_string, 'html.parser')
            article['headline'] = soup.select(".line td span a")[i].text
            article['link'] = response.css('.tablelist td span a').xpath('@href').extract()[i]
            article['code'] = StockCode
            # --- filter the news headlines that are not in english language
            if re.findall('[\u4e00-\u9fff]+', article['headline']):
                if "News" in article['link']:
                    imgsrc = imgsrc + 1
            # --- filter the news headlines to prevent duplication problem
            else:
                postgreSQL_select_Query = """ select * from public."company_news" """
                cur.execute(postgreSQL_select_Query)
                news = cur.fetchall()
                Newsexist = False
                for row in news:
                    if article['headline'] == row[1] and article['code'] == row[5]:
                        Newsexist = True
                        if "News" in article['link']:
                            imgsrc = imgsrc + 1
                # --- if the news headline does not exist in the db, then predict and store
                if not Newsexist:
                    singledata = []
                    if "Blog" in article['link']:
                        print(article['link'])
                        b = requests.get(article['link'])
                        html_as_string = b.text
                        p = BeautifulSoup(html_as_string, 'html.parser')
                        # article['source'] = response.css('.line td a').xpath('text()').extract()
                        article['source'] = p.find('label', {'id': 'MainContent2_lbAuthorProfile'}).find('a').text
                        data = p.find('label', {'id': 'MainContent2_lbAuthorProfile'}).text
                        unusedata, date = data.split('e:')
                        date = date.strip()
                        date = str(date)
                        for item in date.splitlines():
                            d = parser.parse(item)
                            article['time'] = d.strftime("%Y-%m-%d")
                        polarity, probability = predict(token_vocab, label_vocab, article['headline'])
                        probability = str(probability)
                        singledata.append(article['headline'])
                        singledata.append(article['link'])
                        singledata.append(polarity)
                        singledata.append(probability)
                        singledata.append(article['code'])
                        singledata.append(article['source'])
                        singledata.append(article['time'])
                        alldata.append(singledata)
                        print(i, article['headline'])
                        print('link:', article['link'])
                        print('published time:', article['time'])
                        print('polarity:', polarity)
                        print('source:', article['source'])
                    else:
                        print(article['link'])
                        article['source'] = response.css('.line td a img').xpath('@src').extract()[imgsrc]
                        if 'thestar' in article['source']:
                            s = requests.get(article['link'])
                            html_as_string = s.text
                            s = BeautifulSoup(html_as_string, 'html.parser')
                            date = s.find('p', {'class': 'date'})
                            if date is None:
                                article['time'] = None
                            else:
                                date = s.find('p', {'class': 'date'}).text
                                date = date.replace(' ', '')
                                date = date.strip()
                                date = str(date)
                                for item in date.splitlines():
                                    d = parser.parse(item)
                                    article['time'] = d.strftime("%Y-%m-%d")
                        else:
                            s = requests.get(article['link'])
                            html_as_string = s.text
                            s = BeautifulSoup(html_as_string, 'html.parser')
                            date = s.find('span', {'class': 'post-created'})
                            if date is None:
                                article['time'] = None
                            else:
                                date = s.find('span', {'class': 'post-created'}).text
                                date = str(date.lower())
                                if date == '':
                                    article['time'] = None
                                else:
                                    if 'am' in date:
                                        date = date.replace('am', '')
                                    if 'pm' in date:
                                        date = date.replace('pm', '')
                                    for item in date.splitlines():
                                        d = parser.parse(item)
                                        article['time'] = d.strftime("%Y-%m-%d")
                        polarity, probability = predict(token_vocab, label_vocab, article['headline'])
                        probability = str(probability)
                        singledata.append(article['headline'])
                        singledata.append(article['link'])
                        singledata.append(polarity)
                        singledata.append(probability)
                        singledata.append(article['code'])
                        singledata.append(article['source'])
                        singledata.append(article['time'])
                        alldata.append(singledata)
                        print(i, article['headline'])
                        print('link:', article['link'])
                        print('published time:', article['time'])
                        print('polarity:', polarity)
                        print('source:', article['source'])
                        imgsrc = imgsrc + 1
            i = i + 1
        except Exception:
            print("The news headline had some error or does not exist.")
    for news_num in range(len(alldata), 0, -1):
        news_num = news_num - 1
        if alldata[news_num][6] is None:
            print("The news does not exist or had errors.")
        else:
            sql = """INSERT INTO public."company_news"("news_title", "news_link", "news_time", "news_accuracy", "news_code", "news_source", "news_polarity") VALUES (%s, %s, %s, %s, %s, %s, %s);"""
            data = (alldata[news_num][0], alldata[news_num][1], alldata[news_num][6],
                    alldata[news_num][3], alldata[news_num][4], alldata[news_num][5],
                    alldata[news_num][2])
            cur.execute(sql, data)
            conn.commit()
    cur.close()
label=hp_inst["label"], time_span=hp_inst["time_span"], max_depth=hp_inst["max_depth"], max_features=hp_inst["max_features"], min_samples_leaf=hp_inst["min_samples_leaf"], min_samples_split=hp_inst["min_samples_split"], n_estimators=hp_inst["n_estimators"], cv=hp_inst["cv"], precision=hp_inst["precision"], edge_forecasting=hp_inst["edge_forecasting"], normalize_data=hp_inst["normalize_data"], criterion=hp_inst["criterion"], ) predict( label=hp_inst["label"], precision=hp_inst["precision"], target_date=target_date, normalize_data=hp_inst["normalize_data"], ) MLp, MLa, TWNa, ECa, MLw, TWNw, ECw = post_mortem( target_date=str( get_datetime(target_date) + datetime.timedelta(1))) MLp_agg.append(MLp) MLa_agg.append(MLa) TWNa_agg.append(TWNa) ECa_agg.append(ECa) MLw_agg.append(MLw) TWNw_agg.append(TWNw) ECw_agg.append(ECw) except Exception as e: loggr.exception( "Something went wrong for this date. See next line for details. Skipping date..."
from __future__ import print_function
from createDataSet import initDataSet
from Predict import predict
import six.moves.cPickle as pickle
import theano
import theano.tensor as T
from SaveCnn import CnnModel
from SaveCnn import HiddenLayer
from SaveCnn import LeNetConvPoolLayer
from SaveCnn import LogisticRegression
from SaveCnn import load_data
import copy
import BinAndCrop

if __name__ == '__main__':
    start = BinAndCrop.BinAndCropClass()
    start.bin()
    print('222')
    initDataSet()
    result = predict('file-126.pkl.gz')
    print(result)
                                b, X, Y, num_iterations=101,
                                learning_rate=0.005, print_cost=True)
print("w = " + str(params["w"]))
print("b = " + str(params["b"]))
print("dw = " + str(grads["dw"]))
print("db = " + str(grads["db"]))
print("cost = " + str(costs))

# predict
from Predict import predict
print("prediction = " + str(predict(w, b, X)))
print()

# model
from Model import model
d = model(X_train=train_set_x, Y_train=train_set_y,
          X_test=test_set_x, Y_test=test_set_y,
          num_iteration=2000, learn_rate=0.005, print_cost=True)
print()

# Example of a picture that was wrongly classified.
def parse(self, response):
    token_vocab, label_vocab = load_predict_info()
    article = NewsItem()
    i = 0
    imgsrc = 0
    blogsrc = 0
    alldata = []
    all_links = response.css('td .newsHeadline a')
    conn = psycopg2.connect(host="localhost", database="StockMarket",
                            user="******", password="******")
    cur = conn.cursor()
    r = requests.get('https://www.malaysiastock.biz/Blog/Blog-Headlines.aspx')
    # --- crawl the latest news
    for sel in all_links:
        try:
            html_as_string = r.text
            soup = BeautifulSoup(html_as_string, 'html.parser')
            article['headline'] = soup.findAll('span', {'class': 'newsHeadline'})[i].text
            article['link'] = response.css('td .newsHeadline a').xpath('@href').extract()[i]
            # --- filter the news headlines that are not in english language
            if re.findall('[\u4e00-\u9fff]+', article['headline']):
                # print("The news headline is not in english language")
                if "Blog" in article['link']:
                    blogsrc = blogsrc + 1
                else:
                    imgsrc = imgsrc + 1
            # --- filter the news headlines to prevent duplication problem
            else:
                postgreSQL_select_Query = """ select * from public."latest_news" """
                cur.execute(postgreSQL_select_Query)
                news = cur.fetchall()
                Newsexist = False
                for row in news:
                    if article['headline'] == row[1] and article['link'] == row[2]:
                        # print("The news headline already exists in the database.")
                        Newsexist = True
                        if "Blog" in article['link']:
                            blogsrc = blogsrc + 1
                        else:
                            imgsrc = imgsrc + 1
                # --- if the news headline does not exist in the db, then predict and store
                if not Newsexist:
                    singledata = []
                    if "Blog" in article['link']:
                        print(article['link'])
                        article['source'] = response.css('td .newsMedia a').xpath('text()').extract()[blogsrc]
                        b = requests.get(article['link'])
                        html_as_string = b.text
                        p = BeautifulSoup(html_as_string, 'html.parser')
                        data = p.find('label', {'id': 'MainContent2_lbAuthorProfile'}).text
                        unusedata, date = data.split('e:')
                        date = date.strip()
                        date = str(date)
                        for item in date.splitlines():
                            d = parser.parse(item)
                            article['time'] = d.strftime("%Y-%m-%d %H:%M")
                        polarity, probability = predict(token_vocab, label_vocab, article['headline'])
                        probability = str(probability)
                        singledata.append(article['headline'])
                        singledata.append(article['link'])
                        singledata.append(polarity)
                        singledata.append(probability)
                        singledata.append(article['source'])
                        singledata.append(article['time'])
                        alldata.append(singledata)
                        print('\n')
                        print(i, article['headline'])
                        print('link:', article['link'])
                        print('published time:', article['time'])
                        print('polarity:', polarity)
                        print('source:', article['source'])
                        print('\n')
                        blogsrc = blogsrc + 1
                    else:
                        print(article['link'])
                        article['source'] = response.css('td span a img').xpath('@src').extract()[imgsrc]
                        if 'thestar' in article['source']:
                            s = requests.get(article['link'])
                            html_as_string = s.text
                            s = BeautifulSoup(html_as_string, 'html.parser')
                            date = s.find('p', {'class': 'date'})
                            if date is None:
                                article['time'] = None
                            else:
                                date = s.find('p', {'class': 'date'}).text
                                time = s.find('time', {'class': 'timestamp'}).text
                                date = date.replace(' ', '')
                                date = date.strip()
                                date = str(date)
                                time = time.replace(' ', '')
                                time = time.strip()
                                time = str(time)
                                datetime = date + " " + time
                                if time == '':
                                    othertime = response.css('.line td').xpath('text()').extract()[i]
                                    datetime = date + " " + othertime
                                print(datetime)
                                if 'MYT' in datetime:
                                    datetime = datetime.replace('MYT', '')
                                for item in datetime.splitlines():
                                    d = parser.parse(item)
                                    article['time'] = d.strftime("%Y-%m-%d %H:%M")
                        else:
                            s = requests.get(article['link'])
                            html_as_string = s.text
                            s = BeautifulSoup(html_as_string, 'html.parser')
                            date = s.find('span', {'class': 'post-created'})
                            if date is None:
                                article['time'] = None
                            else:
                                date = s.find('span', {'class': 'post-created'}).text
                                date = str(date.lower())
                                if date == '':
                                    article['time'] = None
                                else:
                                    if 'am' in date:
                                        date = date.replace('am', '')
                                    if 'pm' in date:
                                        date = date.replace('pm', '')
                                    for item in date.splitlines():
                                        d = parser.parse(item)
                                        article['time'] = d.strftime("%Y-%m-%d %H:%M")
                        polarity, probability = predict(token_vocab, label_vocab, article['headline'])
                        probability = str(probability)
                        singledata.append(article['headline'])
                        singledata.append(article['link'])
                        singledata.append(polarity)
                        singledata.append(probability)
                        singledata.append(article['source'])
                        singledata.append(article['time'])
                        alldata.append(singledata)
                        print('\n')
                        print(i, article['headline'])
                        print('link:', article['link'])
                        print('published time:', article['time'])
                        print('polarity:', polarity)
                        print('source:', article['source'])
                        print('\n')
                        imgsrc = imgsrc + 1
            i = i + 1
        except Exception:
            print("The news headline had some error or does not exist.")
    for news_num in range(len(alldata), 0, -1):
        news_num = news_num - 1
        if alldata[news_num][5] is None:
            print("The news does not exist or had errors.")
        else:
            sql = """INSERT INTO public."latest_news"("news_title", "news_link", "news_polarity", "news_accuracy", "news_source", "news_time") VALUES (%s, %s, %s, %s, %s, %s);"""
            data = (alldata[news_num][0], alldata[news_num][1], alldata[news_num][2],
                    alldata[news_num][3], alldata[news_num][4], alldata[news_num][5])
            cur.execute(sql, data)
            conn.commit()
    cur.close()
def home():
    print('home', file=sys.stdout)
    error_message = False
    if request.method == 'POST':
        print('home post', file=sys.stdout)
        try:
            # check if the post request has the file part
            if len(request.files) == 0:
                flash('No file part')
                return redirect(request.url)
            # get first file
            file = next(iter(request.files.values()))
            # if user does not select file, browser also
            # submit an empty part without filename
            if file.filename == '':
                flash('No selected file')
                return redirect(request.url)
            if file:
                print('file uploaded', file=sys.stdout)
                filename = secure_filename(file.filename)
                test_file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                print(test_file_path, file=sys.stdout)
                file.save(test_file_path)
                # check for number of lines
                with open(test_file_path, "r") as f:
                    lines = len(f.readlines())
                max_len = 50000
                if lines > max_len:
                    raise IndexError('file is too long')
                # read form
                tcr_encoding_model = request.form['model_type']
                dataset = request.form['dataset']
                use_alpha = 'use_alpha' in request.form
                use_vj = 'use_vj' in request.form
                use_mhc = 'use_mhc' in request.form
                use_t_type = 'use_t_type' in request.form
                # version flags
                version = ''
                version += '1'
                if dataset == 'vdjdb':
                    version += 'v'
                elif dataset == 'mcpas':
                    version += 'm'
                if tcr_encoding_model == 'AE':
                    version += 'e'
                elif tcr_encoding_model == 'LSTM':
                    version += 'l'
                if use_alpha:
                    version += 'a'
                if use_vj:
                    version += 'j'
                if use_mhc:
                    version += 'h'
                if use_t_type:
                    version += 't'
                print('version: ' + version, file=sys.stdout)
                df = predict(version, test_file_path)
                df.to_csv(app.config['UPLOAD_FOLDER'] + '/results.csv', index=False)
                os.remove(test_file_path)
                return send_from_directory(directory=app.config['UPLOAD_FOLDER'],
                                           filename='results.csv')
        except IndexError:
            error_message = True
            print('long file error', file=sys.stderr)
            os.remove(test_file_path)
            return render_template("too_long_input_file.html", error_message=error_message)
        except Exception:
            error_message = True
            os.remove(test_file_path)
            return render_template("home.html", error_message=error_message)
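A sketch of how a client might exercise this endpoint, based on the form fields read above. The host, route, and input file name are assumptions; checkbox-style flags are sent only when set.

import requests

# Hypothetical usage; host, route and file name are placeholders.
form = {'model_type': 'AE', 'dataset': 'mcpas', 'use_alpha': 'on'}
with open('tcr_pairs.csv', 'rb') as fh:
    resp = requests.post('http://localhost:5000/', data=form, files={'file': fh})
with open('results.csv', 'wb') as out:
    out.write(resp.content)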
def model(X_train, Y_train, X_test, Y_test, num_iteration=2000,
          learn_rate=0.5, print_cost=False):
    """
    Builds the logistic regression model by calling the function you've implemented previously

    Arguments:
    X_train -- training set represented by a numpy array of shape (num_px * num_px * 3, m_train)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    X_test -- test set represented by a numpy array of shape (num_px * num_px * 3, m_test)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    num_iteration -- hyperparameter representing the number of iterations to optimize the parameters
    learn_rate -- hyperparameter representing the learning rate used in the update rule of optimize()
    print_cost -- Set to true to print the cost every 100 iterations

    Returns:
    d -- dictionary containing information about the model.
    """
    import numpy as np
    # initialize parameters with zeros (≈ 1 line of code)
    from Initialize_with_zeros import initialize_with_zeros
    w, b = initialize_with_zeros(X_train.shape[0])

    from Optimize import optimize
    # Gradient descent
    params, grads, costs = optimize(w, b, X_train, Y_train,
                                    num_iterations=num_iteration,
                                    learning_rate=learn_rate,
                                    print_cost=print_cost)
    # Retrieve parameters w and b from dictionary "parameters"
    w = params["w"]
    b = params["b"]

    # Predict test/train set examples
    from Predict import predict
    Y_prediction_train = predict(w=w, b=b, X=X_train)
    Y_prediction_test = predict(w=w, b=b, X=X_test)

    # Print train/test Errors
    print("train accuracy: {} %".format(
        100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(
        100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {
        "costs": costs,
        "w": w,
        "b": b,
        "Y_prediction_train": Y_prediction_train,
        "Y_prediction_test": Y_prediction_test,
        "learning_rate": learn_rate,
        "num_iterations": num_iteration
    }
    return d
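The predict imported from Predict is not shown; here is a minimal sketch of a compatible implementation, assuming the standard sigmoid-threshold formulation that the (w, b, X) call signature above implies.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Sketch of Predict.predict under the assumptions above.
def predict(w, b, X):
    # w: (n_features, 1), X: (n_features, m) -> labels of shape (1, m)
    A = sigmoid(np.dot(w.T, X) + b)   # predicted probabilities
    return (A > 0.5).astype(float)    # threshold at 0.5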
time_span=hp["time_span"], rolling_average_window=hp["rolling_average_window"], rolling_average_min_periods=hp["rolling_average_min_periods"], TWN_EC_split=hp["TWN_EC_split"], date_efficient=hp["date_efficient"], region_efficient=hp["region_efficient"], drop_columns=hp["drop_columns"], include_only_columns=hp["include_only_columns"], label=hp["label"], real_time=hp["real_time"], ) loggr.info("Predictions table is now ready") except Exception as e: loggr.exception("Wrangle.py could not run. Here's why: \n {e}") try: loggr.info("Running predictions for tomorrow ({})".format( datetime.datetime.now().date() + datetime.timedelta(1))) predict( label=hp["label"], precision=hp["precision"], target_date=hp["start_date"], normalize_data=hp["normalize_data"], ) loggr.info("Predictions are now ready") except Exception as e: loggr.exception("Predict.py could not run. Here's why: \n {e}") loggr.info("All done for today!")
def validate():
    # Load the feature and label csv files, join them according to complaint ID
    complaints_with_sentiment = "data/complaints_with_sentiment_metric.csv"
    narrative_preprocessed_file = "data/narrative_preprocessed.csv"
    complaints_features = merge_narrative_processed_and_sentiment_metrics(
        narrative_preprocessed_file, complaints_with_sentiment)
    # Extract the validation data
    validation_size = VALIDATION_SIZE
    complaints_features_for_validation = complaints_features[:validation_size]
    company_response_columns = complaints_features_for_validation.loc[
        :, 'company_response_Closed':'company_response_Closed with non-monetary relief']
    narratives = complaints_features_for_validation["Consumer complaint narrative"]
    company_responses = []
    predicted_product_types = []
    predicted_disputes = []
    suggested_responses = []
    escalate_prob = []
    wont_escalate_list = []
    print("Loading models...")
    model_dir = "trained_models"
    clf_product_file = model_dir + "/" + "product_classifier_lgreg.sav"
    clf_escalation_file = model_dir + "/" + "lgreg.all.joblib"
    tf_idf_vectorizer_file = model_dir + "/" + "tfidf_vectorizer_max50000.all.joblib"
    scaler_file = model_dir + "/" + "scaler.joblib"
    clf_product, clf_escalation, tf_idf_vectorizer, scaler = load_models(
        clf_product_file, clf_escalation_file, tf_idf_vectorizer_file, scaler_file)
    print("Predicting...")
    i = 0
    for narrative in narratives:
        product_type, escalation_prob_fig, suggest_response, escalation_probas_of_responses = predict(
            narrative, clf_product, clf_escalation, tf_idf_vectorizer, scaler)
        company_response_index = get_response_index(company_response_columns.loc[i, :])
        i += 1
        company_responses.append(get_response_types()[company_response_index])
        predicted_product_types.append(product_type)
        predict_dispute = (
            1 if escalation_probas_of_responses[company_response_index] > 0.5 else 0)
        predicted_disputes.append(predict_dispute)
        suggested_responses.append(suggest_response)
        escalate_prob.append(escalation_probas_of_responses)
        wont_escalate_list.append(wont_escalate(escalation_probas_of_responses))
    predict_result = pd.DataFrame()
    print(complaints_features_for_validation.columns)
    predict_result["Complaint ID"] = complaints_features_for_validation["Complaint ID"]
    predict_result["Consumer complaint narrative"] = complaints_features_for_validation[
        "Consumer complaint narrative"]
    predict_result["Product"] = complaints_features_for_validation["Product"]
    predict_result["Company response to consumer"] = company_responses
    predict_result["dispute_result"] = [
        0 if x == 'No' else 1
        for x in complaints_features_for_validation["Consumer disputed?"]
    ]
    # Assign the predicted results
    predict_result["predicted_product"] = predicted_product_types
    predict_result["predicted_dispute"] = predicted_disputes
    predict_result["suggested_response"] = suggested_responses
    predict_result["escalation_probs"] = escalate_prob
    predict_result["wont_escalate"] = wont_escalate_list
    print(predict_result.head())
    predict_result.to_csv("data/predicted_result_of_validation.csv", index=False)
from Predict import predict, get_reviews_from_array

a = input()
result = predict([a])
print(result)