def analyze_post():
    # analyze raw text submitted in the JSON body of a POST request
    data = request.get_json().get('data')
    analyzer = TextAnalysis(text=data)
    return jsonify({
        'original_text': data,
        'text_analysis': analyzer.analyze()
    })
def analyze(filename):
    # analyze a previously uploaded file; reject the request if it is missing
    try:
        analyzer = TextAnalysis(
            filepath=os.path.join(app.config['UPLOAD_FOLDER'], filename))
        return jsonify(analyzer.analyze())
    except FileNotFoundError:
        return jsonify('File not found'), 400
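# A minimal sketch of how the two handlers above might be wired up, assuming a
# standard Flask app. The route paths, the UPLOAD_FOLDER value, and the
# text_analysis module name are illustrative assumptions, not taken from the
# original code; in the real module the imports would sit at the top of the file.
import os
from flask import Flask, request, jsonify
from text_analysis import TextAnalysis  # assumed import

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = './uploads'  # hypothetical upload directory

app.add_url_rule('/analyze', 'analyze_post', analyze_post, methods=['POST'])
app.add_url_rule('/analyze/<filename>', 'analyze', analyze, methods=['GET'])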
def submitText(event):
    # sentiment analysis tools
    vader = Vader()
    textblob = TextBlob()
    naivebayes = NaiveBayes()
    textobj = TextAnalysis()

    textInput = textEntry.get()  # user-inputted string

    # results from each of the tools, in the form of SentimentObject objects
    vaderObj = vader.analyzeString(textInput)
    tbObj = textblob.analyzeString(textInput)
    nbObj = naivebayes.analyzeString(textInput)

    arr = [textInput]
    textobj.normalize(arr)

    resultText = "Vader result: {}\n".format(vaderObj.classifier)
    resultText += "TextBlob result: {}\n".format(tbObj.classifier)
    resultText += "NaiveBayes result: {}\n".format(nbObj.classifier)
    resultText += "Final result: {}\n".format(
        textobj.normalizedList[0].classifier)
    resultText += "Confidence: {}%".format(
        round(textobj.normalizedList[0].confidence * 100, 2))
    textResultsString.set(resultText)
def submitFile():
    # file paths of the provided file, and optionally keywords and stopwords
    fileName = uploadFileButtonText.get()
    keywordsName = uploadKeywordsButtonText.get()
    stopwordsName = uploadStopwordsButtonText.get()

    # prompt an alert and return if no file was selected
    if fileName == fileUploadDefaultText:
        messagebox.showinfo("Error", "No file selected")
        return

    # build lists of keywords and stopwords; they remain None if not provided
    keywords = None
    stopwords = None
    if keywordsName != optionalUploadDefaultText:
        keywordFile = pd.read_excel(keywordsName)
        keywordFile = keywordFile.dropna()  # dropna() returns a copy, so reassign
        keywords = keywordFile[defaultColumnName].tolist()
    if stopwordsName != optionalUploadDefaultText:
        stopwordsFile = pd.read_excel(stopwordsName)
        stopwordsFile = stopwordsFile.dropna()
        stopwords = stopwordsFile[defaultColumnName].tolist()

    # analyze the file and display the results
    analyzer = TextAnalysis()
    analyzer.read(fileName)
    analyzer.extractKeywords(keywords, stopwords)
    resultText = "Total Positive: {}\n".format(analyzer.totalpos)
    resultText += "Total Negative: {}\n".format(analyzer.totalneg)
    resultText += "Total Neutral: {}\n".format(analyzer.totalneu)
    resultText += "Total Confidence: {}%".format(
        round(analyzer.avgConfidence * 100, 2))
    uploadResultsString.set(resultText)

    # render the generated word cloud, then remove the temporary image
    wordCloud(fileName, stopwords)
    img = ImageTk.PhotoImage(Image.open("../out/img.png"))
    wordCloudLabel.configure(image=img)
    wordCloudLabel.image = img  # keep a reference so Tk does not garbage-collect it
    os.remove("../out/img.png")
    # barGraph("../out/out.xlsx")
    win.destroy()

    if keywordsName != optionalUploadDefaultText:
        winFinal = tk.Tk()
        winFinal.wm_title("Output: ")
        label = tk.Label(winFinal,
                         text="Keyword file has been successfully created!")
        label.pack()
def unique_clf(self):
    x_train, x_test, y_train, y_test = self.split_data()
    pipeline = Pipeline(
        memory=None,
        steps=[
            ('union', FeatureUnion(
                transformer_list=[
                    ('text_transform', Pipeline([
                        ('vect', TfidfVectorizer(use_idf=True, smooth_idf=True,
                                                 ngram_range=(1, 3))),
                        ('feat_sel', SelectKBest(chi2, k=2000)),
                    ])),
                    ('text_analysis', TextAnalysis())
                ],
                transformer_weights={
                    'text_transform': 1,
                    'text_analysis': 0.3,
                },
            )),
            # ('clf', dummy.DummyClassifier(strategy="stratified", random_state=0))
            ('clf', neural_network.MLPClassifier(max_iter=1000, alpha=1e-4,
                                                 hidden_layer_sizes=(128, 128),
                                                 tol=1e-4))
        ])
    clf = pipeline
    clf.fit(x_train, y_train)
    predicted_train = clf.predict(x_train)
    predicted_test = clf.predict(x_test)
    train_metrics_dic, test_metrics_dic = ScoreUtils.metrics_getter(
        y_train, y_test, predicted_train, predicted_test)
    print("Accuracy score on train: {}".format(
        train_metrics_dic['acc_score']))
    print("Accuracy score on test: {}".format(
        test_metrics_dic['acc_score']))
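# For TextAnalysis() to sit inside the FeatureUnion above it has to follow
# scikit-learn's transformer protocol (fit/transform). A minimal hypothetical
# skeleton of such a transformer, not the project's actual implementation:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class TextAnalysisSketch(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self  # stateless: nothing to learn

    def transform(self, X):
        # one hand-crafted feature row per document (illustrative feature)
        return np.array([[len(doc)] for doc in X])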
def set_pipeline(parameters):
    pipeline_dic = {}
    for k, v in parameters.items():
        pipeline_dic[k] = Pipeline(
            memory=None,
            steps=[
                ('union', FeatureUnion(
                    transformer_list=[
                        ('text_transform', Pipeline([
                            ('vect', v['union__text_transform__vect'][0]),
                            ('feat_sel', v['union__text_transform__feat_sel'][0]),
                        ])),
                        ('text_analysis', TextAnalysis())
                    ],
                    transformer_weights={
                        'text_transform': 1,
                        'text_analysis': 0,
                    },
                )),
                ('clf', v['clf'][0])
            ])
    return pipeline_dic
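# Hypothetical usage of set_pipeline: judging by the [0] indexing above, it
# expects a dict of parameter grids keyed by model name, where each grid value
# is a one-element list. All names below are illustrative assumptions.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectKBest, chi2
from sklearn import neural_network

parameters = {
    'mlp': {
        'union__text_transform__vect': [TfidfVectorizer(ngram_range=(1, 3))],
        'union__text_transform__feat_sel': [SelectKBest(chi2, k=2000)],
        'clf': [neural_network.MLPClassifier(max_iter=1000)],
    },
}
pipelines = set_pipeline(parameters)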
from hmmlearn import hmm
import pickle
from text_analysis import TextAnalysis

input_dir = "./datasets/livedoor/dokujo-tsushin/"
input_data = input_dir + "dokujo-tsushin-4778030.txt"
X = TextAnalysis.mecab_analysis(input_data)

# with verbose=True each iteration of the fit can be inspected
# model = hmm.MultinomialHMM(n_components=10, n_iter=1000, verbose=True)
model = hmm.MultinomialHMM(n_components=10, n_iter=1000)
model.fit(X)
L, Z = model.decode(X)
# print(model.transmat_)  # print the transition probabilities
# print(model.monitor_)   # the history array holds the last two log-likelihoods
sample = model.sample(n_samples=100)

# load the dictionary
with open('./datasets/livedoor/livedoor_dict.pkl', 'rb') as f:
    livedoor_dict = pickle.load(f)

# generate text by sampling from the model
sample_id = sample[0].flatten()
sample_text = ""
for id in sample_id:
    for key in livedoor_dict:
        if id == livedoor_dict[key]:
            # reverse lookup: map the sampled id back to its token
            sample_text += key
            break
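# The nested reverse lookup above is O(V) per sampled id. Inverting the
# dictionary once is the usual fix; a sketch, assuming livedoor_dict maps
# token -> integer id as the loop above implies:
id_to_token = {v: k for k, v in livedoor_dict.items()}
sample_text = "".join(id_to_token.get(i, "") for i in sample_id)
print(sample_text)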
def test_check_time(self):
    # strings containing a time or date expression should be flagged (1)
    positives = [
        "J'ai 3ans ",
        "J'ai 1 an",
        "J'ai 1 mois.",
        "J'ai 2 an .",
        "il est 4 Heure.",
        "J'ai le 5 août .",
        "J'ai 4 janvier .",
        "J'ai 4 fevrier .",
        "J'ai 2 heures ",
        "J'ai depuis lundi",
        "A 3 heure20",
        "J'ai 4min .",
        "J'ai 4s.",
        "J'ai 235sec",
        "Lundi j ai",
    ]
    for text in positives:
        with self.subTest(text=text):
            self.assertEqual(TextAnalysis.check_time(text), 1)

    # strings that merely resemble time words should not be flagged (0)
    negatives = [
        "J'aime ansible",
        "J'aime milan",
        "J'aime le mans.",
        "J'aime mon moi moisi ",
    ]
    for text in negatives:
        with self.subTest(text=text):
            self.assertEqual(TextAnalysis.check_time(text), 0)