Example #1
def analyze_post():
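    # Pull the text to analyze out of the posted JSON body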
    data = request.get_json().get('data')
    analyzer = TextAnalysis(text=data)
    return jsonify({
        'original_text': data,
        'text_analysis': analyzer.analyze()
    })
Example #2
def analyze(filename):
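    # Analyze a file previously saved in the app's upload folder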
    try:
        analyzer = TextAnalysis(
            filepath=os.path.join(app.config['UPLOAD_FOLDER'], filename))
        return jsonify(analyzer.analyze())
    except FileNotFoundError:
        return jsonify('File not found'), 400
Example #3
def submitText(event):
    vader = Vader()  # sentiment analysis tools
    textblob = TextBlob()
    naivebayes = NaiveBayes()
    textobj = TextAnalysis()
    textInput = textEntry.get()  # user-inputted string

    vaderObj = vader.analyzeString(textInput)  # results from each of the
    tbObj = textblob.analyzeString(textInput)  # tools in the form of
    nbObj = naivebayes.analyzeString(textInput)  # SentimentObject objects

    # normalize() runs the combined analysis, yielding the final classifier and confidence
    arr = [textInput]
    textobj.normalize(arr)

    resultText = "Vader result: {}\n".format(vaderObj.classifier)
    resultText += "TextBlob result: {}\n".format(tbObj.classifier)
    resultText += "NaiveBayes result: {}\n".format(nbObj.classifier)
    resultText += "Final result: {}\n".format(
        textobj.normalizedList[0].classifier)
    resultText += "Confidence: {}%".format(
        round(textobj.normalizedList[0].confidence * 100, 2))
    textResultsString.set(resultText)
Example #4
def submitFile():
    # file paths of provided file, and optionally keywords and stopwords
    fileName = uploadFileButtonText.get()
    keywordsName = uploadKeywordsButtonText.get()
    stopwordsName = uploadStopwordsButtonText.get()

    # prompt an alert if no file was selected, return
    if fileName == fileUploadDefaultText:
        messagebox.showinfo("Error", "No file selected")
        return

    # build keyword and stopword lists; each stays None if no file was provided
    keywords = None
    stopwords = None
    if keywordsName != optionalUploadDefaultText:
        keywordFile = pd.read_excel(keywordsName)
        keywordFile = keywordFile.dropna()
        keywords = keywordFile[defaultColumnName].tolist()
    if stopwordsName != optionalUploadDefaultText:
        stopwordsFile = pd.read_excel(stopwordsName)
        stopwordsFile = stopwordsFile.dropna()
        stopwords = stopwordsFile[defaultColumnName].tolist()

    # analyze file and return results
    analyzer = TextAnalysis()
    analyzer.read(fileName)
    analyzer.extractKeywords(keywords, stopwords)

    resultText = "Total Positive: {}\n".format(analyzer.totalpos)
    resultText += "Total Negative: {}\n".format(analyzer.totalneg)
    resultText += "Total Neutral: {}\n".format(analyzer.totalneu)
    resultText += "Total Confidence: {}%".format(
        round(analyzer.avgConfidence * 100, 2))
    uploadResultsString.set(resultText)
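    # Build the word cloud image, display it in the label, then remove the temporary file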
    wordCloud(fileName, stopwords)
    img = ImageTk.PhotoImage(Image.open("../out/img.png"))
    wordCloudLabel.configure(image=img)
    wordCloudLabel.image = img
    os.remove("../out/img.png")

    #barGraph("../out/out.xlsx")

    win.destroy()

    if keywordsName != optionalUploadDefaultText:
        winFinal = tk.Tk()
        winFinal.wm_title("Output: ")

        label = tk.Label(winFinal,
                         text="Keyword file has been succesfully created!")
        label.pack()
Example #5
    def unique_clf(self):
        x_train, x_test, y_train, y_test = self.split_data()
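        # Weighted union of TF-IDF n-gram features (top 2000 by chi2) and TextAnalysis features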
        pipeline = Pipeline(
            memory=None,
            steps=[
                ('union',
                 FeatureUnion(
                     transformer_list=[
                         ('text_transform',
                          Pipeline([
                              ('vect', TfidfVectorizer(use_idf=True,
                                                       smooth_idf=True,
                                                       ngram_range=(1, 3))),
                              ('feat_sel', SelectKBest(chi2, k=2000)),
                          ])), ('text_analysis', TextAnalysis())
                     ],
                     transformer_weights={
                         'text_transform': 1,
                         'text_analysis': 0.3,
                     },
                 )),
                # ('clf', (dummy.DummyClassifier(strategy="stratified", random_state=0)))
                ('clf',
                 neural_network.MLPClassifier(max_iter=1000,
                                              alpha=1e-4,
                                              hidden_layer_sizes=(128, 128),
                                              tol=1e-4))
            ])

        pipeline.fit(x_train, y_train)
        predicted_train = pipeline.predict(x_train)
        predicted_test = pipeline.predict(x_test)

        train_metrics_dic, test_metrics_dic = ScoreUtils.metrics_getter(
            y_train, y_test, predicted_train, predicted_test)
        print("Accuracy score on train: {}".format(
            train_metrics_dic['acc_score']))
        print("Accuracy score on test: {}".format(
            test_metrics_dic['acc_score']))
Example #6
def set_pipeline(parameters):
    # Build one Pipeline per parameter set, keyed by configuration name
    pipeline_dic = {}
    for k, v in parameters.items():
        pipeline_dic[k] = Pipeline(
            memory=None,
            steps=[('union',
                    FeatureUnion(
                        transformer_list=[
                            ('text_transform',
                             Pipeline([
                                 ('vect',
                                  v['union__text_transform__vect'][0]),
                                 ('feat_sel',
                                  v['union__text_transform__feat_sel'][0]),
                             ])), ('text_analysis', TextAnalysis())
                        ],
                        transformer_weights={
                            'text_transform': 1,
                            'text_analysis': 0,
                        },
                    )), ('clf', v['clf'][0])])
    return pipeline_dic
Example #7
from hmmlearn import hmm
import pickle

from text_analysis import TextAnalysis

input_dir = "./datasets/livedoor/dokujo-tsushin/"
input_data = input_dir + "dokujo-tsushin-4778030.txt"

X = TextAnalysis.mecab_analysis(input_data)

# With verbose=True, each training iteration can be inspected.
# model = hmm.MultinomialHMM(n_components=10, n_iter=1000, verbose=True)
model = hmm.MultinomialHMM(n_components=10, n_iter=1000)

model.fit(X)

L, Z = model.decode(X)
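# L is the log probability of Z; Z is the most likely (Viterbi) state sequence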
# print(model.transmat_)  # prints the transition probability matrix
# print(model.monitor_)  # the monitor's history holds the last two log-likelihoods
sample = model.sample(n_samples=100)

# Load the word-to-id dictionary
with open('./datasets/livedoor/livedoor_dict.pkl', 'rb') as f:
    livedoor_dict = pickle.load(f)

# Generate text from ids sampled from the model
sample_id = sample[0].flatten()
sample_text = ""
for word_id in sample_id:
    for key in livedoor_dict:
        if word_id == livedoor_dict[key]:
            sample_text += key
            break
Example #8
    def test_check_time(self):
        # Positive cases: strings that contain a time or date expression
        result = 1
        text = "J'ai 3ans "
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 1 an"
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 1 mois."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 2 an ."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "il est 4 Heure."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai  le 5 août ."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 4 janvier ."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 4 fevrier ."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 2 heures "
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai depuis lundi"
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "A 3 heure20"
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 4min ."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 4s."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'ai 235sec"
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "Lundi j ai"
        self.assertEqual(TextAnalysis.check_time(text), result)

        # Negative cases: no time or date expression present
        result = 0
        text = "J'aime ansible"
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'aime milan"
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'aime le mans."
        self.assertEqual(TextAnalysis.check_time(text), result)

        text = "J'aime mon moi moisi "
        self.assertEqual(TextAnalysis.check_time(text), result)