Пример #1
0
    def test(self):
        """Train an SVC text classifier on the AIVIVN 2019 sample, then reload it.

        The model is trained into a temporary folder, scored with the F1 of
        the second class, loaded back from that folder and used to label one
        sentence. The temporary folder is removed even when training,
        loading or prediction raises.
        """
        # Swap in NLPData.AIVIVN2019_SA to run against the full corpus.
        corpus: CategorizedCorpus = DataFetcher.load_corpus(
            NLPData.AIVIVN2019_SA_SAMPLE)
        params = {
            "vectorizer": CountVectorizer(ngram_range=(1, 2),
                                          max_features=4000),
            "svc": SVC(kernel='linear', C=0.3)
        }
        classifier = TextClassifier(estimator=TEXT_CLASSIFIER_ESTIMATOR.SVC,
                                    **params)
        model_trainer = ModelTrainer(classifier, corpus)

        def negative_f1_score(y_true, y_pred):
            # average=None produces one F1 value per class; unpacking into
            # two names means this scorer only works with exactly two classes.
            _, score_class_1 = f1_score(y_true, y_pred, average=None)
            return score_class_1

        tmp_model_folder = mkdtemp()
        try:
            score = model_trainer.train(tmp_model_folder,
                                        scoring=negative_f1_score)
            print(score)

            classifier = TextClassifier.load(tmp_model_folder)
            sentence = Sentence('tuyệt vời')
            classifier.predict(sentence)
        finally:
            # mkdtemp() never cleans up after itself, so do it on every path.
            shutil.rmtree(tmp_model_folder)
        print(sentence)
def sentiment(text):
    """Predict sentiment labels for ``text`` and return their values.

    The module-level ``classifier`` is loaded from ``model_path`` the first
    time it is found to be falsy and is reused on later calls. When nothing
    exists at ``model_path`` an error is logged and the process exits with
    status 1.

    Returns a list with the ``value`` attribute of every label attached to
    the sentence by the classifier.
    """
    global classifier

    if not classifier:
        if not os.path.exists(model_path):
            logger.error(
                f"Could not load model at {model_path}.\n"
                f"Download model with \"underthesea download {UTSModel.sa_bank.value}\"."
            )
            sys.exit(1)
        classifier = TextClassifier.load(model_path)

    doc = Sentence(text)
    classifier.predict(doc)
    return [tag.value for tag in doc.labels]
Пример #3
0
def classify(X):
    """Classify the text ``X`` and return the labels attached to it.

    The module-level ``classifier`` is loaded from ``model_path`` the first
    time it is found to be falsy and is reused on later calls. When nothing
    exists at ``model_path`` an error is logged and the process exits with
    status 1.

    Returns ``Sentence.labels`` exactly as left by ``classifier.predict``.
    """
    global classifier

    if not classifier:
        if not os.path.exists(model_path):
            logger.error(
                f"Could not load model at {model_path}.\n"
                f"Download model with \"underthesea download {UTSModel.tc_general.value}\".")
            sys.exit(1)
        classifier = TextClassifier.load(model_path)

    doc = Sentence(X)
    classifier.predict(doc)
    return doc.labels
Пример #4
0
def sentiment(text):
    """Predict the sentiment of ``text`` from its first predicted label.

    The module-level ``classifier`` is loaded from ``model_path`` the first
    time it is found to be falsy and is reused on later calls. When nothing
    exists at ``model_path`` an error is logged and the process exits with
    status 1.

    Returns ``"negative"`` when the first label equals ``"1"``,
    ``"positive"`` when it equals ``"0"``, and the label itself otherwise.
    """
    global classifier
    if not classifier:
        if not os.path.exists(model_path):
            logger.error(
                f"Could not load model at {model_path}.\n"
                f"Download model with \"underthesea download {UTSModel.sa_general.value}\"."
            )
            sys.exit(1)
        classifier = TextClassifier.load(model_path)

    doc = Sentence(text)
    classifier.predict(doc)
    first = doc.labels[0]
    # NOTE(review): the label is compared with plain strings here, while the
    # bank variant of this function reads ``label.value`` - confirm that the
    # label type compares equal to str, otherwise the raw label is returned.
    if first == "1":
        return "negative"
    if first == "0":
        return "positive"
    return first
Пример #5
0
    def test_fasttext(self):
        """Train a fastText text classifier on the AIVIVN 2019 sample, then reload it.

        The model is trained into a temporary folder, scored with macro F1,
        loaded back from that folder and used to label one sentence. The
        temporary folder is removed even when training, loading or
        prediction raises.
        """
        corpus: CategorizedCorpus = DataFetcher.load_corpus(
            NLPData.AIVIVN2019_SA_SAMPLE)
        params = {"lr": 0.01, "epoch": 20, "wordNgrams": 3, "dim": 20}
        classifier = TextClassifier(
            estimator=TEXT_CLASSIFIER_ESTIMATOR.FAST_TEXT, **params)
        model_trainer = ModelTrainer(classifier, corpus)

        def macro_f1_score(y_true, y_pred):
            return f1_score(y_true, y_pred, average='macro')

        tmp_model_folder = mkdtemp()
        try:
            score = model_trainer.train(tmp_model_folder,
                                        scoring=macro_f1_score)
            print(score)

            classifier = TextClassifier.load(tmp_model_folder)
            sentence = Sentence('tuyệt vời')
            classifier.predict(sentence)
        finally:
            # mkdtemp() never cleans up after itself, so do it on every path.
            shutil.rmtree(tmp_model_folder)
        print(sentence)
Пример #6
0
    def test(self):
        """Train a multilabel pipeline classifier on the UTS2017 bank sample, then reload it.

        A CountVectorizer + one-vs-rest linear SVC pipeline is trained into a
        temporary folder, scored with macro F1, loaded back from that folder
        and used to label one sentence. The temporary folder is removed even
        when training, loading or prediction raises.
        """
        corpus: CategorizedCorpus = DataFetcher.load_corpus(NLPData.UTS2017_BANK_SA_SAMPLE)
        pipeline = Pipeline(
            steps=[('features', CountVectorizer(ngram_range=(1, 2), max_features=4000)),
                   ('estimator', OneVsRestClassifier(SVC(kernel='linear', C=0.3)))]
        )
        classifier = TextClassifier(estimator=TEXT_CLASSIFIER_ESTIMATOR.PIPELINE, pipeline=pipeline, multilabel=True)
        model_trainer = ModelTrainer(classifier, corpus)

        def macro_f1_score(y_true, y_pred):
            return f1_score(y_true, y_pred, average='macro')

        tmp_model_folder = mkdtemp()
        try:
            score = model_trainer.train(tmp_model_folder, scoring=macro_f1_score)
            print(score)

            classifier = TextClassifier.load(tmp_model_folder)

            sentence = Sentence('Dịch vụ tiện dụng quá')
            classifier.predict(sentence)
            print(sentence)
        finally:
            # mkdtemp() never cleans up after itself, so do it on every path.
            shutil.rmtree(tmp_model_folder)
Пример #7
0
from languageflow.data import Sentence
from languageflow.data_fetcher import NLPData, DataFetcher
from languageflow.models.text_classifier import TextClassifier

# Folder the classifier is loaded from; it must already contain a saved model.
model_folder = "tmp/sentiment_svm_uts2017_bank_sa"
print(f"Load model from {model_folder}")
classifier = TextClassifier.load(model_folder)
# Nothing is interpolated here, so a plain literal replaces the f-string.
print("Model is loaded.")


def predict(text):
    """Print ``text`` and the labels the module-level classifier assigns to it.

    Nothing is returned; both the input and the predicted labels are only
    written to standard output.
    """
    print(f"\nText: {text}")

    doc = Sentence(text)
    classifier.predict(doc)
    print(f"Labels: {doc.labels}")


# NOTE(review): `corpus` is not read anywhere in the visible part of this
# script - confirm whether the load is needed (e.g. for its side effects).
corpus = DataFetcher.load_corpus(NLPData.UTS2017_BANK_SA)

# Sample text fed to the classifier as a smoke test.
review_text = 'Bạn nên làm thẻ credit, đừng làm debit. Mình dùng thẻ debit của vcb, tết vừa rồi bị hack mất 28 triệu trong tài khoản mà đến giờ vcb đã giải quyết cho mình đâu. Bực mình!'
predict(review_text)