Пример #1
0
    def test(self):
        """Train an SVC text classifier, reload it from disk and predict once.

        The model is saved into a temporary folder that is removed even when
        training, loading or prediction raises, so a failing run does not
        leave the folder behind.
        """
        # NLPData.AIVIVN2019_SA (presumably the full corpus) can be swapped in here.
        corpus: CategorizedCorpus = DataFetcher.load_corpus(
            NLPData.AIVIVN2019_SA_SAMPLE)
        params = {
            "vectorizer": CountVectorizer(ngram_range=(1, 2),
                                          max_features=4000),
            "svc": SVC(kernel='linear', C=0.3)
        }
        classifier = TextClassifier(estimator=TEXT_CLASSIFIER_ESTIMATOR.SVC,
                                    **params)
        model_trainer = ModelTrainer(classifier, corpus)

        def negative_f1_score(y_true, y_pred):
            """Return the second of the per-class F1 scores."""
            # average=None yields one score per class; the two-name unpacking
            # fails on purpose if the data does not have exactly two classes.
            _, score_class_1 = f1_score(y_true, y_pred, average=None)
            return score_class_1

        tmp_model_folder = mkdtemp()
        try:
            score = model_trainer.train(tmp_model_folder,
                                        scoring=negative_f1_score)
            print(score)

            classifier = TextClassifier.load(tmp_model_folder)
            sentence = Sentence('tuyệt vời')
            classifier.predict(sentence)
        finally:
            # mkdtemp() never cleans up after itself; do it on every exit path.
            shutil.rmtree(tmp_model_folder)
        print(sentence)
Пример #2
0
def predict(text):
    """Print ``text`` and the labels the module-level classifier assigns to it.

    The text is wrapped in a ``Sentence``, passed to ``classifier.predict``
    and the resulting ``labels`` attribute is printed. Nothing is returned.
    """
    print(f"\nText: {text}")

    wrapped = Sentence(text)
    classifier.predict(wrapped)
    print(f"Labels: {wrapped.labels}")
Пример #3
0
 def read_text_classification_file(path_to_file) -> List[Sentence]:
     """Parse a ``__label__``-annotated text file into ``Sentence`` objects.

     Every line yields one ``Sentence``. Each ``__label__<name>`` token on the
     line (``<name>`` made of word characters or ``#``) becomes a ``Label``,
     and the line with those tokens removed becomes the sentence text.

     Args:
         path_to_file: Path of the file to read; it is decoded as UTF-8.

     Returns:
         The sentences in file order.
     """
     # Compiled once instead of being rebuilt for every line.
     label_pattern = re.compile(r"__label__(?P<label>[\w#]+)")
     sentences = []
     # Explicit encoding: the platform default is not UTF-8 everywhere and
     # would garble or reject non-ASCII text.
     with open(path_to_file, encoding="utf-8") as f:
         for line in f.read().splitlines():
             labels = [Label(value) for value in label_pattern.findall(line)]
             text = label_pattern.sub("", line)
             sentences.append(Sentence(text, labels))
     return sentences
Пример #4
0
    def predict(self, sentence: Sentence):
        """Predict labels for ``sentence`` and attach them with ``add_labels``.

        The branch taken depends on ``self.estimator``:

        * FAST_TEXT: ``self.ft.predict`` returns parallel values and scores;
          the ``__label__`` marker is removed from each value and the pair is
          wrapped in a ``Label``.
        * SVC: the text is vectorised by ``self.x_transformer``, classified by
          ``self.svc`` and decoded by ``self.y_transformer``.
        * PIPELINE: ``self.pipeline`` predicts on the raw text; in multilabel
          mode the output is decoded by ``self.y_encoder`` and the first row
          is used.

        Nothing is returned; for any other estimator the sentence is left
        untouched.
        """
        estimator = self.estimator

        if estimator == TEXT_CLASSIFIER_ESTIMATOR.FAST_TEXT:
            raw_labels, confidences = self.ft.predict(sentence.text)
            sentence.add_labels([
                Label(raw.replace("__label__", ""), confidence)
                for raw, confidence in zip(raw_labels, confidences)
            ])
        elif estimator == TEXT_CLASSIFIER_ESTIMATOR.SVC:
            features = self.x_transformer.transform([sentence.text])
            encoded = self.svc.predict(features)
            sentence.add_labels(self.y_transformer.inverse_transform(encoded))
        elif estimator == TEXT_CLASSIFIER_ESTIMATOR.PIPELINE:
            predicted = self.pipeline.predict([sentence.text])
            if self.multilabel:
                # Only one text was submitted, so only the first row matters.
                decoded = list(self.y_encoder.inverse_transform(predicted)[0])
            else:
                decoded = list(predicted)
            sentence.add_labels(decoded)
def sentiment(text):
    """Return the ``value`` of every label predicted for ``text``.

    The module-level ``classifier`` is loaded from ``model_path`` the first
    time it is needed. If that path does not exist, an error is logged and
    the process exits with status 1.
    """
    global classifier

    if not classifier:
        if not os.path.exists(model_path):
            logger.error(
                f"Could not load model at {model_path}.\n"
                f"Download model with \"underthesea download {UTSModel.sa_bank.value}\"."
            )
            sys.exit(1)
        classifier = TextClassifier.load(model_path)

    wrapped = Sentence(text)
    classifier.predict(wrapped)
    return [label.value for label in wrapped.labels]
Пример #6
0
def classify(X):
    """Return the labels the classifier assigns to the text ``X``.

    The module-level ``classifier`` is loaded from ``model_path`` the first
    time it is needed. If that path does not exist, an error is logged and
    the process exits with status 1.
    """
    global classifier

    if not classifier:
        if not os.path.exists(model_path):
            logger.error(
                f"Could not load model at {model_path}.\n"
                f"Download model with \"underthesea download {UTSModel.tc_general.value}\"."
            )
            sys.exit(1)
        classifier = TextClassifier.load(model_path)

    wrapped = Sentence(X)
    classifier.predict(wrapped)
    return wrapped.labels
Пример #7
0
def sentiment(text):
    """Return the first predicted label for ``text``, with "1"/"0" renamed.

    A first label equal to "1" is returned as "negative" and one equal to
    "0" as "positive"; any other label is returned unchanged.

    The module-level ``classifier`` is loaded from ``model_path`` the first
    time it is needed. If that path does not exist, an error is logged and
    the process exits with status 1.
    """
    global classifier

    if not classifier:
        if not os.path.exists(model_path):
            logger.error(
                f"Could not load model at {model_path}.\n"
                f"Download model with \"underthesea download {UTSModel.sa_general.value}\"."
            )
            sys.exit(1)
        classifier = TextClassifier.load(model_path)

    wrapped = Sentence(text)
    classifier.predict(wrapped)
    top = wrapped.labels[0]
    if top == "1":
        return "negative"
    if top == "0":
        return "positive"
    return top
Пример #8
0
    def test_fasttext(self):
        """Train a fastText text classifier, reload it from disk and predict once.

        The model is saved into a temporary folder that is removed even when
        training, loading or prediction raises, so a failing run does not
        leave the folder behind.
        """
        corpus: CategorizedCorpus = DataFetcher.load_corpus(
            NLPData.AIVIVN2019_SA_SAMPLE)
        params = {"lr": 0.01, "epoch": 20, "wordNgrams": 3, "dim": 20}
        classifier = TextClassifier(
            estimator=TEXT_CLASSIFIER_ESTIMATOR.FAST_TEXT, **params)
        model_trainer = ModelTrainer(classifier, corpus)

        def macro_f1_score(y_true, y_pred):
            """Return the macro-averaged F1 score."""
            return f1_score(y_true, y_pred, average='macro')

        tmp_model_folder = mkdtemp()
        try:
            score = model_trainer.train(tmp_model_folder,
                                        scoring=macro_f1_score)
            print(score)

            classifier = TextClassifier.load(tmp_model_folder)
            sentence = Sentence('tuyệt vời')
            classifier.predict(sentence)
        finally:
            # mkdtemp() never cleans up after itself; do it on every exit path.
            shutil.rmtree(tmp_model_folder)
        print(sentence)
Пример #9
0
    def test(self):
        """Train a multilabel pipeline classifier, reload it and predict once.

        The pipeline is a ``CountVectorizer`` feeding a one-vs-rest linear
        SVC. The model is saved into a temporary folder that is removed even
        when training, loading or prediction raises, so a failing run does
        not leave the folder behind.
        """
        corpus: CategorizedCorpus = DataFetcher.load_corpus(NLPData.UTS2017_BANK_SA_SAMPLE)
        pipeline = Pipeline(
            steps=[('features', CountVectorizer(ngram_range=(1, 2), max_features=4000)),
                   ('estimator', OneVsRestClassifier(SVC(kernel='linear', C=0.3)))]
        )
        classifier = TextClassifier(estimator=TEXT_CLASSIFIER_ESTIMATOR.PIPELINE, pipeline=pipeline, multilabel=True)
        model_trainer = ModelTrainer(classifier, corpus)

        def macro_f1_score(y_true, y_pred):
            """Return the macro-averaged F1 score."""
            return f1_score(y_true, y_pred, average='macro')

        tmp_model_folder = mkdtemp()
        try:
            score = model_trainer.train(tmp_model_folder, scoring=macro_f1_score)
            print(score)

            classifier = TextClassifier.load(tmp_model_folder)

            sentence = Sentence('Dịch vụ tiện dụng quá')
            classifier.predict(sentence)
            print(sentence)
        finally:
            # mkdtemp() never cleans up after itself; do it on every exit path.
            shutil.rmtree(tmp_model_folder)