示例#1
0
def test_classify_text():
    """Check that a clearly positive sentence is classified as POSITIVE.

    Uses the "distilbert-base-uncased-finetuned-sst-2-english" checkpoint and
    requires both the expected label and a score above 0.9.
    """
    classifier = HappyTextClassification(
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )
    prediction = classifier.classify_text("What a great movie")

    # The predicted class and its confidence are verified separately so a
    # failure points at the exact expectation that was violated.
    assert prediction.label == 'POSITIVE'
    assert prediction.score > 0.9
示例#2
0
def example_2_0():
    """Show several ways of constructing a HappyTextClassification object.

    Demonstrates the no-argument form, keyword arguments, and positional
    arguments. The created objects are not used afterwards.
    """
    # No arguments: default with "distilbert-base-uncased"
    distilbert_classifier = HappyTextClassification()

    # Keyword form.
    albert_classifier = HappyTextClassification(
        model_type="ALBERT", model_name="albert-base-v2")

    # Positional form: model type first, then model name.
    bert_classifier = HappyTextClassification("BERT", "bert-base-uncased")
    roberta_classifier = HappyTextClassification(
        "ROBERTA", "deepset/roberta-base-squad2")
示例#3
0
def example_2_0():
    """Show several ways of constructing a HappyTextClassification object.

    The first call spells out model type, model name and num_labels; the
    remaining calls use keyword or positional arguments only. The created
    objects are not used afterwards.
    """
    # These explicit values are the defaults.
    distilbert_classifier = HappyTextClassification(
        "DISTILBERT", "distilbert-base-uncased", num_labels=2)

    # Keyword form.
    albert_classifier = HappyTextClassification(
        model_type="ALBERT", model_name="albert-base-v2")

    # Positional form: model type first, then model name.
    bert_classifier = HappyTextClassification("BERT", "bert-base-uncased")
    roberta_classifier = HappyTextClassification("ROBERTA", "roberta-base")
示例#4
0
def example_2_3():
    """Evaluate a fine-tuned classifier on a CSV file and print the outcome.

    Prints the type of the evaluation result, the result itself, and its
    ``loss`` attribute.
    """
    classifier = HappyTextClassification(
        num_labels=2,  # Don't forget to set num_labels!
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )
    evaluation = classifier.eval("../../data/tc/train-eval.csv")

    # <class 'happytransformer.happy_trainer.EvalResult'>
    print(type(evaluation))
    # EvalResult(eval_loss=0.007262040860950947)
    print(evaluation)
    # 0.007262040860950947
    print(evaluation.loss)
示例#5
0
def example_2_3():
    """Evaluate a fine-tuned classifier with explicit TCEvalArgs settings.

    Prints the type of the evaluation result, the result itself, and its
    ``loss`` attribute.
    """
    classifier = HappyTextClassification(
        num_labels=2,  # Don't forget to set num_labels!
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )

    # Passed only for demonstration -- not needed.
    eval_settings = TCEvalArgs(save_preprocessed_data=False)
    evaluation = classifier.eval(
        "../../data/tc/train-eval.csv", args=eval_settings)

    # <class 'happytransformer.happy_trainer.EvalResult'>
    print(type(evaluation))
    # EvalResult(eval_loss=0.007262040860950947)
    print(evaluation)
    # 0.007262040860950947
    print(evaluation.loss)
示例#6
0
def example_2_1():
    """Classify one sentence and print the result, its type and its label."""
    classifier = HappyTextClassification(
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )
    prediction = classifier.classify_text("Great movie! 5/5")

    # <class 'happytransformer.happy_text_classification.TextClassificationResult'>
    print(type(prediction))
    # TextClassificationResult(label='LABEL_1', score=0.9998761415481567)
    print(prediction)
    # LABEL_1
    print(prediction.label)
示例#7
0
def test_tc_test():
    """Check predictions for the test CSV against recorded labels and scores.

    Labels must match exactly. Scores are compared with a relative tolerance
    instead of exact float equality, because the recorded values are raw
    floating-point model outputs and comparing them bit-for-bit makes the
    test brittle.
    """
    import math  # function-scope import keeps this block self-contained

    happy_tc = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english")

    result = happy_tc.test("../data/tc/test.csv")
    answer = [
        TextClassificationResult(label='POSITIVE', score=0.9998401999473572),
        TextClassificationResult(label='NEGATIVE', score=0.9772131443023682),
        TextClassificationResult(label='NEGATIVE', score=0.9966067671775818),
        TextClassificationResult(label='POSITIVE', score=0.9792295098304749)
    ]

    # Checked explicitly: zip() below would silently ignore extra or missing
    # predictions.
    assert len(result) == len(answer)

    # The predicted classes are deterministic and must agree exactly.
    assert [case.label for case in result] == [case.label for case in answer]

    # Confidence values only need to agree up to a small relative error.
    for predicted, expected in zip(result, answer):
        assert math.isclose(predicted.score, expected.score, rel_tol=1e-3)
示例#8
0
def test_tc_save():
    """Check that a saved and reloaded model predicts the same label."""
    sample_text = "What a great movie"
    save_dir = "model/"

    # Save first, then record the prediction of the in-memory model.
    original_model = HappyTextClassification()
    original_model.save(save_dir)
    result_before = original_model.classify_text(sample_text)

    # Build a second model from the directory that was just written.
    reloaded_model = HappyTextClassification(load_path=save_dir)
    result_after = reloaded_model.classify_text(sample_text)

    assert result_before.label == result_after.label
示例#9
0
def example_2_4():
    """Run the classifier over a test CSV and print the predictions.

    Prints the container type, the full result, and the type, value and
    label of the first prediction.
    """
    classifier = HappyTextClassification(
        num_labels=2,  # Don't forget to set num_labels!
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )
    predictions = classifier.test("../../data/tc/test.csv")

    # <class 'list'>
    print(type(predictions))
    # [TextClassificationResult(label='LABEL_1', score=0.9998401999473572), TextClassificationResult(label='LABEL_0', score=0.9772131443023682)...
    print(predictions)
    # <class 'happytransformer.happy_text_classification.TextClassificationResult'>
    print(type(predictions[0]))
    # TextClassificationResult(label='LABEL_1', score=0.9998401999473572)
    print(predictions[0])
    # LABEL_1
    print(predictions[0].label)
示例#10
0
def test_tc_train_effectiveness():
    """Assert that training decreases the loss.

    The same CSV file is used for the evaluation before training, for the
    training itself, and for the evaluation afterwards.
    """
    csv_path = "../data/tc/train-eval.csv"
    classifier = HappyTextClassification(
        model_name="distilbert-base-uncased", model_type="DISTILBERT")

    loss_untrained = classifier.eval(csv_path).loss
    classifier.train(csv_path)
    loss_trained = classifier.eval(csv_path).loss

    assert loss_trained < loss_untrained
示例#11
0
def test_tc_train_effectiveness_multi():
    """Assert that training decreases the loss for a three-label model.

    The same CSV file is used for the evaluation before training, for the
    training itself, and for the evaluation afterwards.
    """
    csv_path = "../data/tc/train-eval-multi.csv"
    classifier = HappyTextClassification(
        num_labels=3,
        model_name="distilbert-base-uncased",
        model_type="DISTILBERT",
    )

    loss_untrained = classifier.eval(csv_path).loss
    classifier.train(csv_path)
    loss_trained = classifier.eval(csv_path).loss

    assert loss_trained < loss_untrained
示例#12
0
def example_2_5():
    """Print the evaluation loss before and after training on one CSV file."""
    csv_path = "../../data/tc/train-eval.csv"
    classifier = HappyTextClassification(
        num_labels=2,  # Don't forget to set num_labels!
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )

    loss_untrained = classifier.eval(csv_path).loss
    classifier.train(csv_path)
    loss_trained = classifier.eval(csv_path).loss

    # 0.007262040860950947
    print("Before loss: ", loss_untrained)
    # 0.000162081079906784
    print("After loss: ", loss_trained)
示例#13
0
def test_tc_with_dataclass():
    """Run train, eval and test with their settings given as dataclasses.

    No assertions are made; the test passes when all three calls complete
    without raising.
    """
    train_eval_csv = "../data/tc/train-eval.csv"
    classifier = HappyTextClassification()

    training_settings = TCTrainArgs(learning_rate=0.01, num_train_epochs=1)
    classifier.train(train_eval_csv, args=training_settings)

    evaluation_settings = TCEvalArgs()
    classifier.eval(train_eval_csv, args=evaluation_settings)

    testing_settings = TCTestArgs()
    classifier.test("../data/tc/test.csv", args=testing_settings)
示例#14
0
def test_tc_with_dic():
    """Run train, eval and test with their settings given as plain dicts.

    No assertions are made; the test passes when all three calls complete
    without raising.
    """
    train_eval_csv = "../data/tc/train-eval.csv"
    classifier = HappyTextClassification()

    training_settings = dict([('learning_rate', 0.01),
                              ("num_train_epochs", 1)])
    classifier.train(train_eval_csv, args=training_settings)

    # Empty dicts: no settings are overridden for eval and test.
    evaluation_settings = dict()
    classifier.eval(train_eval_csv, args=evaluation_settings)

    testing_settings = dict()
    classifier.test("../data/tc/test.csv", args=testing_settings)
示例#15
0
def test_tc_eval():
    """Check the evaluation loss of the fine-tuned model against a recorded value."""
    expected_loss = 0.007262040860950947
    classifier = HappyTextClassification(
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )

    evaluation = classifier.eval("../data/tc/train-eval.csv")

    # The second argument to approx is the tolerance for the comparison.
    assert evaluation.loss == approx(expected_loss, 0.01)
示例#16
0
def test_tc_train():
    """Smoke test: training on the sample CSV completes without raising."""
    classifier = HappyTextClassification(
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )
    # The return value is not inspected; only successful completion matters.
    classifier.train("../data/tc/train-eval.csv")
示例#17
0
def test_tc_save_load_eval():
    """Hand a default classifier to the run_save_load helper in "eval" mode.

    The actual checks are performed inside run_save_load, which is defined
    outside this block.
    """
    classifier = HappyTextClassification()
    run_save_load(
        classifier,
        "data/tc-train.json",
        ARGS_TC_EVAL,
        "../data/tc/train-eval.csv",
        "eval",
    )
示例#18
0
from webapp.models import Report
from webapp import api
from webapp import tweepy
import pandas as pd
import preprocessor as p
from happytransformer import HappyTextClassification

# Classifier built once when the module is imported; it is reachable under
# both names, `model` and `happy_tc`.
model = happy_tc = HappyTextClassification(
    'BERT',
    'Hate-speech-CNERG/dehatebert-mono-english',
    2,
)


def scan(username):
    """Collect the tweets of an account that the model labels as hate.

    Args:
        username: Account name; forwarded to ``tweetpull``,
            ``get_account_summary`` and ``get_twitter_info``.

    Returns:
        A tuple ``(tweets, account_summary, profile)``:
        ``tweets`` is a list of ``[tweet_id, tweet_text]`` pairs for every
        tweet whose predicted label is ``'LABEL_1'``; ``account_summary`` is
        the result of ``get_account_summary`` called with the username, the
        number of flagged tweets and the total number of tweets; ``profile``
        is the result of ``get_twitter_info``.
    """
    # Presumably a pandas DataFrame (or a mapping of equally long columns)
    # with 'tweet_id' and 'tweet_text' entries -- verify against tweetpull.
    tweets_d = tweetpull(username)  # get the target's tweets
    total_tweets = len(tweets_d)  # total number of pulled tweets

    flagged_tweets = []  # [tweet_id, tweet_text] pairs classified as hate
    # Walk ids and texts in lockstep rather than keeping a manual counter
    # and looking the id up by label afterwards.
    for tweet_id, tweet in zip(tweets_d['tweet_id'], tweets_d['tweet_text']):
        prediction = model.classify_text(tweet)
        # Compare the label of this prediction directly instead of writing it
        # into the container and substring-matching its string form.
        if prediction.label == 'LABEL_1':  # if hate
            flagged_tweets.append([tweet_id, tweet])

    total_scanned_tweets = len(flagged_tweets)  # amount of hate tweets
    account_summary = get_account_summary(
        username, total_scanned_tweets, total_tweets)
    profile = get_twitter_info(username)

    # Flagged tweets (id and text), the account summary, and the profile.
    return flagged_tweets, account_summary, profile
示例#19
0
def example_2_2():
    """Train a two-label classifier on the sample CSV file."""
    classifier = HappyTextClassification(
        num_labels=2,  # Don't forget to set num_labels!
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        model_type="DISTILBERT",
    )
    classifier.train("../../data/tc/train-eval.csv")