def test_classify_text():
    """Classify one positive sentence and check its label and score."""
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
    )
    prediction = classifier.classify_text("What a great movie")

    assert prediction.label == 'POSITIVE'
    assert prediction.score > 0.9
def example_2_0():
    """Show several ways of constructing a HappyTextClassification object."""
    # No arguments: default with "distilbert-base-uncased"
    distilbert_tc = HappyTextClassification()

    # Keyword arguments
    albert_tc = HappyTextClassification(
        model_type="ALBERT",
        model_name="albert-base-v2",
    )

    # Positional arguments: model type first, then model name
    bert_tc = HappyTextClassification("BERT", "bert-base-uncased")
    roberta_tc = HappyTextClassification(
        "ROBERTA", "deepset/roberta-base-squad2")
def example_2_0():
    """Show several ways of constructing a HappyTextClassification object."""
    # Explicit spelling of the default
    distilbert_tc = HappyTextClassification(
        "DISTILBERT", "distilbert-base-uncased", num_labels=2)

    # Keyword arguments
    albert_tc = HappyTextClassification(
        model_type="ALBERT",
        model_name="albert-base-v2",
    )

    # Positional arguments: model type first, then model name
    bert_tc = HappyTextClassification("BERT", "bert-base-uncased")
    roberta_tc = HappyTextClassification("ROBERTA", "roberta-base")
def example_2_3():
    """Evaluate a classifier on a CSV file and print the result object."""
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        num_labels=2,  # Don't forget to set num_labels!
    )
    eval_result = classifier.eval("../../data/tc/train-eval.csv")

    # Trailing comments show sample output.
    print(type(eval_result))  # <class 'happytransformer.happy_trainer.EvalResult'>
    print(eval_result)  # EvalResult(eval_loss=0.007262040860950947)
    print(eval_result.loss)  # 0.007262040860950947
def example_2_3():
    """Evaluate a classifier with explicit TCEvalArgs and print the result."""
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        num_labels=2,  # Don't forget to set num_labels!
    )

    # for demonstration -- not needed
    eval_args = TCEvalArgs(save_preprocessed_data=False)
    eval_result = classifier.eval(
        "../../data/tc/train-eval.csv", args=eval_args)

    # Trailing comments show sample output.
    print(type(eval_result))  # <class 'happytransformer.happy_trainer.EvalResult'>
    print(eval_result)  # EvalResult(eval_loss=0.007262040860950947)
    print(eval_result.loss)  # 0.007262040860950947
def example_2_1():
    """Classify a single sentence and print the result and its label."""
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
    )
    prediction = classifier.classify_text("Great movie! 5/5")

    # Trailing comments show sample output.
    # <class 'happytransformer.happy_text_classification.TextClassificationResult'>
    print(type(prediction))
    # TextClassificationResult(label='LABEL_1', score=0.9998761415481567)
    print(prediction)
    print(prediction.label)  # LABEL_1
def test_tc_test():
    """Check ``test()`` predictions against the expected labels and scores.

    Labels must match exactly. Scores are compared with a 1% relative
    tolerance because exact float equality on model outputs is brittle
    across hardware and library versions.
    """
    # Local import so the block does not rely on a file-level import.
    from math import isclose

    happy_tc = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english")

    result = happy_tc.test("../data/tc/test.csv")
    answer = [
        TextClassificationResult(label='POSITIVE', score=0.9998401999473572),
        TextClassificationResult(label='NEGATIVE', score=0.9772131443023682),
        TextClassificationResult(label='NEGATIVE', score=0.9966067671775818),
        TextClassificationResult(label='POSITIVE', score=0.9792295098304749)
    ]

    # zip() would silently truncate, so pin the length first.
    assert len(result) == len(answer)
    assert [case.label for case in result] == [case.label for case in answer]
    for predicted, expected in zip(result, answer):
        assert isclose(predicted.score, expected.score, rel_tol=0.01)
def test_tc_save():
    """Check that a saved and reloaded model predicts the same label."""
    sample_text = "What a great movie"
    save_dir = "model/"

    original_model = HappyTextClassification()
    original_model.save(save_dir)
    label_before = original_model.classify_text(sample_text).label

    reloaded_model = HappyTextClassification(load_path=save_dir)
    label_after = reloaded_model.classify_text(sample_text).label

    assert label_before == label_after
def example_2_4():
    """Run ``test()`` on a CSV file and print the list of predictions."""
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        num_labels=2,  # Don't forget to set num_labels!
    )
    predictions = classifier.test("../../data/tc/test.csv")
    first = predictions[0]

    # Trailing comments show sample output.
    print(type(predictions))  # <class 'list'>
    # [TextClassificationResult(label='LABEL_1', score=0.9998401999473572), TextClassificationResult(label='LABEL_0', score=0.9772131443023682)...
    print(predictions)
    # <class 'happytransformer.happy_text_classification.TextClassificationResult'>
    print(type(first))
    # TextClassificationResult(label='LABEL_1', score=0.9998401999473572)
    print(first)
    print(first.label)  # LABEL_1
def test_tc_train_effectiveness():
    """Assert that training decreases the evaluation loss."""
    data_path = "../data/tc/train-eval.csv"
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased",
    )

    loss_before = classifier.eval(data_path).loss
    classifier.train(data_path)
    loss_after = classifier.eval(data_path).loss

    assert loss_after < loss_before
def test_tc_train_effectiveness_multi():
    """Assert that training with three labels decreases the evaluation loss."""
    data_path = "../data/tc/train-eval-multi.csv"
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased",
        num_labels=3,
    )

    loss_before = classifier.eval(data_path).loss
    classifier.train(data_path)
    loss_after = classifier.eval(data_path).loss

    assert loss_after < loss_before
def example_2_5():
    """Print the evaluation loss before and after training on the same CSV."""
    data_path = "../../data/tc/train-eval.csv"
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        num_labels=2,  # Don't forget to set num_labels!
    )

    loss_before = classifier.eval(data_path).loss
    classifier.train(data_path)
    loss_after = classifier.eval(data_path).loss

    # Trailing comments show sample output.
    print("Before loss: ", loss_before)  # 0.007262040860950947
    print("After loss: ", loss_after)  # 0.000162081079906784
def test_tc_with_dataclass():
    """Smoke-test train/eval/test with arguments given as dataclass objects.

    There are no assertions; the test passes when no call raises.
    """
    train_eval_csv = "../data/tc/train-eval.csv"
    classifier = HappyTextClassification()

    classifier.train(
        train_eval_csv,
        args=TCTrainArgs(learning_rate=0.01, num_train_epochs=1),
    )
    classifier.eval(train_eval_csv, args=TCEvalArgs())
    classifier.test("../data/tc/test.csv", args=TCTestArgs())
def test_tc_with_dic():
    """Smoke-test train/eval/test with arguments given as plain dicts.

    There are no assertions; the test passes when no call raises.
    """
    train_eval_csv = "../data/tc/train-eval.csv"
    classifier = HappyTextClassification()

    classifier.train(
        train_eval_csv,
        args=dict(learning_rate=0.01, num_train_epochs=1),
    )
    # Empty dicts exercise the "no overrides" path for eval and test.
    classifier.eval(train_eval_csv, args={})
    classifier.test("../data/tc/test.csv", args={})
def test_tc_eval():
    """Check the evaluation loss against a reference value within 1%."""
    expected_loss = 0.007262040860950947
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
    )

    eval_result = classifier.eval("../data/tc/train-eval.csv")

    assert eval_result.loss == approx(expected_loss, 0.01)
def test_tc_train():
    """Smoke-test that training on the train/eval CSV runs without raising."""
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
    )
    classifier.train("../data/tc/train-eval.csv")
def test_tc_save_load_eval():
    """Run the shared ``run_save_load`` helper in "eval" mode for text classification."""
    classifier = HappyTextClassification()
    run_save_load(
        classifier,
        "data/tc-train.json",         # output path
        ARGS_TC_EVAL,
        "../data/tc/train-eval.csv",  # data path
        "eval",
    )
from webapp.models import Report
from webapp import api
from webapp import tweepy
import pandas as pd
import preprocessor as p
from happytransformer import HappyTextClassification

# Classifier is built once at import time and shared by every scan() call.
happy_tc = HappyTextClassification(
    'BERT', 'Hate-speech-CNERG/dehatebert-mono-english', 2)
model = happy_tc


def scan(username):
    """Classify a user's tweets and collect the ones flagged as hate.

    Args:
        username: Account name passed to ``tweetpull``,
            ``get_account_summary`` and ``get_twitter_info``.

    Returns:
        A tuple ``(tweets, account_summary, profile)``. ``tweets`` is a list
        of ``[tweet_id, tweet_text]`` pairs for every tweet whose predicted
        label is ``LABEL_1`` (treated here as the hate class). The other two
        items are the results of ``get_account_summary`` and
        ``get_twitter_info``.
    """
    tweets_d = tweetpull(username)  # get the target's tweets
    total_tweets = len(tweets_d)

    flagged_tweets = []
    for index, tweet in enumerate(tweets_d['tweet_text']):
        # Check this tweet's own label instead of writing the result into
        # tweets_d and searching the stringified container for a substring.
        if model.classify_text(tweet).label == 'LABEL_1':
            tweet_id = tweets_d['tweet_id'][index]
            flagged_tweets.append([tweet_id, tweet])

    total_scanned_tweets = len(flagged_tweets)  # amount of hate tweets
    account_summary = get_account_summary(
        username, total_scanned_tweets, total_tweets)
    profile = get_twitter_info(username)
    return flagged_tweets, account_summary, profile
def example_2_2():
    """Train a two-label classifier on the train/eval CSV file."""
    classifier = HappyTextClassification(
        model_type="DISTILBERT",
        model_name="distilbert-base-uncased-finetuned-sst-2-english",
        num_labels=2,  # Don't forget to set num_labels!
    )
    classifier.train("../../data/tc/train-eval.csv")