def run(which="all"):
    # Snapshot current files, then rebuild the diff/parse/generate pipeline
    # for one base or for all of them.
    backup()
    if which == "all":
        bases = _bases
    else:
        bases = [which]
    for base in bases:
        compute_diff("mpl_%s_raw.xml" % base,
                     "mpl_%s.xml" % base,
                     "mpl_%s_diff.xml" % base)
        run_parse(base)
        apply_diff("mpl_%s_raw.xml" % base,
                   "mpl_%s_diff.xml" % base,
                   "mpl_%s.xml" % base)
        run_generate(base)
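# Hypothetical sketch, not part of the original pipeline: one plausible
# implementation of the backup() step above, assuming it snapshots each
# mpl_<base>.xml before the pipeline regenerates it. The backups/ directory
# name and the reuse of the module-level _bases list are assumptions made
# for illustration only.
import os
import shutil

def backup_sketch(bases=None, dest_dir="backups"):
    """Copy each mpl_<base>.xml into dest_dir so a failed run can be undone."""
    os.makedirs(dest_dir, exist_ok=True)
    for base in (bases if bases is not None else _bases):
        src = "mpl_%s.xml" % base
        if os.path.exists(src):
            shutil.copy2(src, os.path.join(dest_dir, src))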
def generate_text_with_our_methods_sentiment(sentiment_tokenizer, sentiment_classifier):
    # Loop over prefixes and sentiment conditions; log the generated text's
    # perplexity, diversity, and sentiment-accuracy scores.
    for condition in ["negative", "positive"]:
        for prefix1 in ["The chicken", "The house", "The pizza",
                        "The potato", "The lake"]:
            prefix2 = "The following is an article about " + condition + ". " + prefix1
            for prefix in [prefix1, prefix2]:
                print('Condition:', condition)
                print('Prefix:', prefix)
                perplexity_scores = []
                diversity_scores = []
                sentiment_scores = []
                for k in range(20):  # 20 samples of length 100 per prefix/condition pair
                    a_lst = run_generate(prefix=prefix, condition=condition,
                                         length=100, device=device)
                    a_str = [''.join(a_lst)]
                    perplexity_scores.append(np.mean([
                        perplexity_score(text, per_model, tokenizer)
                        for text in a_str
                    ]))
                    diversity_scores.append(diversity(a_str))
                    sentiment_scores.append([
                        sentiment_predict(text, sentiment_tokenizer,
                                          sentiment_classifier)['label'] == condition
                        for text in a_str
                    ][0])
                print('Perplexity score %.3f' % np.mean(perplexity_scores))
                print('Sentiment acc. %.3f' % (sum(sentiment_scores) / len(sentiment_scores)))
                print('Diversity score:')
                print('\t Dist-1: %.3f' % np.mean([a[0] for a in diversity_scores]))
                print('\t Dist-2: %.3f' % np.mean([a[1] for a in diversity_scores]))
                print('\t Dist-3: %.3f' % np.mean([a[2] for a in diversity_scores]))
                print('********\n')
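# Hedged sketch of the sentiment_predict helper called above, assuming a
# standard HuggingFace sequence-classification tokenizer/model pair. The
# lowercase "negative"/"positive" label order is an assumption chosen so the
# returned 'label' matches the condition strings compared against in the loop.
import torch

def sentiment_predict_sketch(text, sentiment_tokenizer, sentiment_classifier):
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = sentiment_classifier(**inputs).logits
    pred = int(torch.argmax(logits, dim=-1))
    # Assumed id2label mapping: index 0 -> "negative", index 1 -> "positive".
    return {'label': ["negative", "positive"][pred],
            'score': float(torch.softmax(logits, dim=-1)[0, pred])}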
def generate_text_with_our_methods_topic():
    # Loop over prefixes and topic conditions; log the generated text's
    # perplexity and diversity scores.
    for condition in ["Religion", "Military", "Politics", "Science",
                      "Legal", "Space", "Computers", "Technology"]:
        for prefix1 in ["The chicken", "The house", "The pizza",
                        "The potato", "The lake"]:
            prefix2 = "The following is an article about " + condition + ". " + prefix1
            for prefix in [prefix1, prefix2]:
                print('Condition:', condition)
                print('Prefix:', prefix)
                perplexity_scores = []
                diversity_scores = []
                for k in range(20):  # 20 samples of length 100 per prefix/condition pair
                    a_lst = run_generate(prefix=prefix, condition=condition,
                                         length=100, device=device)
                    a_str = [''.join(a_lst)]
                    perplexity_scores.append(np.mean([
                        perplexity_score(text, per_model, tokenizer)
                        for text in a_str
                    ]))
                    diversity_scores.append(diversity(a_str))
                print('Perplexity score %.3f' % np.mean(perplexity_scores))
                print('Diversity score:')
                print('\t Dist-1: %.3f' % np.mean([a[0] for a in diversity_scores]))
                print('\t Dist-2: %.3f' % np.mean([a[1] for a in diversity_scores]))
                print('\t Dist-3: %.3f' % np.mean([a[2] for a in diversity_scores]))
                print('********\n')
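# Hedged sketch of the diversity metric reported above: Dist-n is the ratio of
# distinct n-grams to total n-grams over the generated texts (Li et al., 2016),
# returned as [Dist-1, Dist-2, Dist-3] to match the a[0]/a[1]/a[2] indexing in
# the print statements. Whitespace tokenization is an assumption; the real
# helper may tokenize differently.
def diversity_sketch(texts):
    scores = []
    for n in (1, 2, 3):
        total, distinct = 0, set()
        for text in texts:
            tokens = text.split()
            ngrams = [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
            total += len(ngrams)
            distinct.update(ngrams)
        scores.append(len(distinct) / max(total, 1))
    return scores  # [Dist-1, Dist-2, Dist-3]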