def stance_fine_tune():
    """Sweep learning rates for stance training on "hillary" and plot the result.

    For every learning rate a fresh ``HPFineTunePair`` / ``Experiment`` pair is
    created, ``train_stance`` is run with the same ``preload_id``, and the
    returned validation history is drawn as two curves labelled ACC and F1.
    One figure holding all curves is shown after the sweep.
    """
    # Function-scope import: matplotlib is only loaded when the sweep is run.
    import matplotlib.pyplot as plt

    topic = "hillary"
    # Presumably (run name, step) of a saved model to start from -- TODO confirm.
    preload_id = ("LM_pair_tweets_hillary_run2", 1247707)

    for lr in (1e-3, 5e-4, 2e-4, 1e-4):
        hparams = HPFineTunePair()
        experiment = Experiment(hparams)
        # NOTE(review): these overrides happen after Experiment(hparams) is
        # built; they only matter if Experiment keeps a reference -- confirm.
        hparams.lr = lr
        hparams.num_epochs = 100

        setting = shared_setting.TopicTweets2Stance(topic)
        stance_data = stance_detection.FineLoader(
            topic, hparams.seq_max, setting.vocab_filename, hparams.sent_max)
        valid_history = experiment.train_stance(
            setting.vocab_size, stance_data, preload_id)
        experiment.clear_run()

        # Each history entry is a pair; split it into two parallel series.
        acc_curve, f1_curve = zip(*valid_history)
        plt.plot(acc_curve, label="{} / ACC".format(lr))
        plt.plot(f1_curve, label="{} / F1".format(lr))

    # All learning rates share one figure, so decorate and show it once.
    plt.legend(loc='lower right')
    plt.title('learning rate - dev !')
    plt.show()
def stance_after_feature():
    """Run ``Experiment.train_stance_pair_feature`` on the "atheism" topic.

    Uses ``HPPairFeatureTweetFine`` hyperparameters and data from
    ``stance_detection.DataLoader``.
    """
    topic = "atheism"
    hparams = HPPairFeatureTweetFine()
    experiment = Experiment(hparams)
    # NOTE(review): this id is defined but never used -- ``None`` is passed to
    # the trainer below. Confirm whether that is intentional.
    preload_id = ("LM_pair_featuer_tweets_atheism", 979)

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)
    experiment.train_stance_pair_feature(setting.vocab_size, stance_data, None)
def stance_warm():
    """Run ``Experiment.train_stance`` on "hillary" with ``seq_max`` forced to 50.

    Training is started with ``preload_id = ("after_stance", 400)``.
    """
    topic = "hillary"
    preload_id = ("after_stance", 400)

    hparams = HPFineTunePair()
    experiment = Experiment(hparams)
    # Override is applied after the Experiment is constructed; the data loader
    # below reads the overridden value.
    hparams.seq_max = 50

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)
    experiment.train_stance(setting.vocab_size, stance_data, preload_id)
def stance_after_lm():
    """Run ``Experiment.train_stance`` on "hillary" with a ``DLM_pair_tweets`` preload id.

    Data comes from ``stance_detection.FineLoader`` built with the
    ``seq_max`` and ``sent_max`` of ``HPFineTunePair``.
    """
    topic = "hillary"
    # Presumably (run name, step) of the model to load -- TODO confirm.
    preload_id = ("DLM_pair_tweets_hillary", 852967)

    hparams = HPFineTunePair()
    experiment = Experiment(hparams)
    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.FineLoader(
        topic, hparams.seq_max, setting.vocab_filename, hparams.sent_max)
    experiment.train_stance(setting.vocab_size, stance_data, preload_id)
def feature_svm():
    """Run ``Experiment.feature_svm`` on the "atheism" topic.

    Data comes from ``stance_detection.FineLoader``; the preload id points at
    ``LM_reserve/DLM_pair_tweets_atheism``.
    """
    topic = "atheism"
    # Presumably (run name, step) of the model to load -- TODO confirm.
    preload_id = ("LM_reserve/DLM_pair_tweets_atheism", 217246)

    hparams = HPFineTunePair()
    experiment = Experiment(hparams)
    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.FineLoader(
        topic, hparams.seq_max, setting.vocab_filename, hparams.sent_max)
    experiment.feature_svm(setting.vocab_size, stance_data, preload_id)
def stance_cold_start():
    """Run ``Experiment.train_stance`` on "hillary" without passing a preload id.

    Uses ``HPColdStart`` hyperparameters and ``stance_detection.DataLoader``.
    """
    topic = "hillary"
    hparams = HPColdStart()
    experiment = Experiment(hparams)

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)
    experiment.train_stance(setting.vocab_size, stance_data)
def train_aux():
    """Run ``Experiment.train_aux1`` on "hillary" with sentiment data.

    The experiment is built from ``HPFineTunePair``; a second ``HPTiny``
    hyperparameter object is handed to ``train_aux1`` together with data from
    ``stance_detection.SentimentLoader`` and the preload id.
    """
    topic = "hillary"
    # Presumably (run name, step) of the model to load -- TODO confirm.
    preload_id = ("DLM_pair_tweets_hillary", 131200)

    main_hp = HPFineTunePair()
    aux_hp = HPTiny()
    experiment = Experiment(main_hp)

    setting = shared_setting.TopicTweets2Stance(topic)
    sentiment = stance_detection.SentimentLoader(
        topic, main_hp.seq_max, setting.vocab_filename)
    experiment.train_aux1(aux_hp, setting.vocab_size, sentiment, preload_id)
def train_aux_stance():
    """Run ``Experiment.train_aux_stance`` on "hillary" from the ``after_aux`` preload id.

    The experiment uses ``HPColdStart``; an ``HPTiny`` object is passed along
    as the first argument of ``train_aux_stance``.
    """
    topic = "hillary"
    preload_id = ("after_aux", 234)

    main_hp = HPColdStart()
    aux_hp = HPTiny()

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, main_hp.seq_max, setting.vocab_filename)

    # The Experiment is created only after the data has been loaded.
    experiment = Experiment(main_hp)
    experiment.train_aux_stance(
        aux_hp, setting.vocab_size, stance_data, preload_id)
def pair_lm():
    """Run ``Experiment.train_pair_lm`` on per-user tweets for "atheism".

    Tweets are loaded with ``tweet_reader.load_per_user`` and wrapped in a
    ``loader.PairDataLoader``; the run is named ``LM_pair_tweets_atheism``.
    """
    topic = "atheism"
    hparams = HPPairTweet()

    setting = shared_setting.TopicTweets2Stance(topic)
    tweets_by_user = tweet_reader.load_per_user(topic)
    data = loader.PairDataLoader(hparams.sent_max, setting, tweets_by_user)

    config = ExperimentConfig()
    config.name = "LM_pair_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 30 * 60  # 30 minutes

    experiment = Experiment(hparams)
    experiment.train_pair_lm(config, data)
def stance_with_consistency():
    """Run ``Experiment.train_stance_consistency`` on the "atheism" topic.

    Stance data comes from ``stance_detection.DataLoader``; auxiliary data is
    an ``AuxPairLoader`` built over the per-user tweets.
    """
    topic = "atheism"
    hparams = HPStanceConsistency()
    experiment = Experiment(hparams)

    # NOTE(review): this config is created and named but never passed to the
    # trainer below -- confirm whether it should be.
    e_config = ExperimentConfig()
    e_config.name = "stance_consistency_{}".format(topic)

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)
    tweets_by_user = tweet_reader.load_per_user(topic)
    aux_data = AuxPairLoader(hparams.seq_max, setting, tweets_by_user)

    experiment.train_stance_consistency(
        setting.vocab_size, stance_data, aux_data)
def stance_cold(self):
    """Train and evaluate a 3-class Transformer on "atheism" with ``tf.estimator``.

    Builds a ``Transformer`` with a ``Classification(3)`` task, wraps its
    ``model_fn`` in a ``tf.estimator.Estimator`` whose model directory is
    ``get_model_dir("stance_cold_atheism")``, trains it for a fixed number of
    epochs on the loader's training split, and prints the evaluation result
    on the dev split.
    """
    hp = hyperparams.HPColdStart()
    topic = "atheism"
    setting = shared_setting.TopicTweets2Stance(topic)
    model_dir = get_model_dir("stance_cold_{}".format(topic))

    task = Classification(3)
    model = Transformer(hp, setting.vocab_size, task)
    param = {
        'feature_columns': self.get_feature_column(),
        'n_classes': 3,
    }
    estimator = tf.estimator.Estimator(
        model_fn=model.model_fn,
        model_dir=model_dir,
        params=param,
        config=None)

    data_source = stance_detection.DataLoader(
        topic, hp.seq_max, setting.vocab_filename)

    def train_input_fn(features, labels, batch_size):
        """Return a shuffled, endlessly repeating, batched training dataset."""
        f_dict = pd.DataFrame(data=features)
        dataset = tf.data.Dataset.from_tensor_slices((f_dict, labels))
        # Shuffle, repeat, and batch the examples.
        return dataset.shuffle(1000).repeat().batch(batch_size)

    def dev_input_fn(batch_size):
        """Return a shuffled, batched dataset over the dev split (one pass)."""
        features, labels = data_source.get_dev_data()
        f_dict = pd.DataFrame(data=features)
        dataset = tf.data.Dataset.from_tensor_slices((f_dict, labels))
        # Shuffle and batch the examples; no repeat, so evaluation ends
        # after a single pass.
        return dataset.shuffle(1000).batch(batch_size)

    X, Y = data_source.get_train_data()
    num_epoch = 10
    batch_size = 32
    # Ceiling of len(Y) / batch_size. Floor division keeps the step count an
    # integer; true division would make ``max_steps`` a float on Python 3.
    step_per_epoch = (len(Y) - 1) // batch_size + 1

    tf.logging.info("Logging Test")
    tf.logging.info("num epoch %d", num_epoch)
    # The training dataset repeats forever, so max_steps is what stops training.
    estimator.train(lambda: train_input_fn(X, Y, batch_size),
                    max_steps=num_epoch * step_per_epoch)

    print(estimator.evaluate(lambda: dev_input_fn(batch_size)))
def pair_feature():
    """Run ``Experiment.train_pair_feature`` on pair data for "atheism".

    With ``use_cache`` enabled the ``PairDataLoader`` is restored from a
    pickle keyed by topic and ``sent_max``; otherwise it is rebuilt from the
    per-user tweets, indexed, and saved under that key.
    """
    topic = "atheism"
    hparams = HPPairFeatureTweet()
    setting = shared_setting.TopicTweets2Stance(topic)

    use_cache = True
    run_id = "{}_{}".format(topic, hparams.sent_max)
    if not use_cache:
        tweets_by_user = tweet_reader.load_per_user(topic)
        data = loader.PairDataLoader(hparams.sent_max, setting, tweets_by_user)
        data.index_data()
        data.save_to_pickle(run_id)
    else:
        print("using PairDataCache")
        data = loader.PairDataLoader.load_from_pickle(run_id)

    config = ExperimentConfig()
    config.name = "LM_pair_featuer_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 3 * 60  # 3 minutes

    experiment = Experiment(hparams)
    experiment.train_pair_feature(config, data)
def document_lm():
    """Run ``Experiment.train_doc_lm`` on ``AuthorAsDoc`` data for "hillary".

    With ``use_cache`` disabled (the current setting) the data is rebuilt
    from the per-user tweets, indexed, and saved to a pickle keyed by topic
    and ``seq_max``; with it enabled the pickle is loaded instead.
    """
    topic = "hillary"
    hparams = HPDocLM()
    setting = shared_setting.TopicTweets2Stance(topic)

    use_cache = False
    run_id = "{}_{}".format(topic, hparams.seq_max)
    if not use_cache:
        tweets_by_user = tweet_reader.load_per_user(topic)
        data = author_as_doc.AuthorAsDoc(hparams.seq_max, setting, tweets_by_user)
        data.index_data()
        data.save_to_pickle(run_id)
    else:
        data = author_as_doc.AuthorAsDoc.load_from_pickle(run_id)

    config = ExperimentConfig()
    config.name = "DLM_pair_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 30 * 60  # 30 minutes

    experiment = Experiment(hparams)
    experiment.train_doc_lm(config, data)
def baselines():
    """Run ``Experiment.stance_baseline`` on the "hillary" topic.

    Uses default ``Hyperparams`` and the vocabulary file of the topic's
    ``TopicTweets2Stance`` setting.
    """
    topic = "hillary"
    experiment = Experiment(Hyperparams())
    setting = shared_setting.TopicTweets2Stance(topic)
    experiment.stance_baseline(topic, setting.vocab_filename)