示例#1
0
def stance_fine_tune():
    """Sweep learning rates for stance training and plot the returned history.

    For each learning rate a fresh ``HPFineTunePair`` and ``Experiment`` are
    created, ``Experiment.train_stance`` is run on the "hillary" topic with a
    fixed preload id, and the returned history is unzipped into two series
    that are drawn on a shared matplotlib figure (labelled ACC and F1).
    The figure is displayed after all runs have finished.
    """
    # Function-scope import: matplotlib is loaded only when this is called.
    import matplotlib.pyplot as plt

    topic = "hillary"
    preload_id = ("LM_pair_tweets_hillary_run2", 1247707)
    learning_rates = [1e-3, 5e-4, 2e-4, 1e-4]

    for lr in learning_rates:
        hparams = HPFineTunePair()
        experiment = Experiment(hparams)
        # The overrides are applied after the Experiment has been built.
        hparams.lr = lr
        hparams.num_epochs = 100

        setting = shared_setting.TopicTweets2Stance(topic)
        stance_data = stance_detection.FineLoader(
            topic, hparams.seq_max, setting.vocab_filename, hparams.sent_max)
        valid_history = experiment.train_stance(
            setting.vocab_size, stance_data, preload_id)
        experiment.clear_run()

        # presumably one (accuracy, F1) pair per evaluation -- TODO confirm
        acc_curve, f1_curve = zip(*valid_history)
        plt.plot(acc_curve, label="{} / ACC".format(lr))
        plt.plot(f1_curve, label="{} / F1".format(lr))

    plt.legend(loc='lower right')
    plt.title('learning rate - dev !')
    plt.show()
示例#2
0
def stance_after_feature():
    """Run pair-feature stance training on the "atheism" topic.

    Builds ``HPPairFeatureTweetFine`` hyperparameters and an ``Experiment``,
    loads the topic's stance data and calls
    ``Experiment.train_stance_pair_feature`` with ``None`` as the last
    argument.
    """
    topic = "atheism"
    hparams = HPPairFeatureTweetFine()
    experiment = Experiment(hparams)
    # NOTE(review): this preload id is defined but never used -- ``None`` is
    # passed to the trainer below. Confirm whether that is intentional.
    preload_id = ("LM_pair_featuer_tweets_atheism", 979)
    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)
    vocab_size = setting.vocab_size
    experiment.train_stance_pair_feature(vocab_size, stance_data, None)
示例#3
0
def stance_warm():
    """Run stance training on "hillary" with the ``after_stance`` preload id.

    Uses ``HPFineTunePair`` hyperparameters with ``seq_max`` overridden to 50
    (set after the ``Experiment`` is constructed), loads the topic's stance
    data and hands everything to ``Experiment.train_stance``.
    """
    topic = "hillary"
    preload_id = ("after_stance", 400)

    hparams = HPFineTunePair()
    experiment = Experiment(hparams)
    hparams.seq_max = 50

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)
    vocab_size = setting.vocab_size
    experiment.train_stance(vocab_size, stance_data, preload_id)
示例#4
0
def stance_after_lm():
    """Run stance training on "hillary" with the ``DLM_pair_tweets_hillary`` preload id.

    Builds ``HPFineTunePair`` hyperparameters and an ``Experiment``, loads the
    topic's data through ``stance_detection.FineLoader`` and calls
    ``Experiment.train_stance``.
    """
    topic = "hillary"
    preload_id = ("DLM_pair_tweets_hillary", 852967)

    hparams = HPFineTunePair()
    experiment = Experiment(hparams)
    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.FineLoader(
        topic, hparams.seq_max, setting.vocab_filename, hparams.sent_max)
    vocab_size = setting.vocab_size
    experiment.train_stance(vocab_size, stance_data, preload_id)
示例#5
0
def feature_svm():
    """Run ``Experiment.feature_svm`` on the "atheism" topic.

    Builds ``HPFineTunePair`` hyperparameters and an ``Experiment``, loads the
    topic's data through ``stance_detection.FineLoader`` and passes it along
    with the ``LM_reserve/DLM_pair_tweets_atheism`` preload id.
    """
    topic = "atheism"
    preload_id = ("LM_reserve/DLM_pair_tweets_atheism", 217246)

    hparams = HPFineTunePair()
    experiment = Experiment(hparams)
    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.FineLoader(
        topic, hparams.seq_max, setting.vocab_filename, hparams.sent_max)
    vocab_size = setting.vocab_size
    experiment.feature_svm(vocab_size, stance_data, preload_id)
示例#6
0
def stance_cold_start():
    """Run stance training on "hillary" without passing a preload id.

    Builds ``HPColdStart`` hyperparameters and an ``Experiment``, loads the
    topic's stance data and calls ``Experiment.train_stance`` with only the
    vocabulary size and the data.
    """
    hparams = HPColdStart()
    experiment = Experiment(hparams)

    topic = "hillary"
    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)

    experiment.train_stance(setting.vocab_size, stance_data)
示例#7
0
def train_aux():
    """Run ``Experiment.train_aux1`` on sentiment data for the "hillary" topic.

    The experiment is built from ``HPFineTunePair``; a second ``HPTiny``
    hyperparameter object is passed to ``train_aux1`` together with the
    vocabulary size, the ``SentimentLoader`` data and the preload id.
    """
    topic = "hillary"
    preload_id = ("DLM_pair_tweets_hillary", 131200)

    main_hp = HPFineTunePair()
    aux_hp = HPTiny()
    experiment = Experiment(main_hp)

    setting = shared_setting.TopicTweets2Stance(topic)
    sentiment = stance_detection.SentimentLoader(
        topic, main_hp.seq_max, setting.vocab_filename)
    experiment.train_aux1(aux_hp, setting.vocab_size, sentiment, preload_id)
示例#8
0
def train_aux_stance():
    """Run ``Experiment.train_aux_stance`` on the "hillary" topic.

    The experiment is built from ``HPColdStart`` after the stance data has
    been loaded; an ``HPTiny`` object, the vocabulary size, the data and the
    ``("after_aux", 234)`` preload id are passed to the trainer.
    """
    topic = "hillary"
    preload_id = ("after_aux", 234)

    main_hp = HPColdStart()
    aux_hp = HPTiny()

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, main_hp.seq_max, setting.vocab_filename)
    vocab_size = setting.vocab_size

    experiment = Experiment(main_hp)
    experiment.train_aux_stance(aux_hp, vocab_size, stance_data, preload_id)
示例#9
0
def pair_lm():
    """Run ``Experiment.train_pair_lm`` on per-user tweets for "atheism".

    Loads the tweets grouped per user, wraps them in a
    ``loader.PairDataLoader`` and trains with an ``ExperimentConfig`` named
    ``LM_pair_tweets_atheism`` (``num_epoch`` 1, ``save_interval`` 1800).
    """
    topic = "atheism"

    hparams = HPPairTweet()
    setting = shared_setting.TopicTweets2Stance(topic)
    tweet_group = tweet_reader.load_per_user(topic)
    data = loader.PairDataLoader(hparams.sent_max, setting, tweet_group)

    config = ExperimentConfig()
    config.name = "LM_pair_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 30 * 60  # 30 minutes

    experiment = Experiment(hparams)
    experiment.train_pair_lm(config, data)
示例#10
0
def stance_with_consistency():
    """Run ``Experiment.train_stance_consistency`` on the "atheism" topic.

    Loads the labelled stance data and an ``AuxPairLoader`` built from the
    per-user tweets, then passes both, together with the vocabulary size, to
    the trainer.
    """
    topic = "atheism"

    hparams = HPStanceConsistency()
    experiment = Experiment(hparams)
    # NOTE(review): this config is created and named but is not passed to any
    # call in this function -- confirm whether it is still needed.
    e_config = ExperimentConfig()
    e_config.name = "stance_consistency_{}".format(topic)

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(
        topic, hparams.seq_max, setting.vocab_filename)
    tweet_group = tweet_reader.load_per_user(topic)
    aux_data = AuxPairLoader(hparams.seq_max, setting, tweet_group)
    experiment.train_stance_consistency(
        setting.vocab_size, stance_data, aux_data)
示例#11
0
    def stance_cold(self):
        """Train and evaluate a stance classifier through ``tf.estimator``.

        Builds a ``Transformer`` with a 3-class ``Classification`` task for the
        "atheism" topic, wraps its ``model_fn`` in a ``tf.estimator.Estimator``
        whose model directory comes from ``get_model_dir``, trains it on the
        data loader's training split for ``num_epoch`` epochs and prints the
        result of evaluating on the dev split.
        """
        hp = hyperparams.HPColdStart()
        topic = "atheism"
        setting = shared_setting.TopicTweets2Stance(topic)
        model_dir = get_model_dir("stance_cold_{}".format(topic))

        task = Classification(3)
        model = Transformer(hp, setting.vocab_size, task)
        param = {
            'feature_columns': self.get_feature_column(),
            'n_classes': 3,
        }
        estimator = tf.estimator.Estimator(
            model_fn=model.model_fn,
            model_dir=model_dir,
            params=param,
            config=None)

        data_source = stance_detection.DataLoader(topic, hp.seq_max, setting.vocab_filename)

        def train_input_fn(features, labels, batch_size):
            """Return a shuffled, endlessly repeating, batched training dataset."""
            f_dict = pd.DataFrame(data=features)
            dataset = tf.data.Dataset.from_tensor_slices((f_dict, labels))
            # Shuffle, repeat, and batch the examples.
            return dataset.shuffle(1000).repeat().batch(batch_size)

        def dev_input_fn(batch_size):
            """Return a shuffled, batched dataset over the dev split (one pass)."""
            features, labels = data_source.get_dev_data()
            f_dict = pd.DataFrame(data=features)
            dataset = tf.data.Dataset.from_tensor_slices((f_dict, labels))
            # Shuffle and batch only: there is no repeat() here, so the
            # dataset is finite.
            return dataset.shuffle(1000).batch(batch_size)

        X, Y = data_source.get_train_data()
        num_epoch = 10
        batch_size = 32
        # Integer ceiling division. True division ("/") yields a float under
        # Python 3, which made max_steps non-integral and over-counted the
        # number of batches per epoch.
        step_per_epoch = (len(Y) - 1) // batch_size + 1
        tf.logging.info("Logging Test")
        tf.logging.info("num epoch %d", num_epoch)
        estimator.train(lambda: train_input_fn(X, Y, batch_size),
                        max_steps=num_epoch * step_per_epoch)

        print(estimator.evaluate(lambda: dev_input_fn(batch_size)))
示例#12
0
def pair_feature():
    """Run ``Experiment.train_pair_feature`` on pair data for "atheism".

    The pair data is read from a pickle keyed by ``"<topic>_<sent_max>"`` when
    the local ``use_cache`` flag is set (it is hard-coded to ``True``);
    otherwise it is rebuilt from the per-user tweets, indexed and saved under
    the same key. Training uses an ``ExperimentConfig`` named
    ``LM_pair_featuer_tweets_atheism`` (``num_epoch`` 1, ``save_interval`` 180).
    """
    topic = "atheism"
    use_cache = True

    hparams = HPPairFeatureTweet()
    setting = shared_setting.TopicTweets2Stance(topic)
    run_id = "{}_{}".format(topic, hparams.sent_max)

    if not use_cache:
        tweet_group = tweet_reader.load_per_user(topic)
        data = loader.PairDataLoader(hparams.sent_max, setting, tweet_group)
        data.index_data()
        data.save_to_pickle(run_id)
    else:
        print("using PairDataCache")
        data = loader.PairDataLoader.load_from_pickle(run_id)

    config = ExperimentConfig()
    config.name = "LM_pair_featuer_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 3 * 60  # 3 minutes

    experiment = Experiment(hparams)
    experiment.train_pair_feature(config, data)
示例#13
0
def document_lm():
    """Run ``Experiment.train_doc_lm`` on author-as-document data for "hillary".

    With the local ``use_cache`` flag hard-coded to ``False``, the data is
    built from the per-user tweets via ``author_as_doc.AuthorAsDoc``, indexed
    and pickled under ``"<topic>_<seq_max>"``; when the flag is set the pickle
    is loaded instead. Training uses an ``ExperimentConfig`` named
    ``DLM_pair_tweets_hillary`` (``num_epoch`` 1, ``save_interval`` 1800).
    """
    topic = "hillary"
    use_cache = False

    hparams = HPDocLM()
    setting = shared_setting.TopicTweets2Stance(topic)
    run_id = "{}_{}".format(topic, hparams.seq_max)

    if not use_cache:
        tweet_group = tweet_reader.load_per_user(topic)
        data = author_as_doc.AuthorAsDoc(hparams.seq_max, setting, tweet_group)
        data.index_data()
        data.save_to_pickle(run_id)
    else:
        data = author_as_doc.AuthorAsDoc.load_from_pickle(run_id)

    config = ExperimentConfig()
    config.name = "DLM_pair_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 30 * 60  # 30 minutes

    experiment = Experiment(hparams)
    experiment.train_doc_lm(config, data)
示例#14
0
def baselines():
    """Run ``Experiment.stance_baseline`` for the "hillary" topic.

    Uses default ``Hyperparams`` and passes the topic together with the
    vocabulary filename from the topic's shared setting.
    """
    hparams = Hyperparams()
    experiment = Experiment(hparams)

    topic = "hillary"
    setting = shared_setting.TopicTweets2Stance(topic)
    vocab_filename = setting.vocab_filename
    experiment.stance_baseline(topic, vocab_filename)