Example #1
    def __init__(self, w2vPath, numHidden):
        self.numHidden = numHidden
        self.w2v = load_w2v(w2vPath, FLAGS.embedding_word_size)
        self.words = tf.Variable(self.w2v, name="words")

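        # Character-level convolution filter; with the usual conv2d filter layout this is
        # [filter height (char window), filter width (char embedding dim), in_channels=1, out_channels].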
        with tf.variable_scope('CNN_Layer') as scope:
            self.char_filter = tf.get_variable(
                "char_filter",
                shape=[
                    FLAGS.char_window_size, FLAGS.embedding_char_size, 1,
                    FLAGS.embedding_char_size
                ],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

        with tf.variable_scope('Clfier_output') as scope:
            self.clfier_softmax_W = tf.get_variable(
                "clfier_W",
                shape=[numHidden * 2, FLAGS.num_classes],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

            self.clfier_softmax_b = tf.get_variable(
                "clfier_softmax_b",
                shape=[FLAGS.num_classes],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

        self.inp_w = tf.placeholder(tf.int32,
                                    shape=[None, FLAGS.max_sentence_len],
                                    name="input_words")
def run_test(training_filename):

    ##Test for Pairwise_Tokenize
    # data = pd.read_csv(training_filename)
    # w2v_file = 'GoogleNews-vectors-negative300.bin.gz'
    # w2v_url = 'https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM'

    # w2v = utils.load_w2v(w2v_file,w2v_url)
    # #Comparison
    # with open('tokenize_check.txt','wb') as output_file:
    #     for i in xrange(10):
    #         original = data['Headline'].iloc[i]
    #         p_words = pairwise_tokenize(original,True,w2v)
    #         pair_sent = "|".join(p_words)
    #         output_file.write(original + "\n")
    #         output_file.write("111: " + pair_sent +"\n")
    #         output_file.write("\n")

    #Run Augmented headlines
    w2v_file = 'GoogleNews-vectors-negative300.bin.gz'
    w2v_url = 'https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM'
    print "loading w2v"
    w2v = utils.load_w2v(w2v_file, w2v_url)

    data = pd.read_csv(training_filename)
    print "generating headlines"
    augmented_temp = augment_headlines(data, w2v, "augmented_headlines.txt")
    new_data = pd.read_csv("augmented_headlines.txt", sep="|")
Example #3
    def __init__(self, c2vPath, numHidden):
        self.numHidden = numHidden
        self.c2v = load_w2v(c2vPath, FLAGS.embedding_char_size)
        self.chars = tf.Variable(self.c2v, name="chars")
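        # Rows for the ENTITY_TYPES ids are appended below the character-embedding table.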
        self.common_id_embedding = tf.Variable(tf.random_uniform(
            [len(ENTITY_TYPES), FLAGS.embedding_char_size], -1.0, 1.0),
                                               name="common_id_embedding")
        self.chars_emb = tf.concat([self.chars, self.common_id_embedding],
                                   0,
                                   name='concat')

        with tf.variable_scope('Clfier_output') as scope:
            self.clfier_softmax_W = tf.get_variable(
                "clfier_W",
                shape=[numHidden * 2, FLAGS.num_classes],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

            self.clfier_softmax_b = tf.get_variable(
                "clfier_softmax_b",
                shape=[FLAGS.num_classes],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

        self.inp_c = tf.placeholder(tf.int32,
                                    shape=[None, FLAGS.max_sentence_len],
                                    name="input_words")
Example #4
    def __init__(self,
                 embedding_dim=100,
                 batch_size=64,
                 n_hidden=100,
                 learning_rate=0.01,
                 n_class=3,
                 max_sentence_len=50,
                 l2_reg=0.,
                 display_step=4,
                 n_iter=100,
                 type_=''):
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.n_class = n_class
        self.max_sentence_len = max_sentence_len
        self.l2_reg = l2_reg
        self.display_step = display_step
        self.n_iter = n_iter
        self.type_ = type_
        self.word_id_mapping, self.w2v = load_w2v(FLAGS.embedding_file_path,
                                                  self.embedding_dim)
        self.word_embedding = tf.constant(self.w2v, name='word_embedding')
        # self.word_embedding = tf.Variable(self.w2v, name='word_embedding')
        # self.word_id_mapping = load_word_id_mapping(FLAGS.word_id_file_path)
        # self.word_embedding = tf.Variable(
        #     tf.random_uniform([len(self.word_id_mapping), self.embedding_dim], -0.1, 0.1), name='word_embedding')

        self.dropout_keep_prob = tf.placeholder(tf.float32)
        with tf.name_scope('inputs'):
            self.x = tf.placeholder(tf.int32, [None, self.max_sentence_len])
            self.y = tf.placeholder(tf.int32, [None, self.n_class])
            self.sen_len = tf.placeholder(tf.int32, None)

            self.x_bw = tf.placeholder(tf.int32, [None, self.max_sentence_len])
            self.y_bw = tf.placeholder(tf.int32, [None, self.n_class])
            self.sen_len_bw = tf.placeholder(tf.int32, [None])

            self.target_words = tf.placeholder(tf.int32, [None, 1])

        with tf.name_scope('weights'):
            self.weights = {
                'softmax_bi_lstm':
                tf.get_variable(
                    name='bi_lstm_w',
                    shape=[2 * self.n_hidden, self.n_class],
                    initializer=tf.random_uniform_initializer(-0.003, 0.003),
                    regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
            }

        with tf.name_scope('biases'):
            self.biases = {
                'softmax_bi_lstm':
                tf.get_variable(
                    name='bi_lstm_b',
                    shape=[self.n_class],
                    initializer=tf.random_uniform_initializer(-0.003, 0.003),
                    regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
            }
Example #5
    def __init__(self, c2vPath, numHidden):
        self.numHidden = numHidden
        self.c2v = load_w2v(c2vPath, FLAGS.embedding_char_size)
        self.chars = tf.Variable(self.c2v, name="chars")

        self.common_id_embedding = tf.Variable(
            tf.random_uniform([len(ENTITY_TYPES), FLAGS.embedding_char_size],
                              -1.0, 1.0),
            name="common_id_embedding")
        self.chars_emb = tf.concat(
            [self.chars, self.common_id_embedding], 0, name='concat')

        self.filter_sizes = list(map(int, FLAGS.filter_sizes.split(',')))

        self.clfier_filters = [None] * len(self.filter_sizes)
        self.clfier_bs = [None] * len(self.filter_sizes)
        for i, filter_size in enumerate(self.filter_sizes):
            with tf.variable_scope('Clfier_conv_maxpool') as scope:
                self.clfier_filters[i] = tf.get_variable(
                    "clfier_filter_%d" % i,
                    shape=[
                        filter_size, FLAGS.embedding_char_size, 1,
                        FLAGS.num_filters
                    ],
                    regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                    initializer=tf.truncated_normal_initializer(stddev=0.01),
                    dtype=tf.float32)

                self.clfier_bs[i] = tf.get_variable(
                    "clfier_b_%d" % i,
                    shape=[FLAGS.num_filters],
                    regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                    initializer=tf.truncated_normal_initializer(stddev=0.01),
                    dtype=tf.float32)

        with tf.variable_scope('Clfier_output') as scope:
            self.clfier_softmax_W = tf.get_variable(
                "clfier_W",
                shape=[
                    FLAGS.num_filters * len(self.filter_sizes),
                    FLAGS.num_classes
                ],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

            self.clfier_softmax_b = tf.get_variable(
                "clfier_softmax_b",
                shape=[FLAGS.num_classes],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

        self.inp_c = tf.placeholder(
            tf.int32, shape=[None, FLAGS.max_sentence_len], name="input_words")
Example #6
def create_dataset(headlines_train, bodies_train, stance_train, headlines_test,
                   bodies_test, stance_test):
    """Given w2v model, train and test headlines, bodies and stance, return Dataset
	either by reading in already made csv's or calling data_splitting functions"""
    w2v_file = 'GoogleNews-vectors-negative300.bin.gz'
    w2v_url = 'https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM'

    w2v = utils.load_w2v(w2v_file, w2v_url)
    if w2v == False:
        print 'failed to load the file'
        return

    try:
        tmp = pd.read_csv('training-all.csv')
    except IOError:
        print("File not found, try running cleanup.py")
        """
	for (idx, data) in enumerate(even_split(tmp)):
		
		try:
			data = pd.read_csv(path +'_split.csv')
		except IOError:
			print("File not found, try running cleanup.py")
		
		bodies = np.array(data['articleBody'])
		headlines = np.array(data['Headline'])
		stance = np.array(data['Stance'])

		#if(idx==0):
		X_train, y_train = prep_stanford(headlines, bodies, stance, w2v, concat)
		print(X_train.shape)
		print(y_train.shape)
		#else:
		X_test, y_test = prep_stanford(headlines, bodies, stance, w2v, concat)
		print(X_test.shape)
		print(y_test.shape)
		"""
    X_train, y_train = prep_data2(headlines_train,
                                  bodies_train,
                                  stance_train,
                                  w2v,
                                  concat,
                                  flag=1)
    X_test, y_test = prep_data2(headlines_test,
                                bodies_test,
                                stance_test,
                                w2v,
                                concat,
                                flag=0)

    return X_train, y_train, X_test, y_test
Example #7
    def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100, learning_rate=0.01,
                 n_class=3, max_sentence_len=50, l2_reg=0., display_step=4, n_iter=100, type_=''):
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.n_class = n_class
        self.max_sentence_len = max_sentence_len
        self.l2_reg = l2_reg
        self.display_step = display_step
        self.n_iter = n_iter
        self.type_ = type_
        self.word_id_mapping, self.w2v = load_w2v(FLAGS.embedding_file_path, self.embedding_dim)
        self.word_embedding = tf.constant(self.w2v, name='word_embedding')
        # self.word_embedding = tf.Variable(self.w2v, name='word_embedding')
        # self.word_id_mapping = load_word_id_mapping(FLAGS.word_id_file_path)
        # self.word_embedding = tf.Variable(
        #     tf.random_uniform([len(self.word_id_mapping), self.embedding_dim], -0.1, 0.1), name='word_embedding')

        self.dropout_keep_prob = tf.placeholder(tf.float32)
        with tf.name_scope('inputs'):
            self.x = tf.placeholder(tf.int32, [None, self.max_sentence_len])
            self.y = tf.placeholder(tf.int32, [None, self.n_class])
            self.sen_len = tf.placeholder(tf.int32, None)

            self.x_bw = tf.placeholder(tf.int32, [None, self.max_sentence_len])
            self.y_bw = tf.placeholder(tf.int32, [None, self.n_class])
            self.sen_len_bw = tf.placeholder(tf.int32, [None])

            self.target_words = tf.placeholder(tf.int32, [None, 1])

        with tf.name_scope('weights'):
            self.weights = {
                'softmax_bi_lstm': tf.get_variable(
                    name='bi_lstm_w',
                    shape=[2 * self.n_hidden, self.n_class],
                    initializer=tf.random_uniform_initializer(-0.003, 0.003),
                    regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
                )
            }

        with tf.name_scope('biases'):
            self.biases = {
                'softmax_bi_lstm': tf.get_variable(
                    name='bi_lstm_b',
                    shape=[self.n_class],
                    initializer=tf.random_uniform_initializer(-0.003, 0.003),
                    regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
                )
            }
Example #8
def w2v_augment():
    w2v_file = 'googlenews-vectors-negative300.bin.gz'
    w2v_url = 'https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM'

    w2v = utils.load_w2v(w2v_file,w2v_url)
    if w2v==False:
        print 'failed to load the file'
        return

    sentence = 'this is a test sentence'
    split_sentence=sentence.split()
    print split_sentence

    new_sentence = replace_w2v(split_sentence,w2v)
    print new_sentence
Example #9
    def __init__(self, distinctTagNum, w2vPath, numHidden):
        self.distinctTagNum = distinctTagNum
        self.numHidden = numHidden
        self.w2v = load_w2v(w2vPath, FLAGS.ner_embedding_word_size)
        self.words = tf.Variable(self.w2v, name="words")

        with tf.variable_scope('Ner_output') as scope:
            self.W = tf.get_variable(
                shape=[numHidden * 2, distinctTagNum],
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                name="weights",
                regularizer=tf.contrib.layers.l2_regularizer(0.001))
            self.b = tf.Variable(tf.zeros([distinctTagNum], name="bias"))
        self.inp_w = tf.placeholder(tf.int32,
                                    shape=[None, FLAGS.ner_max_sentence_len],
                                    name="input_words")
Example #10
def main():

    try:
        data = pd.read_csv("../data/training-all.csv")
    except IOError:
        print("File not found, try running cleanup.py")

    bodies = np.array(data['articleBody'])
    headlines = np.array(data['Headline'])
    stance = np.array(data['Stance'])

    w2v_file = 'GoogleNews-vectors-negative300.bin.gz'
    w2v_url = 'https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM'

    w2v = utils.load_w2v(w2v_file, w2v_url)
    if w2v == False:
        print 'failed to load the file'
        return

    run_test(headlines, bodies, stance, w2v)
def preprocess_all(bodies, stances, save_dir='./', augment_syn=True, window=True, split_per=0.6, val_per=0.2, diff=0.03, max_tries=10):
    w2v_file = 'GoogleNews-vectors-negative300.bin.gz'
    w2v_url = 'https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM'
    w2v = utils.load_w2v(w2v_file,w2v_url)
    df_all = import_data(bodies, stances, w2v, window=window)
    #df_tr, df_te, df_val = even_split(df_all, split_per, val_per, max_tries, diff)
    df_tr, df_te = even_split(df_all, split_per, val_per, max_tries, diff)
    if window:
        print 'test'
        df_te = explode_data(df_te)
        #print 'val'
        #df_val = explode_data(df_val)
        print 'train'
        df_tr = explode_data(df_tr)
    print 'len train', len(df_tr)
    print 'len test', len(df_te)
    #print 'len val', len(df_val)
    augment_synonym.augment_headlines(df_tr, w2v, os.path.join(save_dir,'train.csv'), nrows=None, augment=augment_syn)
    print 'done train'
    #augment_synonym.augment_headlines(df_val, w2v, os.path.join(save_dir,'val.csv'), nrows=None, augment=False)
    augment_synonym.augment_headlines(df_te, w2v, os.path.join(save_dir,'test.csv'), nrows=None, augment=False)
def create_domain_weigths(config_pair):
    """

    :param model_path:
    :param tkn_path:
    :param opt:
    :return:
    """
    vec_path, tkn_path, opt = config_pair
    print('Working on pre-trained embedding: '+str(vec_path))
    # load tokenizer
    with open(tkn_path, 'rb') as tkn_file:
        tkn = pickle.load(tkn_file)

    # loop through each domain
    for domain in tkn:
        print('Working on domain: '+str(domain))
        # get the vector generator
        vec_generator = utils.load_w2v(vec_path)
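        # The first (word, vector) pair is read up front only to discover the vector dimensionality.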
        tmp_w, tmp_v = next(vec_generator)
        print('Embedding size: ' + str(len(tmp_v)))
        
        embed_len = len(tkn[domain].word_index)
        if embed_len > tkn[domain].num_words:
            embed_len = tkn[domain].num_words

        embedding_matrix = np.zeros((embed_len + 1, len(tmp_v)))

        # add the word if the word in the tokenizer
#        if tmp_w in tkn[domain].word_index:
#            if tkn[domain].word_index[tmp_w] < tkn[domain].num_words:
#                embedding_matrix[tkn[domain].word_index[tmp_w]] = tmp_v
        # loop through each word vectors
        for word, vectors in vec_generator:
            if word in tkn[domain].word_index:
                if tkn[domain].word_index[word] < tkn[domain].num_words:
                    embedding_matrix[tkn[domain].word_index[word]] = vectors

        # save the matrix to the dir
        np.save(opt+'weights#'+str(domain)+'.npy', embedding_matrix)
Example #13
    def __init__(self, params,sess):
        print 'init LSTM'

        self.params = params
        
        #Model Params
        self.batch_size = params['batch_size']
        self.learning_rate = params['learning_rate']
        self.l2 = params['l2']
        self.lstm_units = 100
        self.hidden_units = 512

        #DataPrams
        self.body_truncate_len = 150
        self.headline_truncate_len = 40
        self.input_truncate_len = self.body_truncate_len + self.headline_truncate_len
        self.data_dim = params['data_dim']

        #W2V
        w2v_file = 'GoogleNews-vectors-negative300.bin.gz'
        w2v_url = 'https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM'        
        self.w2v = utils.load_w2v(w2v_file,w2v_url)

        #Build the model
        self.output = self.add_model()
        self.loss = self.calculate_loss(self.output)
        self.train_step = self.add_training()
        self.eval_correct = self.evaluate(self.output)
        self.iterations = 0 #counts the iterations we've run

        #Summary writers
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter('visual_logs_lstm' + '/train',
                                              sess.graph)
        self.test_writer = tf.summary.FileWriter('visual_logs_lstm'+ '/test')

        #Saver
        self.saver = tf.train.Saver()
Example #14
    def __init__(self, distinctTagNum, c2vPath, numHidden):
        self.distinctTagNum = distinctTagNum
        self.numHidden = numHidden
        self.c2v = load_w2v(c2vPath, FLAGS.embedding_char_size)
        self.chars = tf.Variable(self.c2v, name="chars")

        self.common_id_embedding_pad = tf.constant(
            0.0, shape=[1, numHidden * 2], name="common_id_embedding_pad")

        self.common_id_embedding = tf.Variable(tf.random_uniform(
            [len(ENTITY_TYPES), numHidden * 2], -1.0, 1.0),
                                               name="common_id_embedding")

        self.common_embedding = tf.concat(
            [self.common_id_embedding_pad, self.common_id_embedding],
            0,
            name='common_embedding')

        with tf.variable_scope('Ner_output') as scope:
            self.W = tf.get_variable(
                shape=[numHidden * 2, distinctTagNum],
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                name="weights",
                regularizer=tf.contrib.layers.l2_regularizer(0.001))
            self.b = tf.Variable(tf.zeros([distinctTagNum], name="bias"))

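        # Attention parameters: attend_W is a 1x1 convolution kernel over the 2*numHidden
        # features and attend_V is a 2*numHidden scoring vector.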
        with tf.variable_scope('Attention') as scope:
            self.attend_W = tf.get_variable(
                "attend_W",
                shape=[1, 1, self.numHidden * 2, self.numHidden * 2],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

            self.attend_V = tf.get_variable(
                "attend_V",
                shape=[self.numHidden * 2],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

        with tf.variable_scope('Clfier_output') as scope:
            self.clfier_softmax_W = tf.get_variable(
                "clfier_W",
                shape=[numHidden * 2, FLAGS.num_classes],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

            self.clfier_softmax_b = tf.get_variable(
                "clfier_softmax_b",
                shape=[FLAGS.num_classes],
                regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                initializer=tf.truncated_normal_initializer(stddev=0.01),
                dtype=tf.float32)

        self.inp_c = tf.placeholder(tf.int32,
                                    shape=[None, FLAGS.max_sentence_len],
                                    name="input_words")

        self.entity_info = tf.placeholder(tf.int32,
                                          shape=[None, MAX_COMMON_LEN],
                                          name="entity_info")
Example #15
def main(train_path, test_path, accuracyOnt, test_size, remaining_size, learning_rate=FLAGS.learning_rate, keep_prob = FLAGS.keep_prob1):
    print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')
        # word_embedding = tf.Variable(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32, name="keep_prob1")
        keep_prob2 = tf.placeholder(tf.float32, name="keep_prob2")

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len], name="x")
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class], name="y")
            sen_len = tf.placeholder(tf.int32, None, name="sen_len")

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len], name="x_bw")
            sen_len_bw = tf.placeholder(tf.int32, [None], name="sen_len_bw")

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len], name="target_words")
            tar_len = tf.placeholder(tf.int32, [None], name="tar_len")

            sent_short = tf.placeholder(tf.int32, [None, None], name="sent_short")
            sent = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len], name="sent")
            mult_mask = tf.placeholder(tf.float32, [None, FLAGS.max_sentence_len], name="mult_mask")

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)       # batch x N x d
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)    # batch x N x d
        sent_full = tf.nn.embedding_lookup(word_embedding, sent)    # batch x N x d
        
        #compute average sentence representation and target
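        # Padded positions are masked to word id 0 before the lookup; dividing the summed
        # embeddings by the tiled true lengths yields an average sentence representation.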
        sentence_mask = tf.cast(tf.sequence_mask(sen_len_bw + sen_len - tar_len, FLAGS.max_sentence_len), tf.int32)
        sentence = tf.nn.embedding_lookup(word_embedding, tf.multiply(sent_short,sentence_mask))
        ave_sent = tf.divide(tf.reduce_sum(sentence, 1),tf.reshape(tf.tile(tf.cast(sen_len_bw + sen_len - tar_len, tf.float32),[FLAGS.embedding_dim]),[-1,FLAGS.embedding_dim]))
        
        target_mask = tf.cast(tf.sequence_mask(tar_len, FLAGS.max_target_len), tf.int32)
        target = tf.nn.embedding_lookup(word_embedding, tf.multiply(target_words,target_mask))
        target = tf.divide(tf.reduce_sum(target, 1),tf.reshape(tf.tile(tf.cast(tar_len, tf.float32), [FLAGS.embedding_dim]),[-1,FLAGS.embedding_dim]))


        # target = reduce_mean_with_len(target, tar_len)
        # for MLP & DOT
        # target = tf.expand_dims(target, 1)
        # batch_size = tf.shape(inputs_bw)[0]
        # target = tf.zeros([batch_size, FLAGS.max_sentence_len, FLAGS.embedding_dim]) + target
        # for BL
        # target = tf.squeeze(target)
        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = cabasc(inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len, ave_sent, mult_mask, sent_full, keep_prob1, keep_prob2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            keep_prob,
            FLAGS.keep_prob2,
            FLAGS.batch_size,
            learning_rate,
            FLAGS.l2_reg,
            FLAGS.max_sentence_len,
            FLAGS.embedding_dim,
            FLAGS.n_hidden,
            FLAGS.n_class
        )

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        # saver = saver_func(save_dir)

        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, tr_sen_short, tr_sent, tr_mult_mask = load_inputs_cabasc(
            train_path,
            word_id_mapping,
            FLAGS.max_sentence_len,
            'TC',
            is_r,
            FLAGS.max_target_len
        )
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, te_sen_short, te_sent, te_mult_mask = load_inputs_cabasc(
            test_path,
            word_id_mapping,
            FLAGS.max_sentence_len,
            'TC',
            is_r,
            FLAGS.max_target_len
        )

        def get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi, target, tl, batch_size, kp1, kp2, senshort, sen, multMask, is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                    
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                    sent_short: sen[index],
                    sent: sen[index],
                    mult_mask: multMask[index]
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None
        for i in range(FLAGS.n_iter):
            trainacc, traincnt = 0., 0
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len,
                                           FLAGS.batch_size, keep_prob, keep_prob, tr_sen_short, tr_sent, tr_mult_mask):
                # _, step = sess.run([optimizer, global_step], feed_dict=train)
                _, step, summary, _trainacc = sess.run([optimizer, global_step, train_summary_op, acc_num], feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)
                trainacc += _trainacc            # saver.save(sess, save_dir, global_step=step)
                traincnt += numtrain
            print('finished train')
            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000, 1.0, 1.0, te_sen_short, te_sent, te_mult_mask, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [loss, acc_num, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r, true_y, pred_y, prob], feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw = sess.run([loss, acc_num, true_y, pred_y, prob, alpha_fw], feed_dict=test)
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                fw = np.asarray(_fw)
                p += list(_p)
                acc += _acc
                cost += _loss * num
                cnt += num
            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            totalacc = ((acc * remaining_size) + (accuracyOnt * (test_size - remaining_size))) / test_size
            cost = cost / cnt
            print('Iter {}: mini-batch loss={:.6f}, train acc={:.6f}, test acc={:.6f}, combined acc={:.6f}'.format(i, cost,trainacc, acc, totalacc))
            summary = sess.run(test_summary_op, feed_dict={test_loss: cost, test_acc: acc})
            test_summary_writer.add_summary(summary, step)
            if acc > max_acc:
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p
        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        fp = open(FLAGS.prob_file, 'w')
        for item in max_prob:
            fp.write(' '.join([str(it) for it in item]) + '\n')
        fp = open(FLAGS.prob_file + '_fw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_fw):
            fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_bw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_bw):
            fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tl', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tl):
            fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tr', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tr):
            fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print('Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'.format(
            FLAGS.learning_rate,
            FLAGS.n_iter,
            FLAGS.batch_size,
            FLAGS.n_hidden,
            FLAGS.l2_reg
        ))
        return max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0, 1), max_fw
Example #16
def main(train_path,
         eval_path,
         test_path,
         complete_path,
         accuracyOnt,
         test_size,
         remaining_size,
         l2=0.0001):  #learning_rate=0.02, keep_prob=0.7,
    #momentum=0.95, l2=0.0001):
    #print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)
        learning_rate = tf.placeholder(tf.float32)
        momentum = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        #save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        # saver = saver_func(save_dir)

        #save_pth = 'savedModel' + str(FLAGS.year)
        #if save_pth is not None:
        #   save_path = save_pth + '/'
        #  saver = saver_func(save_path)

        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')
        # restore_pth = 'savedModel' + str(FLAGS.year)
        # meta = '-2350'
        #if restore_pth is not None and meta is not None:
        #   restore_path = restore_pth + '/'
        #  restore_meta_path = restore_pth + '/' + meta + '.meta'
        # restore = tf.train.import_meta_graph(restore_meta_path)
        #restore.restore(sess, tf.train.latest_checkpoint(restore_path))

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        ev_x, ev_sen_len, ev_x_bw, ev_sen_len_bw, ev_y, ev_target_word, ev_tar_len, _, _, _ = load_inputs_twitter(
            eval_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        co_x, co_sen_len, co_x_bw, co_sen_len_bw, co_y, co_target_word, co_tar_len, _, _, _ = load_inputs_twitter(
            complete_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        def train_get_batch_data(x_f,
                                 sen_len_f,
                                 x_b,
                                 sen_len_b,
                                 yi,
                                 target,
                                 tl,
                                 batch_size,
                                 kp1,
                                 kp2,
                                 learning,
                                 moment,
                                 is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                    learning_rate: learning,
                    momentum: moment,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        train_time = 0
        max_time = 0

        print("number of training instances: {}, number of test instances: {}".
              format(len(tr_y), len(te_y)))

        cost_func_test = []
        cost_func_train = []
        cost_func_eval = []
        acc_func_train = []
        acc_func_test = []
        acc_func_eval = []

        i = 0
        converged = False
        all_evalloss = []
        all_evalacc = []

        max_evalloss = 100

        lr = 0.02
        keep_prob = 0.7
        mom = 0.95

        while i < FLAGS.n_iter and converged == False:
            trainacc, trainloss, traincnt = 0., 0., 0
            start_time = time.time()
            for train, numtrain in train_get_batch_data(
                    tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                    tr_target_word, tr_tar_len, FLAGS.batch_size, keep_prob,
                    keep_prob, lr, mom):
                # _, step = sess.run([optimizer, global_step], feed_dict=train)
                _, _trainloss, step, summary, _trainacc = sess.run(
                    [optimizer, loss, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)
                trainacc += _trainacc  # saver.save(sess, save_dir, global_step=step)
                traincnt += numtrain
                trainloss += _trainloss * numtrain

            #if save_pth is not None:
            #   saver.save(sess, save_path, global_step=step)

            elapsed_time = time.time() - start_time
            train_time += elapsed_time

            evalacc, evalcost, evalcnt = 0., 0., 0
            for eva, evalnum in get_batch_data(ev_x, ev_sen_len, ev_x_bw,
                                               ev_sen_len_bw, ev_y,
                                               ev_target_word, ev_tar_len,
                                               2000, 1.0, 1.0, False):
                _evalloss, _evalacc = sess.run([loss, acc_num], feed_dict=eva)
                evalacc += _evalacc
                evalcost += _evalloss * evalnum
                evalcnt += evalnum

            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):

                _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                    [
                        loss, acc_num, true_y, pred_y, prob, alpha_fw,
                        alpha_bw, alpha_t_l, alpha_t_r
                    ],
                    feed_dict=test)
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num

            comacc, comcnt = 0., 0
            for com, comnum in get_batch_data(co_x, co_sen_len, co_x_bw,
                                              co_sen_len_bw, co_y,
                                              co_target_word, co_tar_len,
                                              FLAGS.batch_size, 1.0, 1.0,
                                              False):
                _comloss, _comacc, _cty, _cpy, _cp, _cfw, _cbw, _ctl, _ctr = sess.run(
                    [
                        loss, acc_num, true_y, pred_y, prob, alpha_fw,
                        alpha_bw, alpha_t_l, alpha_t_r
                    ],
                    feed_dict=com)
                comacc += _comacc
                comcnt += comnum

            print(
                'all samples={}, correct prediction={}, training time={}, training time so far={}'
                .format(cnt, acc, elapsed_time, train_time))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            evalacc = evalacc / evalcnt
            comacc = comacc / comcnt
            #totalacc = ((acc * remaining_size) + (accuracyOnt * (test_size - remaining_size))) / test_size
            cost = cost / cnt
            trainloss = trainloss / traincnt
            evalcost = evalcost / evalcnt
            cost_func_test.append(cost)
            cost_func_train.append(trainloss)
            cost_func_eval.append(evalcost)
            acc_func_eval.append(evalacc)
            acc_func_test.append(acc)
            acc_func_train.append(trainacc)
            print(
                'Iter {}: mini-batch loss validation set={:.6f}, train loss={:.6f}, train acc={:.6f}, validation acc={:6f} test acc={:.6f}, total training acc={:6f}'
                .format(i, evalcost, trainloss, trainacc, evalacc, acc,
                        comacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)

            all_evalloss.append(evalcost)
            all_evalacc.append(evalacc)
            if i > 2:  # stop early when validation accuracy has improved by less than 0.001 in each of the last three iterations
                if (all_evalacc[i] - all_evalacc[i - 1] < 0.001) and (
                        all_evalacc[i - 1] - all_evalacc[i - 2] < 0.001) \
                        and (all_evalacc[i - 2] - all_evalacc[i - 3] < 0.001):
                    converged = True

            i += 1

            #if acc > max_acc:
            #   max_acc = acc
            #  max_fw = fw
            # max_bw = bw
            # max_tl = tl
            # max_tr = tr
            # max_ty = ty
            # max_py = py
            # max_prob = p

        #P = precision_score(max_ty, max_py, average=None)
        #R = recall_score(max_ty, max_py, average=None)
        #F1 = f1_score(max_ty, max_py, average=None)
        #print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        #print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        #print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        print("total train acc = ")

        # Plotting chart of training and testing loss as a function of iterations
        iterations = list(range(i))
        plt.plot(iterations, cost_func_train, label='Cost func train')
        plt.plot(iterations, cost_func_test, label='Cost func test')
        plt.plot(iterations, cost_func_eval, label='Cost func validation')
        plt.title('Model loss k=1')
        plt.ylabel('Loss')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        # Plotting chart of training and testing accuracies as a function of iterations
        iterations = list(range(i))
        plt.plot(iterations, acc_func_train, label='Acc func train')
        plt.plot(iterations, acc_func_test, label='Acc func test')
        plt.plot(iterations, acc_func_eval, label='Acc func validation')
        plt.title('Model accuracy k=1')
        plt.ylabel('Accuracy')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        print(
            'Optimization Finished! Iteration:{}: Validation loss={}, validation accuracy={}, test accuracy={}'
            .format(i, evalcost, evalacc, acc))
        print(acc_func_train)
        print(acc_func_test)

        return acc
Example #17
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#Author zhang

import utils

from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, GlobalAveragePooling1D



sequences, data = utils.read_corpus("data\data.tsv")
w2v_model = utils.load_w2v("data\word2vec.model")
word2id = utils.word2id(w2v_model)
X_data = utils.get_sequences(word2id, sequences)

# Pad or truncate each sequence to a fixed length
maxlen = 20
X_pad = pad_sequences(X_data, maxlen=maxlen)
# Get the sentiment labels
Y = data.sentiment.values
# Split into train and test sets
X_train, X_test, Y_train, Y_test = train_test_split(
    X_pad,
    Y,
    test_size=0.2,
    random_state=42)

"""
Example #18
    def __init__(self, config):
        self.embedding_dim = config.embedding_dim
        self.batch_size = config.batch_size
        self.n_hidden = config.n_hidden
        self.learning_rate = config.learning_rate
        self.n_class = config.n_class
        self.max_len = config.max_len
        self.l2_reg = config.l2_reg
        self.display_step = config.display_step
        self.n_iter = config.n_iter
        self.embedding_file = config.embedding_file_path
        self.word2id_file = config.word_id_file_path
        self.aspect_id_file = config.aspect_id_file_path
        self.train_file = config.train_file_path
        self.test_file = config.test_file_path
        self.val_file = config.validate_file_path

        self.word2id, self.w2v = load_w2v(self.embedding_file,
                                          self.embedding_dim)
        self.word_embedding = tf.constant(self.w2v, name='word_embedding')
        self.aspect2id, self.a2v = load_aspect2id(self.aspect_id_file,
                                                  self.word2id, self.w2v,
                                                  self.embedding_dim)
        self.aspect_embedding = tf.constant(self.a2v, name='aspect_embedding')

        with tf.name_scope('inputs'):
            self.x = tf.placeholder(tf.int32, [None, self.max_len], name='x')
            self.y = tf.placeholder(tf.float32, [None, self.n_class], name='y')
            self.sen_len = tf.placeholder(tf.int32, None, name='sen_len')
            self.aspect_id = tf.placeholder(tf.int32, None, name='aspect_id')
            self.position = tf.placeholder(tf.int32, [None, self.max_len],
                                           name='position')

        with tf.name_scope('GRU'):
            self.w_r = tf.get_variable(
                name='W_r',
                shape=[2 * self.n_hidden + 1, self.n_hidden],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.u_r = tf.get_variable(
                name='U_r',
                shape=[self.n_hidden, self.n_hidden],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.w_z = tf.get_variable(
                name='W_z',
                shape=[2 * self.n_hidden + 1, self.n_hidden],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.u_z = tf.get_variable(
                name='U_z',
                shape=[self.n_hidden, self.n_hidden],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.w_x = tf.get_variable(
                name='W_x',
                shape=[self.n_hidden, self.n_hidden],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
            self.w_g = tf.get_variable(
                name='W_g',
                shape=[2 * self.n_hidden + 1, self.n_hidden],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
Example #19
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         learning_rate=0.09,
         keep_prob=0.3,
         momentum=0.85,
         l2=0.00001):
    print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        Results_File = np.zeros((3, 1))
        for i in range(FLAGS.n_iter):
            trainacc, traincnt = 0., 0
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):
                # _, step = sess.run([optimizer, global_step], feed_dict=train)
                _, step, summary, _trainacc = sess.run(
                    [optimizer, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)
                trainacc += _trainacc  # saver.save(sess, save_dir, global_step=step)
                traincnt += numtrain

            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                            alpha_t_r, true_y, pred_y, prob
                        ],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [
                            loss, acc_num, true_y, pred_y, prob, alpha_fw,
                            alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=test)
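                # Note: the arrays below are overwritten on every test batch, so only the last batch (of up to 2000 instances) is kept.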
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num
            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc = trainacc / traincnt
            acc = acc / cnt
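            # Combined accuracy: weighted average of the neural model's accuracy on the remaining instances and the ontology's accuracy on the rest of the test set.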
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt *
                         (test_size - remaining_size))) / test_size
            cost = cost / cnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train acc={:.6f}, test acc={:.6f}, combined acc={:.6f}'
                .format(i, cost, trainacc, acc, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)
            if acc > max_acc:
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

            Added = [[i + 1], [trainacc], [acc]]
            Results_File = np.concatenate((Results_File, Added), 1)

        # Saving training information as csv file
        from datetime import datetime
        dateTimeObj = datetime.now()
        save_dir = '/Users/ronhochstenbach/Desktop/Ectrie Thesis/Venv_Thesis/Results_Run_Adversarial/Run_' + str(
            dateTimeObj) + '_lcrrot_' + str(FLAGS.year) + '.csv'
        np.savetxt(save_dir, Results_File, delimiter=",")

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        fp = open(FLAGS.prob_file, 'w')
        for item in max_prob:
            fp.write(' '.join([str(it) for it in item]) + '\n')
        fp = open(FLAGS.prob_file + '_fw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_fw):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_bw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_bw):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tl', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tl):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tr', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tr):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0,
                                 1), max_fw.tolist(), max_bw.tolist(
                                 ), max_tl.tolist(), max_tr.tolist()
Example #20
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         augment_data,
         augmentation_file_path,
         ct,
         learning_rate=0.09,
         keep_prob=0.3,
         momentum=0.85,
         l2=0.00001):
    print_config()
    augmenter = Augmentation(FLAGS.EDA_type, need_mixup=True)
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        # saver = saver_func(save_dir)

        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        len_non_augmented, tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path,
            word_id_mapping,
            FLAGS.max_sentence_len,
            'TC',
            is_r,
            FLAGS.max_target_len,
            augment_data=augment_data,
            augmentation_file_path=augmentation_file_path)
        _, te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        max_records_mixup = len(
            tr_x) if FLAGS.mixup_on_augmentations > 0 else len_non_augmented
        if augment_data and FLAGS.use_word_mixup > 0:
            print("The amount of records on which mixup is applied: {}".format(
                max_records_mixup))
            rand_mixup = np.array(range(max_records_mixup - 1))
            print("applying mixup...")
            for _ in range(FLAGS.use_word_mixup):
                random.shuffle(rand_mixup)
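                # Walk over the shuffled indices two at a time, applying word-level mixup to each disjoint pair of training instances.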
                for i, j in tqdm(zip(*[iter(rand_mixup)] * 2)):
                    first = (tr_x[i], tr_sen_len[i], tr_x_bw[i],
                             tr_sen_len_bw[i], tr_y[i], tr_target_word[i],
                             tr_tar_len[i])
                    second = (tr_x[j], tr_sen_len[j], tr_x_bw[j],
                              tr_sen_len_bw[j], tr_y[j], tr_target_word[j],
                              tr_tar_len[j])
                    augmenter.word_mixup(first, second)
            print("Word mixup embeddings: {}".format(augmenter.counter))

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None
        for i in range(FLAGS.n_iter):
            trainacc, traincnt = 0., 0
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):
                # _, step = sess.run([optimizer, global_step], feed_dict=train)
                _, step, summary, _trainacc = sess.run(
                    [optimizer, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)
                trainacc += _trainacc  # saver.save(sess, save_dir, global_step=step)
                traincnt += numtrain

            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                            alpha_t_r, true_y, pred_y, prob
                        ],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [
                            loss, acc_num, true_y, pred_y, prob, alpha_fw,
                            alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=test)
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num
            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt *
                         (test_size - remaining_size))) / test_size
            cost = cost / cnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train acc={:.6f}, test acc={:.6f}, combined acc={:.6f}'
                .format(i, cost, trainacc, acc, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)
            if acc > max_acc:
                max_trainacc = trainacc
                max_totalacc = totalacc
                iteration = i
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        keys_to_save = 'year EDA_type EDA_deletion EDA_replacement original_multiplier EDA_insertion EDA_swap EDA_pct backtranslation_langs use_word_mixup mixup_beta mixup_on_augmentations'.split(
            ' ')
        try:
            df = pd.read_json(FLAGS.results_file)
            print('adding outcome to {}'.format(FLAGS.results_file))
        except ValueError:
            print(
                'did not find an existing result file, creating a new one...')
            df = pd.DataFrame([])
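        # Store the selected flag values together with this run's key metrics as one new row in the results file.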
        new_experiment = {}
        for k, v in sorted(FLAGS.flag_values_dict().items()):
            if k in keys_to_save:
                new_experiment[k] = v
        new_experiment['in_sample'] = max_trainacc
        new_experiment['out_of_sample'] = max_acc
        new_experiment['ontology_acc'] = accuracyOnt
        new_experiment['total_acc'] = max_totalacc
        new_experiment['at_iteration'] = iteration
        new_experiment['#of_test'] = cnt
        new_experiment['#of_train'] = len(tr_x)
        new_experiment['pre_embed_aug'] = ct
        new_experiment['post_embed_aug'] = augmenter.counter
        df = df.append(new_experiment, ignore_index=True)
        df.to_json(FLAGS.results_file)

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0,
                                 1), max_fw.tolist(), max_bw.tolist(
                                 ), max_tl.tolist(), max_tr.tolist()
Example #21
def main(train_path,
         test_path,
         learning_rate=FLAGS.learning_rate,
         keep_prob=FLAGS.keep_prob1,
         l2=FLAGS.l2_reg,
         beta=0.9,
         number_epochs=100):
    print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=beta).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            learning_rate, l2, FLAGS.max_sentence_len, FLAGS.embedding_dim,
            FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    batch_size = FLAGS.batch_size
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))

        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        all_training_losses, all_training_accuracies = [], []
        all_test_losses, all_test_accuracies = [], []

        for i in range(number_epochs):
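            # Note: this decays the Python variable only; the MomentumOptimizer above was already built with the initial learning_rate, so the decay does not change the training op.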
            learning_rate = (0.99) * learning_rate
            number_of_training_examples_correct, number_of_training_examples, training_loss = 0., 0, 0.
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):
                # _, step = sess.run([optimizer, global_step], feed_dict=train)
                _, step, _trainacc, _training_loss = sess.run(
                    [optimizer, global_step, acc_num, loss], feed_dict=train)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)
                number_of_training_examples_correct += _trainacc
                number_of_training_examples += numtrain
                training_loss += _training_loss * numtrain

            number_of_test_examples_correct, test_loss, number_of_test_examples = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                            alpha_t_r, true_y, pred_y, prob
                        ],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [
                            loss, acc_num, true_y, pred_y, prob, alpha_fw,
                            alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=test)
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                number_of_test_examples_correct += _acc
                test_loss += _loss * num
                number_of_test_examples += num
            print(
                'number of training examples={}, correct training examples={}, number of test examples={}, correct test examples={}'
                .format(number_of_training_examples,
                        number_of_training_examples_correct,
                        number_of_test_examples,
                        number_of_test_examples_correct))

            training_accuracy = number_of_training_examples_correct / number_of_training_examples
            test_accuracy = number_of_test_examples_correct / number_of_test_examples
            average_test_loss = test_loss / number_of_test_examples
            average_training_loss = training_loss / number_of_training_examples

            all_training_losses.append(average_training_loss)
            all_training_accuracies.append(training_accuracy)
            all_test_losses.append(average_test_loss)
            all_test_accuracies.append(test_accuracy)

            print(
                'Epoch {}: average training loss={:.6f}, train acc={:.6f}, average test loss={:.6f}, test acc={:.6f}'
                .format(i, average_training_loss, training_accuracy,
                        average_test_loss, test_accuracy))

        min_training_loss = min(all_training_losses)
        max_training_accuracy = max(all_training_accuracies)
        min_test_loss = min(all_test_losses)
        max_test_accuracy = max(all_test_accuracies)

        print('Optimization Finished! Max acc={}'.format(max_test_accuracy))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(learning_rate, number_epochs, FLAGS.batch_size,
                    FLAGS.n_hidden, l2))

        return min_training_loss, max_training_accuracy, min_test_loss, max_test_accuracy, all_training_losses, all_training_accuracies, all_test_losses, all_test_accuracies
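Example #22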
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         sort,
         num_buckets,
         l2=0.0001):  # learning_rate=0.07,
    # keep_prob=0.4,
    # momentum=0.9):
    # print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)
        learning_rate = tf.placeholder(tf.float32)
        momentum = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        # save_dir = 'temp_model/babysteps2buckets'
        # saver = saver_func(save_dir)

        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

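        # Same as get_batch_data, but draws indices from the given curriculum bucket and additionally feeds the bucket-specific learning rate and momentum.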
        def curr_get_batch_data(x_f,
                                sen_len_f,
                                x_b,
                                sen_len_b,
                                yi,
                                target,
                                tl,
                                batch_size,
                                kp1,
                                kp2,
                                learning,
                                moment,
                                bucket,
                                is_shuffle=True):
            for index in curr_batch_index(bucket, batch_size, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                    learning_rate: learning,
                    momentum: moment,
                }
                yield feed_dict, len(index)

        def eval_get_batch_data(x_f,
                                sen_len_f,
                                x_b,
                                sen_len_b,
                                yi,
                                target,
                                tl,
                                batch_size,
                                kp1,
                                kp2,
                                bucket,
                                is_shuffle=True):
            for index in curr_batch_index(bucket, batch_size, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        train_time = 0
        max_time = 0

        total_iter = 0
        cost_func_test = []
        cost_func_train = []
        cost_func_eval = []
        acc_func_train = []
        acc_func_test = []
        acc_func_eval = []
        alltrainacc = []

        # split data in num_buckets
        buckets = np.array_split(sort, num_buckets)
        bucket_number = 1

        lowest_val = 100
        best_train = 0.
        best_test = 0.
        best_iteration = 0

        for bucket in buckets:  # for every bucket of data
            print("bucket number:{}".format(bucket_number))
            np.random.shuffle(bucket)
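            # 80/20 split of the shuffled bucket into training and validation subsets.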
            tmp = int(round(0.8 * len(bucket)))
            traindata = bucket[:tmp]
            evaldata = bucket[tmp:]

            # update the hyperparameters for every bucket, depending on num_buckets
            if bucket_number == 1:
                lr = 0.01
                keep_prob = 0.7
                mom = 0.85
            if bucket_number == 2:
                lr = 0.01
                keep_prob = 0.7
                mom = 0.85
            if bucket_number == 3:
                lr = 0.02
                keep_prob = 0.6
                mom = 0.95
            if bucket_number == 4:
                lr = 0.08
                keep_prob = 0.3
                mom = 0.9
            if bucket_number == 5:
                lr = 0.07
                keep_prob = 0.4
                mom = 0.99
            if bucket_number == 6:
                lr = 0.02
                keep_prob = 0.6
                mom = 0.9
            if bucket_number == 7:
                lr = 0.05
                keep_prob = 0.4
                mom = 0.9
            if bucket_number == 8:
                lr = 0.05
                keep_prob = 0.4
                mom = 0.95
            if bucket_number == 9:
                lr = 0.01
                keep_prob = 0.5
                mom = 0.99
            if bucket_number == 10:  # all the instances
                lr = 0.01
                keep_prob = 0.6
                mom = 0.9

            print("Training instances: {}, validation instances: {}".format(
                len(traindata), len(evaldata)))

            i = 0
            converged = False
            all_evalloss = []
            all_evalacc = []
            while i < FLAGS.n_iter and converged == False:  # until convergence or until certain amount of iterations
                trainacc, trainloss, traincnt = 0., 0., 0
                start_time = time.time()
                for train, numtrain in curr_get_batch_data(
                        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                        tr_target_word, tr_tar_len, FLAGS.batch_size,
                        keep_prob, keep_prob, lr, mom, traindata):
                    # _, step = sess.run([optimizer, global_step], feed_dict=train)
                    _, _trainloss, step, summary, _trainacc = sess.run(
                        [
                            optimizer, loss, global_step, train_summary_op,
                            acc_num
                        ],
                        feed_dict=train)

                    train_summary_writer.add_summary(summary, step)
                    # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                    # sess.run(embed_update)
                    trainacc += _trainacc  # saver.save(sess, save_dir, global_step=step)
                    traincnt += numtrain
                    trainloss += _trainloss * numtrain

                elapsed_time = time.time() - start_time
                train_time += elapsed_time

                evalacc, evalcost, evalcnt = 0., 0., 0
                for eva, evalnum in eval_get_batch_data(
                        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                        tr_target_word, tr_tar_len, 2000, 1.0, 1.0, evaldata,
                        False):
                    _evalloss, _evalacc = sess.run([loss, acc_num],
                                                   feed_dict=eva)
                    evalacc += _evalacc
                    evalcost += _evalloss * evalnum
                    evalcnt += evalnum
                acc, cost, cnt = 0., 0., 0
                fw, bw, tl, tr, ty, py = [], [], [], [], [], []
                p = []
                for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                                te_sen_len_bw, te_y,
                                                te_target_word, te_tar_len,
                                                2000, 1.0, 1.0, False):
                    if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                        _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                            [
                                loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                                alpha_t_r, true_y, pred_y, prob
                            ],
                            feed_dict=test)
                        fw += list(_fw)
                        bw += list(_bw)
                        tl += list(_tl)
                        tr += list(_tr)
                    else:
                        _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                            [
                                loss, acc_num, true_y, pred_y, prob, alpha_fw,
                                alpha_bw, alpha_t_l, alpha_t_r
                            ],
                            feed_dict=test)
                    ty = np.asarray(_ty)
                    py = np.asarray(_py)
                    p = np.asarray(_p)
                    fw = np.asarray(_fw)
                    bw = np.asarray(_bw)
                    tl = np.asarray(_tl)
                    tr = np.asarray(_tr)
                    acc += _acc
                    cost += _loss * num
                    cnt += num

                comacc, comcnt = 0., 0
                for com, comnum in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, 1.0, 1.0,
                                                  False):
                    _comloss, _comacc, _cty, _cpy, _cp, _cfw, _cbw, _ctl, _ctr = sess.run(
                        [
                            loss, acc_num, true_y, pred_y, prob, alpha_fw,
                            alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=com)
                    comacc += _comacc
                    comcnt += comnum

                print(
                    'all samples={}, correct prediction={}, training time={}, training time so far={}'
                    .format(cnt, acc, elapsed_time, train_time))
                trainacc = trainacc / traincnt
                acc = acc / cnt
                evalacc = evalacc / evalcnt
                comacc = comacc / comcnt
                alltrainacc.append(comacc)
                totalacc = ((acc * remaining_size) +
                            (accuracyOnt *
                             (test_size - remaining_size))) / test_size
                cost = cost / cnt
                trainloss = trainloss / traincnt
                evalcost = evalcost / evalcnt
                cost_func_test.append(cost)
                cost_func_train.append(trainloss)
                cost_func_eval.append(evalcost)
                acc_func_test.append(acc)
                acc_func_train.append(trainacc)
                acc_func_eval.append(evalacc)
                print(
                    'Iter {}: mini-batch loss validation set={:.6f}, train loss={:.6f}, train acc={:.6f}, '
                    'validation acc={:.6f}, test acc={:.6f}, total train acc={:.6f}'
                    .format(i, evalcost, trainloss, trainacc, evalacc, acc,
                            comacc))
                summary = sess.run(test_summary_op,
                                   feed_dict={
                                       test_loss: cost,
                                       test_acc: acc
                                   })
                test_summary_writer.add_summary(summary, step)

                all_evalloss.append(evalcost)
                all_evalacc.append(evalacc)
                if i > 2:  # compare the current validation accuracy with that of the three previous iterations
                    if (all_evalacc[i] - all_evalacc[i - 1] < 0.001) and (
                            all_evalacc[i - 1] - all_evalacc[i - 2] < 0.001) \
                            and (all_evalacc[i - 2] - all_evalacc[i - 3] < 0.001):
                        converged = True
                # if (all_evalloss[i] - all_evalloss[i-1] > 0.00001) and (all_evalloss[i-1] - all_evalloss[i-2] > 0.00001) \
                #    and (all_evalloss[i-2] - all_evalloss[i-3] > 0.00001):
                #   converged = True

                if bucket_number == num_buckets:
                    if evalcost < lowest_val:
                        lowest_val = evalcost
                        best_test = acc
                        best_train = comacc
                        best_iteration = i

                i += 1
                total_iter += 1

            bucket_number += 1

        # Plotting chart of training and testing losses as a function of iterations
        iterations = list(range(total_iter))
        plt.plot(iterations, cost_func_train, label='Cost func train')
        plt.plot(iterations, cost_func_test, label='Cost func test')
        plt.plot(iterations, cost_func_eval, label='Cost func validation')
        plt.title('Model loss k={}'.format(num_buckets))
        plt.ylabel('Loss')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        # Plotting chart of training and testing accuracies as a function of iterations
        iterations = list(range(total_iter))
        plt.plot(iterations, acc_func_train, label='Acc func train')
        plt.plot(iterations, acc_func_test, label='Acc func test')
        plt.plot(iterations, acc_func_eval, label='Acc func validation')
        plt.title('Model accuracy k={}'.format(num_buckets))
        plt.ylabel('Accuracy')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        print(
            'Optimization Finished! Iteration:{}: Validation loss={}, validation accuracy={}, test accuracy={}, in-sample acc={}'
            .format(total_iter, evalcost, evalacc, acc, comacc))

        print(
            "Lowest validation loss:{}, at iteration:{}, with out-of-sample acc:{} and in-sample acc:{}"
            .format(lowest_val, best_iteration, best_test, best_train))

        print('iter_num={}, batch_size={}, hidden_num={}, l2={}'.format(
            total_iter, FLAGS.batch_size, FLAGS.n_hidden, l2))

        print(acc_func_train)
        print(acc_func_test)
        print(acc_func_eval)
        print(cost_func_eval)
        print(alltrainacc)

        return acc
Example #23
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         learning_rate=0.09,
         keep_prob=0.3,
         momentum=0.85,
         l2=0.0001):
    # print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        # saver = saver_func(save_dir)

        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        train_time = 0
        max_time = 0

        cost_func_test = []
        cost_func_train = []
        acc_func_train = []
        acc_func_test = []

        i = 0
        converged = False
        all_testloss = []
        all_evalacc = []
        max_testloss = 100

        while i < FLAGS.n_iter and converged == False:
            trainacc, trainloss, traincnt = 0., 0., 0
            start_time = time.time()
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):
                # _, step = sess.run([optimizer, global_step], feed_dict=train)
                _, _trainloss, step, summary, _trainacc = sess.run(
                    [optimizer, loss, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)
                trainacc += _trainacc  # saver.save(sess, save_dir, global_step=step)
                traincnt += numtrain
                trainloss += _trainloss * numtrain

            elapsed_time = time.time() - start_time
            train_time += elapsed_time

            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                            alpha_t_r, true_y, pred_y, prob
                        ],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [
                            loss, acc_num, true_y, pred_y, prob, alpha_fw,
                            alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=test)
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num
            print('training samples= {}'.format(traincnt))
            print(
                'all samples={}, correct prediction={}, training time={}, training time so far={}'
                .format(cnt, acc, elapsed_time, train_time))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt *
                         (test_size - remaining_size))) / test_size
            cost = cost / cnt
            trainloss = trainloss / traincnt
            cost_func_test.append(cost)
            cost_func_train.append(trainloss)
            acc_func_test.append(acc)
            acc_func_train.append(trainacc)
            print(
                'Iter {}: mini-batch loss={:.6f}, train loss={:.6f}, train acc={:.6f}, test acc={:.6f}'
                .format(i, cost, trainloss, trainacc, acc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)

            all_testloss.append(cost)
            all_evalacc.append(acc)
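            # Early stopping: halt when the test loss has increased (by more than 1e-5) for three consecutive iterations.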
            if i > 2:
                if (all_testloss[i] - all_testloss[i - 1] > 0.00001) and (
                        all_testloss[i - 1] - all_testloss[i - 2] > 0.00001) \
                        and (all_testloss[i - 2] - all_testloss[i - 3] > 0.00001):
                    converged = True

            if np.isnan(cost):
                acc = 0
                converged = True

            #if i > 2:  # want to compare current validation accuracy with val acc previous iterations
            #  if (all_evalacc[i] - all_evalacc[i - 1] < 0.001) and (
            #        all_evalacc[i - 1] - all_evalacc[i - 2] < 0.001) \
            #      and (all_evalacc[i - 2] - all_evalacc[i - 3] < 0.001):
            #converged = True
            #if np.isnan(cost):
            #   converged = True

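            # Keep the iteration with the lowest test loss so far (stored in max_testloss despite the name).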
            if cost < max_testloss:
                max_testloss = cost
                max_testacc = acc
                max_iter = i
            i += 1

            if i == FLAGS.n_iter:  # did not converge within n iterations
                acc = 0

            # if acc > max_acc:
            #   max_acc = acc
            #  max_fw = fw
            # max_bw = bw
            # max_tl = tl
            # max_tr = tr
            # max_ty = ty
            # max_py = py
            # max_prob = p

        # P = precision_score(max_ty, max_py, average=None)
        # R = recall_score(max_ty, max_py, average=None)
        # F1 = f1_score(max_ty, max_py, average=None)
        # print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        # print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        # print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        # Plotting chart of training and testing loss as a function of iterations
        # iterations = list(range(i))
        # plt.plot(iterations, cost_func_train, label='Cost func train')
        # plt.plot(iterations, cost_func_test, label='Cost func test')
        # plt.title('Model loss k=1')
        # plt.ylabel('Loss')
        # plt.xlabel('Iterations')
        # plt.legend(['train', 'test'], loc='upper left')
        # plt.show()

        # Plotting chart of training and testing accuracies as a function of iterations
        # iterations = list(range(i))
        # plt.plot(iterations, acc_func_train, label='Acc func train')
        # plt.plot(iterations, acc_func_test, label='Cost func test')
        # plt.title('Model accuracy k=1')
        # plt.ylabel('Loss')
        # plt.xlabel('Iterations')
        # plt.legend(['train', 'test'], loc='upper left')
        # plt.show()

        print(
            'Optimization Finished! Iteration:{}:Minimal test loss={}, test accuracy={}'
            .format(max_iter, max_testloss, max_testacc))
        # now only looking at the last iteration! Ultimately you want the last test acc as low as possible (when hyper-optimizing)
        return acc
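
The example above stops training once the test loss has risen for three consecutive iterations and separately remembers the iteration with the lowest test loss. A minimal, self-contained sketch of that stopping rule (the names `has_risen_three_times` and `losses` are illustrative, not part of the example):

def has_risen_three_times(losses, eps=1e-5):
    # True if each of the last three steps increased the loss by more than eps
    if len(losses) < 4:
        return False
    return all(losses[-j] - losses[-j - 1] > eps for j in (1, 2, 3))

best_loss, best_iter = float('inf'), -1
losses = []
for i, loss in enumerate([0.9, 0.7, 0.6, 0.61, 0.63, 0.66, 0.7]):
    losses.append(loss)
    if loss < best_loss:
        best_loss, best_iter = loss, i
    if has_risen_three_times(losses):
        print('converged (test loss rising) at iter', i)
        break
print('best iter', best_iter, 'with test loss', best_loss)
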
Example #24
0
def main(train_path, test_path, accuracyOnt, test_size, remaining_size,
         learning_rate_dis, learning_rate_gen, keep_prob, momentum_dis,
         momentum_gen, l2, k, WriteFile):
    print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x_real = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y_real = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x_real)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        l, r, t_l, t_r, l2, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')
        gen_l, gen_r, gen_t_l, gen_t_r = generator(l2)

        with tf.variable_scope(
                "var_D", reuse=tf.AUTO_REUSE
        ) as scope:  #re-using the discriminator parameters since it is called twice per iter
            #Calculating prob for real data
            prob_real = discriminator(l, r, t_l, t_r, keep_prob2, l2)

            #Calculating prob for generated data
            prob_generated = discriminator(gen_l, gen_r, gen_t_l, gen_t_r,
                                           keep_prob2, l2)

        loss = loss_func_adversarial(prob_real, prob_generated, y_real)
        acc_num_real, acc_prob_real, acc_num_gen, acc_prob_gen = acc_func_adversarial(
            prob_real, prob_generated, y_real)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)

        #set variable lists
        var_list_D = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='var_D')
        var_list_G = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='var_G')

        #As we solve a min max problem, we optimize twice with respect to different variable sets , var_list = var_D , var_list = var_G
        opti_min = tf.train.MomentumOptimizer(learning_rate=learning_rate_dis,
                                              momentum=momentum_dis).minimize(
                                                  loss,
                                                  var_list=var_list_D,
                                                  global_step=global_step)
        opti_max = tf.train.MomentumOptimizer(learning_rate=learning_rate_gen,
                                              momentum=momentum_gen).minimize(
                                                  -loss, var_list=var_list_G)

        true_y = tf.argmax(y_real, 1)
        pred_y = tf.argmax(prob_real, 1)

        title = '-d1-{}d2-{}b-{}rd-{}rg-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            learning_rate_dis, learning_rate_gen, FLAGS.l2_reg,
            FLAGS.max_sentence_len, FLAGS.embedding_dim, FLAGS.n_hidden,
            FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func_adversarial(loss, acc_prob_real, acc_prob_gen, test_loss, test_acc, _dir, title, sess)
        save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        # saver = saver_func(save_dir)

        # saver.restore(sess, '/-')

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x_real: x_f[index],
                    x_bw: x_b[index],
                    y_real: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        Results_File = np.zeros(
            (5, 1)
        )  # 5 rows / values to store: ['Iteration', 'loss', 'trainacc_real', 'test_acc', 'avg prob assigned to correct generated']
        for i in range(1, FLAGS.n_iter + 1):
            avg_p_real = None
            avg_p_gen = None

            #update D more often than G
            if k >= 1:
                if i % k == 0:
                    print('In iter ' + str(i) + ' we update both G and D.')
                    trainacc_real, trainacc_gen, traincnt = 0., 0., 0
                    for train, numtrain in get_batch_data(
                            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                            tr_target_word, tr_tar_len, FLAGS.batch_size,
                            keep_prob, keep_prob):
                        # _, step = sess.run([optimizer, global_step], feed_dict=train)

                        _, _, step, summary, _trainacc_real, _trainacc_gen = sess.run(
                            [
                                opti_max, opti_min, global_step,
                                train_summary_op, acc_num_real, acc_num_gen
                            ],
                            feed_dict=train)
                        train_summary_writer.add_summary(summary, step)
                        # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                        # sess.run(embed_update)
                        trainacc_real += _trainacc_real  # saver.save(sess, save_dir, global_step=step)
                        trainacc_gen += _trainacc_gen
                        traincnt += numtrain
                else:
                    print('In iter ' + str(i) + ' we update only D.')
                    trainacc_real, trainacc_gen, traincnt = 0., 0., 0
                    for train, numtrain in get_batch_data(
                            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                            tr_target_word, tr_tar_len, FLAGS.batch_size,
                            keep_prob, keep_prob):
                        # _, step = sess.run([optimizer, global_step], feed_dict=train)

                        _, step, summary, _trainacc_real, _trainacc_gen = sess.run(
                            [
                                opti_min, global_step, train_summary_op,
                                acc_num_real, acc_num_gen
                            ],
                            feed_dict=train)
                        train_summary_writer.add_summary(summary, step)
                        # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                        # sess.run(embed_update)
                        trainacc_real += _trainacc_real  # saver.save(sess, save_dir, global_step=step)
                        trainacc_gen += _trainacc_gen
                        traincnt += numtrain

            #Update G more often than D
            else:
                k_inv = int(round(1 / k))  # e.g. k=0.5 -> update D only every 2nd iteration
                if i % k_inv == 0:
                    print('In iter ' + str(i) + ' we update both G and D.')
                    trainacc_real, trainacc_gen, traincnt = 0., 0., 0
                    for train, numtrain in get_batch_data(
                            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                            tr_target_word, tr_tar_len, FLAGS.batch_size,
                            keep_prob, keep_prob):
                        # _, step = sess.run([optimizer, global_step], feed_dict=train)

                        _, _, step, summary, _trainacc_real, _trainacc_gen = sess.run(
                            [
                                opti_max, opti_min, global_step,
                                train_summary_op, acc_num_real, acc_num_gen
                            ],
                            feed_dict=train)
                        train_summary_writer.add_summary(summary, step)
                        # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                        # sess.run(embed_update)
                        trainacc_real += _trainacc_real  # saver.save(sess, save_dir, global_step=step)
                        trainacc_gen += _trainacc_gen
                        traincnt += numtrain
                else:
                    print('In iter ' + str(i) + ' we update only G.')
                    trainacc_real, trainacc_gen, traincnt = 0., 0., 0
                    for train, numtrain in get_batch_data(
                            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                            tr_target_word, tr_tar_len, FLAGS.batch_size,
                            keep_prob, keep_prob):
                        # _, step = sess.run([optimizer, global_step], feed_dict=train)

                        _, step, summary, _trainacc_real, _trainacc_gen = sess.run(
                            [
                                opti_max, global_step, train_summary_op,
                                acc_num_real, acc_num_gen
                            ],
                            feed_dict=train)
                        train_summary_writer.add_summary(summary, step)
                        # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                        # sess.run(embed_update)
                        trainacc_real += _trainacc_real  # saver.save(sess, save_dir, global_step=step)
                        trainacc_gen += _trainacc_gen
                        traincnt += numtrain

            #Testing occurs in every iteration, regardless of what networks have been updated.
            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num_real, alpha_fw, alpha_bw, alpha_t_l,
                            alpha_t_r, true_y, pred_y, prob_real
                        ],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr, _p_g, _y_real, _prob_real = sess.run(
                        [
                            loss, acc_num_real, true_y, pred_y, prob_real,
                            alpha_fw, alpha_bw, alpha_t_l, alpha_t_r,
                            prob_generated, y_real, prob_real
                        ],
                        feed_dict=test)

                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                yr = np.asarray(_y_real)  # use the fetched labels, not the placeholder
                acc += _acc
                cost += _loss * num
                cnt += num
                p_g = np.asarray(_p_g)

            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc_real = trainacc_real / traincnt
            trainacc_gen = trainacc_gen / traincnt
            acc = acc / cnt
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt *
                         (test_size - remaining_size))) / test_size
            cost = cost / cnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train acc real ={:.6f}, test acc={:.6f}, combined acc={:.6f}'
                .format(i, cost, trainacc_real, acc, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)
            if acc > max_acc:
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

            #Writing File
            if WriteFile:
                avg_p_real = np.mean(
                    np.multiply(_prob_real, _y_real)
                )  #average probability assigned to the correct class for real data
                avg_p_gen = np.mean(
                    p_g, axis=0
                )[3]  #average probability assigned to the correct class for generated data
                Added = [[i], [cost], [trainacc_real], [acc], [avg_p_gen]]
                Results_File = np.concatenate((Results_File, Added), 1)

            if np.isnan(_loss):
                print('Loss became NaN; aborting this run.')
                max_acc = max_acc * (
                    (i / 200)**2
                )  # penalizes runs that diverge early (used during hyperparameter optimization)
                break

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        fp = open(FLAGS.prob_file, 'w')
        for item in max_prob:
            fp.write(' '.join([str(it) for it in item]) + '\n')
        fp = open(FLAGS.prob_file + '_fw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_fw):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_bw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_bw):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tl', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tl):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tr', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tr):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate_dis={},Learning_rate_gen={}, momentum_dis={},momentum_gen={}, iter_num={}, batch_size={}, hidden_num={}, l2={},k={}'
            .format(learning_rate_dis, learning_rate_gen, momentum_dis,
                    momentum_gen, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg, k))

        if WriteFile:
            #Saving training information as csv file
            dateTimeObj = datetime.now()
            save_dir = '/Results_Run_Adversarial/Run_' + str(
                dateTimeObj) + '_lr' + str(learning_rate_dis) + '_lrg' + str(
                    learning_rate_gen) + '_kp' + str(
                        keep_prob) + '_mom_d' + str(
                            momentum_dis) + '_mom_g' + str(
                                momentum_gen) + '_k' + str(k) + '.csv'
            np.savetxt(save_dir, Results_File, delimiter=",")

        return max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0,
                                 1), max_fw.tolist(), max_bw.tolist(
                                 ), max_tl.tolist(), max_tr.tolist()

if __name__ == '__main__':
    tf.app.run()
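
In the adversarial example above, opti_min takes a gradient step on the loss with respect to the discriminator variables (var_D) and opti_max steps on -loss with respect to the generator variables (var_G); the hyperparameter k controls how often each step runs. A small sketch of that schedule, assuming the same semantics for k as in the loop above (`update_plan` is an illustrative helper, not part of the code):

def update_plan(n_iter, k):
    # For each 1-based iteration, report which networks receive an optimizer step.
    # k >= 1: D is updated every iteration, G only every k-th iteration.
    # k < 1 : G is updated every iteration, D only every round(1/k)-th iteration.
    plan = []
    for i in range(1, n_iter + 1):
        if k >= 1:
            plan.append('D+G' if i % k == 0 else 'D')
        else:
            k_inv = int(round(1 / k))
            plan.append('D+G' if i % k_inv == 0 else 'G')
    return plan

print(update_plan(6, 3))    # ['D', 'D', 'D+G', 'D', 'D', 'D+G']
print(update_plan(6, 0.5))  # ['G', 'D+G', 'G', 'D+G', 'G', 'D+G']
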
def main(train_path,
         test_path,
         accuracyOnt,
         trainAccuracyOnt,
         test_size,
         remaining_size,
         learning_rate=FLAGS.learning_rate,
         keep_prob=FLAGS.keep_prob1,
         momentum=0.9,
         l2=FLAGS.l2_reg,
         number_of_heads=FLAGS.heads):
    print_config()

    word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
    word_embedding = tf.constant(w2v, name='word_embedding')

    keep_prob1 = tf.placeholder(tf.float32)
    keep_prob2 = tf.placeholder(tf.float32)

    with tf.name_scope('inputs'):
        x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
        sen_len = tf.placeholder(tf.int32, None)

        x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        sen_len_bw = tf.placeholder(tf.int32, [None])

        target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
        tar_len = tf.placeholder(tf.int32, [None])

    inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
    inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
    target = tf.nn.embedding_lookup(word_embedding, target_words)

    alpha_fw, alpha_bw = None, None
    prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
        inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len, keep_prob1,
        keep_prob2, l2, 'all', number_of_heads)

    loss = loss_func(y, prob)
    acc_num, acc_prob = acc_func(y, prob)
    global_step = tf.Variable(0, name='tr_global_step', trainable=False)
    optimizer = tf.train.MomentumOptimizer(learning_rate=FLAGS.learning_rate,
                                           momentum=momentum).minimize(
                                               loss, global_step=global_step)
    # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
    true_y = tf.argmax(y, 1)
    pred_y = tf.argmax(prob, 1)

    title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
        FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
        FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        sess.run(tf.global_variables_initializer())

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        te_x_ont, te_sen_len_ont, te_x_bw_ont, te_sen_len_bw_ont, te_y_ont, te_target_word_ont, te_tar_len_ont, _, _, _ = load_inputs_twitter(
            FLAGS.remaining_test_path, word_id_mapping, FLAGS.max_sentence_len,
            'TC', is_r, FLAGS.max_target_len)

        tr_x_ont, tr_sen_len_ont, tr_x_bw_ont, tr_sen_len_bw_ont, tr_y_ont, tr_target_word_ont, tr_tar_len_ont, _, _, _ = load_inputs_twitter(
            FLAGS.remaining_train_path, word_id_mapping,
            FLAGS.max_sentence_len, 'TC', is_r, FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        all_training_losses, all_training_accuracies = [], []
        all_test_losses, all_test_accuracies = [], []

        for i in range(FLAGS.n_iter):
            learning_rate = (0.99) * learning_rate
            number_of_training_examples_correct, number_of_training_examples, training_loss = 0., 0, 0.
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):

                _, step, _trainacc, _training_loss = sess.run(
                    [optimizer, global_step, acc_num, loss], feed_dict=train)

                number_of_training_examples_correct += _trainacc
                number_of_training_examples += numtrain
                training_loss += _training_loss * numtrain

            number_of_test_examples_correct, test_loss, number_of_test_examples = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                            alpha_t_r, true_y, pred_y, prob
                        ],
                        feed_dict=test)
                    # fw += list(_fw)
                    # bw += list(_bw)
                    # tl += list(_tl)
                    # tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [
                            loss, acc_num, true_y, pred_y, prob, alpha_fw,
                            alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=test)
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)

                number_of_test_examples_correct += _acc
                test_loss += _loss * num
                number_of_test_examples += num

            number_of_test_examples_correct_ont, number_of_test_examples_ont = 0., 0
            for test_ont, num_ont in get_batch_data(
                    te_x_ont, te_sen_len_ont, te_x_bw_ont, te_sen_len_bw_ont,
                    te_y_ont, te_target_word_ont, te_tar_len_ont, 2000, 1.0,
                    1.0, False):
                _acc_ont = sess.run(acc_num, feed_dict=test_ont)
                number_of_test_examples_correct_ont += _acc_ont
                number_of_test_examples_ont += num_ont

            number_of_train_examples_correct_ont, number_of_train_examples_ont = 0., 0
            for train_ont, num_train_ont in get_batch_data(
                    tr_x_ont, tr_sen_len_ont, tr_x_bw_ont, tr_sen_len_bw_ont,
                    tr_y_ont, tr_target_word_ont, tr_tar_len_ont, 2000, 1.0,
                    1.0, False):
                _acc_ont_train = sess.run(acc_num, feed_dict=train_ont)
                number_of_train_examples_correct_ont += _acc_ont_train
                number_of_train_examples_ont += num_train_ont

            print(
                'number of training examples={}, correct training examples={}, number of test examples={}, correct test examples={}, number of examples without onto = {}'
                .format(number_of_training_examples,
                        number_of_training_examples_correct,
                        number_of_test_examples,
                        number_of_test_examples_correct,
                        number_of_test_examples_ont))
            training_accuracy = number_of_training_examples_correct / number_of_training_examples
            test_accuracy = number_of_test_examples_correct / number_of_test_examples
            test_accuracy_ont = number_of_test_examples_correct_ont / number_of_test_examples_ont
            train_accuracy_ont = number_of_train_examples_correct_ont / number_of_train_examples_ont
            totalacc_train = (
                (train_accuracy_ont * number_of_train_examples_ont) +
                (trainAccuracyOnt *
                 (number_of_training_examples - number_of_train_examples_ont))
            ) / number_of_training_examples
            totalacc = (
                (test_accuracy_ont * number_of_test_examples_ont) +
                (accuracyOnt *
                 (number_of_test_examples - number_of_test_examples_ont))
            ) / number_of_test_examples
            average_test_loss = test_loss / number_of_test_examples
            average_training_loss = training_loss / number_of_training_examples
            print(
                'Epoch {}: average training loss={:.6f}, train acc={:.6f}, average test loss={:.6f}, test acc={:.6f}, combined acc={:.6f}, accuracy without onto={:.6f}, in-sample with onto = {}'
                .format(i, average_training_loss, training_accuracy,
                        average_test_loss, test_accuracy, totalacc,
                        test_accuracy_ont, totalacc_train))

        # max_acc = test_accuracy
        # max_fw = np.average(np.abs(fw), axis=2)
        # max_bw = np.average(np.abs(bw), axis=2)
        # max_tl = np.average(np.abs(tl), axis=2)
        # max_tr = np.average(np.abs(tr), axis=2)
        # max_ty = ty
        # max_py = py
        # max_prob = p

        max_acc = test_accuracy
        max_fw = np.squeeze(fw)
        max_bw = np.squeeze(bw)
        max_tl = np.squeeze(tl)
        max_tr = np.squeeze(tr)
        max_ty = ty
        max_py = py
        max_prob = p
        # print(max_fw)
        # print(np.shape(max_fw))

        # w1 = tf.get_variable("head_w_hiddenstl0")
        # print(w1.eval(session=sess))

        # fp = open(FLAGS.prob_file + '_multihead' + str(FLAGS.year) + '.txt', 'w')
        # for y1, y2, item in zip(max_ty, max_py, max_prob):
        #     fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(it) for it in item]) + '\n')
        #
        # with open(FLAGS.prob_file + '_fw_multihead' + str(FLAGS.year) + '.txt', 'w') as outfile:
        #     np.savetxt(outfile, max_fw)
        #
        # with open(FLAGS.prob_file + '_bw_multihead' + str(FLAGS.year) + '.txt', 'w') as outfile:
        #     np.savetxt(outfile, max_bw)
        #
        # with open(FLAGS.prob_file + '_tl_multihead' + str(FLAGS.year) + '.txt', 'w') as outfile:
        #     np.savetxt(outfile, max_tl)
        #
        # with open(FLAGS.prob_file + '_tr_multihead' + str(FLAGS.year) + '.txt', 'w') as outfile:
        #     np.savetxt(outfile, max_tr)

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return training_accuracy, max_acc, totalacc_train, totalacc, test_accuracy_ont, np.where(
            np.subtract(max_py, max_ty) == 0, 0, 1), max_fw.tolist(
            ), max_bw.tolist(), max_tl.tolist(), max_tr.tolist()
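
Several of the examples report a combined accuracy that weights the neural model's accuracy on the instances the ontology reasoner could not classify (remaining_size of them) against the ontology's own accuracy on the rest of the test set. A worked sketch of that weighted average (the helper name `combined_accuracy` is illustrative):

def combined_accuracy(acc_neural, acc_ontology, test_size, remaining_size):
    # acc_neural  : accuracy on the remaining_size instances left to the neural model
    # acc_ontology: accuracy on the instances handled by the ontology
    return (acc_neural * remaining_size
            + acc_ontology * (test_size - remaining_size)) / test_size

# e.g. 80% on 150 leftover instances, 90% on the other 350 of a 500-instance test set
print(combined_accuracy(0.80, 0.90, test_size=500, remaining_size=150))  # 0.87
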
Example #26
0
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         learning_rate=0.09,
         keep_prob=0.3,
         momentum=0.85,
         l2=0.00001):
    #print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        # save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        # saver = saver_func(save_dir)

        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')
        restore_pth = 'savedModel' + str(FLAGS.year)
        meta = '-2444'
        if restore_pth is not None and meta is not None:
            restore_path = restore_pth + '/'
            restore_meta_path = restore_pth + '/' + meta + '.meta'
            restore = tf.train.import_meta_graph(restore_meta_path)
            restore.restore(sess, tf.train.latest_checkpoint(restore_path))

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        train_time = 0
        max_time = 0

        print("number of training instances: {}, number of test instances: {}".
              format(len(tr_y), len(te_y)))

        cost_func_test = []
        cost_func_train = []
        acc_func_train = []
        acc_func_test = []

        i = 0
        converged = False
        all_trainacc = []

        while i < FLAGS.n_iter and not converged:
            trainacc, trainloss, traincnt = 0., 0., 1
            start_time = time.time()
            #for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word,
            #                                     tr_tar_len,
            #                                    FLAGS.batch_size, keep_prob, keep_prob):
            # _, step = sess.run([optimizer, global_step], feed_dict=train)
            # _, _trainloss, step, summary, _trainacc = sess.run([optimizer, loss, global_step, train_summary_op, acc_num],
            #                                           feed_dict=train)
            #train_summary_writer.add_summary(summary, step)
            # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
            # sess.run(embed_update)
            #trainacc += _trainacc  # saver.save(sess, save_dir, global_step=step)
            #traincnt += numtrain
            #trainloss += _trainloss * numtrain

            elapsed_time = time.time() - start_time
            train_time += elapsed_time

            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                            alpha_t_r, true_y, pred_y, prob
                        ],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [
                            loss, acc_num, true_y, pred_y, prob, alpha_fw,
                            alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=test)
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num
            print(
                'all samples={}, correct prediction={}, training time={}, training time so far={}'
                .format(cnt, acc, elapsed_time, train_time))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            #totalacc = ((acc * remaining_size) + (accuracyOnt * (test_size - remaining_size))) / test_size
            cost = cost / cnt
            trainloss = trainloss / traincnt
            cost_func_test.append(cost)
            cost_func_train.append(trainloss)
            acc_func_test.append(acc)
            acc_func_train.append(trainacc)
            print(
                'Iter {}: mini-batch loss={:.6f}, train loss={:.6f}, train acc={:.6f}, test acc={:.6f}'
                .format(i, cost, trainloss, trainacc, acc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)

            all_trainacc.append(trainacc)
            if i > 2:  # want to compare current train accuracy with train acc previous iterations
                if (all_trainacc[i] - all_trainacc[i - 1] < 0.001) and (all_trainacc[i - 1] - all_trainacc[i - 2] < 0.001)\
                        and (all_trainacc[i-2] - all_trainacc[i-3] < 0.001):
                    converged = True
            i += 1

            if acc > max_acc:
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        # Plotting chart of training and testing loss as a function of iterations
        iterations = list(range(i))
        plt.plot(iterations, cost_func_train, label='Cost func train')
        plt.plot(iterations, cost_func_test, label='Cost func test')
        plt.title('Model loss k=1')
        plt.ylabel('Loss')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

        # Plotting chart of training and testing accuracies as a function of iterations
        iterations = list(range(i))
        plt.plot(iterations, acc_func_train, label='Acc func train')
        plt.plot(iterations, acc_func_test, label='Cost func test')
        plt.title('Model accuracy k=1')
        plt.ylabel('Loss')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

        fp = open(FLAGS.prob_file, 'w')
        for item in max_prob:
            fp.write(' '.join([str(it) for it in item]) + '\n')
        fp = open(FLAGS.prob_file + '_fw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_fw):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_bw', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_bw):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tl', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tl):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')
        fp = open(FLAGS.prob_file + '_tr', 'w')
        for y1, y2, ws in zip(max_ty, max_py, max_tr):
            fp.write(
                str(y1) + ' ' + str(y2) + ' ' +
                ' '.join([str(w) for w in ws[0]]) + '\n')

        print('Optimization Finished! Final acc={}'.format(acc))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return acc, np.where(np.subtract(max_py, max_ty) == 0, 0, 1), max_fw.tolist(), max_bw.tolist(), \
               max_tl.tolist(), max_tr.tolist()
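
The per-class precision, recall, and F1 printed at the end of the example above are obtained with average=None and then averaged by dividing the per-class sums by the number of classes, i.e. an unweighted macro average. A minimal scikit-learn sketch (the toy labels are made up for illustration):

import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 1, 2, 0])
n_class = 3

P = precision_score(y_true, y_pred, average=None)  # one value per class
R = recall_score(y_true, y_pred, average=None)
F1 = f1_score(y_true, y_pred, average=None)
print('P:', P, 'avg=', sum(P) / n_class)   # equivalent to average='macro'
print('R:', R, 'avg=', sum(R) / n_class)
print('F1:', F1, 'avg=', sum(F1) / n_class)
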
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         learning_rate=0.09,
         keep_prob=0.5,
         momentum=0.9,
         l2=0.0001):
    print_config()

    word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
    word_embedding = tf.constant(w2v, name='word_embedding')

    keep_prob1 = tf.placeholder(tf.float32)
    keep_prob2 = tf.placeholder(tf.float32)

    with tf.name_scope('inputs'):
        x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
        sen_len = tf.placeholder(tf.int32, None)

        x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        sen_len_bw = tf.placeholder(tf.int32, [None])

        target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
        tar_len = tf.placeholder(tf.int32, [None])

    inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
    inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
    target = tf.nn.embedding_lookup(word_embedding, target_words)

    alpha_fw, alpha_bw = None, None
    attention, attention_masked = lcr_rot(inputs_fw, inputs_bw, sen_len,
                                          sen_len_bw, target, tar_len,
                                          keep_prob1, keep_prob2, l2, 'all')

    # loss = loss_func(y, prob)
    # acc_num, acc_prob = acc_func(y, prob)
    global_step = tf.Variable(0, name='tr_global_step', trainable=False)
    # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum).minimize(loss,
    #                                                                                                 global_step=global_step)
    # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
    true_y = tf.argmax(y, 1)
    # pred_y = tf.argmax(prob, 1)

    title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
        FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
        FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        sess.run(tf.global_variables_initializer())

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        te_x_ont, te_sen_len_ont, te_x_bw_ont, te_sen_len_bw_ont, te_y_ont, te_target_word_ont, te_tar_len_ont, _, _, _ = load_inputs_twitter(
            FLAGS.remaining_test_path, word_id_mapping, FLAGS.max_sentence_len,
            'TC', is_r, FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        all_training_losses, all_training_accuracies = [], []
        all_test_losses, all_test_accuracies = [], []

        for i in range(FLAGS.n_iter):
            learning_rate = (0.99) * learning_rate
            number_of_training_examples_correct, number_of_training_examples, training_loss = 0., 0, 0.
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):

                step = sess.run([global_step], feed_dict=train)

                # number_of_training_examples_correct += _trainacc
                # number_of_training_examples += numtrain
                # training_loss += _training_loss * numtrain

            number_of_test_examples_correct, test_loss, number_of_test_examples = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            m = 1
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 3, 1.0,
                                            1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _output_old, _output = sess.run([output_old, output],
                                                    feed_dict=test)
                else:
                    if m == 1:
                        _attention, _attention_masked = sess.run(
                            [attention, attention_masked], feed_dict=test)
                        m += 1
                        print(_attention)
                        print(np.shape(_attention))
                        print(_attention_masked)
                        print(np.shape(_attention_masked))
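
All of these examples feed the graph through a get_batch_data generator built on batch_index, which yields index lists for one pass over the data, optionally shuffled. The project's own batch_index is not shown here; the sketch below is an assumption about its behaviour based on how it is called (length, batch size, number of epochs, shuffle flag), not the actual implementation:

import numpy as np

def batch_index(length, batch_size, n_epochs=1, is_shuffle=True):
    # Yield arrays of at most batch_size row indices, reshuffled each epoch if requested.
    for _ in range(n_epochs):
        order = np.random.permutation(length) if is_shuffle else np.arange(length)
        for start in range(0, length, batch_size):
            yield order[start:start + batch_size]

# e.g. iterate a toy dataset of 10 rows in mini-batches of 4
data = np.arange(10) * 10
for idx in batch_index(len(data), 4, is_shuffle=False):
    print(idx, data[idx])
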
Example #28
0
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         momentum=0.85):
    # print_config()
    l2 = FLAGS.l2_reg
    learning_rate = FLAGS.learning_rate

    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.constant(FLAGS.keep_prob1, tf.float32)
        keep_prob2 = tf.constant(FLAGS.keep_prob2, tf.float32)

        lambda_0 = tf.constant(FLAGS.lambda_0, tf.float32)
        lambda_1 = tf.constant(1 - FLAGS.lambda_0, tf.float32)

        with tf.name_scope('inputs'):
            y_sen = tf.placeholder(tf.float32, [None, FLAGS.n_class],
                                   name='y_sentence_level')
            n_asp = tf.placeholder(tf.int32, [None], name='n_asp')

            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len],
                               name='x')
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class], name='y')
            sen_len = tf.placeholder(tf.int32, None, name='sentence_length')

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len],
                                  name='x_backwards')
            sen_len_bw = tf.placeholder(tf.int32, [None],
                                        name='sentence_length_backwards')

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len],
                                          name='target_words')
            tar_len = tf.placeholder(tf.int32, [None], name='target_length')

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        alpha_fw, alpha_bw = None, None
        prob, prob_sen, _, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            n_asp, inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss_asp = loss_func(y, prob)
        loss_sen = loss_func(y_sen, prob_sen)
        loss = lambda_1 * loss_asp + lambda_0 * loss_sen
        acc_num, acc_prob, f1_micro, f1_macro, f1_weighted = acc_func(
            y, prob, y_sen, prob_sen, thre=FLAGS.threshold)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum).minimize(
                                                   loss,
                                                   global_step=global_step)
        # optimizer = train_func(loss, FLAGS.learning_rate, global_step)
        # true_y = tf.argmax(y, 1)
        true_y = y_sen
        pred_y = tf.cast(tf.math.greater_equal(prob_sen, [FLAGS.threshold]),
                         tf.int32)
        # pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import datetime
        # timestamp = str(int(time.time()))
        timestamp = datetime.datetime.now().isoformat()
        _dir = str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        test_f1_micro = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, f1_micro, test_loss, test_acc, test_f1_micro, _dir, title, sess)
        # validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        # saver = saver_func(save_dir)

        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, '/-')

        if FLAGS.is_r == '1':
            is_r = True
        else:
            is_r = False

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_y_sen, tr_target_word, tr_tar_len, _, _, _, tr_n_asp = load_inputs_twitter(
            train_path,
            word_id_mapping,
            FLAGS.max_sentence_len,
            'TC',
            is_r,  # reverse
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_y_sen, te_target_word, te_tar_len, _, _, _, te_n_asp = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           n_asp_b,
                           yi,
                           y_sen_i,
                           target,
                           tl,
                           batch_size,
                           is_shuffle=True):
            # for index in batch_index(len(yi), batch_size, 1, is_shuffle):
            for index in batch_index(len(n_asp_b), batch_size, 1, is_shuffle):
                selected_rows = itemgetter(*index)(list(n_asp_b.values()))
                r_index = []
                for idxs in selected_rows:
                    if idxs != []:
                        r_index.extend(idxs)
                _n_asp = np.asarray(
                    [len(tup) for tup in list(selected_rows) if len(tup) != 0])
                # print(f"length of _n_asp: {_n_asp.shape[0]}")
                feed_dict = {
                    x: x_f[r_index],
                    x_bw: x_b[r_index],
                    y: yi[r_index],
                    y_sen: y_sen_i[index],
                    n_asp: _n_asp,
                    sen_len: sen_len_f[r_index],
                    sen_len_bw: sen_len_b[r_index],
                    target_words: target[r_index],
                    tar_len: tl[r_index]
                }
                yield feed_dict, len(r_index)

        max_acc = 0.
        max_f1 = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        for i in range(FLAGS.n_iter):
            trainacc, trainf1, traincnt, train_batchcnt = 0., 0., 0, 0
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_n_asp, tr_y, \
                tr_y_sen, tr_target_word, tr_tar_len, FLAGS.batch_size):
                # _, step = sess.run([optimizer, global_step], feed_dict=train)
                _, step, summary, _trainacc, _trainf1 = sess.run(
                    [
                        optimizer, global_step, train_summary_op, acc_num,
                        f1_micro
                    ],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)
                trainacc += _trainacc
                # saver.save(sess, save_dir, global_step=step)
                trainf1 += _trainf1
                traincnt += numtrain
                train_batchcnt += 1
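            # evaluate on the test set (single batch of up to 2000 samples, no shuffling)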
            acc, f1, cost, cnt, test_batchcnt = 0., 0., 0., 0, 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_n_asp, te_y,
                                            te_y_sen, te_target_word,
                                            te_tar_len, 2000, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _f1, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [
                            loss, acc_num, f1_micro, alpha_fw, alpha_bw,
                            alpha_t_l, alpha_t_r, true_y, pred_y, prob
                        ],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _f1, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [
                            loss, acc_num, f1_micro, true_y, pred_y, prob,
                            alpha_fw, alpha_bw, alpha_t_l, alpha_t_r
                        ],
                        feed_dict=test)
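                # NOTE: only the current batch's outputs are kept below; with a
                # test batch size of 2000 the whole test set is assumed to fit
                # in a single batch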
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                f1 += _f1
                # cost += _loss * num
                cost += _loss
                cnt += num
                test_batchcnt += 1
            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc = trainacc / traincnt
            trainf1 = trainf1 / train_batchcnt
            acc = acc / cnt
            f1 = f1 / test_batchcnt
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt *
                         (test_size - remaining_size))) / test_size
            # cost = cost / cnt
            cost = cost / test_batchcnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train acc={:.6f}, train_f1_micro={:.6f}, '
                'test acc={:.6f}, test_f1_micro={:.6f}, combined acc={:.6f}'.format(
                    i, cost, trainacc, trainf1, acc, f1, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc,
                                   test_f1_micro: f1
                               })
            test_summary_writer.add_summary(summary, step)
            # if acc > max_acc:
            if f1 > max_f1:
                max_acc = acc
                max_f1 = f1
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

        # encode training data
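        # run the trained encoder over the full train/test sets to export their
        # sentence representations
        # NOTE: tr_n_asp/te_n_asp are fed exactly as returned by load_inputs_twitter;
        # if the n_asp placeholder expects per-sentence aspect counts (as computed
        # in get_batch_data), the same conversion would be needed here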
        train_feed_dict = {
            x: tr_x,
            x_bw: tr_x_bw,
            y: tr_y,
            y_sen: tr_y_sen,
            n_asp: tr_n_asp,
            sen_len: tr_sen_len,
            sen_len_bw: tr_sen_len_bw,
            target_words: tr_target_word,
            tar_len: tr_tar_len
        }
        test_feed_dict = {
            x: te_x,
            x_bw: te_x_bw,
            y: te_y,
            y_sen: te_y_sen,
            n_asp: te_n_asp,
            sen_len: te_sen_len,
            sen_len_bw: te_sen_len_bw,
            target_words: te_target_word,
            tar_len: te_tar_len
        }
        tr_outputs = sess.run([outputs], feed_dict=train_feed_dict)
        te_outputs = sess.run([outputs], feed_dict=test_feed_dict)

        with open("results/embeddings/train_emb.npy", 'wb') as f:
            np.save(f, tr_outputs)
        with open("results/embeddings/test_emb.npy", 'wb') as f:
            np.save(f, te_outputs)

        # per-class precision/recall/F1 on the best epoch's predictions
        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('(Individual aspect) P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('(Individual aspect) R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('(Individual aspect) F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

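        # attention weights and labels from the best epoch, kept for qualitative analysis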
        prob_data = {
            'forward_att': max_fw,
            'backward_att': max_bw,
            'target_left_att': max_tl,
            'target_right_att': max_tr,
            'true': max_ty,
            'predict': max_py
        }
        # pickle requires a binary file handle
        with open(FLAGS.prob_file, 'wb') as fp:
            pickle.dump(prob_data, fp)
        # for item in max_prob:
        #     fp.write(' '.join([str(it) for it in item]) + '\n')
        # fp = open(FLAGS.prob_file + '_fw', 'w')
        # for y1, y2, ws in zip(max_ty, max_py, max_fw):
        #     fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')
        # fp = open(FLAGS.prob_file + '_bw', 'w')
        # for y1, y2, ws in zip(max_ty, max_py, max_bw):
        #     fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')
        # fp = open(FLAGS.prob_file + '_tl', 'w')
        # for y1, y2, ws in zip(max_ty, max_py, max_tl):
        #     fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')
        # fp = open(FLAGS.prob_file + '_tr', 'w')
        # for y1, y2, ws in zip(max_ty, max_py, max_tr):
        #     fp.write(str(y1) + ' ' + str(y2) + ' ' + ' '.join([str(w) for w in ws[0]]) + '\n')

        print('Optimization Finished! Max acc={}, Max micro f1={}'.format(
            max_acc, max_f1))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0, 1), \
            max_fw.tolist(), max_bw.tolist(), max_tl.tolist(), max_tr.tolist()