def __init__(self):
    """Set up sub-modules, data handles, and the TF input placeholders."""
    self.Vocabulary_Size = 0
    # Scalar fed at train time; original labels it the gradient-reversal scaler.
    self.l = tf.placeholder(tf.float32, [], name='l')

    # Data sources and sub-networks.
    self.dataset = Combined_Data_Processor.Model()
    self.bi_dataset = BiCorpus_Data_Processor.Data_holder()
    self.SE = Sentence_Representation.Conv_Rep()
    self.Fea_GEN = FT.Feature_Translator(length=50)

    self.Word_Embedding_Dimension = 100
    embed_dim = self.Word_Embedding_Dimension

    # Label placeholders.
    self.Y_ = tf.placeholder(dtype=tf.int32, shape=[None])
    self.Y_2 = tf.placeholder(dtype=tf.int32, shape=[None])
    self.Y = tf.placeholder(dtype=tf.float32, shape=[None, 1])

    # Pre-embedded sequence inputs, presumably [batch, time, embed_dim]
    # — TODO confirm feed shapes against the training loop.
    # Creation order is kept so default TF op names match the original graph.
    def _seq_input():
        return tf.placeholder(dtype=tf.float32,
                              shape=[None, None, embed_dim])

    self.X_P = _seq_input()
    self.X_Q = _seq_input()
    self.X_Eng = _seq_input()
    self.X_Kor = _seq_input()
def __init__(self):
    """Wire up data handles, sub-networks, and the TF input placeholders."""
    # Data sources and sub-networks.
    self.dataset = Combined_Data_Processor.Model()
    self.bi_dataset = BiCorpus_Data_Processor.Data_holder()
    self.SE = Sentence_Representation.Conv_Rep()
    self.Fea_GEN = FT.Feature_Translator(length=50)

    self.Word_Embedding_Dimension = 100
    embed_dim = self.Word_Embedding_Dimension

    # Two-column float target (presumably a one-hot binary label —
    # TODO confirm against the loss construction).
    self.Y = tf.placeholder(dtype=tf.float32, shape=[None, 2])

    # Pre-embedded variable-length sequence inputs: [batch, time, embed_dim].
    # setattr loop preserves the original creation order (X_P, X_Q,
    # X_Eng, X_Kor) so default TF op names are unchanged.
    for attr in ('X_P', 'X_Q', 'X_Eng', 'X_Kor'):
        setattr(self, attr,
                tf.placeholder(dtype=tf.float32,
                               shape=[None, None, embed_dim]))
def __init__(self):
    """Load English GloVe and Korean word2vec embeddings and build all
    TF graph inputs for the bilingual model.

    Original note (translated from Korean): "TODO: read word2vec,
    convert it to a tensor, and hand it over."
    """
    # FIX: must be assigned before the embedding matrices are reshaped
    # below; the original assigned it only much later, after its first
    # use, which raised AttributeError.
    self.Word_Embedding_Dimension = 100

    def _end_vector():
        # Synthetic 100-d embedding for the '#END' marker token.
        vec = []
        for i in range(100):
            pm = -1 if i % 2 == 0 else 1
            vec.append(0.002 * pm * i)
        return vec

    def _start_vector():
        # Synthetic 100-d embedding for the '#START' marker token.
        # Note: the i % 3 branch is reached only for odd i (elif).
        vec = []
        for i in range(100):
            pm = 1
            if i % 2 == 0:
                pm = 0.1
            elif i % 3 == 0:
                pm = -1
            vec.append(0.1 * pm)
        return vec

    def _load_embedding(path, sep):
        # Read "<word><sep><v1><sep>...<vN>" lines, prepending the
        # synthetic #END / #START rows (in that order, as before).
        words = ['#END', '#START']
        vectors = [_end_vector(), _start_vector()]
        # FIX: context manager closes the file; the original leaked
        # both handles.
        with codecs.open(path, 'r', 'utf-8') as f:
            for line in f:
                tokens = line.split(sep)
                words.append(tokens.pop(0))
                vectors.append(tokens)
        return words, vectors

    # ---- English GloVe --------------------------------------------------
    in_path_glove = "C:\\Users\\Administrator\\Desktop\\qadataset\\glove6B100d.txt"
    self.words, eng_vectors = _load_embedding(in_path_glove, ' ')
    eng_matrix = numpy.array(eng_vectors, 'f').reshape(
        (-1, self.Word_Embedding_Dimension))
    self.dictionary = numpy.array(self.words)
    # argsort is taken before the in-place sort so glove_arg_index maps
    # the sorted dictionary back to rows of the embedding matrix.
    self.glove_arg_index = self.dictionary.argsort()
    self.dictionary.sort()
    self.word_embedding_eng_tensor = tf.convert_to_tensor(
        eng_matrix, dtype=tf.float32, name='eng_embedding')

    # ---- Korean word2vec ------------------------------------------------
    self.kor_words, kor_vectors = _load_embedding(
        'C:\\Users\\Administrator\\Desktop\\qadataset\\kor_word2vec_100d', '\t')
    print(self.kor_words[0])
    print(self.kor_words[1])
    # FIX (consistency): convert to a float matrix exactly like the
    # English path; the original handed the raw string tokens straight
    # to tf.convert_to_tensor.
    kor_matrix = numpy.array(kor_vectors, 'f').reshape(
        (-1, self.Word_Embedding_Dimension))
    self.kor_dictionary = numpy.array(self.kor_words)
    self.word2vec_arg_index = self.kor_dictionary.argsort()
    self.kor_dictionary.sort()
    self.word_embedding_kor_tensor = tf.convert_to_tensor(
        kor_matrix, dtype=tf.float32, name='kor_embedding')

    # Gradient reversal scaler (fed at train time).
    self.l = tf.placeholder(tf.float32, [], name='l')

    # Data sources and sub-networks.
    self.dataset = Combined_Data_Processor.Model()
    self.bi_dataset = BiCorpus_Data_Processor.Data_holder()
    self.SE = Sentence_Representation.Conv_Rep()
    self.Fea_GEN = FT.Feature_Translator(length=50)

    # Label placeholders.
    self.Y_ = tf.placeholder(dtype=tf.int32, shape=[None])
    self.Y_2 = tf.placeholder(dtype=tf.int32, shape=[None])
    self.Y = tf.placeholder(dtype=tf.float32, shape=[None, 2])

    # Pre-embedded [batch, time, dim] sequence inputs.
    self.X_P = tf.placeholder(
        dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
    self.X_Q = tf.placeholder(
        dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
    self.X_Eng = tf.placeholder(
        dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
    self.X_Kor = tf.placeholder(
        dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])

    # Vocabulary sizes include the two synthetic marker rows.
    self.eng_vocab_size = self.dictionary.shape[0]
    self.kor_vocab_size = self.kor_dictionary.shape[0]

    # Per-batch start/end token ids, shape [batch, 1].
    self.eng_start_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])
    self.eng_end_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])
    self.kor_start_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])
    self.kor_end_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])

    # Token-id sequence inputs, shape [batch, time].
    self.encoder_inputs_eng = tf.placeholder(dtype=tf.int32, shape=[None, None])
    self.encoder_inputs_eng_q = tf.placeholder(dtype=tf.int32, shape=[None, None])
    self.encoder_inputs_kor = tf.placeholder(dtype=tf.int32, shape=[None, None])
    self.shared_inputs_eng = tf.placeholder(dtype=tf.int32, shape=[None, None])
    self.shared_inputs_kor = tf.placeholder(dtype=tf.int32, shape=[None, None])

    # FIX: the original read `tf.concat([self.eng_start_token, enco])`,
    # which referenced an undefined name and omitted the axis argument.
    # Presumably the English decoder input is the start token prepended
    # (along time, axis=1) to the English encoder sequence — TODO(review):
    # confirm the intended second operand against the training code.
    self.decoder_inputs_eng = tf.concat(
        [self.eng_start_token, self.encoder_inputs_eng], axis=1)
    self.decoder_inputs_kor = tf.placeholder(dtype=tf.int32, shape=[None, None])

    self.class_label = tf.placeholder(dtype=tf.float32, shape=[None, None])
    self.domain_label = tf.placeholder(dtype=tf.float32, shape=[None, None])

    # Model hyper-parameters.
    self.hidden_size = 200
    self.keep_prob = 0.8

    # Actual (unpadded) lengths of each sequence batch, via the
    # module-level seq_length helper.
    self.encoder_eng_length = seq_length(self.encoder_inputs_eng)
    self.encoder_kor_length = seq_length(self.encoder_inputs_kor)
    self.shared_length_eng = seq_length(self.shared_inputs_eng)
    self.shared_length_kor = seq_length(self.shared_inputs_kor)

    self.attention_hidden_size = 400
    self.batch_size = 64
    self.max_decoder_length = 50
    self.embedding_size = 100

    # Trainable embedding tables initialized from the pretrained matrices.
    # FIX: tf.constant_initializer accepts Python values / numpy arrays,
    # not tf.Tensor objects — the original passed the converted tensors,
    # which fails at variable creation. The numpy matrices are used here.
    self.word_embedding_eng = tf.get_variable(
        "encoder_embeddings",
        shape=[self.eng_vocab_size, self.embedding_size],
        dtype=tf.float32, trainable=True,
        initializer=tf.constant_initializer(eng_matrix))
    self.word_embedding_kor = tf.get_variable(
        "decoder_embeddings",
        shape=[self.kor_vocab_size, self.embedding_size],
        dtype=tf.float32, trainable=True,
        initializer=tf.constant_initializer(kor_matrix))