Example #1
    def __init__(self):
        self.Vocabulary_Size = 0

        self.l = tf.placeholder(tf.float32, [],
                                name='l')  # Gradient reversal scaler

        self.dataset = Combined_Data_Processor.Model()
        self.bi_dataset = BiCorpus_Data_Processor.Data_holder()

        self.SE = Sentence_Representation.Conv_Rep()
        self.Fea_GEN = FT.Feature_Translator(length=50)

        self.Word_Embedding_Dimension = 100

        self.Y_ = tf.placeholder(dtype=tf.int32, shape=[None])
        self.Y_2 = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
        self.X_P = tf.placeholder(
            dtype=tf.float32,
            shape=[None, None, self.Word_Embedding_Dimension])
        self.X_Q = tf.placeholder(
            dtype=tf.float32,
            shape=[None, None, self.Word_Embedding_Dimension])

        self.X_Eng = tf.placeholder(
            dtype=tf.float32,
            shape=[None, None, self.Word_Embedding_Dimension])
        self.X_Kor = tf.placeholder(
            dtype=tf.float32,
            shape=[None, None, self.Word_Embedding_Dimension])
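
A minimal feed-time sketch of the placeholder contract above (TensorFlow 1.x
assumed; the toy op, shapes, and fill values are illustrative, not from the
original model):

    import numpy as np
    import tensorflow as tf

    dim = 100  # matches Word_Embedding_Dimension
    X_P = tf.placeholder(tf.float32, [None, None, dim])  # [batch, time, dim]
    Y = tf.placeholder(tf.float32, [None, 1])            # one label per example
    l = tf.placeholder(tf.float32, [], name='l')         # gradient reversal scaler

    out = tf.reduce_mean(X_P) * l  # toy op standing in for the real graph

    with tf.Session() as sess:
        print(sess.run(out, feed_dict={
            X_P: np.zeros((32, 40, dim), np.float32),  # 32 sentences, 40 tokens
            Y: np.zeros((32, 1), np.float32),
            l: 0.5,
        }))
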
Example #2
    def __init__(self):
        self.dataset = Combined_Data_Processor.Model()
        self.bi_dataset = BiCorpus_Data_Processor.Data_holder()

        self.SE = Sentence_Representation.Conv_Rep()
        self.Fea_GEN = FT.Feature_Translator(length=50)

        self.Word_Embedding_Dimension = 100

        self.Y = tf.placeholder(dtype=tf.float32, shape=[None, 2])
        self.X_P = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
        self.X_Q = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])

        self.X_Eng = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
        self.X_Kor = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
Example #3
    def __init__(self):
        """
        추가해야 할 것:
        word2vec 읽어와서 tensor로 변환하여 넘겨주기
        """

        """
        english glove
        """
        in_path_glove = "C:\\Users\\Administrator\\Desktop\\qadataset\\glove6B100d.txt"
        glove_f = codecs.open(in_path_glove, 'r', 'utf-8')

        self.words = []
        self.vectors = []

        # Hand-crafted deterministic vector for the '#END' sentinel token.
        arr = []
        for i in range(100):
            pm = 1

            if i % 2 == 0:
                pm = -1

            arr.append(0.002 * pm * i)
        self.words.append('#END')
        self.vectors.append(arr)

        # Hand-crafted deterministic vector for the '#START' sentinel token.
        arr = []
        for i in range(100):
            pm = 1

            if i % 2 == 0:
                pm = 0.1
            elif i % 3 == 0:
                pm = -1

            arr.append(0.1 * pm)
        self.words.append('#START')
        self.vectors.append(arr)

        # Each GloVe line is "<word> <v1> ... <v100>" separated by spaces.
        for line in glove_f:
            tokens = line.split(' ')
            self.words.append(tokens.pop(0))
            self.vectors.append(tokens)
        glove_f.close()

        self.vectors = numpy.array(self.vectors, 'f').reshape((-1, self.Word_Embedding_Dimension))

        self.dictionary = numpy.array(self.words)
        self.glove_arg_index = self.dictionary.argsort()
        self.dictionary.sort()
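        # argsort() is taken before the in-place sort() so a word located by
        # binary search in the sorted dictionary maps back to its row in
        # self.vectors, e.g. (assumed usage, not shown in this snippet):
        #   row = self.glove_arg_index[numpy.searchsorted(self.dictionary, w)]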
        ###############

        self.word_embedding_eng_tensor = tf.convert_to_tensor(self.vectors, dtype=tf.float32, name='eng_embedding')
        del self.vectors

        """
        korean embedding
        """
        word2vec_kor = codecs.open('C:\\Users\\Administrator\\Desktop\\qadataset\\kor_word2vec_100d', 'r', 'utf-8')
        self.kor_words = []
        self.kor_vectors = []

        # '#END' sentinel vector, same construction as in the English table.
        arr = []
        for i in range(100):
            pm = 1

            if i % 2 == 0:
                pm = -1

            arr.append(0.002 * pm * i)
        self.kor_words.append('#END')
        self.kor_vectors.append(arr)

        # '#START' sentinel vector, same construction as in the English table.
        arr = []
        for i in range(100):
            pm = 1

            if i % 2 == 0:
                pm = 0.1
            elif i % 3 == 0:
                pm = -1

            arr.append(0.1 * pm)
        self.kor_words.append('#START')
        self.kor_vectors.append(arr)

        # Korean word2vec lines are tab-separated: "<word>\t<v1>\t...\t<v100>".
        for line in word2vec_kor:
            tokens = line.split('\t')
            self.kor_words.append(tokens.pop(0))
            self.kor_vectors.append(tokens)
        word2vec_kor.close()

        print(self.kor_words[0])  # sanity check: should be '#END'
        print(self.kor_words[1])  # sanity check: should be '#START'

        self.kor_dictionary = numpy.array(self.kor_words)
        self.word2vec_arg_index = self.kor_dictionary.argsort()
        self.kor_dictionary.sort()
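        # Same argsort-before-sort lookup pattern as the English GloVe table.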
        ###################

        # The tokens read above are strings; cast to a float32 matrix first
        # (mirrors the English branch) before building the tensor.
        self.kor_vectors = numpy.array(self.kor_vectors, 'f').reshape((-1, self.Word_Embedding_Dimension))
        self.word_embedding_kor_tensor = tf.convert_to_tensor(self.kor_vectors, dtype=tf.float32, name='kor_embedding')
        del self.kor_vectors

        self.l = tf.placeholder(tf.float32, [], name='l')  # Gradient reversal scaler

        self.dataset = Combined_Data_Processor.Model()
        self.bi_dataset = BiCorpus_Data_Processor.Data_holder()

        self.SE = Sentence_Representation.Conv_Rep()
        self.Fea_GEN = FT.Feature_Translator(length=50)

        self.Y_ = tf.placeholder(dtype=tf.int32, shape=[None])
        self.Y_2 = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Y = tf.placeholder(dtype=tf.float32, shape=[None, 2])
        self.X_P = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
        self.X_Q = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])

        self.X_Eng = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])
        self.X_Kor = tf.placeholder(dtype=tf.float32, shape=[None, None, self.Word_Embedding_Dimension])

        self.eng_vocab_size = self.dictionary.shape[0]
        self.kor_vocab_size = self.kor_dictionary.shape[0]

        self.eng_start_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])
        self.eng_end_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])
        self.kor_start_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])
        self.kor_end_token = tf.placeholder(dtype=tf.int32, shape=[None, 1])

        self.encoder_inputs_eng = tf.placeholder(dtype=tf.int32, shape=[None, None])
        self.encoder_inputs_eng_q = tf.placeholder(dtype=tf.int32, shape=[None, None])
        self.encoder_inputs_kor = tf.placeholder(dtype=tf.int32, shape=[None, None])

        self.shared_inputs_eng = tf.placeholder(dtype=tf.int32, shape=[None, None])
        self.shared_inputs_kor = tf.placeholder(dtype=tf.int32, shape=[None, None])

        # Prepend the start token along the time axis (teacher-forcing style
        # decoder input); the second operand is an assumption, not confirmed
        # by this snippet.
        self.decoder_inputs_eng = tf.concat([self.eng_start_token, self.encoder_inputs_eng], axis=1)
        self.decoder_inputs_kor = tf.placeholder(dtype=tf.int32, shape=[None, None])
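        # Feed-time sketch (assumed ids, not shown in this snippet): each token
        # placeholder takes one vocabulary index per batch row, e.g.
        #   feed[self.kor_start_token] = numpy.full((batch, 1), start_id, numpy.int32)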

        self.class_label = tf.placeholder(dtype=tf.float32, shape=[None, None])
        self.domain_label = tf.placeholder(dtype=tf.float32, shape=[None, None])

        self.hidden_size = 200
        self.keep_prob = 0.8

        self.encoder_eng_length = seq_length(self.encoder_inputs_eng)
        self.encoder_kor_length = seq_length(self.encoder_inputs_kor)

        self.shared_length_eng = seq_length(self.shared_inputs_eng)
        self.shared_length_kor = seq_length(self.shared_inputs_kor)

        self.attention_hidden_size = 400
        self.batch_size = 64
        self.max_decoder_length = 50

        self.embedding_size = 100

        # A Tensor initializer makes get_variable infer the shape, so shape=
        # must be omitted (tf.constant_initializer only accepts numpy/Python
        # values, not Tensors).
        self.word_embedding_eng = tf.get_variable("encoder_embeddings",
                                                  dtype=tf.float32, trainable=True,
                                                  initializer=self.word_embedding_eng_tensor)
        self.word_embedding_kor = tf.get_variable("decoder_embeddings",
                                                  dtype=tf.float32, trainable=True,
                                                  initializer=self.word_embedding_kor_tensor)
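
seq_length is referenced above but not defined in this example. A common
TF1.x helper of this name derives true (unpadded) lengths from int32 id
batches; a minimal sketch, assuming pad id 0:

    import tensorflow as tf

    def seq_length(sequence):
        # sequence: int32 word ids, shape [batch, time]; id 0 taken as padding
        used = tf.sign(tf.abs(sequence))  # 1 at real tokens, 0 at pads
        return tf.cast(tf.reduce_sum(used, 1), tf.int32)

The id placeholders would then typically be embedded with
tf.nn.embedding_lookup(self.word_embedding_eng, self.encoder_inputs_eng)
before entering the encoder.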