Exemplo n.º 1
0
    def __init__(self, kb, size, num_buckets, rel2seq, batch_size, learning_rate=1e-2):
        """Build vocabulary, length buckets and per-bucket update ops.

        Every fact returned by ``kb.get_all_facts()`` has its relation turned
        into a token sequence via ``rel2seq``. The tokens extend the
        vocabulary and the sequence lengths are histogrammed, then lengths are
        grouped into buckets. For each bucket a gradient op and an Adam update
        op over the variables whose name contains "composition" are created.

        Args:
            kb: knowledge base exposing ``get_all_facts()``; each item is
                unpacked as ``((rel, _, _), _, _)``.
            size: second dimension of the ``rel_grad`` placeholder and of the
                zero-filled ``self._grad_in`` buffer.
            num_buckets: requested number of length buckets.
            rel2seq: callable mapping a relation to a sequence of tokens.
            batch_size: number of columns in the padded input buffer and rows
                in ``self._grad_in``.
            learning_rate: initial value of the non-trainable ``lr`` variable.

        Raises:
            ZeroDivisionError: if ``num_buckets`` is 0.
        """
        self._kb = kb
        self._size = size
        self._batch_size = batch_size
        self._rel2seq = rel2seq
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name="lr")
        self.opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.0)

        # Single pass over the facts: grow the vocabulary (index 0 is reserved
        # for padding) and count how many sequences have each length.
        l_count = {}
        total = 0
        max_l = 0
        self._vocab = {"#PADDING#": 0}
        for (rel, _, _), _, _ in kb.get_all_facts():
            s = self._rel2seq(rel)
            for word in s:
                if word not in self._vocab:
                    self._vocab[word] = len(self._vocab)
            l = len(s)
            max_l = max(max_l, l)
            l_count[l] = l_count.get(l, 0) + 1
            total += 1

        self._seq_inputs = [tf.placeholder(tf.int64, shape=[None], name="seq_input%d" % i)
                            for i in xrange(max_l)]
        with vs.variable_scope("composition", initializer=model.default_init()):
            # NOTE(review): _comp_f is defined elsewhere; it is indexed below
            # by sequence length and by -1 for the longest sequence.
            seq_outputs = self._comp_f()

        # Floor division gives the same integer bucket size on Python 2 and 3.
        # Clamping to 1 prevents a ZeroDivisionError in the modulo below when
        # there are fewer facts than requested buckets.
        bucket_size = max(1, total // num_buckets)
        self._bucket_outputs = []
        self._buckets = []
        ct = 0
        for l in xrange(max_l):
            c = l_count.get(l)
            if c:
                ct += c
                # True when the running count crossed a multiple of
                # bucket_size while adding the sequences of this length.
                if ct % bucket_size < c:
                    self._bucket_outputs.append(seq_outputs[l])
                    self._buckets.append(l)
        # The last bucket always has to cover the maximum length.
        if len(self._buckets) >= num_buckets:
            self._buckets[-1] = max_l
            self._bucket_outputs[-1] = seq_outputs[-1]
        else:
            self._buckets.append(max_l)
            self._bucket_outputs.append(seq_outputs[-1])

        self._input = [[0] * self._batch_size for _ in xrange(max_l)]  # fill input with padding
        self._feed_dict = dict()
        # A real list (not a lazy filter object) because it is reused once per
        # bucket below.
        train_params = [v for v in tf.trainable_variables() if "composition" in v.name]
        self._grad = tf.placeholder(tf.float32, shape=[None, self._size], name="rel_grad")
        self._grad_in = np.zeros((self._batch_size, self._size), dtype=np.float32)
        # self._grad is passed as grad_ys, i.e. the gradient of each bucket
        # output is supplied externally.
        self._grads = [tf.gradients(o, train_params, self._grad) for o in self._bucket_outputs]
        self._bucket_update = [self.opt.apply_gradients(zip(grads, train_params))
                               for grads in self._grads]
Exemplo n.º 2
0
    def __init__(self, kb, size, batch_size, is_train=True, num_neg=200,
                 learning_rate=1e-2, l2_lambda=0.0, is_batch_training=False):
        """Build the scoring graph and, when training, loss and update ops.

        Relies on ``self.name()``, ``self._init_inputs()``,
        ``self._scoring_f()`` and ``self._input_params()``, which are defined
        outside this method.

        Args:
            kb: knowledge base, stored as ``self._kb``.
            size: stored as ``self._size``.
            batch_size: number of scored examples per batch; when training it
                must be a multiple of ``num_neg + 1``.
            is_train: build the training ops when true.
            num_neg: the scores are reshaped into rows of ``num_neg + 1``
                entries, with the target label set on the first column.
            learning_rate: initial value of the ``lr`` and
                ``training_weight`` variables.
            l2_lambda: weight of the L2 penalty; no L2 ops are built unless
                it is positive.
            is_batch_training: accumulate gradients and loss into variables
                and update with RProp instead of updating with Adam directly.
        """
        self._kb = kb
        self._size = size
        self._batch_size = batch_size
        self._is_batch_training = is_batch_training
        self._is_train = is_train
        self._init = model.default_init()

        with vs.variable_scope(self.name(), initializer=self._init):
            self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name="lr")
            self.global_step = tf.Variable(0, trainable=False, name="step")
            with tf.device("/cpu:0"):
                if not is_batch_training:
                    self.opt = tf.train.AdamOptimizer(self.learning_rate, beta1=0.0)
                else:
                    self.opt = rprop.RPropOptimizer()
            self._init_inputs()
            with vs.variable_scope("score", initializer=self._init):
                self._scores = self._scoring_f()

        if is_train or is_batch_training:
            assert batch_size % (num_neg+1) == 0, "Batch size must be multiple of num_neg+1 during training"

            # One row per positive example; column 0 carries the target label.
            n_pos = int(batch_size/(num_neg+1))
            score_rows = tf.reshape(self._scores, [n_pos, num_neg + 1])
            targets = np.zeros([n_pos, num_neg+1], dtype=np.float32)
            targets[:, 0] = 1
            target_const = tf.constant(targets, name="labels_constant", dtype=tf.float32)
            per_row = tf.nn.softmax_cross_entropy_with_logits(score_rows, target_const)
            loss = math_ops.reduce_sum(per_row)

            train_params = [v for v in tf.trainable_variables() if self.name() in v.name]

            self.training_weight = tf.Variable(float(learning_rate), trainable=False, name="training_weight")
            # presumably self._feed_dict is created by _init_inputs() -- confirm
            self._feed_dict[self.training_weight] = np.array([1.0], dtype=np.float32)
            with tf.device("/cpu:0"):
                if is_batch_training:
                    self._grads = tf.gradients(loss, train_params, self.training_weight)
                    with vs.variable_scope("batch_gradient", initializer=self._init):
                        # One zero-initialised, non-trainable accumulator per parameter.
                        self._acc_gradients = [
                            tf.get_variable(p.name.split(":")[0], p.get_shape(), p.dtype,
                                            tf.constant_initializer(0.0), False)
                            for p in train_params
                        ]
                    self._loss = tf.get_variable("acc_loss", (), tf.float32, tf.constant_initializer(0.0), False)
                    # Gradient descent with learning rate -1.0 is (ab)used to
                    # sum gradients and loss into the accumulator variables.
                    acc_opt = tf.train.GradientDescentOptimizer(-1.0)
                    self._accumulate_gradients = acc_opt.apply_gradients(zip(self._grads, self._acc_gradients))
                    self._acc_loss = acc_opt.apply_gradients([(loss, self._loss)])

                    summed = [acc.value() for acc in self._acc_gradients]
                    self._update = self.opt.apply_gradients(zip(summed, train_params),
                                                            global_step=self.global_step)
                    # Running the initializers zeroes the accumulators again.
                    self._reset = [acc.initializer for acc in self._acc_gradients]
                    self._reset.append(self._loss.initializer)
                else:
                    self._loss = loss / math_ops.cast(n_pos, dtypes.float32)
                    in_params = self._input_params()
                    if in_params:
                        # Gradients w.r.t. the inputs are kept separately.
                        self._grads = tf.gradients(self._loss, train_params + in_params, self.training_weight)
                        self._input_grads = self._grads[len(train_params):]
                    else:
                        self._grads = tf.gradients(self._loss, train_params, self.training_weight)
                    if train_params:
                        param_grads = self._grads[:len(train_params)]
                        self._update = self.opt.apply_gradients(zip(param_grads, train_params),
                                                                global_step=self.global_step)

            if l2_lambda > 0.0:
                penalties = [tf.nn.l2_loss(t) for t in train_params]
                l2 = tf.reduce_sum(array_ops.pack(penalties))
                l2_loss = l2_lambda * l2
                if not is_batch_training:
                    l2_opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                    self._l2_update = l2_opt.minimize(l2_loss, var_list=train_params)
                else:
                    l2_grads = tf.gradients(l2_loss, train_params)
                    self._l2_accumulate_gradients = acc_opt.apply_gradients(zip(l2_grads, self._acc_gradients))
                    self._l2_acc_loss = acc_opt.apply_gradients([(l2_loss, self._loss)])

        self.saver = tf.train.Saver(tf.all_variables())
Exemplo n.º 3
0
    def __init__(self,
                 kb,
                 size,
                 num_buckets,
                 rel2seq,
                 batch_size,
                 learning_rate=1e-2):
        """Create the vocabulary, the length buckets and their update ops.

        The relation of each fact from ``kb.get_all_facts()`` is converted to
        a token sequence with ``rel2seq``. Unseen tokens are added to
        ``self._vocab`` and sequence lengths are counted. Lengths are then
        grouped into buckets, and for every bucket a gradient op plus an Adam
        update op over the "composition" variables is built.

        Args:
            kb: knowledge base exposing ``get_all_facts()``; each item is
                unpacked as ``((rel, _, _), _, _)``.
            size: second dimension of the ``rel_grad`` placeholder and of
                ``self._grad_in``.
            num_buckets: requested number of length buckets.
            rel2seq: callable mapping a relation to a sequence of tokens.
            batch_size: number of columns in the padded input buffer and rows
                in ``self._grad_in``.
            learning_rate: initial value of the non-trainable ``lr`` variable.

        Raises:
            ZeroDivisionError: if ``num_buckets`` is 0.
        """
        self._kb = kb
        self._size = size
        self._batch_size = batch_size
        self._rel2seq = rel2seq
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         name="lr")
        self.opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                          beta1=0.0)

        # Collect vocabulary and a histogram of sequence lengths in one pass.
        l_count = {}
        total = 0
        max_l = 0
        self._vocab = {"#PADDING#": 0}  # index 0 is the padding symbol
        for (rel, _, _), _, _ in kb.get_all_facts():
            s = self._rel2seq(rel)
            for word in s:
                # setdefault evaluates len() before inserting, so a new word
                # receives the next free index.
                self._vocab.setdefault(word, len(self._vocab))
            l = len(s)
            max_l = max(max_l, l)
            l_count[l] = l_count.get(l, 0) + 1
            total += 1

        self._seq_inputs = [
            tf.placeholder(tf.int64, shape=[None], name="seq_input%d" % i)
            for i in xrange(max_l)
        ]
        with vs.variable_scope("composition",
                               initializer=model.default_init()):
            # NOTE(review): _comp_f is defined elsewhere; its result is
            # indexed by sequence length and by -1 below.
            seq_outputs = self._comp_f()

        # Integer bucket size on Python 2 and 3 alike. The clamp to 1 avoids
        # a ZeroDivisionError in the modulo below when the knowledge base
        # holds fewer facts than the requested number of buckets.
        bucket_size = max(1, total // num_buckets)
        self._bucket_outputs = []
        self._buckets = []
        ct = 0
        for l in xrange(max_l):
            c = l_count.get(l)
            if not c:
                continue
            ct += c
            # The running count passed a multiple of bucket_size while the
            # sequences of this length were added: close a bucket here.
            if ct % bucket_size < c:
                self._bucket_outputs.append(seq_outputs[l])
                self._buckets.append(l)
        # Make sure the final bucket reaches the maximum length.
        if len(self._buckets) >= num_buckets:
            self._buckets[-1] = max_l
            self._bucket_outputs[-1] = seq_outputs[-1]
        else:
            self._buckets.append(max_l)
            self._bucket_outputs.append(seq_outputs[-1])

        self._input = [[0] * self._batch_size
                       for _ in xrange(max_l)]  # fill input with padding
        self._feed_dict = dict()
        # Materialised as a list because it is consumed once per bucket.
        train_params = [
            v for v in tf.trainable_variables() if "composition" in v.name
        ]
        self._grad = tf.placeholder(tf.float32,
                                    shape=[None, self._size],
                                    name="rel_grad")
        self._grad_in = np.zeros((self._batch_size, self._size),
                                 dtype=np.float32)
        # The gradient of each bucket output is fed in through self._grad
        # (passed as grad_ys).
        self._grads = [
            tf.gradients(o, train_params, self._grad)
            for o in self._bucket_outputs
        ]
        self._bucket_update = [
            self.opt.apply_gradients(zip(grads, train_params))
            for grads in self._grads
        ]
Exemplo n.º 4
0
    def __init__(self,
                 kb,
                 size,
                 batch_size,
                 is_train=True,
                 num_neg=200,
                 learning_rate=1e-2,
                 l2_lambda=0.0,
                 is_batch_training=False):
        """Construct the scoring graph plus optional training operations.

        ``self.name()``, ``self._init_inputs()``, ``self._scoring_f()`` and
        ``self._input_params()`` are used here but defined elsewhere.

        Args:
            kb: knowledge base, kept in ``self._kb``.
            size: kept in ``self._size``.
            batch_size: number of scored examples per batch; has to be a
                multiple of ``num_neg + 1`` when training ops are built.
            is_train: whether to build the training ops.
            num_neg: scores are arranged in rows of ``num_neg + 1`` entries
                whose first column carries the target label.
            learning_rate: initial value for the ``lr`` and
                ``training_weight`` variables.
            l2_lambda: L2 penalty weight; L2 ops are only built when it is
                positive.
            is_batch_training: if true, gradients and loss are summed into
                accumulator variables and applied with RProp; otherwise Adam
                updates the parameters directly.
        """
        self._kb = kb
        self._size = size
        self._batch_size = batch_size
        self._is_batch_training = is_batch_training
        self._is_train = is_train
        self._init = model.default_init()

        with vs.variable_scope(self.name(), initializer=self._init):
            self.learning_rate = tf.Variable(
                float(learning_rate), trainable=False, name="lr")
            self.global_step = tf.Variable(0, trainable=False, name="step")
            with tf.device("/cpu:0"):
                if is_batch_training:
                    self.opt = rprop.RPropOptimizer()
                else:
                    self.opt = tf.train.AdamOptimizer(
                        self.learning_rate, beta1=0.0)
            self._init_inputs()
            with vs.variable_scope("score", initializer=self._init):
                self._scores = self._scoring_f()

        if is_train or is_batch_training:
            assert batch_size % (
                num_neg + 1
            ) == 0, "Batch size must be multiple of num_neg+1 during training"

            # Arrange scores as [positives, 1 + negatives]; the target label
            # sits on column 0 of every row.
            num_pos = int(batch_size / (num_neg + 1))
            shaped_scores = tf.reshape(self._scores, [num_pos, num_neg + 1])
            label_array = np.zeros([num_pos, num_neg + 1], dtype=np.float32)
            label_array[:, 0] = 1
            label_tensor = tf.constant(
                label_array, name="labels_constant", dtype=tf.float32)
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                shaped_scores, label_tensor)
            loss = math_ops.reduce_sum(cross_entropy)

            train_params = [
                var for var in tf.trainable_variables()
                if self.name() in var.name
            ]

            self.training_weight = tf.Variable(
                float(learning_rate), trainable=False, name="training_weight")
            # presumably self._feed_dict is set up by _init_inputs() -- confirm
            self._feed_dict[self.training_weight] = np.array(
                [1.0], dtype=np.float32)

            with tf.device("/cpu:0"):
                if is_batch_training:
                    self._grads = tf.gradients(
                        loss, train_params, self.training_weight)
                    with vs.variable_scope("batch_gradient",
                                           initializer=self._init):
                        # Zero-initialised, non-trainable accumulator for
                        # each trainable parameter.
                        self._acc_gradients = [
                            tf.get_variable(
                                param.name.split(":")[0], param.get_shape(),
                                param.dtype, tf.constant_initializer(0.0),
                                False)
                            for param in train_params
                        ]
                    self._loss = tf.get_variable(
                        "acc_loss", (), tf.float32,
                        tf.constant_initializer(0.0), False)
                    # A gradient descent optimizer with learning rate -1.0 is
                    # (ab)used to sum gradients and loss into the accumulators.
                    acc_opt = tf.train.GradientDescentOptimizer(-1.0)
                    grad_pairs = zip(self._grads, self._acc_gradients)
                    self._accumulate_gradients = acc_opt.apply_gradients(
                        grad_pairs)
                    self._acc_loss = acc_opt.apply_gradients(
                        [(loss, self._loss)])

                    accumulated = [g.value() for g in self._acc_gradients]
                    self._update = self.opt.apply_gradients(
                        zip(accumulated, train_params),
                        global_step=self.global_step)
                    # Re-running the initializers clears the accumulators.
                    self._reset = [g.initializer for g in self._acc_gradients]
                    self._reset.append(self._loss.initializer)
                else:
                    self._loss = loss / math_ops.cast(num_pos, dtypes.float32)
                    in_params = self._input_params()
                    if in_params:
                        # The trailing entries are gradients w.r.t. the inputs.
                        self._grads = tf.gradients(
                            self._loss, train_params + in_params,
                            self.training_weight)
                        self._input_grads = self._grads[len(train_params):]
                    else:
                        self._grads = tf.gradients(
                            self._loss, train_params, self.training_weight)
                    if train_params:
                        self._update = self.opt.apply_gradients(
                            zip(self._grads[:len(train_params)], train_params),
                            global_step=self.global_step)

            if l2_lambda > 0.0:
                l2_terms = [tf.nn.l2_loss(t) for t in train_params]
                l2 = tf.reduce_sum(array_ops.pack(l2_terms))
                l2_loss = l2_lambda * l2
                if is_batch_training:
                    l2_grads = tf.gradients(l2_loss, train_params)
                    self._l2_accumulate_gradients = acc_opt.apply_gradients(
                        zip(l2_grads, self._acc_gradients))
                    self._l2_acc_loss = acc_opt.apply_gradients(
                        [(l2_loss, self._loss)])
                else:
                    sgd = tf.train.GradientDescentOptimizer(self.learning_rate)
                    self._l2_update = sgd.minimize(
                        l2_loss, var_list=train_params)

        self.saver = tf.train.Saver(tf.all_variables())