Example #1
    def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1, arc_seq,
              entropy, log_prob):
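      # one tf.while_loop step: for cell `layer_id`, sample two input indices
      # via attention over earlier anchors, then two ops, then store this
      # cell's anchor for later cells to attend over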
      indices = tf.range(0, layer_id, dtype=tf.int32)
      start_id = 4 * (layer_id - 2)
      prev_layers = []
      for i in range(2):  # index_1, index_2
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        query = anchors_w_1.gather(indices)
        query = tf.reshape(query, [layer_id, self.lstm_size])
        query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
        query = tf.matmul(query, self.v_attn)
        logits = tf.reshape(query, [1, layer_id])
        if self.temperature is not None:
          logits /= self.temperature
        if self.tanh_constant is not None:
          logits = self.tanh_constant * tf.tanh(logits)
        index = tf.multinomial(logits, 1)
        index = tf.to_int32(index)
        index = tf.reshape(index, [1])
        arc_seq = arc_seq.write(start_id + 2 * i, index)
        curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=index)
        log_prob += curr_log_prob
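        # entropy of the sampling distribution: cross-entropy of
        # softmax(logits) against itself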
        curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits(
          logits=logits, labels=tf.nn.softmax(logits)))
        entropy += curr_ent
        prev_layers.append(anchors.read(tf.reduce_sum(index)))
        inputs = prev_layers[-1]

      for i in range(2):  # op_1, op_2
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft
        if self.temperature is not None:
          logits /= self.temperature
        if self.tanh_constant is not None:
          op_tanh = self.tanh_constant / self.op_tanh_reduce
          logits = op_tanh * tf.tanh(logits)
        if use_bias:  # use_bias comes from the enclosing _build_sampler (see Example #5)
          logits += self.b_soft_no_learn
        op_id = tf.multinomial(logits, 1)
        op_id = tf.to_int32(op_id)
        op_id = tf.reshape(op_id, [1])
        arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id)
        curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=op_id)
        log_prob += curr_log_prob
        curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits(
          logits=logits, labels=tf.nn.softmax(logits)))
        entropy += curr_ent
        inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      anchors = anchors.write(layer_id, next_h[-1])
      anchors_w_1 = anchors_w_1.write(layer_id, tf.matmul(next_h[-1], self.w_attn_1))
      inputs = self.g_emb

      return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
              arc_seq, entropy, log_prob)
Example #2
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""

        arc_seq = []
        sample_log_probs = []
        all_h = []

        # sampler ops
        inputs = self.g_emb
        prev_c = [
            tf.zeros([1, self.lstm_size], dtype=tf.float32)
            for _ in range(self.lstm_num_layers)
        ]
        prev_h = [
            tf.zeros([1, self.lstm_size], dtype=tf.float32)
            for _ in range(self.lstm_num_layers)
        ]
        for layer_id in range(self.num_layers):
            for branch_id in range(self.num_branches):
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
                all_h.append(tf.stop_gradient(next_h[-1]))

                logits = tf.matmul(next_h[-1], self.w_soft)
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)

                # tf.multinomial is deprecated; tf.random.categorical is the replacement
                config_id = tf.random.categorical(logits, 1)

                config_id = tf.cast(config_id, tf.int32)
                config_id = tf.reshape(config_id, [1])
                arc_seq.append(config_id)
                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=config_id
                )
                sample_log_probs.append(log_prob)

                inputs = tf.nn.embedding_lookup(self.w_emb, config_id)
        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = arc_seq

        self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
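        # perplexity: exp of the mean per-decision negative log-likelihood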
        self.ppl = tf.exp(
            tf.reduce_sum(self.sample_log_probs)
            / tf.cast(self.num_layers * self.num_branches, tf.float32)
        )
        self.all_h = all_h
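
Note: every example calls a helper stack_lstm(inputs, prev_c, prev_h, w_lstm) that is not shown. A minimal sketch consistent with the call sites above (an assumption about the helper, not code from the snippets' source): it advances a stack of LSTM cells by one step and returns the new per-layer cell and hidden states.

    def lstm(x, prev_c, prev_h, w):
        # one step of a single LSTM cell; w packs the i/f/o/g weights,
        # shape [input_size + lstm_size, 4 * lstm_size]
        ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
        i, f, o, g = tf.split(ifog, 4, axis=1)
        i, f, o, g = tf.sigmoid(i), tf.sigmoid(f), tf.sigmoid(o), tf.tanh(g)
        next_c = i * g + f * prev_c
        next_h = o * tf.tanh(next_c)
        return next_c, next_h

    def stack_lstm(x, prev_c, prev_h, w):
        # layer 0 consumes x; every later layer consumes the hidden state below
        next_c, next_h = [], []
        for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
            inputs = x if layer_id == 0 else next_h[-1]
            curr_c, curr_h = lstm(inputs, _c, _h, _w)
            next_c.append(curr_c)
            next_h.append(curr_h)
        return next_c, next_h
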
Example #3
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""

        arc_seq = []
        sample_log_probs = []
        sample_entropy = []
        all_h = []
        all_h_w = []

        # sampler ops
        inputs = self.g_emb
        prev_c, prev_h = [], []
        for _ in range(self.lstm_num_layers):
            prev_c.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))
            prev_h.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))

        # used = tf.zeros([self.rhn_depth, 2], dtype=tf.int32)
        for layer_id in range(self.rhn_depth):
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            all_h.append(next_h[-1])
            all_h_w.append(tf.matmul(next_h[-1], self.attn_w_1))

            if layer_id > 0:
                query = tf.matmul(next_h[-1], self.attn_w_2)
                query = query + tf.concat(all_h_w[:-1], axis=0)
                query = tf.tanh(query)
                logits = tf.matmul(query, self.attn_v)
                logits = tf.reshape(logits, [1, layer_id])

                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)
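                # distance penalty: bias the skip choice toward recent layers
                # by subtracting (layer_id - j)^2 / 6.0 from the logit for layer j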
                diff = tf.to_float(layer_id - tf.range(0, layer_id))**2
                logits -= tf.reshape(diff, [1, layer_id]) / 6.0

                skip_index = tf.compat.v1.random.categorical(logits,
                                                             1,
                                                             dtype=tf.int32)
                skip_index = tf.reshape(skip_index, [1])
                arc_seq.append(skip_index)

                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=skip_index)
                sample_log_probs.append(log_prob)

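                # single-sample entropy estimate: log_prob is the NLL of the
                # sampled index, so log_prob * exp(-log_prob) = -p * log(p)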
                entropy = log_prob * tf.exp(-log_prob)
                sample_entropy.append(tf.stop_gradient(entropy))

                inputs = tf.nn.embedding_lookup(tf.concat(all_h[:-1], axis=0),
                                                skip_index)
                inputs /= (0.1 + tf.to_float(layer_id - skip_index))
            else:
                inputs = self.g_emb

            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            logits = tf.matmul(next_h[-1], self.w_soft)
            if self.temperature is not None:
                logits /= self.temperature
            if self.tanh_constant is not None:
                logits = self.tanh_constant * tf.tanh(logits)
            func = tf.compat.v1.random.categorical(logits, 1, dtype=tf.int32)
            func = tf.reshape(func, [1])
            arc_seq.append(func)
            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=func)
            sample_log_probs.append(log_prob)
            entropy = log_prob * tf.exp(-log_prob)
            sample_entropy.append(tf.stop_gradient(entropy))
            inputs = tf.nn.embedding_lookup(self.w_emb, func)

        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = arc_seq

        self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
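        # perplexity: exp of the mean per-decision negative log-likelihood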
        self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

        sample_entropy = tf.concat(sample_entropy, axis=0)
        self.sample_entropy = tf.reduce_sum(sample_entropy)

        self.all_h = all_h
Example #4
  def _build_sampler(self):
    """Build the sampler ops and the log_prob ops."""

    print("-" * 80)
    print("Build controller sampler")
    anchors = []
    anchors_w_1 = []

    arc_seq = []
    entropys = []
    log_probs = []
    skip_count = []
    skip_penaltys = []

    prev_c = [tf.zeros([1, self.lstm_size], tf.float32) for _ in
              range(self.lstm_num_layers)]
    prev_h = [tf.zeros([1, self.lstm_size], tf.float32) for _ in
              range(self.lstm_num_layers)]
    inputs = self.g_emb
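    # target distribution over {no-skip, skip} for the KL penalty below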
    skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target],
                               dtype=tf.float32)
    for layer_id in range(self.num_layers):
      if self.search_whole_channels:
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        logit = tf.matmul(next_h[-1], self.w_soft)
        if self.temperature is not None:
          logit /= self.temperature
        if self.tanh_constant is not None:
          logit = self.tanh_constant * tf.tanh(logit)
        if self.search_for == "macro" or self.search_for == "branch":
          branch_id = tf.multinomial(logit, 1)
          branch_id = tf.to_int32(branch_id)
          branch_id = tf.reshape(branch_id, [1])
        elif self.search_for == "connection":
          branch_id = tf.constant([0], dtype=tf.int32)
        else:
          raise ValueError("Unknown search_for {}".format(self.search_for))
        arc_seq.append(branch_id)
        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logit, labels=branch_id)
        log_probs.append(log_prob)
        entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
        entropys.append(entropy)
        inputs = tf.nn.embedding_lookup(self.w_emb, branch_id)
      else:
        for branch_id in range(self.num_branches):
          next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
          prev_c, prev_h = next_c, next_h
          logit = tf.matmul(next_h[-1], self.w_soft["start"][branch_id])
          if self.temperature is not None:
            logit /= self.temperature
          if self.tanh_constant is not None:
            logit = self.tanh_constant * tf.tanh(logit)
          start = tf.multinomial(logit, 1)
          start = tf.to_int32(start)
          start = tf.reshape(start, [1])
          arc_seq.append(start)
          log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logit, labels=start)
          log_probs.append(log_prob)
          entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
          entropys.append(entropy)
          inputs = tf.nn.embedding_lookup(self.w_emb["start"][branch_id], start)

          next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
          prev_c, prev_h = next_c, next_h
          logit = tf.matmul(next_h[-1], self.w_soft["count"][branch_id])
          if self.temperature is not None:
            logit /= self.temperature
          if self.tanh_constant is not None:
            logit = self.tanh_constant * tf.tanh(logit)
          mask = tf.range(0, limit=self.out_filters-1, delta=1, dtype=tf.int32)
          mask = tf.reshape(mask, [1, self.out_filters - 1])
          mask = tf.less_equal(mask, self.out_filters-1 - start)
          logit = tf.where(mask, x=logit, y=tf.fill(tf.shape(logit), -np.inf))
          count = tf.multinomial(logit, 1)
          count = tf.to_int32(count)
          count = tf.reshape(count, [1])
          arc_seq.append(count + 1)
          log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logit, labels=count)
          log_probs.append(log_prob)
          entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
          entropys.append(entropy)
          inputs = tf.nn.embedding_lookup(self.w_emb["count"][branch_id], count)

      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h

      if layer_id > 0:
        query = tf.concat(anchors_w_1, axis=0)
        query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
        query = tf.matmul(query, self.v_attn)
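        # [-query, query] gives two-class logits per previous layer; the
        # softmax over them equals sigmoid(2 * query) for the "skip" class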
        logit = tf.concat([-query, query], axis=1)
        if self.temperature is not None:
          logit /= self.temperature
        if self.tanh_constant is not None:
          logit = self.tanh_constant * tf.tanh(logit)

        skip = tf.multinomial(logit, 1)
        skip = tf.to_int32(skip)
        skip = tf.reshape(skip, [layer_id])
        arc_seq.append(skip)

        skip_prob = tf.sigmoid(logit)
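        # KL(skip_prob || skip_targets): penalizes drifting from the target skip rate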
        kl = skip_prob * tf.log(skip_prob / skip_targets)
        kl = tf.reduce_sum(kl)
        skip_penaltys.append(kl)

        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logit, labels=skip)
        log_probs.append(tf.reduce_sum(log_prob, keep_dims=True))

        entropy = tf.stop_gradient(
          tf.reduce_sum(log_prob * tf.exp(-log_prob), keep_dims=True))
        entropys.append(entropy)

        skip = tf.to_float(skip)
        skip = tf.reshape(skip, [1, layer_id])
        skip_count.append(tf.reduce_sum(skip))
        inputs = tf.matmul(skip, tf.concat(anchors, axis=0))
        inputs /= (1.0 + tf.reduce_sum(skip))
      else:
        inputs = self.g_emb

      anchors.append(next_h[-1])
      anchors_w_1.append(tf.matmul(next_h[-1], self.w_attn_1))

    arc_seq = tf.concat(arc_seq, axis=0)
    self.sample_arc = tf.reshape(arc_seq, [-1])

    entropys = tf.stack(entropys)
    self.sample_entropy = tf.reduce_sum(entropys)

    log_probs = tf.stack(log_probs)
    self.sample_log_prob = tf.reduce_sum(log_probs)

    skip_count = tf.stack(skip_count)
    self.skip_count = tf.reduce_sum(skip_count)

    skip_penaltys = tf.stack(skip_penaltys)
    self.skip_penaltys = tf.reduce_mean(skip_penaltys)
Example #5
    def _build_sampler(self, prev_c=None, prev_h=None, use_bias=False):
        """Build the sampler ops and the log_prob ops."""

        print("-" * 80)
        print("Build controller sampler")

        anchors = tf.TensorArray(tf.float32,
                                 size=self.num_cells + 2,
                                 clear_after_read=False)
        anchors_w_1 = tf.TensorArray(tf.float32,
                                     size=self.num_cells + 2,
                                     clear_after_read=False)
        arc_seq = tf.TensorArray(tf.int32, size=self.num_cells * 4)
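        # 4 decisions per cell: index_1, index_2, op_1, op_2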
        if prev_c is None:
            assert prev_h is None, "prev_c and prev_h must both be None"
            prev_c = [
                tf.zeros([1, self.lstm_size], tf.float32)
                for _ in range(self.lstm_num_layers)
            ]
            prev_h = [
                tf.zeros([1, self.lstm_size], tf.float32)
                for _ in range(self.lstm_num_layers)
            ]
        inputs = self.g_emb

        for layer_id in range(2):
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            anchors = anchors.write(layer_id, tf.zeros_like(next_h[-1]))
            anchors_w_1 = anchors_w_1.write(
                layer_id, tf.matmul(next_h[-1], self.w_attn_1))

        def _condition(layer_id, *args):
            return tf.less(layer_id, self.num_cells + 2)

        def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1,
                  arc_seq, entropy, log_prob):
            indices = tf.range(0, layer_id, dtype=tf.int32)
            start_id = 4 * (layer_id - 2)
            prev_layers = []
            for i in range(2):  # index_1, index_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                query = anchors_w_1.gather(indices)
                query = tf.reshape(query, [layer_id, self.lstm_size])
                query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
                query = tf.matmul(query, self.v_attn)
                logits = tf.reshape(query, [1, layer_id])
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)
                index = tf.multinomial(logits, 1)
                index = tf.to_int32(index)
                index = tf.reshape(index, [1])
                arc_seq = arc_seq.write(start_id + 2 * i, index)
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=index)
                log_prob += curr_log_prob
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                prev_layers.append(anchors.read(tf.reduce_sum(index)))
                inputs = prev_layers[-1]

            for i in range(2):  # op_1, op_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    op_tanh = self.tanh_constant / self.op_tanh_reduce
                    logits = op_tanh * tf.tanh(logits)
                if use_bias:
                    logits += self.b_soft_no_learn
                op_id = tf.multinomial(logits, 1)
                op_id = tf.to_int32(op_id)
                op_id = tf.reshape(op_id, [1])
                arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id)
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=op_id)
                log_prob += curr_log_prob
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            anchors = anchors.write(layer_id, next_h[-1])
            anchors_w_1 = anchors_w_1.write(
                layer_id, tf.matmul(next_h[-1], self.w_attn_1))
            inputs = self.g_emb

            return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
                    arc_seq, entropy, log_prob)

        loop_vars = [
            tf.constant(2, dtype=tf.int32, name="layer_id"),
            inputs,
            prev_c,
            prev_h,
            anchors,
            anchors_w_1,
            arc_seq,
            tf.constant([0.0], dtype=tf.float32, name="entropy"),
            tf.constant([0.0], dtype=tf.float32, name="log_prob"),
        ]

        loop_outputs = tf.while_loop(_condition,
                                     _body,
                                     loop_vars,
                                     parallel_iterations=1)

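        # loop_outputs has the same layout as loop_vars: [layer_id, inputs,
        # prev_c, prev_h, anchors, anchors_w_1, arc_seq, entropy, log_prob]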
        arc_seq = loop_outputs[-3].stack()
        arc_seq = tf.reshape(arc_seq, [-1])
        entropy = tf.reduce_sum(loop_outputs[-2])
        log_prob = tf.reduce_sum(loop_outputs[-1])

        last_c = loop_outputs[-7]
        last_h = loop_outputs[-6]

        return arc_seq, entropy, log_prob, last_c, last_h
Example #6
        def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1,
                  arc_seq, entropy, log_prob):
            indices = tf.range(0, layer_id, dtype=tf.int32)
            start_id = 4 * (layer_id - 2)
            prev_layers = []
            for i in range(2):  # index_1, index_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                query = anchors_w_1.gather(
                    indices)  # gather the rows of anchors_w_1 at `indices` into one tensor
                query = tf.reshape(query, [layer_id, self.lstm_size])
                query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
                query = tf.matmul(query, self.v_attn)
                logits = tf.reshape(query, [1, layer_id])  # predicted scores
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)
                '''
                tf.multinomial(logits, num_samples):
                The first argument, logits, holds unnormalized log-probabilities;
                    each element gives the relative weight of the corresponding
                    index, which is why the examples below wrap probabilities in
                    tf.log. With tf.log([[0.6, 0.4]]), index 0 is drawn with
                    probability 0.6 and index 1 with probability 0.4.
                The second argument, num_samples, is the number of draws.
                Examples:
                tf.multinomial(tf.log([[0.1]]), 3) always yields [0, 0, 0].
                tf.multinomial(tf.log([[0.1, 0.6]]), 3) may yield [0, 0, 0],
                    [0, 1, 1], or other combinations.
                '''
                index = tf.multinomial(logits, 1)
                index = tf.to_int32(index)
                index = tf.reshape(index, [1])  # the sampled value of index_i
                arc_seq = arc_seq.write(start_id + 2 * i,
                                        index)  # record the sampled index in arc_seq
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=index)
                log_prob += curr_log_prob
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                prev_layers.append(anchors.read(tf.reduce_sum(index)))
                inputs = prev_layers[-1]

            for i in range(2):  # op_1, op_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    op_tanh = self.tanh_constant / self.op_tanh_reduce
                    logits = op_tanh * tf.tanh(logits)
                if use_bias:  # use_bias is captured from the enclosing _build_sampler (see Example #5)
                    logits += self.b_soft_no_learn
                op_id = tf.multinomial(logits, 1)
                op_id = tf.to_int32(op_id)
                op_id = tf.reshape(op_id, [1])
                arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id)
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=op_id)
                log_prob += curr_log_prob
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            anchors = anchors.write(layer_id, next_h[-1])
            anchors_w_1 = anchors_w_1.write(
                layer_id, tf.matmul(next_h[-1], self.w_attn_1))
            inputs = self.g_emb

            return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
                    arc_seq, entropy, log_prob)
Example #7
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""

        print("-" * 80)
        print("Build controller sampler")
        #anchors = []
        #anchors_w_1 = []

        arc_seq = []
        entropys = []
        log_probs = []
        #skip_count = []
        #skip_penaltys = []
        # initialize the first c and h to zero
        prev_c = [
            tf.zeros([1, self.lstm_size], tf.float32)
            for _ in xrange(self.lstm_num_layers)
        ]
        prev_h = [
            tf.zeros([1, self.lstm_size], tf.float32)
            for _ in xrange(self.lstm_num_layers)
        ]

        # shape(self.g_emb) = [1, self.lstm_size]
        inputs = self.g_emb  # the first input is the start-of-sequence embedding
        #skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target],
        #				dtype=tf.float32)
        for layer_id in xrange(self.num_layers):
            if self.search_whole_channels:
                #the shapes of c and h are both [1,self.lstm_size]
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                # logit: linear projection of the LSTM output (pre-softmax scores)
                logit = tf.matmul(next_h[-1], self.w_soft)
                if self.temperature is not None:
                    logit /= self.temperature
                if self.tanh_constant is not None:
                    logit = self.tanh_constant * tf.tanh(logit)
                if self.search_for == "macro" or self.search_for == "branch":
                    branch_id = tf.multinomial(logit, 1)
                    branch_id = tf.to_int32(branch_id)
                    branch_id = tf.reshape(branch_id, [1])
                elif self.search_for == "connection":
                    branch_id = tf.constant([0], dtype=tf.int32)
                else:
                    raise ValueError("Unknown search_for {}".format(
                        self.search_for))
                arc_seq.append(branch_id)
                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logit, labels=branch_id)
                log_probs.append(log_prob)
                entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
                entropys.append(entropy)
                inputs = tf.nn.embedding_lookup(self.w_emb, branch_id)
            else:
                raise ValueError(
                    "only the case self.search_whole_channels=True is handled here"
                )

            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            """
			if layer_id > 0:
				query = tf.concat(anchors_w_1, axis=0)
				query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
				query = tf.matmul(query, self.v_attn)
				logit = tf.concat([-query, query], axis=1)
				if self.temperature is not None:
					logit /= self.temperature
				if self.tanh_constant is not None:
					logit = self.tanh_constant * tf.tanh(logit)
				
				skip = tf.multinomial(logit, 1)
				skip = tf.to_int32(skip)
				skip = tf.reshape(skip, [layer_id])
				arc_seq.append(skip)
				
				skip_prob = tf.sigmoid(logit)
				kl = skip_prob * tf.log(skip_prob / skip_targets)
				kl = tf.reduce_sum(kl)
				skip_penaltys.append(kl)
				
				log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
					logits=logit, labels=skip)
				log_probs.append(tf.reduce_sum(log_prob, keep_dims=True))
				
				entropy = tf.stop_gradient(
					tf.reduce_sum(log_prob * tf.exp(-log_prob), keep_dims=True))
				entropys.append(entropy)
				
				skip = tf.to_float(skip)
				skip = tf.reshape(skip, [1, layer_id])
				skip_count.append(tf.reduce_sum(skip))
				inputs = tf.matmul(skip, tf.concat(anchors, axis=0))
				inputs /= (1.0 + tf.reduce_sum(skip))
			else:
				inputs = self.g_emb
			"""
            #anchors.append(next_h[-1])
            #anchors_w_1.append(tf.matmul(next_h[-1], self.w_attn_1))

        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = tf.reshape(arc_seq, [-1])

        entropys = tf.stack(entropys)
        self.sample_entropy = tf.reduce_sum(entropys)

        log_probs = tf.stack(log_probs)
        self.sample_log_prob = tf.reduce_sum(log_probs)
Example #8
    def _build_sampler(self, prev_c=None, prev_h=None):
        anchors = tf.TensorArray(tf.float32,
                                 size=self.num_cells + 1,
                                 clear_after_read=False)
        anchors_w_1 = tf.TensorArray(tf.float32,
                                     size=self.num_cells + 1,
                                     clear_after_read=False)

        arc_seq = tf.TensorArray(tf.int32, size=self.num_cells * 2)
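        # 2 decisions per node: one input index and one op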

        if prev_c is None or prev_h is None:
            prev_c = [
                tf.zeros([1, self.lstm_size], tf.float32)
                for _ in range(self.lstm_num_layers)
            ]
            prev_h = [
                tf.zeros([1, self.lstm_size], tf.float32)
                for _ in range(self.lstm_num_layers)
            ]

        inputs = self.g_emb

        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        anchors = anchors.write(0, tf.zeros_like(next_h[-1]))
        anchors_w_1 = anchors_w_1.write(0, tf.matmul(next_h[-1],
                                                     self.w_attn_1))

        def _condition(layer_id, *args):
            # run layer_id = 1 .. num_cells, so the two writes per step exactly
            # fill arc_seq (size num_cells * 2) and stay inside anchors
            # (size num_cells + 1)
            return tf.less(layer_id, self.num_cells + 1)

        def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1,
                  arc_seq, entropy, log_prob):
            indices = tf.range(0, layer_id, dtype=tf.int32)
            start_id = 2 * (layer_id - 1)
            prev_layers = []

            # index
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            query = anchors_w_1.gather(indices)
            query = tf.reshape(query, [layer_id, self.lstm_size])
            query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
            query = tf.matmul(query, self.v_attn)
            logits = tf.reshape(query, [1, layer_id])

            if self.temperature:
                logits /= self.temperature
            if self.tanh_constant:
                logits = self.tanh_constant * tf.tanh(logits)

            index = tf.multinomial(logits, 1)
            index = tf.to_int32(index)
            index = tf.reshape(index, [1])
            arc_seq = arc_seq.write(start_id, index)

            curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=index)
            log_prob += curr_log_prob
            curr_ent = tf.stop_gradient(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits, labels=tf.nn.softmax(logits)))
            entropy += curr_ent

            prev_layers.append(anchors.read(tf.reduce_sum(index)))
            inputs = prev_layers[-1]

            # op
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft

            if self.temperature:
                logits /= self.temperature
            if self.tanh_constant:
                op_tanh = self.tanh_constant / self.op_tanh_reduce
                logits = op_tanh * tf.tanh(logits)

            op_id = tf.multinomial(logits, 1)
            op_id = tf.to_int32(op_id)
            op_id = tf.reshape(op_id, [1])
            arc_seq = arc_seq.write(start_id + 1, op_id)

            curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=op_id)
            log_prob += curr_log_prob
            curr_ent = tf.stop_gradient(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits, labels=tf.nn.softmax(logits)))
            entropy += curr_ent
            inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

            # record this node's anchor so later nodes can attend over it
            # (as in Examples #5/#6; the gather above reads these slots)
            anchors = anchors.write(layer_id, next_h[-1])
            anchors_w_1 = anchors_w_1.write(
                layer_id, tf.matmul(next_h[-1], self.w_attn_1))

            return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
                    arc_seq, entropy, log_prob)

        loop_vars = [
            tf.constant(1, dtype=tf.int32, name='layer_id'), inputs, prev_c,
            prev_h, anchors, anchors_w_1, arc_seq,
            tf.constant([0.0], dtype=tf.float32, name='entropy'),
            tf.constant([0.0], dtype=tf.float32, name='log_prob')
        ]

        loop_outputs = tf.while_loop(_condition,
                                     _body,
                                     loop_vars,
                                     parallel_iterations=1)

        arc_seq = loop_outputs[-3].stack()
        arc_seq = tf.reshape(arc_seq, [-1])
        entropy = tf.reduce_sum(loop_outputs[-2])
        log_prob = tf.reduce_sum(loop_outputs[-1])

        last_c = loop_outputs[2]
        last_h = loop_outputs[3]

        return arc_seq, entropy, log_prob, last_c, last_h
Example #9
  def _build_trainer(self):
    print("-" * 80)
    print("Build controller trainer")
    anchors = []
    anchors_w_1 = []

    ops_each_layer = 2 if self.search_count else 1
    total_arc_len = sum([ops_each_layer] + [ ops_each_layer+i for i in range(1, self.num_layers) ])
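    # e.g. with ops_each_layer=2 and num_layers=4: 2 + 3 + 4 + 5 = 14 entries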
    self.total_arc_len = total_arc_len
    self.input_arc = [tf.placeholder(shape=(), dtype=tf.int32, name='arc_{}'.format(i))
      for i in range(total_arc_len)]
    entropys = []
    log_probs = []
    skip_count = []
    skip_penaltys = []
    masks = []

    prev_c = [tf.zeros([1, self.lstm_size], tf.float32) for _ in
              range(self.lstm_num_layers)]
    prev_h = [tf.zeros([1, self.lstm_size], tf.float32) for _ in
              range(self.lstm_num_layers)]
    inputs = self.g_emb
    skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target],
                               dtype=tf.float32)
    
    arc_pointer = 0
    for layer_id in range(self.num_layers):
      ###
      ### for each layer, sample num_branches operations
      ###
      #for branch_id in range(self.num_branches):
      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      logit = tf.matmul(next_h[-1], self.w_soft["start"][layer_id]) # out_filter x 1
      if self.temperature is not None:
        logit /= self.temperature
      if self.tanh_constant is not None:
        logit = self.tanh_constant * tf.tanh(logit)
      # start: the sampled start channel in [0, out_filters[i]), read from the fed-in arc
      start = self.input_arc[arc_pointer]
      start = tf.reshape(start, [1])

      log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logit, labels=start)
      log_probs.append(log_prob)
      entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
      entropys.append(entropy)
      # inputs: get a row slice of [out_filter[i], lstm_size]
      #inputs = tf.nn.embedding_lookup(self.w_emb["start"][branch_id], start) 
      inputs = tf.nn.embedding_lookup(self.w_emb["start"][layer_id], start) 

      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h

      if self.search_count:
        #logit = tf.matmul(next_h[-1], self.w_soft["count"][branch_id])
        logit = tf.matmul(next_h[-1], self.w_soft["count"][layer_id])
        if self.temperature is not None:
          logit /= self.temperature
        if self.tanh_constant is not None:
          logit = self.tanh_constant * tf.tanh(logit)
        # mask: a boolean row of length out_filters[i]-1 that is True exactly
        # for count indices <= out_filters[i]-1 - start
        mask = tf.range(0, limit=self.out_filters[layer_id]-1, delta=1, dtype=tf.int32)
        mask = tf.reshape(mask, [1, self.out_filters[layer_id] - 1])
        mask = tf.less_equal(mask, self.out_filters[layer_id]-1 - start)
        masks.append([mask, start])
        # tf.where: where mask is False, entries of x are replaced by entries of y
        logit = tf.where(mask, x=logit, y=tf.fill(tf.shape(logit), -np.inf))
        # counts that would exceed out_filters[i] - start are masked to -inf,
        # i.e. sampled with probability 0; e.g. if start is 3 and out_filters[i]
        # is 10, counts 8 and 9 are masked out
        count = self.input_arc[arc_pointer+1]
        count = tf.reshape(count, [1])
        count = count - 1  # the fed arc stores count+1 (cf. Example #4), so shift back
        #arc_seq.append(count + 1)
        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logit, labels=count)
        log_probs.append(log_prob)
        entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
        entropys.append(entropy)
        # inputs: get a row slice of [out_filter[i]-1, lstm_size]
        #inputs = tf.nn.embedding_lookup(self.w_emb["count"][branch_id], count)
        inputs = tf.nn.embedding_lookup(self.w_emb["count"][layer_id], count)
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h

      ###
      ### sample the connections, unless the first layer
      ### the number `skip` of each layer grows as layer_id grows
      ###
      if layer_id > 0:
        query = tf.concat(anchors_w_1, axis=0)  # layer_id x lstm_size
        # w_attn_2: lstm_size x lstm_size
        query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2)) # query: layer_id x lstm_size
        ## P(layer j is an input to layer i) = sigmoid(v^T * tanh(W_prev * h_j + W_curr * h_i))
        query = tf.matmul(query, self.v_attn) # query: layer_id x 1
        logit = tf.concat([-query, query], axis=1) # logit: layer_id x 2
        if self.temperature is not None:
          logit /= self.temperature
        if self.tanh_constant is not None:
          logit = self.tanh_constant * tf.tanh(logit)

        skip = self.input_arc[(arc_pointer+ops_each_layer) : (arc_pointer+ops_each_layer + layer_id)]
        #print(layer_id, (arc_pointer+2), (arc_pointer+2 + layer_id), skip)
        skip = tf.reshape(skip, [layer_id])
        
        skip_prob = tf.sigmoid(logit)
        kl = skip_prob * tf.log(skip_prob / skip_targets)
        kl = tf.reduce_sum(kl)
        skip_penaltys.append(kl)

        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logit, labels=skip)
        log_probs.append(tf.reshape(tf.reduce_sum(log_prob),[-1]))

        entropy = tf.stop_gradient(
          tf.reshape(tf.reduce_sum(log_prob * tf.exp(-log_prob)), [-1]) )
        entropys.append(entropy)

        skip = tf.to_float(skip)
        skip = tf.reshape(skip, [1, layer_id])
        skip_count.append(tf.reduce_sum(skip))
        inputs = tf.matmul(skip, tf.concat(anchors, axis=0))
        inputs /= (1.0 + tf.reduce_sum(skip))
        
      else:
        inputs = self.g_emb

      anchors.append(next_h[-1])
      # next_h: 1 x lstm_size
      # anchors_w_1: 1 x lstm_size
      anchors_w_1.append(tf.matmul(next_h[-1], self.w_attn_1))
      arc_pointer += ops_each_layer + layer_id

    entropys = tf.stack(entropys)
    self.onehot_entropy = tf.reduce_sum(entropys)

    log_probs = tf.stack(log_probs)
    self.onehot_log_prob = tf.reduce_sum(log_probs)

    skip_count = tf.stack(skip_count)
    self.onehot_skip_count = tf.reduce_sum(skip_count)

    skip_penaltys = tf.stack(skip_penaltys)
    self.onehot_skip_penaltys = tf.reduce_mean(skip_penaltys)