Example #1
    def test_sum(self):
        program = Program()
        with program_guard(program):
            input = layers.data(name="input", shape=[13, 11], dtype='float32')

            out = layers.sum(input)
            self.assertIsNotNone(out)
        print(str(program))
    def build_program(self, dtype):
        with fluid.program_guard(self.main_program, self.startup_program):
            self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 3)
            self.feed_vars.append(
                fluid.data(name="data3", shape=[128, 128], dtype=dtype))

            # subgraph with 2 op nodes
            tmp_0 = layers.sum(
                [self.feed_vars[0], self.feed_vars[1], self.feed_vars[2]])
            tmp_1 = layers.sqrt(tmp_0)
            tmp_2 = layers.mul(tmp_0, self.feed_vars[3])
            # subgraph with 2 op nodes
            tmp_3 = layers.square(layers.sum([tmp_1, tmp_2]))

        self.append_gradients(tmp_3)

        self.num_fused_ops = 4
        self.fetch_list = [tmp_3, self.grad(tmp_0)]
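
For context, a minimal, self-contained sketch of what layers.sum does with a list of tensors, assuming the PaddlePaddle 1.x static-graph API (the variable names and shapes here are made up for illustration):

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name="x", shape=[None, 3], dtype="float32")
    y = fluid.data(name="y", shape=[None, 3], dtype="float32")
    # layers.sum adds the tensors in the list elementwise
    out = layers.sum([x, y])

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
res = exe.run(main_prog,
              feed={"x": np.ones((2, 3), dtype="float32"),
                    "y": 2 * np.ones((2, 3), dtype="float32")},
              fetch_list=[out])
print(res[0])  # a (2, 3) array filled with 3.0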
Example #3
    def _add_average_apply_op(self, block, param):
        param = block._clone_variable(param)
        grad = block._clone_variable(self._get_accumulator('restore', param))
        sum_1 = block._clone_variable(self._get_accumulator('sum_1', param))
        sum_2 = block._clone_variable(self._get_accumulator('sum_2', param))
        sum_3 = block._clone_variable(self._get_accumulator('sum_3', param))
        num_accumulates = block._clone_variable(
            self._get_accumulator('num_accumulates', param))
        old_num_accumulates = block._clone_variable(
            self._get_accumulator('old_num_accumulates', param))
        # backup param value to grad
        layers.assign(input=param, output=grad)
        # param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)
        tmp = layers.sum(x=[num_accumulates, old_num_accumulates])
        sum = layers.sum(x=[sum_1, sum_2, sum_3])
        tmp = layers.cast(
            x=tmp, dtype='float32' if self._dtype is None else self._dtype)
        sum = layers.cast(
            x=sum, dtype='float32' if self._dtype is None else self._dtype)
        layers.ops._elementwise_div(x=sum, y=tmp, out=param)
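
The comment above spells out the apply rule: the restored parameter is the sum of the accumulated sums divided by the total accumulation count. A tiny NumPy illustration of that arithmetic (the values are made up):

import numpy as np

sum_1, sum_2, sum_3 = np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])
num_accumulates, old_num_accumulates = 2.0, 1.0

averaged_param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)
print(averaged_param)  # [3. 4.]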
Example #4
def R2Penalty(fake_img, f):
    # gradient penalty
    fakes = fake_img
    fakes.stop_gradient = False
    fake_logit = f(fakes)

    apply_loss_scaling = lambda x: x * layers.exp(x * np.log(2.0))
    undo_loss_scaling = lambda x: x * layers.exp(-x * np.log(2.0))

    fake_logit = apply_loss_scaling(layers.sum(fake_logit))
    #grads = dygraph.grad(fake_logit, fakes,create_graph=True)
    grads = dygraph.grad(fake_logit, fakes, create_graph=False)
    fake_grads = layers.reshape(grads[0], (fakes.shape[0], -1))
    fake_grads = undo_loss_scaling(fake_grads)
    r2_penalty = layers.reduce_sum(
        layers.elementwise_mul(fake_grads, fake_grads))
    return r2_penalty
Example #5
def R1Penalty(real_img, f):
    # gradient penalty
    reals = real_img
    reals.stop_gradient = False
    #reals = real_img
    real_logit = f(reals)
    apply_loss_scaling = lambda x: x * layers.exp(x * np.log(2.0,
                                                             dtype='float32'))
    undo_loss_scaling = lambda x: x * layers.exp(-x * np.log(2.0,
                                                             dtype='float32'))

    real_logit = apply_loss_scaling(layers.sum(real_logit))
    #grads = dygraph.grad(real_logit, reals, create_graph=True)
    grads = dygraph.grad(real_logit, reals, create_graph=False)
    real_grads = layers.reshape(grads[0], (reals.shape[0], -1))
    real_grads = undo_loss_scaling(real_grads)
    r1_penalty = layers.reduce_sum(
        layers.elementwise_mul(real_grads, real_grads))
    return r1_penalty
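
A minimal usage sketch for the penalty above, assuming the PaddlePaddle 1.x dygraph API; the single-Linear stand-in discriminator and the image shape are placeholders for illustration, not part of the original code:

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
import paddle.fluid.layers as layers

with fluid.dygraph.guard():
    # stand-in discriminator: flatten the image and apply one Linear layer
    disc = dygraph.Linear(3 * 32 * 32, 1)
    f = lambda x: disc(layers.reshape(x, (x.shape[0], -1)))

    real_img = dygraph.to_variable(
        np.random.rand(4, 3, 32, 32).astype("float32"))
    penalty = R1Penalty(real_img, f)   # scalar: sum of squared input gradients
    print(penalty.numpy())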
with open(data_dir + '/sample.data', 'w') as fout:
    for slot in slots:
        print('%s\t%s' % (slot, ' '.join(
            ['%.2f' % random.random() for i in range(emb_size)])),
              file=fout)

train_program = fluid.Program()
start_program = fluid.Program()
with fluid.program_guard(train_program, start_program):
    bows = []
    for slot in slots:
        bow = fluid.layers.data(name=slot, shape=[emb_size], dtype='float32')
        bows.append(bow)

    bow_sum = layers.sum(bows)
    data_norm = layers.data_norm(input=bow_sum)
    fc1 = layers.fc(input=data_norm, size=8, act='relu')
    #print(fc1.name) # fc_0.tmp_2
    fc2 = layers.fc(input=fc1, size=1)
    #print(fc2.name) # fc_1.tmp_1
    sigmoid = layers.sigmoid(fc2)

print('\nall variables:')
for var in train_program.current_block().vars:
    print(var)

print('\nall parameters:')
for param in train_program.current_block().all_parameters():
    print(param.name)
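
If you want to actually execute the program built above, a minimal run sketch (assuming the slots, emb_size, start_program, train_program and sigmoid variables defined earlier, and a CPU executor):

exe = fluid.Executor(fluid.CPUPlace())
exe.run(start_program)

# one random batch of 4 rows per slot, matching shape=[emb_size]
feed = {slot: np.random.rand(4, emb_size).astype('float32') for slot in slots}
out = exe.run(train_program, feed=feed, fetch_list=[sigmoid])
print(out[0].shape)  # (4, 1)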
Example #7
def transformer_gat_pgl(gw,
                        feature,
                        hidden_size,
                        name,
                        num_heads=4,
                        attn_drop=0,
                        edge_feature=None,
                        concat=True,
                        is_test=False):
    '''transformer_gat_pgl
    '''

    def send_attention(src_feat, dst_feat, edge_feat):
        if edge_feat is None or not edge_feat:
            output = src_feat["k_h"] * dst_feat["q_h"]
            output = fluid.layers.reduce_sum(output, -1)
            return {
                "alpha": output,
                "v": src_feat["v_h"]
            }  # batch x h     batch x h x feat
        else:
            edge_feat = edge_feat["edge"]
            edge_feat = fluid.layers.reshape(edge_feat,
                                             [-1, num_heads, hidden_size])
            output = (src_feat["k_h"] + edge_feat) * dst_feat["q_h"]
            output = fluid.layers.reduce_sum(output, -1)
            return {
                "alpha": output,
                "v": (src_feat["v_h"] + edge_feat)
            }  # batch x h     batch x h x feat

    def reduce_attention(msg):
        alpha = msg["alpha"]  # lod-tensor (batch_size, seq_len, num_heads)
        h = msg["v"]
        alpha = paddle_helper.sequence_softmax(alpha)
        old_h = h

        if attn_drop > 1e-15:
            alpha = fluid.layers.dropout(
                alpha,
                dropout_prob=attn_drop,
                is_test=is_test,
                dropout_implementation="upscale_in_train")
        h = h * alpha
        #h = fluid.layers.lod_reset(h, old_h)
        h = fluid.layers.sequence_pool(h, "sum")
        if concat:
            h = fluid.layers.reshape(h, [-1, num_heads * hidden_size])
        else:
            h = fluid.layers.reduce_mean(h, dim=1)
        return h

    q_w_attr = fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer())
    q_bias_attr = fluid.ParamAttr(
        initializer=fluid.initializer.ConstantInitializer(0.0))
    q = fluid.layers.fc(feature,
                        hidden_size * num_heads,
                        name=name + '_q_weight',
                        param_attr=q_w_attr,
                        bias_attr=q_bias_attr)
    q = q / (hidden_size**0.5)

    k_w_attr = fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer())
    k_bias_attr = fluid.ParamAttr(
        initializer=fluid.initializer.ConstantInitializer(0.0))
    k = fluid.layers.fc(feature,
                        hidden_size * num_heads,
                        name=name + '_k_weight',
                        param_attr=k_w_attr,
                        bias_attr=k_bias_attr)

    v_w_attr = fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer())
    v_bias_attr = fluid.ParamAttr(
        initializer=fluid.initializer.ConstantInitializer(0.0))
    v = fluid.layers.fc(feature,
                        hidden_size * num_heads,
                        name=name + '_v_weight',
                        param_attr=v_w_attr,
                        bias_attr=v_bias_attr)

    reshape_q = fluid.layers.reshape(q, [-1, num_heads, hidden_size])
    reshape_k = fluid.layers.reshape(k, [-1, num_heads, hidden_size])
    reshape_v = fluid.layers.reshape(v, [-1, num_heads, hidden_size])

    if not isinstance(gw, list):
        msg = gw.send(
            send_attention,
            nfeat_list=[("q_h", reshape_q), ("k_h", reshape_k),
                        ("v_h", reshape_v)],
            efeat_list=edge_feature)
        output = gw.recv(msg, reduce_attention)
        return output
    else:
        checkpoints = []
        outputs = []
        for batch_no, (batch_gw,
                       batch_edge_feat) in enumerate(zip(gw, edge_feature)):
            msg = batch_gw.send(
                send_attention,
                nfeat_list=[("q_h", reshape_q), ("k_h", reshape_k),
                            ("v_h", reshape_v)],
                efeat_list=batch_edge_feat)
            output = batch_gw.recv(msg, reduce_attention)
            outputs.append(output)
        outputs = fluid.layers.sum(outputs)
        return outputs, checkpoints
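
To make the send/recv logic above more concrete, here is a small NumPy sketch of what send_attention plus reduce_attention compute for a single destination node and a single head (shapes and values are made up; edge features and dropout are omitted):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

hidden_size = 8
q_dst = np.random.rand(hidden_size) / (hidden_size ** 0.5)  # scaled query, as q / hidden_size**0.5
k_src = np.random.rand(5, hidden_size)                      # keys of 5 source (neighbour) nodes
v_src = np.random.rand(5, hidden_size)                      # values of the same neighbours

alpha = softmax((k_src * q_dst).sum(axis=-1))   # "alpha" from send_attention + sequence_softmax
h = (alpha[:, None] * v_src).sum(axis=0)        # weighted sum, i.e. sequence_pool(h, "sum")
print(h.shape)                                  # (8,)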