def test_errors(self):
    with program_guard(Program(), Program()):
        x = layers.zeros(dtype='int64', shape=[3, 100])
        i = layers.zeros(dtype='int64', shape=[1])
        rank_table_tensor = core.LoDTensor()
        rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]])
        rank_table_tensor.set(
            np.random.random(size=(6, 1)).astype('float32'), core.CPUPlace())
        rank_table = np.random.random(size=(6, 1)).astype('float32')

        # The type of x in shrink_rnn_memory must be Variable.
        def test_x_type():
            out = shrink_memory(x=1, i=i, table=rank_table_tensor)

        self.assertRaises(TypeError, test_x_type)

        # The type of i in shrink_rnn_memory must be Variable.
        def test_i_type():
            out = shrink_memory(x=x, i=0, table=rank_table_tensor)

        self.assertRaises(TypeError, test_i_type)

        # The type of table in shrink_rnn_memory must be Variable.
        def test_table_type():
            out = shrink_memory(x=x, i=i, table=rank_table)

        self.assertRaises(TypeError, test_table_type)
def _test_read_write(x):
    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    arr = layers.array_write(x=x[0], i=i)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[1], i=i, array=arr)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[2], i=i, array=arr)

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    a0 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a1 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a2 = layers.array_read(array=arr, i=i)

    mean_a0 = layers.mean(a0)
    mean_a1 = layers.mean(a1)
    mean_a2 = layers.mean(a2)
    a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])

    mean_x0 = layers.mean(x[0])
    mean_x1 = layers.mean(x[1])
    mean_x2 = layers.mean(x[2])
    x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])

    return a_sum, x_sum
def simple_net(self):
    d0 = layers.data(
        "d0", shape=[10], append_batch_size=False, dtype='float32')
    d1 = layers.data(
        "d1", shape=[10], append_batch_size=False, dtype='float32')
    d2 = layers.data(
        "d2", shape=[10], append_batch_size=False, dtype='float32')
    # fill_constant npu op doesn't support int64
    i = layers.zeros(shape=[1], dtype='int32')
    i = layers.cast(i, 'int64')
    i.stop_gradient = True
    init = layers.zeros(shape=[10], dtype='float32')
    mem_array = layers.array_write(x=init, i=i)
    data_array = layers.array_write(x=d0, i=i)
    i = layers.increment(i)
    layers.array_write(d1, i, array=data_array)
    i = layers.increment(i)
    layers.array_write(d2, i, array=data_array)
    i = layers.zeros(shape=[1], dtype='int32')
    i = layers.cast(i, 'int64')
    i.stop_gradient = True
    array_len = layers.fill_constant(shape=[1], dtype='int32', value=5)
    array_len = layers.cast(array_len, 'int64')
    array_len.stop_gradient = True
    cond = layers.ones(shape=[1], dtype='int32')
    cond = layers.cast(cond, 'bool')
    j = layers.fill_constant(shape=[1], dtype='int32', value=1)
    j = layers.cast(j, 'int64')
    j.stop_gradient = True
    array_len2 = layers.fill_constant(shape=[1], dtype='int32', value=3)
    array_len2 = layers.cast(array_len2, 'int64')
    array_len2.stop_gradient = True
    cond2 = layers.logical_or(x=j, y=array_len2)
    cond2 = layers.ones(shape=[1], dtype='int32')
    cond2 = layers.cast(cond2, 'bool')
    while_op = layers.While(cond=cond)
    while_op2 = layers.While(cond=cond2)
    with while_op.block():
        d = layers.array_read(array=data_array, i=i)
        prev = layers.array_read(array=mem_array, i=i)
        result = layers.sums(input=[d, prev])

        i = layers.increment(x=i, in_place=True)
        layers.array_write(result, i=i, array=mem_array)
        layers.less_than(x=i, y=array_len, cond=cond)

        with while_op2.block():
            d2 = layers.array_read(array=data_array, i=j)
            prev2 = layers.array_read(array=mem_array, i=j)
            result2 = layers.sums(input=[d2, prev2])

            j = layers.increment(x=j, in_place=True)
            layers.array_write(result2, i=j, array=mem_array)
            layers.less_than(x=j, y=array_len2, cond=cond2)

    sum_result = layers.array_read(array=mem_array, i=j)
    loss = layers.mean(sum_result)
    return loss, sum_result
def no_objs_3():
    masks = L.zeros([1, 1, 1], 'float32') - 1.0
    classes = L.zeros([1, ], 'int64') - 1
    scores = L.zeros([1, ], 'float32') - 2.0
    return masks, classes, scores
def test_nested_net(self):
    def external_cond(i, j, init, sums):
        return layers.less_than(i, loop_len1)

    def external_body(i, j, init, sums):
        def internal_cond(j, init, sums):
            return layers.less_than(j, loop_len2)

        def internal_body(j, init, sums):
            init = layers.elementwise_add(x=init, y=ones)
            sums = layers.elementwise_add(x=init, y=sums)
            j = layers.increment(j)
            return [j, init, sums]

        result = layers.while_loop(internal_cond, internal_body,
                                   [j, init, sums])
        j = result[0]
        init = result[1]
        sums = result[2]
        sums = layers.elementwise_add(x=init, y=sums)
        i = layers.increment(i)
        return [i, j, init, sums]

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = layers.zeros(shape=[1], dtype='int64')
        j = layers.zeros(shape=[1], dtype='int64')
        init = fluid.data(name='init', shape=[3, 3], dtype='float32')
        sums = fluid.data(name='sums', shape=[3, 3], dtype='float32')
        loop_len1 = layers.fill_constant(shape=[1], dtype='int64', value=2)
        loop_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)
        ones = layers.fill_constant(shape=[3, 3], dtype='float32', value=1)

        out = layers.while_loop(external_cond, external_body,
                                [i, j, init, sums])

    data = np.random.rand(3, 3).astype('float32')
    data_sums = np.zeros([3, 3]).astype('float32')

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program,
                  feed={'init': data,
                        'sums': data_sums},
                  fetch_list=out)
    for i in range(3):
        data = np.add(data, 1)
        data_sums = np.add(data, data_sums)
    for j in range(2):
        data_sums = np.add(data, data_sums)
    self.assertTrue(np.allclose(np.asarray(res[3]), data_sums))
def test_simple_forward(self):
    d0 = layers.data(
        "d0", shape=[10], append_batch_size=False, dtype='float32')
    d1 = layers.data(
        "d1", shape=[10], append_batch_size=False, dtype='float32')
    d2 = layers.data(
        "d2", shape=[10], append_batch_size=False, dtype='float32')
    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True
    init = layers.zeros(shape=[10], dtype='float32')
    mem_array = layers.array_write(x=init, i=i)
    data_array = layers.array_write(x=d0, i=i)
    i = layers.increment(i)
    layers.array_write(d1, i, array=data_array)
    i = layers.increment(i)
    layers.array_write(d2, i, array=data_array)
    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True
    array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
    array_len.stop_gradient = True
    cond = layers.less_than(x=i, y=array_len)

    while_op = layers.While(cond=cond)
    with while_op.block():
        d = layers.array_read(array=data_array, i=i)
        prev = layers.array_read(array=mem_array, i=i)
        result = layers.sums(input=[d, prev])

        i = layers.increment(x=i, in_place=True)
        layers.array_write(result, i=i, array=mem_array)
        layers.less_than(x=i, y=array_len, cond=cond)

    sum_result = layers.array_read(array=mem_array, i=i)
    loss = layers.mean(sum_result)

    append_backward(loss)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    d = []

    for i in range(3):
        d.append(numpy.random.random(size=[10]).astype('float32'))

    outs = exe.run(feed={'d0': d[0],
                         'd1': d[1],
                         'd2': d[2]},
                   fetch_list=[sum_result])
    self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
def matrix_nms(bboxes, scores, score_threshold, post_threshold, nms_top_k,
               keep_top_k, use_gaussian=False, gaussian_sigma=2.):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top nms_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if nms_top_k > 0 and len(sort_inds) > nms_top_k:
        sort_inds = sort_inds[:nms_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Matrix NMS
    kernel = 'gaussian' if use_gaussian else 'linear'
    cate_scores = _matrix_nms(bboxes, cate_labels, cate_scores,
                              kernel=kernel, sigma=gaussian_sigma)

    # filter.
    keep = L.where(cate_scores >= post_threshold)
    if len(keep) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    bboxes = L.gather(bboxes, keep)
    cate_scores = L.gather(cate_scores, keep)
    cate_labels = L.gather(cate_labels, keep)

    # sort and keep keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
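# Note: the `_matrix_nms` kernel called above is defined elsewhere. As a minimal
# NumPy sketch of the score-decay rule from the SOLOv2 paper (illustrative only:
# it assumes `ious` is the upper-triangular NxN IoU matrix of the score-sorted
# detections with IoUs strictly below 1, and omits the per-class handling):
def _matrix_nms_decay_sketch(ious, sigma=2.0, use_gaussian=False):
    import numpy as np
    # cmax[i]: largest IoU detection i has with any higher-scored detection
    ious_cmax = ious.max(axis=0)
    if use_gaussian:
        # decay[i, j] = exp(-sigma * (iou_ij^2 - cmax_i^2))
        decay = np.exp(-sigma * (ious ** 2 - ious_cmax[:, None] ** 2))
    else:
        # decay[i, j] = (1 - iou_ij) / (1 - cmax_i)
        decay = (1.0 - ious) / (1.0 - ious_cmax[:, None])
    # each detection keeps the strongest (smallest) decay over all suppressors
    return decay.min(axis=0)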
def get_embedding(self, num_embeddings, embedding_dim, padding_idx=None):
    """Build sinusoidal embeddings.

    This matches the implementation in tensor2tensor, but differs slightly
    from the description in Section 3.5 of "Attention Is All You Need".
    """
    import math

    half_dim = embedding_dim // 2
    # scalar log of the base wavelength; gives frequencies 10000^(-i / (half_dim - 1))
    emb = math.log(10000.) / (half_dim - 1)
    emb = layers.exp(layers.arange(
        start=0, end=half_dim, dtype='float32') * -emb)

    # [num_embeddings, embedding_dim // 2]; positions are relative offsets
    emb = layers.unsqueeze(layers.arange(-num_embeddings // 2,
                                         num_embeddings // 2, dtype='float32'), axis=1) * \
        layers.unsqueeze(emb, axis=0)

    # [num_embeddings, embedding_dim]: sin half then cos half
    emb = layers.concat([layers.sin(emb), layers.cos(emb)], dim=1)
    if embedding_dim % 2 == 1:
        # zero-pad the odd channel
        emb = layers.concat(
            [emb, layers.zeros(shape=(num_embeddings, 1))], dim=1)
    if padding_idx is not None:
        emb[padding_idx, :] = 0
    self.origin_shift = num_embeddings // 2
    return emb
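# Note: up to the concatenated (rather than interleaved) sin/cos halves and the
# shifted position range [-num_embeddings//2, num_embeddings//2), the table
# built above is the standard sinusoidal scheme: for position p and channel i,
#   emb[p, i]            = sin(p * 10000^(-i / (half_dim - 1))),  i < half_dim
#   emb[p, half_dim + i] = cos(p * 10000^(-i / (half_dim - 1)))
# The (half_dim - 1) divisor is the tensor2tensor variant the docstring refers
# to; Section 3.5 of "Attention Is All You Need" divides by half_dim instead.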
def ernie_recv(feat):
    """doc"""
    num_neighbor = self.config.samples[0]
    pad_value = L.zeros([1], "int64")
    out, _ = L.sequence_pad(feat, pad_value=pad_value, maxlen=num_neighbor)
    out = L.reshape(out, [0, self.config.max_seqlen * num_neighbor])
    return out
def net_profiler(self, state, profile_path='/tmp/profile'):
    enable_if_gpu = state == 'GPU' or state == "All"
    if enable_if_gpu and not core.is_compiled_with_cuda():
        return
    startup_program = fluid.Program()
    main_program = fluid.Program()

    with fluid.program_guard(main_program, startup_program):
        image = fluid.layers.data(name='x', shape=[784], dtype='float32')
        hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
        i = layers.zeros(shape=[1], dtype='int64')
        counter = fluid.layers.zeros(
            shape=[1], dtype='int64', force_cpu=True)
        until = layers.fill_constant([1], dtype='int64', value=10)
        data_arr = layers.array_write(hidden1, i)
        cond = fluid.layers.less_than(x=counter, y=until)
        while_op = fluid.layers.While(cond=cond)
        with while_op.block():
            hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
            layers.array_write(hidden_n, i, data_arr)
            fluid.layers.increment(x=counter, value=1, in_place=True)
            layers.less_than(x=counter, y=until, cond=cond)

        hidden_n = layers.array_read(data_arr, i)
        hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
        predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
        label = fluid.layers.data(name='y', shape=[1], dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(cost)
        batch_size = fluid.layers.create_tensor(dtype='int64')
        batch_acc = fluid.layers.accuracy(
            input=predict, label=label, total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001,
                                             momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

    place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    pass_acc_calculator = fluid.average.WeightedAverage()
    with profiler.profiler(state, 'total', profile_path) as prof:
        for iter in range(10):
            if iter == 2:
                profiler.reset_profiler()
            x = np.random.random((32, 784)).astype("float32")
            y = np.random.randint(0, 10, (32, 1)).astype("int64")

            outs = exe.run(main_program,
                           feed={'x': x,
                                 'y': y},
                           fetch_list=[avg_cost, batch_acc, batch_size])
            acc = np.array(outs[1])
            b_size = np.array(outs[2])
            pass_acc_calculator.add(value=acc, weight=b_size)
            pass_acc = pass_acc_calculator.eval()
def no_nms(bboxes, scores, score_threshold, keep_top_k):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if keep_top_k > 0 and len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
def __init__(self, dim=300, K=65536, m=0.999, T=0.07, mlp=False):
    """
    dim: feature dimension (default: 300)
    K: queue size; number of negative keys (default: 65536)
    m: moco momentum of updating key encoder (default: 0.999)
    T: softmax temperature (default: 0.07)
    """
    super(MoCo, self).__init__()

    self.K = K
    self.m = m
    self.T = T

    # create the encoders
    self.encoder_q = ErnieModelForSequenceClassification.from_pretrained(
        'ernie-2.0-large-en', num_labels=dim)
    self.encoder_k = ErnieModelForSequenceClassification.from_pretrained(
        'ernie-2.0-large-en', num_labels=dim)

    if mlp:
        dim_mlp = 1024
        self.encoder_q.classifier = D.Sequential(
            D.Linear(dim_mlp, dim_mlp, act='relu'),
            self.encoder_q.classifier)
        self.encoder_k.classifier = D.Sequential(
            D.Linear(dim_mlp, dim_mlp, act='relu'),
            self.encoder_k.classifier)

    for param_q, param_k in zip(self.encoder_q.parameters(),
                                self.encoder_k.parameters()):
        # initialize the key encoder from the query encoder; a plain
        # `param_k = param_q` would only rebind the loop variable
        param_k.set_value(param_q.numpy())
        param_k.stop_gradient = True  # not updated by gradient

    # create the queue
    self.queue = L.randn([dim, K])
    self.queue = norm(self.queue, dim=0)

    self.queue_ptr = L.zeros([1], dtype='int32')
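# Note: the `_momentum_update_key_encoder` that MoCo calls during training is
# not shown here. It applies the momentum rule from the MoCo paper,
#     theta_k <- m * theta_k + (1 - m) * theta_q,
# without gradients. A minimal dygraph sketch, assuming parameters expose
# `set_value`/`numpy` (names are illustrative, not this project's method):
def _momentum_update_key_encoder_sketch(encoder_q, encoder_k, m=0.999):
    for param_q, param_k in zip(encoder_q.parameters(),
                                encoder_k.parameters()):
        param_k.set_value((param_k * m + param_q * (1.0 - m)).numpy())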
def no_objs_2(boxes, classes, scores):
    keep = P.zeros((1, 1), 'int64')
    boxes = P.gather(boxes, keep)
    classes = P.gather(classes, keep)
    scores = P.gather(scores, keep)
    scores -= 2.0  # deliberately negative scores so the Python side filters them out
    return boxes, classes, scores
def add_input(self, x_t):
    """This method works similarly to forward but in a
    `step-in-step-out` fashion.

    Args:
        x_t (Variable): shape(B, C_in, T=1), dtype float32, input of Conv1D.

    Returns:
        Variable: shape(B, C_out, T=1), dtype float32, output of Conv1D.
    """
    batch_size, c_in, _ = x_t.shape
    if self._buffer is None:
        self._buffer = F.zeros(
            (batch_size, c_in, self.receptive_field), dtype=x_t.dtype)
    # shift the buffer one step and append the new frame
    self._buffer = F.concat([self._buffer[:, :, 1:], x_t], -1)
    if self._dilation[1] > 1:
        input = F.strided_slice(
            self._buffer,
            axes=[2],
            starts=[0],
            ends=[self.receptive_field],
            strides=[self._dilation[1]])
    else:
        input = self._buffer
    input = F.reshape(input, (batch_size, -1))
    y_t = F.matmul(input, self._reshaped_weight, transpose_y=True)
    y_t = y_t + self.bias
    y_t = F.unsqueeze(y_t, [-1])
    return y_t
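# Usage sketch (hypothetical names, not from the original source): `add_input`
# is meant to be driven one frame at a time during autoregressive synthesis,
# with the layer's internal `_buffer` standing in for the full input history:
#
#     outputs = []
#     for t in range(num_steps):
#         # one (B, C_in, 1) frame in, one (B, C_out, 1) frame out
#         outputs.append(conv.add_input(x[:, :, t:t + 1]))
#     y = F.concat(outputs, axis=-1)   # should match a batch forward over all T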
def test_var_list(self):
    def cond(i, mem):
        return layers.less_than(i, ten)

    def body(i, mem):
        mem = layers.elementwise_add(x=mem, y=one)
        i = layers.increment(i)
        return [i, mem]

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = layers.zeros(shape=[1], dtype='int64')
        ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
        mem = fluid.data(name='mem', shape=[10], dtype='float32')
        one = layers.fill_constant(shape=[10], dtype='float32', value=1)
        out = layers.while_loop(cond, body, [i, mem])

        data = np.random.rand(10).astype('float32')
        data_one = np.ones(10).astype('float32')

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program, feed={'mem': data}, fetch_list=out)

    for i in range(10):
        data = np.add(data, data_one)
    self.assertTrue(np.allclose(np.asarray(res[1]), data))
def test_mul(self):
    i = zeros(shape=[1], dtype='int64')
    a = data(name='a', shape=[784], dtype='float32')
    array = array_write(x=a, i=i)

    i = increment(i)
    b = data(
        name='b',
        shape=[784, 100],
        dtype='float32',
        append_batch_size=False)
    array_write(x=b, i=i, array=array)

    i = increment(i)
    out = mul(x=a, y=b)
    array_write(x=out, i=i, array=array)

    a_np = numpy.random.random((100, 784)).astype('float32')
    b_np = numpy.random.random((784, 100)).astype('float32')

    exe = Executor()
    res, res_array = exe.run(feed={'a': a_np,
                                   'b': b_np},
                             fetch_list=[out, array])

    self.assertEqual((100, 100), res.shape)
    self.assertTrue(numpy.allclose(res, numpy.dot(a_np, b_np)))
    self.assertTrue(numpy.allclose(res_array[0], a_np))
    self.assertTrue(numpy.allclose(res_array[1], b_np))
    self.assertTrue(numpy.allclose(res_array[2], res))
def compute_position_embedding(radians, speaker_position_rate):
    """Compute sin/cos interleaved matrix from the radians.

    Args:
        radians (Variable): shape(n_vocab, embed_dim), dtype float32,
            the radians matrix.
        speaker_position_rate (Variable): shape(B, ), speaker positioning rate.

    Returns:
        Variable: shape(B, n_vocab, embed_dim), the sin, cos interleaved matrix.
    """
    _, embed_dim = radians.shape
    batch_size = speaker_position_rate.shape[0]
    scaled_radians = F.elementwise_mul(
        F.expand(F.unsqueeze(radians, [0]), [batch_size, 1, 1]),
        speaker_position_rate,
        axis=0)

    odd_mask = (np.arange(embed_dim) % 2).astype(np.float32)
    odd_mask = dg.to_variable(odd_mask)

    out = odd_mask * F.cos(scaled_radians) \
        + (1 - odd_mask) * F.sin(scaled_radians)
    out = F.concat(
        [F.zeros((batch_size, 1, embed_dim), radians.dtype), out[:, 1:, :]],
        axis=1)
    return out
def test_var_dict(self):
    def cond(i, ten, test_dict, test_list, test_list_dict):
        return layers.less_than(i, ten)

    def body(i, ten, test_dict, test_list, test_list_dict):
        test_dict["test_key"] = i
        test_dict["test_key"] += 1

        test_list[0] = fluid.layers.reshape(test_list[0], [2, -1]) + 1

        test_list_dict[0]["test_key"] += 1
        test_list_dict[0]["test_key"] = fluid.layers.relu(
            test_list_dict[0]["test_key"])

        i = layers.increment(i)
        return [i, ten, test_dict, test_list, test_list_dict]

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = layers.zeros(shape=[1], dtype='int64')
        ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
        test_data = layers.fill_constant(shape=[1], dtype='int64', value=0)

        test_dict = {"test_key": test_data}
        test_list = [
            layers.fill_constant(
                shape=[1, 2], dtype='int64', value=0)
        ]
        test_list_dict = [{
            "test_key": layers.fill_constant(
                shape=[1], dtype='float32', value=0)
        }]

        i, ten, test_dict, test_list, test_list_dict = layers.while_loop(
            cond, body, [i, ten, test_dict, test_list, test_list_dict])

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program,
                  fetch_list=[
                      test_dict["test_key"], test_list[0],
                      test_list_dict[0]["test_key"]
                  ])
    self.assertTrue(
        np.allclose(
            np.asarray(res[0]),
            np.full(shape=(1), fill_value=10, dtype=np.int64)))
    self.assertTrue(
        np.allclose(
            np.asarray(res[1]),
            np.full(shape=(2, 1), fill_value=10, dtype=np.int64)))
    self.assertTrue(
        np.allclose(
            np.asarray(res[2]),
            np.full(shape=(1), fill_value=10, dtype=np.float32)))
def ernie_recv(feat):
    """doc"""
    # TODO maxlen 400
    # pad_value = L.cast(L.assign(input=np.array([0], dtype=np.int32)), "int64")
    pad_value = L.zeros([1], "int64")
    out, _ = L.sequence_pad(feat, pad_value=pad_value, maxlen=10)
    out = L.reshape(out, [0, 400])
    return out
def test_exceptions(self):
    i = layers.zeros(shape=[2], dtype='int64')
    array_len = layers.fill_constant(shape=[2], dtype='int64', value=1)
    cond = layers.less_than(x=i, y=array_len)
    with self.assertRaises(TypeError):
        layers.While(cond=cond)
    cond = layers.cast(cond, dtype='float64')
    with self.assertRaises(TypeError):
        layers.While(cond=cond)
def no_objs_2(seg_masks, masks, sum_masks, scores, classes):
    keep = L.zeros((1, ), np.int64)
    keep.stop_gradient = True
    seg_masks = L.gather(seg_masks, keep)   # [M2, s4, s4] binary masks of the M2 objects
    masks = L.gather(masks, keep)           # [M2, s4, s4] mask probabilities of the M2 objects
    sum_masks = L.gather(sum_masks, keep)   # [M2, ] mask areas of the M2 objects
    scores = L.gather(scores, keep) - 99.0  # [M2, ] object scores; made negative so they are filtered out later
    classes = L.gather(classes, keep)       # [M2, ] class ids of the M2 objects
    return seg_masks, masks, sum_masks, scores, classes
def test_array_length(self):
    tmp = layers.zeros(shape=[10], dtype='int32')
    i = layers.fill_constant(shape=[1], dtype='int64', value=10)
    arr = layers.array_write(tmp, i=i)
    arr_len = layers.array_length(arr)
    cpu = core.CPUPlace()
    exe = Executor(cpu)
    result = exe.run(fetch_list=[arr_len])[0]
    # writing at index 10 grows the array to length 11
    self.assertEqual(11, result[0])
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None,
            pos=None, query_pos=None):
    output = tgt

    intermediate = []

    assert tgt_mask is None, \
        "Computing an attn_mask from tgt_mask is not implemented."

    if memory_mask is not None:
        bs, tgt_length = tgt.shape[:2]
        memory_length = memory.shape[1]
        attn_mask = L.zeros([bs, tgt_length, memory_length],
                            dtype="float32")
        memory_mask = L.expand(
            L.unsqueeze(memory_mask, [1]),
            (1, tgt_length, 1))  # [bs, tgt_length, memory_length]
        attn_mask = attn_mask.numpy()
        memory_mask = memory_mask.numpy()
        attn_mask[memory_mask] = -1e8
        attn_mask = dg.to_variable(attn_mask)
        attn_mask = L.expand(
            L.unsqueeze(attn_mask, [1]),
            (1, self.nhead, 1, 1))  # [bs, nhead, tgt_length, memory_length]
        memory_mask = attn_mask

    attention_weight = []
    for layer in self.layers:
        output, self_attn_weights, multihead_attn_weights = layer(
            output, memory, tgt_mask=tgt_mask, memory_mask=memory_mask,
            pos=pos, query_pos=query_pos)
        attention_weight.append(
            (self_attn_weights, multihead_attn_weights))
        if self.return_intermediate:
            intermediate.append(self.norm(output))

    if self.norm is not None:
        output = self.norm(output)
        if self.return_intermediate:
            intermediate.pop()
            intermediate.append(output)

    if self.return_intermediate:
        return L.stack(intermediate), attention_weight

    return L.unsqueeze(output, [0]), attention_weight
def _build_sentence_ids(self, src_ids):
    src_shape = L.shape(src_ids)
    src_seqlen = src_shape[1]
    src_batch = src_shape[0]

    slot_seqlen = self.slot_seqlen

    zeros = L.zeros([src_batch, slot_seqlen], "int64")
    ones = L.ones([src_batch, src_seqlen - slot_seqlen], "int64")
    sentence_ids = L.concat([zeros, ones], 1)
    sentence_ids.stop_gradient = True
    return sentence_ids
def forward(self, inputs, keys, values, lengths, start_index,
            speaker_embed=None, state=None,
            force_monotonic_attention=None, coeffs=None, window=(0, 4)):
    hidden = inputs
    for layer in self.prenet:
        hidden = layer(hidden, speaker_embed)

    attentions = []  # every layer of (B, T_dec, T_enc) attention
    final_state = []  # layers * (B, (k-1)d, C_dec)
    batch_size = inputs.shape[0]
    causal_padding_shape = (batch_size, self.kernel_size - 1,
                            self.decoder_dim)

    for i in range(len(self.causal_convs)):
        if state is None:
            padding = F.zeros(causal_padding_shape, dtype="float32")
        else:
            padding = state[i]
        new_state = F.concat([padding, hidden],
                             axis=1)  # => to be used next step
        # causal conv, (B, T, C)
        hidden = self.causal_convs[i](hidden, speaker_embed,
                                      padding=padding)
        # attn
        prev_coeffs = None if coeffs is None else coeffs[i]
        force_monotonic = False if force_monotonic_attention is None \
            else force_monotonic_attention[i]
        context, attention = self.attention_blocks[i](
            hidden, keys, values, lengths, speaker_embed, start_index,
            force_monotonic, prev_coeffs, window)
        # residual connection (B, T_dec, C_dec)
        hidden = F.scale(hidden + context, np.sqrt(0.5))

        attentions.append(attention)  # layers * (B, T_dec, T_enc)
        # new state: shift a step, layers * (B, T, C)
        new_state = new_state[:, -(self.kernel_size - 1):, :]
        final_state.append(new_state)

    # predict mel spectrogram (B, 1, T_dec, r * C_in)
    decoded = self.out_affine(hidden)
    if self.has_bias:
        decoded *= F.sigmoid(
            F.unsqueeze(self.out_sp_affine(speaker_embed), [1]))
    return decoded, hidden, attentions, final_state
def compute_neuron_head_importance(args, model, dev_ds, place, model_cfg):
    n_layers, n_heads = model_cfg['num_hidden_layers'], model_cfg[
        'num_attention_heads']
    head_importance = L.zeros(shape=[n_layers, n_heads], dtype='float32')
    head_mask = L.ones(shape=[n_layers, n_heads], dtype='float32')
    head_mask.stop_gradient = False

    intermediate_weight = []
    intermediate_bias = []
    output_weight = []

    for name, w in model.named_parameters():
        if 'ffn.i' in name:
            if len(w.shape) > 1:
                intermediate_weight.append(w)
            else:
                intermediate_bias.append(w)

        if 'ffn.o' in name:
            if len(w.shape) > 1:
                output_weight.append(w)

    neuron_importance = []
    for w in intermediate_weight:
        neuron_importance.append(
            np.zeros(shape=[w.shape[1]], dtype='float32'))

    eval_task_names = ('mnli', 'mnli-mm') if args.task == 'mnli' else (
        args.task, )

    for eval_task in eval_task_names:
        for batch in dev_ds.start(place):
            ids, sids, label = batch
            out = model(
                ids, sids, labels=label,
                head_mask=head_mask,
                num_layers=model_cfg['num_hidden_layers'])
            loss = out[0]
            loss.backward()
            head_importance += L.abs(FD.to_variable(head_mask.gradient()))

            for w1, b1, w2, current_importance in zip(
                    intermediate_weight, intermediate_bias, output_weight,
                    neuron_importance):
                current_importance += np.abs(
                    (np.sum(w1.numpy() * w1.gradient(), axis=0) +
                     b1.numpy() * b1.gradient()))
                current_importance += np.abs(
                    np.sum(w2.numpy() * w2.gradient(), axis=1))

    return head_importance, neuron_importance
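# Note: this follows the head-pruning analysis of Michel et al. (2019), "Are
# Sixteen Heads Really Better than One?": head importance is accumulated as
# |dL/d(head_mask)| evaluated at head_mask = 1 (the `head_mask.gradient()`
# accumulation above), and neuron importance for the FFN is the first-order
# Taylor score |w * dL/dw|, summed over the input weights plus bias and over
# the output weights of each intermediate neuron.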
def get_attention_mask(mask, nhead):
    # mask: [bs, L] -> attn_mask: [bs, nhead, L, L]
    bs, l = mask.shape
    row_mask = L.expand(L.unsqueeze(mask, [2]), (1, 1, l))  # [bs, L, L]
    col_mask = L.expand(L.unsqueeze(mask, [1]), (1, l, 1))  # [bs, L, L]
    mask = L.logical_or(row_mask, col_mask)

    attn_mask = L.zeros([bs, l, l], dtype="float32")
    attn_mask = attn_mask.numpy()
    mask = mask.numpy()
    attn_mask[mask] = -1e8
    attn_mask = dg.to_variable(attn_mask)
    attn_mask = L.expand(
        L.unsqueeze(attn_mask, [1]), (1, nhead, 1, 1))  # [bs, nhead, L1, L2]
    return attn_mask
def U(self):
    r"""The unitary matrix form of the quantum circuit.

    Returns:
        ComplexVariable: the unitary matrix representation of the current circuit

    Code example:

    .. code-block:: python

        from paddle import fluid
        from paddle_quantum.circuit import UAnsatz
        n = 2
        with fluid.dygraph.guard():
            cir = UAnsatz(n)
            cir.h(0)
            cir.cnot([0, 1])
            matrix = cir.U
            print("The unitary matrix of the circuit for Bell state preparation is\n", matrix.numpy())

    ::

        The unitary matrix of the circuit for Bell state preparation is
        [[ 0.70710678+0.j  0.        +0.j  0.70710678+0.j  0.        +0.j]
         [ 0.        +0.j  0.70710678+0.j  0.        +0.j  0.70710678+0.j]
         [ 0.        +0.j  0.70710678+0.j  0.        +0.j -0.70710678+0.j]
         [ 0.70710678+0.j  0.        +0.j -0.70710678+0.j  0.        +0.j]]
    """
    state = ComplexVariable(
        eye(2 ** self.n, dtype='float64'),
        zeros([2 ** self.n, 2 ** self.n], dtype='float64'))
    shape = (2 ** self.n, 2 ** self.n)
    num_ele = reduce(lambda x, y: x * y, shape)
    state = ComplexVariable(
        reshape(state.real, [num_ele]), reshape(state.imag, [num_ele]))

    for history_ele in self.__history:
        if history_ele[0] == 'u':
            state = StateTranfer(state, 'u', history_ele[1],
                                 history_ele[2])
        elif history_ele[0] in {'x', 'y', 'z', 'h'}:
            state = StateTranfer(state, history_ele[0], history_ele[1],
                                 params=history_ele[2])
        elif history_ele[0] == 'SWAP':
            state = StateTranfer(state, 'SWAP', history_ele[1])
        elif history_ele[0] == 'CNOT':
            state = StateTranfer(state, 'CNOT', history_ele[1])

    return ComplexVariable(
        reshape(state.real, shape), reshape(state.imag, shape))
def pad_packed_sequence(self, x, batch_sizes, unsorted_indices):
    """Pads a packed sequence."""
    h_size = x.shape[1]
    split_x = layers.split(x, batch_sizes, dim=0)
    max_bs = batch_sizes[0]
    step_embs = []
    for step, cur_bs in enumerate(batch_sizes):
        pad_emb = layers.zeros(
            shape=(max_bs - cur_bs, h_size), dtype=x.dtype)
        step_emb = layers.concat(input=(split_x[step], pad_emb))
        step_embs.append(step_emb)
    new_x = layers.stack(step_embs, axis=1)
    new_x = layers.index_select(new_x, unsorted_indices)
    return new_x
def __init__(self, problem: MinimizationProblem, variable: TensorList,
             step_length: float, momentum: float = 0.0, debug=False,
             plotting=False, fig_num=(10, 11)):
    self.problem = problem
    self.x = variable

    self.step_length = step_length
    self.momentum = momentum

    self.debug = debug or plotting
    self.plotting = plotting
    self.fig_num = fig_num

    self.losses = layers.zeros((0, ), 'float32')
    self.gradient_mags = layers.zeros((0, ), 'float32')
    self.residuals = None

    self.clear_temp()
def forward(self, sen_q, seg_q, sen_k, seg_k):
    """
    Input:
        sen_q: a batch of query sentences
        sen_k: a batch of key sentences
    Output:
        loss
    """
    # compute query features
    q = self.encoder_q(sen_q, seg_q)  # queries: NxC
    q = norm(q, dim=1)

    # compute key features
    with D.no_grad():  # no gradient to keys
        self._momentum_update_key_encoder()  # update the key encoder

        # shuffle for making use of BN
        # sen_k, idx_unshuffle = self._batch_shuffle_ddp(sen_k)

        k = self.encoder_k(sen_k, seg_k)  # keys: NxC
        k = norm(k, dim=1)

        # undo shuffle
        # k = self._batch_unshuffle_ddp(k, idx_unshuffle)

    # positive logits: Nx1
    l_pos = L.unsqueeze(
        L.reduce_sum(L.elementwise_mul(q, k), dim=1), axes=[-1])
    # negative logits: NxK
    l_neg = L.matmul(q, self.queue.detach())

    # logits: Nx(1+K)
    logits = L.concat([l_pos, l_neg], axis=-1)

    # apply temperature
    logits /= self.T

    # labels: positive key indicators
    labels = L.zeros([logits.shape[0]], dtype='int64')

    self._dequeue_and_enqueue(k)

    if labels is not None:
        if len(labels.shape) == 1:
            labels = L.reshape(labels, [-1, 1])
        loss = L.softmax_with_cross_entropy(logits, labels)
        loss = L.reduce_mean(loss)
    return loss
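# Note: the logits/labels construction above is the InfoNCE objective of the
# MoCo paper,
#     L = -log( exp(q.k+ / T) / (exp(q.k+ / T) + sum_i exp(q.k_i / T)) ),
# with the single positive key k+ placed in column 0 of `logits`, which is why
# `labels` is all zeros.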
def setUp(self):
    self.main_program = Program()
    switch_main_program(self.main_program)
    x = layers.data('x', shape=[100], dtype='float32')
    x.stop_gradient = False
    rank_table_tensor = layers.data(
        'rank_table_tensor', shape=[1], dtype='float32', lod_level=1)
    table = layers.lod_rank_table(x=rank_table_tensor)
    i = layers.zeros(dtype='int64', shape=[1])
    self.mem1 = layers.shrink_memory(x=x, i=i, table=table)
    i = layers.increment(x=i)
    i.stop_gradient = True
    self.mem2 = layers.shrink_memory(x=self.mem1, i=i, table=table)
    i = layers.increment(x=i)
    i.stop_gradient = True
    self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table)
    mem3_mean = layers.mean(self.mem3)
    append_backward(loss=mem3_mean)
    self.x_grad = self.main_program.global_block().var('x@GRAD')
def test_read_write(self):
    x = [
        layers.data(name='x0', shape=[100]),
        layers.data(name='x1', shape=[100]),
        layers.data(name='x2', shape=[100])
    ]
    for each_x in x:
        each_x.stop_gradient = False

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    arr = layers.array_write(x=x[0], i=i)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[1], i=i, array=arr)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[2], i=i, array=arr)

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    a0 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a1 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a2 = layers.array_read(array=arr, i=i)

    mean_a0 = layers.mean(a0)
    mean_a1 = layers.mean(a1)
    mean_a2 = layers.mean(a2)
    a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])

    mean_x0 = layers.mean(x[0])
    mean_x1 = layers.mean(x[1])
    mean_x2 = layers.mean(x[2])
    x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])

    scope = core.Scope()
    cpu = core.CPUPlace()
    exe = Executor(cpu)

    tensor = numpy.random.random(size=(100, 100)).astype('float32')

    outs = exe.run(feed={'x0': tensor,
                         'x1': tensor,
                         'x2': tensor},
                   fetch_list=[a_sum, x_sum],
                   scope=scope)
    self.assertEqual(outs[0], outs[1])

    total_sum = layers.sums(input=[a_sum, x_sum])
    total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

    append_backward(total_sum_scaled)

    g_vars = list(
        map(default_main_program().global_block().var,
            [each_x.name + "@GRAD" for each_x in x]))
    g_out = [
        item.sum()
        for item in exe.run(
            feed={'x0': tensor,
                  'x1': tensor,
                  'x2': tensor},
            fetch_list=g_vars)
    ]
    g_out_sum = numpy.array(g_out).sum()

    # since the final gradient is 1 and the network is entirely linear
    # (mean ops), the input gradient should also be 1
    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64",
                       lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same with pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10,
            level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores
    return translation_ids, translation_scores