def addition_rnn_neural_network(inputs, labels):
    print('Build model...')
    # input_shape=(None, num_feature).
    _, hidden, _ = basic_lstm(inputs, None, None, HIDDEN_SIZE)
    expand_hidden = fluid.layers.expand(hidden[0], expand_times=[1, DIGITS + 1])
    outputs = fluid.layers.reshape(
        expand_hidden, shape=[BATCH_SIZE, DIGITS + 1, HIDDEN_SIZE])
    for _ in range(LAYERS):
        # outputs, _, _ = fluid.layers.lstm(outputs, init_h, init_c, MAXLEN, HIDDEN_SIZE, num_layers=1)
        outputs, _, _ = basic_lstm(outputs, None, None, HIDDEN_SIZE)
    probs = fluid.layers.fc(
        input=outputs, size=len(chars), act='softmax', num_flatten_dims=2)

    loss = fluid.layers.cross_entropy(input=probs, label=labels, soft_label=True)
    avg_loss = fluid.layers.mean(loss)

    preds = fluid.layers.reshape(
        probs, shape=[BATCH_SIZE * (DIGITS + 1), len(chars)])
    labs = fluid.layers.reshape(
        fluid.layers.argmax(labels, axis=-1),
        shape=[BATCH_SIZE * (DIGITS + 1), 1])
    accuracy = fluid.layers.accuracy(preds, labs)
    return avg_loss, accuracy
def babirnn_neural_network(sentence, question, answer):
    encoded_sentence_emb = fluid.layers.embedding(
        input=sentence, size=[vocab_size, EMBED_HIDDEN_SIZE], is_sparse=True)
    _, encoded_sentence, _ = basic_lstm(
        encoded_sentence_emb, None, None, SENT_HIDDEN_SIZE)

    encoded_question_emb = fluid.layers.embedding(
        input=question, size=[vocab_size, EMBED_HIDDEN_SIZE], is_sparse=True)
    _, encoded_question, _ = basic_lstm(
        encoded_question_emb, None, None, QUERY_HIDDEN_SIZE)

    merged = fluid.layers.concat(
        input=[encoded_sentence[0], encoded_question[0]], axis=-1)
    preds = fluid.layers.fc(input=merged, size=vocab_size, act='softmax')

    # loss
    loss = fluid.layers.cross_entropy(input=preds, label=answer, soft_label=True)
    avg_loss = fluid.layers.mean(loss)

    label = fluid.layers.reshape(
        fluid.layers.argmax(answer, axis=-1), shape=[-1, 1])
    accuracy = fluid.layers.accuracy(input=preds, label=label)
    return preds, avg_loss, accuracy
def hierarchical_rnn_neural_network(img, label):
    img = (img + 1) / 2  # [-1, 1] --> [0, 1]
    encoded_rows, _, _ = basic_lstm(img, None, None, row_hidden)
    _, encoded_columns, _ = basic_lstm(encoded_rows, None, None, col_hidden)
    prediction = fluid.layers.fc(encoded_columns[0], num_classes, act='softmax')

    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = fluid.layers.mean(loss)
    accuracy = fluid.layers.accuracy(input=prediction, label=label)
    return avg_loss, accuracy
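# A minimal, hypothetical driver for hierarchical_rnn_neural_network above.
# The hidden sizes, class count, data-layer names, batch size, optimizer and
# the random batch are all assumptions, not part of the original snippet;
# basic_lstm is assumed imported from paddle.fluid.contrib.layers.
import numpy as np
import paddle.fluid as fluid

row_hidden = col_hidden = 128   # assumed globals used inside the function
num_classes = 10

img = fluid.layers.data(name='img', shape=[-1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[-1, 1], dtype='int64')
avg_loss, accuracy = hierarchical_rnn_neural_network(img, label)
fluid.optimizer.AdamOptimizer(learning_rate=0.001).minimize(avg_loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# one training step on a random batch, just to show the feed/fetch wiring
batch_img = np.random.uniform(-1, 1, (32, 28, 28)).astype('float32')
batch_label = np.random.randint(0, num_classes, (32, 1)).astype('int64')
loss_v, acc_v = exe.run(feed={'img': batch_img, 'label': batch_label},
                        fetch_list=[avg_loss, accuracy])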
def test_name(self):
    batch_size = 20
    input_size = 128
    hidden_size = 256
    num_layers = 2
    dropout = 0.5
    bidirectional = True
    batch_first = False

    with new_program_scope():
        input = layers.data(
            name="input",
            shape=[-1, batch_size, input_size],
            dtype='float32')
        pre_hidden = layers.data(
            name="pre_hidden", shape=[-1, hidden_size], dtype='float32')
        pre_cell = layers.data(
            name="pre_cell", shape=[-1, hidden_size], dtype='float32')
        sequence_length = layers.data(
            name="sequence_length", shape=[-1], dtype='int32')

        rnn_out, last_hidden, last_cell = basic_lstm(
            input,
            pre_hidden,
            pre_cell,
            hidden_size,
            num_layers=num_layers,
            sequence_length=sequence_length,
            dropout_prob=dropout,
            bidirectional=bidirectional,
            param_attr=fluid.ParamAttr(name="test1"),
            bias_attr=fluid.ParamAttr(name="test1"),
            batch_first=batch_first)

        var_list = fluid.io.get_program_parameter(
            fluid.default_main_program())

        for var in var_list:
            self.assertTrue(var.name in self.name_set)
def _build_encoder(self):
    self.enc_output, enc_last_hidden, enc_last_cell = basic_lstm(
        self.src_emb,
        None,
        None,
        self.hidden_size,
        num_layers=self.num_layers,
        batch_first=self.batch_first,
        dropout_prob=self.dropout,
        param_attr=ParamAttr(
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale)),
        bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
        sequence_length=self.src_sequence_length)

    return self.enc_output, enc_last_hidden, enc_last_cell
def build_model(is_training):
    input_text = fluid.layers.data(
        name="text", shape=[-1, max_len, 1], dtype="int64")
    input_re_text = fluid.layers.data(
        name="re_text", shape=[-1, max_len, 1], dtype="int64")
    input_text_len = fluid.layers.data(
        name="text_len", shape=[-1], dtype="int32")
    if is_training:
        input_label = fluid.layers.data(
            name="label", shape=[-1, 1], dtype="int64")

    input_text_emb = fluid.layers.embedding(
        input=input_text,
        size=[vocab_size, embedding_dims],
        param_attr=ParamAttr(name="shared_emb"))
    input_re_text_emb = fluid.layers.embedding(
        input=input_re_text,
        size=[vocab_size, embedding_dims],
        param_attr=ParamAttr(name="shared_emb"))

    _, _, input_text_lstm = basic_lstm(
        input_text_emb, None, None, lstm_hidden_size,
        num_layers=1, sequence_length=input_text_len)
    _, _, input_re_text_lstm = basic_lstm(
        input_re_text_emb, None, None, lstm_hidden_size,
        num_layers=1, sequence_length=input_text_len)

    input_text_lstm = fluid.layers.transpose(input_text_lstm, perm=[1, 0, 2])
    input_re_text_lstm = fluid.layers.transpose(input_re_text_lstm, perm=[1, 0, 2])
    input_text_lstm = fluid.layers.reshape(
        input_text_lstm, shape=[-1, lstm_hidden_size])
    input_re_text_lstm = fluid.layers.reshape(
        input_re_text_lstm, shape=[-1, lstm_hidden_size])

    input_text_hidden = fluid.layers.concat(
        [input_text_lstm, input_re_text_lstm], axis=-1)
    # dropout on the concatenated features (the original passed input_text_lstm
    # here, which silently discarded the concat result)
    input_text_hidden = fluid.layers.dropout(
        input_text_hidden, 0.5, is_test=not is_training)
    input_text_hidden = fluid.layers.fc(input_text_hidden, size=2, act="softmax")

    if is_training:
        loss = fluid.layers.cross_entropy(input_text_hidden, input_label)
        loss = fluid.layers.reduce_mean(loss)
        optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01)
        optimizer.minimize(loss)
        return loss
    else:
        return input_text_hidden
def lstm_text_generation_neural_network(sentences, next_chars=None):
    print('Build model...')
    _, hidden, _ = basic_lstm(sentences, None, None, hidden_size=128)
    preds = fluid.layers.fc(input=hidden[0], size=len(chars), act='softmax')

    # loss
    loss = fluid.layers.cross_entropy(input=preds, label=next_chars, soft_label=True)
    avg_loss = fluid.layers.mean(loss)

    label = fluid.layers.reshape(
        fluid.layers.argmax(next_chars, axis=-1), shape=[-1, 1])
    accuracy = fluid.layers.accuracy(input=preds, label=label)
    return preds, avg_loss, accuracy
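# A hypothetical temperature-sampling helper to go with the generator above.
# It operates on one row of the numpy probabilities fetched from `preds`; the
# helper name and the temperature default are assumptions, not part of the
# original snippet.
import numpy as np

def sample_next_char(probs, temperature=1.0):
    # rescale the softmax output by temperature, renormalize, and draw one
    # character index
    probs = np.asarray(probs, dtype='float64')
    logits = np.log(np.maximum(probs, 1e-12)) / temperature
    exp_logits = np.exp(logits - np.max(logits))
    probs = exp_logits / np.sum(exp_logits)
    return int(np.argmax(np.random.multinomial(1, probs)))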
def _build_rnn_graph(self, inputs, init_hidden, init_cell, sequence_length_ph):
    rnn_out, last_hidden, last_cell = basic_lstm(
        input=inputs,
        init_hidden=init_hidden,
        init_cell=init_cell,
        hidden_size=self.n_hidden_,
        num_layers=self.num_layers_,
        batch_first=True,
        dropout_prob=self.dropout_prob_,
        sequence_length=sequence_length_ph,
        param_attr=ParamAttr(
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale_, high=self.init_scale_)),
        bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
        forget_bias=0.0)
    return rnn_out, last_hidden, last_cell
def _build_decoder(self, enc_last_hidden, enc_last_cell, mode='train'):
    softmax_weight = layers.create_parameter(
        [self.hidden_size, self.tar_vocab_size],
        dtype="float32",
        name="softmax_weight",
        default_initializer=fluid.initializer.UniformInitializer(
            low=-self.init_scale, high=self.init_scale))

    if mode == 'train':
        #fluid.layers.Print(self.tar_emb)
        #fluid.layers.Print(enc_last_hidden)
        #fluid.layers.Print(enc_last_cell)
        dec_output, dec_last_hidden, dec_last_cell = basic_lstm(
            self.tar_emb,
            enc_last_hidden,
            enc_last_cell,
            self.hidden_size,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            dropout_prob=self.dropout,
            param_attr=ParamAttr(
                initializer=fluid.initializer.UniformInitializer(
                    low=-self.init_scale, high=self.init_scale)),
            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)))

        dec_output = layers.matmul(dec_output, softmax_weight)
        return dec_output
    else:
        print("mode not supported", mode)
def _build_decoder(self, enc_last_hidden, enc_last_cell, mode='train', beam_size=10): softmax_weight = layers.create_parameter([self.hidden_size, self.tar_vocab_size], dtype="float32", name="softmax_weight", \ default_initializer=fluid.initializer.UniformInitializer(low=-self.init_scale, high=self.init_scale)) if mode == 'train': dec_output, dec_last_hidden, dec_last_cell = basic_lstm( self.tar_emb, enc_last_hidden, enc_last_cell, \ self.hidden_size, num_layers=self.num_layers, \ batch_first=self.batch_first, \ dropout_prob=self.dropout, \ param_attr = ParamAttr( initializer=fluid.initializer.UniformInitializer(low=-self.init_scale, high=self.init_scale) ), \ bias_attr = ParamAttr( initializer = fluid.initializer.Constant(0.0) )) dec_output = layers.matmul(dec_output, softmax_weight) return dec_output elif mode == 'beam_search' or mode == 'greedy_search': dec_unit_list = [] name = 'basic_lstm' for i in range(self.num_layers): new_name = name + "_layers_" + str(i) dec_unit_list.append( BasicLSTMUnit(new_name, self.hidden_size, dtype='float32')) def decoder_step(current_in, pre_hidden_array, pre_cell_array): new_hidden_array = [] new_cell_array = [] step_in = current_in for i in range(self.num_layers): pre_hidden = pre_hidden_array[i] pre_cell = pre_cell_array[i] new_hidden, new_cell = dec_unit_list[i](step_in, pre_hidden, pre_cell) new_hidden_array.append(new_hidden) new_cell_array.append(new_cell) step_in = new_hidden return step_in, new_hidden_array, new_cell_array if mode == 'beam_search': max_src_seq_len = layers.shape(self.src)[1] max_length = max_src_seq_len * 2 #max_length = layers.fill_constant( [1], dtype='int32', value = 10) pre_ids = layers.fill_constant([1, 1], dtype='int64', value=1) full_ids = layers.fill_constant([1, 1], dtype='int64', value=1) score = layers.fill_constant([1], dtype='float32', value=0.0) #eos_ids = layers.fill_constant( [1, 1], dtype='int64', value=2) pre_hidden_array = [] pre_cell_array = [] pre_feed = layers.fill_constant([beam_size, self.hidden_size], dtype='float32', value=0) for i in range(self.num_layers): pre_hidden_array.append( layers.expand(enc_last_hidden[i], [beam_size, 1])) pre_cell_array.append( layers.expand(enc_last_cell[i], [beam_size, 1])) eos_ids = layers.fill_constant([beam_size], dtype='int64', value=2) init_score = np.zeros((beam_size)).astype('float32') init_score[1:] = -INF pre_score = layers.assign(init_score) #pre_score = layers.fill_constant( [1,], dtype='float32', value= 0.0) tokens = layers.fill_constant([beam_size, 1], dtype='int64', value=1) enc_memory = layers.expand(self.enc_output, [beam_size, 1, 1]) pre_tokens = layers.fill_constant([beam_size, 1], dtype='int64', value=1) finished_seq = layers.fill_constant([beam_size, 1], dtype='int64', value=0) finished_scores = layers.fill_constant([beam_size], dtype='float32', value=-INF) finished_flag = layers.fill_constant([beam_size], dtype='float32', value=0.0) step_idx = layers.fill_constant(shape=[1], dtype='int32', value=0) cond = layers.less_than(x=step_idx, y=max_length) # default force_cpu=True parent_idx = layers.fill_constant([1], dtype='int32', value=0) while_op = layers.While(cond) def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags, beam_size, select_beam=None, generate_id=None): scores = layers.reshape(scores, shape=[1, -1]) _, topk_indexs = layers.topk(scores, k=beam_size) topk_indexs = layers.reshape(topk_indexs, shape=[-1]) # gather result top_seq = layers.gather(sequences, topk_indexs) topk_flags = layers.gather(flags, topk_indexs) 
topk_gather_scores = layers.gather(scores_to_gather, topk_indexs) if select_beam: topk_beam = layers.gather(select_beam, topk_indexs) else: topk_beam = select_beam if generate_id: topk_id = layers.gather(generate_id, topk_indexs) else: topk_id = generate_id return top_seq, topk_gather_scores, topk_flags, topk_beam, topk_id def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished, select_beam, generate_id): curr_scores += curr_finished * -INF return compute_topk_scores_and_seq(curr_seq, curr_scores, curr_log_probs, curr_finished, beam_size, select_beam, generate_id=generate_id) def grow_finished(finished_seq, finished_scores, finished_flag, curr_seq, curr_scores, curr_finished): finished_seq = layers.concat([ finished_seq, layers.fill_constant( [beam_size, 1], dtype='int64', value=1) ], axis=1) curr_scores += (1.0 - curr_finished) * -INF #layers.Print( curr_scores, message="curr scores") curr_finished_seq = layers.concat([finished_seq, curr_seq], axis=0) curr_finished_scores = layers.concat( [finished_scores, curr_scores], axis=0) curr_finished_flags = layers.concat( [finished_flag, curr_finished], axis=0) return compute_topk_scores_and_seq(curr_finished_seq, curr_finished_scores, curr_finished_scores, curr_finished_flags, beam_size) def is_finished(alive_log_prob, finished_scores, finished_in_finished): max_out_len = 200 max_length_penalty = layers.pow( layers.fill_constant([1], dtype='float32', value=((5.0 + max_out_len) / 6.0)), alpha) lower_bound_alive_score = layers.slice( alive_log_prob, starts=[0], ends=[1], axes=[0]) / max_length_penalty lowest_score_of_fininshed_in_finished = finished_scores * finished_in_finished lowest_score_of_fininshed_in_finished += ( 1.0 - finished_in_finished) * -INF lowest_score_of_fininshed_in_finished = layers.reduce_min( lowest_score_of_fininshed_in_finished) met = layers.less_than( lower_bound_alive_score, lowest_score_of_fininshed_in_finished) met = layers.cast(met, 'float32') bound_is_met = layers.reduce_sum(met) finished_eos_num = layers.reduce_sum(finished_in_finished) finish_cond = layers.less_than( finished_eos_num, layers.fill_constant([1], dtype='float32', value=beam_size)) return finish_cond def grow_top_k(step_idx, alive_seq, alive_log_prob, parant_idx): pre_ids = alive_seq dec_step_emb = layers.embedding( input=pre_ids, size=[self.tar_vocab_size, self.hidden_size], dtype='float32', is_sparse=False, param_attr=fluid.ParamAttr( name='target_embedding', initializer=fluid.initializer.UniformInitializer( low=-self.init_scale, high=self.init_scale))) dec_att_out, new_hidden_array, new_cell_array = decoder_step( dec_step_emb, pre_hidden_array, pre_cell_array) projection = layers.matmul(dec_att_out, softmax_weight) logits = layers.softmax(projection) current_log = layers.elementwise_add(x=layers.log(logits), y=alive_log_prob, axis=0) base_1 = layers.cast(step_idx, 'float32') + 6.0 base_1 /= 6.0 length_penalty = layers.pow(base_1, alpha) len_pen = layers.pow( ((5. 
+ layers.cast(step_idx + 1, 'float32')) / 6.), alpha) current_log = layers.reshape(current_log, shape=[1, -1]) current_log = current_log / length_penalty topk_scores, topk_indices = layers.topk(input=current_log, k=beam_size) topk_scores = layers.reshape(topk_scores, shape=[-1]) topk_log_probs = topk_scores * length_penalty generate_id = layers.reshape( topk_indices, shape=[-1]) % self.tar_vocab_size selected_beam = layers.reshape( topk_indices, shape=[-1]) // self.tar_vocab_size topk_finished = layers.equal(generate_id, eos_ids) topk_finished = layers.cast(topk_finished, 'float32') generate_id = layers.reshape(generate_id, shape=[-1, 1]) pre_tokens_list = layers.gather(tokens, selected_beam) full_tokens_list = layers.concat( [pre_tokens_list, generate_id], axis=1) return full_tokens_list, topk_log_probs, topk_scores, topk_finished, selected_beam, generate_id, \ dec_att_out, new_hidden_array, new_cell_array with while_op.block(): topk_seq, topk_log_probs, topk_scores, topk_finished, topk_beam, topk_generate_id, attention_out, new_hidden_array, new_cell_array = \ grow_top_k( step_idx, pre_tokens, pre_score, parent_idx) alive_seq, alive_log_prob, _, alive_beam, alive_id = grow_alive( topk_seq, topk_scores, topk_log_probs, topk_finished, topk_beam, topk_generate_id) finished_seq_2, finished_scores_2, finished_flags_2, _, _ = grow_finished( finished_seq, finished_scores, finished_flag, topk_seq, topk_scores, topk_finished) finished_cond = is_finished(alive_log_prob, finished_scores_2, finished_flags_2) layers.increment(x=step_idx, value=1.0, in_place=True) layers.assign(alive_beam, parent_idx) layers.assign(alive_id, pre_tokens) layers.assign(alive_log_prob, pre_score) layers.assign(alive_seq, tokens) layers.assign(finished_seq_2, finished_seq) layers.assign(finished_scores_2, finished_scores) layers.assign(finished_flags_2, finished_flag) # update init_hidden, init_cell, input_feed new_feed = layers.gather(attention_out, parent_idx) layers.assign(new_feed, pre_feed) for i in range(self.num_layers): new_hidden_var = layers.gather(new_hidden_array[i], parent_idx) layers.assign(new_hidden_var, pre_hidden_array[i]) new_cell_var = layers.gather(new_cell_array[i], parent_idx) layers.assign(new_cell_var, pre_cell_array[i]) length_cond = layers.less_than(x=step_idx, y=max_length) layers.logical_and(x=length_cond, y=finished_cond, out=cond) tokens_with_eos = tokens all_seq = layers.concat([tokens_with_eos, finished_seq], axis=0) all_score = layers.concat([pre_score, finished_scores], axis=0) _, topk_index = layers.topk(all_score, k=beam_size) topk_index = layers.reshape(topk_index, shape=[-1]) final_seq = layers.gather(all_seq, topk_index) final_score = layers.gather(all_score, topk_index) return final_seq elif mode == 'greedy_search': max_src_seq_len = layers.shape(self.src)[1] max_length = max_src_seq_len * 2 #max_length = layers.fill_constant( [1], dtype='int32', value = 10) pre_ids = layers.fill_constant([1, 1], dtype='int64', value=1) full_ids = layers.fill_constant([1, 1], dtype='int64', value=1) score = layers.fill_constant([1], dtype='float32', value=0.0) eos_ids = layers.fill_constant([1, 1], dtype='int64', value=2) pre_hidden_array = [] pre_cell_array = [] pre_feed = layers.fill_constant([1, self.hidden_size], dtype='float32', value=0) for i in range(self.num_layers): pre_hidden_array.append(enc_last_hidden[i]) pre_cell_array.append(enc_last_cell[i]) #pre_hidden_array.append( layers.fill_constant( [1, hidden_size], dtype='float32', value=0) ) #pre_cell_array.append( layers.fill_constant( [1, 
hidden_size], dtype='float32', value=0) ) step_idx = layers.fill_constant(shape=[1], dtype='int32', value=0) cond = layers.less_than(x=step_idx, y=max_length) # default force_cpu=True while_op = layers.While(cond) with while_op.block(): dec_step_emb = layers.embedding( input=pre_ids, size=[self.tar_vocab_size, self.hidden_size], dtype='float32', is_sparse=False, param_attr=fluid.ParamAttr( name='target_embedding', initializer=fluid.initializer.UniformInitializer( low=-self.init_scale, high=self.init_scale))) dec_att_out, new_hidden_array, new_cell_array = decoder_step( dec_step_emb, pre_hidden_array, pre_cell_array) projection = layers.matmul(dec_att_out, softmax_weight) logits = layers.softmax(projection) logits = layers.log(logits) current_log = layers.elementwise_add(logits, score, axis=0) topk_score, topk_indices = layers.topk(input=current_log, k=1) new_ids = layers.concat([full_ids, topk_indices]) layers.assign(new_ids, full_ids) #layers.Print( full_ids, message="ful ids") layers.assign(topk_score, score) layers.assign(topk_indices, pre_ids) layers.assign(dec_att_out, pre_feed) for i in range(self.num_layers): layers.assign(new_hidden_array[i], pre_hidden_array[i]) layers.assign(new_cell_array[i], pre_cell_array[i]) layers.increment(x=step_idx, value=1.0, in_place=True) eos_met = layers.not_equal(topk_indices, eos_ids) length_cond = layers.less_than(x=step_idx, y=max_length) layers.logical_and(x=length_cond, y=eos_met, out=cond) return full_ids raise Exception("error") else: print("mode not supprt", mode)
def build_model(is_training):
    input_text = fluid.layers.data(
        name="text", shape=[-1, max_len, 1], dtype="int64")
    input_text_len = fluid.layers.data(
        name="text_len", shape=[-1], dtype="int32")
    if is_training:
        input_label = fluid.layers.data(
            name="label", shape=[-1, 1], dtype="int64")

    input_text_emb = fluid.layers.embedding(
        input=input_text,
        size=[vocab_size, embedding_dims],
        param_attr=ParamAttr(name="shared_emb"))
    input_text_emb = fluid.layers.transpose(input_text_emb, perm=[0, 2, 1])
    input_text_emb = fluid.layers.reshape(
        input_text_emb, shape=[-1, embedding_dims, max_len, 1])

    input_text_conv = fluid.layers.conv2d(
        input=input_text_emb,
        num_filters=filters,
        filter_size=(kernel_size, 1),
        stride=(conv_stride, 1))
    input_text_conv = fluid.layers.relu(input_text_conv)
    input_text_conv = fluid.layers.pool2d(
        input_text_conv,
        pool_size=(pool_size, 1),
        pool_stride=(pool_stride, 1))
    input_text_conv = fluid.layers.squeeze(input_text_conv, axes=[3])

    _, _, input_text_lstm = basic_lstm(
        input_text_conv, None, None, lstm_hidden_size,
        num_layers=1, sequence_length=input_text_len)
    input_text_lstm = fluid.layers.transpose(input_text_lstm, perm=[1, 0, 2])
    input_text_lstm = fluid.layers.reshape(
        input_text_lstm, shape=[-1, lstm_hidden_size])
    input_text_hidden = fluid.layers.fc(input_text_lstm, size=2, act="softmax")

    if is_training:
        loss = fluid.layers.cross_entropy(input_text_hidden, input_label)
        loss = fluid.layers.reduce_mean(loss)
        optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01)
        optimizer.minimize(loss)
        return loss
    else:
        return input_text_hidden
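# A hypothetical driver for the CNN+LSTM build_model above: it builds the
# training graph in a fresh program pair and initializes parameters. The
# program names, the CPU place, and the use of fluid.program_guard are
# assumptions, not part of the original snippet.
import paddle.fluid as fluid

train_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    loss = build_model(is_training=True)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
# one training step would then be:
# exe.run(train_prog,
#         feed={'text': ..., 'text_len': ..., 'label': ...},
#         fetch_list=[loss])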
def test_run(self): inputs_basic_lstm = fluid.data( name='inputs_basic_lstm', shape=[None, None, self.input_size], dtype='float32') sequence_length = fluid.data( name="sequence_length", shape=[None], dtype='int64') inputs_dynamic_rnn = layers.transpose(inputs_basic_lstm, perm=[1, 0, 2]) cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn") output, final_state = dynamic_rnn( cell=cell, inputs=inputs_dynamic_rnn, sequence_length=sequence_length, is_reverse=False) output_new = layers.transpose(output, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = basic_lstm(inputs_basic_lstm, None, None, self.hidden_size, num_layers=1, \ batch_first = False, bidirectional=False, sequence_length=sequence_length, forget_bias = 1.0) if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = Executor(place) exe.run(framework.default_startup_program()) inputs_basic_lstm_np = np.random.uniform( -0.1, 0.1, (self.seq_len, self.batch_size, self.input_size)).astype('float32') sequence_length_np = np.ones( self.batch_size, dtype='int64') * self.seq_len inputs_np = np.random.uniform( -0.1, 0.1, (self.batch_size, self.input_size)).astype('float32') pre_hidden_np = np.random.uniform( -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32') pre_cell_np = np.random.uniform( -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32') param_names = [[ "LSTMCell_for_rnn/BasicLSTMUnit_0.w_0", "basic_lstm_layers_0/BasicLSTMUnit_0.w_0" ], [ "LSTMCell_for_rnn/BasicLSTMUnit_0.b_0", "basic_lstm_layers_0/BasicLSTMUnit_0.b_0" ]] for names in param_names: param = np.array(fluid.global_scope().find_var(names[0]).get_tensor( )) param = np.random.uniform( -0.1, 0.1, size=param.shape).astype('float32') fluid.global_scope().find_var(names[0]).get_tensor().set(param, place) fluid.global_scope().find_var(names[1]).get_tensor().set(param, place) out = exe.run(feed={ 'inputs_basic_lstm': inputs_basic_lstm_np, 'sequence_length': sequence_length_np, 'inputs': inputs_np, 'pre_hidden': pre_hidden_np, 'pre_cell': pre_cell_np }, fetch_list=[output_new, rnn_out]) self.assertTrue(np.allclose(out[0], out[1], rtol=1e-4))
input_encoder_m = layers.embedding(input=input_sequence, size=[vocab_size, 64])
input_encoder_m = layers.dropout(input_encoder_m, 0.3)
input_encoder_c = layers.embedding(input=input_sequence, size=[vocab_size, query_maxlen])
input_encoder_c = layers.dropout(input_encoder_c, 0.3)
question_encoder = layers.embedding(input=input_sequence, size=[vocab_size, 64])
question_encoder = layers.dropout(question_encoder, 0.3)

match = layers.elementwise_mul(input_encoder_m, question_encoder)
response = layers.softmax(match, axis=-1)
answer = layers.concat([response, question_encoder], axis=-1)

_, _, answer = basic_lstm(answer, None, None, 32)
answer = layers.transpose(answer, perm=(1, 0, 2))
answer = layers.reshape(answer, shape=[-1, 32])
answer = layers.dropout(answer, 0.3)
answer = layers.fc(answer, size=vocab_size, act="softmax")

loss = layers.cross_entropy(answer, true_answer)
loss = layers.reduce_mean(loss)
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01)
optimizer.minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
def lm_model(hidden_size, vocab_size, num_layers=2, num_steps=20, init_scale=0.1, dropout=None, rnn_model='static', use_dataloader=False): def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] bias_arr = [] hidden_array = [] cell_array = [] mask_array = [] for i in range(num_layers): weight_1 = layers.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) weight_1_arr.append(weight_1) bias_1 = layers.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) pre_hidden = layers.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1]) pre_cell = layers.slice( init_cell, axes=[0], starts=[i], ends=[i + 1]) pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size]) pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size]) hidden_array.append(pre_hidden) cell_array.append(pre_cell) input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2]) rnn = PaddingRNN() with rnn.step(): input = rnn.step_input(input_embedding) for k in range(num_layers): pre_hidden = rnn.memory(init=hidden_array[k]) pre_cell = rnn.memory(init=cell_array[k]) weight_1 = weight_1_arr[k] bias = bias_arr[k] nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) i = layers.slice( gate_input, axes=[1], starts=[0], ends=[hidden_size]) j = layers.slice( gate_input, axes=[1], starts=[hidden_size], ends=[hidden_size * 2]) f = layers.slice( gate_input, axes=[1], starts=[hidden_size * 2], ends=[hidden_size * 3]) o = layers.slice( gate_input, axes=[1], starts=[hidden_size * 3], ends=[hidden_size * 4]) c = pre_cell * layers.sigmoid(f) + layers.sigmoid( i) * layers.tanh(j) m = layers.tanh(c) * layers.sigmoid(o) rnn.update_memory(pre_hidden, m) rnn.update_memory(pre_cell, c) rnn.step_output(m) rnn.step_output(c) input = m if dropout != None and dropout > 0.0: input = layers.dropout( input, dropout_prob=dropout, dropout_implementation='upscale_in_train') rnn.step_output(input) rnnout = rnn() last_hidden_array = [] last_cell_array = [] real_res = rnnout[-1] for i in range(num_layers): m = rnnout[i * 2] c = rnnout[i * 2 + 1] m.stop_gradient = True c.stop_gradient = True last_h = layers.slice( m, axes=[0], starts=[num_steps - 1], ends=[num_steps]) last_hidden_array.append(last_h) last_c = layers.slice( c, axes=[0], starts=[num_steps - 1], ends=[num_steps]) last_cell_array.append(last_c) real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = layers.concat(last_hidden_array, 0) last_cell = layers.concat(last_cell_array, 0) return real_res, last_hidden, last_cell def encoder_static(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] bias_arr = [] hidden_array = [] cell_array = [] mask_array = [] for i in range(num_layers): weight_1 = layers.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) weight_1_arr.append(weight_1) bias_1 = layers.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) pre_hidden = layers.slice( init_hidden, axes=[0], starts=[i], ends=[i 
+ 1]) pre_cell = layers.slice( init_cell, axes=[0], starts=[i], ends=[i + 1]) pre_hidden = layers.reshape( pre_hidden, shape=[-1, hidden_size], inplace=True) pre_cell = layers.reshape( pre_cell, shape=[-1, hidden_size], inplace=True) hidden_array.append(pre_hidden) cell_array.append(pre_cell) res = [] sliced_inputs = layers.split( input_embedding, num_or_sections=len, dim=1) for index in range(len): input = sliced_inputs[index] input = layers.reshape(input, shape=[-1, hidden_size], inplace=True) for k in range(num_layers): pre_hidden = hidden_array[k] pre_cell = cell_array[k] weight_1 = weight_1_arr[k] bias = bias_arr[k] nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) c = pre_cell * layers.sigmoid(f) + layers.sigmoid( i) * layers.tanh(j) m = layers.tanh(c) * layers.sigmoid(o) hidden_array[k] = m cell_array[k] = c input = m if dropout != None and dropout > 0.0: input = layers.dropout( input, dropout_prob=dropout, dropout_implementation='upscale_in_train') res.append(input) last_hidden = layers.concat(hidden_array, 1) last_hidden = layers.reshape( last_hidden, shape=[-1, num_layers, hidden_size], inplace=True) last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = layers.concat(cell_array, 1) last_cell = layers.reshape( last_cell, shape=[-1, num_layers, hidden_size]) last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2]) real_res = layers.concat(res, 0) real_res = layers.reshape( real_res, shape=[len, -1, hidden_size], inplace=True) real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) return real_res, last_hidden, last_cell x = fluid.data(name="x", shape=[None, num_steps, 1], dtype='int64') y = fluid.data(name="y", shape=[None, 1], dtype='int64') if use_dataloader: dataloader = fluid.io.DataLoader.from_generator( feed_list=[x, y], capacity=16, iterable=False, use_double_buffer=True) init_hidden = fluid.data( name="init_hidden", shape=[None, num_layers, hidden_size], dtype='float32') init_cell = fluid.data( name="init_cell", shape=[None, num_layers, hidden_size], dtype='float32') init_hidden = layers.transpose(init_hidden, perm=[1, 0, 2]) init_cell = layers.transpose(init_cell, perm=[1, 0, 2]) init_hidden_reshape = layers.reshape( init_hidden, shape=[num_layers, -1, hidden_size]) init_cell_reshape = layers.reshape( init_cell, shape=[num_layers, -1, hidden_size]) x_emb = layers.embedding( input=x, size=[vocab_size, hidden_size], dtype='float32', is_sparse=False, param_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale))) x_emb = layers.reshape( x_emb, shape=[-1, num_steps, hidden_size], inplace=True) if dropout != None and dropout > 0.0: x_emb = layers.dropout( x_emb, dropout_prob=dropout, dropout_implementation='upscale_in_train') if rnn_model == "padding": rnn_out, last_hidden, last_cell = padding_rnn( x_emb, len=num_steps, init_hidden=init_hidden_reshape, init_cell=init_cell_reshape) elif rnn_model == "static": rnn_out, last_hidden, last_cell = encoder_static( x_emb, len=num_steps, init_hidden=init_hidden_reshape, init_cell=init_cell_reshape) elif rnn_model == "cudnn": x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = layers.lstm( x_emb, init_hidden_reshape, init_cell_reshape, num_steps, hidden_size, num_layers, is_bidirec=False, default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, 
high=init_scale)) rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2]) elif rnn_model == "basic_lstm": rnn_out, last_hidden, last_cell = basic_lstm( x_emb, init_hidden, init_cell, hidden_size, \ num_layers=num_layers, batch_first=True, dropout_prob=dropout, \ param_attr = ParamAttr( initializer=fluid.initializer.UniformInitializer(low=-init_scale, high=init_scale) ), \ bias_attr = ParamAttr( initializer = fluid.initializer.Constant(0.0) ), \ forget_bias = 0.0) else: print("type not support") return rnn_out = layers.reshape( rnn_out, shape=[-1, num_steps, hidden_size], inplace=True) softmax_weight = layers.create_parameter( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) softmax_bias = layers.create_parameter( [vocab_size], dtype="float32", name='softmax_bias', default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) projection = layers.matmul(rnn_out, softmax_weight) projection = layers.elementwise_add(projection, softmax_bias) projection = layers.reshape( projection, shape=[-1, vocab_size], inplace=True) loss = layers.softmax_with_cross_entropy( logits=projection, label=y, soft_label=False) loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True) loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_sum(loss) loss.persistable = True # This will feed last_hidden, last_cell to init_hidden, init_cell, which # can be used directly in next batch. This can avoid the fetching of # last_hidden and last_cell and feeding of init_hidden and init_cell in # each training step. last_hidden = layers.transpose(last_hidden, perm=[1, 0, 2]) last_cell = layers.transpose(last_cell, perm=[1, 0, 2]) feeding_list = ['x', 'y', 'init_hidden', 'init_cell'] if use_dataloader: return loss, last_hidden, last_cell, feeding_list, dataloader else: return loss, last_hidden, last_cell, feeding_list
def test_run(self): x = layers.data(name='x', shape=[-1, self.batch_size, self.hidden_size], dtype='float32') sequence_length = layers.data(name="sequence_length", shape=[-1], dtype='float32') rnn_out, last_hidden, last_cell = basic_lstm( x, None, None, self.hidden_size, num_layers=self.num_layers, \ batch_first = self.batch_first, bidirectional=self.is_bidirect, sequence_length=sequence_length, forget_bias = self.forget_bias ) last_hidden.persisbale = True rnn_out.persisbale = True if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = Executor(place) exe.run(framework.default_startup_program()) param_list = fluid.default_main_program().block(0).all_parameters() # process weight and bias gate_weight = [] gate_bias = [] for i in range(self.num_layers): gate_w_name = "basic_lstm_layers_" + str( i) + "/BasicLSTMUnit_0.w_0" gate_b_name = "basic_lstm_layers_" + str( i) + "/BasicLSTMUnit_0.b_0" gate_w = np.array( fluid.global_scope().find_var(gate_w_name).get_tensor()) gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype('float32') fluid.global_scope().find_var(gate_w_name).get_tensor().set( gate_w, place) gate_b = np.array( fluid.global_scope().find_var(gate_b_name).get_tensor()) gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype('float32') fluid.global_scope().find_var(gate_b_name).get_tensor().set( gate_b, place) gate_weight.append(gate_w) gate_bias.append(gate_b) if self.is_bidirect: for i in range(self.num_layers): gate_w_name = "basic_lstm_reverse_layers_" + str( i) + "/BasicLSTMUnit_0.w_0" gate_b_name = "basic_lstm_reverse_layers_" + str( i) + "/BasicLSTMUnit_0.b_0" gate_w = np.array( fluid.global_scope().find_var(gate_w_name).get_tensor()) gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype('float32') fluid.global_scope().find_var(gate_w_name).get_tensor().set( gate_w, place) gate_b = np.array( fluid.global_scope().find_var(gate_b_name).get_tensor()) gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype('float32') fluid.global_scope().find_var(gate_b_name).get_tensor().set( gate_b, place) gate_weight.append(gate_w) gate_bias.append(gate_b) step_input_np = np.random.uniform(-0.1, 0.1, (self.seq_len, self.batch_size, self.hidden_size)).astype('float32') sequence_length_np = np.random.randint( self.seq_len // 2, self.seq_len, size=(self.batch_size)).astype('int64') out = exe.run(feed={ 'x': step_input_np, 'sequence_length': sequence_length_np }, fetch_list=[rnn_out, last_hidden, last_cell]) api_rnn_out = out[0] api_last_hidden = out[1] api_last_cell = out[2] np_out = lstm_np(step_input_np, None, None, self.hidden_size, gate_weight, gate_bias, num_layers=self.num_layers, batch_first=self.batch_first, is_bidirect=self.is_bidirect, sequence_length=sequence_length_np) self.assertTrue(np.allclose(api_rnn_out, np_out[0], rtol=1e-4, atol=0)) self.assertTrue( np.allclose(api_last_hidden, np_out[1], rtol=1e-4, atol=0)) self.assertTrue( np.allclose(api_last_cell, np_out[2], rtol=1e-4, atol=0))
x = layers.data(name="x", shape=[-1, tsteps, 1], dtype="float32") y = layers.data(name="y", shape=[-1, 1], dtype="float32") lstm1_init_h = layers.data(name="lstm1_h", shape=[1, batch_size, 50], dtype="float32", append_batch_size=False) lstm1_init_c = layers.data(name="lstm1_c", shape=[1, batch_size, 50], dtype="float32", append_batch_size=False) lstm1, lstm1_h, lstm1_c = basic_lstm(x, lstm1_init_h, lstm1_init_c, 50, num_layers=1) _, lstm2_h, lstm2_c = basic_lstm(lstm1, lstm1_h, lstm1_c, 50, num_layers=1) lstm2_c_batch_first = layers.transpose(lstm2_c, [1, 0, 2]) pred = layers.fc(lstm2_c_batch_first, 1) loss = layers.reduce_mean(layers.square(pred - y)) test_program = fluid.default_main_program().clone(for_test=True) optimizer = fluid.optimizer.RMSPropOptimizer(learning_rate=0.001) optimizer.minimize(loss) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program())
def lm_model(hidden_size, vocab_size, batch_size, num_layers=2, num_steps=20, init_scale=0.1, dropout=None, rnn_model='static', use_dataloader=False): if rnn_model == 'lod': x = fluid.data(name="x", shape=[None, 1], dtype='int64', lod_level=1) y = fluid.data(name="y", shape=[None, 1], dtype='int64', lod_level=1) if use_dataloader: dataloader = fluid.io.DataLoader.from_generator( feed_list=[x, y], capacity=16, iterable=False, use_double_buffer=True) init_hidden = fluid.data(name="init_hidden", shape=[None, num_layers, hidden_size], dtype='float32') init_cell = fluid.data(name="init_cell", shape=[None, num_layers, hidden_size], dtype='float32') init_cell.persistable = True init_hidden.persistable = True x_emb = layers.embedding( input=x, size=[vocab_size, hidden_size], dtype='float32', is_sparse=False, param_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale))) if dropout != None and dropout > 0.0: x_emb = layers.dropout(x_emb, dropout_prob=dropout, dropout_implementation='upscale_in_train') lstm_input = x_emb last_hidden_array = [] last_cell_array = [] for i in range(num_layers): lstm_input = fluid.layers.fc(input=lstm_input, size=hidden_size * 4, bias_attr=False) hidden, cell = fluid.layers.dynamic_lstm( input=lstm_input, size=hidden_size * 4, h_0=init_hidden[:, i, :], c_0=init_cell[:, i, :], use_peepholes=False, param_attr=fluid.ParamAttr( initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale))) last_hidden = layers.sequence_pool(hidden, pool_type='last') last_cell = layers.sequence_pool(cell, pool_type='last') last_hidden_array.append(last_hidden) last_cell_array.append(last_cell) lstm_input = hidden if dropout != None and dropout > 0.0: lstm_input = layers.dropout( lstm_input, dropout_prob=dropout, dropout_implementation='upscale_in_train') last_hidden = layers.stack(last_hidden_array, 1) last_cell = layers.stack(last_cell_array, 1) softmax_weight = layers.create_parameter( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) softmax_bias = layers.create_parameter( [vocab_size], dtype="float32", name='softmax_bias', default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) projection = layers.matmul(hidden, softmax_weight) projection = layers.elementwise_add(projection, softmax_bias, axis=-1) loss = layers.softmax_with_cross_entropy(logits=projection, label=y, soft_label=False) loss = layers.sequence_pool(loss, pool_type='sum') loss = layers.reduce_mean(loss) feeding_list = ['x', 'y', 'init_hidden', 'init_cell'] if use_dataloader: return loss, last_hidden, last_cell, feeding_list, dataloader else: return loss, last_hidden, last_cell, feeding_list def seq2seq_api_rnn(input_embedding, len=3, init_hiddens=None, init_cells=None): class EncoderCell(layers.RNNCell): def __init__(self, num_layers, hidden_size, dropout_prob=0., forget_bias=0.): self.num_layers = num_layers self.hidden_size = hidden_size self.dropout_prob = dropout_prob self.lstm_cells = [] for i in range(num_layers): self.lstm_cells.append( layers.LSTMCell( hidden_size, forget_bias=forget_bias, param_attr=fluid.ParamAttr( initializer=fluid.initializer. 
UniformInitializer(low=-init_scale, high=init_scale)))) def call(self, step_input, states): new_states = [] for i in range(self.num_layers): out, new_state = self.lstm_cells[i](step_input, states[i]) step_input = layers.dropout( out, self.dropout_prob, dropout_implementation='upscale_in_train' ) if self.dropout_prob > 0 else out new_states.append(new_state) return step_input, new_states cell = EncoderCell(num_layers, hidden_size, dropout) output, new_states = layers.rnn( cell, inputs=input_embedding, initial_states=[[hidden, cell] for hidden, cell in zip([ layers.reshape(init_hidden, shape=[-1, hidden_size]) for init_hidden in layers.split( init_hiddens, num_or_sections=num_layers, dim=0) ], [ layers.reshape(init_cell, shape=[-1, hidden_size]) for init_cell in layers.split( init_cells, num_or_sections=num_layers, dim=0) ])], time_major=False) last_hidden = layers.stack([hidden for hidden, _ in new_states], 0) last_cell = layers.stack([cell for _, cell in new_states], 0) return output, last_hidden, last_cell def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] bias_arr = [] hidden_array = [] cell_array = [] mask_array = [] for i in range(num_layers): weight_1 = layers.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) weight_1_arr.append(weight_1) bias_1 = layers.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) pre_hidden = layers.slice(init_hidden, axes=[0], starts=[i], ends=[i + 1]) pre_cell = layers.slice(init_cell, axes=[0], starts=[i], ends=[i + 1]) pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size]) pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size]) hidden_array.append(pre_hidden) cell_array.append(pre_cell) input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2]) rnn = PaddingRNN() with rnn.step(): input = rnn.step_input(input_embedding) for k in range(num_layers): pre_hidden = rnn.memory(init=hidden_array[k]) pre_cell = rnn.memory(init=cell_array[k]) weight_1 = weight_1_arr[k] bias = bias_arr[k] nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) i = layers.slice(gate_input, axes=[1], starts=[0], ends=[hidden_size]) j = layers.slice(gate_input, axes=[1], starts=[hidden_size], ends=[hidden_size * 2]) f = layers.slice(gate_input, axes=[1], starts=[hidden_size * 2], ends=[hidden_size * 3]) o = layers.slice(gate_input, axes=[1], starts=[hidden_size * 3], ends=[hidden_size * 4]) c = pre_cell * layers.sigmoid(f) + layers.sigmoid( i) * layers.tanh(j) m = layers.tanh(c) * layers.sigmoid(o) rnn.update_memory(pre_hidden, m) rnn.update_memory(pre_cell, c) rnn.step_output(m) rnn.step_output(c) input = m if dropout != None and dropout > 0.0: input = layers.dropout( input, dropout_prob=dropout, dropout_implementation='upscale_in_train') rnn.step_output(input) rnnout = rnn() last_hidden_array = [] last_cell_array = [] real_res = rnnout[-1] for i in range(num_layers): m = rnnout[i * 2] c = rnnout[i * 2 + 1] m.stop_gradient = True c.stop_gradient = True last_h = layers.slice(m, axes=[0], starts=[num_steps - 1], ends=[num_steps]) last_hidden_array.append(last_h) last_c = layers.slice(c, axes=[0], starts=[num_steps - 1], ends=[num_steps]) last_cell_array.append(last_c) 
real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = layers.concat(last_hidden_array, 0) last_cell = layers.concat(last_cell_array, 0) return real_res, last_hidden, last_cell def encoder_static(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] bias_arr = [] hidden_array = [] cell_array = [] mask_array = [] for i in range(num_layers): weight_1 = layers.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) weight_1_arr.append(weight_1) bias_1 = layers.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) pre_hidden = layers.slice(init_hidden, axes=[0], starts=[i], ends=[i + 1]) pre_cell = layers.slice(init_cell, axes=[0], starts=[i], ends=[i + 1]) pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size], inplace=True) pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size], inplace=True) hidden_array.append(pre_hidden) cell_array.append(pre_cell) res = [] sliced_inputs = layers.split(input_embedding, num_or_sections=len, dim=1) for index in range(len): input = sliced_inputs[index] input = layers.reshape(input, shape=[-1, hidden_size], inplace=True) for k in range(num_layers): pre_hidden = hidden_array[k] pre_cell = cell_array[k] weight_1 = weight_1_arr[k] bias = bias_arr[k] nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) try: from paddle.fluid.contrib.layers import fused_elemwise_activation # fluid.contrib.layers.fused_elemwise_activation can do a fused # operation, like: # 1) x + sigmoid(y); x + tanh(y) # 2) tanh(x + y) # Now the unary operation supported in this fused op is limit, and # we will extent this operation to support more unary operations and # do this kind of fusion automitically in future version of paddle.fluid. 
# layers.sigmoid(i) * layers.tanh(j) tmp0 = fused_elemwise_activation( x=layers.tanh(j), y=i, functor_list=['elementwise_mul', 'sigmoid'], save_intermediate_out=False) # pre_cell * layers.sigmoid(f) tmp1 = fused_elemwise_activation( x=pre_cell, y=f, functor_list=['elementwise_mul', 'sigmoid'], save_intermediate_out=False) c = tmp0 + tmp1 # layers.tanh(c) * layers.sigmoid(o) m = fused_elemwise_activation( x=layers.tanh(c), y=o, functor_list=['elementwise_mul', 'sigmoid'], save_intermediate_out=False) except ImportError: c = pre_cell * layers.sigmoid(f) + layers.sigmoid( i) * layers.tanh(j) m = layers.tanh(c) * layers.sigmoid(o) hidden_array[k] = m cell_array[k] = c input = m if dropout != None and dropout > 0.0: input = layers.dropout( input, dropout_prob=dropout, dropout_implementation='upscale_in_train') res.append(input) last_hidden = layers.concat(hidden_array, 1) last_hidden = layers.reshape(last_hidden, shape=[-1, num_layers, hidden_size], inplace=True) last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = layers.concat(cell_array, 1) last_cell = layers.reshape(last_cell, shape=[-1, num_layers, hidden_size]) last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2]) real_res = layers.concat(res, 0) real_res = layers.reshape(real_res, shape=[len, -1, hidden_size], inplace=True) real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) return real_res, last_hidden, last_cell batch_size_each = batch_size // fluid.core.get_cuda_device_count() x = fluid.data( # name="x", shape=[batch_size_each, num_steps, 1], dtype='int64') name="x", shape=[None, num_steps, 1], dtype='int64') y = fluid.data( # name="y", shape=[batch_size_each * num_steps, 1], dtype='int64') name="y", shape=[None, 1], dtype='int64') if use_dataloader: dataloader = fluid.io.DataLoader.from_generator(feed_list=[x, y], capacity=16, iterable=False, use_double_buffer=True) init_hidden = fluid.data( name="init_hidden", # shape=[num_layers, batch_size_each, hidden_size], shape=[num_layers, None, hidden_size], dtype='float32') init_cell = fluid.data( name="init_cell", # shape=[num_layers, batch_size_each, hidden_size], shape=[num_layers, None, hidden_size], dtype='float32') init_cell.persistable = True init_hidden.persistable = True init_hidden_reshape = layers.reshape(init_hidden, shape=[num_layers, -1, hidden_size]) init_cell_reshape = layers.reshape(init_cell, shape=[num_layers, -1, hidden_size]) x_emb = layers.embedding( input=x, size=[vocab_size, hidden_size], dtype='float32', is_sparse=False, param_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer(low=-init_scale, high=init_scale))) x_emb = layers.reshape(x_emb, shape=[-1, num_steps, hidden_size], inplace=True) if dropout != None and dropout > 0.0: x_emb = layers.dropout(x_emb, dropout_prob=dropout, dropout_implementation='upscale_in_train') if rnn_model == "padding": rnn_out, last_hidden, last_cell = padding_rnn( x_emb, len=num_steps, init_hidden=init_hidden_reshape, init_cell=init_cell_reshape) elif rnn_model == "static": rnn_out, last_hidden, last_cell = encoder_static( x_emb, len=num_steps, init_hidden=init_hidden_reshape, init_cell=init_cell_reshape) elif rnn_model == "cudnn": x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = layers.lstm( x_emb, init_hidden_reshape, init_cell_reshape, num_steps, hidden_size, num_layers, is_bidirec=False, default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2]) 
elif rnn_model == "basic_lstm": rnn_out, last_hidden, last_cell = basic_lstm( x_emb, init_hidden, init_cell, hidden_size, \ num_layers=num_layers, batch_first=True, dropout_prob=dropout, \ param_attr = ParamAttr( initializer=fluid.initializer.UniformInitializer(low=-init_scale, high=init_scale) ), \ bias_attr = ParamAttr( initializer = fluid.initializer.Constant(0.0) ), \ forget_bias = 0.0) elif rnn_model == "seq2seq_api": rnn_out, last_hidden, last_cell = seq2seq_api_rnn( x_emb, len=num_steps, init_hiddens=init_hidden_reshape, init_cells=init_cell_reshape) else: print("type not support") return rnn_out = layers.reshape(rnn_out, shape=[-1, num_steps, hidden_size], inplace=True) softmax_weight = layers.create_parameter( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) softmax_bias = layers.create_parameter( [vocab_size], dtype="float32", name='softmax_bias', default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) projection = layers.matmul(rnn_out, softmax_weight) projection = layers.elementwise_add(projection, softmax_bias) projection = layers.reshape(projection, shape=[-1, vocab_size], inplace=True) loss = layers.softmax_with_cross_entropy(logits=projection, label=y, soft_label=False) loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True) loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_sum(loss) loss.persistable = True last_cell.persistable = True last_hidden.persistable = True # This will feed last_hidden, last_cell to init_hidden, init_cell, which # can be used directly in next batch. This can avoid the fetching of # last_hidden and last_cell and feeding of init_hidden and init_cell in # each training step. layers.assign(input=last_cell, output=init_cell) layers.assign(input=last_hidden, output=init_hidden) feeding_list = ['x', 'y', 'init_hidden', 'init_cell'] if use_dataloader: return loss, last_hidden, last_cell, feeding_list, dataloader else: return loss, last_hidden, last_cell, feeding_list
def lm_model(hidden_size, vocab_size, batch_size, num_layers=2, num_steps=20, init_scale=0.1, dropout=None, rnn_model='static', use_py_reader=False): def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] bias_arr = [] hidden_array = [] cell_array = [] mask_array = [] for i in range(num_layers): weight_1 = layers.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) weight_1_arr.append(weight_1) bias_1 = layers.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) pre_hidden = layers.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1]) pre_cell = layers.slice( init_cell, axes=[0], starts=[i], ends=[i + 1]) pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size]) pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size]) hidden_array.append(pre_hidden) cell_array.append(pre_cell) input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2]) rnn = PaddingRNN() with rnn.step(): input = rnn.step_input(input_embedding) for k in range(num_layers): pre_hidden = rnn.memory(init=hidden_array[k]) pre_cell = rnn.memory(init=cell_array[k]) weight_1 = weight_1_arr[k] bias = bias_arr[k] nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) i = layers.slice( gate_input, axes=[1], starts=[0], ends=[hidden_size]) j = layers.slice( gate_input, axes=[1], starts=[hidden_size], ends=[hidden_size * 2]) f = layers.slice( gate_input, axes=[1], starts=[hidden_size * 2], ends=[hidden_size * 3]) o = layers.slice( gate_input, axes=[1], starts=[hidden_size * 3], ends=[hidden_size * 4]) c = pre_cell * layers.sigmoid(f) + layers.sigmoid( i) * layers.tanh(j) m = layers.tanh(c) * layers.sigmoid(o) rnn.update_memory(pre_hidden, m) rnn.update_memory(pre_cell, c) rnn.step_output(m) rnn.step_output(c) input = m if dropout != None and dropout > 0.0: input = layers.dropout( input, dropout_prob=dropout, dropout_implementation='upscale_in_train') rnn.step_output(input) rnnout = rnn() last_hidden_array = [] last_cell_array = [] real_res = rnnout[-1] for i in range(num_layers): m = rnnout[i * 2] c = rnnout[i * 2 + 1] m.stop_gradient = True c.stop_gradient = True last_h = layers.slice( m, axes=[0], starts=[num_steps - 1], ends=[num_steps]) last_hidden_array.append(last_h) last_c = layers.slice( c, axes=[0], starts=[num_steps - 1], ends=[num_steps]) last_cell_array.append(last_c) real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = layers.concat(last_hidden_array, 0) last_cell = layers.concat(last_cell_array, 0) return real_res, last_hidden, last_cell def encoder_static(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] bias_arr = [] hidden_array = [] cell_array = [] mask_array = [] for i in range(num_layers): weight_1 = layers.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) weight_1_arr.append(weight_1) bias_1 = layers.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) pre_hidden = layers.slice( init_hidden, axes=[0], 
starts=[i], ends=[i + 1]) pre_cell = layers.slice( init_cell, axes=[0], starts=[i], ends=[i + 1]) pre_hidden = layers.reshape( pre_hidden, shape=[-1, hidden_size], inplace=True) pre_cell = layers.reshape( pre_cell, shape=[-1, hidden_size], inplace=True) hidden_array.append(pre_hidden) cell_array.append(pre_cell) res = [] sliced_inputs = layers.split( input_embedding, num_or_sections=len, dim=1) for index in range(len): input = sliced_inputs[index] input = layers.reshape(input, shape=[-1, hidden_size], inplace=True) for k in range(num_layers): pre_hidden = hidden_array[k] pre_cell = cell_array[k] weight_1 = weight_1_arr[k] bias = bias_arr[k] nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) try: from paddle.fluid.contrib.layers import fused_elemwise_activation # fluid.contrib.layers.fused_elemwise_activation can do a fused # operation, like: # 1) x + sigmoid(y); x + tanh(y) # 2) tanh(x + y) # Now the unary operation supported in this fused op is limit, and # we will extent this operation to support more unary operations and # do this kind of fusion automitically in future version of paddle.fluid. # layers.sigmoid(i) * layers.tanh(j) tmp0 = fused_elemwise_activation( x=layers.tanh(j), y=i, functor_list=['elementwise_mul', 'sigmoid'], save_intermediate_out=False) # pre_cell * layers.sigmoid(f) tmp1 = fused_elemwise_activation( x=pre_cell, y=f, functor_list=['elementwise_mul', 'sigmoid'], save_intermediate_out=False) c = tmp0 + tmp1 # layers.tanh(c) * layers.sigmoid(o) m = fused_elemwise_activation( x=layers.tanh(c), y=o, functor_list=['elementwise_mul', 'sigmoid'], save_intermediate_out=False) except ImportError: c = pre_cell * layers.sigmoid(f) + layers.sigmoid( i) * layers.tanh(j) m = layers.tanh(c) * layers.sigmoid(o) hidden_array[k] = m cell_array[k] = c input = m if dropout != None and dropout > 0.0: input = layers.dropout( input, dropout_prob=dropout, dropout_implementation='upscale_in_train') res.append(input) last_hidden = layers.concat(hidden_array, 1) last_hidden = layers.reshape( last_hidden, shape=[-1, num_layers, hidden_size], inplace=True) last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = layers.concat(cell_array, 1) last_cell = layers.reshape( last_cell, shape=[-1, num_layers, hidden_size]) last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2]) real_res = layers.concat(res, 0) real_res = layers.reshape( real_res, shape=[len, -1, hidden_size], inplace=True) real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) return real_res, last_hidden, last_cell batch_size_each = batch_size if use_py_reader: feed_shapes = [[batch_size_each, num_steps, 1], [batch_size_each * num_steps, 1]] py_reader = fluid.layers.py_reader( capacity=16, shapes=feed_shapes, dtypes=['int64', 'int64']) x, y = fluid.layers.read_file(py_reader) else: x = layers.data( name="x", shape=[batch_size_each, num_steps, 1], dtype='int64', append_batch_size=False) y = layers.data( name="y", shape=[batch_size_each * num_steps, 1], dtype='int64', append_batch_size=False) init_hidden = layers.data( name="init_hidden", shape=[num_layers, batch_size_each, hidden_size], dtype='float32', append_batch_size=False) init_cell = layers.data( name="init_cell", shape=[num_layers, batch_size_each, hidden_size], dtype='float32', append_batch_size=False) init_cell.persistable = True init_hidden.persistable = True init_hidden_reshape = layers.reshape( 
init_hidden, shape=[num_layers, -1, hidden_size]) init_cell_reshape = layers.reshape( init_cell, shape=[num_layers, -1, hidden_size]) x_emb = layers.embedding( input=x, size=[vocab_size, hidden_size], dtype='float32', is_sparse=False, param_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale))) x_emb = layers.reshape( x_emb, shape=[-1, num_steps, hidden_size], inplace=True) if dropout != None and dropout > 0.0: x_emb = layers.dropout( x_emb, dropout_prob=dropout, dropout_implementation='upscale_in_train') if rnn_model == "padding": rnn_out, last_hidden, last_cell = padding_rnn( x_emb, len=num_steps, init_hidden=init_hidden_reshape, init_cell=init_cell_reshape) elif rnn_model == "static": rnn_out, last_hidden, last_cell = encoder_static( x_emb, len=num_steps, init_hidden=init_hidden_reshape, init_cell=init_cell_reshape) elif rnn_model == "cudnn": x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = layers.lstm( x_emb, init_hidden_reshape, init_cell_reshape, num_steps, hidden_size, num_layers, is_bidirec=False, default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2]) elif rnn_model == "basic_lstm": rnn_out, last_hidden, last_cell = basic_lstm( x_emb, init_hidden, init_cell, hidden_size, \ num_layers=num_layers, batch_first=True, dropout_prob=dropout, \ param_attr = ParamAttr( initializer=fluid.initializer.UniformInitializer(low=-init_scale, high=init_scale) ), \ bias_attr = ParamAttr( initializer = fluid.initializer.Constant(0.0) ), \ forget_bias = 0.0) else: print("type not support") return rnn_out = layers.reshape( rnn_out, shape=[-1, num_steps, hidden_size], inplace=True) softmax_weight = layers.create_parameter( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) softmax_bias = layers.create_parameter( [vocab_size], dtype="float32", name='softmax_bias', default_initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale)) projection = layers.matmul(rnn_out, softmax_weight) projection = layers.elementwise_add(projection, softmax_bias) projection = layers.reshape( projection, shape=[-1, vocab_size], inplace=True) loss = layers.softmax_with_cross_entropy( logits=projection, label=y, soft_label=False) loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True) loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_sum(loss) loss.persistable = True last_cell.persistable = True last_hidden.persistable = True # This will feed last_hidden, last_cell to init_hidden, init_cell, which # can be used directly in next batch. This can avoid the fetching of # last_hidden and last_cell and feeding of init_hidden and init_cell in # each training step. layers.assign(input=last_cell, output=init_cell) layers.assign(input=last_hidden, output=init_hidden) feeding_list = ['x', 'y', 'init_hidden', 'init_cell'] if use_py_reader: return loss, last_hidden, last_cell, feeding_list, py_reader else: return loss, last_hidden, last_cell, feeding_list
        batch_label.append([sample[1]])
        if len(batch_img) >= batch_size:
            yield np.array(batch_img).astype("float32"), np.array(batch_label).astype("int64")
            batch_img = []
            batch_label = []
    if batch_img:
        yield np.array(batch_img).astype("float32"), np.array(batch_label).astype("int64")

# define network
data = fluid.layers.data(name="img", shape=[-1, 28, 28], dtype='float32')
label = fluid.layers.data(name="label", shape=[-1, 1], dtype='int64')
sequence_length = fluid.layers.data(name="sequence_length", shape=[-1], dtype='int32')

output_row, _, _ = basic_lstm(data, None, None, 128, sequence_length=sequence_length)
output_col, _, _ = basic_lstm(output_row, None, None, 128, sequence_length=sequence_length)
predict = fluid.layers.fc(input=output_row, size=num_classes, act="softmax")

cost = fluid.layers.cross_entropy(input=predict, label=label)
loss = fluid.layers.reduce_mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label)

# set train and test program
test_program = fluid.default_main_program().clone(for_test=True)

# define optimizer
optimizer = fluid.optimizer.RMSPropOptimizer(learning_rate=0.001, rho=0.9)
optimizer.minimize(loss)
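# A minimal, hypothetical training loop for the network above. It assumes the
# truncated reader fragment is wrapped in a generator named train_reader()
# that yields (img, label) batches, and that every 28x28 image contributes 28
# time steps; the epoch count is also an assumption.
import numpy as np

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

for epoch in range(5):
    for batch_img, batch_lab in train_reader():
        seq_len = np.full((batch_img.shape[0],), 28, dtype='int32')
        loss_v, acc_v = exe.run(
            feed={'img': batch_img, 'label': batch_lab,
                  'sequence_length': seq_len},
            fetch_list=[loss, acc])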