def test_cost_layer(self):
    """Build the cost layers and the CRF/CTC/NCE/hsigmoid layers.

    Each group is handed to ``layer.parse_network`` as a list and the
    result is printed.
    """
    plain_cls = layer.classification_cost(input=inference, label=label)
    weighted_cls = layer.classification_cost(
        input=inference, label=label, weight=weight)
    xent = layer.cross_entropy_cost(input=inference, label=label)
    xent_selfnorm = layer.cross_entropy_with_selfnorm_cost(
        input=inference, label=label)
    plain_sq_err = layer.square_error_cost(input=inference, label=label)
    weighted_sq_err = layer.square_error_cost(
        input=inference, label=label, weight=weight)
    multi_binary = layer.multi_binary_label_cross_entropy_cost(
        input=inference, label=label)
    rank = layer.rank_cost(left=score, right=score, label=score)
    lambda_rank = layer.lambda_cost(input=inference, score=score)
    summed = layer.sum_cost(input=inference)
    huber_reg = layer.huber_regression_cost(input=score, label=label)
    huber_cls = layer.huber_classification_cost(input=score, label=label)

    # The cost groups are parsed before the remaining layers are created,
    # which keeps the original creation/parse order.
    cost_groups = (
        [plain_cls, weighted_cls],
        [xent, xent_selfnorm],
        [plain_sq_err, weighted_sq_err],
        [multi_binary, rank, lambda_rank, summed, huber_reg, huber_cls],
    )
    for group in cost_groups:
        print(layer.parse_network(group))

    crf = layer.crf(input=inference, label=label)
    crf_decoding = layer.crf_decoding(input=inference, size=3)
    ctc = layer.ctc(input=inference, label=label)
    warp_ctc = layer.warp_ctc(input=pixel, label=label)
    nce = layer.nce(input=inference, label=label, num_classes=3)
    hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
    structured = [crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid]
    print(layer.parse_network(structured))
def test_cost_layer(self):
    """Build the cost layers and the CRF/CTC/NCE/hsigmoid layers.

    Each group is handed to ``layer.parse_network`` as positional
    arguments and the result is printed.
    """
    plain_cls = layer.classification_cost(input=inference, label=label)
    weighted_cls = layer.classification_cost(
        input=inference, label=label, weight=weight)
    xent = layer.cross_entropy_cost(input=inference, label=label)
    xent_selfnorm = layer.cross_entropy_with_selfnorm_cost(
        input=inference, label=label)
    plain_mse = layer.mse_cost(input=inference, label=label)
    weighted_mse = layer.mse_cost(
        input=inference, label=label, weight=weight)
    multi_binary = layer.multi_binary_label_cross_entropy_cost(
        input=inference, label=label)
    rank = layer.rank_cost(left=score, right=score, label=score)
    lambda_rank = layer.lambda_cost(input=inference, score=score)
    summed = layer.sum_cost(input=inference)
    huber = layer.huber_cost(input=score, label=label)

    # The cost groups are parsed before the remaining layers are created,
    # which keeps the original creation/parse order.
    cost_groups = (
        (plain_cls, weighted_cls),
        (xent, xent_selfnorm),
        (plain_mse, weighted_mse),
        (multi_binary, rank, lambda_rank, summed, huber),
    )
    for group in cost_groups:
        print(layer.parse_network(*group))

    crf = layer.crf(input=inference, label=label)
    crf_decoding = layer.crf_decoding(input=inference, size=3)
    ctc = layer.ctc(input=inference, label=label)
    warp_ctc = layer.warp_ctc(input=pixel, label=label)
    nce = layer.nce(input=inference, label=label, num_classes=3)
    hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
    print(layer.parse_network(
        crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid))
def test_op(self):
    """Chain unary ops and scalar/layer arithmetic, then print the parse.

    The operand order of every arithmetic expression is kept exactly,
    since left- and right-hand operator overloads may differ.
    """
    signal = layer.data(name='data', type=data_type.dense_vector(128))
    unary_ops = (op.exp, op.sqrt, op.reciprocal, op.log, op.abs,
                 op.sigmoid, op.tanh, op.square, op.relu)
    for apply_op in unary_ops:
        signal = apply_op(signal)

    # Scalar on either side of the layer, then layer-with-layer.
    acc = 1 + signal
    acc = acc + 1
    acc = signal + acc
    acc = acc - signal
    acc = acc - 2
    acc = 2 - acc
    acc = 2 * acc
    acc = acc * 3

    unit = layer.data(name='data_2', type=data_type.dense_vector(1))
    acc = acc * unit
    acc = unit * acc
    acc = acc + unit
    acc = unit + acc
    print(layer.parse_network(acc))
def test_aggregate_layer(self):
    """Build pooling, first/last-seq and concat layers; print the parse."""
    avg_pool = layer.pooling(
        input=pixel,
        pooling_type=pooling.Avg(),
        agg_level=layer.AggregateLevel.EACH_SEQUENCE)
    tail = layer.last_seq(input=pixel)
    head = layer.first_seq(input=pixel)
    joined = layer.concat(input=[tail, head])
    seq_joined = layer.seq_concat(a=tail, b=head)
    outputs = (avg_pool, tail, head, joined, seq_joined)
    print(layer.parse_network(*outputs))
def test_aggregate_layer(self):
    """Build pooling, first/last-seq and concat layers; print the parse."""
    avg_pool = layer.pooling(
        input=pixel,
        pooling_type=pooling.Avg(),
        agg_level=layer.AggregateLevel.TO_SEQUENCE)
    tail = layer.last_seq(input=pixel)
    head = layer.first_seq(input=pixel)
    joined = layer.concat(input=[tail, head])
    seq_joined = layer.seq_concat(a=tail, b=head)
    outputs = [avg_pool, tail, head, joined, seq_joined]
    print(layer.parse_network(outputs))
def test_reshape_layer(self):
    """Build block_expand/expand/repeat/seq_reshape/rotate; print the parse."""
    blocks = layer.block_expand(
        input=conv, num_channels=4, stride_x=1, block_x=1)
    expanded = layer.expand(
        input=weight,
        expand_as=pixel,
        expand_level=layer.ExpandLevel.FROM_TIMESTEP)
    repeated = layer.repeat(input=pixel, num_repeats=4)
    reshaped = layer.seq_reshape(input=pixel, reshape_size=4)
    rotated = layer.rotate(input=pixel, height=16, width=49)
    outputs = (blocks, expanded, repeated, reshaped, rotated)
    print(layer.parse_network(*outputs))
def test_evaluator(self):
    """Attach a classification-error evaluator and print two parses.

    The network is parsed once from the cost layer and once from the
    softmax output layer.
    """
    image = layer.data(name='pixel2', type=data_type.dense_vector(784))
    softmax_out = layer.fc(
        input=image, size=10, act=activation.Softmax(), name='fc_here')
    target = layer.data(name='label2', type=data_type.integer_value(10))
    xent = layer.cross_entropy_cost(input=softmax_out, label=target)
    # Called for its effect only; the return value is not used.
    evaluator.classification_error(input=softmax_out, label=target)
    for root in (xent, softmax_out):
        print(layer.parse_network(root))
def test_reshape_layer(self):
    """Build block_expand/expand/repeat/seq_reshape/rotate; print the parse."""
    blocks = layer.block_expand(
        input=conv, num_channels=4, stride_x=1, block_x=1)
    expanded = layer.expand(
        input=weight,
        expand_as=pixel,
        expand_level=layer.ExpandLevel.FROM_NO_SEQUENCE)
    repeated = layer.repeat(input=pixel, num_repeats=4)
    reshaped = layer.seq_reshape(input=pixel, reshape_size=4)
    rotated = layer.rotate(input=pixel, height=16, width=49)
    outputs = [blocks, expanded, repeated, reshaped, rotated]
    print(layer.parse_network(outputs))
def test_pooling_layer(self):
    """Build img_pool, spp and maxout layers on ``conv``; print the parse."""
    max_pooled = layer.img_pool(
        input=conv,
        pool_size=2,
        num_channels=16,
        padding=1,
        pool_type=pooling.Max())
    pyramid = layer.spp(
        input=conv,
        pyramid_height=2,
        num_channels=16,
        pool_type=pooling.Max())
    maxed_out = layer.maxout(input=conv, num_channels=16, groups=4)
    print(layer.parse_network(max_pooled, pyramid, maxed_out))
def test_pooling_layer(self):
    """Build img_pool, spp and maxout layers on ``conv``; print the parse."""
    max_pooled = layer.img_pool(
        input=conv,
        pool_size=2,
        num_channels=16,
        padding=1,
        pool_type=pooling.Max())
    pyramid = layer.spp(
        input=conv,
        pyramid_height=2,
        num_channels=16,
        pool_type=pooling.Max())
    maxed_out = layer.maxout(input=conv, num_channels=16, groups=4)
    outputs = [max_pooled, pyramid, maxed_out]
    print(layer.parse_network(outputs))
def test_math_layer(self):
    """Build the arithmetic/similarity layers and print the parse."""
    summed = layer.addto(input=[pixel, pixel])
    combined = layer.linear_comb(weights=weight, vectors=hidden, size=10)
    blended = layer.interpolation(input=[hidden, hidden], weight=score)
    # NOTE(review): this layer is built but never passed to parse_network
    # below - confirm whether that omission is intended.
    bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
    powered = layer.power(input=pixel, weight=score)
    scaled = layer.scaling(input=pixel, weight=score)
    sloped = layer.slope_intercept(input=pixel)
    outer = layer.tensor(a=pixel, b=pixel, size=1000)
    cosine = layer.cos_sim(a=pixel, b=pixel)
    transposed = layer.trans(input=outer)
    outputs = (summed, combined, blended, powered, scaled, sloped, outer,
               cosine, transposed)
    print(layer.parse_network(*outputs))
def test_operator(self):
    """Wrap dotmul and conv operators in mixed layers; print each parse.

    Every operator is attached twice: once through the ``input`` argument
    and once through ``+=`` inside a ``with layer.mixed()`` block.
    """
    dense_in = layer.data(name='data', type=data_type.dense_vector(784))
    filter_in = layer.data(name='word', type=data_type.dense_vector(128))
    left = layer.fc(input=dense_in, size=100, act=activation.Sigmoid())
    right = layer.fc(input=dense_in, size=100, act=activation.Sigmoid())

    dotmul_op = layer.dotmul_operator(a=left, b=right)
    dotmul_direct = layer.mixed(input=dotmul_op)
    with layer.mixed() as dotmul_ctx:
        dotmul_ctx += dotmul_op

    conv_op = layer.conv_operator(
        img=dense_in,
        filter=filter_in,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv_direct = layer.mixed(input=conv_op)
    with layer.mixed() as conv_ctx:
        conv_ctx += conv_op

    for root in (dotmul_direct, dotmul_ctx, conv_direct, conv_ctx):
        print(layer.parse_network(root))
def test_operator(self):
    """Wrap dotmul and conv operators in mixed layers; print each parse.

    Every operator is attached twice: once through the ``input`` argument
    and once through ``+=`` inside a ``with layer.mixed()`` block.
    """
    dense_in = layer.data(name='data1', type=data_type.dense_vector(784))
    filter_in = layer.data(name='word1', type=data_type.dense_vector(128))
    left = layer.fc(input=dense_in, size=100, act=activation.Sigmoid())
    right = layer.fc(input=dense_in, size=100, act=activation.Sigmoid())

    dotmul_op = layer.dotmul_operator(a=left, b=right)
    dotmul_direct = layer.mixed(input=dotmul_op)
    with layer.mixed() as dotmul_ctx:
        dotmul_ctx += dotmul_op

    conv_op = layer.conv_operator(
        img=dense_in,
        filter=filter_in,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv_direct = layer.mixed(input=conv_op)
    with layer.mixed() as conv_ctx:
        conv_ctx += conv_op

    for root in (dotmul_direct, dotmul_ctx, conv_direct, conv_ctx):
        print(layer.parse_network(root))
def parse_new_rnn():
    """Build a recurrent classifier with a booted memory.

    :return: ``str()`` of the ``layer.parse_network`` result for the
        classification cost.
    """
    words = layer.data(name="word", type=data_type.dense_vector(dict_dim))
    target = layer.data(
        name="label", type=data_type.dense_vector(label_dim))
    embedded = layer.embedding(input=words, size=word_dim)
    boot_data = layer.data(name="boot", type=data_type.dense_vector(10))
    boot_fc = layer.fc(name='boot_fc', input=boot_data, size=10)

    def step(y, wid):
        # The memory and the fc layer share the name "rnn_state".
        wid_embedding = layer.embedding(input=wid, size=word_dim)
        state = layer.memory(
            name="rnn_state", size=hidden_dim, boot_layer=boot_fc)
        return layer.fc(
            input=[y, wid_embedding, state],
            size=hidden_dim,
            act=activation.Tanh(),
            bias_attr=True,
            name="rnn_state")

    rnn_out = layer.recurrent_group(
        name="rnn", step=step, input=[embedded, words])
    last_state = layer.last_seq(input=rnn_out)
    prob = layer.fc(
        size=label_dim,
        input=last_state,
        act=activation.Softmax(),
        bias_attr=True)
    cost = layer.classification_cost(input=prob, label=target)
    return str(layer.parse_network(cost))
def dump_v2_config(cls, topology, save_path, binary=False):
    """
    Dump the network topology to a specified file.

    This function is only used to dump network defined by using PaddlePaddle
    V2 API.

    :param cls: Not used by the body.
    :param topology: The output layers in the entire network.
    :type topology: LayerOutput|List|Tuple
    :param save_path: The path to save the dump network topology.
    :type save_path: str
    :param binary: Whether to dump the serialized network topology. The
        default value is false.
    :type binary: bool
    :raises RuntimeError: If ``topology`` is neither a LayerOutput nor a
        sequence.
    :raises AssertionError: If an element of the sequence is not a
        LayerOutput.
    """
    # The docstring now precedes the imports; placed after them it was a
    # plain string expression and the function had no __doc__.
    import collections
    from paddle.trainer_config_helpers.layers import LayerOutput
    from paddle.v2.layer import parse_network
    # NOTE(review): not referenced below; kept in case the import is needed
    # for its side effects - confirm before removing.
    from paddle.proto import TrainerConfig_pb2

    if isinstance(topology, LayerOutput):
        topology = [topology]
    elif isinstance(topology, collections.Sequence):
        for out_layer in topology:
            assert isinstance(out_layer, LayerOutput), (
                "The type of each element in the parameter topology "
                "should be LayerOutput.")
    else:
        raise RuntimeError("Error input type for parameter topology.")

    model_config = parse_network(topology)
    # The serialized form is raw bytes, so it must go through a binary-mode
    # handle: text mode may translate newline bytes on some platforms and
    # rejects bytes outright on Python 3.
    with open(save_path, "wb" if binary else "w") as fout:
        if binary:
            fout.write(model_config.SerializeToString())
        else:
            fout.write(str(model_config))
def parse_new_rnn():
    """Reset the parser, then build a recurrent classifier with a booted memory.

    :return: ``str()`` of the ``layer.parse_network`` result for the
        classification cost.
    """
    reset_parser()
    words = layer.data(name="word", type=data_type.dense_vector(dict_dim))
    target = layer.data(
        name="label", type=data_type.dense_vector(label_dim))
    embedded = layer.embedding(input=words, size=word_dim)
    boot_data = layer.data(name="boot", type=data_type.dense_vector(10))
    boot_fc = layer.fc(name='boot_fc', input=boot_data, size=10)

    def step(y, wid):
        # The memory and the fc layer share the name "rnn_state".
        wid_embedding = layer.embedding(input=wid, size=word_dim)
        state = layer.memory(
            name="rnn_state", size=hidden_dim, boot_layer=boot_fc)
        return layer.fc(
            input=[y, wid_embedding, state],
            size=hidden_dim,
            act=activation.Tanh(),
            bias_attr=True,
            name="rnn_state")

    rnn_out = layer.recurrent_group(
        name="rnn", step=step, input=[embedded, words])
    last_state = layer.last_seq(input=rnn_out)
    prob = layer.fc(
        size=label_dim,
        input=last_state,
        act=activation.Softmax(),
        bias_attr=True)
    cost = layer.classification_cost(input=prob, label=target)
    return str(layer.parse_network(cost))
def test_math_layer(self):
    """Build the arithmetic/similarity layers and print the parse."""
    summed = layer.addto(input=[pixel, pixel])
    combined = layer.linear_comb(
        weights=combine_weight, vectors=hidden, size=10)
    blended = layer.interpolation(input=[hidden, hidden], weight=score)
    # NOTE(review): this layer is built but never passed to parse_network
    # below - confirm whether that omission is intended.
    bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
    powered = layer.power(input=pixel, weight=score)
    scaled = layer.scaling(input=pixel, weight=score)
    sloped = layer.slope_intercept(input=pixel)
    outer = layer.tensor(a=pixel, b=pixel, size=1000)
    cosine = layer.cos_sim(a=pixel, b=pixel)
    transposed = layer.trans(input=outer)
    outputs = [
        summed, combined, blended, powered, scaled, sloped, outer, cosine,
        transposed
    ]
    print(layer.parse_network(outputs))
def get_spec(self, model):
    """Parse ``model`` and index its layer entries by name.

    Stores the mapping in ``self.spec_dict``, which is reset to an empty
    dict before parsing.
    """
    # credit to https://github.com/lcy-seso/paddle_example/blob/master/seq_slice_demo/test_seq_slice.py#L55
    # Paddle Official: https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/layer.py#L263-L322
    # Pb Definition: https://github.com/PaddlePaddle/Paddle/blob/d02a68c4472d3b85559f82c026896bf2cf563b07/proto/ModelConfig.proto
    from paddle.v2.layer import parse_network

    self.spec_dict = dict()
    network_proto = parse_network(model)
    for layer_entry in network_proto.layers:
        self.spec_dict[layer_entry.name] = layer_entry
def parse_new_rnn():
    """Build a single-input recurrent group over word embeddings.

    :return: ``str()`` of the ``layer.parse_network`` result for the
        recurrent group.
    """

    def new_step(y):
        # The memory and the fc layer share the name "rnn_state".
        state = layer.memory(name="rnn_state", size=hidden_dim)
        return layer.fc(
            input=[y, state],
            size=hidden_dim,
            act=activation.Tanh(),
            bias_attr=True,
            name="rnn_state")

    words = layer.data(
        name="word", type=data_type.integer_value(dict_dim))
    embedded = layer.embedding(input=words, size=word_dim)
    rnn_group = layer.recurrent_group(
        name="rnn", step=new_step, input=embedded)
    return str(layer.parse_network(rnn_group))
def parse_new_rnn():
    """Build a single-input recurrent group over word embeddings.

    :return: ``str()`` of the ``layer.parse_network`` result for the
        recurrent group.
    """

    def new_step(y):
        # The memory and the fc layer share the name "rnn_state".
        state = layer.memory(name="rnn_state", size=hidden_dim)
        return layer.fc(
            input=[y, state],
            size=hidden_dim,
            act=activation.Tanh(),
            bias_attr=True,
            name="rnn_state")

    words = layer.data(
        name="word", type=data_type.integer_value(dict_dim))
    embedded = layer.embedding(input=words, size=word_dim)
    rnn_group = layer.recurrent_group(
        name="rnn", step=new_step, input=embedded)
    return str(layer.parse_network(rnn_group))
def dump_v2_config(topology, save_path, binary=False):
    """
    Dump the network topology to a specified file.

    This function is only used to dump network defined by using PaddlePaddle
    V2 APIs. This function will NOT dump configurations related to
    PaddlePaddle optimizer.

    :param topology: The output layers (can be more than one layers given in
        a Python List or Tuple) of the entire network. Using the specified
        layers (if more than one layer is given) as root, traversing back to
        the data layer(s), all the layers connected to the specified output
        layers will be dumped. Layers not connected to the specified will
        not be dumped.
    :type topology: LayerOutput|List|Tuple
    :param save_path: The path to save the dumped network topology.
    :type save_path: str
    :param binary: Whether to dump the serialized network topology or not.
        The default value is false. NOTE that, if you call this function to
        generate network topology for PaddlePaddle C-API, a serialized
        version of network topology is required. When using PaddlePaddle
        C-API, this flag MUST be set to True.
    :type binary: bool
    :raises RuntimeError: If ``topology`` is neither a LayerOutput nor a
        sequence.
    :raises AssertionError: If an element of the sequence is not a
        LayerOutput.
    """
    if isinstance(topology, LayerOutput):
        topology = [topology]
    elif isinstance(topology, collections.Sequence):
        for out_layer in topology:
            assert isinstance(out_layer, LayerOutput), (
                "The type of each element in the parameter topology "
                "should be LayerOutput.")
    else:
        raise RuntimeError("Error input type for parameter topology.")

    model_config = parse_network(topology)
    # The serialized form is raw bytes, so it must go through a binary-mode
    # handle: text mode may translate newline bytes on some platforms and
    # rejects bytes outright on Python 3.
    with open(save_path, "wb" if binary else "w") as fout:
        if binary:
            fout.write(model_config.SerializeToString())
        else:
            fout.write(str(model_config))
same_as_question = binary_input("SameAsQuestion") correct_sentence = binary_output("CorrectSentence") correct_start_word = binary_output("CorrectStartWord") correct_end_word = binary_output("CorrectEndWord") # here the question vector is not a sequence question_vector = build_question_vector(config, questions) document_embeddings = build_document_embeddings( config, documents, same_as_question, question_vector) sentence_pred = pick_word(config, document_embeddings) start_word_pred = pick_word(config, document_embeddings) end_word_pred = pick_word(config, document_embeddings) if is_infer: return [sentence_pred, start_word_pred, end_word_pred] else: return [ build_classification_loss(sentence_pred, correct_sentence), build_classification_loss(start_word_pred, correct_start_word), build_classification_loss(end_word_pred, correct_end_word) ] if __name__ == "__main__": from paddle_train import load_config conf = load_config("paddle-config.json") losses = build_model(conf) print(parse_network(losses))
def test_projection(self):
    """Wrap each projection type in mixed layers and print the parses.

    Every projection is attached twice: once through the ``input``
    argument and once through ``+=`` inside a ``with layer.mixed()``
    block. The context-projection layers are built but not printed.
    """
    # Named dense_in rather than ``input`` so the builtin is not shadowed.
    dense_in = layer.data(name='data', type=data_type.dense_vector(784))
    word = layer.data(
        name='word', type=data_type.integer_value_sequence(10000))
    fc0 = layer.fc(input=dense_in, size=100, act=activation.Sigmoid())
    fc1 = layer.fc(input=dense_in, size=200, act=activation.Sigmoid())

    mixed0 = layer.mixed(
        size=256,
        input=[
            layer.full_matrix_projection(input=fc0),
            layer.full_matrix_projection(input=fc1)
        ])
    with layer.mixed(size=200) as mixed1:
        mixed1 += layer.full_matrix_projection(input=fc0)
        mixed1 += layer.identity_projection(input=fc1)

    table = layer.table_projection(input=word)
    emb0 = layer.mixed(size=512, input=table)
    with layer.mixed(size=512) as emb1:
        emb1 += table

    scale = layer.scaling_projection(input=fc0)
    scale0 = layer.mixed(size=100, input=scale)
    with layer.mixed(size=100) as scale1:
        scale1 += scale

    dotmul = layer.dotmul_projection(input=fc0)
    dotmul0 = layer.mixed(size=100, input=dotmul)
    with layer.mixed(size=100) as dotmul1:
        dotmul1 += dotmul

    # fc0 is 100 wide and context_len is 5, so the enclosing mixed layers
    # are sized 100 * 5 = 500 (previously 100).
    context = layer.context_projection(input=fc0, context_len=5)
    context0 = layer.mixed(size=500, input=context)
    with layer.mixed(size=500) as context1:
        context1 += context

    conv = layer.conv_projection(
        input=dense_in,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv0 = layer.mixed(input=conv, bias_attr=True)
    with layer.mixed(bias_attr=True) as conv1:
        conv1 += conv

    print(layer.parse_network(mixed0))
    print(layer.parse_network(mixed1))
    print(layer.parse_network(emb0))
    print(layer.parse_network(emb1))
    print(layer.parse_network(scale0))
    print(layer.parse_network(scale1))
    print(layer.parse_network(dotmul0))
    print(layer.parse_network(dotmul1))
    print(layer.parse_network(conv0))
    print(layer.parse_network(conv1))
def test_slicing_joining_layer(self):
    """Pad ``conv`` along channel, height and width; print the parse."""
    padded = layer.pad(
        input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
    print(layer.parse_network(padded))
def test_sampling_layer(self):
    """Build max_id, sampling_id and eos layers; print the parse."""
    top_id = layer.max_id(input=inference)
    sampled_id = layer.sampling_id(input=inference)
    end_marker = layer.eos(input=top_id, eos_id=5)
    print(layer.parse_network(top_id, sampled_id, end_marker))
def test_conv_layer(self):
    """Build a conv_shift layer and parse it together with ``conv``."""
    shifted = layer.conv_shift(a=pixel, b=score)
    print(layer.parse_network(conv, shifted))
def test_recurrent_layer(self):
    """Build recurrent, lstmemory and grumemory layers; print the parse."""
    tokens = layer.data(name='word', type=data_type.integer_value(12))
    simple_rnn = layer.recurrent(input=tokens)
    lstm_rnn = layer.lstmemory(input=tokens)
    gru_rnn = layer.grumemory(input=tokens)
    print(layer.parse_network(simple_rnn, lstm_rnn, gru_rnn))
def test_projection(self):
    """Wrap each projection type in mixed layers and print the parses.

    Every projection is attached twice: once through the ``input``
    argument and once through ``+=`` inside a ``with layer.mixed()``
    block. The context-projection layers are built but not printed.
    """
    dense_in = layer.data(name='data2', type=data_type.dense_vector(784))
    tokens = layer.data(
        name='word2', type=data_type.integer_value_sequence(10000))
    narrow_fc = layer.fc(
        input=dense_in, size=100, act=activation.Sigmoid())
    wide_fc = layer.fc(input=dense_in, size=200, act=activation.Sigmoid())

    mixed_direct = layer.mixed(
        size=256,
        input=[
            layer.full_matrix_projection(input=narrow_fc),
            layer.full_matrix_projection(input=wide_fc)
        ])
    with layer.mixed(size=200) as mixed_ctx:
        mixed_ctx += layer.full_matrix_projection(input=narrow_fc)
        mixed_ctx += layer.identity_projection(input=wide_fc)

    table_proj = layer.table_projection(input=tokens)
    emb_direct = layer.mixed(size=512, input=table_proj)
    with layer.mixed(size=512) as emb_ctx:
        emb_ctx += table_proj

    scale_proj = layer.scaling_projection(input=narrow_fc)
    scale_direct = layer.mixed(size=100, input=scale_proj)
    with layer.mixed(size=100) as scale_ctx:
        scale_ctx += scale_proj

    dotmul_proj = layer.dotmul_projection(input=narrow_fc)
    dotmul_direct = layer.mixed(size=100, input=dotmul_proj)
    with layer.mixed(size=100) as dotmul_ctx:
        dotmul_ctx += dotmul_proj

    context_proj = layer.context_projection(input=narrow_fc, context_len=5)
    context_direct = layer.mixed(size=500, input=context_proj)
    with layer.mixed(size=500) as context_ctx:
        context_ctx += context_proj

    conv_proj = layer.conv_projection(
        input=dense_in,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv_direct = layer.mixed(input=conv_proj, bias_attr=True)
    with layer.mixed(bias_attr=True) as conv_ctx:
        conv_ctx += conv_proj

    roots = (mixed_direct, mixed_ctx, emb_direct, emb_ctx, scale_direct,
             scale_ctx, dotmul_direct, dotmul_ctx, conv_direct, conv_ctx)
    for root in roots:
        print(layer.parse_network(root))
def test_vgg(self):
    """Build ``networks.small_vgg`` on a 784-wide input; print the parse."""
    image = layer.data(name='pixel', type=data_type.dense_vector(784))
    vgg_net = networks.small_vgg(
        input_image=image, num_channels=1, num_classes=2)
    print(layer.parse_network(vgg_net))
def test_recurrent_layer(self):
    """Build recurrent, lstmemory and grumemory layers; print the parse."""
    tokens = layer.data(name='word', type=data_type.integer_value(12))
    simple_rnn = layer.recurrent(input=tokens)
    lstm_rnn = layer.lstmemory(input=tokens)
    gru_rnn = layer.grumemory(input=tokens)
    outputs = [simple_rnn, lstm_rnn, gru_rnn]
    print(layer.parse_network(outputs))
param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(embed_size * 8), learning_rate=1)) if is_train == True: return paddle.layer.hsigmoid( input=hidden_layer, label=target_word, num_classes=dict_size, param_attr=paddle.attr.Param(name="sigmoid_w"), bias_attr=paddle.attr.Param(name="sigmoid_b")) else: return paddle.layer.mixed( size=dict_size - 1, input=paddle.layer.trans_full_matrix_projection( hidden_layer, param_attr=paddle.attr.Param(name="sigmoid_w")), act=paddle.activation.Sigmoid(), bias_attr=paddle.attr.Param(name="sigmoid_b")) if __name__ == "__main__": # this is to test and debug the network topology defination. # please set the hyper-parameters as needed. print( parse_network( ngram_lm(hidden_size=512, embed_size=512, dict_size=1024, gram_num=4, is_train=False)))
def test_sampling_layer(self):
    """Build max_id, sampling_id and eos layers; print the parse.

    ``layer.printer`` is also called on the max_id layer before parsing.
    """
    top_id = layer.max_id(input=inference)
    sampled_id = layer.sampling_id(input=inference)
    end_marker = layer.eos(input=top_id, eos_id=5)
    layer.printer(top_id)
    outputs = [top_id, sampled_id, end_marker]
    print(layer.parse_network(outputs))
# encode document words document_embeddings = build_pretrained_embedding( "documents", paddle.data_type.integer_value_sub_sequence(config.vocab_size), config.embedding_dim, config.embedding_droprate) same_as_question = paddle.layer.data( name="same_as_question", type=paddle.data_type.dense_vector_sub_sequence(1)) document_words_ecoding = encode_documents( document_embeddings, same_as_question, question_vector, question_lstm_outs, config.passage_indep_embedding_dim, "__doc") doc_lstm_outs = basic_modules.stacked_bidirectional_lstm_by_nested_seq( document_words_ecoding, config.lstm_depth, config.lstm_hidden_dim, "__doc_lstm") # search the answer. sentence_idx = paddle.layer.data(name="sen_idx", type=paddle.data_type.integer_value(1)) start_idx = paddle.layer.data(name="start_idx", type=paddle.data_type.integer_value(1)) end_idx = paddle.layer.data(name="end_idx", type=paddle.data_type.integer_value(1)) return search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config, is_infer) if __name__ == "__main__": print(parse_network(GNR(ModelConfig)))
def test_norm_layer(self):
    """Build three normalization layers on ``conv``; print the parse."""
    cross_map_norm = layer.img_cmrnorm(input=conv, size=5)
    batch_norm = layer.batch_norm(input=conv)
    sum_to_one = layer.sum_to_one_norm(input=conv)
    print(layer.parse_network(cross_map_norm, batch_norm, sum_to_one))
def test_vgg(self):
    """Build ``networks.small_vgg`` on a 784-wide input; print the parse."""
    image = layer.data(name='pixel1', type=data_type.dense_vector(784))
    vgg_net = networks.small_vgg(
        input_image=image, num_channels=1, num_classes=2)
    print(layer.parse_network(vgg_net))
def test_norm_layer(self):
    """Build three normalization layers on ``conv``; print the parse."""
    cross_map_norm = layer.img_cmrnorm(input=conv, size=5)
    batch_norm = layer.batch_norm(input=conv)
    sum_to_one = layer.sum_to_one_norm(input=conv)
    outputs = [cross_map_norm, batch_norm, sum_to_one]
    print(layer.parse_network(outputs))
act=paddle.activation.Tanh(), param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(emb_size * 8))) if is_train: return paddle.layer.nce(input=hidden_layer, label=next_word, num_classes=dict_size, param_attr=paddle.attr.Param(name="nce_w"), bias_attr=paddle.attr.Param(name="nce_b"), num_neg_samples=25, neg_distribution=None) else: return paddle.layer.mixed( size=dict_size, input=paddle.layer.trans_full_matrix_projection( hidden_layer, param_attr=paddle.attr.Param(name="nce_w")), act=paddle.activation.Softmax(), bias_attr=paddle.attr.Param(name="nce_b")) if __name__ == "__main__": # this is to test and debug the network topology defination. # please set the hyper-parameters as needed. print( parse_network( ngram_lm(hidden_size=256, emb_size=256, dict_size=1024, gram_num=4, is_train=True)))