def __net_conf(word, target): x = fluid.layers.embedding( input=word, size=[config.src_vocab_size, config.embedding_dim], dtype="float32", is_sparse=True) fwd_lstm, _ = fluid.layers.dynamic_lstm( input=fluid.layers.fc(size=config.unit_num * 4, input=x), size=config.unit_num * 4, candidate_activation="tanh", gate_activation="sigmoid", cell_activation="sigmoid", bias_attr=fluid.ParamAttr( initializer=NormalInitializer(loc=0.0, scale=1.0)), is_reverse=False) bwd_lstm, _ = fluid.layers.dynamic_lstm( input=fluid.layers.fc(size=config.unit_num * 4, input=x), size=config.unit_num * 4, candidate_activation="tanh", gate_activation="sigmoid", cell_activation="sigmoid", bias_attr=fluid.ParamAttr( initializer=NormalInitializer(loc=0.0, scale=1.0)), is_reverse=True) outputs = fluid.layers.concat([fwd_lstm, bwd_lstm], axis=1) emission = fluid.layers.fc(size=config.tag_num, input=outputs) crf_cost = fluid.layers.linear_chain_crf( input=emission, label=target, param_attr=fluid.ParamAttr(name="crfw", )) avg_cost = fluid.layers.mean(x=crf_cost) return avg_cost, emission
def _initialize_alphas(self): k = sum(1 for i in range(self._steps) for n in range(2 + i)) num_ops = len(self._primitives) self.alphas_normal = fluid.layers.create_parameter( shape=[k, num_ops], dtype="float32", default_initializer=NormalInitializer(loc=0.0, scale=1e-3)) self.alphas_reduce = fluid.layers.create_parameter( shape=[k, num_ops], dtype="float32", default_initializer=NormalInitializer(loc=0.0, scale=1e-3)) self._arch_parameters = [ self.alphas_normal, self.alphas_reduce, ] if self._method == "PC-DARTS": self.betas_normal = fluid.layers.create_parameter( shape=[k], dtype="float32", default_initializer=NormalInitializer(loc=0.0, scale=1e-3)) self.betas_reduce = fluid.layers.create_parameter( shape=[k], dtype="float32", default_initializer=NormalInitializer(loc=0.0, scale=1e-3)) self._arch_parameters += [self.betas_normal, self.betas_reduce]
def mixed_op(x, c_out, stride, index, reduction, name): param_attr = ParamAttr( name="arch/weight{}_{}".format(2 if reduction else 1, index)) weight = fluid.layers.create_parameter( shape=[len(PRIMITIVES)], dtype="float32", attr=param_attr, default_initializer=NormalInitializer(loc=0.0, scale=1e-3)) weight = fluid.layers.softmax(weight) ops = [] index = 0 for primitive in PRIMITIVES: op = OPS[primitive](x, c_out, stride, False, name) if 'pool' in primitive: gama = ParamAttr(name=name + '_' + primitive + "_mixed_bn_gama", initializer=fluid.initializer.Constant(value=1), trainable=False) beta = ParamAttr(name=name + '_' + primitive + "_mixed_bn_beta", initializer=fluid.initializer.Constant(value=0), trainable=False) op = fluid.layers.batch_norm( op, param_attr=gama, bias_attr=beta, moving_mean_name=name + '_' + primitive + "_mixed_bn_mean", moving_variance_name=name + '_' + primitive + "_mixed_bn_variance") ops.append(fluid.layers.elementwise_mul(op, weight[index])) index += 1 out = fluid.layers.sums(ops) return out
def __rnn(self, input): for i in range(self.num_layers): hidden = fluid.layers.fc( size=self.hidden_dim * 4, bias_attr=fluid.ParamAttr( initializer=NormalInitializer(loc=0.0, scale=1.0)), input=hidden if i else input) return fluid.layers.dynamic_lstm( input=hidden, size=self.hidden_dim * 4, candidate_activation="tanh", gate_activation="sigmoid", cell_activation="sigmoid", bias_attr=fluid.ParamAttr( initializer=NormalInitializer(loc=0.0, scale=1.0)), is_reverse=False)
def vgg16_bn_drop(input,class_dim): # 定义卷积块 def conv_block(input, num_filter, groups, dropouts): return fluid.nets.img_conv_group( input=input, pool_size=2, pool_stride=2, conv_num_filter=[num_filter] * groups, conv_filter_size=3, conv_act='relu', conv_with_batchnorm=True, conv_batchnorm_drop_rate=dropouts, pool_type='max') # 定义一个VGG16的卷积组 conv1 = conv_block(input, 64, 2, [0.3, 0]) conv2 = conv_block(conv1, 128, 2, [0.4, 0]) conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) # 定义第一个drop层 drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) # 定义第一层全连接层 fc1 = fluid.layers.fc(input=drop, size=512, act=None) # 定义BN层 bn = fluid.layers.batch_norm(input=fc1, act='relu') # 定义第二层全连接层 drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) # 定义第二层全连接层 fc2 = fluid.layers.fc(input=drop2, size=512, act=None) # 获取全连接输出,获得分类器 predict = fluid.layers.fc( input=fc2, size=class_dim, act='softmax', param_attr=ParamAttr(name="param1", initializer=NormalInitializer())) return predict
def __init__(self, n_layer, hidden_size=768, name="encoder", search_layer=True, use_fixed_gumbel=False, gumbel_alphas=None): super(EncoderLayer, self).__init__() self._n_layer = n_layer self._hidden_size = hidden_size self._n_channel = 256 self._steps = 3 self._n_ops = len(ConvBN_PRIMITIVES) self.use_fixed_gumbel = use_fixed_gumbel self.stem = fluid.dygraph.Sequential( Conv2D(num_channels=1, num_filters=self._n_channel, filter_size=[3, self._hidden_size], padding=[1, 0], param_attr=fluid.ParamAttr(initializer=MSRA()), bias_attr=False), BatchNorm(num_channels=self._n_channel, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=1)), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0)))) cells = [] for i in range(n_layer): cell = Cell(steps=self._steps, n_channel=self._n_channel, name="%s/layer_%d" % (name, i)) cells.append(cell) self._cells = fluid.dygraph.LayerList(cells) k = sum(1 for i in range(self._steps) for n in range(2 + i)) num_ops = self._n_ops self.alphas = fluid.layers.create_parameter( shape=[k, num_ops], dtype="float32", default_initializer=NormalInitializer(loc=0.0, scale=1e-3)) # self.k = fluid.layers.create_parameter( # shape=[1, self._n_layer], # dtype="float32", # default_initializer=NormalInitializer( # loc=0.0, scale=1e-3)) self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) self.bns = [] self.outs = [] for i in range(self._n_layer): bn = BatchNorm(num_channels=self._n_channel, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=1), trainable=False), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0), trainable=False)) self.bns.append(bn) out = Linear(self._n_channel, 3, param_attr=ParamAttr(initializer=MSRA()), bias_attr=ParamAttr(initializer=MSRA())) self.outs.append(out) self.use_fixed_gumbel = use_fixed_gumbel self.gumbel_alphas = gumbel_softmax(self.alphas) if gumbel_alphas is not None: self.gumbel_alphas = np.array(gumbel_alphas).reshape( self.alphas.shape) else: self.gumbel_alphas = gumbel_softmax(self.alphas) self.gumbel_alphas.stop_gradient = True print("gumbel_alphas: {}".format(self.gumbel_alphas))
def _net_conf(word, mark, target): word_embedding = fluid.layers.embedding(input=word, size=[word_dict_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name=embedding_name, trainable=False)) mark_embedding = fluid.layers.embedding(input=mark, size=[mark_dict_len, mark_dim], dtype='float32', is_sparse=IS_SPARSE) word_caps_vector = fluid.layers.concat( input=[word_embedding, mark_embedding], axis=1) mix_hidden_lr = 1 rnn_para_attr = fluid.ParamAttr(initializer=NormalInitializer( loc=0.0, scale=0.0), learning_rate=mix_hidden_lr) hidden_para_attr = fluid.ParamAttr(initializer=NormalInitializer( loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3)), learning_rate=mix_hidden_lr) hidden = fluid.layers.fc( input=word_caps_vector, name="__hidden00__", size=hidden_dim, act="tanh", bias_attr=fluid.ParamAttr(initializer=NormalInitializer( loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3))), param_attr=fluid.ParamAttr(initializer=NormalInitializer( loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3)))) fea = [] for direction in ["fwd", "bwd"]: for i in range(stack_num): if i != 0: hidden = fluid.layers.fc( name="__hidden%02d_%s__" % (i, direction), size=hidden_dim, act="stanh", bias_attr=fluid.ParamAttr( initializer=NormalInitializer(loc=0.0, scale=1.0)), input=[hidden, rnn[0], rnn[1]], param_attr=[ hidden_para_attr, rnn_para_attr, rnn_para_attr ]) rnn = fluid.layers.dynamic_lstm( name="__rnn%02d_%s__" % (i, direction), input=hidden, size=hidden_dim, candidate_activation='relu', gate_activation='sigmoid', cell_activation='sigmoid', bias_attr=fluid.ParamAttr( initializer=NormalInitializer(loc=0.0, scale=1.0)), is_reverse=(i % 2) if direction == "fwd" else not i % 2, param_attr=rnn_para_attr) fea += [hidden, rnn[0], rnn[1]] rnn_fea = fluid.layers.fc( size=hidden_dim, bias_attr=fluid.ParamAttr(initializer=NormalInitializer( loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3))), act="stanh", input=fea, param_attr=[hidden_para_attr, rnn_para_attr, rnn_para_attr] * 2) emission = fluid.layers.fc( size=label_dict_len, input=rnn_fea, param_attr=fluid.ParamAttr(initializer=NormalInitializer( loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3)))) crf_cost = fluid.layers.linear_chain_crf( input=emission, label=target, param_attr=fluid.ParamAttr(name='crfw', initializer=NormalInitializer( loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3)), learning_rate=mix_hidden_lr)) avg_cost = fluid.layers.mean(x=crf_cost) return avg_cost, emission
def train(use_cuda, learning_rate, num_passes, BATCH_SIZE=128): class_dim = 2 image_shape = [3, 48, 48] image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') net, conv1 = simplenet(image) # 获取全连接输出,获得分类器 predict = fluid.layers.fc(input=net, size=class_dim, act='softmax', param_attr=ParamAttr( name="param1", initializer=NormalInitializer())) # 获取损失函数 cost = fluid.layers.cross_entropy(input=predict, label=label) # 定义平均损失函数 avg_cost = fluid.layers.mean(x=cost) # 计算batch,从而来求平均的准确率 batch_size = fluid.layers.create_tensor(dtype='int64') print "batchsize=", batch_size batch_acc = fluid.layers.accuracy(input=predict, label=label, total=batch_size) # 定义优化方法 optimizer = fluid.optimizer.Momentum( learning_rate=learning_rate, momentum=0.9, regularization=fluid.regularizer.L2Decay(5 * 1e-5)) opts = optimizer.minimize(avg_cost) # 是否使用GPU place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() # 创建调试器 exe = fluid.Executor(place) # 初始化调试器 exe.run(fluid.default_startup_program()) # 保存结果 model_save_dir = "./models" # 获取训练数据 resizesize = 60 cropsize = 48 mydata = Dataset(cropsize=cropsize, resizesize=resizesize) mydatareader = mydata.train_reader(train_list='./all_shuffle_train.txt') train_reader = paddle.batch(reader=paddle.reader.shuffle( reader=mydatareader, buf_size=50000), batch_size=128) # 指定数据和label的对应关系 feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) step = 0 sample_num = 0 start_up_program = framework.default_startup_program() param1_var = start_up_program.global_block().var("param1") accuracy = fluid.average.WeightedAverage() # 开始训练,使用循环的方式来指定训多少个Pass for pass_id in range(num_passes): # 从训练数据中按照一个个batch来读取数据 accuracy.reset() for batch_id, data in enumerate(train_reader()): loss, conv1_out, param1, acc, weight = exe.run( fluid.default_main_program(), feed=feeder.feed(data), fetch_list=[ avg_cost, conv1, param1_var, batch_acc, batch_size ]) accuracy.add(value=acc, weight=weight) pass_acc = accuracy.eval() # 重新启动图形化展示组件 if sample_num == 0: input_image.start_sampling() conv_image.start_sampling() # 获取taken idx1 = input_image.is_sample_taken() idx2 = conv_image.is_sample_taken() # 保证它们的taken是一样的 assert idx1 == idx2 idx = idx1 if idx != -1: # 加载输入图像的数据数据 image_data = data[0][0] input_image_data = np.transpose( image_data.reshape(image_shape), axes=[1, 2, 0]) input_image.set_sample(idx, input_image_data.shape, input_image_data.flatten()) # 加载卷积数据 conv_image_data = conv1_out[0][0] conv_image.set_sample(idx, conv_image_data.shape, conv_image_data.flatten()) # 完成输出一次 sample_num += 1 if sample_num % num_samples == 0: input_image.finish_sampling() conv_image.finish_sampling() sample_num = 0 # 加载趋势图的数据 loss_scalar.add_record(step, loss) acc_scalar.add_record(step, acc) # 添加模型结构数据 param1_histgram.add_record(step, param1.flatten()) # 输出训练日志 print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(pass_acc)) step += 1 model_path = os.path.join(model_save_dir, str(pass_id)) if not os.path.exists(model_save_dir): os.mkdir(model_save_dir) fluid.io.save_inference_model(model_path, ['image'], [predict], exe)
def __init__(self, num_labels, n_layer, hidden_size=768, name="encoder", search_layer=True, use_fixed_gumbel=False, gumbel_alphas=None): super(EncoderLayer, self).__init__() self._n_layer = n_layer self._hidden_size = hidden_size self._n_channel = 128 self._steps = 3 self._n_ops = len(ConvBN_PRIMITIVES) self.use_fixed_gumbel = use_fixed_gumbel self.stem0 = fluid.dygraph.Sequential( Conv2D(num_channels=1, num_filters=self._n_channel, filter_size=[3, self._hidden_size], padding=[1, 0], param_attr=fluid.ParamAttr(initializer=MSRA()), bias_attr=False), BatchNorm(num_channels=self._n_channel, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=1)), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0)))) self.stem1 = fluid.dygraph.Sequential( Conv2D(num_channels=1, num_filters=self._n_channel, filter_size=[3, self._hidden_size], padding=[1, 0], param_attr=fluid.ParamAttr(initializer=MSRA()), bias_attr=False), BatchNorm(num_channels=self._n_channel, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=1)), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0)))) cells = [] for i in range(n_layer): cell = Cell(steps=self._steps, n_channel=self._n_channel, name="%s/layer_%d" % (name, i)) cells.append(cell) self._cells = fluid.dygraph.LayerList(cells) k = sum(1 for i in range(self._steps) for n in range(2 + i)) num_ops = self._n_ops self.alphas = fluid.layers.create_parameter( shape=[k, num_ops], dtype="float32", default_initializer=NormalInitializer(loc=0.0, scale=1e-3)) self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) self.bns = [] self.outs = [] for i in range(self._n_layer): bn = BatchNorm(num_channels=self._n_channel, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=1), trainable=False), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0), trainable=False)) out = Linear(self._n_channel, num_labels, param_attr=ParamAttr(initializer=MSRA()), bias_attr=ParamAttr(initializer=MSRA())) self.bns.append(bn) self.outs.append(out) self._bns = fluid.dygraph.LayerList(self.bns) self._outs = fluid.dygraph.LayerList(self.outs) self.use_fixed_gumbel = use_fixed_gumbel #self.gumbel_alphas = gumbel_softmax(self.alphas, 0).detach() mrpc_arch = [ [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], # std_conv7 0 # node 0 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], # dil_conv5 1 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], # std_conv7 0 # node 1 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], # dil_conv5 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], # zero 2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], # zero 0 # node2 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], # std_conv3 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], # zero 2 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0] # dil_conv3 3 ] self.gumbel_alphas = to_variable( np.array(mrpc_arch).astype(np.float32)) self.gumbel_alphas.stop_gradient = True print("gumbel_alphas: \n", self.gumbel_alphas.numpy())
def train(use_cuda, learning_rate, num_passes, BATCH_SIZE=128): # 定义图像的类别数量 class_dim = 10 # 定义图像的通道数和大小 image_shape = [3, 32, 32] # 定义输入数据大小,指定图像的形状,数据类型是浮点型 image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') # 定义标签,类型是整型 label = fluid.layers.data(name='label', shape=[1], dtype='int64') # 获取神经网络 net, conv1 = vgg16_bn_drop(image) # 获取全连接输出,获得分类器 predict = fluid.layers.fc( input=net, size=class_dim, act='softmax', param_attr=ParamAttr(name="param1", initializer=NormalInitializer())) # 获取损失函数 cost = fluid.layers.cross_entropy(input=predict, label=label) # 定义平均损失函数 avg_cost = fluid.layers.mean(x=cost) # 每个batch计算的时候能取到当前batch里面样本的个数,从而来求平均的准确率 batch_size = fluid.layers.create_tensor(dtype='int64') print batch_size batch_acc = fluid.layers.accuracy(input=predict, label=label, total=batch_size) # 定义优化方法 optimizer = fluid.optimizer.Momentum( learning_rate=learning_rate, momentum=0.9, regularization=fluid.regularizer.L2Decay(5 * 1e-5)) opts = optimizer.minimize(avg_cost) # 是否使用GPU place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() # 创建调试器 exe = fluid.Executor(place) # 初始化调试器 exe.run(fluid.default_startup_program()) # 获取训练数据 train_reader = paddle.batch( paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE) # 指定数据和label的对于关系 feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) step = 0 sample_num = 0 start_up_program = framework.default_startup_program() param1_var = start_up_program.global_block().var("param1") accuracy = fluid.average.WeightedAverage() # 开始训练,使用循环的方式来指定训多少个Pass for pass_id in range(num_passes): # 从训练数据中按照一个个batch来读取数据 accuracy.reset() for batch_id, data in enumerate(train_reader()): loss, conv1_out, param1, acc, weight = exe.run(fluid.default_main_program(), feed=feeder.feed(data), fetch_list=[avg_cost, conv1, param1_var, batch_acc, batch_size]) accuracy.add(value=acc, weight=weight) pass_acc = accuracy.eval() # 重新启动图形化展示组件 if sample_num == 0: input_image.start_sampling() conv_image.start_sampling() # 获取taken idx1 = input_image.is_sample_taken() idx2 = conv_image.is_sample_taken() # 保证它们的taken是一样的 assert idx1 == idx2 idx = idx1 if idx != -1: # 加载输入图像的数据数据 image_data = data[0][0] input_image_data = np.transpose( image_data.reshape(image_shape), axes=[1, 2, 0]) input_image.set_sample(idx, input_image_data.shape, input_image_data.flatten()) # 加载卷积数据 conv_image_data = conv1_out[0][0] conv_image.set_sample(idx, conv_image_data.shape, conv_image_data.flatten()) # 完成输出一次 sample_num += 1 if sample_num % num_samples == 0: input_image.finish_sampling() conv_image.finish_sampling() sample_num = 0 # 加载趋势图的数据 loss_scalar.add_record(step, loss) acc_scalar.add_record(step, acc) # 添加模型结构数据 param1_histgram.add_record(step, param1.flatten()) # 输出训练日志 print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(pass_acc)) step += 1