def save_model(self, save_path):
    """Persist the model's parameters to ``save_path`` and log the cost.

    Only local rank 0 writes in data-parallel mode so workers do not race
    on the same file.
    NOTE(review): when ``self.parallelized`` is False nothing is saved at
    all — confirm that is intentional.
    """
    began = time.time()
    is_master = self.parallelized and D.parallel.Env().local_rank == 0
    if is_master:
        F.save_dygraph(self.model.state_dict(), save_path)
    logging.info("cost time: %.4fs" % (time.time() - began))
def run():
    """Train a MultilayerPerception on MNIST for 5 epochs and save it."""
    with fluid.dygraph.guard():
        model = MultilayerPerception()
        model.train()
        # SGD optimizer with learning_rate set to 0.01
        optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.01, parameter_list=model.parameters())
        # Train for 5 epochs
        train_loader = paddle.batch(paddle.dataset.mnist.train(), batch_size=16)
        EPOCH_NUM = 5
        for epoch_id in range(EPOCH_NUM):
            for batch_id, data in enumerate(train_loader()):
                # Prepare the batch data
                image_data = np.array([x[0] for x in data]).astype('float32')
                label_data = np.array([x[1] for x in data]).astype('float32').reshape(-1, 1)
                image = fluid.dygraph.to_variable(image_data)
                label = fluid.dygraph.to_variable(label_data)
                # Forward pass
                predict = model(image)
                # Loss: mean of squared error over the batch.
                # NOTE(review): labels are float class indices fed to
                # square_error_cost — cross-entropy is the usual choice for
                # MNIST classification; confirm intent.
                loss = fluid.layers.square_error_cost(predict, label)
                avg_loss = fluid.layers.mean(loss)
                # Print the current loss every 200 batches
                if batch_id % 200 == 0:
                    print("epoch: {}, batch: {}, loss is: {}".format(epoch_id, batch_id, avg_loss.numpy()))
                # Backward pass and parameter update
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                model.clear_gradients()
        # Save the model parameters
        fluid.save_dygraph(model.state_dict(), 'mnist')
def torch2paddle(torch_para, paddle_model, paddle_para_name=None):
    """Convert a PyTorch checkpoint into Paddle dygraph parameters.

    Args:
        torch_para: path to a torch checkpoint whose ``'state_dict'`` entry
            holds the weights.
        paddle_model: a Paddle dygraph Layer whose state_dict keys line up,
            in order, with the torch ones.
        paddle_para_name: optional output path; defaults to ``torch_para``
            with its last 4 characters (e.g. ".pth") stripped.
    """
    torch_state_dict = torch.load(torch_para)['state_dict']
    paddle_state_dict = paddle_model.state_dict()
    # Drop BatchNorm bookkeeping entries (num_batches_tracked) that have no
    # Paddle counterpart.
    for key in [k for k in torch_state_dict if '_tracked' in k]:
        torch_state_dict.pop(key)
    # Keys are matched positionally, so both dicts must enumerate the
    # corresponding layers in the same order and be the same length.
    assert len(torch_state_dict) == len(paddle_state_dict)
    new_weight = collections.OrderedDict()
    for torch_key, paddle_key in zip(torch_state_dict.keys(), paddle_state_dict.keys()):
        value = torch_state_dict[torch_key].detach().numpy()
        # Fully-connected weights are stored transposed relative to Paddle.
        new_weight[paddle_key] = value.T if 'fc' in torch_key else value
    paddle_model.set_dict(new_weight)
    # Fixes vs. original: `is None` instead of `== None`, the `paddel` typo
    # in the local name, and the duplicated save call collapsed to one.
    name = torch_para[0:-4] if paddle_para_name is None else paddle_para_name
    fluid.save_dygraph(paddle_model.state_dict(), name)
def save_network(self, epoch):
    """Write a checkpoint file for every named sub-network of this model."""
    for name in self.model_names:
        if not isinstance(name, str):
            continue
        filename = '%s_net_%s' % (epoch, name)
        target = os.path.join(self.args.save_dir, filename)
        network = getattr(self, 'net' + name)
        fluid.save_dygraph(network.state_dict(), target)
def torch2paddle(torch_para, paddle_model, paddle_para_name=None):
    """Convert a raw PyTorch state-dict file into Paddle dygraph parameters.

    Args:
        torch_para: path to a torch file that *is* the state dict itself
            (no ``'state_dict'`` wrapper key).
        paddle_model: a Paddle dygraph Layer whose state_dict keys line up,
            in order, with the torch ones.
        paddle_para_name: optional output path; defaults to ``torch_para``
            with its last 4 characters (e.g. ".pth") stripped.
    """
    # Load the torch parameters.
    torch_state_dict = torch.load(torch_para)
    paddle_state_dict = paddle_model.state_dict()
    # Drop BatchNorm bookkeeping entries (num_batches_tracked) that have no
    # Paddle counterpart.
    for key in [k for k in torch_state_dict if '_tracked' in k]:
        torch_state_dict.pop(key)
    # Keys are matched positionally via zip, so both dicts must be the same
    # length and enumerate corresponding layers in the same order.
    assert len(torch_state_dict) == len(paddle_state_dict)
    # OrderedDict preserves the insertion order of the matched keys.
    new_weight = collections.OrderedDict()
    for torch_key, paddle_key in zip(torch_state_dict.keys(), paddle_state_dict.keys()):
        # detach() so the numpy copy does not track gradients.
        value = torch_state_dict[torch_key].detach().numpy()
        # Fully-connected weights are transposed between torch and Paddle.
        new_weight[paddle_key] = value.T if 'fc' in torch_key else value
    paddle_model.set_dict(new_weight)
    # Fixes vs. original: `is None` instead of `== None`, the `paddel` typo
    # in the local name, and the duplicated save call collapsed to one.
    name = torch_para[0:-4] if paddle_para_name is None else paddle_para_name
    fluid.save_dygraph(paddle_model.state_dict(), name)
def save_network(self, epoch):
    """Checkpoint each sub-network under <save_dir>/mobile/checkpoint."""
    for name in self.model_names:
        if not isinstance(name, str):
            continue
        # The first three characters of `name` are a prefix that is not
        # repeated in the filename.
        filename = '%s_net%s' % (epoch, name[3:])
        target = os.path.join(self.cfgs.save_dir, 'mobile', 'checkpoint', filename)
        fluid.save_dygraph(getattr(self, name).state_dict(), target)
def save(self, path):
    """Save model parameters and, when present, optimizer state to ``path``."""
    fluid.save_dygraph(self.model.state_dict(), path)
    optimizer = self.model._optimizer
    if optimizer is None:
        return
    optim_state = optimizer.state_dict()
    # Only write optimizer state when there is something to write.
    if optim_state:
        fluid.save_dygraph(optim_state, path)
def train(model):
    """Train ``model`` on MNIST for 5 epochs with Momentum SGD, run a
    validation pass after every epoch, and save the parameters to 'mnist'."""
    print('start training ... ')
    model.train()
    epoch_num = 5
    opt = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9, parameter_list=model.parameters())
    # Use Paddle's built-in data readers
    train_loader = paddle.batch(paddle.dataset.mnist.train(), batch_size=10)
    valid_loader = paddle.batch(paddle.dataset.mnist.test(), batch_size=10)
    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_loader()):
            # Reshape/cast the input: images to NCHW float32, labels to int64
            x_data = np.array([item[0] for item in data], dtype='float32').reshape(-1, 1, 28, 28)
            y_data = np.array([item[1] for item in data], dtype='int64').reshape(-1, 1)
            # Convert numpy.ndarray to Tensor
            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)
            # Forward pass
            logits = model(img)
            # Loss
            loss = fluid.layers.softmax_with_cross_entropy(logits, label)
            avg_loss = fluid.layers.mean(loss)
            if batch_id % 1000 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(
                    epoch, batch_id, avg_loss.numpy()))
            # Backward pass, update, and clear gradients
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
        # Per-epoch validation pass
        model.eval()
        accuracies = []
        losses = []
        for batch_id, data in enumerate(valid_loader()):
            # Reshape/cast the validation batch the same way
            x_data = np.array([item[0] for item in data], dtype='float32').reshape(-1, 1, 28, 28)
            y_data = np.array([item[1] for item in data], dtype='int64').reshape(-1, 1)
            # Convert numpy.ndarray to Tensor
            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)
            # Model output and per-batch metrics
            logits = model(img)
            pred = fluid.layers.softmax(logits)
            loss = fluid.layers.softmax_with_cross_entropy(logits, label)
            acc = fluid.layers.accuracy(pred, label)
            accuracies.append(acc.numpy())
            losses.append(loss.numpy())
        print("[validation] accuracy/loss: {}/{}".format(
            np.mean(accuracies), np.mean(losses)))
        # Back to training mode for the next epoch
        model.train()
    # Save the model parameters
    fluid.save_dygraph(model.state_dict(), 'mnist')
def trian_model():
    """Configure and run MNIST training with an async DataLoader.

    NOTE(review): the name misspells "train" — kept as-is because callers
    elsewhere may reference it.
    """
    # Training configuration; start the training process
    with fluid.dygraph.guard():
        model = MNIST("mnist")
        model.train()
        # Call the data-loading function
        train_loader = load_data('train')
        # Create an asynchronous data reader
        place = fluid.CPUPlace()
        data_loader = fluid.io.DataLoader.from_generator(capacity=5, return_list=True)
        data_loader.set_batch_generator(train_loader, places=place)
        optimizer = fluid.optimizer.SGDOptimizer(
            learning_rate=0.001, parameter_list=model.parameters())
        EPOCH_NUM = 10
        iter = 0
        iters = []
        losses = []
        accs = []
        for epoch_id in range(EPOCH_NUM):
            for batch_id, data in enumerate(data_loader):
                # Data preparation is much simpler with the DataLoader
                image_data, label_data = data
                image = fluid.dygraph.to_variable(image_data)
                label = fluid.dygraph.to_variable(label_data)
                # Forward pass
                predict, acc = model(image, label)
                # Loss: batch-mean cross-entropy
                # loss = fluid.layers.square_error_cost(predict, label)
                # Loss function changed to cross-entropy
                loss = fluid.layers.cross_entropy(predict, label)
                avg_loss = fluid.layers.mean(loss)
                # Print loss/accuracy every 200 batches and record curves
                if batch_id % 200 == 0:
                    print(
                        "epoch: {}, batch: {}, loss is: {}, acc is {}".format(
                            epoch_id, batch_id, avg_loss.numpy(), acc.numpy()))
                    iters.append(iter)
                    losses.append(avg_loss.numpy())
                    accs.append(acc.numpy())
                    iter = iter + 100
                    # show_trianning(iters, accs)
                # Backward pass and parameter update
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                model.clear_gradients()
        show_trianning(iters, losses)
        show_trianning(iters, accs)
        # Save the model parameters
        fluid.save_dygraph(model.state_dict(), 'mnist')
def pytorch_to_paddle(pytorch_model, paddle_model, flip_filters=False, flip_channels=None, verbose=True):
    """Copy parameters from a PyTorch model into a Paddle model.

    NOTE(review): this function appears unfinished/broken as written:
    ``open('save_temp', 'a')`` returns a plain file object, which does not
    support ``f['model_weights']`` subscripting (this reads like h5py code
    ported incompletely); the converted ``weights`` array is computed but
    never stored anywhere; and both branches of the bias ``if``/``else``
    are identical. Documented as-is — confirm intent before relying on it.
    """
    paddle_dict = paddle_model.state_dict()
    fluid.save_dygraph(paddle_dict, "save_temp")
    pytorch_input_state_dict = pytorch_model.state_dict()
    pytorch_layer_names = util.state_dict_layer_names(pytorch_input_state_dict)
    with open('save_temp', 'a') as f:
        # NOTE(review): file objects are not subscriptable — this line
        # would raise TypeError at runtime.
        model_weights = f['model_weights']
        target_layer_names = list(map(str, model_weights.keys()))
        check_for_missing_layers(target_layer_names, pytorch_layer_names, verbose)
        for layer in pytorch_layer_names:
            paddle_h5_layer_param = util.dig_to_params_pf(model_weights[layer])
            weight_key = layer + '.weight'
            bias_key = layer + '.bias'
            running_mean_key = layer + '.running_mean'
            running_var_key = layer + '.running_var'
            # Load weights (or other learned parameters)
            if weight_key in pytorch_input_state_dict:
                weights = pytorch_input_state_dict[weight_key].numpy()
                # NOTE(review): the converted array is never written back.
                weights = convert_weights(weights,
                                          to_pytorch=False,
                                          flip_filters=flip_filters,
                                          flip_channels=flip_channels)
            # Load bias
            if bias_key in pytorch_input_state_dict:
                bias = pytorch_input_state_dict[bias_key].numpy()
                # NOTE(review): both branches below are identical.
                if running_var_key in pytorch_input_state_dict:
                    paddle_h5_layer_param[bias_key][:] = bias
                else:
                    paddle_h5_layer_param[bias_key][:] = bias
            # Load batch normalization running mean
            if running_mean_key in pytorch_input_state_dict:
                running_mean = pytorch_input_state_dict[
                    running_mean_key].numpy()
                paddle_h5_layer_param[PADDLE_MOVING_MEAN_KEY][:] = running_mean
            # Load batch normalization running variance
            if running_var_key in pytorch_input_state_dict:
                running_var = pytorch_input_state_dict[running_var_key].numpy()
                # account for difference in epsilon used
                running_var += PYTORCH_EPSILON - PADDLE_EPSILON
                paddle_h5_layer_param[
                    PADDLE_MOVING_VARIANCE_KEY][:] = running_var
    # pytorch_model.load_state_dict(state_dict)
    paddle_model.load_weights('temp.h5')
def testOnlyLoadParams(self):
    """A params-only save must load back with no optimizer state."""
    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding("emb", [10, 10])
        fluid.save_dygraph(emb.state_dict(), "emb_dy")
        _, opti_state_dict = fluid.load_dygraph("emb_dy")
        self.assertTrue(opti_state_dict == None)
def save(self, step):
    """Bundle every registered module's state into one checkpoint file.

    Must be called inside a ``fluid.dygraph.guard()`` context.
    """
    fname = self.fname_template.format(step)
    print('Saving checkpoint into %s...' % fname)
    snapshot = {name: module.state_dict()
                for name, module in self.module_dict.items()}
    fluid.save_dygraph(snapshot, fname)
def func_test_load_compatible_with_keep_name_table(self):
    """keep_name_table=True must still yield params and no optimizer state."""
    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding([10, 10])
        save_path = os.path.join('saved_dy', 'emb_dy')
        fluid.save_dygraph(emb.state_dict(), save_path)
        para_state_dict, opti_state_dict = fluid.load_dygraph(
            save_path, keep_name_table=True)
        self.assertTrue(para_state_dict != None)
        self.assertTrue(opti_state_dict == None)
def save(self, dir, step):
    """Bundle all generator/discriminator weights into one checkpoint file."""
    nets = ('genA2B', 'genB2A', 'disGA', 'disGB', 'disLA', 'disLB')
    # dict comprehension preserves the insertion order of `nets`.
    params = {name: getattr(self, name).state_dict() for name in nets}
    fluid.save_dygraph(
        params, os.path.join(dir, self.dataset + '_params_%07d' % step))
def testOnlyLoadParams(self):
    """A params-only save must load back with no optimizer state."""
    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding([10, 10])
        save_path = os.path.join('saved_dy', 'emb_dy')
        fluid.save_dygraph(emb.state_dict(), save_path)
        _, opti_state_dict = fluid.load_dygraph(save_path)
        self.assertTrue(opti_state_dict == None)
def train_mnist(args, model, tokens=None):
    """Train ``model`` on MNIST with Adam (optionally data-parallel),
    evaluate after every epoch, and save a checkpoint at the end
    (rank 0 only in parallel mode)."""
    epoch_num = args.epoch
    BATCH_SIZE = 64
    adam = AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters())
    train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                         batch_size=BATCH_SIZE,
                                         drop_last=True)
    if args.use_data_parallel:
        # Shard batches across parallel workers
        train_reader = fluid.contrib.reader.distributed_batch_reader(
            train_reader)
    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
            img = to_variable(dy_x_data)
            label = to_variable(y_data)
            # Labels are targets only; never backprop through them
            label.stop_gradient = True
            cost, acc = model.forward(img, label, tokens=tokens)
            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)
            if args.use_data_parallel:
                # Scale the loss, then all-reduce gradients across workers
                avg_loss = model.scale_loss(avg_loss)
                avg_loss.backward()
                model.apply_collective_grads()
            else:
                avg_loss.backward()
            adam.minimize(avg_loss)
            # save checkpoint
            model.clear_gradients()
            if batch_id % 1 == 0:
                print("Loss at epoch {} step {}: {:}".format(
                    epoch, batch_id, avg_loss.numpy()))
        # Per-epoch evaluation
        model.eval()
        test_acc = test_mnist(model, tokens=tokens)
        model.train()
        print("Loss at epoch {} , acc is: {}".format(epoch, test_acc))
    # Only rank 0 writes the checkpoint in data-parallel mode
    save_parameters = (not args.use_data_parallel) or (
        args.use_data_parallel and fluid.dygraph.parallel.Env().local_rank == 0)
    if save_parameters:
        fluid.save_dygraph(model.state_dict(), "save_temp")
        print("checkpoint saved")
def gen_item_embedding(click_seq0_1):
    """Train a Skip-gram model over click sequences and return the learned
    item embedding matrix (also pickled to ./item_embed/embedding_array)."""
    all_item = click_seq0_1.item_feat.item_id.tolist()
    dataset_res = []
    for kkey in click_seq0_1.click_seq.keys():
        iseq = click_seq0_1.click_seq[kkey]['item']
        # The item 1 before / 2 after are positives; dwell time could be
        # factored in later.
        dataset_tmp = gen_negative_samp(iseq, all_item)
        dataset_res += dataset_tmp
    batch_size = 512
    epoch_num = 3
    embedding_size = 200
    step = 0
    learning_rate = 0.001
    vocab_size = click_seq0_1.item_id_size
    with fluid.dygraph.guard(fluid.CUDAPlace(0)):
        # Build the Skip-gram network from our SkipGram class
        skip_gram_model = SkipGram(vocab_size, embedding_size)
        # Optimizer that trains this network
        adam = fluid.optimizer.AdamOptimizer(
            learning_rate=learning_rate,
            parameter_list=skip_gram_model.parameters())
        # Iterate over the training data one mini-batch at a time via build_batch
        for center_words, target_words, label in build_batch(
                dataset_res, batch_size, epoch_num):
            # Convert numpy tensors into Paddle-computable tensors
            center_words_var = fluid.dygraph.to_variable(center_words)
            target_words_var = fluid.dygraph.to_variable(target_words)
            label_var = fluid.dygraph.to_variable(label)
            # One forward pass through the network
            pred, loss = skip_gram_model(center_words_var, target_words_var,
                                         label_var)
            # Automatic backward pass
            loss.backward()
            # One optimizer step driven by the loss
            adam.minimize(loss)
            # Clear gradients before the next mini-batch
            skip_gram_model.clear_gradients()
            # Print the loss periodically to check it decreases steadily
            step += 1
            if step % 500 == 0:
                print("step %d, loss %.3f" % (step, loss.numpy()[0]))
        model_dict = skip_gram_model.state_dict()
        fluid.save_dygraph(model_dict, './item_embed/skip_model')  # creates the folder automatically
        embedding_weight = skip_gram_model.embedding.weight.numpy()
        pickle.dump(embedding_weight, open('./item_embed/embedding_array', 'wb'))
    return embedding_weight
def main(args):
    """DML (Deep Mutual Learning) entry point: jointly train two
    MobileNetV1 models and checkpoint each one's best validation weights."""
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    with fluid.dygraph.guard(place):
        # 1. Define data reader
        train_loader, valid_loader = create_reader(place, args)
        # 2. Define neural network
        models = [
            MobileNetV1(class_dim=args.class_num),
            MobileNetV1(class_dim=args.class_num)
        ]
        optimizers = create_optimizer(models, args)
        # 3. Use PaddleSlim DML strategy
        dml_model = DML(models, args.use_parallel)
        dml_optimizer = dml_model.opt(optimizers)
        # 4. Train your network
        # Only local rank 0 writes checkpoints in parallel mode.
        save_parameters = (not args.use_parallel) or (
            args.use_parallel and fluid.dygraph.parallel.Env().local_rank == 0)
        best_valid_acc = [0] * dml_model.model_num
        for epoch_id in range(args.epochs):
            current_step_lr = dml_optimizer.get_lr()
            lr_msg = "Epoch {}".format(epoch_id)
            for model_id, lr in enumerate(current_step_lr):
                lr_msg += ", {} lr: {:.6f}".format(
                    dml_model.full_name()[model_id], lr)
            logger.info(lr_msg)
            train_losses, train_accs = train(train_loader, dml_model,
                                             dml_optimizer, args)
            valid_losses, valid_accs = valid(valid_loader, dml_model, args)
            for i in range(dml_model.model_num):
                # Track per-model best validation accuracy and persist it.
                if valid_accs[i].avg[0] > best_valid_acc[i]:
                    best_valid_acc[i] = valid_accs[i].avg[0]
                    if save_parameters:
                        fluid.save_dygraph(
                            models[i].state_dict(),
                            os.path.join(args.model_save_dir,
                                         dml_model.full_name()[i],
                                         "best_model"))
                summery_msg = "Epoch {} {}: valid_loss {:.6f}, valid_acc {:.6f}, best_valid_acc {:.6f}"
                logger.info(
                    summery_msg.format(epoch_id, dml_model.full_name()[i],
                                       valid_losses[i].avg[0],
                                       valid_accs[i].avg[0],
                                       best_valid_acc[i]))
def save_state(self, path=None):
    """Save the model's (and, if set, the optimizer's) state under ``path``.

    Both state dicts are written to the same base name; Paddle separates
    them into ``.pdparams`` and ``.pdopt`` files.

    Args:
        path: directory to write into; defaults to the current directory.

    Raises:
        KeyError: if no model has been attached yet (exception type kept
            for backward compatibility with existing callers).
    """
    import os
    if path is None:
        path = ''
    if self._model is None:
        raise KeyError('Model don\'t exist!')
    target = os.path.join(path, self._name)
    with fluid.dygraph.guard():
        # Bug fix: fluid.save_dygraph expects state dicts, not the Layer /
        # optimizer objects themselves.
        fluid.save_dygraph(self._model.state_dict(), target)
        # Robustness: skip the optimizer when none was configured.
        if self._optimizer is not None:
            fluid.save_dygraph(self._optimizer.state_dict(), target)
def finetune(args):
    """Fine-tune an ERNIE-based text classifier on ChnSentiCorp, resuming
    from and periodically writing a dygraph state-dict checkpoint."""
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.ChnSentiCorp()
        tc = TransformerClassifier(num_classes=dataset.num_labels,
                                   transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            # Resume from the previous checkpoint when one exists.
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)
        reader = hub.reader.ClassifyReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')
        loss_sum = acc_sum = cnt = 0
        # Run num_epoch training epochs
        for epoch in range(args.num_epoch):
            # Read the training data and train
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64)
                pred = tc(input_ids, position_ids, segment_ids, input_mask)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Parameter update
                adam.minimize(avg_loss)
                # Sample-weighted running loss/accuracy totals
                loss_sum += avg_loss.numpy() * labels.shape[0]
                acc_sum += acc.numpy() * labels.shape[0]
                cnt += labels.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0
                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
def save(model, model_path):
    """Persist model (and its optimizer) weights, unwrapping DataParallel."""
    if isinstance(model, parallel.DataParallel):
        model = model._layers
    if not hasattr(fluid, "save_dygraph"):
        # Paddle < 1.6.0 fallback
        dygraph.save_persistables(model.state_dict(),
                                  model_path,
                                  optimizers=model.optimizer)
        return
    # >= 1.6.0 compatible
    fluid.save_dygraph(model.state_dict(), model_path)
    fluid.save_dygraph(model.optimizer.state_dict(), model_path)
    return
def train():
    """Train the linear Regressor on the housing data and save 'LR_model'."""
    # Set up the Paddle dygraph working context
    with fluid.dygraph.guard():
        # Instantiate the linear regression model
        model = Regressor("Regressor")
        # Switch to training mode
        model.train()
        # Load the data
        training_data, test_data = load_data()
        # Optimizer: plain SGD with learning rate 0.01
        # NOTE(review): no parameter_list is passed; newer dygraph
        # optimizers require it — confirm the Paddle version in use.
        opt = fluid.optimizer.SGD(learning_rate=0.01)
        EPOCH_NUM = 10  # number of epochs (outer loop)
        BATCH_SIZE = 10  # batch size
        # Outer loop over epochs
        for epoch_id in range(EPOCH_NUM):
            # Shuffle the training data before each epoch
            np.random.shuffle(training_data)
            # Split the training data into batches of 10 samples each
            mini_batches = [
                training_data[k:k + BATCH_SIZE]
                for k in range(0, len(training_data), BATCH_SIZE)
            ]
            # Inner loop over mini-batches
            for iter_id, mini_batch in enumerate(mini_batches):
                x = np.array(mini_batch[:, :-1]).astype(
                    'float32')  # current batch features
                y = np.array(mini_batch[:, -1:]).astype(
                    'float32')  # current batch labels (true house prices)
                # Convert numpy data to dygraph variables
                house_features = dygraph.to_variable(x)
                prices = dygraph.to_variable(y)
                # Forward pass
                predicts = model(house_features)
                # Loss: mean of sqrt(squared error), i.e. mean absolute deviation
                loss = fluid.layers.square_error_cost(predicts, label=prices)
                avg_loss = fluid.layers.mean(fluid.layers.sqrt(loss))
                if iter_id % 20 == 0:
                    print("epoch: {}, iter: {}, loss is: {}".format(
                        epoch_id, iter_id, avg_loss.numpy()))
                # Backward pass
                avg_loss.backward()
                # Minimize the loss / update parameters
                opt.minimize(avg_loss)
                # Clear gradients
                model.clear_gradients()
        # Save the model
        fluid.save_dygraph(model.state_dict(), 'LR_model')
def save(self, dir, step):
    """Write each generator/discriminator to its own checkpoint file."""
    nets = ('genA2B', 'genB2A', 'disGA', 'disGB', 'disLA', 'disLB')
    for key in nets:
        state = getattr(self, key).state_dict()
        target = os.path.join(
            dir, self.dataset + '_%s_params_%07d' % (key, step))
        fluid.save_dygraph(state, target)
def train(args): print('Now startingt training.......') with fluid.dygraph.guard(place): model = getattr(models, config.model_name)() train_loader = load_data('train', config.batch_size) data_loader = fluid.io.DataLoader.from_generator(capacity=5, return_list=True) data_loader.set_batch_generator(train_loader, places=place) # train_loader = paddle.batch(paddle.dataset.mnist.train(), batch_size=config.batch_size) # optimizer = fluid.optimizer.Adam(learning_rate=config.lr) optimizer = fluid.optimizer.Adam( learning_rate=fluid.layers.piecewise_decay( boundaries=[15630, 31260], values=[1e-3, 1e-4, 1e-5]), regularization=fluid.regularizer.L2Decay( regularization_coeff=1e-4)) EPOCH_NUM = config.max_epoch best_acc = -1 for epoch_id in range(EPOCH_NUM): model.train() for batch_id, data in enumerate(data_loader): # image_data = np.array([x[0] for x in data]).astype('float32').reshape(-1, 28, 28) # label_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1) # image_data = np.expand_dims(image_data, axis=1) image_data, label_data = data # print("data shape => ", image_data.shape) # print("label shape => ", label_data.shape) image = fluid.dygraph.to_variable(image_data) label = fluid.dygraph.to_variable(label_data) predict, avg_acc = model(image, label) loss = fluid.layers.cross_entropy(predict, label) # print(loss) avg_loss = fluid.layers.mean(loss) if batch_id != 0 and batch_id % 200 == 0: print( "epoch: {}, batch: {}, loss is: {}, acc is {}".format( epoch_id, batch_id, avg_loss.numpy(), avg_acc.numpy())) avg_loss.backward() optimizer.minimize(avg_loss) model.clear_gradients() fluid.save_dygraph(model.state_dict(), config.model_name + '_current') val_acc = val(model) if val_acc > best_acc: fluid.save_dygraph(model.state_dict(), config.model_name + '_best') best_acc = max(val_acc, best_acc)
def finetune(args):
    """Fine-tune a ResNet50 classifier (resnet50_vd_10w backbone) on the
    Flowers dataset, resuming from and periodically writing a checkpoint."""
    with fluid.dygraph.guard():
        resnet50_vd_10w = hub.Module(name="resnet50_vd_10w")
        dataset = hub.dataset.Flowers()
        resnet = ResNet50(num_classes=dataset.num_labels,
                          backbone=resnet50_vd_10w)
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=resnet.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            # Resume from the previous checkpoint when one exists.
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            resnet.load_dict(state_dict)
        reader = hub.reader.ImageClassificationReader(
            image_width=resnet50_vd_10w.get_expected_image_width(),
            image_height=resnet50_vd_10w.get_expected_image_height(),
            images_mean=resnet50_vd_10w.get_pretrained_images_mean(),
            images_std=resnet50_vd_10w.get_pretrained_images_std(),
            dataset=dataset)
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')
        loss_sum = acc_sum = cnt = 0
        # Run num_epoch training epochs
        for epoch in range(args.num_epoch):
            # Read the training data and train
            for batch_id, data in enumerate(train_reader()):
                imgs = np.array(data[0][0])
                labels = np.array(data[0][1])
                pred = resnet(imgs)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Parameter update
                adam.minimize(avg_loss)
                # Sample-weighted running loss/accuracy totals
                loss_sum += avg_loss.numpy() * imgs.shape[0]
                acc_sum += acc.numpy() * imgs.shape[0]
                cnt += imgs.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0
                if batch_id % args.save_interval == 0:
                    state_dict = resnet.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
def change_model_executor_to_dygraph(args):
    """Convert a static-graph (Executor) checkpoint into a dygraph one.

    Builds the static program, loads the persistable variables saved by the
    executor, copies each parameter tensor out as numpy, then rebuilds the
    model in dygraph mode and writes the parameters back with
    ``fluid.save_dygraph`` to the same ``args.save_model`` path.
    """
    temp_image = fluid.layers.data(name='temp_image',
                                   shape=[3, 224, 224],
                                   dtype='float32')
    model = get_model(args)
    # One forward pass so all parameters are created in the static program
    y = model(temp_image)
    if args.cuda:
        gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    # NOTE: gpu_id is only referenced when args.cuda is True, so the
    # conditional expression never reads it undefined.
    place = fluid.CUDAPlace(gpu_id) if args.cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    model_path = args.save_model
    assert os.path.exists(model_path), "Please check whether the executor model file address {} exists. " \
                                       "Note: the executor model file is multiple files.".format(model_path)
    fluid.io.load_persistables(exe, model_path, fluid.default_main_program())
    print('load executor train model successful, start change!')
    # Snapshot every parameter of the static program as a numpy array
    param_list = fluid.default_main_program().block(0).all_parameters()
    param_name_list = [p.name for p in param_list]
    temp_dict = {}
    for name in param_name_list:
        tensor = fluid.global_scope().find_var(name).get_tensor()
        npt = np.asarray(tensor)
        temp_dict[name] = npt
    del model
    with fluid.dygraph.guard():
        # Rebuild the model in dygraph mode; a dummy forward pass creates
        # all of its parameters.
        x = np.random.randn(1, 3, 224, 224).astype('float32')
        x = fluid.dygraph.to_variable(x)
        model = get_model(args)
        y = model(x)
        new_param_dict = {}
        for k, v in temp_dict.items():
            value = v
            value_shape = value.shape
            name = k
            # Recreate each parameter using the saved value as initializer
            tensor = fluid.layers.create_parameter(
                shape=value_shape,
                name=name,
                dtype='float32',
                default_initializer=fluid.initializer.NumpyArrayInitializer(
                    value))
            new_param_dict[name] = tensor
        assert len(new_param_dict) == len(
            model.state_dict()), "The number of parameters is not equal. Loading parameters failed, " \
                                 "Please check whether the model is consistent!"
        model.set_dict(new_param_dict)
        fluid.save_dygraph(model.state_dict(), model_path)
        del model
    del temp_dict
    print('change executor model to dygraph successful!')
def train(model):
    """Train the recommender model with Adam and checkpoint every epoch."""
    # Training configuration
    use_gpu = False
    lr = 0.01
    Epoches = 10
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    print(place)
    with fluid.dygraph.guard(place):
        # Start training
        model.train()
        # Get the data reader
        data_loader = model.train_loader
        # Adam optimizer with learning rate 0.01
        opt = fluid.optimizer.Adam(learning_rate=lr,
                                   parameter_list=model.parameters())
        for epoch in range(0, Epoches):
            for idx, data in enumerate(data_loader()):
                # Fetch a batch and convert it to dygraph variables
                usr, mov, score = data
                usr[0] = usr[0].astype(np.int64)
                # print(usr[0][0], type(usr[0][0]))
                mov[0] = mov[0].astype(np.int64)
                # print(mov[0][0], type(mov[0][0]))
                usr_v = [dygraph.to_variable(var) for var in usr]
                mov_v = [dygraph.to_variable(var) for var in mov]
                scores_label = dygraph.to_variable(score)
                # print(usr_v, type(usr_v))
                # print(mov_v, type(mov_v))
                # Forward pass of the algorithm
                _, _, scores_predict = model.forward(usr_v, mov_v)
                # Compute the loss
                loss = fluid.layers.square_error_cost(scores_predict,
                                                      scores_label)
                avg_loss = fluid.layers.mean(loss)
                if idx % 500 == 0:
                    print("epoch: {}, batch_id: {}, loss is: {}".format(
                        epoch, idx, avg_loss.numpy()))
                # Backward pass, update, then clear gradients
                avg_loss.backward()
                opt.minimize(avg_loss)
                model.clear_gradients()
            # Save the model once per epoch
            fluid.save_dygraph(model.state_dict(),
                               source_path + '/checkpoint/epoch' + str(epoch))
def build_dynamic_network(load_params=None, save_params=False, use_structured_name=False):
    """Run TestModel once in dygraph mode, optionally loading and/or saving
    its parameters, and log the prediction."""
    with fluid.dygraph.guard(place):
        model = TestModel("test")
        if load_params:
            # Restore a previously saved state dict before inference.
            state, _ = fluid.load_dygraph(load_params)
            model.load_dict(state, use_structured_name=use_structured_name)
        model.eval()
        prediction = model(fluid.dygraph.to_variable(reader()))
        logger.info(prediction.numpy())
        if save_params:
            fluid.save_dygraph(model.state_dict(), "dynamic_params")
def torch2dynamic(param_file, save_path=None):
    """Translate a torch parameter file into Paddle dygraph parameters.

    Returns the converted state dict when ``save_path`` is falsy; otherwise
    writes it to ``save_path`` (as ``.pdparams``) and returns None.
    """
    assert os.path.exists(param_file), "{} not exists!".format(param_file)
    logger.info("start to read torch params...")
    torch_state = _read_torch_dict(param_file)
    logger.info("found {} parameters. start to transform...".format(
        len(torch_state)))
    converted = _make_dynamic_state_dict(torch_state)
    if not save_path:
        return converted
    with fluid.dygraph.guard(place):
        fluid.save_dygraph(converted, save_path)
    logger.info("dynamic parameters has been saved to {}.pdparams.".format(
        save_path))
def run():
    """Train the Regressor on the preprocessed data and save 'LR_model'."""
    with dygraph.guard():
        model = Regressor()
        model.train()  # switch to training mode
        training_data, test_data = preprocess()
        opt = fluid.optimizer.SGD(learning_rate=0.01,
                                  parameter_list=model.parameters())
        EPOCH_NUM = 100  # number of epochs (outer loop)
        BATCH_SIZE = 33  # batch size
        # Outer loop over epochs
        for epoch_id in range(EPOCH_NUM):
            # Shuffle the training data before each epoch
            np.random.shuffle(training_data)
            # Split the training data into mini-batches
            mini_batches = [
                training_data[k:k + BATCH_SIZE]
                for k in range(0, len(training_data), BATCH_SIZE)
            ]
            # Inner loop over mini-batches
            for iter_id, mini_batch in enumerate(mini_batches):
                x = np.array(mini_batch[:, :-1]).astype(
                    'float32')  # current batch features
                y = np.array(mini_batch[:, -1:]).astype(
                    'float32')  # current batch labels (true house prices)
                # Convert numpy data to dygraph variables
                house_features = dygraph.to_variable(x)
                prices = dygraph.to_variable(y)
                # Forward pass
                predicts = model(house_features)
                # Loss: mean squared error
                loss = fluid.layers.square_error_cost(predicts, label=prices)
                avg_loss = fluid.layers.mean(loss)
                if iter_id % BATCH_SIZE == 0:
                    print("epoch: {}, iter: {}, loss is: {}".format(
                        epoch_id, iter_id, avg_loss.numpy()))
                # Backward pass
                avg_loss.backward()
                # Minimize the loss / update parameters
                opt.minimize(avg_loss)
                # Clear gradients
                model.clear_gradients()
        fluid.save_dygraph(model.state_dict(), 'LR_model')