class VisualHook(Hook):
    """Training hook that records metrics and parameter histograms with LogWriter.

    Every callback returns immediately unless ``dist.get_rank()`` is 0, so
    only that process creates the log directory and writes to it.

    Args:
        priority: Stored on the instance as ``priority``. It is not read
            inside this class (presumably consumed by whatever schedules
            the hooks -- confirm against the caller).
    """

    def __init__(self, priority=1):
        self.priority = priority

    def run_begin(self, trainer):
        """Create ``<trainer.output_dir>/visual_dl`` and open the log writer.

        Args:
            trainer: Object exposing ``output_dir``.
        """
        if dist.get_rank() != 0:
            return
        logdir = os.path.join(trainer.output_dir, 'visual_dl')
        # exist_ok=True replaces the former exists()-then-makedirs() pair,
        # which could raise if the directory appeared between the two calls.
        os.makedirs(logdir, exist_ok=True)
        self.writer = LogWriter(logdir=logdir)
        # Left disabled in the original:
        # app.run(logdir=logdir, port=8040, host="0.0.0.0")

    def train_epoch_end(self, trainer):
        """Log per-epoch averages and non-'bn' parameter histograms.

        Args:
            trainer: Object exposing ``outputs``, ``logs``, ``model`` and
                ``current_epoch``.
        """
        if dist.get_rank() != 0:
            return
        epoch = trainer.current_epoch

        # One scalar per key of trainer.outputs, valued with the matching
        # entry's ``avg`` from trainer.logs.
        for key in trainer.outputs.keys():
            self.writer.add_scalar(tag='train/{}'.format(key),
                                   step=epoch,
                                   value=trainer.logs[key].avg)

        # With more than one process the parameters are read through
        # ``model._layers``; otherwise from the model directly.
        if dist.get_world_size() > 1:
            model = trainer.model._layers
        else:
            model = trainer.model

        with paddle.no_grad():
            for name, param in model.named_parameters():
                # Parameters whose name contains 'bn' are skipped.
                if 'bn' not in name:
                    self.writer.add_histogram(name, param.numpy(), epoch)

    def run_end(self, trainer):
        """Close the log writer opened in ``run_begin``."""
        if dist.get_rank() != 0:
            return
        self.writer.close()
# Training configuration and step counters.
epoch_num = 10
report_freq = 10
base_acc = 0.0
model_save_path = r"E:\Projects\Engine_Inspection\VGG16CAM\ResNet50\model"
var_save_path = r"E:\Projects\Engine_Inspection\VGG16CAM\ResNet50\variable"
train_step = 0
test_step = 0

for epoch_id in range(epoch_num):
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc, params = exe.run(
            program=fluid.default_main_program(),
            feed=feeder.feed(data),
            fetch_list=[avg_cost, batch_acc, params_name],
        )

        # Record both training scalars and the parameter histogram at the
        # current step, then advance the step counter.
        for tag, fetched in (("train_cost", train_cost),
                             ("train_acc", train_acc)):
            log_writer.add_scalar(tag, fetched[0], train_step)
        log_writer.add_histogram("histogram", params.flatten(), train_step,
                                 buckets=50)
        train_step += 1

        # Print progress once every `report_freq` batches.
        if batch_id % report_freq == 0:
            print("Pass:%d,Batch:%d,Cost:%0.5f,Accuracy:%0.5f"
                  % (epoch_id, batch_id, train_cost[0], train_acc[0]))

    # Testing: collect the first element of each fetched cost/accuracy.
    test_accs, test_costs = [], []
    for batch_id, data in enumerate(test_reader()):
        test_cost, test_acc = exe.run(
            program=test_program,
            feed=feeder.feed(data),
            fetch_list=[avg_cost, batch_acc],
        )
        test_accs.append(test_acc[0])
        test_costs.append(test_cost[0])
    # Write the training data into the LogWriter
)[0].name
# Train for 10 passes
for pass_id in range(10):
    # Run training
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc, params = exe.run(
            program=fluid.default_main_program(),
            feed=feeder.feed(data),
            fetch_list=[avg_cost, acc, params_name])
        # Save the training log data (the step counter is advanced first,
        # so the values are written at the already-incremented step)
        train_step += 1
        writer.add_scalar(tag="训练/损失值", step=train_step, value=train_cost[0])
        writer.add_scalar(tag="训练/准确率", step=train_step, value=train_acc[0])
        writer.add_histogram(tag="训练/参数分布", step=train_step, values=params.flatten(), buckets=50)
        # Print progress once every 100 batches
        if batch_id % 100 == 0:
            print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %
                  (pass_id, batch_id, train_cost[0], train_acc[0]))
    # Run testing
    test_accs = []
    test_costs = []
    for batch_id, data in enumerate(test_reader()):
        test_cost, test_acc = exe.run(program=test_program,
                                      feed=feeder.feed(data),
                                      fetch_list=[avg_cost, acc])
        # Save the test log data
class Linear_Model():
    """Linear-regression trainer that logs parameter histograms to a LogWriter.

    The model (``LinearRegression``), an SGD optimizer and an MSE loss are
    created at construction time, together with a LogWriter whose directory
    is ``./runs/<timestamp>_line_reg``.
    """

    def __init__(self):
        """
        Initialize the Linear Model
        """
        self.learning_rate = 0.001
        # Number of optimisation steps run by train() and compare_epoches().
        self.epoches = 10000
        self.loss_function = torch.nn.MSELoss()
        self.create_model()

        run_name = datetime.now().strftime('%b%d_%H-%M-%S') + '_line_reg'
        logdir = os.path.join('./runs', run_name)
        print("save to ", logdir)
        self.writer = LogWriter(logdir=logdir)
        print("initialize done")

    def create_model(self):
        """Build the regression model and an SGD optimizer over its parameters."""
        self.model = LinearRegression()
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.learning_rate)

    def train(self, data, model_save_path="model.pth"):
        """
        Train the model and save the parameters

        Args:
            model_save_path: saved name of model (currently unused, see the
                note at the end of the method body)
            data: mapping with keys "x" and "y"

        Returns:
            None
        """
        x = data["x"]
        y = data["y"]

        for epoch in range(self.epoches):
            prediction = self.model(x)
            loss = self.loss_function(prediction, y)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if epoch % 500 == 0:
                print("epoch: {}, loss is: {}".format(epoch, loss.item()))
                # Log a histogram of every parameter, flattened to 1-D.
                # NOTE(review): nesting under the 500-epoch check was
                # reconstructed from a whitespace-collapsed source -- confirm.
                for name, parameters in self.model.named_parameters():
                    self.writer.add_histogram(
                        tag='net_param/' + name,
                        values=parameters.view(torch.numel(parameters)).detach().cpu().numpy(),
                        step=epoch,
                        buckets=200)

        # NOTE(review): `model_save_path` is ignored and the state dict is
        # always written to "linear.pth", which is also the default path
        # test() loads from. Kept as-is so train() -> test() keeps working.
        torch.save(self.model.state_dict(), "linear.pth")

    def test(self, x, model_path="linear.pth"):
        """
        Reload and test the model, plot the prediction

        Args:
            x: mapping with keys "x" and "y" (the parameter keeps its
                original name so existing keyword callers still work)
            model_path: the model's path and name

        Returns:
            None
        """
        # The body used to read an undefined name `data`; the argument
        # itself is the data mapping.
        data = x
        inputs = data["x"]
        targets = data["y"]

        self.model.load_state_dict(torch.load(model_path))
        prediction = self.model(inputs)

        plt.scatter(inputs.numpy(), targets.numpy(), c=inputs.numpy())
        plt.plot(inputs.numpy(), prediction.detach().numpy(), color="r")
        plt.show()

    def compare_epoches(self, data):
        """Train while drawing the current fit into a 4x4 grid of subplots.

        Args:
            data: mapping with keys "x" and "y"

        Returns:
            None
        """
        x = data["x"]
        y = data["y"]

        num_pictures = 16
        plt.figure(figsize=(10, 10))
        current_fig = 0

        for epoch in range(self.epoches):
            prediction = self.model(x)
            loss = self.loss_function(prediction, y)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Draw a snapshot each time `epoch` is a multiple of
            # epoches / num_pictures (true division, so the step is a float).
            if epoch % (self.epoches / num_pictures) == 0:
                current_fig += 1
                plt.subplot(4, 4, current_fig)
                plt.scatter(x.numpy(), y.numpy(), c=x.numpy())
                plt.plot(x.numpy(), prediction.detach().numpy(), color="r")

        plt.show()