def run(model, train_loader, val_loader, optimizer, epochs, log_interval, log_dir, val=False, log=True):
    writer = create_summary_writer(log_dir)
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'
    loss_metric = Average()
    trainer = create_train_engine(model, optimizer, device=device)
    evaluator = create_supervised_evaluator(model, metrics={'loss': loss_metric}, device=device)

    @trainer.on(Events.ITERATION_COMPLETED(every=log_interval))
    def log_training_loss(engine):
        # print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
        #       "".format(engine.state.epoch, engine.state.iteration, len(train_loader), engine.state.output))
        writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)

    if log:
        @trainer.on(Events.EPOCH_COMPLETED)
        def log_training_results(engine):
            evaluator.run(train_loader)
            metrics = evaluator.state.metrics
            avg_mse = metrics['loss']
            print("Training Results - Epoch: {}  Avg loss: {:.2f}".format(engine.state.epoch, avg_mse))
            writer.add_scalar("training/avg_loss", avg_mse, engine.state.epoch)

    if val:
        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            evaluator.run(val_loader)
            metrics = evaluator.state.metrics
            avg_mse = metrics['loss']
            # print("Validation Results - Epoch: {}  Avg loss: {:.2f}"
            #       .format(engine.state.epoch, avg_mse))
            writer.add_scalar("validation/avg_loss", avg_mse, engine.state.epoch)

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)
    writer.close()
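# A minimal usage sketch for run() above -- illustrative only. It assumes a
# small regression model with synthetic data, that torch is imported at module
# level, and that create_train_engine / create_summary_writer are this
# project's own helpers (defined elsewhere) returning an Ignite engine and a
# TensorBoard SummaryWriter.
def example_run_usage():
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset

    net = nn.Sequential(nn.Linear(8, 32), nn.ReLU(), nn.Linear(32, 1))
    xs, ys = torch.randn(256, 8), torch.randn(256, 1)
    loader = DataLoader(TensorDataset(xs, ys), batch_size=32, shuffle=True)
    opt = torch.optim.Adam(net.parameters(), lr=1e-3)
    # train for two epochs, logging the batch loss every 10 iterations
    run(net, loader, loader, opt, epochs=2, log_interval=10,
        log_dir="logs/run_example", val=True)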
def stochastic_gradient_descent(X, Y, model, learning_rate=0.01, mini_batch_fraction=0.01, epoch=10000, tol=1.e-6):
    """
    Train the model using stochastic gradient descent.

    Parameters
    ----------
    X : np.array, independent-variable (feature) data
    Y : np.array, dependent-variable (target) data
    model : dict, containing the model parameters, the loss function,
        and the independent/dependent variable placeholders
    """
    # Define the optimization algorithm
    method = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    optimizer = method.minimize(model["loss_function"])
    # Add logging
    tf.summary.scalar("loss_function", model["loss_function"])
    tf.summary.histogram("params", model["model_params"])
    tf.summary.scalar("first_param", tf.reduce_mean(model["model_params"][0]))
    tf.summary.scalar("last_param", tf.reduce_mean(model["model_params"][-1]))
    summary = tf.summary.merge_all()
    # After the run finishes, inspect the logs with:
    # tensorboard --logdir logs/
    # The log path on Windows differs from Linux
    if os.name == "nt":
        summary_writer = create_summary_writer("logs\\stochastic_gradient_descent")
    else:
        summary_writer = create_summary_writer("logs/stochastic_gradient_descent")
    # Start the TensorFlow session
    sess = tf.Session()
    # Create the variable initializer
    init = tf.global_variables_initializer()
    # Initialize the model with the initial parameters
    sess.run(init)
    # Run the stochastic gradient-descent iterations
    step = 0
    batch_size = int(X.shape[0] * mini_batch_fraction)
    batch_num = int(math.ceil(1 / mini_batch_fraction))
    prev_loss = np.inf
    diff = np.inf
    # Stop when the change in the loss drops below the tolerance or the
    # maximum number of epochs is reached
    while (step < epoch) and (diff > tol):
        for i in range(batch_num):
            # Select the mini-batch of training data
            batch_x = X[i * batch_size: (i + 1) * batch_size]
            batch_y = Y[i * batch_size: (i + 1) * batch_size]
            # Update the model parameters
            sess.run([optimizer],
                     feed_dict={model["independent_variable"]: batch_x,
                                model["dependent_variable"]: batch_y})
            # Compute the loss on the full data and write it to the log
            summary_str, loss = sess.run(
                [summary, model["loss_function"]],
                feed_dict={model["independent_variable"]: X,
                           model["dependent_variable"]: Y})
            # Write the run details to the log directory
            summary_writer.add_summary(summary_str, step * batch_num + i)
            # Compute the change in the loss
            diff = abs(prev_loss - loss)
            prev_loss = loss
            if diff <= tol:
                break
        step += 1
    summary_writer.close()
    # Print the final results
    print("Model parameters:\n%s" % sess.run(model["model_params"]))
    print("Training epochs: %s" % step)
    print("Final loss: %s" % loss)
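# A hedged sketch of the `model` dict that stochastic_gradient_descent() (and
# gradient_descent() below) expect; the project builds this dict elsewhere, so
# the exact construction may differ. It wires up a TF1-style linear regression:
# placeholders for X and Y, a parameter vector, and a mean-squared-error loss.
# Assumes `tensorflow as tf` and `numpy as np` are imported at module level.
def example_linear_model(feature_dim):
    x = tf.placeholder(tf.float64, shape=[None, feature_dim], name="x")
    y = tf.placeholder(tf.float64, shape=[None, 1], name="y")
    betas = tf.Variable(np.random.random([feature_dim, 1]), name="betas")
    y_pred = tf.matmul(x, betas)
    loss = tf.reduce_mean(tf.square(y_pred - y))
    return {
        "independent_variable": x,
        "dependent_variable": y,
        "model_params": betas,
        "loss_function": loss,
    }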
def train_loop(model, params, ds, min_y, base_data, model_id, device, batch_size, max_epochs=2):
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    with create_summary_writer(model, ds_train, base_data, model_id, device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # return ans, y
            return l.item()

        trainer = Engine(train_step)
        # acc_metric.attach(trainer, "accuracy")
        # loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_train
                ans = model.forward(x)
                return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_val
                ans = model.forward(x)
                return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print("Validation Results - Epoch: {}  Avg accuracy: {:.2f}  Avg loss: {:.2f}"
                  .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll, engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy, engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # step the plateau scheduler on the latest validation accuracy
            metrics = valid_evaluator.state.metrics
            avg_accuracy = metrics['accuracy']
            sched.step(avg_accuracy)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}"
                      .format(engine.state.epoch, iter, len(ds_train), accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output, engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'], engine.state.epoch)

        # @trainer.on(Events.EPOCH_COMPLETED)
        # def log_training_results(engine):
        #     train_evaluator.run(ds_train)
        #     metrics = train_evaluator.state.metrics
        #     avg_accuracy = metrics['accuracy']
        #     avg_nll = metrics['loss']
        #     print("Training Results - Epoch: {}  Avg accuracy: {:.2f}  Avg loss: {:.2f}"
        #           .format(engine.state.epoch, avg_accuracy, avg_nll))
        #     writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
        #     writer.add_scalar("training/avg_accuracy", avg_accuracy, engine.state.epoch)
        #     writer.add_scalar("training/avg_error", 1. - avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 5), handler)
        trainer.run(ds_train, max_epochs=max_epochs)
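# A minimal sketch of how train_loop() might be invoked -- illustrative; the
# run directory ('runs') and model id ('resnet_baseline') are placeholders. The
# params dict carries the SGD hyper-parameters train_loop() reads, ds is a
# (train_loader, valid_loader) pair, and min_y holds the label offsets used to
# shift targets into [0, num_classes).
def example_train_loop_call(model, train_loader, valid_loader, device):
    params = {'lr': 0.1, 'momentum': 0.9, 'l2_wd': 5e-4}
    train_loop(model, params,
               ds=(train_loader, valid_loader),
               min_y=(0, 0),
               base_data='runs',
               model_id='resnet_baseline',
               device=device,
               batch_size=train_loader.batch_size,
               max_epochs=2)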
def run_trainer(data_loader: dict, model: models, optimizer: optim, lr_scheduler: optim.lr_scheduler,
                criterion: nn, train_epochs: int, log_training_progress_every: int,
                log_val_progress_every: int, checkpoint_every: int, tb_summaries_dir: str,
                chkpt_dir: str, resume_from: str, to_device: object, to_cpu: object,
                attackers: object = None, train_adv_periodic_ops: int = None, *args, **kwargs):

    def mk_lr_step(loss):
        lr_scheduler.step(loss)

    def train_step(engine, batch):
        model.train()
        optimizer.zero_grad()
        x, y = map(lambda _: to_device(_), batch)
        # periodically replace the clean batch with an adversarial one
        if (train_adv_periodic_ops is not None) and (engine.state.iteration % train_adv_periodic_ops == 0):
            random_attacker = random.choice(list(attackers))
            x = attackers[random_attacker].perturb(x, y)
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        return loss.item()

    def eval_step(engine, batch):
        model.eval()
        with torch.no_grad():
            x, y = map(lambda _: to_device(_), batch)
            # evaluate on adversarial inputs roughly half of the time
            if random.choice(range(2)) % 2 == 0:
                random_attacker = random.choice(list(attackers))
                x = attackers[random_attacker].perturb(x, y)
            y_pred = model(x)
            return y_pred, y

    def chkpt_score_func(engine):
        val_eval.run(data_loader['val'])
        y_pred, y = val_eval.state.output
        loss = criterion(y_pred, y)
        # Checkpoint retains the objects with the highest scores, so negate the
        # validation loss to keep the lowest-loss checkpoints
        return -np.mean(to_cpu(loss, convert_to_np=True))

    # set up ignite engines
    trainer = Engine(train_step)
    train_eval = Engine(eval_step)
    val_eval = Engine(eval_step)

    @trainer.on(Events.ITERATION_COMPLETED(every=log_training_progress_every))
    def log_training_results(engine):
        step = True
        run_type = 'train'
        train_eval.run(data_loader['train'])
        y_pred, y = train_eval.state.output
        loss = criterion(y_pred, y)
        log_results(to_cpu(y_pred, convert_to_np=True), to_cpu(y, convert_to_np=True),
                    to_cpu(loss, convert_to_np=True), run_type, step,
                    engine.state.iteration, total_train_steps, writer)

    @trainer.on(Events.ITERATION_COMPLETED(every=log_val_progress_every))
    def log_val_results(engine):
        step = True
        run_type = 'val'
        val_eval.run(data_loader['val'])
        y_pred, y = val_eval.state.output
        loss = criterion(y_pred, y)
        mk_lr_step(loss)
        log_results(to_cpu(y_pred, convert_to_np=True), to_cpu(y, convert_to_np=True),
                    to_cpu(loss, convert_to_np=True), run_type, step,
                    engine.state.iteration, total_train_steps, writer)

    # set up vars
    total_train_steps = len(data_loader['train']) * train_epochs

    # reporter to identify memory usage bottlenecks throughout the network
    reporter = MemReporter()
    print_model(model, reporter)

    # set up tensorboard summary writer
    writer = create_summary_writer(model, data_loader['train'], tb_summaries_dir)

    # move model to device
    model = to_device(model)

    # set up progress bar
    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')
    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(trainer, ['loss'])

    # set up checkpoint
    objects_to_checkpoint = {
        'trainer': trainer,
        'model': model,
        'optimizer': optimizer,
        'lr_scheduler': lr_scheduler
    }
    training_checkpoint = Checkpoint(to_save=objects_to_checkpoint,
                                     save_handler=DiskSaver(chkpt_dir, require_empty=False),
                                     n_saved=3,
                                     filename_prefix='best',
                                     score_function=chkpt_score_func,
                                     score_name='val_loss')

    # register events
    trainer.add_event_handler(Events.ITERATION_COMPLETED(every=checkpoint_every), training_checkpoint)

    # if resuming
    if resume_from and os.path.exists(resume_from):
        print(f'resume model from: {resume_from}')
        checkpoint = torch.load(resume_from)
        Checkpoint.load_objects(to_load=objects_to_checkpoint, checkpoint=checkpoint)

    # fire training engine
    trainer.run(data_loader['train'], max_epochs=train_epochs)
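# run_trainer() expects caller-supplied to_device / to_cpu callables. A
# plausible minimal pair is sketched here for illustration; the project's own
# helpers may handle nested structures or pinned memory differently.
def example_device_helpers(device):
    def to_device(t):
        # move a tensor or module to the training device
        return t.to(device)

    def to_cpu(t, convert_to_np=False):
        # detach, move to CPU, and optionally convert to a NumPy array
        t = t.detach().cpu()
        return t.numpy() if convert_to_np else t

    return to_device, to_cpu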
def adv_train_loop(model, params, ds, min_y, base_data, model_id, attack_type, device, batch_size, max_epochs=5):
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(model)  # used to generate adv images for the trained model
    original_model.eval()
    model = copy.deepcopy(model)  # making a copy so that the original model is not changed
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'
    with create_summary_writer(model, ds_train, base_data, model_id, device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )
        attack = None
        # if attack_type == "fgsm":
        #     attack = FastGradientMethod(estimator=classifier, eps=0.2)
        # elif attack_type == "bim":
        #     attack = BasicIterativeMethod(estimator=classifier, eps=0.2)
        # elif attack_type == "carlini":
        #     attack = CarliniLInfMethod(classifier=classifier)
        # elif attack_type == "deepfool":
        #     attack = DeepFool(classifier=classifier)
        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            attack = DeepFool(classifier=classifier)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            # generate adversarial examples without tracking parameter gradients
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            # train on the clean and adversarial examples together
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # return ans, y
            return l.item()

        trainer = Engine(train_step)
        # acc_metric.attach(trainer, "accuracy")
        # loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
                return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
                return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print("Validation Results - Epoch: {}  Avg accuracy: {:.2f}  Avg loss: {:.2f}"
                  .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll, engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy, engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # step the plateau scheduler on the latest validation accuracy
            metrics = valid_evaluator.state.metrics
            avg_accuracy = metrics['accuracy']
            sched.step(avg_accuracy)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}"
                      .format(engine.state.epoch, iter, len(ds_train), accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output, engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'], engine.state.epoch)

        # @trainer.on(Events.EPOCH_COMPLETED)
        # def log_training_results(engine):
        #     train_evaluator.run(ds_train)
        #     metrics = train_evaluator.state.metrics
        #     avg_accuracy = metrics['accuracy']
        #     avg_nll = metrics['loss']
        #     print("Training Results - Epoch: {}  Avg accuracy: {:.2f}  Avg loss: {:.2f}"
        #           .format(engine.state.epoch, avg_accuracy, avg_nll))
        #     writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
        #     writer.add_scalar("training/avg_accuracy", avg_accuracy, engine.state.epoch)
        #     writer.add_scalar("training/avg_error", 1. - avg_accuracy, engine.state.epoch)

        @trainer.on(Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10), handler)
        trainer.run(ds_train, max_epochs=max_epochs)
def prune_train_loop(model, params, ds, min_y, base_data, model_id, prune_type, device, batch_size, max_epochs=5):
    assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = 0.3 if prune_type == 'global_unstructured' else 0.1
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning'
    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend([bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])

    def prune_model(model):
        remove_amount = total_prune_amount / (max_epochs * 10)
        print(f'pruned model by {remove_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=remove_amount,
            )
        else:
            for layer in conv_layers:
                prune.ln_structured(layer, name='weight', amount=remove_amount, n=1, dim=0)

    prune_model(model)
    with create_summary_writer(model, ds_train, base_data, model_id, device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
                return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
                return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print("Validation Results - Epoch: {}  Avg accuracy: {:.2f}  Avg loss: {:.2f}"
                  .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll, engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy, engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy, engine.state.epoch)
            # prune a little more after every validation pass
            prune_model(model)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # step the plateau scheduler on the latest validation accuracy
            metrics = valid_evaluator.state.metrics
            avg_accuracy = metrics['accuracy']
            sched.step(avg_accuracy)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}"
                      .format(engine.state.epoch, iter, len(ds_train), accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output, engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'], engine.state.epoch)

        @trainer.on(Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10), handler)
        trainer.run(ds_train, max_epochs=max_epochs)
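# A small sketch for inspecting the sparsity produced by prune_train_loop()
# above -- illustrative; it walks the same conv layers the loop prunes and
# reports the fraction of exactly-zero weights.
def example_report_conv_sparsity(model):
    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend([bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])
    zero = sum(float((layer.weight == 0).sum()) for layer in conv_layers)
    total = sum(layer.weight.nelement() for layer in conv_layers)
    print(f"global conv sparsity: {zero / total:.2%}")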
def gradient_descent(X, Y, model, learning_rate=0.01, max_iter=10000, tol=1.e-6):
    """
    Train the model using (full-batch) gradient descent.

    Parameters
    ----------
    X : np.array, independent-variable (feature) data
    Y : np.array, dependent-variable (target) data
    model : dict, containing the model parameters, the loss function,
        and the independent/dependent variable placeholders
    """
    # Define the optimization algorithm
    method = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    optimizer = method.minimize(model["loss_function"])
    # Add logging
    tf.summary.scalar("loss_function", model["loss_function"])
    tf.summary.histogram("params", model["model_params"])
    tf.summary.scalar("first_param", tf.reduce_mean(model["model_params"][0]))
    tf.summary.scalar("last_param", tf.reduce_mean(model["model_params"][-1]))
    summary = tf.summary.merge_all()
    # After the run finishes, inspect the logs with:
    # tensorboard --logdir logs/
    # The log path on Windows differs from Linux
    if os.name == "nt":
        summary_writer = create_summary_writer("logs\\gradient_descent")
    else:
        summary_writer = create_summary_writer("logs/gradient_descent")
    # Start the TensorFlow session
    sess = tf.Session()
    # Create the variable initializer
    init = tf.global_variables_initializer()
    # Initialize the model with the initial parameters
    sess.run(init)
    # Run the gradient-descent iterations
    step = 0
    prev_loss = np.inf
    diff = np.inf
    # Stop when the change in the loss drops below the tolerance or the
    # maximum number of iterations is reached
    while (step < max_iter) and (diff > tol):
        _, summary_str, loss = sess.run(
            [optimizer, summary, model["loss_function"]],
            feed_dict={
                model["independent_variable"]: X,
                model["dependent_variable"]: Y
            })
        # Write the run details to the log directory
        summary_writer.add_summary(summary_str, step)
        # Compute the change in the loss
        diff = abs(prev_loss - loss)
        prev_loss = loss
        step += 1
    summary_writer.close()
    # Print the final results
    print("Model parameters:\n%s" % sess.run(model["model_params"]))
    print("Iterations: %s" % step)
    print("Final loss: %s" % loss)
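# Putting the TF1 pieces together -- an illustrative end-to-end sketch using
# the example_linear_model() helper sketched earlier: generate synthetic linear
# data (with an intercept column) and fit it by full-batch gradient descent.
def example_gradient_descent_run():
    X = np.concatenate([np.random.randn(500, 3), np.ones((500, 1))], axis=1)
    true_betas = np.array([[2.0], [-1.0], [0.5], [3.0]])
    Y = X.dot(true_betas) + np.random.randn(500, 1) * 0.1
    gradient_descent(X, Y, example_linear_model(X.shape[1]), learning_rate=0.01)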