def show_trace(res):
    # size of the plotting box
    n = max(abs(min(res)), abs(max(res)), 10)
    # x range used to draw the curve
    f_line = np.arange(-n, n, 0.1)
    plot.set_figsize()
    # draw the curve of x**2
    plot.plt.plot(f_line, [x * x for x in f_line])
    # draw the trace of the optimization results
    plot.plt.plot(res, [x * x for x in res], '-o')
    plot.plt.xlabel('x')
    plot.plt.ylabel('f(x)')
    plot.plt.show()
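# A minimal usage sketch (the helper _demo_gd below is hypothetical and not
# part of the original file): run plain gradient descent on f(x) = x**2, whose
# derivative is 2x, and visualize the iterates with show_trace.
def _demo_gd(eta, num_iters=10):
    x = 10.0
    results = [x]
    for _ in range(num_iters):
        x -= eta * 2 * x  # gradient step: f'(x) = 2x
        results.append(x)
    return results

show_trace(_demo_gd(0.2))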
import torch
import math
import numpy as np
from PIL import Image
import sys
sys.path.append(r".")
from d2lzh_pytorch import plot, detection

"""
This section shows how to use anchor boxes for object detection.
"""
# First, try generating multiple anchor boxes. Note that each pixel below
# corresponds to several anchor boxes.
# Set numpy's print precision to 2 decimal places
np.set_printoptions(2)
plot.set_figsize()
# Read the image directly with PIL and print its size as a check
img = Image.open(r"./Datasets"+'/Img/catdog.jpg')
w, h = img.size
print("w=%d, h=%d" % (w, h))

# The function MultiBoxPrior is used here to generate the anchor boxes.
# Construct the input data: first build X with the image's shape.
X = torch.Tensor(1, 3, h, w)
# Then pass in the desired anchor-box sizes and aspect ratios; combined with
# the image shape X, all generated anchor boxes are returned automatically.
# Three sizes means three possible scales and three ratios means three shapes,
# giving (3 + 3 - 1) kinds of anchor boxes per pixel (see the book).
# The sizes and ratios are only priors: an anchor box's actual width is
# w*s*sqrt(r) and its actual height is h*s/sqrt(r).
# The returned shape is (batch size, number of anchor boxes, 4), where 4 stands
# for the upper-left and lower-right corner coordinates, expressed as fractions
# of the image's width and height.
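# A hedged sketch of the call the comments above describe. It assumes the local
# helper detection.MultiBoxPrior follows the book's interface
# MultiBoxPrior(feature_map, sizes, ratios) and returns a tensor of shape
# (1, number of anchors, 4); the concrete sizes and ratios below are examples.
Y = detection.MultiBoxPrior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
# with 3 sizes and 3 ratios there are 3 + 3 - 1 = 5 anchors per pixel
print(Y.shape)  # expected: torch.Size([1, h * w * 5, 4])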
def train_pytorch_ch7(optimizer_fn, optimizer_hyperparams, features, labels,
                      batch_size=10, num_epochs=2):
    """
    The training function of chapter 7, but this is the pytorch library version

    Parameters
    ----------
    optimizer_fn : [function]
        the optimizer function to use
    optimizer_hyperparams : [dict]
        hyperparams delivered to the optimizer
    features : [tensor]
        batch of features
    labels : [tensor]
        batch of labels
    batch_size : [int], optional
        size of a batch, by default 10
    num_epochs : [int], optional
        total number of epochs, by default 2
    """
    # init the net, using one linear layer to simulate linear regression
    net = nn.Sequential(nn.Linear(features.shape[-1], 1))
    loss = nn.MSELoss()
    optimizer = optimizer_fn(net.parameters(), **optimizer_hyperparams)

    # get loss
    def eval_loss():
        return loss(net(features).view(-1), labels).item() / 2

    # prepare data and structures
    ls = [eval_loss()]
    data_iter = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        features, labels), batch_size, shuffle=True)

    # for each epoch
    for _ in range(num_epochs):
        start = time.time()
        # for each batch
        for batch_i, (X, y) in enumerate(data_iter):
            # dividing by 2 makes the loss comparable to train_ch7's
            l = loss(net(X).view(-1), y) / 2
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # save current loss
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())

    # output results
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    plot.set_figsize()
    plot.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plot.plt.xlabel('epoch')
    plot.plt.ylabel('loss')
    plot.plt.show()
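# A minimal usage sketch for train_pytorch_ch7 (hypothetical synthetic data,
# not part of the original file): fit a linear model with torch.optim.SGD
# passed in as optimizer_fn, its learning rate given through the hyperparams
# dict.
if __name__ == '__main__':
    import torch.optim as optim
    demo_features = torch.randn(1500, 5)
    demo_labels = demo_features.sum(dim=1) + 0.01 * torch.randn(1500)
    train_pytorch_ch7(optim.SGD, {'lr': 0.05}, demo_features, demo_labels,
                      batch_size=10, num_epochs=2)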
def train_ch7(optimizer_fn, states, hyperparams, features, labels,
              batch_size=10, num_epochs=2):
    """
    The training function of chapter 7; it is a more general function.
    In chapter 7 it is used with several different optimizer functions.

    Parameters
    ----------
    optimizer_fn : [function]
        the optimizer function to use
    states : [list/tuple/None]
        states delivered to the optimizer
    hyperparams : [dict]
        hyperparams delivered to the optimizer
    features : [tensor]
        batch of features
    labels : [tensor]
        batch of labels
    batch_size : [int], optional
        size of a batch, by default 10
    num_epochs : [int], optional
        total number of epochs, by default 2
    """
    # using linear regression and squared loss for training
    net, loss = linear_reg.linreg, linear_reg.squared_loss
    # init params
    w = torch.nn.Parameter(torch.tensor(np.random.normal(
        0, 0.01, size=(features.shape[1], 1)), dtype=torch.float32),
        requires_grad=True)
    b = torch.nn.Parameter(torch.zeros(1, dtype=torch.float32),
                           requires_grad=True)

    # get loss
    def eval_loss():
        return loss(net(features, w, b), labels).mean().item()

    # prepare data and structures
    ls = [eval_loss()]
    data_iter = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        features, labels), batch_size, shuffle=True)

    # for each epoch
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            # use the average loss
            l = loss(net(X, w, b), y).mean()
            # clear gradients
            if w.grad is not None:
                w.grad.data.zero_()
                b.grad.data.zero_()
            l.backward()
            optimizer_fn([w, b], states, hyperparams)
            # save current loss
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())

    # output results
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    plot.set_figsize()
    plot.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plot.plt.xlabel('epoch')
    plot.plt.ylabel('loss')
    plot.plt.show()
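# A minimal usage sketch for train_ch7 (hypothetical, not part of the original
# file): a hand-written SGD step in the (params, states, hyperparams) form the
# function expects, applied to synthetic data. Plain SGD keeps no state, so
# states is passed as None.
def _demo_sgd(params, states, hyperparams):
    for p in params:
        p.data -= hyperparams['lr'] * p.grad.data

if __name__ == '__main__':
    demo_features = torch.randn(1500, 5)
    demo_labels = demo_features.sum(dim=1) + 0.01 * torch.randn(1500)
    train_ch7(_demo_sgd, None, {'lr': 0.05}, demo_features, demo_labels)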
import numpy as np
import sys
sys.path.append(r".")
from d2lzh_pytorch import plot

"""
This section introduces some background on optimizing objective functions in
deep learning.
"""
# First, many optimization problems have no analytical solution, so machine
# learning methods are used to obtain a numerical solution.
# When solving numerically, it is easy to fall into saddle points or local
# minima; below is an example of a local minimum.
# A point x is a local minimum if f(x) is smaller than at all nearby points;
# an optimization algorithm may get stuck there.


def f(x):
    # example function, f(x) = x*cos(pi*x)
    return x*np.cos(np.pi*x)


plot.set_figsize((4.5, 2.5))
# a set of input values x
x = np.arange(-1.0, 2.0, 0.1)
# feed the inputs through f and draw the outputs on fig
fig, = plot.plt.plot(x, f(x))
# draw two annotation arrows; the annotated points were located beforehand
fig.axes.annotate('local minimum', xy=(-0.3, -0.25), xytext=(-0.77, -1.0),
                  arrowprops=dict(arrowstyle='->'))
fig.axes.annotate('global minimum', xy=(1.1, -0.95), xytext=(0.6, 0.8),
                  arrowprops=dict(arrowstyle='->'))
plot.plt.xlabel('x')
plot.plt.ylabel('f(x)')
# plot.plt.show()
print('————————————————————————————')
# Besides local minima, where the surrounding gradient is 0 and the optimizer
# can get stuck, the gradient around a saddle point is also 0 and can likewise
# trap the optimizer.
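# A short sketch of the saddle-point case mentioned above (illustrative, not
# part of the original file): for g(x) = x**3 the gradient at x = 0 is 0, but
# x = 0 is neither a minimum nor a maximum, so gradient-based optimizers can
# stall there.
x = np.arange(-2.0, 2.0, 0.1)
fig, = plot.plt.plot(x, x**3)
fig.axes.annotate('saddle point', xy=(0, -0.2), xytext=(-0.52, -5.0),
                  arrowprops=dict(arrowstyle='->'))
plot.plt.xlabel('x')
plot.plt.ylabel('f(x)')
# plot.plt.show()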