Example No. 1
import torch
from torch.cuda.amp import autocast, GradScaler

# MyNet, loss_function, optimizer, dataloader_train, resume_train, and
# checkpoint are assumed to be defined elsewhere in the script.
def start_train():
    '''
    Training loop with automatic mixed precision (AMP) and gradient
    accumulation.
    '''
    use_amp = True
    # Run forward/backward N times before updating the parameters, to
    # simulate a larger batch (effective batch = batch_size * N).
    iter_size = 8

    myNet = MyNet(use_amp).to("cuda:0")
    myNet = torch.nn.DataParallel(myNet, device_ids=[0, 1])  # data parallelism
    myNet.train()
    # Initialize the gradient scaler before training starts.
    scaler = GradScaler() if use_amp else None

    # Load a checkpoint when resuming training.
    if resume_train:
        scaler.load_state_dict(checkpoint['scaler'])  # needed for AMP
        optimizer.load_state_dict(checkpoint['optimizer'])
        myNet.load_state_dict(checkpoint['model'])

    for epoch in range(1, 100):
        for batch_idx, (input, target) in enumerate(dataloader_train):

            # Move the data to the primary GPU of the data-parallel model.
            input = input.to("cuda:0")
            target = target.to("cuda:0")

            # Automatic mixed precision training.
            if use_amp:
                # Inside autocast, ops with half-precision support run in FP16.
                with autocast():
                    # Extract features.
                    feature = myNet(input)
                    losses = loss_function(target, feature)
                    loss = losses / iter_size
                # Scale the loss to avoid FP16 gradient underflow.
                scaler.scale(loss).backward()
            else:
                feature = myNet(input)
                losses = loss_function(target, feature)
                loss = losses / iter_size
                loss.backward()

            # Gradient accumulation: update parameters every iter_size batches.
            if (batch_idx + 1) % iter_size == 0:
                # Parameter update.
                if use_amp:
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    optimizer.step()
                # Zero the gradients.
                optimizer.zero_grad()
        # The scaler is stateful and must be saved/reloaded when resuming;
        # the 'model' key matches the resume code above.
        state = {
            'model': myNet.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scaler': scaler.state_dict()
        }
        torch.save(state, "filename.pth")
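
The loss is divided by iter_size so that accumulating N scaled micro-batch
gradients matches the gradient of one large averaged batch. A small
self-contained check of that arithmetic (illustrative, not part of the
original example):

import torch

w = torch.tensor([1.0], requires_grad=True)
data = torch.arange(8, dtype=torch.float32)

# One large batch of 8.
((w * data) ** 2).mean().backward()
g_big = w.grad.clone()
w.grad.zero_()

# Two accumulated micro-batches of 4, each loss divided by 2.
for chunk in data.chunk(2):
    (((w * chunk) ** 2).mean() / 2).backward()

print(torch.allclose(g_big, w.grad))  # True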
Example No. 2
import torch

# MyNet, GameState, Player, and args are assumed to be defined elsewhere.
class Detector:
    def __init__(self, net_path, board_size, n):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.net = MyNet().to(self.device)
        self.net.load_state_dict(torch.load(net_path))
        self.number_playout = args.n_playout
        self.env = GameState(board_size, n)
        self.net.eval()
        self.mcts_player = Player(policy=self.policy,
                                  number_playout=self.number_playout,
                                  is_self_play=False,
                                  print_detail=True)

    def policy(self, env):
        # Get the available actions (board of 15*15 = 225 positions).
        action_avail = env.action_avail
        # Get the current board state.
        state = torch.from_numpy(env.get_state).unsqueeze(0).to(self.device)

        # Run the network to get the log action probabilities and the value
        # (win rate) of the current state.
        log_action_probs, value = self.net(state)

        # Convert log probabilities to probabilities and keep only the
        # available actions.
        act_probs = torch.exp(
            log_action_probs).detach().cpu().numpy().flatten()
        act_probs = zip(action_avail, act_probs[action_avail])
        value = value.item()

        # Return (action, probability) pairs and the value of the position.
        return act_probs, value

    def detect(self):
        while True:
            action = None
            # When it is the AI player's turn and the game is not paused,
            # use MCTS to pick the best action.
            if self.env.current_player == 1 and not self.env.pause:
                action = self.mcts_player.get_action(self.env.game)
            self.env.step(action)
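
A minimal usage sketch for the class above (the weights path and board
settings are illustrative, not from the original):

detector = Detector("model.pth", board_size=15, n=5)
detector.detect()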
Example No. 3
import os
import random
from collections import deque

import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

# Game, MyNet, Player, and args are assumed to be defined elsewhere.
class Trainer:
    def __init__(self, net_path, board_size=15, n=5):
        # Board size.
        self.board_size = board_size
        # Number of stones in a row needed to win.
        self.n = n
        # Environment instance.
        self.env = Game(board_size, n)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.number_playout = args.n_playout
        # Replay buffer size.
        self.buffer_size = args.buffer_size
        self.buffer = deque(maxlen=self.buffer_size)
        self.batch_size = args.batch_size
        # Number of self-play games before each training step.
        self.n_games = args.n_games
        # Number of training passes (epochs) after each round of self-play.
        self.epochs = args.epochs
        # Interval (in iterations) for printing and saving the model.
        self.check_freq = args.check_freq
        # Total number of training iterations.
        self.game_num = args.game_num
        self.net_path = net_path
        self.net = MyNet().to(self.device)
        self.MSELoss = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(),
                                          weight_decay=1e-4)
        # Instantiate the MCTS player. Arguments: policy function, number of
        # playouts per move, and whether it is self-play (False at test time).
        self.mcts_player = Player(policy=self.policy,
                                  number_playout=self.number_playout,
                                  is_self_play=True)
        self.writer = SummaryWriter()
        # Load existing weights if available, otherwise initialize.
        if os.path.exists(net_path):
            self.net.load_state_dict(torch.load(net_path))
        else:
            self.net.apply(self.weight_init)

    def weight_init(self, net):
        # Initialize Linear and Conv2d layers: normal weights, zero bias.
        if isinstance(net, nn.Linear) or isinstance(net, nn.Conv2d):
            nn.init.normal_(net.weight, mean=0., std=0.1)
            nn.init.constant_(net.bias, 0.)

    def train(self):
        for i in range(self.game_num):
            # Self-play to collect board states, MCTS action probabilities,
            # and the final outcome value for each player.
            for _ in range(self.n_games):
                winner, data = self.env.self_play(self.mcts_player, temp=1.0)
                # Print the finished game.
                print(self.env, "\n", "----------------------------")
                # Augment the data and store it in the replay buffer.
                self.extend_sample(data)

            # Sample a batch for training.
            batch = random.sample(self.buffer,
                                  min(len(self.buffer), self.batch_size))
            # Unpack and convert to tensors once, before the epoch loop
            # (re-wrapping tensors every epoch is redundant and triggers a
            # warning).
            state_batch, mcts_probs_batch, winner_value_batch = zip(*batch)
            state_batch = torch.tensor(np.array(state_batch),
                                       dtype=torch.float32).to(self.device)
            mcts_probs_batch = torch.tensor(
                np.array(mcts_probs_batch),
                dtype=torch.float32).to(self.device)
            winner_value_batch = torch.tensor(
                np.array(winner_value_batch),
                dtype=torch.float32).to(self.device)
            loss = 0.
            for _ in range(self.epochs):
                # Forward pass: log action probabilities and state value.
                log_act_probs, value = self.net(state_batch)

                # Losses:
                # Value loss: MSE between the predicted value and the final
                # outcome (-1/0/1) of the game the state came from.
                # Policy loss: cross-entropy between the MCTS visit
                # probabilities and the network's policy (-pi * log p).
                value_loss = self.MSELoss(value,
                                          winner_value_batch.view_as(value))
                policy_loss = -torch.mean(
                    torch.sum(mcts_probs_batch * log_act_probs, dim=-1))
                loss = value_loss + policy_loss

                # Backward pass.
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            print(f"iteration: {i}, loss: {loss}")
            self.writer.add_scalar("loss", loss, i)
            self.net.add_histogram(self.writer, i)
            if (i + 1) % self.check_freq == 0:
                torch.save(self.net.state_dict(), self.net_path)

    # Augment the samples via board symmetries.
    def extend_sample(self, data):
        extend_data = []
        for state, mcts_prob, winner_value in data:
            extend_data.append((state, mcts_prob, winner_value))
            # Rotate by 90/180/270 degrees to diversify the data.
            for i in range(1, 4):
                # Rotate the board state and the probability map together.
                state_ = np.rot90(state, i, (1, 2))
                mcts_prob_ = np.rot90(
                    mcts_prob.reshape(self.env.height, self.env.width), i)
                extend_data.append(
                    (state_, mcts_prob_.flatten(), winner_value))

                # Flip the board horizontally (each player's plane).
                state_ = np.array([np.fliplr(s) for s in state_])
                mcts_prob_ = np.fliplr(mcts_prob_)
                extend_data.append(
                    (state_, mcts_prob_.flatten(), winner_value))
        # Store the augmented samples in the replay buffer.
        self.buffer.extend(extend_data)

    # Called by the MCTS player to get action probabilities and the value of
    # the current position from the network.
    def policy(self, env):
        # Get the available actions (board of 15*15 = 225 positions).
        action_avail = env.action_avail
        # Get the current board state.
        state = torch.from_numpy(env.get_state).unsqueeze(0).to(self.device)

        # Run the network to get the log action probabilities and the value
        # (win rate) of the current state.
        log_action_probs, value = self.net(state)

        # Convert log probabilities to probabilities and keep only the
        # available actions.
        act_probs = torch.exp(
            log_action_probs).detach().cpu().numpy().flatten()
        act_probs = zip(action_avail, act_probs[action_avail])
        value = value.item()

        # Return (action, probability) pairs and the value of the position.
        return act_probs, value
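
The combined loss in train() is the standard AlphaZero objective
loss = (z - v)^2 - pi^T log p, where z is the game outcome, v the predicted
value, pi the MCTS visit probabilities, and p the network's policy. A small
self-contained sketch with dummy tensors (shapes are illustrative, not from
the original):

import torch
import torch.nn as nn

batch, actions = 4, 225  # e.g. a 15x15 board
log_act_probs = torch.log_softmax(torch.randn(batch, actions), dim=-1)
value = torch.tanh(torch.randn(batch, 1))  # predicted value in [-1, 1]
mcts_probs = torch.softmax(torch.randn(batch, actions), dim=-1)
winner = torch.tensor([1., -1., 0., 1.]).view(-1, 1)  # game outcomes

value_loss = nn.MSELoss()(value, winner)
policy_loss = -torch.mean(torch.sum(mcts_probs * log_act_probs, dim=-1))
loss = value_loss + policy_loss
print(loss.item())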
Example No. 4
import torch
import torchvision

# transform is assumed to be defined earlier in the script (e.g. a
# torchvision.transforms pipeline).
testset = torchvision.datasets.CIFAR10(root='./data',
                                       train=False,
                                       download=True,
                                       transform=transform)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=10000,
                                         shuffle=False,
                                         num_workers=2)

# Define the classes in the dataset
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck')

# Reload the trained network (MyNet and net_name are assumed to be defined
# earlier in the script).
net = MyNet()
net.load_state_dict(torch.load("./" + net_name + ".pth"))
net.eval()  # evaluation mode: disables dropout and freezes batch-norm stats

# Get accuracy of the trained net as a whole
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' %
      (100 * correct / total))
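
The classes tuple above is defined but never used in this snippet; a common
follow-up is per-class accuracy, sketched below (not part of the original):

correct_per_class = [0] * len(classes)
total_per_class = [0] * len(classes)
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        for label, pred in zip(labels, predicted):
            total_per_class[label] += 1
            correct_per_class[label] += int(label == pred)
for name, correct_n, total_n in zip(classes, correct_per_class,
                                    total_per_class):
    print('Accuracy of %5s : %2d %%' % (name, 100 * correct_n / total_n))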
Example No. 5
import os

import torch

# parser, get_gpus, get_dataloader, and evaluate are assumed to be defined
# elsewhere in the script.
args, _ = parser.parse_known_args()

device = 'cuda'
gpus = get_gpus(args.gpus)

if __name__ == '__main__':
    model = MyNet()
    model = torch.nn.DataParallel(model, device_ids=gpus)
    batch_size = args.batch_size * len(gpus)

    if os.path.exists(args.data_dir):
        download = False
    else:
        download = True

    val_loader = get_dataloader(args.data_dir,
                                batch_size,
                                num_workers=args.num_workers,
                                shuffle=False,
                                train=False,
                                download=download)

    model.to(device)
    criterion = torch.nn.CrossEntropyLoss().cuda()
    if not os.path.exists(args.pretrained):
        raise FileNotFoundError(
            f'Pretrained model {args.pretrained} does not exist!')
    # The checkpoint is expected to carry 'module.'-prefixed keys, since the
    # model is wrapped in DataParallel.
    model.load_state_dict(torch.load(args.pretrained))
    acc1, acc5 = evaluate(val_loader, model, criterion)
    print('acc1={},acc5={}'.format(acc1, acc5))
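
get_gpus is referenced but not shown; one plausible implementation, assuming
args.gpus is a comma-separated string such as "0,1" (the signature and input
format are guesses, not from the original):

def get_gpus(gpus_arg):
    # Parse a comma-separated GPU list like "0,1" into [0, 1].
    return [int(g) for g in gpus_arg.split(',')]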