Пример #1
0
class Board(GameObject):
    """Grid state and tile bookkeeping for a 2048-style sliding game.

    ``self.m`` stores the numeric value of every cell (0 means empty) and
    ``self.tiles`` stores the tile object occupying each cell (``None`` when
    there is none).  A move rewrites ``self.m`` immediately, starts the tile
    movements and defers merging and spawning to ``handle_post_move``.
    """

    # Directions
    LEFT = 0
    RIGHT = 1
    UP = 2
    DOWN = 3

    TILE_MOVE_DURATION_MS = 100

    # Cell value that makes is_complete() report True.
    TARGET_VALUE = 2048

    def __init__(self, rows, cols, iterable=None):
        """Create a ``rows`` x ``cols`` board, optionally pre-filled.

        Args:
            rows: Number of rows.
            cols: Number of columns.
            iterable: Optional cell values in row-major order.  Falsy values
                leave the cell empty.  Any iterable is accepted.

        Raises:
            IndexError: If ``iterable`` yields fewer than ``rows * cols``
                values.
        """
        super().__init__(gs.BOARD_POS)
        self.rows = rows
        self.cols = cols
        self.m = [[0] * cols for _ in range(rows)]
        self.tiles = [[None] * cols for _ in range(rows)]
        self.tiles_to_destroy = []
        self.tiles_to_spawn = []
        self.should_wait_for_move_finished = False
        self.tile_factory = TileFactory(self)
        self.scorer = Scorer((10, 5))

        board_image = pygame.image.load("data/images/board.png")
        board_image_size = (gs.BOARD_WIDTH + gs.BOARD_BORDER,
                            gs.BOARD_HEIGHT + gs.BOARD_BORDER)
        self.board_image = pygame.transform.scale(board_image,
                                                  board_image_size)

        if iterable is not None:
            # Materialize at most rows*cols values so that generators and
            # other non-indexable iterables work as well as sequences.
            values = list(itertools.islice(iterable, rows * cols))
            cells = itertools.product(range(self.rows), range(self.cols))
            for n, (i, j) in enumerate(cells):
                val = values[n]
                if val:
                    # Go through spawn() so initial tiles are set up exactly
                    # like tiles created later (including their parent).
                    self.spawn(val, i, j)

    def val_spawner(self):
        """Return the value for a randomly spawned tile: 2 or 4."""
        return 2 * random.randint(1, 2)

    def pos_selector(self, select_from):
        """Return a uniformly chosen element of ``select_from``."""
        return select_from[random.randrange(len(select_from))]

    def __repr__(self):
        rows = ("".join(f"{value}, " for value in row) for row in self.m)
        return "Board:\n" + "".join(f"{row}\n" for row in rows)

    def get_rect(self):
        """Return the board rectangle built from ``gs.BOARD_POS``/``gs.BOARD_SIZE``."""
        return pygame.Rect(gs.BOARD_POS, gs.BOARD_SIZE)

    def get_state(self):
        """Return all cell values as a flat list in row-major order."""
        return list(itertools.chain.from_iterable(self.m))

    def check_state(self, state):
        """Return True when ``state`` equals the current flat board state.

        A state of a different length never matches.
        """
        return self.get_state() == list(state)

    def is_deadend(self):
        """Return True when no move can change the board.

        That is the case when there is no empty cell and no two horizontally
        or vertically adjacent cells hold the same value.
        """
        m = self.m

        # check empty cells
        if any(value == 0 for row in m for value in row):
            return False

        # check adjacent cells
        for i, j in itertools.product(range(self.rows), range(self.cols)):
            if j + 1 < self.cols and m[i][j] == m[i][j + 1]:
                return False
            if i + 1 < self.rows and m[i][j] == m[i + 1][j]:
                return False
        return True

    def is_complete(self):
        """Return True when some cell holds ``TARGET_VALUE``."""
        target = self.TARGET_VALUE
        return any(value == target for row in self.m for value in row)

    def spawn(self, value, row, col):
        """Put ``value`` into cell (row, col) and create its tile."""
        self.m[row][col] = value
        self.tiles[row][col] = self.tile_factory.create(value, row, col)
        self.tiles[row][col].parent = self

    def spawn_random(self, val_spawner=None, pos_selector=None):
        """Spawn a tile in an empty cell.

        Args:
            val_spawner: Zero-argument callable returning the new value;
                defaults to ``self.val_spawner``.
            pos_selector: Callable picking one ``(row, col)`` from the list of
                empty cells; defaults to ``self.pos_selector``.
        """
        if val_spawner is None:
            val_spawner = self.val_spawner
        if pos_selector is None:
            pos_selector = self.pos_selector
        empty_cells = [
            (i, j)
            for i, j in itertools.product(range(self.rows), range(self.cols))
            if self.m[i][j] == 0
        ]
        assert empty_cells, "no empty cell left to spawn a tile into"
        row, col = pos_selector(empty_cells)
        self.spawn(val_spawner(), row, col)

    def delayed_call(self, duration, call):
        """Register a process that invokes ``call`` once ``duration`` elapsed.

        The process is stored in ``self.processes`` under a fresh
        ``GameObject.next_proc_id``.  It is called with a time delta, and
        returns "INPROGRESS" until the accumulated deltas reach ``duration``,
        at which point it runs ``call`` and returns "DONE".
        NOTE(review): the delta is presumably in milliseconds, matching
        TILE_MOVE_DURATION_MS - confirm against the process runner.
        """
        time_left = duration

        def delayed_call_process(dtime):
            nonlocal time_left
            time_left -= dtime
            if time_left <= 0:
                call()
                return "DONE"
            return "INPROGRESS"

        GameObject.next_proc_id += 1
        self.processes[GameObject.next_proc_id] = delayed_call_process

    def handle_post_move(self):
        """Finish a move: score and destroy merged tiles, spawn new ones.

        Also spawns one random tile and re-enables ``move``.
        """
        for tile in self.tiles_to_destroy:
            self.scorer.add(tile.value)
            tile.destroy()
        self.tiles_to_destroy.clear()
        for val, row, col in self.tiles_to_spawn:
            self.spawn(val, row, col)
        self.tiles_to_spawn.clear()
        self.spawn_random()
        self.should_wait_for_move_finished = False

    def draw(self, surface):
        """Blit the board image onto ``surface``, offset by 10 px up/left."""
        surface.blit(self.board_image, (self.gpos.x - 10, self.gpos.y - 10))

    def move(self, direction):
        """Slide all tiles towards ``direction`` (one of the class constants).

        Algorithm:
        1. Take each line, ordered so that index 0 is the destination edge
        2. Collapse it
        3. Replace the line with the collapsed one

        Ignored while a previous move is still being finished.
        """
        if self.should_wait_for_move_finished:
            return
        lines = self.get_lines(direction)
        new_lines, moves = self.collapse(lines)
        self.handle_moves(moves, direction)
        self.update_lines(new_lines, direction)

    def get_lines(self, direction):
        """Return copies of the board lines for ``direction``.

        Each line lists cell values starting at the edge the tiles slide
        towards.  An unknown direction yields an empty list.
        """
        return [[self.m[row][col] for row, col in line_positions]
                for line_positions in self.get_positions(direction)]

    def get_positions(self, direction):
        """Return, per line, the ``(row, col)`` cells in sliding order.

        Index 0 of every line is the cell at the edge the tiles slide
        towards.  An unknown direction yields an empty list.
        """
        row_ids = range(self.rows)
        col_ids = range(self.cols)
        if direction == Board.LEFT:
            return [[(r, c) for c in col_ids] for r in row_ids]
        if direction == Board.RIGHT:
            return [[(r, c) for c in reversed(col_ids)] for r in row_ids]
        if direction == Board.UP:
            return [[(r, c) for r in row_ids] for c in col_ids]
        if direction == Board.DOWN:
            return [[(r, c) for r in reversed(row_ids)] for c in col_ids]
        return []

    def handle_moves(self, moves, direction):
        """Apply per-line ``(from_index, to_index)`` moves to the tiles.

        Line indices are translated to board cells through
        ``get_positions``.  If anything moved, ``handle_post_move`` is
        scheduled to run after ``TILE_MOVE_DURATION_MS``.
        """
        for line_positions, line_moves in zip(self.get_positions(direction),
                                              moves):
            for index_from, index_to in line_moves:
                self.handle_move(line_positions[index_from],
                                 line_positions[index_to])
        if self.should_wait_for_move_finished:
            self.delayed_call(Board.TILE_MOVE_DURATION_MS,
                              self.handle_post_move)

    def handle_move(self, cell_from, cell_to):
        """Move the tile at ``cell_from`` to ``cell_to``.

        If ``cell_to`` already holds a tile, both tiles are queued for
        destruction and a tile of twice the moving tile's value is queued to
        be spawned there.
        """
        self.should_wait_for_move_finished = True
        tiles = self.tiles
        from_row, from_col = cell_from[0], cell_from[1]
        to_row, to_col = cell_to[0], cell_to[1]
        moving = tiles[from_row][from_col]
        occupant = tiles[to_row][to_col]
        moving.move_to(to_row, to_col, Board.TILE_MOVE_DURATION_MS)
        tiles[from_row][from_col] = None
        if occupant is not None:
            # Merge: the cell stays empty until handle_post_move spawns the
            # combined tile.
            tiles[to_row][to_col] = None
            self.tiles_to_destroy.append(moving)
            self.tiles_to_destroy.append(occupant)
            self.tiles_to_spawn.append((moving.value * 2, to_row, to_col))
        else:
            tiles[to_row][to_col] = moving

    def update_lines(self, new_lines, direction):
        """Write collapsed lines (as produced for ``direction``) into ``self.m``."""
        for line_positions, new_line in zip(self.get_positions(direction),
                                            new_lines):
            for (row, col), value in zip(line_positions, new_line):
                self.m[row][col] = value

    def collapse(self, lines):
        """Collapse every line; return ``(new_lines, moves_per_line)``."""
        new_lines = []
        moves = []
        for line in lines:
            new_line, line_moves = self.collapse_one_line(line)
            new_lines.append(new_line)
            moves.append(line_moves)
        return new_lines, moves

    def collapse_one_line(self, line):
        """Slide and merge one line towards index 0.

        ``line`` is modified in place and also returned, together with the
        list of ``(from_index, to_index)`` moves that were performed.
        """
        # [0, 2, 2, 4] -> [2, 0, 2, 4] -> [4, 0, 0, 4] -> [4, 4, 0, 0]
        # [2, 4, 4, 2] -> [2, 4, 4, 2] -> [2, 8, 0, 2] -> [2, 8, 2, 0]
        # [2, 4, 2, 4] -> [2, 4, 2, 4] (not a move)
        moves = []
        target_cell = 0
        for i in range(1, len(line)):
            if line[i]:
                if line[target_cell] == 0:
                    # move to empty cell
                    line[target_cell], line[i] = line[i], line[target_cell]
                    moves.append((i, target_cell))
                elif line[target_cell] == line[i]:
                    # move and collapse; a merged cell cannot merge again
                    line[target_cell] *= 2
                    line[i] = 0
                    moves.append((i, target_cell))
                    target_cell += 1
                else:
                    # different value: the next cell becomes the target, and
                    # the tile slides there unless it already sits in it
                    target_cell += 1
                    if target_cell != i:
                        line[target_cell], line[i] = line[i], line[target_cell]
                        moves.append((i, target_cell))
        return line, moves

    def test_collapse_algo(self):
        """Self-check ``collapse_one_line``; print "." or the first mismatch."""
        lines = [[0, 2, 2, 4], [2, 4, 4, 2], [2, 4, 2, 4], [2, 2, 2, 2],
                 [2, 2, 4, 4], [0, 0, 4, 2], [2, 0, 0, 2], [0, 2, 0, 2]]

        expected_lines = [[4, 4, 0, 0], [2, 8, 2, 0], [2, 4, 2, 4],
                          [4, 4, 0, 0], [4, 8, 0, 0], [4, 2, 0, 0],
                          [4, 0, 0, 0], [4, 0, 0, 0]]
        for line, check in zip(lines, expected_lines):
            new_line, _ = self.collapse_one_line(line)
            for i, v in enumerate(new_line):
                if v != check[i]:
                    print(
                        f"Board/test_collapse: {new_line} != {check} at pos {i}"
                    )
                    return
        print(".", end="")
Пример #2
0
    def train_icfat(self):
        """Train StarGAN within a single dataset.

        Iterates ``self.celeba_loader``, which yields three image batches per
        step (presumably anchor / positive / negative - confirm against the
        dataset).  Each iteration:

        * trains ``D_cls`` with a margin ranking loss on the embedding
          distances and ``D_src`` with ``-mean(real) + mean(fake)``,
        * applies a gradient penalty step on interpolated images,
        * every ``self.n_critic`` iterations trains ``G`` with adversarial,
          reconstruction and ranking losses.

        Training can resume from the JSON status file named by
        ``self.resume_training``.  Final weights are written to
        ``self.model_save_dir`` when training ends.
        """
        # TODO: the original author left a large "TODO" banner on this method
        # without stating what is still outstanding.

        # Learning rate cache for decaying.
        g_lr = self.g_lr
        d_lr = self.d_lr

        def apply_lr():
            # Push the current rates into the optimizers; without this the
            # decayed values would only ever be printed.
            # NOTE(review): assumes both discriminator optimizers share d_lr.
            for optimizer, lr in ((self.g_optimizer, g_lr),
                                  (self.dsrc_optimizer, d_lr),
                                  (self.dcls_optimizer, d_lr)):
                for group in optimizer.param_groups:
                    group['lr'] = lr

        # Start training from scratch or resume training.
        # FIXME this will need to change
        start_epoch = 0
        start_iters = 0
        if self.resume_training is not None:
            # recover status
            with open(self.resume_training, 'r') as f:
                status = json.load(f)['status']
            start_epoch = status['epoch']
            start_iters = status['iteration']
            g_lr = status['g_lr']
            d_lr = status['d_lr']
            # reload models
            self.restore_model(start_epoch, start_iters)
            # continue with the learning rates stored in the checkpoint
            apply_lr()

        # Collect up to 50 fixed (first, third) image pairs for debug output.
        fixed_x = []
        total = 50
        for i, (batchA, batchP, batchN) in enumerate(self.celeba_loader):
            for imageA, imageP, imageN in zip(batchA, batchP, batchN):
                print('Reading debugging images', i, total)
                fixed_x.append((imageA.unsqueeze(0), imageN.unsqueeze(0)))
                total -= 1
                if total == 0: break
            if total == 0: break

        scorer = Scorer(self.batch_size,
                        variables=('D_cls/distance_same',
                                   'D_cls/distance_different',
                                   'G/distance_same', 'G/distance_different'))

        criterion = torch.nn.MarginRankingLoss(margin=self.margin)

        self.data_loader = self.celeba_loader
        iters_per_epoch = len(self.data_loader)

        # Start training.
        print('Start training...')
        start_time = time.time()
        for e in range(start_epoch, self.num_epochs):
            for i, (batchA, batchP, batchN) in enumerate(self.data_loader):
                # current_iteration is used to keep the global iteration in
                # the case of resuming training
                current_iteration = i + start_iters + 1 if e == start_epoch else i
                # NOTE(review): in the first (possibly resumed) epoch the loop
                # stops once i exceeds len(loader) - start_iters; verify the
                # off-by-one against the "+ 1" used above.
                if e == start_epoch and i > (len(self.data_loader) -
                                             start_iters):
                    break
                imageA = batchA.to(self.device)
                imageP = batchP.to(self.device)
                imageN = batchN.to(self.device)

                # ================== Train D_cls on CelebA ================== #
                # Compute loss with real images
                _, idA = self.D_cls(imageA)
                _, idP = self.D_cls(imageP)
                _, idN = self.D_cls(imageN)

                # Target 1 asks for dist(A, N) to exceed dist(A, P) by margin.
                d_loss_cls = criterion(
                    F.pairwise_distance(idA, idN),
                    F.pairwise_distance(idA, idP),
                    torch.ones((idA.size(0), 1), device=self.device))

                # Compute classification accuracy of D_cls
                d = {
                    'D_cls/distance_same':
                    torch.mean(F.pairwise_distance(idA, idP)).item(),
                    'D_cls/distance_different':
                    torch.mean(F.pairwise_distance(idA, idN)).item()
                }
                distances = [
                    d['D_cls/distance_same'], d['D_cls/distance_different']
                ]
                scorer.add(d)
                if (i + 1) % self.log_step == 0:
                    print('Classification distances (same/different): ',
                          end='')
                    print(distances)

                # Logging: one dict per iteration, shared by D and G sections
                loss = OrderedDict()
                loss['D_cls/loss_cls'] = d_loss_cls.item()

                # ================== Train D_src on CelebA ================== #

                # Compute loss with real images
                out_srcA, _ = self.D_src(imageA)
                d_loss_real = -torch.mean(out_srcA)

                # Compute loss with fake images
                fake_x = self.G(imageA, idN)
                out_src_fake, _ = self.D_src(fake_x)
                d_loss_fake = torch.mean(out_src_fake)

                # ================== Optimization =========================== #

                # Backward + Optimize
                d_loss = d_loss_real + d_loss_fake

                # Backward + Optimize D's
                e_loss = d_loss + self.lambda_cls * d_loss_cls
                self.reset_grad()
                e_loss.backward()
                self.dsrc_optimizer.step()
                self.dcls_optimizer.step()

                # Compute gradient penalty
                alpha = torch.rand(imageA.size(0), 1, 1, 1).to(self.device)
                x_hat = (alpha * imageA.data +
                         (1 - alpha) * fake_x.data).requires_grad_(True)
                out_src, _ = self.D_src(x_hat)
                d_loss_gp = self.gradient_penalty(out_src, x_hat)

                # FIXME don't we have to optimize D_cls as well with GP?!
                # FIXME if not, it could explain why at the end of training
                # the GP error increases so much!
                _, out_cls = self.D_cls(x_hat)
                d_loss_gp += self.gradient_penalty(out_cls, x_hat)

                # Backward + Optimize

                d_loss = self.lambda_gp * d_loss_gp
                self.reset_grad()
                d_loss.backward()
                self.dsrc_optimizer.step()

                # Logging (extend the dict created above so that
                # 'D_cls/loss_cls' is not lost)
                loss['D_src/loss_real'] = d_loss_real.item()
                loss['D_src/loss_fake'] = d_loss_fake.item()
                loss['D/loss_gp'] = d_loss_gp.item()

                # ================== Train G ================== #

                if (i + 1) % self.n_critic == 0:
                    # FIXME calling the network here is unnecessary
                    # NOTE(review): the embeddings computed above belong to a
                    # graph already consumed by e_loss.backward(); reusing
                    # them here would presumably require detaching first.
                    _, idA = self.D_cls(imageA)
                    _, idP = self.D_cls(imageP)
                    _, idN = self.D_cls(imageN)

                    # Original-to-target and target-to-original domain
                    fake_c = idN
                    real_c = idP
                    fake_x = self.G(imageA, fake_c)
                    rec_x = self.G(fake_x, real_c)

                    # Compute losses
                    out_src, _ = self.D_src(fake_x)
                    _, idG = self.D_cls(fake_x)
                    g_loss_fake = -torch.mean(out_src)
                    g_loss_rec = torch.mean(torch.abs(imageA - rec_x))

                    g_loss_cls = criterion(
                        F.pairwise_distance(idG, idA),
                        F.pairwise_distance(idG, idN),
                        torch.ones((idA.size(0), 1), device=self.device))

                    # Backward + Optimize
                    g_loss = g_loss_fake \
                             + self.lambda_rec * g_loss_rec \
                             + self.lambda_cls * g_loss_cls
                    self.reset_grad()
                    g_loss.backward()
                    self.g_optimizer.step()

                    # Logging
                    loss['G/loss_fake'] = g_loss_fake.item()
                    loss['G/loss_rec'] = g_loss_rec.item()
                    loss['G/loss_cls'] = g_loss_cls.item()

                    # Compute classification accuracy of the discriminator
                    # FIXME I think there's a problem here, shouldn't it be
                    # positive_distance the one between idN and idG?
                    positive_distance = torch.sum(F.pairwise_distance(
                        idP, idG)).item()
                    negative_distance = torch.sum(F.pairwise_distance(
                        idA, idG)).item()
                    d = {
                        'G/distance_same': positive_distance / len(idG),
                        'G/distance_different': negative_distance / len(idG)
                    }
                    scorer.add(d)

                # Print log info
                if (i + 1) % self.log_step == 0:
                    elapsed = time.time() - start_time
                    elapsed = str(datetime.timedelta(seconds=elapsed))

                    log = "Elapsed [{}], Epoch [{}/{}], Iter [{}/{}]".format(
                        elapsed, e + 1, self.num_epochs, i + 1,
                        iters_per_epoch)
                    for tag, value in loss.items():
                        log += ", {}: {:.4f}".format(tag, value)
                    print(log)

                    scores = scorer.get_scores()
                    for key, value in scores.items():
                        loss[key] = value
                    if self.use_tensorboard:
                        for tag, value in loss.items():
                            self.logger.scalar_summary(
                                tag, value,
                                e * iters_per_epoch + current_iteration + 1)

                # save model
                if not self.save_epochs and \
                        (i + 1) % self.model_save_step == 0:
                    self.generate_debugging_images(fixed_x, e,
                                                   current_iteration)
                    self.generate_validation_images(e, current_iteration)
                    # store the current (possibly decayed) rates so that a
                    # resumed run continues the schedule
                    self.save_training(e, current_iteration, g_lr, d_lr)
                    print("Saved models..!")

            # ================== Debugging images ================== #
            if (e + 1) % self.sample_step == 0:
                self.generate_debugging_images(fixed_x, e, current_iteration)
                self.generate_validation_images(e, current_iteration)

            # ================== Checkpoints and lr decay ================== #
            if (e + 1) > (self.num_epochs - self.num_epochs_decay):
                if (e - (self.num_epochs - self.num_epochs_decay)) % \
                        self.decay_step == 0:
                    g_lr -= (self.g_lr / float(self.decay_rate))
                    d_lr -= (self.d_lr / float(self.decay_rate))
                    print("Decay learning rate to g_lr: {}, d_lr: {"
                          "}".format(g_lr, d_lr))
                    assert g_lr > 0.0
                    assert d_lr > 0.0
                    apply_lr()

            # Save model checkpoints
            # NOTE(review): this end-of-epoch save is guarded by
            # "not self.save_epochs" and the last batch index; it looks like
            # it was meant to depend on save_epochs and the epoch number.
            # Left unchanged - confirm the intent.
            if not self.save_epochs and \
                    (i + 1) % self.model_save_step == 0:
                self.save_training(e, 0, g_lr, d_lr)
                print("Saved models..!")

        # Save model checkpoints when training is done
        torch.save(
            self.G.state_dict(),
            os.path.join(self.model_save_dir,
                         "{}_{}_G.pth".format(e + 1, i + 1)))
        torch.save(
            self.D_src.state_dict(),
            os.path.join(self.model_save_dir,
                         "{}_{}_D_src.pth".format(e + 1, i + 1)))
        torch.save(
            self.D_cls.state_dict(),
            os.path.join(self.model_save_dir,
                         "{}_{}_D_cls.pth".format(e + 1, i + 1)))
        print("Saved models..!")

        print("Train finished")