Example No. 1
    def get(self):
        logging.info("SummaryTask starting...")

        # init class and variables
        bucket_name = os.environ.get(
            'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
        bucket = '/' + bucket_name
        trendManager = TrendManager()
        dataModelConverter = DataModelConverter()
        csvUtils = CsvUtils()
        cloudStorageUtils = CloudStorageUtils()

        previous_day_timestamp = int(time.time()) - Globals._1_DAY
        q_futures = []
        for region in self.getRegions():
            try:
                date = TimezoneAwareDate(region, self.request.get('date'))
                trendsJson = self.getTrends(region, trendManager)
                self.saveToCloudStorage(dataModelConverter, csvUtils,
                                        cloudStorageUtils, trendsJson, region,
                                        bucket, date)
                self.saveToDatastore(q_futures, trendsJson, region, date)
                self.deleteFromDatastore(q_futures, region,
                                         previous_day_timestamp)

            except Exception as e:
                traceback.print_exc()
                Error(msg=str(e), timestamp=int(time.time())).put()
                SendEmail().send('Error on SummaryTask', str(e))
                self.retry()
Example No. 2
def run():
    game = ple.games.flappybird.FlappyBird()
    # game = ple.games.snake.Snake(width=512, height=512)
    # game = ple.games.pong.Pong(width=512, height=512)
    p = ple.PLE(game, fps=30, display_screen=args.is_render)
    p.init()
    plt.figure()

    all_scores = []
    all_losses = []
    all_t = []

    agent = PGAgent(len(p.getGameState()), len(p.getActionSet()))
    is_end = p.game_over()

    for e in range(args.episodes):
        p.reset_game()
        s_t0 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
        reward_total = 0
        pipes = 0

        transitions = []
        for t in range(args.max_steps):
            a_t0_idx = agent.act(s_t0)
            a_t0 = p.getActionSet()[a_t0_idx]
            r_t1 = p.act(a_t0)
            is_end = p.game_over()
            s_t1 = np.asarray(list(p.getGameState().values()),
                              dtype=np.float32)
            reward_total += r_t1

            if r_t1 == 1.0:
                pipes += 1

            if t == args.max_steps - 1:
                r_t1 = -100
                is_end = True

            transitions.append([s_t0, a_t0_idx, r_t1])
            s_t0 = s_t1

            if is_end:
                all_scores.append(reward_total)
                break

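        # compute the discounted Monte Carlo return R for every visited step (REINFORCE targets)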
        for t in range(len(transitions)):
            R = 0
            for t_c, (s_t0, a_t0_idx, r_t) in enumerate(transitions[t:]):
                R += args.gamma**t_c * r_t

            s_t0, a_t0_idx, r_t1 = transitions[t]
            tr = [s_t0, a_t0_idx, R]
            agent.replay_memory.push(tr)

        loss = 0
        if len(agent.replay_memory) > args.batch_size:
            loss = agent.replay()
            all_losses.append(loss)

        all_t.append(t)

        metrics_episode = {
            'loss': loss,
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'pipes': pipes
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} {metrics_episode}')

        if e % 100 == 0 and not args.is_inference:
            # save logs, graphics and weights during training
            plt.clf()

            plt.subplot(3, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)

            plt.subplot(3, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)

            plt.subplot(3, 1, 3)
            plt.ylabel('Steps')
            plt.plot(all_t)

            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.p_model.cpu().state_dict(),
                       os.path.join(seq_run_name, f'model-{e}.pt'))
Example No. 3
def run():
    # environment name
    env = gym.make('LunarLander-v2')
    plt.figure()

    all_scores = []
    all_losses = []
    all_t = []

    agent = DDQNAgent(
        env.observation_space.shape[0],
        # the 8 state values are: x and y position (height), x and y velocity,
        # lander angle and angular velocity, and the left and right leg contact flags (bool)
        env.action_space.n,
        args)
    is_end = False

    t_total = 0

    for e in range(args.episodes):
        s_t0 = env.reset()
        reward_total = 0
        episode_loss = []
        is_win = False
        for t in range(args.max_steps):
            t_total += 1
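            # periodically refresh the target Q-network from the online network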
            if t_total % args.target_update == 0:
                agent.update_q_t_model()

            if args.is_render and len(all_scores):  # and all_scores[-1] > 0:
                # if e % 10 == 0 and all_scores[-1] > 0:
                env.render()
            a_t0 = agent.act(s_t0)
            s_t1, r_t1, is_end, _ = env.step(a_t0)

            reward_total += r_t1

            if t == args.max_steps - 1:
                r_t1 = -100
                is_end = True

            agent.replay_memory.push((s_t0, a_t0, r_t1, s_t1, is_end))
            s_t0 = s_t1

            if len(agent.replay_memory) > args.batch_size:
                loss = agent.replay()
                episode_loss.append(loss)

            if is_end:
                all_scores.append(reward_total)
                all_losses.append(np.mean(episode_loss))
                '''
                a terminal reward of >= 100 means the lander touched down successfully
                https://github.com/openai/gym/blob/master/gym/envs/box2d/lunar_lander.py#L381
                '''
                if r_t1 >= 100:
                    is_win = True
                break

        all_t.append(t)
        metrics_episode = {
            'loss': all_losses[-1],
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'is_win': is_win
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} {metrics_episode}')

        if e % 100 == 0 and not args.is_inference:
            # save logs, graphics and weights during training
            plt.clf()

            plt.subplot(3, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)

            plt.subplot(3, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)

            plt.subplot(3, 1, 3)
            plt.ylabel('Steps')
            plt.plot(all_t)

            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.q_model.cpu().state_dict(),
                       os.path.join(seq_run_name, f'model-{e}.pt'))
    env.close()
Example No. 4
def main():
    data_loader_train = torch.utils.data.DataLoader(
        dataset=DatasetFashionMNIST(is_train=True, dataset_path=args.dataset_path),
        batch_size=BATCH_SIZE,
        shuffle=True)

    data_loader_test = torch.utils.data.DataLoader(
        dataset=DatasetFashionMNIST(is_train=False, dataset_path=args.dataset_path),
        batch_size=BATCH_SIZE,
        shuffle=False)

    model = ResNet(in_channels=1, n_classes=10)
    model = model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    metrics = {}
    for stage in ['train', 'test']:
        for metric in ['loss', 'acc']:
            metrics[f'{stage}_{metric}'] = []

    for epoch in range(EPOCHS):
        metrics_epoch = {key: [] for key in metrics.keys()}
        for data_loader in [data_loader_train, data_loader_test]:
            stage = 'train'
            torch.set_grad_enabled(True)
            if data_loader == data_loader_test:
                stage = 'test'
                torch.set_grad_enabled(False)

            # inference
            for x, y in data_loader:
                x = x.to(DEVICE)
                y = y.to(DEVICE)
                y_prim = model.forward(x)
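                # cross-entropy loss, assuming y is one-hot encoded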
                loss = -torch.mean(y * torch.log(y_prim + 1e-8))
                if data_loader == data_loader_train:
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()
                # calculate metrics per batch
                metrics_epoch[f'{stage}_loss'].append(
                    loss.cpu().item())  # Tensor(0.1) => 0.1f
                metrics_epoch[f'{stage}_acc'].append(acc(y_prim, y))

        # calculate metrics per epoch
        metrics_epoch_str = []
        for key in metrics_epoch.keys():
            metrics_epoch[key] = np.mean(metrics_epoch[key])
            metrics_epoch_str.append(f'{key}: {round(metrics_epoch[key], 2)}')
        summary_writer.flush()
        print(f'epoch: {epoch} {" ".join(metrics_epoch_str)}')

        # add hparams
        summary_writer.add_hparams(hparam_dict=args.__dict__,
                                   metric_dict=metrics_epoch,
                                   name=args.run_name,
                                   global_step=epoch)
        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', f'{args.sequence_name}-csv'),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_epoch,
                global_step=epoch
            )
        # append metrics per epoch to global metrics
        for key in metrics_epoch.keys():
            metrics[key].append(metrics_epoch[key])
        summary_writer.flush()
    summary_writer.close()
Example No. 5
    def write(self, data, output_csv):
        CsvUtils.write_to_csv(data, output_csv, self.csv_delimiter)
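
The snippet above just delegates to CsvUtils.write_to_csv with a configurable delimiter. A minimal sketch of what such a helper could look like, written with Python's csv module and assuming data is an iterable of rows (an assumption for illustration, not the library's actual implementation):

import csv

class CsvUtils:
    @staticmethod
    def write_to_csv(data, output_csv, delimiter=','):
        # assumed behaviour: each item in data is one row (an iterable of fields)
        with open(output_csv, 'w', newline='') as f:
            writer = csv.writer(f, delimiter=delimiter)
            writer.writerows(data)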
Example No. 6
def run():
    game = ple.games.flappybird.FlappyBird()
    # game = ple.games.snake.Snake(width=512, height=512)
    # game = ple.games.pong.Pong(width=512, height=512)
    p = ple.PLE(game, fps=30, display_screen=args.is_render)
    p.init()
    plt.figure()

    all_scores = []
    all_losses = []
    all_losses_a = []
    all_losses_c = []
    all_t = []

    agent = A2CAgent(len(p.getGameState()), len(p.getActionSet()))
    is_end = p.game_over()

    for e in range(args.episodes):
        p.reset_game()
        s_t0 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
        reward_total = 0
        pipes = 0

        transitions = []
        states_t1 = []
        end_t1 = []
        for t in range(args.max_steps):
            a_t0_idx = agent.act(s_t0)
            a_t0 = p.getActionSet()[a_t0_idx]
            r_t1 = p.act(a_t0)
            is_end = p.game_over()
            s_t1 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
            end_t1.append(is_end)
            reward_total += r_t1

            if r_t1 == 1.0:
                pipes += 1

            transitions.append([s_t0, a_t0_idx, r_t1])
            states_t1.append(s_t1)
            s_t0 = s_t1

            if is_end:
                all_scores.append(reward_total)
                break

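        # critic value estimates for the next states, used to bootstrap the one-step targets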
        t_states_t1 = torch.FloatTensor(states_t1).to(args.device)
        v_t1 = agent.model_c.forward(t_states_t1)
        np_v_t1 = v_t1.cpu().data.numpy().squeeze()
        for t in range(len(transitions)):
            s_t0, a_t0_idx, r_t1 = transitions[t]
            is_end = end_t1[t]
            delta = r_t1
            if not is_end:
                delta = r_t1 + args.gamma * np_v_t1[t]
            agent.replay_memory.push([s_t0, a_t0_idx, delta])

        loss = loss_a = loss_c = 0
        if len(agent.replay_memory) > args.batch_size:
            loss_a, loss_c = agent.replay()
            loss = loss_a + loss_c

            all_losses.append(loss)
            all_losses_a.append(loss_a)
            all_losses_c.append(loss_c)

        all_t.append(t)

        metrics_episode = {
            'loss': loss,
            'loss_a': loss_a,
            'loss_c': loss_c,
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'pipes': pipes
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} {metrics_episode}')

        if e % 100 == 0:
            plt.clf()

            plt.subplot(5, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)

            plt.subplot(5, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)

            plt.subplot(5, 1, 3)
            plt.ylabel('Loss Actor')
            plt.plot(all_losses_a)

            plt.subplot(5, 1, 4)
            plt.ylabel('Loss Critic')
            plt.plot(all_losses_c)

            plt.subplot(5, 1, 5)
            plt.ylabel('Steps')
            plt.plot(all_t)

            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.model_c.cpu().state_dict(), os.path.join(seq_run_name, f'model-{e}-c.pt'))
            torch.save(agent.model_a.cpu().state_dict(), os.path.join(seq_run_name, f'model-{e}-a.pt'))
Example No. 7
def run():
    game = ple.games.flappybird.FlappyBird()
    # game = ple.games.snake.Snake(width=512, height=512)
    # game = ple.games.pong.Pong(width=512, height=512)
    p = ple.PLE(game, fps=30, display_screen=args.is_render)
    p.init()

    plt.figure()

    all_scores = []
    all_losses = []
    all_t = []

    agent = DQNAgent(len(p.getGameState()), len(p.getActionSet()), args)
    is_end = p.game_over()

    for e in range(args.episodes):
        p.reset_game()
        s_t0 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
        reward_total = 0
        pipes = 0
        episode_loss = []
        for t in range(args.max_steps):
            a_t0_idx = agent.act(s_t0)
            a_t0 = p.getActionSet()[a_t0_idx]
            r_t1 = p.act(a_t0)
            is_end = p.game_over()
            s_t1 = np.asarray(list(p.getGameState().values()), dtype=np.float32)

            reward_total += r_t1

            '''
            from /PyGame-Learning-Environment/ple/games/base/pygamewrapper.py
            self.rewards = {
            "positive": 1.0,
            "negative": -1.0,
            "tick": 0,
            "loss": -5.0,
            "win": 5.0
            }
            '''
            if r_t1 == 1.0:
                pipes += 1

            if t == args.max_steps - 1:
                r_t1 = -100
                is_end = True

            agent.replay_memory.push(
                (s_t0, a_t0_idx, r_t1, s_t1, is_end)
            )
            s_t0 = s_t1

            if len(agent.replay_memory) > args.batch_size:
                loss = agent.replay()
                episode_loss.append(loss)

            if is_end:
                all_scores.append(reward_total)
                all_losses.append(np.mean(episode_loss))
                break

        all_t.append(t)

        metrics_episode = {
            'loss': all_losses[-1],
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'pipes': pipes
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} {metrics_episode}')

        if e % 100 == 0 and not args.is_inference:
            # save logs, graphics and weights during training
            plt.clf()

            plt.subplot(3, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)

            plt.subplot(3, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)

            plt.subplot(3, 1, 3)
            plt.ylabel('Steps')
            plt.plot(all_t)

            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.q_model.cpu().state_dict(), os.path.join(seq_run_name, f'model-{e}.pt'))
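
All of the training loops above report per-episode or per-epoch metrics through CsvUtils.add_hparams. A minimal sketch of such a logger, assuming it appends one row per global_step that merges the run's arguments with the metrics into a CSV under sequence_dir (the file layout and behaviour are assumptions for illustration, not the actual implementation):

import csv
import os

class CsvUtils:
    @staticmethod
    def add_hparams(sequence_dir, sequence_name, run_name,
                    args_dict, metrics_dict, global_step):
        # assumed behaviour: one CSV per run, header written on first use,
        # one row per step combining hyperparameters and metrics
        # (sequence_name is kept only to mirror the call sites above)
        os.makedirs(sequence_dir, exist_ok=True)
        path = os.path.join(sequence_dir, f'{run_name}.csv')
        row = {'global_step': global_step, **args_dict, **metrics_dict}
        write_header = not os.path.exists(path)
        with open(path, 'a', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=list(row.keys()))
            if write_header:
                writer.writeheader()
            writer.writerow(row)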