Example 1
    def preprocess(self):
        data = pd.read_csv(self.dataset_path)
        message = 'Columns found in the dataset {}'.format(data.columns)
        print_and_log_message(message, self.logger)
        data = data.dropna()
        start_time_stamp = data['Timestamp'][0]
        timestamps = data['Timestamp'].apply(lambda x:
                                             (x - start_time_stamp) / 60)
        timestamps = timestamps - range(timestamps.shape[0])
        data.insert(0, 'blocks', timestamps)
        blocks = data.groupby('blocks')
        message = 'Number of blocks of continuous prices found are {}'.format(
            len(blocks))
        print_and_log_message(message, self.logger)

        self._data_blocks = []
        distinct_episodes = 0
        for name, indices in blocks.indices.items():
            if len(indices) > (self.history_length + self.horizon):
                self._data_blocks.append(blocks.get_group(name))
                distinct_episodes = distinct_episodes + (
                    len(indices) - (self.history_length + self.horizon) + 1)

        data = None
        message_list = [
            'Number of usable blocks obtained from the dataset are {}'.format(
                len(self._data_blocks))
        ]
        message_list.append(
            'Number of distinct episodes for the current configuration are {}'.
            format(distinct_episodes))
        print_and_log_message_list(message_list, self.logger)
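
The block detection above relies on a small arithmetic trick: once each timestamp is converted to a minute offset from the first row, subtracting the running row index yields a value that stays constant within every gap-free run of minutes, so groupby('blocks') splits the data exactly at the gaps. A minimal sketch of the idea on made-up data (the column name and values are illustrative, not from the source dataset):

import pandas as pd

# Hypothetical minute-level timestamps (in seconds) with a gap after the 4th row.
data = pd.DataFrame({'Timestamp': [0, 60, 120, 180, 600, 660, 720]})

start_time_stamp = data['Timestamp'][0]
minutes = data['Timestamp'].apply(lambda x: (x - start_time_stamp) / 60)  # 0, 1, 2, 3, 10, 11, 12
blocks = minutes - list(range(minutes.shape[0]))                          # 0, 0, 0, 0, 6, 6, 6
print(blocks.tolist())  # constant within each contiguous run, jumps at the gap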
Example 2
    def sample(self):
        if self.count <= self.history_length:
            print_and_log_message(REPLAY_MEMORY_INSUFFICIENT, self.logger)
        
        else:
            indexes = []
            while len(indexes) < self.batch_size:
                # find random index 
                while True:
                    # sample one index (ignore states wrapping over)
                    index = random.randint(self.history_length, self.count - 1)
                    # if wraps over current pointer, then get new one
                    if index >= self.current and index - self.history_length < self.current:
                        continue
                    # if wraps over episode end, then get new one
                    # NB! poststate (last screen) can be terminal state!
                    if self.terminals[(index - self.history_length):index].any():
                        continue
                    # otherwise use this index
                    break
                
                # NB! having index first is fastest in C-order matrices
                self.prestates[len(indexes), ...] = self.getState(index - 1)
                self.poststates[len(indexes), ...] = self.getState(index)
                indexes.append(index)

            actions = self.actions[indexes]
            rewards = self.rewards[indexes]
            terminals = self.terminals[indexes]

            return self.prestates, actions, rewards, self.poststates, terminals
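
Note that when the memory holds fewer than history_length + 1 transitions, sample() above only logs REPLAY_MEMORY_INSUFFICIENT and implicitly returns None, so callers need to guard for that. A hypothetical training-loop usage (memory and q_update are illustrative names, not from the source):

batch = memory.sample()
if batch is not None:
    prestates, actions, rewards, poststates, terminals = batch
    q_update(prestates, actions, rewards, poststates, terminals)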
Example 3
    def preprocess(self):
        data = pd.read_csv(self.dataset_path)
        message = 'Columns found in the dataset {}'.format(data.columns)
        print_and_log_message(message, self.logger)
        data = data.dropna()
        start_time_stamp = data['Timestamp'][0]
        timestamps = data['Timestamp'].apply(lambda x: (x - start_time_stamp) / 60)
        timestamps = timestamps - range(timestamps.shape[0])
        data.insert(0, 'blocks', timestamps)
        blocks = data.groupby('blocks')
        message = 'Number of blocks of continuous prices found are {}'.format(len(blocks))
        print_and_log_message(message, self.logger)
        
        self._data_blocks = []
        distinct_episodes = 0
        for name, indices in blocks.indices.items():
            # The block must exceed history_length + horizon by one extra row,
            # which is needed to normalize each price block by the previous time stamp.
            if len(indices) > (self.history_length + self.horizon + 1):
                self._data_blocks.append(blocks.get_group(name))
                # similarly, subtract the extra normalization row when counting distinct episodes
                distinct_episodes = distinct_episodes + (len(indices) - (self.history_length + self.horizon) + 1 - 1)

        data = None
        message_list = ['Number of usable blocks obtained from the dataset are {}'.format(len(self._data_blocks))]
        message_list.append('Number of distinct episodes for the current configuration are {}'.format(distinct_episodes))
        print_and_log_message_list(message_list, self.logger)
Example 4
    def save(self):
        message = "Saving replay memory to {}".format(self._model_dir)
        print_and_log_message(message, self.logger)
        for name, array in zip(
                [ACTIONS, REWARDS, SCREENS, TERMINALS, PRESTATES, POSTSTATES],
                [self.actions, self.rewards, self.screens, self.terminals, self.prestates, self.poststates]):
            save_npy(array, join(self._model_dir, name), self.logger)

        message = "Replay memory successfully saved to {}".format(self._model_dir)
        print_and_log_message(message, self.logger)
Example 5
    def getState(self, index):
        if self.count == 0:
            print_and_log_message(REPLAY_MEMORY_ZERO, self.logger)
        else:
            index = index % self.count
            if index >= self.history_length - 1:
                return self.screens[(index - (self.history_length - 1)):(index + 1), ...]
            else:
                indexes = [(index - i) % self.count for i in reversed(range(self.history_length))]
                return self.screens[indexes, ...]
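
The else branch above handles indices that fall within the first history_length - 1 slots of the circular buffer, where the window of preceding screens wraps around to the end of the filled memory. A small illustration of the index arithmetic with made-up sizes:

history_length, count = 4, 10  # illustrative buffer state

index = 1  # fewer than history_length - 1 entries precede it
indexes = [(index - i) % count for i in reversed(range(history_length))]
print(indexes)  # [8, 9, 0, 1] -- the window wraps to the end of the buffer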
Example 6
    def add(self, screen, reward, action, terminal):
        if screen.shape != (self.num_channels,):
            print_and_log_message(INVALID_TIMESTEP, self.logger)
        else:
            self.actions[self.current] = action
            self.rewards[self.current] = reward
            self.screens[self.current, ...] = screen
            self.terminals[self.current] = terminal
            self.count = max(self.count, self.current + 1)
            self.current = (self.current + 1) % self.memory_size
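
add() above writes into pre-allocated circular buffers and keeps two counters: count (how many slots are filled, capped at memory_size) and current (the next write position). A minimal sketch of the buffer setup it assumes, with made-up sizes and dtypes (the repository's actual constructor may differ):

import numpy as np

memory_size, num_channels = 100000, 7  # illustrative sizes

actions = np.empty(memory_size, dtype=np.uint8)
rewards = np.empty(memory_size, dtype=np.float32)
screens = np.empty((memory_size, num_channels), dtype=np.float32)
terminals = np.empty(memory_size, dtype=np.bool_)
count, current = 0, 0  # filled slots and next write index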
Example 7
    def load_model(self):
        message = "Loading checkpoint from {}".format(self.checkpoint_dir)
        print_and_log_message(message, self.logger)

        ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            fname = os.path.join(self.checkpoint_dir, ckpt_name)
            self.saver.restore(self.sess, fname)
            message = "Checkpoint successfully loaded from {}".format(fname)
            print_and_log_message(message, self.logger)
            return True
        else:
            message = "Checkpoint could not be loaded from {}".format(self.checkpoint_dir)
            print_and_log_message(message, self.logger)
            return False
Example 8
    def set_history(self, history):
        if history.shape != self._history.shape:
            print_and_log_message(INVALID_HISTORY, self.logger)

        self._history = history
Example 9
    def add(self, screen):
        if screen.shape != (self.num_channels,):
            print_and_log_message(INVALID_TIMESTEP, self.logger)

        self._history[:-1] = self._history[1:]
        self._history[-1] = screen
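
set_history and add above maintain a fixed-length rolling window of the most recent screens: each new screen shifts the window up by one row and lands in the last slot. A small stand-alone illustration of the same shift (sizes and values are made up):

import numpy as np

history_length, num_channels = 4, 3  # illustrative sizes
_history = np.zeros((history_length, num_channels), dtype=np.float32)

new_screen = np.array([1.0, 2.0, 3.0], dtype=np.float32)
_history[:-1] = _history[1:]  # drop the oldest row
_history[-1] = new_screen     # append the newest screen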
Example 10
    def save_model(self, step=None):
        message = "Saving checkpoint to {}".format(self.checkpoint_dir)
        print_and_log_message(message, self.logger)
        self.saver.save(self.sess, self.checkpoint_dir, global_step=step)
Example 11
def load_npy(path, logger):
    obj = np.load(path)
    message = "  [*] loaded from {}".format(path)
    print_and_log_message(message, logger)
    return obj
Example 12
def save_npy(obj, path, logger):
    np.save(path, obj)
    message = "  [*] saved at {}".format(path)
    print_and_log_message(message, logger)
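
All of the examples on this page rely on print_and_log_message and print_and_log_message_list, which are not shown here. A minimal sketch of what such helpers could look like, assuming a standard logging.Logger is passed in; the repository's actual implementations may differ:

def print_and_log_message(message, logger):
    # echo to stdout and record in the run's log
    print(message)
    logger.info(message)

def print_and_log_message_list(message_list, logger):
    # log each message in order
    for message in message_list:
        print_and_log_message(message, logger)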
Example 13
    def train(self):
        start_step = self.step_op.eval()

        num_episodes, self.update_count, ep_reward = 0, 0, 0.
        total_reward, self.total_loss, self.total_q = 0., 0., 0.
        max_avg_ep_reward = 0
        ep_rewards, actions = [], []

        self.env.new_random_episode(self.history)

        for self.step in tqdm(range(start_step, self.max_step),
                              ncols=70,
                              initial=start_step):
            if self.step == self.learn_start:
                num_episodes, self.update_count, ep_reward = 0, 0, 0.
                total_reward, self.total_loss, self.total_q = 0., 0., 0.
                ep_rewards, actions = [], []

            # 1. predict
            action = self.predict(self.history.get())
            # 2. act
            screen, reward, terminal = self.env.act(action)
            # 3. observe
            self.observe(screen, reward, action, terminal)

            if terminal:
                self.env.new_random_episode(self.history)
                num_episodes += 1
                ep_rewards.append(ep_reward)
                ep_reward = 0.

            else:
                ep_reward += reward

            actions.append(action)
            total_reward += reward

            if self.step >= self.learn_start:
                if self.step % self.test_step == self.test_step - 1:
                    avg_reward = total_reward / self.test_step
                    avg_loss = self.total_loss / self.update_count
                    avg_q = self.total_q / self.update_count

                    try:
                        max_ep_reward = np.max(ep_rewards)
                        min_ep_reward = np.min(ep_rewards)
                        avg_ep_reward = np.mean(ep_rewards)
                    except ValueError:
                        max_ep_reward, min_ep_reward, avg_ep_reward = 0, 0, 0

                    message = 'avg_r: %.4f, avg_l: %.6f, avg_q: %3.6f, avg_ep_r: %.4f, max_ep_r: %.4f, min_ep_r: %.4f, # game: %d' \
                        % (avg_reward, avg_loss, avg_q, avg_ep_reward, max_ep_reward, min_ep_reward, num_episodes)
                    print_and_log_message(message, self.logger)

                    if max_avg_ep_reward * 0.9 <= avg_ep_reward:
                        self.step_assign_op.eval(
                            {self.step_input: self.step + 1})
                        self.save_model(self.step + 1)

                        max_avg_ep_reward = max(max_avg_ep_reward,
                                                avg_ep_reward)

                    if self.step > 180:
                        self.inject_summary({
                            'average.reward': avg_reward,
                            'average.loss': avg_loss,
                            'average.q': avg_q,
                            'episode.max reward': max_ep_reward,
                            'episode.min reward': min_ep_reward,
                            'episode.avg reward': avg_ep_reward,
                            'episode.num of game': num_episodes,
                            'episode.rewards': ep_rewards,
                            'episode.actions': actions,
                            'training.learning_rate': self.learning_rate_op.eval(
                                {self.learning_rate_step: self.step}),
                        }, self.step)

                    num_episodes = 0
                    total_reward = 0.
                    self.total_loss = 0.
                    self.total_q = 0.
                    self.update_count = 0
                    ep_reward = 0.
                    ep_rewards = []
                    actions = []