Example No. 1
    def ema_checks(stats: HourData, history: History, ema_threshold: float,
                   reset_perc: float):
        if stats.ema_percent_diff_positive > ema_threshold:
            logging.info(
                f"Current price is outside threshold difference ({stats.formatted_info()})"
            )
            return False

        logging.info(
            f"Current price not outside threshold ({stats.formatted_info()})")

        # If EMA hasn't been reset then check whether we should reset it
        if not history.ema_reset:
            if history.rising:
                target = history.price * (1 - reset_perc / 100)
                should_reset = stats.ema > target
            else:
                target = history.price * (1 + reset_perc / 100)
                should_reset = stats.ema < target

            if should_reset:
                logging.info("Resetting EMA")
                history.ema_reset = True
                history.save()

        return True
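
ema_checks only relies on a small persisted state object: it reads history.ema_reset, history.rising and history.price and calls history.save(). The examples never show that class, so the following is a hypothetical sketch of a JSON-backed version (the attribute names come from the code above; the file handling is an assumption, and Example No. 10 below constructs it as History(DATA_FILE)):

import json

class History:
    """Hypothetical JSON-backed state holder used by ema_checks."""

    def __init__(self, data_file):
        # Defaults for the first run
        self.data_file = data_file
        self.ema_reset = False  # has the EMA been reset since the last post?
        self.rising = True      # was the price rising at the last post?
        self.price = 0.0        # price recorded at the last post
        try:
            with open(data_file) as fh:
                self.__dict__.update(json.load(fh))
        except FileNotFoundError:
            pass  # no previous run: keep the defaults

    def save(self):
        # Persist everything except the file path itself
        state = {k: v for k, v in self.__dict__.items() if k != "data_file"}
        with open(self.data_file, "w") as fh:
            json.dump(state, fh)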
Example No. 2
 def __init__(self, config):
     super(DQNAgent, self).__init__(config)
     self.history = History(config)
     self.replay_memory = DQNReplayMemory(config)
     self.net = DQN(self.env_wrapper.action_space.n, config)
     self.net.build()
     self.net.add_summary(["average_reward", "average_loss", "average_q", "ep_max_reward", "ep_min_reward", "ep_num_game", "learning_rate"], ["ep_rewards", "ep_actions"])
Example No. 3
class TestHistory(unittest.TestCase):

    def setUp(self):
        self.history = History()

    def test_get_last_n_commands(self):
        self.history.commands = ["foo1", "foo2", "foo3"]
        expected = ["foo2", "foo3"]
        self.assertEqual(expected, self.history.get_last_n_commands(2))

    def test_get_last_n_formatted(self):
        self.history.commands = ["foo1", "foo2", "foo3"]
        expected = "foo1\nfoo2\nfoo3"
        self.assertEqual(self.history.get_last_n_formatted(3), expected)

    def test_add_line(self):
        self.assertEqual(0, len(self.history.commands))
        self.history.add_line("foo line")
        self.assertEqual(1, len(self.history.commands))
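
The tests pin down the History interface used here: a commands list, add_line, get_last_n_commands and get_last_n_formatted. A minimal implementation that would satisfy them could look like this (purely illustrative, not the class actually under test):

class History:
    """Hypothetical in-memory command history matching the tests above."""

    def __init__(self):
        self.commands = []

    def add_line(self, line):
        # Append a single command to the history
        self.commands.append(line)

    def get_last_n_commands(self, n):
        # Return the n most recent commands, oldest first
        return self.commands[-n:]

    def get_last_n_formatted(self, n):
        # Join the n most recent commands with newlines
        return "\n".join(self.get_last_n_commands(n))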
Example No. 4
 def setUp(self):
     self.history = History()
Example No. 5
# history = SGD(model, optimizer, data_loader, test_loader, num_epochs=1, log_every=50, test_every=1)

# %% DUAL TRAINING
if continuation:
    history, model, ngram, optimizer_primal, optimizer_dual = load()
    print(model.ngram.n)
else:
    model = Model(ngram, output_size=4)
    model.to(DEVICE)
    model.init_weights()

    optimizer_primal = torch.optim.Adam(model.primal.parameters(),
                                        lr=primal_lr)
    optimizer_dual = torch.optim.Adam(model.dual.parameters(), lr=dual_lr)

    history = History()
    for idx in model.dual:
        history['dual ' + str(idx)] = []

epochs_done = 0
while epochs_done < num_epochs:
    history = SPDG(model,
                   optimizer_primal,
                   optimizer_dual,
                   sequence_loader,
                   data_loader,
                   test_loader,
                   save_every,
                   log_every,
                   test_every,
                   sequence_test_loader=sequence_test_loader,
Example No. 6
def SPDG(model, optimizer_primal, optimizer_dual, sequence_loader, data_loader, test_loader, num_epochs=5, log_every=1, test_every=1,
         eval_predictions_on_data=False, eval_predictions_on_sequences=False, show_dual=False, history=None, sequence_test_loader=None,
         eval_ngram_data_stats=False, eval_ngram_test_stats=False):
    if history is None:
        history = History()
        for idx in model.dual:
            history['dual ' + str(idx)] = []
    model.train()
    iter_ = 0
    epoch_ = 0
    try:
        while epoch_ < num_epochs:
            if epoch_ % test_every == 0:
                msg = "Minibatch |   p-loss   |    loss    | err rate | steps/s |"
                if show_dual:
                    for i in model.dual:
                        msg += " {:>7d} |".format(i)
                print(msg)
            epoch_ += 1
            stime = time.time()
            siter = iter_
            for x, y in sequence_loader:
                iter_ += 1
                x = x.to(DEVICE).view(-1, model.n, 28*28).float()
                y = y.to(DEVICE)
                out = model.forward_sequences(x)
                ploss = model.loss_primal(out, y)
                dloss = model.loss_dual(out, y)
                ploss_ = ploss.item()
                loss_ = -dloss.item()

                optimizer_primal.zero_grad()
                ploss.backward(retain_graph=True)
                optimizer_primal.step()

                optimizer_dual.zero_grad()
                dloss.backward()
                optimizer_dual.step()

                with torch.no_grad():
                    _, predictions = out.max(dim=2)
                    a = predictions.view(-1) != y.view(-1)
                    err_rate = 100.0 * a.sum().item() / (out.size(1) * out.size(0))
                    history.err_rate.append(err_rate)
                    history.primal_loss.append(ploss_)
                    history.loss.append(loss_)
                    for idx in model.dual:
                        history['dual ' + str(idx)].append(model.dual[idx].item())

                    if iter_ % log_every == 0:
                        num_iter = iter_ - siter
                        msg = "{:>8}  | {:>10.2f} | {:>10.2f} | {:>7.2f}% | {:>7.2f} |".format(iter_, ploss_, loss_, err_rate,
                                                                                               num_iter / (time.time() - stime))
                        if show_dual:
                            for idx in model.dual:
                                msg += " {:>7.2f} |".format(model.dual[idx].item())
                        print(msg)
                        siter = iter_
                        stime = time.time()
            if epoch_ % test_every == 0:
                epmsg = "Epoch {:>3} | Test errors for: ".format(epoch_)
                history.predictions_test.append(get_statistics(model, data_loader=test_loader))
                for i in range(model.output_size):
                    accuracy = 100.0 - 100.0 * history.predictions_test[-1][i, i] / history.predictions_test[-1][i].sum()
                    epmsg += " {}: {:.2f}, ".format(i, accuracy)
                epmsg = epmsg[:-2]
                if eval_predictions_on_data:
                    epmsg += "\n          | Data errors for: ".format(epoch_)
                    history.predictions_data.append(get_statistics(model, data_loader=data_loader))
                    for i in range(model.output_size):
                        accuracy = 100.0 - 100.0 * history.predictions_data[-1][i, i] / history.predictions_data[-1][i].sum()
                        epmsg += " {}: {:.2f}, ".format(i, accuracy)
                    epmsg = epmsg[:-2]
                if eval_predictions_on_sequences:
                    epmsg += "\n          | Seqs errors for: ".format(epoch_)
                    history.predictions_sequences.append(get_statistics(model, data_loader=sequence_loader, sequences=True))
                    for i in range(model.output_size):
                        accuracy = 100.0 - 100.0 * history.predictions_sequences[-1][i, i] / history.predictions_sequences[-1][i].sum()
                        epmsg += " {}: {:.2f}, ".format(i, accuracy)
                    epmsg = epmsg[:-2]
                if eval_ngram_data_stats:
                    history.ngram_data_stats.append(get_ngram_stats(model, sequence_loader))
                if eval_ngram_test_stats:
                    history.ngram_test_stats.append(get_ngram_stats(model, sequence_test_loader))
                test_err_rate = model.compute_error_rate(test_loader)
                history.test_err_rate.append(test_err_rate)
                print('{0}+\n{1}\n{0}+'.format('-' * (len(msg) - 1), epmsg))
    except KeyboardInterrupt:
        pass
    return history
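
SPDG treats history as a loose metrics container: values are appended to attributes such as history.err_rate and history.loss, while dual variables live under string keys like 'dual 0'. The class itself is not shown in these examples; one plausible sketch that supports both access styles (and the epochs_done counter used by the variant further below) is:

class History(dict):
    """Hypothetical metrics container: attributes and dict keys share one namespace."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self['epochs_done'] = 0  # numeric counter used by some SPDG variants

    def __getattr__(self, name):
        # Create an empty metric list the first time an attribute is read
        return self.setdefault(name, [])

    def __setattr__(self, name, value):
        self[name] = value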
Example No. 7
class DQNAgent(BaseAgent):

    def __init__(self, config):
        super(DQNAgent, self).__init__(config)
        self.history = History(config)
        self.replay_memory = DQNReplayMemory(config)
        self.net = DQN(self.env_wrapper.action_space.n, config)
        self.net.build()
        self.net.add_summary(["average_reward", "average_loss", "average_q", "ep_max_reward", "ep_min_reward", "ep_num_game", "learning_rate"], ["ep_rewards", "ep_actions"])

    def observe(self):
        reward = max(self.min_reward, min(self.max_reward, self.env_wrapper.reward))
        screen = self.env_wrapper.screen
        self.history.add(screen)
        self.replay_memory.add(screen, reward, self.env_wrapper.action, self.env_wrapper.terminal)
        if self.i < self.config.epsilon_decay_episodes:
            self.epsilon -= self.config.epsilon_decay
        if self.i % self.config.train_freq == 0 and self.i > self.config.train_start:
            state, action, reward, state_, terminal = self.replay_memory.sample_batch()
            q, loss= self.net.train_on_batch_target(state, action, reward, state_, terminal, self.i)
            self.total_q += q
            self.total_loss += loss
            self.update_count += 1
        if self.i % self.config.update_freq == 0:
            self.net.update_target()

    def policy(self):
        if np.random.rand() < self.epsilon:
            return self.env_wrapper.random_step()
        else:
            state = self.history.get()/255.0
            a = self.net.q_action.eval({
                self.net.state : [state]
            }, session=self.net.sess)
            return a[0]


    def train(self, steps):
        render = False
        self.env_wrapper.new_random_game()
        num_game, self.update_count, ep_reward = 0,0,0.
        total_reward, self.total_loss, self.total_q = 0.,0.,0.
        ep_rewards, actions = [], []
        t = 0

        for _ in range(self.config.history_len):
            self.history.add(self.env_wrapper.screen)
        for self.i in tqdm(range(self.i, steps)):
            action = self.policy()
            self.env_wrapper.act(action)
            self.observe()
            if self.env_wrapper.terminal:
                t = 0
                self.env_wrapper.new_random_game()
                num_game += 1
                ep_rewards.append(ep_reward)
                ep_reward = 0.
            else:
                ep_reward += self.env_wrapper.reward
                t += 1
            actions.append(action)
            total_reward += self.env_wrapper.reward

            if self.i >= self.config.train_start:
                if self.i % self.config.test_step == self.config.test_step -1:
                    avg_reward = total_reward / self.config.test_step
                    avg_loss = self.total_loss / self.update_count
                    avg_q = self.total_q / self.update_count

                    try:
                        max_ep_reward = np.max(ep_rewards)
                        min_ep_reward = np.min(ep_rewards)
                        avg_ep_reward = np.mean(ep_rewards)
                    except:
                        max_ep_reward, min_ep_reward, avg_ep_reward = 0, 0, 0

                    sum_dict = {
                        'average_reward': avg_reward,
                        'average_loss': avg_loss,
                        'average_q': avg_q,
                        'ep_max_reward': max_ep_reward,
                        'ep_min_reward': min_ep_reward,
                        'ep_num_game': num_game,
                        'learning_rate': self.net.learning_rate,
                        'ep_rewards': ep_rewards,
                        'ep_actions': actions
                    }
                    self.net.inject_summary(sum_dict, self.i)
                    num_game = 0
                    total_reward = 0.
                    self.total_loss = 0.
                    self.total_q = 0.
                    self.update_count = 0
                    ep_reward = 0.
                    ep_rewards = []
                    actions = []

            if self.i % 500000 == 0 and self.i > 0:
                j = 0
                self.save()
            if self.i % 100000 == 0:
                j = 0
                render = True

            if render:
                self.env_wrapper.env.render()
                j += 1
                if j == 1000:
                    render = False

    def play(self, episodes, net_path):
        self.net.restore_session(path=net_path)
        self.env_wrapper.new_game()
        i = 0
        for _ in range(self.config.history_len):
            self.history.add(self.env_wrapper.screen)
        episode_steps = 0
        while i < episodes:
            a = self.net.q_action.eval({
                self.net.state : [self.history.get()/255.0]
            }, session=self.net.sess)
            action = a[0]
            self.env_wrapper.act_play(action)
            self.history.add(self.env_wrapper.screen)
            episode_steps += 1
            if episode_steps > self.config.max_steps:
                self.env_wrapper.terminal = True
            if self.env_wrapper.terminal:
                episode_steps = 0
                i += 1
                self.env_wrapper.new_play_game()
                for _ in range(self.config.history_len):
                    screen = self.env_wrapper.screen
                    self.history.add(screen)
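
In the DQN agents, History(config) acts as a rolling buffer of the last config.history_len screens: add() pushes a frame and get() returns the stacked frames that form the network input. A minimal sketch under those assumptions (screen_height and screen_width are assumed config fields; the actual class may differ):

import numpy as np

class History:
    """Hypothetical rolling buffer of the most recent screens."""

    def __init__(self, config):
        self.buffer = np.zeros(
            (config.history_len, config.screen_height, config.screen_width),
            dtype=np.float32)

    def add(self, screen):
        # Shift everything one slot back and append the newest frame
        self.buffer[:-1] = self.buffer[1:]
        self.buffer[-1] = screen

    def get(self):
        # Stacked frames forming the agent's current state
        return self.buffer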
Example No. 8
class DQNAgent(BaseAgent):
    # Initialize the parent class with the config object, plus a History,
    # a replay memory, and the neural network with its architecture.
    def __init__(self, config):
        super(DQNAgent, self).__init__(config)
        self.history = History(config)
        self.replay_memory = DQNReplayMemory(config)
        self.net = DQN(self.env_wrapper.action_space.n, config)
        self.net.build()
        self.net.add_summary(["average_reward", "average_loss", "average_q", "ep_max_reward", "ep_min_reward", "ep_num_game", "learning_rate"], ["ep_rewards", "ep_actions"])

    # Process one observation made by the agent.
    def observe(self):
        reward = max(self.min_reward, min(self.max_reward, self.env_wrapper.reward)) # Clamp the reward between min_reward and max_reward
        screen = self.env_wrapper.screen # Grab the current screen from the wrapper
        self.history.add(screen) # Append the screen to the history
        self.replay_memory.add(screen, reward, self.env_wrapper.action, self.env_wrapper.terminal) # Store the transition in the replay memory
        if self.i < self.config.epsilon_decay_episodes: # Decay epsilon according to the number of timesteps elapsed
            self.epsilon -= self.config.epsilon_decay
        if self.i % self.config.train_freq == 0 and self.i > self.config.train_start: # Update the network weights according to the settings in the config object
            state, action, reward, state_, terminal = self.replay_memory.sample_batch() # Sample a batch from the replay memory
            q, loss = self.net.train_on_batch_target(state, action, reward, state_, terminal, self.i) # Train the network on the sampled batch
            self.total_q += q # Accumulate the Q value
            self.total_loss += loss # Accumulate the loss
            self.update_count += 1 # Count this training update
        if self.i % self.config.update_freq == 0: # Check whether the target network needs updating
            self.net.update_target() # Update the target network

    # Define the policy under which the agent selects an action.
    def policy(self):
        if np.random.rand() < self.epsilon: # With probability epsilon
            return self.env_wrapper.random_step() # take a random action
        else: # Otherwise
            state = self.history.get()/255.0
            a = self.net.q_action.eval({
                self.net.state : [state]
            }, session=self.net.sess)
            return a[0] # Take the action with the best predicted future return according to the network

    # Run the agent's training loop.
    def train(self, steps):
        render = False # Rendering is initially disabled
        self.env_wrapper.new_random_game() # Start a game and take it to a random state
        num_game, self.update_count, ep_reward = 0,0,0. # Initialize the counters to 0
        total_reward, self.total_loss, self.total_q = 0.,0.,0. # Initialize the accumulators to 0
        ep_rewards, actions = [], [] # Initialize empty lists
        t = 0

        for _ in range(self.config.history_len): # Iterate over the history length
            self.history.add(self.env_wrapper.screen) # Seed the history with copies of the initial screen
        for self.i in tqdm(range(self.i, steps)): # Iterate over the training timesteps
            action = self.policy() # Select the agent's action according to the policy
            self.env_wrapper.act(action) # Execute the action in the environment
            self.observe() # Process the observation
            if self.env_wrapper.terminal: # If a terminal state was reached
                t = 0 # reset the episode timestep counter
                self.env_wrapper.new_random_game() # start a new game in a random state
                num_game += 1 # count the finished episode
                ep_rewards.append(ep_reward) # store the episode reward
                ep_reward = 0. # reset the reward for the next episode
            else: # otherwise (non-terminal state)
                ep_reward += self.env_wrapper.reward # accumulate the episode reward
                t += 1 # count the elapsed timestep
            actions.append(action) # store the executed action
            total_reward += self.env_wrapper.reward # accumulate the total reward

            if self.i >= self.config.train_start: # Once more timesteps than train_start have elapsed
                if self.i % self.config.test_step == self.config.test_step -1: # and the test-step interval is reached
                    avg_reward = total_reward / self.config.test_step # Average reward over the last test_step steps
                    avg_loss = self.total_loss / self.update_count # Average loss over the network updates
                    avg_q = self.total_q / self.update_count # Average Q value over the network updates

                    try:
                        max_ep_reward = np.max(ep_rewards) # Maximum episode reward
                        min_ep_reward = np.min(ep_rewards) # Minimum episode reward
                        avg_ep_reward = np.mean(ep_rewards) # Mean episode reward
                    except:
                        max_ep_reward, min_ep_reward, avg_ep_reward = 0, 0, 0 # On error (e.g. no finished episodes) fall back to 0

                    sum_dict = { # Build a dictionary with the summary values
                        'average_reward': avg_reward,
                        'average_loss': avg_loss,
                        'average_q': avg_q,
                        'ep_max_reward': max_ep_reward,
                        'ep_min_reward': min_ep_reward,
                        'ep_num_game': num_game,
                        'learning_rate': self.net.learning_rate,
                        'ep_rewards': ep_rewards,
                        'ep_actions': actions
                    }
                    self.net.inject_summary(sum_dict, self.i) # Inject the dictionary into the network to create a tf.Summary
                    num_game = 0 # Reset the values accumulated for the summary
                    total_reward = 0.
                    self.total_loss = 0.
                    self.total_q = 0.
                    self.update_count = 0
                    ep_reward = 0.
                    ep_rewards = []
                    actions = []

            if self.i % 500000 == 0 and self.i > 0: # Every 500,000 timesteps save a checkpoint of the network
                j = 0
                self.save()
            if self.i % 100000 == 0: # Every 100,000 timesteps enable rendering for 1,000 steps
                j = 0
                render = True

            # if render:
            #     self.env_wrapper.env.render()
            #     j += 1
            #     if j == 1000:
            #         render = False


    # Play episodes using the trained network.
    def play(self, episodes, net_path):
        self.net.restore_session(path=net_path) # Load the network weights
        self.env_wrapper.new_game() # Start a new game
        i = 0
        for _ in range(self.config.history_len): # Iterate over the history length
            self.history.add(self.env_wrapper.screen) # Seed the history with the first screens
        episode_steps = 0
        while i < episodes: 
            a = self.net.q_action.eval({
                self.net.state : [self.history.get()/255.0]
            }, session=self.net.sess) # Compute the action with the best predicted return according to the network
            action = a[0] # Take the first element, i.e. the greedy action
            self.env_wrapper.act_play(action) # Execute the action in the environment and check for lost lives
            self.history.add(self.env_wrapper.screen) # Store the new screen
            episode_steps += 1
            if episode_steps > self.config.max_steps: # If the maximum number of steps per episode is reached
                self.env_wrapper.terminal = True # mark the episode as terminal
            if self.env_wrapper.terminal: # If the episode has ended
                episode_steps = 0 # reset the step counter for the next episode
                i += 1 # count the finished episode
                self.env_wrapper.new_play_game() # Start a new game in a random state
                for _ in range(self.config.history_len): # Iterate over the history length
                    screen = self.env_wrapper.screen
                    self.history.add(screen) # Store the screen in the history
Example No. 9
def SPDG(model,
         optimizer_primal,
         optimizer_dual,
         sequence_loader,
         data_loader,
         test_loader,
         num_epochs=5,
         log_every=1,
         test_every=1,
         predictions_on_data=False,
         predictions_on_sequences=False,
         show_dual=False,
         history=None,
         sequence_test_loader=None,
         ngram_data_stats=False,
         ngram_test_stats=False,
         loss_on_test=False,
         remember_dual=False):
    if history is None:
        history = History()
        if remember_dual:
            if model.ngram.size() > 20:
                warnings.warn(
                    f"Model's n-gram is large ({model.ngram.size()} entries). "
                    "Remembering dual variables will occupy a significant amount "
                    "of memory. Consider passing remember_dual=False."
                )
        for idx in model.dual:
            history['dual ' + str(idx)] = []
    model.train()
    iter_ = 0
    epoch_ = 0
    try:
        while epoch_ < num_epochs:
            if epoch_ % test_every == 0:
                msg = "Minibatch |   p-loss   |    loss    | err rate | steps/s |"
                if show_dual:
                    for i in model.dual:
                        msg += " {:>7d} |".format(i)
                print(msg)
            epoch_ += 1
            stime = time.time()
            siter = iter_
            for x, y in sequence_loader:
                iter_ += 1
                x = x.to(DEVICE).view(-1, model.n, 28 * 28).float()
                y = y.to(DEVICE)
                out = model.forward_sequences(x)
                ploss = model.loss_primal(out)
                dloss = model.loss_dual(out)
                ploss_ = ploss.item()
                loss_ = -dloss.item()

                optimizer_primal.zero_grad()
                ploss.backward(retain_graph=True)
                optimizer_primal.step()

                optimizer_dual.zero_grad()
                dloss.backward()
                optimizer_dual.step()

                with torch.no_grad():
                    _, predictions = out.max(dim=2)
                    a = predictions.view(-1) != y.view(-1)
                    err_rate = 100.0 * a.sum().item() / (out.size(1) *
                                                         out.size(0))
                    history.err_rate.append(err_rate)
                    history.primal_loss.append(ploss_)
                    history.loss.append(loss_)
                    if remember_dual:
                        for idx in model.dual:
                            history['dual ' + str(idx)].append(
                                model.dual[idx].item())

                    if iter_ % log_every == 0:
                        num_iter = iter_ - siter
                        msg = "{:>8}  | {:>10.2f} | {:>10.2f} | {:>7.2f}% | {:>7.2f} |".format(
                            iter_, ploss_, loss_, err_rate,
                            num_iter / (time.time() - stime))
                        if show_dual:
                            for idx in model.dual:
                                msg += " {:>7.2f} |".format(
                                    model.dual[idx].item())
                        print(msg)
                        siter = iter_
                        stime = time.time()
            if epoch_ % test_every == 0:
                epmsg = "Epoch {:>3} | Test errors for: ".format(
                    epoch_ + history.epochs_done)
                history.predictions_test.append(
                    get_statistics(model, data_loader=test_loader))
                for i in range(model.output_size):
                    accuracy = 100.0 - 100.0 * history.predictions_test[-1][
                        i, i] / history.predictions_test[-1][i].sum()
                    epmsg += " {}: {:.2f}, ".format(i, accuracy)
                epmsg = epmsg[:-2]
                if predictions_on_data:
                    epmsg += "\n          | Data errors for: ".format(epoch_)
                    history.predictions_data.append(
                        get_statistics(model, data_loader=data_loader))
                    for i in range(model.output_size):
                        accuracy = 100.0 - 100.0 * history.predictions_data[
                            -1][i, i] / history.predictions_data[-1][i].sum()
                        epmsg += " {}: {:.2f}, ".format(i, accuracy)
                    epmsg = epmsg[:-2]
                if predictions_on_sequences:
                    epmsg += "\n          | Seqs errors for: ".format(epoch_)
                    if ngram_data_stats:
                        predictions, ngram_data = get_statistics(
                            model,
                            data_loader=sequence_loader,
                            sequences=True,
                            return_ngram=True)
                        history.predictions_sequences.append(predictions)
                        history.ngram_data_stats.append(ngram_data)
                    else:
                        history.predictions_sequences.append(
                            get_statistics(model,
                                           data_loader=sequence_loader,
                                           sequences=True))
                    for i in range(model.output_size):
                        accuracy = 100.0 - 100.0 * history.predictions_sequences[
                            -1][i, i] / history.predictions_sequences[-1][
                                i].sum()
                        epmsg += " {}: {:.2f}, ".format(i, accuracy)
                    epmsg = epmsg[:-2]
                if ngram_data_stats and not predictions_on_sequences:
                    history.ngram_data_stats.append(
                        get_ngram_stats(model, sequence_loader))
                if ngram_test_stats:
                    history.ngram_test_stats.append(
                        get_ngram_stats(model, sequence_test_loader))
                if loss_on_test:
                    for x, y in sequence_test_loader:
                        with torch.no_grad():
                            x = x.to(DEVICE).view(-1, model.n, 28 * 28).float()
                            out = model.forward_sequences(x)
                            history.test_primal_loss.append(
                                model.loss_primal(out).item())
                            history.test_loss.append(
                                -model.loss_dual(out).item())
                if not remember_dual:
                    for idx in model.dual:
                        history['dual ' + str(idx)].append(
                            model.dual[idx].item())
                print('{0}+\n{1}\n{0}+'.format('-' * (len(msg) - 1), epmsg))
    except KeyboardInterrupt:
        pass
    history.epochs_done += num_epochs
    return history
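
Because this variant both reads and updates history.epochs_done and returns the same History object it was given (or created), training can be resumed simply by passing the returned history back in. A hedged usage sketch (loader names and epoch counts are placeholders):

# First run: a fresh History is created inside SPDG
history = SPDG(model, optimizer_primal, optimizer_dual,
               sequence_loader, data_loader, test_loader,
               num_epochs=5, remember_dual=False)

# Later: continue for five more epochs, appending to the same history;
# the epoch numbers printed at test time carry on from history.epochs_done
history = SPDG(model, optimizer_primal, optimizer_dual,
               sequence_loader, data_loader, test_loader,
               num_epochs=5, history=history, remember_dual=False)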
Example No. 10
BOT_NAME = args.name
SLACK_CHANNEL = args.channel

# 'Hard' Constants - may rely on other values set but shouldn't be changed
SLACK_URL = args.url
DATA_FILE = f"last_post_data_{args.script_name}.json"
# endregion

# Get data and convert accordingly
cb = Coinbase(Currencies.default(), args.interval)
prices = cb.price_list()
cur_price = prices[0]

# Get history from last runs, use it to work out what test to make
history = History(DATA_FILE)

# Get stats from coinbase data
# If change isn't large enough, then update history and exit
stats = HourData(prices, EMA_NUM_HOURS)
if Analysis.ema_checks(stats, history, EMA_THRESHOLD_PERCENT,
                       EMA_RESET_PERCENT):
    sys.exit(1)

if not Analysis.should_post(history, stats, prices, EMA_THRESHOLD_PERCENT):
    sys.exit(1)

logging.info("Message should be posted, generating attachment")
attachments = Slack.generate_post(prices, stats, Currencies.default())
image_url = SlackImages.get_image(stats.is_diff_positive)
logging.info("Posting to slack")
Example No. 11
class DQNAgent(BaseAgent):
    def __init__(self, config):
        super(DQNAgent, self).__init__(config)
        self.history = History(config)
        self.replay_memory = DQNReplayMemory(config)
        self.net = DQN(4, config)
        self.net.build()
        self.net.add_summary([
            "average_reward", "average_loss", "average_q", "ep_max_reward",
            "ep_avg_reward", "ep_min_reward", "ep_num_game", "learning_rate"
        ], ["ep_rewards", "ep_actions"])

    def observe(self):
        reward = max(self.min_reward,
                     min(self.max_reward, self.env_wrapper.reward))
        color = self.env_wrapper.color
        self.history.add(color)
        self.replay_memory.add(color, reward, self.env_wrapper.action,
                               self.env_wrapper.terminal)
        if self.i < self.config.epsilon_decay_episodes:
            self.epsilon -= self.config.epsilon_decay
        if self.i % self.config.train_freq == 0 and self.i > self.config.train_start:
            #print('----> i',self.i)
            ### training starts only after train_start=20K steps, mem_size is 800K, train_freq is 8,
            ### I guess that's why it's okay to not worry about sampling with repetitions
            state, action, reward, state_, terminal = self.replay_memory.sample_batch(
            )
            q, loss = self.net.train_on_batch_target(state, action, reward,
                                                     state_, terminal, self.i)
            ### self.i is passed to implement lr decay
            self.total_q += q
            self.total_loss += loss
            self.update_count += 1
        if self.i % self.config.update_freq == 0:
            self.net.update_target()

    def policy(self):
        if np.random.rand() < self.epsilon:
            return self.env_wrapper.random_step()
        else:
            state = self.history.get()
            a = self.net.q_action.eval({self.net.state: [state]},
                                       session=self.net.sess)
            return a[0]

    def train(self, steps):
        f = open('dqn2.txt', 'w')
        render = False
        self.env_wrapper.new_random_game()
        num_game, self.update_count, ep_reward = 0, 0, 0.
        total_reward, self.total_loss, self.total_q = 0., 0., 0.
        ep_rewards, actions = [], []
        t = 0
        print(self.net.number_of_trainable_parameters())

        for _ in range(self.config.history_len):
            self.history.add(self.env_wrapper.color)
            ### So, the first state is just the first color, repeated a number of times
        for self.i in tqdm(range(self.i, steps)):
            #take action, observe
            action = self.policy()
            self.env_wrapper.act(action)
            self.observe()
            if self.env_wrapper.terminal:
                t = 0
                self.env_wrapper.new_random_game()
                num_game += 1
                ep_rewards.append(ep_reward)
                ep_reward = 0.
            else:
                ep_reward += self.env_wrapper.reward
                t += 1
            actions.append(action)
            total_reward += self.env_wrapper.reward
            #print(self.i,action,total_reward, self.env_wrapper.terminal)
            #total_reward, max_ep_reward, min_ep_reward, avg_ep_reward keep track of reward earned every self.config.test_step=5000 steps
            if self.i >= self.config.train_start:
                if self.i % self.config.test_step == self.config.test_step - 1:
                    avg_reward = total_reward / self.config.test_step
                    avg_loss = self.total_loss / self.update_count
                    avg_q = self.total_q / self.update_count

                    try:
                        max_ep_reward = np.max(ep_rewards)
                        min_ep_reward = np.min(ep_rewards)
                        avg_ep_reward = np.mean(ep_rewards)
                    except:
                        max_ep_reward, min_ep_reward, avg_ep_reward = 0, 0, 0

                    sum_dict = {
                        'average_reward': avg_reward,
                        'average_loss': avg_loss,
                        'average_q': avg_q,
                        'ep_max_reward': max_ep_reward,
                        'ep_min_reward': min_ep_reward,
                        'ep_num_game': num_game,
                        'ep_avg_reward': avg_ep_reward,
                        'learning_rate': self.net.learning_rate,
                        'ep_rewards': ep_rewards,
                        'ep_actions': actions
                    }
                    self.net.inject_summary(sum_dict, self.i)
                    num_game = 0
                    total_reward = 0.
                    self.total_loss = 0.
                    self.total_q = 0.
                    self.update_count = 0
                    ep_reward = 0.
                    ep_rewards = []
                    actions = []
                    f.write(str(avg_ep_reward))
            if self.i % 50000 == 0 and self.i > 0:
                j = 0
                print('saving..')
                self.save()
                #play_score = self.play(episodes=self.config.num_episodes_for_play_scores_summary, net_path=self.net.dir_model)
                #self.net.inject_summary({'play_score':play_score}, self.i)
            if self.i % 100000 == 0:
                j = 0
                render = True

            if render:
                #self.env_wrapper.env.render()
                j += 1
                if j == 1000:
                    render = False
        f.close()

    def play(self, episodes, net_path, verbose=False, print_average=True):
        d = []
        self.net.restore_session(path=net_path)
        self.env_wrapper.new_game()
        i = 0
        for _ in range(self.config.history_len):
            self.history.add(self.env_wrapper.color)
        episode_steps = 0
        ###EDIT (LJ): added rewards calculation
        episode_reward = 0
        actions_list = []
        while i < episodes:
            # Choose action:
            a = self.net.q_action.eval({self.net.state: [self.history.get()]},
                                       session=self.net.sess)
            action = a[0]
            actions_list.append(action)
            #Take Action
            self.env_wrapper.act_play(action)
            self.history.add(self.env_wrapper.color)
            episode_steps += 1
            episode_reward += self.env_wrapper.reward
            if episode_steps > self.config.max_steps:
                self.env_wrapper.terminal = True
            if self.env_wrapper.terminal:
                if verbose:
                    print('episode terminated in ' + str(episode_steps) +
                          ' steps with reward ' + str(episode_reward))
                    print('ACTIONS TAKEN:')
                    print(actions_list)
                actions_list = []
                d.append(episode_reward)
                episode_steps = 0
                episode_reward = 0
                i += 1
                self.env_wrapper.new_play_game()
                for _ in range(self.config.history_len):
                    color = self.env_wrapper.color
                    self.history.add(color)
        if verbose:
            print('ALL, AVERAGE:', [d, sum(d) / len(d)])
        if print_average:
            print('AVERAGE:', sum(d) / len(d))
        return sum(d) / len(d)