示例#1
0
 def test_no_5_actions_0_1_what_is_terrible_idea(self):
     agents: List[SmartAgent] = get_SmartAgents()
     for agent in agents:
         agent.model = agent._build_model(layers=[20, 50, 30, 18])
     env = Env(agents)
     env.u = env_settings.u_all_2
     max_time = 90
     Globals().time = 0
     for t in range(max_time):
         actions = [0]
         # if t == 60 or t >= 63:
         #     actions = [1]
         # if t == 61 or t == 62:
         #     actions = [orange]
         env.step(actions)
     for agent in agents:
         agent.reshape_rewards()
     # exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=env.global_memories,
     #                         netName='net4',
     #                         densityName='test_learn_no_1')
     # exportData.saveToJson()
     agents[0].train_full(epochs=15000, learning_rate=0.001)
     x = [4, 4, 0]
     predictions = agents[0].model.predict(np.array([x]))
     self.assertTrue(predictions[0][0] > predictions[0][1])
示例#2
0
 def test_no_1_111_then_222_what_is_brilliant_idea(self):
     agents: List[SmartAgent] = get_SmartAgents()
     for agent in agents:
         agent.model = agent._build_model(layers=[20, 50, 30, 18])
     env = Env(agents)
     env.u = env_data.u_all_2
     max_time = 90
     Globals().time = 0
     for t in range(max_time):
         # actions: [1, 1, 1] until t == 60, then [2, 2, 2] (with [0, 0, 0] at t == 61)
         actions = [1, 1, 1]
         if t == 60 or t >= 62:
             actions = [2, 2, 2]
         if t == 61:
             actions = [0, 0, 0]
         env.step(actions)
     for agent in agents:
         agent.reshape_rewards()
     exportData = ExportData(learningMethod='DQN',
                             learningEpochs=0,
                             nets=env.global_memories,
                             netName='net4',
                             densityName='test_learn_no_1')
     exportData.saveToJson()
     agents[0].train_full(epochs=7000, learning_rate=0.001)
     x = [2, 2, 2] + [0.6, 0.6, 28.88] + [0.126, 0.126, 0.18] + [1]
示例#3
0
def train(learntAgents=True, max_time_learn=20):
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    start_time = timer()
    x_batch = batches[0]['x_batch']
    y_batch = batches[0]['y_batch']
    model = models[0]
    val_loss = 5000
    escape_flag = False
    while timer() - start_time < max_time_learn and not escape_flag:
        res = model.fit(x_batch, y_batch, batch_size=100, epochs=1, verbose=0, validation_split=0.2)
        if res.history['val_loss'][-1] > val_loss:
            escape_flag = True
            loss = res.history['val_loss'][-1]
            print(f'network result: {loss} loss')
            val_loss = 5000
        else:
            val_loss = res.history['val_loss'][-1]
        x = [4, 20]
        pred = model.predict(np.array([x]))
        Globals().pred_plot_memory.append(pred)
    model.save('static_files/model-agent' + str(0) + '.h5')
    plt.plot([pred[0][0] for pred in Globals().pred_plot_memory], color='red', label='0')
    plt.plot([pred[0][1] for pred in Globals().pred_plot_memory], color='green', label='1')
    plt.legend()
    plt.title('Predicted rewards for the actions taken \n during the monitored state [4, 20]')
    plt.savefig('images_generated/state_predictions.png')
    plt.close()
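For reference, a minimal self-contained sketch of the early-stopping pattern used in train() above: fit in short rounds, watch val_loss, stop once it rises. The toy model, layer sizes and random data are illustrative assumptions, not part of the project.

import numpy as np
from tensorflow import keras
from timeit import default_timer as timer

def train_until_val_loss_rises(x, y, max_seconds=20):
    # Toy 2-output regressor standing in for the agent model (assumption).
    model = keras.models.Sequential([
        keras.layers.Dense(16, activation='relu', input_dim=x.shape[1]),
        keras.layers.Dense(2),
    ])
    model.compile(loss='mse', optimizer='adam')
    best_val_loss = float('inf')
    start = timer()
    while timer() - start < max_seconds:
        res = model.fit(x, y, batch_size=100, epochs=1, verbose=0, validation_split=0.2)
        val_loss = res.history['val_loss'][-1]
        if val_loss > best_val_loss:
            break  # validation loss went up: stop, as in train() above
        best_val_loss = val_loss
    return model

# Example usage with random data (illustration only):
# model = train_until_val_loss_rises(np.random.rand(500, 3), np.random.rand(500, 2))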
示例#4
0
 def test_no_2_pass_action_0_long_time_then_1(self):
     # TESTING: phase change
     max_time = 90
     agents = get_SmartAgents()
     for agent in agents:
         agent.yellow_phase_duration = 2
     Globals().time = 0
     env = Env(agents)
     Globals().u_value = 2
     env.u = Globals().get_u(max_time)
     env.yellow_phase_duration = 2
     for t in range(max_time):
         actions = [0, 0, 0]
         if t == 60 or t > 62:
             actions = [1, 1, 1]
         if t == 61 or t == 62:
             actions = [yellow, yellow, yellow]
         time = Globals().time  # time = t
         env.step(actions)
         time = Globals().time  # time = t + 1
         if t in range(3, 60):
             self.assertEqual([agent.actual_phase for agent in agents],
                              [0, 0, 0])
         if t == 60 or t == 61:
             self.assertEqual([agent.actual_phase for agent in agents],
                              [yellow, yellow, yellow])
         if t >= 62:
             self.assertEqual([agent.actual_phase for agent in agents],
                              [1, 1, 1])
     exportData = ExportData(learningMethod='Nothing',
                             learningEpochs=0,
                             nets=env.global_memories,
                             netName='net14',
                             densityName='test_no_2')
     exportData.saveToJson()
示例#5
0
    def test_no_4_pass_action_0_1_0(self):
        # TESTING: actual_phase - whether it is also consistent in memories
        max_time = 90
        agents = get_SmartAgents()
        for agent in agents:
            agent.yellow_phase_duration = 2
        Globals().time = 0
        env = Env(agents)
        env.u = env_settings.u_all_2
        env.yellow_phase_duration = 2
        for t in range(max_time):
            # actions: [0] until t == 60, then [1], with yellow transition steps
            actions = [0]
            if t == 60 or 63 <= t < 70:
                actions = [1]
            if t == 61 or t == 62:
                actions = [yellow]
            if t == 70 or t >= 73:
                actions = [0]
            if t == 71 or t == 72:
                actions = [yellow]
            env.step(actions)
            # print(f't:{t}, {agents[0].actual_phase}')
            if t in range(3, 60):
                self.assertEqual(
                    [agent.actual_phase for agent in agents], [0]
                )  # After the operation at time 60 we have this phase for the state at time 60
                self.assertEqual(
                    [agent.memories[t].state.actual_phase
                     for agent in agents], [0]
                )  # After the operation at time 60 we have this phase for the state at time 60
            if t == 60 or t == 61:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [yellow])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [yellow])
            if 62 <= t <= 69:
                self.assertEqual([agent.actual_phase for agent in agents], [1])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [1])
            if t == 70 or t == 71:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [yellow])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [yellow])
            if t >= 72:
                self.assertEqual([agent.actual_phase for agent in agents], [0])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [0])

        exportData = ExportData(learningMethod='Monte Carlo TODO',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='net14',
                                densityName='test_no_4')
        exportData.saveToJson()
示例#6
0
def run_random(epochs, agents=None) -> List[SmartAgent]:
    Globals().max_epsilon = 0
    if agents is None:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        env: Env = epoch(agents)
    return agents
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1)):
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents,
                         u=Globals().get_u(Globals().vp.max_time_learn),
                         time=Globals().vp.max_time_learn)
        for agent in env.agents:
            agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='politechnika',
                                    densityName='random_now' +
                                    str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
    save_batches(agents)
    return agents
示例#8
0
    def test_no_6_starting_phase_actual_phase(self):
        agents: List[SmartAgent] = get_SmartAgents()
        for agent in agents:
            agent.model = agent._build_model(layers=[20, 50, 30, 18])
        env = Env(agents)
        env.u = env_data.u_all_2
        max_time = 90
        Globals().time = 0
        for t in range(max_time):
            # actions: [1, 1, 1] until t == 60, then [2, 2, 2], then [3, 3, 3]
            actions = [1, 1, 1]
            if t == 60 or t >= 62:
                actions = [2, 2, 2]
            if t == 61 or t == 71:
                actions = [0, 0, 0]
            if t == 70 or t >= 72:
                actions = [3, 3, 3]
            env.step(actions)
        for agent in agents:
            agent.reshape_rewards()
        print(agents[0].memories[0])
        self.assertEqual(agents[0].memories[60].state.starting_actual_phase,1)
        self.assertEqual(agents[0].memories[61].state.starting_actual_phase,0)
        self.assertEqual(agents[0].memories[62].state.starting_actual_phase,0)
        self.assertEqual(agents[0].memories[63].state.starting_actual_phase,2)

        self.assertEqual(agents[0].memories[70].state.starting_actual_phase,2)
        self.assertEqual(agents[0].memories[71].state.starting_actual_phase,0)
        self.assertEqual(agents[0].memories[72].state.starting_actual_phase,0)
        self.assertEqual(agents[0].memories[73].state.starting_actual_phase,3)

        exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0, nets=env.global_memories,
                                netName='net4',
                                densityName='test_learn_no_1')
        exportData.saveToJson()
示例#9
0
 def test_no_5_reshaping_pass_action_0_1_0(self):
     # TESTING: rewards
     max_time = 90
     agents = get_SmartAgents()
     Globals().time = 0
     Globals().gamma = 0
     env = Env(agents)
     env.u = env_settings.u_all_9
     for t in range(max_time):
         # actions: [0] until t == 60, then [1], with orange transition steps
         actions = [0]
         if t == 1 or t == 2:
             actions = [orange]
         if t == 60 or 63 <= t < 70:
             actions = [1]
         if t == 61 or t == 62:
             actions = [orange]
         if t == 70 or t >= 73:
             actions = [0]
         if t == 71 or t == 72:
             actions = [orange]
         env.step(actions)
     env.agents[0].save_batch()
     env.agents[0].reshape_rewards()
     # self.assertAlmostEqual(env.agents[0].memories[60].reward, 2, 0)
     # self.assertAlmostEqual(env.agents[0].memories[61].reward, 4, 0)
     # self.assertAlmostEqual(env.agents[0].memories[62].reward, 6, 0)
     # env.update_memory_rewards()
     exportData = ExportData(learningMethod='Monte Carlo TODO',
                             learningEpochs=0,
                             nets=env.global_memories,
                             netName='net11',
                             densityName='test_no_6')
     exportData.saveToJson()
示例#10
0
 def test_no_2_pass_action_1_1_1_long_time_then_2_2_2(self):
     # TESTING: phase change
     max_time = 90
     agents = get_SmartAgents()
     Globals().time = 0
     env = Env(agents)
     env.u = env_settings.u_all_2
     for t in range(max_time):
         # actions: [1, 1, 1] until t == 60, then [2, 2, 2]
         actions = [1, 1, 1]
         if t == 60 or t >= 62:
             actions = [2, 2, 2]
         if t == 61:
             actions = [0, 0, 0]
         # time = Globals().time # time = t
         env.step(actions)
         # time = Globals().time # time = t + 1
         if t in range(60):
             self.assertEqual([agent.actual_phase for agent in agents],
                              [1, 1, 1])
         if t == 60 or t == 61:
             self.assertEqual([agent.actual_phase for agent in agents],
                              [0, 0, 0])
         if t >= 63:
             self.assertEqual([agent.actual_phase for agent in agents],
                              [2, 2, 2])
示例#11
0
 def test_no_1_pass_action_1_1_1_all_time(self):
     # u - 2 vehicles flow in everywhere at every time step
     # we apply action [1,1,1] the whole time
     # all vehicles pass within a single time step
     # orange_phase_duration = 0, same as phase_duration
     # TESTING: rewards
     max_time = 90
     agents = get_SmartAgents()
     for agent in agents:
         agent.orange_phase_duration = 0
     Globals().time = 0
     env = Env(agents)
     env.u = env_data.u_all_2
     for t in range(max_time):
         actions = [1, 1, 1]
         env.step(actions)
         # print(f't:{t} {env.global_rewards[t]}')
         if 0 <= t <= 2:
             self.assertEqual(
                 env.global_rewards[t],
                 [0, 0, 0])  # nothing is flowing through yet at all
         if 3 <= t <= 11:
             self.assertEqual(env.global_rewards[t][0],
                              2)  # only agent 0 has a flow of 2
     exportData = ExportData(learningMethod='Monte Carlo TODO',
                             learningEpochs=0,
                             nets=env.global_memories,
                             netName='net4',
                             densityName='test_no_1')
     exportData.saveToJson()
示例#12
0
 def test_no_5_reshaping_pass_action_1_1_1_long_time_then_2_2_2_long_time_then_3_3_3(
         self):
     # TESTING: rewards
     max_time = 90
     agents = get_SmartAgents()
     Globals().time = 0
     Globals().gamma = 0
     env = Env(agents)
     env.u = env_settings.u_all_2
     for t in range(max_time):
         # actions: [1, 1, 1] until t == 60, then [2, 2, 2], then [3, 3, 3]
         actions = [1, 1, 1]
         if t == 60 or 63 <= t < 70:
             actions = [2, 2, 2]
         if t == 61 or t == 62:
             actions = [0, 0, 0]
         if t == 70 or t >= 73:
             actions = [3, 3, 3]
         if t == 71 or t == 72:
             actions = [0, 0, 0]
         env.step(actions)
     env.agents[0].save_batch()
     env.agents[0].reshape_rewards()
     self.assertAlmostEqual(env.agents[0].memories[60].reward, 23.3, 0)
     self.assertAlmostEqual(env.agents[0].memories[61].reward, 37, 0)
     self.assertAlmostEqual(env.agents[0].memories[62].reward, 39.1, 0)
     env.update_global_memory_rewards()
     exportData = ExportData(learningMethod='Monte Carlo TODO',
                             learningEpochs=0,
                             nets=env.global_memories,
                             netName='net4',
                             densityName='test_no_5')
     exportData.saveToJson()
示例#13
0
def epoch_random(env, max_time):
    Globals().epsilon = 0
    agents: List[SmartAgent] = get_SmartAgents()
    for t in range(max_time):
        actions: List[ActionInt] = [random.choice(agent.local_action_space) for agent in agents]
        env.step(actions)
    return agents
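The random epochs above explore with random.choice; for contrast, a rough sketch of how an epsilon-greedy choice could sit on top of the same pieces (agent.local_action_space, agent.model.predict). This is an illustration only, under the assumption that actions are indexed 0..n-1; the project's actual SmartAgent policy is not shown in these snippets.

import random
import numpy as np

def choose_action(agent, state_vector, epsilon):
    # With probability epsilon take a random action (exploration),
    # otherwise take the action with the highest predicted reward (exploitation).
    if random.random() < epsilon:
        return random.choice(agent.local_action_space)
    q_values = agent.model.predict(np.array([state_vector]))[0]
    return int(np.argmax(q_values))  # assumes action i corresponds to output i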
示例#14
0
    def test_no_4_pass_action_1_1_1_long_time_then_2_2_2_long_time_then_3_3_3(
            self):
        # TESTING: actual_phase - whether it is also consistent in memories
        max_time = 90
        agents = get_SmartAgents()
        Globals().time = 0
        env = Env(agents)
        env.u = env_settings.u_all_2
        for t in range(max_time):
            # actions: [1, 1, 1] until t == 60, then [2, 2, 2], then [3, 3, 3]
            actions = [1, 1, 1]
            if t == 60 or 63 <= t < 70:
                actions = [2, 2, 2]
            if t == 61 or t == 62:
                actions = [0, 0, 0]
            if t == 70 or t >= 73:
                actions = [3, 3, 3]
            if t == 71 or t == 72:
                actions = [0, 0, 0]
            env.step(actions)
            if t in range(60):
                self.assertEqual(
                    [agent.actual_phase for agent in agents], [1, 1, 1]
                )  # After the operation at time 60 we have this phase for the state at time 60
                self.assertEqual(
                    [agent.memories[t].state.actual_phase
                     for agent in agents], [1, 1, 1]
                )  # After the operation at time 60 we have this phase for the state at time 60
            if t == 60 or t == 61:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [0, 0, 0])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [0, 0, 0])
            if 69 >= t >= 62:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [2, 2, 2])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [2, 2, 2])
            if t == 70 or t == 71:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [0, 0, 0])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [0, 0, 0])
            if t >= 72:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [3, 3, 3])
                self.assertEqual(
                    [agent.memories[t].state.actual_phase for agent in agents],
                    [3, 3, 3])

        exportData = ExportData(learningMethod='Monte Carlo TODO',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='net4',
                                densityName='test_no_4')
        exportData.saveToJson()
示例#15
0
 def test_no_5_some_batch_data(self):
     agents = get_SmartAgents()
     actual_phase = 0
     orange_phase_duration = 2
     phase_duration = orange_phase_duration + 1
     den = [0, 0, 10, 8]
     den_pred = [2, 2, 2, 78]
     state_to_predict = LearningState(
         actual_phase=actual_phase,
         starting_actual_phase=actual_phase,
         phase_duration=phase_duration,
         global_densities=den_pred + [0, 0],
         densities=den,
         orange_phase_duration=orange_phase_duration)
     for i in range(150):
         # keeping the phase - the worse choice
         action_0 = [0]
         env = single_simulate(agents,
                               actual_phase,
                               phase_duration,
                               den,
                               orange_phase_duration=orange_phase_duration,
                               actions=action_0,
                               u=env_settings.u_all_2)
         for x in range(40):
             simulate_from_env(env, action_0)
         action_1 = [1]
         # changing the phase - the better choice
         # print('stan bazowy', env.x[env.t])
         # print(env.global_memories[-1])
         simulate_from_env(env, action_1)
         # print('stan', env.x[env.t])
         # print('stan po', env.x[env.t])
         simulate_from_env(env, [orange])
         # print('stan', env.x[env.t])
         simulate_from_env(env, [orange])
         # print('stan', env.x[env.t])
         for x in range(20):
             simulate_from_env(env, action_1)
             # print('stan', env.x[env.t])
         memories = env.global_memories
         memories += env.global_memories
         save_batches(agents)
         batches = get_batches()
         train(agents=agents)
         env.global_memories = []
         for agent in env.agents:
             agent.memories = []
         prediction = agents[0].model.predict(
             state_to_predict.to_learn_array())
         # this converges to 10, 20, whatever
         print('prediction :)', prediction)
         exportData = ExportData(learningMethod='DQN',
                                 learningEpochs=0,
                                 nets=memories,
                                 netName='net14',
                                 densityName='test_fixed_no_4')
         exportData.saveToJson()
         a = 23
示例#16
0
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(200),
                           plotting=False):
    # learntAgents = True
    # save_json = True
    # plotting=True
    cars_outs = []
    rewards = []
    rewards_mean = []

    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=env_settings.u_all_4)
        for agent in env.agents:
            agent.reshape_rewards()
        env.update_memory_rewards()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net4',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        x_batch, y_batch = agents[0].memory_to_minibatch_with_oranges()
        if plotting:
            cars_outs.append(env.cars_out)
            print('rew', env.count_summed_rewards()[0])
            print('cars_out', env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])

    for i in range(len(agents)):
        # print('i',i)
        filename = 'static_files/x_batch_agent_' + str(i) + '.txt'
        x_batch, y_batch = agents[i].full_batch()
        np.savetxt(filename, x_batch, delimiter=',')
        filename = 'static_files/y_batch_agent_' + str(i) + '.txt'
        np.savetxt(filename, y_batch, delimiter=',')
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the system - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Average reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
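The loop above persists each agent's training data with np.savetxt as comma-separated text under static_files/; a minimal sketch of the matching load side, assuming get_batches() simply reads those files back (the real get_batches() is not shown in these examples):

import numpy as np

def load_batches(n_agents, directory='static_files'):
    # Read back the x/y batches written with np.savetxt above (assumption:
    # the project's get_batches() does something equivalent).
    batches = []
    for i in range(n_agents):
        x_batch = np.loadtxt(directory + '/x_batch_agent_' + str(i) + '.txt', delimiter=',')
        y_batch = np.loadtxt(directory + '/y_batch_agent_' + str(i) + '.txt', delimiter=',')
        batches.append({'x_batch': x_batch, 'y_batch': y_batch})
    return batches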
示例#17
0
def train(learntAgents=True, max_time_learn=20, agents=None):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 5000
        val_loss_best = 5000
        escape_flag = False
        escape_val = 0
        a = 0
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch, y_batch, batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_done,
                            epochs=Globals().epochs_done+Globals().epochs_learn,
                            verbose=0, validation_split=0.2, callbacks=[Globals().tensorboard, agents[i].weights_history_callback])
            Globals().epochs_done += Globals().epochs_learn
            if res.history['val_loss'][-1] < val_loss_best:
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] > val_loss:
                escape_val += 1
                # print('escape_val',escape_val)
                # print('val loss',res.history['val_loss'][-1])
                if escape_val > 2:
                    escape_flag = True
                #     print('przerwalbym!!!!!!')
                # print('wynik sieci', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                x = [0, 0, 10, 15, 1, 0, 0, 0]
                pred = model.predict(np.array([x]))
                try:
                    diff = abs(pred[0][0] - Globals().pred_plot_memory[-1][0][0]) + abs(
                        pred[0][1] - Globals().pred_plot_memory[-1][0][1])
                    if a == 0:
                        # print('diff', diff)
                        a += 1
                except:
                    a = 23
                Globals().pred_plot_memory.append(pred)
        # print('best loss', val_loss_best)
        # print('end', model.get_weights())
        Globals().last_weights = model.get_weights()
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
def generate_my_epochs(learntAgents=False, save_front_json=False, epochs=range(1), plotting=False, reshaping=False,
                       actions=None, clear_memory=True, actual_number=''):
    save_front_json = True
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    # print(agents[0].orange_phase_duration)
    for e in epochs:
        Globals().epsilon = 1
        env: Env = my_epoch(agents, u=Globals().get_u(Globals().vp().max_time_learn),
                            time=Globals().vp().max_time_learn)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        if save_front_json:
            save_front_json=False
            exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=env.global_memories,
                                    netName='net16',
                                    densityName='my_epochs' + str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents,actual_number)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Number of vehicles leaving the system - random actions')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Average reward per action - random actions')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Sum of rewards - random actions')
        plt.savefig('img_rewards_random.png')
        plt.close()
    # if any(x for x in [mem.reward for mem in agents[0].memories] if x > 10.1):
    #     print("weeeeeeeeeeee")
    return agents
示例#19
0
 def test_no_1_(self):
     epochs = range(1)
     xy_20_all = []
     for e in epochs:
         agents: List[SmartAgent] = get_SmartAgents()
         Globals().epsilon = 1
         env: Env = epoch(agents, u=env_settings.u_all_2)
         for agent in env.agents:
             agent.reshape_rewards()
         env.update_memory_rewards()
         env.remember_memory()
示例#20
0
    def test_no_3_pass_action_0_0_0_long_time_then_1_1_1_long_time_then_2_2_2(self):
        # TESTING: phase change, phase_duration
        max_time = 90
        agents = get_SmartAgents()
        Globals().time = 0
        env = Env(agents)
        env.u = env_settings.u_all_2
        orange = 'orange'
        for t in range(max_time):
            # actions: [0, 0, 0] until t == 60, then [1, 1, 1], then [2, 2, 2], with orange transitions
            actions = [0, 0, 0]
            if t == 60 or 63 <= t < 70:
                actions = [1, 1, 1]
            if t == 61 or t == 62:
                actions = [orange, orange, orange]
            if t == 70 or t >= 73:
                actions = [2, 2, 2]
            if t == 71 or t == 72:
                actions = [orange, orange, orange]
            env.step(actions)
            if t == 60 or t == 61:
                self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
            if 63 <= t <= 69:
                self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
            if t == 70 or t == 71:
                self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
            if t >= 73:
                self.assertEqual([agent.actual_phase for agent in agents], [2, 2, 2])
            #
            #
            # phase_duration Testujemy
            if t == 60:
                self.assertEqual([agent.phase_duration for agent in agents], [0, 0, 0])
            if t == 61:
                self.assertEqual([agent.phase_duration for agent in agents], [1, 1, 1])
            if t == 62:
                self.assertEqual([agent.phase_duration for agent in agents], [2, 2, 2])
                # phase 0 has already lasted 2, we have just switched to phase 0, but we do not need to reset phase_duration
            if t == 63:
                self.assertEqual([agent.phase_duration for agent in agents], [3, 3, 3])  # at time
            if t == 70:
                self.assertEqual([agent.phase_duration for agent in agents], [0, 0, 0])
            if t == 71:
                self.assertEqual([agent.phase_duration for agent in agents], [1, 1, 1])
            if t == 72:
                self.assertEqual([agent.phase_duration for agent in agents], [2, 2, 2])
            if t == 73:
                self.assertEqual([agent.phase_duration for agent in agents], [3, 3, 3])

        exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0, nets=env.global_memories,
                                netName='net4',
                                densityName='test_no_3')
        exportData.saveToJson()
示例#21
0
def train(learntAgents=True, max_time_learn=20):
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        print('get learnt!')
        agents = get_LearnSmartAgents()
        a = 7
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        val_loss = 5000
        escape_flag = False
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=100,
                            epochs=1,
                            verbose=0,
                            validation_split=0.2)
            if res.history['val_loss'][-1] > val_loss:
                escape_flag = True
                print('network result', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                x = [4, 40, 0]
                pred = model.predict(np.array([x]))
                Globals().pred_plot_memory.append(pred)
        model.save('static_files/model-agent' + str(i) + '.h5')
        if i == 0:
            plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
                     color='red',
                     label='0')
            plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
                     color='green',
                     label='1')
            plt.plot([pred[0][2] for pred in Globals().pred_plot_memory],
                     color='blue',
                     label='2')
            plt.legend()
            plt.title(
                'Predicted rewards for the actions taken during the monitored state'
            )
            plt.savefig('foo' + str(Globals().run_no) + '.png')
            plt.close()
示例#22
0
    def test_learn_no_1_pass_action_1_1_1_long_time_then_2_2_2(self):
        # u - 2 vehicles flow in everywhere at every time step
        # we apply action [1,1,1] the whole time
        # at time 60 we switch to action [2,2,2]
        # all vehicles pass within a single time step
        # orange_phase_duration = 0, same as phase_duration
        # TESTING: whether the SmartAgents learn from the correct rewards and states
        max_time = 90
        agents = get_SmartAgents()
        for agent in agents:
            agent.orange_phase_duration = 0
        Globals().time = 0
        env = Env(agents)
        env.u = env_data.u_all_2
        for t in range(max_time):
            actions = [1, 1, 1] if t < 60 else [2, 2, 2]
            env.step(actions)
            if t < 60:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [1, 1, 1])
            if t >= 60:
                self.assertEqual([agent.actual_phase for agent in agents],
                                 [2, 2, 2])
        self.assertEqual(agents[0].memories[60].action, 2)
        self.assertAlmostEqual(agents[0].memories[60].reward, 20.0, 0)

        # Moment 60
        real_state = agents[0].memories[
            60].state.to_learn_nd_array_densities_group()
        expected_state = np.array(
            [[1, 4, 1]])  # densities are 2.0,28.88,0.18 so groups are 1,5,1
        np.testing.assert_almost_equal(real_state, expected_state, decimal=0)

        # Moment 61
        real_state = agents[0].memories[
            61].state.to_learn_nd_array_densities_group()
        expected_state = np.array(
            [[1, 3, 1]])  # densities are 2.6, 10.8, 0.3 so groups are 1, 3, 1
        np.testing.assert_almost_equal(real_state, expected_state, decimal=0)

        Globals().batch_size = 90
        for agent in env.agents:
            agent.train()

        exportData = ExportData(learningMethod='Monte Carlo TODO',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='net4',
                                densityName='test_learn_no_2')
        exportData.saveToJson()
示例#23
0
 def test_no_4_batch_create(self):
     agents = get_SmartAgents()
     j = 1
     predictions = []
     while True:
         j += 1
         for agent in agents:
             agent.orange_phase_duration = 2
             agent.actual_phase = 0
             agent.starting_actual_phase = 0
             agent.memories = []
         action_1 = [1]
         action_0 = [0]
         env = Env(agents)
         for i in range(100):
             simulate_from_env(env, action_0)  # t=0
         for i in range(50):
             simulate_from_env(env, action_1)  # t=0
         exportData = ExportData(learningMethod='DQN', learningEpochs=0, nets=env.global_memories,
                                 netName='net14',
                                 densityName='test_batching_4')
         exportData.saveToJson()
         save_batches(agents)
         # train(agents=agents)
         pred = agents[0].model.predict(np.array([[3, 3, 3, 297, 1, 0]]))
         predictions.append(pred)
         batches = agents[0].full_batch_no_orange()
         agents[0].model.fit(np.array(batches[0]), np.array(batches[1]), epochs=1000, validation_split=0)
         a = 435
         ########### THAT IS OK
         # model = keras.models.Sequential()
         # model.add(keras.layers.Dense(7, activation='relu', input_dim=5))
         # model.add(keras.layers.Dense(12, activation='relu'))
         # model.add(keras.layers.Dense(10, activation='relu'))
         # model.add(keras.layers.Dense(2))
         # model.compile(loss='mse', optimizer=Adam())
         # y=batches[1]
         # model.fit(np.array(batches[0]),np.array(batches[1]),epochs=1,validation_split=0)
         # predictions=model.predict(np.array(batches[0]))
         # diffs = [(predictions[i] - y[i]) ** 2 for i in range(len(predictions))]
         # mse = sum(sum(diffs)) / len(predictions) / len(predictions[0])
         # print('mse',mse)
         ###########
         name = 'pred.png'
         plt.plot([pred[0][0] for pred in predictions], label='0')
         plt.plot([pred[0][1] for pred in predictions], label='1')
         plt.legend()
         plt.savefig(name)
         plt.close()
示例#24
0
def train(learntAgents=True,
          max_time_learn=60,
          agents=None,
          shuffle=True,
          batches=None,
          actual_number=''):
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    if batches is None:
        batches = get_batches(agents, actual_number)
    models = [agent.model for agent in agents]
    for i in range(len(agents)):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10**10
        val_loss_best = 10**10
        escape_flag = False
        escape_val = 0
        start_flag = True
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_learn_done,
                            epochs=Globals().epochs_learn_done +
                            Globals().vp().epochs_learn,
                            validation_split=0.2,
                            verbose=0)
            Globals().epochs_learn_done += Globals().vp().epochs_learn
            if start_flag:
                start_flag = False
            if res.history['val_loss'][-1] < val_loss_best:
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] >= val_loss:
                escape_val += 1
                if escape_val > 10:
                    escape_flag = True
                val_loss = 10**10
            else:
                val_loss = res.history['val_loss'][-1]
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
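The manual bookkeeping above (val_loss_best, weights_best, escape_val) mirrors what Keras's built-in EarlyStopping callback does; a sketch of the equivalent call, assuming the same model and x_batch/y_batch are available, might look like this:

from tensorflow import keras

def fit_with_early_stopping(model, x_batch, y_batch, patience=10):
    # Stop after `patience` epochs without val_loss improvement and roll back
    # to the best weights seen, as in the manual loop in train() above.
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               patience=patience,
                                               restore_best_weights=True)
    model.fit(x_batch, y_batch,
              batch_size=100,      # illustrative value; the project reads it from Globals().vp()
              epochs=10000,        # upper bound; EarlyStopping normally ends training earlier
              validation_split=0.2,
              verbose=0,
              callbacks=[early_stop])
    return model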
示例#25
0
 def test_no_0_pass_action_0_all_time(self):
     max_time = 90
     agents = get_SmartAgents()
     Globals().time = 0
     env = Env(agents)
     env.u = env_settings.u_all_2
     for t in range(max_time):
         if t >= 3:
             self.assertEqual([agent.actual_phase for agent in agents], [0])
         actions = [0]
         env.step(actions)
     exportData = ExportData(learningMethod='None', learningEpochs=0, nets=env.global_memories,
                             netName='net11',
                             densityName='test_no_0')
     exportData.saveToJson()
示例#26
0
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1),
                           plotting=False,
                           u=Globals().u,
                           clear_memory=True):
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents, u=u)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_' + str(e))

            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents)
    return agents
示例#27
0
 def test_no_1_000_then_111(self):
     # We test the action that actually occurred, so the prediction should be very good
     agents: List[SmartAgent] = get_SmartAgents()
     env = Env(agents)
     env.u = env_settings.u_all_2
     max_time = 15
     Globals().time = 0
     for t in range(max_time):
         actions = [0, 0, 0]
         if t == 60 or t >= 63:
             actions = [1, 1, 1]
         if t == 61 or t == 62 or t == 1 or t == 2:
             actions = [orange, orange, orange]
         env.step(actions)
         if t == 10:
             dry_run(env, env.agents, [1, 2, 1])
     for agent in agents:
         agent.reshape_rewards()
示例#28
0
 def test_no_1_pass_action_1_1_1_all_time(self):
     # TESTING: the actual phase should stay [1,1,1] the whole time
     max_time = 90
     agents = get_SmartAgents()
     for agent in agents:
         agent.orange_phase_duration = 2
     Globals().time = 0
     env = Env(agents)
     env.u = env_settings.u_all_2
     for t in range(max_time):
         if t >= 3:
             self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
         actions = [1, 1, 1]
         env.step(actions)
     exportData = ExportData(learningMethod='Monte Carlo TODO', learningEpochs=0, nets=env.global_memories,
                             netName='net4',
                             densityName='test_no_1')
     exportData.saveToJson()
示例#29
0
 def test_no_5_reshaping_pass_action_0_0_0_long_time_then_1_1_1_long_time_then_2_2_2(self):
     # TESTING: starting_actual_phase
     max_time = 90
     agents = get_SmartAgents()
     Globals().time = 0
     Globals().gamma = 0
     env = Env(agents)
     env.u = env_settings.u_all_2
     for t in range(max_time):
         actions = [0, 0, 0]
         if t == 60 or 63 <= t < 70:
             actions = [1, 1, 1]
         if t == 61 or t == 62:
             actions = [orange, orange, orange]
         if t == 70 or t >= 73:
             actions = [2, 2, 2]
         if t == 71 or t == 72:
             actions = [orange, orange, orange]
         env.step(actions)
         print(f't:{t}, actual_phase:{agents[0].actual_phase}')
         if 3 <= t < 60:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [0, 0, 0])
             self.assertEqual([agent.actual_phase for agent in agents], [0, 0, 0])
         if t == 60:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [0, 0, 0])
             self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
         if t == 61:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [orange, orange, orange])
             self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
         if t == 62:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [orange, orange, orange])
             self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
         if 63 <= t <= 69:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [1, 1, 1])
             self.assertEqual([agent.actual_phase for agent in agents], [1, 1, 1])
         if t == 70:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [1, 1, 1])
             self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
         if t == 71:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [orange, orange, orange])
             self.assertEqual([agent.actual_phase for agent in agents], [orange, orange, orange])
         if t == 72:
             self.assertEqual([agent.starting_actual_phase for agent in agents], [orange, orange, orange])
             self.assertEqual([agent.actual_phase for agent in agents], [2, 2, 2])
示例#30
0
    def test_no_6_starting_phase_actual_phase(self):
        # We test starting_actual_phase - it is needed for LearningState
        agents: List[SmartAgent] = get_SmartAgents()
        for agent in agents:
            agent.yellow_phase_duration = 2
        env = Env(agents)
        env.yellow_phase_duration = 2
        max_time = 90
        Globals().time = 0
        for t in range(max_time):
            # actions: [0]*3 until t == 60, then [1]*3, then back to [0]*3, with yellow transitions
            actions = [0] * 3
            if t == 1 or t == 2:
                actions = [yellow] * 3
            if t == 60 or t >= 63:
                actions = [1] * 3
            if t == 61 or t == 62 or t == 71 or t == 72:
                actions = [yellow] * 3
            if t == 70 or t >= 73:
                actions = [0] * 3
            env.step(actions)
        for agent in agents:
            agent.reshape_rewards()
        self.assertEqual(agents[0].memories[60].state.starting_actual_phase, 0)
        self.assertEqual(agents[0].memories[61].state.starting_actual_phase,
                         yellow)
        self.assertEqual(agents[0].memories[62].state.starting_actual_phase,
                         yellow)
        self.assertEqual(agents[0].memories[63].state.starting_actual_phase, 1)

        self.assertEqual(agents[0].memories[70].state.starting_actual_phase, 1)
        self.assertEqual(agents[0].memories[71].state.starting_actual_phase,
                         yellow)
        self.assertEqual(agents[0].memories[72].state.starting_actual_phase,
                         yellow)
        self.assertEqual(agents[0].memories[73].state.starting_actual_phase, 0)

        exportData = ExportData(learningMethod='Monte Carlo TODO',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='net14',
                                densityName='test_no_6')
        exportData.saveToJson()