def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1)):
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.memories = []
    for e in epochs:
        Globals().epsilon = 1
        env: Env = epoch(agents,
                         u=Globals().get_u(Globals().vp.max_time_learn),
                         time=Globals().vp.max_time_learn)
        for agent in env.agents:
            agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='politechnika',
                                    densityName='random_now' +
                                    str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
    save_batches(agents)
    return agents
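A possible driver for these snippets, assuming save_batches/get_batches share the same storage and that train and run_learnt_greedy (shown in later examples) live in the same module — a sketch, not the project's actual entry point:

# Hypothetical usage: collect random-exploration epochs, fit the models, then evaluate greedily.
agents = generate_random_epochs(learntAgents=False, epochs=range(10))
train(learntAgents=False, max_time_learn=60)   # fits the models on the batches saved above
results = run_learnt_greedy(saveJson=True)     # rewards_mean, rewards_sum, cars_out, ...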
Example #2
def epoch_greedy(env):
    Globals().time = 0
    Globals().epsilon = 0
    for t in range(max_time):
        actions: List[ActionInt] = [agent.get_action(agent.local_state) for agent in env.agents]
        env.step(actions)
    return env
Example #3
 def full_batch(self, only_learn_usable=False):
     x_batch = []
     y_batch = []
     i = 0
     l_rate = Globals().learning_rate
     gamma = Globals().vp().gamma
     memories = self.memories if not only_learn_usable else [
         mem for mem in self.memories if mem.learn_usable
     ]
     for memory in memories:
         state = memory.state.to_learn_array()
         action = 2 if memory.action == 'orange' else memory.action
         y_target = self.model.predict(state)
         new_state_possible_actions_value_predictions = self.model.predict(
             memory.new_state.to_learn_array())
         if memory.action == 1 and memory.new_state.actual_phase == 'orange':
             print('action 1, next action value',
                   new_state_possible_actions_value_predictions)
             print('')
         if memory.state.starting_actual_phase != 'orange':
             max_next_action_value = max(new_state_possible_actions_value_predictions[0])
         else:
             max_next_action_value = new_state_possible_actions_value_predictions[0][-1]
         target = (1 - l_rate) * y_target[0][action] + l_rate * (
             memory.reward + gamma * max_next_action_value)
         i += 1
         y_target[0][action] = target
         x_batch.append(state[0])
         y_batch.append(y_target[0])
     return x_batch, y_batch
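For reference, the blended target computed above is the standard incremental Q-learning update written out; a minimal standalone sketch (the helper name is chosen here for illustration):

def blended_q_target(old_q, reward, max_next_q, l_rate, gamma):
    # target = (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
    return (1 - l_rate) * old_q + l_rate * (reward + gamma * max_next_q)

# e.g. blended_q_target(old_q=2.0, reward=1.0, max_next_q=3.0, l_rate=0.1, gamma=0.9) == 2.17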
Example #4
def epoch_greedy(env):
    Globals().time = 0
    actions_count_0 = [0, 0, 0, 0, 0]
    actions_count_1 = [0, 0, 0, 0, 0]
    actions_count_2 = [0, 0, 0, 0, 0]
    actions_count_3 = [0, 0, 0, 0, 0]
    for t in range(Globals().vp.max_time_greedy):
        actions: List[ActionInt] = [
            agent.get_action(state=agent.local_state, greedy=True)
            for agent in env.agents
        ]
        env.step(actions)
        if actions[0] != yellow:
            actions_count_0[int(actions[0])] += 1
        else:
            actions_count_0[-1] += 1
        if actions[1] != yellow:
            actions_count_1[int(actions[1])] += 1
        else:
            actions_count_1[-1] += 1
        if actions[2] != yellow:
            actions_count_2[int(actions[2])] += 1
        else:
            actions_count_2[-1] += 1
        if actions[3] != yellow:
            actions_count_3[int(actions[3])] += 1
        else:
            actions_count_3[-1] += 1
    print('actions taken',
          [actions_count_0, actions_count_1, actions_count_2, actions_count_3])
    return env.u
Example #5
 def test_no_5_reshaping_pass_action_1_1_1_long_time_then_2_2_2_long_time_then_3_3_3(
         self):
     # TESTING: rewards
     max_time = 90
     agents = get_SmartAgents()
     Globals().time = 0
     Globals().gamma = 0
     env = Env(agents)
     env.u = env_settings.u_all_2
     for t in range(max_time):
         # default action [1, 1, 1]; switch to [2, 2, 2] around t = 60..69 and to [3, 3, 3] from t = 70, with [0, 0, 0] transitions in between
         actions = [1, 1, 1]
         if t == 60 or 63 <= t < 70:
             actions = [2, 2, 2]
         if t == 61 or t == 62:
             actions = [0, 0, 0]
         if t == 70 or t >= 73:
             actions = [3, 3, 3]
         if t == 71 or t == 72:
             actions = [0, 0, 0]
         env.step(actions)
     env.agents[0].save_batch()
     env.agents[0].reshape_rewards()
     self.assertAlmostEqual(env.agents[0].memories[60].reward, 23.3, 0)
     self.assertAlmostEqual(env.agents[0].memories[61].reward, 37, 0)
     self.assertAlmostEqual(env.agents[0].memories[62].reward, 39.1, 0)
     env.update_global_memory_rewards()
     exportData = ExportData(learningMethod='Monte Carlo TODO',
                             learningEpochs=0,
                             nets=env.global_memories,
                             netName='net4',
                             densityName='test_no_5')
     exportData.saveToJson()
Example #6
def run_learnt_greedy(saveJson=True):
    Globals().cars_out_memory = []
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5', 'static_files/model-agent3.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    env = Env(agents)
    u = epoch_greedy(env)
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='env4',
                                densityName='learnt_' +
                                str(Globals().greedy_run_no))
        exportData.saveToJson()
    maximum_possible_cars_out = Globals().u_value * Globals().vp.max_time_greedy * 8
    cars_out_percentage = round(100 * cars_out / maximum_possible_cars_out, 2)
    print(
        f'greedy run {Globals().greedy_run_no} - rewards_mean:{round(rewards_mean, 2)} '
        f'rewards_sum:{round(rewards_sum, 0)}. {round(sum(sum(u)), 0)} vehicles entered the network. '
        f'{round(cars_out, 0)} vehicles left. Percentage of vehicles that '
        f'left the network: {cars_out_percentage}')
    Globals().greedy_run_no += 1
    return rewards_mean, rewards_sum, cars_out, agents, sum(
        sum(u)), cars_out_percentage
Example #7
 def test_no_2_pass_action_0_long_time_then_1(self):
     # TESTING: phase changes
     max_time = 90
     agents = get_SmartAgents()
     for agent in agents:
         agent.yellow_phase_duration = 2
     Globals().time = 0
     env = Env(agents)
     Globals().u_value = 2
     env.u = Globals().get_u(max_time)
     env.yellow_phase_duration = 2
     for t in range(max_time):
         actions = [0, 0, 0]
         if t == 60 or t > 62:
             actions = [1, 1, 1]
         if t == 61 or t == 62:
             actions = [yellow, yellow, yellow]
         time = Globals().time  # time = t
         env.step(actions)
         time = Globals().time  # time = t + 1
         if t in range(3, 60):
             self.assertEqual([agent.actual_phase for agent in agents],
                              [0, 0, 0])
         if t == 60 or t == 61:
             self.assertEqual([agent.actual_phase for agent in agents],
                              [yellow, yellow, yellow])
         if t >= 62:
             self.assertEqual([agent.actual_phase for agent in agents],
                              [1, 1, 1])
         exportData = ExportData(learningMethod='Nothing',
                                 learningEpochs=0,
                                 nets=env.global_memories,
                                 netName='net14',
                                 densityName='test_no_2')
         exportData.saveToJson()
Example #8
def run_learnt_greedy(saveJson=True):
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    env = Env(agents)
    epoch_greedy(env)
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='env3',
                                densityName='learnt_' +
                                str(Globals().greedy_run_no))
        exportData.saveToJson()
    maximum_possible_cars_out = Globals().u_value * Globals().vp().max_time_greedy * 3
    print(
        f'greedy run {Globals().greedy_run_no} - rewards_mean:{round(rewards_mean, 2)} '
        f'rewards_sum:{round(rewards_sum, 0)} cars_out:{round(cars_out, 0)} '
        f'percentage of vehicles that left the network: {cars_out / maximum_possible_cars_out}'
    )
    Globals().greedy_run_no += 1

    return rewards_mean, rewards_sum, cars_out, agents
Example #9
 def save_batch(self):
     gamma = Globals().gamma
     batch_size = Globals().batch_size
     # minibatch = random.sample(self.memories, batch_size)
     x_batch = []
     y_batch = []
     i = 0
     for memory in self.memories:
         if memory.action == 0:
             continue
         i += 1
         state = memory.state.to_9_densities_learn_array()
         new_state = memory.new_state.to_9_densities_learn_array()
         y = self.model.predict(state)
         future_actions_values_predictions = self.model.predict(new_state)
         possible_actions = memory.state.possible_actions(
             self.orange_phase_duration)
         best_possible_future_action_value = np.amax([
             future_actions_values_predictions[0][i]
             for i in possible_actions
         ])
         target_action = (
             memory.reward +
             gamma *  # (target) = reward + (discount rate gamma) *
             best_possible_future_action_value
         )  # (maximum target Q based on future action a')
         # so this is the q value for action made in state leading to new_state
         # counted basing on - reward and reward of future best action
         y[0][memory.action] = target_action
         x_batch.append(state[0])
         y_batch.append(y[0])
         if self.index == 0:
             Globals().x_batch.append(state[0])
             Globals().y_batch.append(y[0])
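The np.amax over possible_actions above is a masked maximum: only actions reachable from the current phase feed the bootstrap term. A minimal sketch of that step in isolation (NumPy only; the helper name is illustrative):

import numpy as np

def best_possible_value(q_values, possible_actions):
    # q_values: 1-D array of predicted action values for one state
    # possible_actions: indices of the actions allowed by the phase logic
    return np.amax([q_values[a] for a in possible_actions])

# e.g. best_possible_value(np.array([0.3, 1.2, -0.5]), [0, 2]) returns 0.3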
Example #10
def plot_pred_memory(name=str(Globals().run_no)):
    plt.plot([pred[0][0] for pred in Globals().pred_plot_memory], color='red', label='0')
    plt.plot([pred[0][1] for pred in Globals().pred_plot_memory], color='green', label='1')
    plt.legend()
    plt.title('Predicted rewards for the actions taken in the monitored state')
    plt.savefig('plot' + name + '.png')
    plt.close()
Example #11
def train(learntAgents=True, max_time_learn=20):
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    start_time = timer()
    x_batch = batches[0]['x_batch']
    y_batch = batches[0]['y_batch']
    model = models[0]
    val_loss = 5000
    escape_flag = False
    while timer() - start_time < max_time_learn and not escape_flag:
        res = model.fit(x_batch, y_batch, batch_size=100, epochs=1, verbose=0, validation_split=0.2)
        if res.history['val_loss'][-1] > val_loss:
            escape_flag = True
            loss = res.history['val_loss'][-1]
            print(f'network result: validation loss {loss}')
            val_loss = 5000
        else:
            val_loss = res.history['val_loss'][-1]
        x = [4, 20]
        pred = model.predict(np.array([x]))
        Globals().pred_plot_memory.append(pred)
    model.save('static_files/model-agent' + str(0) + '.h5')
    plt.plot([pred[0][0] for pred in Globals().pred_plot_memory], color='red', label='0')
    plt.plot([pred[0][1] for pred in Globals().pred_plot_memory], color='green', label='1')
    plt.legend()
    plt.title('Predicted rewards for the actions taken \n in the monitored state [4, 20]')
    plt.savefig('images_generated/state_predictions.png')
    plt.close()
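The while loop above implements manual early stopping: it fits one epoch at a time and breaks once validation loss rises. Assuming the models are standard Keras models (they are saved with model.save to .h5), a similar effect can be obtained with the built-in callback — a sketch that ignores the wall-clock budget handled by max_time_learn:

from tensorflow.keras.callbacks import EarlyStopping  # use keras.callbacks for standalone Keras

early_stop = EarlyStopping(monitor='val_loss', patience=1, restore_best_weights=True)
model.fit(x_batch, y_batch, batch_size=100, epochs=1000, verbose=0,
          validation_split=0.2, callbacks=[early_stop])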
Example #12
 def save_batch(self):
     gamma = Globals().gamma
     batch_size = Globals().batch_size
     # minibatch = random.sample(self.memories, batch_size)
     x_batch = []
     y_batch = []
     i = 0
     for memory in self.memories:
         if memory.action == 'orange':
         # if memory.action == 'orange' or memory.state.starting_actual_phase == 'orange':
             continue
         i += 1
         print('i',i)
         state = memory.state.to_9_densities_learn_array()
         print('state', state)
         print('memory',memory)
         y = self.model.predict(state)
         target_action = memory.reward
         # target_action = (memory.reward + gamma *  # (target) = reward + (discount rate gamma) *
         #                  best_possible_future_action_value)  # (maximum target Q based on future action a')
         # so this is the q value for action made in state leading to new_state
         # counted basing on - reward and reward of future best action
         y[0][memory.action] = target_action
         x_batch.append(state[0])
         y_batch.append(y[0])
         if self.index == 0:
             Globals().x_batch.append(state[0])
             Globals().y_batch.append(y[0])
Example #13
File: Env.py Project: johny1614/magazyn
 def __attrs_post_init__(self):
     Globals().time = 0
     max_time = Globals().vp.max_time_greedy
     self.u = Globals().get_u(max_time)
     self.A = []
     self.cars_out = 0
     self.assign_local_states_to_agents()
Example #14
def epoch_random(env) -> Env:
    Globals().epsilon = 0
    agents: List[SmartAgent] = get_SmartAgents()
    for t in range(Globals().vp().max_time_learn):
        actions: List[ActionInt] = [random.choice(agent.local_action_space) for agent in agents]
        env.step(actions)
    return env
Example #15
 def test_no_5_reshaping_pass_action_0_1_0(self):
     # TESTING: rewards
     max_time = 90
     agents = get_SmartAgents()
     Globals().time = 0
     Globals().gamma = 0
     env = Env(agents)
     env.u = env_settings.u_all_9
     for t in range(max_time):
         # default action [0]; switch to [1] around t = 60..69 and back to [0] from t = 70, with orange transitions in between
         actions = [0]
         if t == 1 or t == 2:
             actions = [orange]
         if t == 60 or 63 <= t < 70:
             actions = [1]
         if t == 61 or t == 62:
             actions = [orange]
         if t == 70 or t >= 73:
             actions = [0]
         if t == 71 or t == 72:
             actions = [orange]
         env.step(actions)
     env.agents[0].save_batch()
     env.agents[0].reshape_rewards()
     # self.assertAlmostEqual(env.agents[0].memories[60].reward, 2, 0)
     # self.assertAlmostEqual(env.agents[0].memories[61].reward, 4, 0)
     # self.assertAlmostEqual(env.agents[0].memories[62].reward, 6, 0)
     # env.update_memory_rewards()
     exportData = ExportData(learningMethod='Monte Carlo TODO',
                             learningEpochs=0,
                             nets=env.global_memories,
                             netName='net11',
                             densityName='test_no_6')
     exportData.saveToJson()
Example #16
File: Env.py Project: johny1614/magazyn
 def save_motions(self):
     old_time = Globals().time - 1
     new_time = Globals().time
     self.last_flows = []
     for agent in self.agents:
         actual_moves = () if agent.actual_phase == 'orange' else agent.moves[agent.actual_phase]
         for move in actual_moves:
             if move[0] == 404:
                 continue
             A_cell = self.A[Globals().time - 1][move]
             section_from_index = move[1]
             previous_density = self.x[self.t - 1][section_from_index]
             value = A_cell * previous_density
             flow = {
                 'agent_index': agent.index,
                 'old_time': old_time,
                 'new_time': new_time,
                 'move': move,
                 'value': value
             }
             self.last_flows.append(flow)
     for flow in self.last_flows:
         self.flow_memories.append(flow)
Example #17
def train(learntAgents=True, max_time_learn=20):
    # these settings are not used below; the create_model call that took them is commented out
    # (note: 'relu' is an activation function, not an optimizer)
    l_rate = 0.0001
    layers = [15, 25, 20, 15]
    optimizer = 'relu'
    regularizers_ = [0.2, 0.2, 0.2]
    print('train learntAgents', learntAgents)
    agents = get_LearnSmartAgents()

    # create_model(layers, optimizer, l_rate)
    # for i in range(3)
    models = [agent.model for agent in agents]
    batches = get_batches()
    # for i in range(len(models)):
    for i in range(3):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        x2 = []
        y2 = []
        val_loss = 5000
        escape_flag = False
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=100,
                            epochs=1,
                            verbose=0,
                            validation_split=0.2)
            if res.history['val_loss'][-1] > val_loss:
                escape_flag = True
                print('network result', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            # res = model.fit(np.array(x2), np.array(y2), batch_size=20, epochs=1, verbose=0)
            if i == 0:
                # x = [7, 10, 10] + [10, 10, 20] + [6, 5, 4] + [2]
                x = [4, 4, 62] + [10, 10, 49] + [0, 10, 10] + [0]
                pred = model.predict(np.array([x]))
                Globals().pred_plot_memory.append(pred)
            # model.evaluate(np.array(x2), np.array(y2))
        model.save('static_files/model-agent' + str(i) + '.h5')
        if i == 0:
            plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
                     color='red',
                     label='0')
            plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
                     color='green',
                     label='1')
            plt.plot([pred[0][2] for pred in Globals().pred_plot_memory],
                     color='blue',
                     label='2')
            plt.legend()
            plt.title(
                'Predicted rewards for the actions taken in the monitored state'
            )
            plt.savefig('foo' + str(Globals().run_no) + '.png')
            plt.close()
Example #18
 def remember(self, densities, reward):
     state = self.local_state
     action = self.action
     self.assign_local_state(densities)
     new_state = self.local_state
     times = {'old': Globals().time - 1, 'new': Globals().time}
     memory = Memory(state=state, action=action, new_state=new_state, reward=reward, times=times)
     self.memories.append(memory)
Example #19
 def __attrs_post_init__(self):
     if self.model == 0:
         l_rate = Globals().vp().nn_l_rate
         layers = Globals().vp().layers
         activation = 'relu'
         self.model = self._build_model(layers=layers,
                                        activation=activation,
                                        l_rate=l_rate)
Example #20
def epoch():
    Globals().time = 0
    env = Env(agents)
    for t in range(max_time):
        actions: List[ActionInt] = getActions(t)
        # actions = best_actions[t] if len(best_actions)>=t else [0,0,0]
        env.step(actions)
    Globals().epochs_done += 1
    return env
Example #21
def epoch(agents):
    Globals().time = 0
    env = Env(agents)
    for t in range(max_time):
        actions: List[ActionInt] = [random.choice(agent.local_action_space) for agent in agents]
        # actions = best_actions[t] if len(best_actions)>=t else [0,0,0]
        env.step(actions)
    Globals().epochs_done += 1
    return env
def epoch(agents, u=env_settings.u_all_2):
    Globals().time = 0
    env = Env(agents)
    env.u = u
    for t in range(max_time):
        actions: List[ActionInt] = [agent.get_action(agent.local_state) for agent in agents]
        env.step(actions)
    Globals().epochs_done += 1
    return env
def epoch_random(agents, u=Globals().u):
    Globals().time = 0
    env = Env(agents)
    env.u = u
    for t in range(max_time):
        actions: List[ActionInt] = [agent.get_action(state=agent.local_state,full_random=True) for agent in agents]
        env.step(actions)
    Globals().epochs_done += 1
    return env
Example #24
 def __attrs_post_init__(self):
     self.weights_history_callback = LambdaCallback(
         on_epoch_end=self.add_weight_history)
     if self.model == 0:
         l_rate = Globals().vp().nn_l_rate
         layers = Globals().vp().layers
         activation = 'relu'
         self.model = self._build_model(layers=layers,
                                        activation=activation,
                                        l_rate=l_rate)
Example #25
 def add_returns(self, G):
     for i in range(len(G)):
         if ((self.epoch_local_state_storage[i],
              self.epoch_local_action_storage[i]) in self.returns.keys()):
             self.returns[self.epoch_local_state_storage[i],
                          self.epoch_local_action_storage[i]].append(G[i])
             Globals().state_repeats += 1
         else:
             Globals().new_states += 1
             self.returns[self.epoch_local_state_storage[i],
                          self.epoch_local_action_storage[i]] = [G[i]]
Example #26
 def remember_global_memory(self):
     times = Times(old_time=Globals().time - 1, new_time=Globals().time)
     actions = [agent.action for agent in self.agents]
     rewards = self.global_rewards[self.t - 1]
     densities = self.x[self.t - 1]
     lights = self.A[self.t-1]
     net = Net(times=times, densities=densities,
               rewards=rewards,
               actions=actions,
               lights=lights)
     self.global_memories.append(net)
Example #27
 def save_batch(self):
     x_batch = []
     y_batch = []
     for memory in self.memories:
         if memory.action == 'orange':
             continue
         state = memory.state.to_learn_array()
         y = memory.reward
         x_batch.append(state[0])
         y_batch.append(y)
         if self.index == 0:
             Globals().x_batch.append(state[0])
             Globals().y_batch.append(y)
Example #28
def epoch(agents, time, u=None):
    if u is None:
        u = Globals().get_u(time)
    Globals().time = 0
    env = Env(agents)
    env.u = u
    for t in range(time):
        actions: List[ActionInt] = [agent.get_action(agent.local_state) for agent in agents]
        if actions[0] != yellow:
            Globals().actions_memory[int(actions[0])] += 1
        env.step(actions)
    Globals().epochs_learn_done += 1
    return env
Example #29
 def get_action(self, state):
     if state.to_learn_tuple_used()[-1] == 'orange' and Globals().time != 0:
         return 'orange'
     s = state.to_learn_tuple_used()
     if random.random() < Globals().epsilon:
         random_action = random.choice([0, 1])
         return random_action
     if s not in self.Pi:
         self.Pi[s] = random.choice([0, 1])
         # print(f'time {Globals().time} randomly chosen a: {self.Pi[s]}')
     # else:
     #     print(f'time {Globals().time} already stored a: {self.Pi[s]}')
     return self.Pi[s]
Example #30
def plot_pred_memory(no):
    plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
             color='red',
             label='0')
    plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
             color='green',
             label='1')
    plt.legend()
    plt.title(
        'Predicted rewards for the actions taken in the monitored state'
    )
    plt.savefig('images_generated/rewards_' + str(no) + '.png')
    plt.close()