def run_learnt_greedy(saveJson=True):
    """Run one greedy (no-exploration) evaluation epoch on the 4-agent env4 network.

    Loads the four saved agent models, runs a greedy epoch, optionally exports
    the run for the frontend as JSON, prints a summary and bumps the global
    greedy-run counter.

    :param saveJson: when True, export the epoch's memories to JSON.
    :return: (rewards_mean, rewards_sum, cars_out, agents, cars_in, cars_out_percentage)
    """
    Globals().cars_out_memory = []
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5', 'static_files/model-agent3.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    env = Env(agents)
    # u is the inflow matrix used during the epoch; sum(sum(u)) = cars that entered.
    u = epoch_greedy(env)
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='env4',
                                densityName='learnt_' + str(Globals().greedy_run_no))
        exportData.saveToJson()
    # NOTE(review): here `vp` is used as an attribute while sibling functions call
    # `vp()` — confirm which accessor the Globals class actually provides.
    # The factor 8 presumably reflects the number of inflow lanes in env4 — TODO confirm.
    maximum_possible_cars_out = Globals().u_value * Globals(
    ).vp.max_time_greedy * 8
    cars_out_percentage = round(100 * cars_out / maximum_possible_cars_out, 2)
    print(
        f'gready run {Globals().greedy_run_no} - rewards_mean:{round(rewards_mean, 2)} rewar'
        f'ds_sum:{round(rewards_sum, 0)}. Do układu wjechało {round(sum(sum(u)), 0)} pojazdów.'
        f' Wyjechało {round(cars_out, 0)}. Układ opuściło pr'
        f'ocentowo pojazdów:{cars_out_percentage}')
    Globals().greedy_run_no += 1
    return rewards_mean, rewards_sum, cars_out, agents, sum(
        sum(u)), cars_out_percentage
def draw_predictions(no):
    """Plot, over a grid of two density values, which action agent 0's model prefers.

    Builds all states [den0*2, den1*2, 0] for den0, den1 in 0..59 (phase fixed
    at 0), predicts Q-values with the first agent's model and scatter-plots the
    states where action 0 (green) or action 1 (red) wins.

    :param no: suffix for the output file 'predictions<no>.png'.
    """
    agents = get_LearnSmartAgents()
    to_predict = []
    for den0 in range(60):
        for den1 in range(60):
            to_predict.append([den0 * 2, den1 * 2, 0])
    predictions = agents[0].model.predict(
        np.array(to_predict))  # only for states with phase 0
    dots_action_0 = []
    dots_action_1 = []
    dots_action_orange = []
    for i in range(len(predictions)):
        pred = predictions[i]
        to_predict_state = to_predict[i]
        # argmax over pred[:-1]: the last output is excluded — presumably the
        # orange-phase head is not a selectable action here; TODO confirm.
        best_action_predicted = np.argmax(pred[:-1])
        if best_action_predicted == 0:
            dots_action_0.append(to_predict_state)
        if best_action_predicted == 1:
            dots_action_1.append(to_predict_state)
        if best_action_predicted == 2:
            dots_action_orange.append(to_predict_state)
    # x axis is den0, y axis is den1
    plt.plot([den[0] for den in dots_action_0],
             [den[1] for den in dots_action_0], 'go')
    plt.plot([den[0] for den in dots_action_1],
             [den[1] for den in dots_action_1], 'ro')
    # plt.plot([den[0] for den in dots_action_orange], [den[1] for den in dots_action_orange], 'bo')
    plt.savefig('predictions' + str(no) + '.png')
    plt.close()
def train(learntAgents=True, max_time_learn=20):
    """Train agent 0's model on its saved batch until time runs out or val_loss rises.

    Fits repeatedly in 1-epoch steps; stops early the first time validation
    loss increases compared to the previous step. Afterwards records the
    model's prediction for the monitored state [4, 20], saves the model and
    plots the prediction history.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param max_time_learn: wall-clock training budget in seconds.
    """
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    start_time = timer()
    x_batch = batches[0]['x_batch']
    y_batch = batches[0]['y_batch']
    model = models[0]
    # Sentinel "worse than anything" initial loss; first fit always improves on it.
    val_loss = 5000
    escape_flag = False
    while timer() - start_time < max_time_learn and not escape_flag:
        res = model.fit(x_batch,
                        y_batch,
                        batch_size=100,
                        epochs=1,
                        verbose=0,
                        validation_split=0.2)
        if res.history['val_loss'][-1] > val_loss:
            # Validation loss got worse — stop after reporting it.
            escape_flag = True
            loss = res.history['val_loss'][-1]
            print(f'wynik sieci: {loss} straty')
            val_loss = 5000
        else:
            val_loss = res.history['val_loss'][-1]
    # Monitored state: track how its predicted action values evolve across train() calls.
    x = [4, 20]
    pred = model.predict(np.array([x]))
    Globals().pred_plot_memory.append(pred)
    model.save('static_files/model-agent' + str(0) + '.h5')
    plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
             color='red',
             label='0')
    plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
             color='green',
             label='1')
    plt.legend()
    plt.title('Nagrody przewidziane dla akcji podjętych \n podczas monitorowanego stanu [4, 20]')
    plt.savefig('images_generated/state_predictions.png')
    plt.close()
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1)):
    """Run fully random exploration epochs and persist the resulting batches.

    Clears every agent's memory, then for each epoch forces epsilon to 1
    (pure exploration), runs an epoch, reshapes rewards, optionally exports
    the run to JSON, and finally saves the accumulated training batches.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param save_front_json: export each epoch's memories for the frontend.
    :param epochs: iterable of epoch indices to run.
    :return: the agents with their collected memories.
    """
    reshaping = True  # NOTE(review): always True here; flag kept for parity with siblings
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for agent in agents:
        agent.memories = []
    for e in epochs:
        Globals().epsilon = 1  # epsilon=1 -> every action is random
        # NOTE(review): `vp` used as attribute here, sibling versions call `vp()` — confirm.
        env: Env = epoch(agents,
                         u=Globals().get_u(Globals().vp.max_time_learn),
                         time=Globals().vp.max_time_learn)
        for agent in env.agents:
            agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='politechnika',
                                    densityName='random_now' + str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
    save_batches(agents)
    return agents
def run_learnt_greedy(saveJson=True):
    """Run one greedy evaluation epoch on the 4-agent 'polibuda' network.

    Loads the four saved agent models, runs a greedy epoch, optionally exports
    the run to JSON, prints diagnostics and bumps the global greedy-run counter.

    FIX: the original declared ``saveJson=False`` and then immediately did
    ``saveJson = True``, so the parameter was dead and the export always ran.
    The default is now True (preserving the effective behavior of every
    existing call) and the override is removed, so callers can actually
    disable the export.

    :param saveJson: when True, export the epoch's memories to JSON.
    :return: (rewards_mean, rewards_sum, cars_out, agents)
    """
    Globals().cars_out_memory = []
    Globals().cars_in_memory = []
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5', 'static_files/model-agent3.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    # print('weights!',agents[0].model.weights[0])
    env = Env(agents)
    epoch_greedy(env)
    # env.update_memory_rewards()  # TODO can this be uncommented?
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    print('cars_out', cars_out)
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='polibuda',
                                densityName='learnt_' + str(Globals().greedy_run_no))
        exportData.saveToJson()
        # print('exported')
    # The factor 8 presumably reflects the number of inflow lanes — TODO confirm.
    maximum_possible_cars_out = Globals().u_value * Globals().vp(
    ).max_time_greedy * 8
    print('memory losowych', Globals().actions_memory)
    print('max greedy', max([max(x) for x in env.x]))
    print(
        f'gready run {Globals().greedy_run_no} - rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out} procentowo:{float(cars_out)/maximum_possible_cars_out}'
    )
    Globals().greedy_run_no += 1
    return rewards_mean, rewards_sum, cars_out, agents
def run_learnt_greedy(saveJson=True):
    """Run one greedy evaluation epoch on the 3-agent env3 network.

    Loads three saved agent models, runs a greedy epoch, optionally exports
    the run to JSON, prints a summary and bumps the global greedy-run counter.

    :param saveJson: when True, export the epoch's memories to JSON.
    :return: (rewards_mean, rewards_sum, cars_out, agents)
    """
    model_file_names = [
        'static_files/model-agent0.h5', 'static_files/model-agent1.h5',
        'static_files/model-agent2.h5'
    ]
    agents = get_LearnSmartAgents(model_file_names)
    env = Env(agents)
    epoch_greedy(env)
    rewards_sum, rewards_mean = count_rewards(env)
    cars_out = env.cars_out
    if saveJson:
        exportData = ExportData(learningMethod='DQN',
                                learningEpochs=0,
                                nets=env.global_memories,
                                netName='env3',
                                densityName='learnt_' + str(Globals().greedy_run_no))
        exportData.saveToJson()
    # The factor 3 presumably reflects the number of inflow lanes in env3 — TODO confirm.
    maximum_possible_cars_out = Globals().u_value * Globals().vp(
    ).max_time_greedy * 3
    print(
        f'gready run {Globals().greedy_run_no} - rewards_mean:{round(rewards_mean, 2)} rewards_sum:{round(rewards_sum,0)} cars_out:{round(cars_out, 0)} układ opuściło procentowo pojazdów:{cars_out / maximum_possible_cars_out}'
    )
    Globals().greedy_run_no += 1
    return rewards_mean, rewards_sum, cars_out, agents
def train(learntAgents=True, max_time_learn=20):
    """Train the first three agents' models, each on its own saved batch.

    Each model is fitted repeatedly in 1-epoch steps until the wall-clock
    budget expires or validation loss rises once. Agent 0's prediction for a
    monitored state is recorded and plotted across calls.

    :param learntAgents: unused here — learnt agents are always loaded;
        kept for signature compatibility with sibling train() variants.
    :param max_time_learn: wall-clock training budget (seconds) per model.
    """
    # NOTE(review): the next four locals are unused; they document the
    # hyper-parameters once passed to the commented-out create_model() call.
    # 'relu' labelled "optimizer" is presumably an activation — TODO confirm.
    l_rate = 0.0001
    layers = [15, 25, 20, 15]
    optimizer = 'relu'
    regularizers_ = [0.2, 0.2, 0.2]
    print('train learntAgents', learntAgents)
    agents = get_LearnSmartAgents()
    # create_model(layers, optimizer, l_rate)  # for i in range(3)
    models = [agent.model for agent in agents]
    batches = get_batches()
    # for i in range(len(models)):
    for i in range(3):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        x2 = []  # NOTE(review): unused; only referenced by commented-out code below
        y2 = []
        # Sentinel "worse than anything" initial loss.
        val_loss = 5000
        escape_flag = False
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=100,
                            epochs=1,
                            verbose=0,
                            validation_split=0.2)
            if res.history['val_loss'][-1] > val_loss:
                # Validation loss got worse — stop early.
                escape_flag = True
                print('wynik sieci', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
        # res = model.fit(np.array(x2), np.array(y2), batch_size=20, epochs=1, verbose=0)
        if i == 0:
            # Monitored state for agent 0 (concatenated density groups + phase).
            # x = [7, 10, 10] + [10, 10, 20] + [6, 5, 4] + [2]
            x = [4, 4, 62] + [10, 10, 49] + [0, 10, 10] + [0]
            pred = model.predict(np.array([x]))
            Globals().pred_plot_memory.append(pred)
        # model.evaluate(np.array(x2), np.array(y2))
        model.save('static_files/model-agent' + str(i) + '.h5')
        # NOTE(review): plotting placement reconstructed as inside the loop,
        # guarded to agent 0 — confirm against the original layout.
        if i == 0:
            plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
                     color='red',
                     label='0')
            plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
                     color='green',
                     label='1')
            plt.plot([pred[0][2] for pred in Globals().pred_plot_memory],
                     color='blue',
                     label='2')
            plt.legend()
            plt.title(
                'Nagrody przewidziane dla akcji podjętych podczas monitorowanego stanu'
            )
            plt.savefig('foo' + str(Globals().run_no) + '.png')
            plt.close()
def draw_predictions(no):
    """Visualize, per (den1, den3) pair, the fraction of states preferring action 0 vs 1.

    Enumerates all states [den0, den1, den2, den3, 0] with each density in
    0..24 (phase fixed at 0), predicts with agent 0's model, and colors each
    (den1, den3) grid cell by the ratio of action-0 (red channel) to action-1
    (green channel) winners; cells with no winners are plotted black.

    :param no: suffix for the output file 'predictions<no>.png'.
    """
    agents = get_LearnSmartAgents()
    to_predict = []
    for den0 in range(25):
        for den1 in range(25):
            for den2 in range(25):
                for den3 in range(25):
                    to_predict.append([den0, den1, den2, den3,
                                       0])  # for phase 0 for now
    predictions = agents[0].model.predict(np.array(to_predict))
    dots_action_0 = []
    dots_action_1 = []
    dots_action_orange = []
    for i in range(len(predictions)):
        pred = predictions[i]
        to_predict_state = to_predict[i]
        best_action_predicted = np.argmax(pred)
        if best_action_predicted == 0:
            dots_action_0.append(to_predict_state)
        if best_action_predicted == 1:
            dots_action_1.append(to_predict_state)
        if best_action_predicted == 2:
            dots_action_orange.append(to_predict_state)
    # x axis: density from the top, y axis: density from the bottom
    fig, ax = plt.subplots()
    for den1 in range(25):
        for den3 in range(25):
            actions_0_better = len([
                den for den in dots_action_0
                if den[1] == den1 and den[3] == den3
            ])
            actions_1_better = len([
                den for den in dots_action_1
                if den[1] == den1 and den[3] == den3
            ])
            # NOTE(review): `all` shadows the builtin of the same name.
            all = actions_0_better + actions_1_better
            if all == 0:
                # No winning states in this cell — black dot, avoid division by zero.
                ax.plot(den1, den3, 'o', color=(0, 0, 0))
                continue
            # print('action_0_better',actions_0_better)
            # print('action_1_better',actions_1_better)
            red = actions_0_better / all
            green = actions_1_better / all
            # print('red', actions_0_better)
            # print('green', actions_1_better)
            # print('r', red)
            # print('g', green)
            # if den1==den3:
            #     print(green)
            ax.plot(den1, den3, 'o', color=(red, green, 0))
    # plt.plot([den[1] for den in dots_action_0], [den[3] for den in dots_action_0], 'ro')
    # plt.plot([den[1] for den in dots_action_1], [den[3] for den in dots_action_1], 'go')
    # plt.plot([den[0] for den in dots_action_orange], [den[1] for den in dots_action_orange], 'bo')
    # print("draw pred!")
    # fig.savefig('plotcircles.png')
    name = 'predictions' + str(no) + '.png'
    fig.savefig(name)
    plt.close(fig)
    a = 3  # NOTE(review): dead statement, likely a leftover debugger anchor
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(200),
                           plotting=False):
    """Run random-exploration epochs on net4 and dump per-agent batch files.

    For each epoch: force epsilon to 1 (pure exploration), run an epoch with
    the fixed inflow ``env_settings.u_all_4``, reshape and update rewards,
    remember the memories, optionally export JSON and collect plotting stats.
    Afterwards every agent's full batch is written to
    static_files/{x,y}_batch_agent_<i>.txt and optional summary plots saved.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param save_front_json: export each epoch's memories for the frontend.
    :param epochs: iterable of epoch indices to run.
    :param plotting: collect and save cars-out / reward plots.
    """
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        Globals().epsilon = 1  # epsilon=1 -> every action is random
        env: Env = epoch(agents, u=env_settings.u_all_4)
        for agent in env.agents:
            agent.reshape_rewards()
        env.update_memory_rewards()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net4',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        # NOTE(review): this result is unused (overwritten below) — presumably
        # called for its side effects on agent memory, TODO confirm.
        x_batch, y_batch = agents[0].memory_to_minibatch_with_oranges()
        if plotting:
            cars_outs.append(env.cars_out)
            print('rew', env.count_summed_rewards()[0])
            print('cars_out', env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
    for i in range(len(agents)):
        # print('i',i)
        filename = 'static_files/x_batch_agent_' + str(i) + '.txt'
        x_batch, y_batch = agents[i].full_batch()
        np.savetxt(filename, x_batch, delimiter=',')
        filename = 'static_files/y_batch_agent_' + str(i) + '.txt'
        np.savetxt(filename, y_batch, delimiter=',')
    if plotting:
        plt.plot(cars_outs)
        plt.title('Ilość pojazdów opuszczających układ - losowe akcje')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Średnia nagroda za akcję - losowe akcje')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Suma nagród - losowe akcje')
        plt.savefig('img_rewards_random.png')
        plt.close()
def train(learntAgents=True, max_time_learn=20, agents=None):
    """Train agent 0's model with TensorBoard/weight-history callbacks,
    keeping the best-validation weights.

    Fits in chunks of ``Globals().epochs_learn`` epochs until the wall-clock
    budget runs out or validation loss worsens 3 times; then restores the
    best-seen weights and saves the model.

    FIX: the original ended with ``Globals().last_weights == model.get_weights()``
    — a comparison whose result was discarded — where an assignment was clearly
    intended (it is the only place ``last_weights`` would be written). Also the
    bare ``except:`` is narrowed to ``except Exception:``.

    :param learntAgents: load learnt agents when ``agents`` is not supplied.
    :param max_time_learn: wall-clock training budget in seconds.
    :param agents: optional pre-built agents; fetched when None.
    """
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 5000  # sentinel "worse than anything"
        val_loss_best = 5000
        escape_flag = False
        escape_val = 0  # consecutive-ish count of worsening validation losses
        a = 0  # debug guard so the diff print fires at most once
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_done,
                            epochs=Globals().epochs_done + Globals().epochs_learn,
                            verbose=0,
                            validation_split=0.2,
                            callbacks=[Globals().tensorboard,
                                       agents[i].weights_history_callback])
            Globals().epochs_done += Globals().epochs_learn
            if res.history['val_loss'][-1] < val_loss_best:
                # New best validation loss — remember these weights.
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] > val_loss:
                escape_val += 1
                # print('escape_val',escape_val)
                # print('val loss',res.history['val_loss'][-1])
                if escape_val > 2:
                    escape_flag = True
                    # print('przerwalbym!!!!!!')
                # print('wynik sieci', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
            if i == 0:
                # Track prediction drift for a monitored state across fit chunks.
                x = [0, 0, 10, 15, 1, 0, 0, 0]
                pred = model.predict(np.array([x]))
                try:
                    diff = abs(pred[0][0] - Globals().pred_plot_memory[-1][0][0]) + abs(
                        pred[0][1] - Globals().pred_plot_memory[-1][0][1])
                    if a == 0:
                        # print('diff', diff)
                        a += 1
                except Exception:
                    # First call: pred_plot_memory is empty — nothing to diff against.
                    a = 23
                Globals().pred_plot_memory.append(pred)
        # print('najlepszy loss',val_loss_best)
        # print('koniec', model.get_weights())
        Globals().last_weights = model.get_weights()  # FIX: was '==' (no-op comparison)
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
def draw_batches(file_name='batches.png'):
    """Scatter-plot every feature column of agent 0's x-batch.

    Column index goes on the x axis, the column's values on the y axis,
    all drawn as black dots.

    :param file_name: output image path.
    """
    learnt_agents = get_LearnSmartAgents()
    agent_batches = get_batches(learnt_agents)
    features = agent_batches[0]['x_batch']
    targets = agent_batches[0]['y_batch']  # fetched but not drawn, as before
    fig, axis = plt.subplots()
    for column_index, column in enumerate(zip(*features)):
        xs = [column_index] * len(column)
        axis.plot(xs, list(column), 'o', color=(0, 0, 0))
    fig.savefig(file_name)
def generate_my_epochs(learntAgents=False,
                       save_front_json=False,
                       epochs=range(1),
                       plotting=False,
                       reshaping=False,
                       actions=None,
                       clear_memory=True,
                       actual_number=''):
    """Run scripted ('my') epochs via my_epoch() and persist the batches.

    NOTE(review): ``save_front_json`` and ``reshaping`` are forcibly set to
    True at entry, overriding the caller's arguments; the JSON export is then
    disabled after the first epoch (save_front_json flips to False inside the
    loop), so only the first epoch is exported.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param save_front_json: (overridden to True) export the first epoch's memories.
    :param epochs: iterable of epoch indices to run.
    :param plotting: collect and save cars-out / reward plots.
    :param reshaping: (overridden to True) reshape rewards after each epoch.
    :param actions: unused here — presumably consumed by my_epoch in other
        variants; TODO confirm.
    :param clear_memory: wipe every agent's memories before running.
    :param actual_number: suffix forwarded to save_batches().
    :return: the agents with their collected memories.
    """
    save_front_json = True
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    # print(agents[0].orange_phase_duration)
    for e in epochs:
        Globals().epsilon = 1  # epsilon=1 -> every action is random
        env: Env = my_epoch(agents,
                            u=Globals().get_u(Globals().vp().max_time_learn),
                            time=Globals().vp().max_time_learn)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        # Diagnostic split of first-step rewards by the action taken.
        action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        if save_front_json:
            save_front_json = False  # export only the first epoch
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net16',
                                    densityName='my_epochs' + str(Globals().greedy_run_no))
            exportData.saveToJson()
        env.remember_memory()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents, actual_number)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Ilość pojazdów opuszczających układ - losowe akcje')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Średnia nagroda za akcję - losowe akcje')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Suma nagród - losowe akcje')
        plt.savefig('img_rewards_random.png')
        plt.close()
    # if any(x for x in [mem.reward for mem in agents[0].memories] if x > 10.1):
    #     print("weeeeeeeeeeee")
    return agents
def train(learntAgents=True, max_time_learn=20):
    """Train agent 0's model on its saved batch, then plot the prediction history.

    Fits repeatedly in 1-epoch steps until the wall-clock budget expires or
    validation loss rises once. Records the model's prediction for the
    monitored state [4, 40, 0], saves the model and plots how the three action
    values for that state evolve across train() calls.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param max_time_learn: wall-clock training budget in seconds.
    """
    if not learntAgents:
        agents = get_SmartAgents()
    else:
        print('get learnt!')
        agents = get_LearnSmartAgents()
    a = 7  # NOTE(review): dead statement, likely a leftover debugger anchor
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        val_loss = 5000  # sentinel "worse than anything"
        escape_flag = False
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=100,
                            epochs=1,
                            verbose=0,
                            validation_split=0.2)
            if res.history['val_loss'][-1] > val_loss:
                # Validation loss got worse — stop early.
                escape_flag = True
                print('wynik sieci', res.history['val_loss'][-1])
                val_loss = 5000
            else:
                val_loss = res.history['val_loss'][-1]
        # NOTE(review): reconstructed as after the while loop (one recorded
        # prediction per train() call) — confirm against the original layout.
        if i == 0:
            x = [4, 40, 0]  # monitored state
            pred = model.predict(np.array([x]))
            Globals().pred_plot_memory.append(pred)
        model.save('static_files/model-agent' + str(i) + '.h5')
        if i == 0:
            plt.plot([pred[0][0] for pred in Globals().pred_plot_memory],
                     color='red',
                     label='0')
            plt.plot([pred[0][1] for pred in Globals().pred_plot_memory],
                     color='green',
                     label='1')
            plt.plot([pred[0][2] for pred in Globals().pred_plot_memory],
                     color='blue',
                     label='2')
            plt.legend()
            plt.title(
                'Nagrody przewidziane dla akcji podjętych podczas monitorowanego stanu'
            )
            plt.savefig('foo' + str(Globals().run_no) + '.png')
            plt.close()
def train(learntAgents=True,
          max_time_learn=60,
          agents=None,
          shuffle=True,
          batches=None,
          actual_number=''):
    """Train every agent's model on its batch, restoring the best-validation weights.

    Each model is fitted in chunks of ``Globals().vp().epochs_learn`` epochs
    until the wall-clock budget expires or validation loss fails to improve
    more than 10 times; then the best-seen weights are restored and the model
    is saved.

    :param learntAgents: load learnt agents when ``agents`` is not supplied.
    :param max_time_learn: wall-clock training budget (seconds) per model.
    :param agents: optional pre-built agents; fetched when None.
    :param shuffle: NOTE(review) unused in this body — presumably meant to be
        forwarded to model.fit(); TODO confirm.
    :param batches: optional pre-loaded batches; fetched when None.
    :param actual_number: suffix forwarded to get_batches().
    """
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    if batches is None:
        batches = get_batches(agents, actual_number)
    models = [agent.model for agent in agents]
    for i in range(len(agents)):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10**10  # sentinel "worse than anything"
        val_loss_best = 10**10
        escape_flag = False
        escape_val = 0  # count of non-improving validation results
        start_flag = True
        while timer() - start_time < max_time_learn and not escape_flag:
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_learn_done,
                            epochs=Globals().epochs_learn_done + Globals().vp().epochs_learn,
                            validation_split=0.2,
                            verbose=0)
            Globals().epochs_learn_done += Globals().vp().epochs_learn
            if start_flag:
                # NOTE(review): flag flips on the first iteration but gates nothing else.
                start_flag = False
            if res.history['val_loss'][-1] < val_loss_best:
                # New best validation loss — remember these weights.
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] >= val_loss:
                escape_val += 1
                if escape_val > 10:
                    escape_flag = True
                val_loss = 10**10
            else:
                val_loss = res.history['val_loss'][-1]
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1),
                           plotting=False,
                           u=Globals().u,
                           clear_memory=True):
    """Run random-exploration epochs on net14 with a caller-supplied inflow.

    NOTE(review): the default ``u=Globals().u`` is evaluated once at import
    time — later changes to Globals().u will not affect the default; confirm
    this is intended.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param save_front_json: export each epoch's memories (twice: before and
        after remember_memory(), as 'random_updated<e>' and 'random_<e>').
    :param epochs: iterable of epoch indices to run.
    :param plotting: collect plotting stats (collected but not drawn here).
    :param u: inflow matrix handed to epoch().
    :param clear_memory: wipe every agent's memories before running.
    :return: the agents with their collected memories.
    """
    reshaping = True  # always on; local flag kept for parity with siblings
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    for e in epochs:
        Globals().epsilon = 1  # epsilon=1 -> every action is random
        env: Env = epoch(agents, u=u)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
    save_batches(agents)
    return agents
def draw_predictions(no):
    """Visualize, per (den1, den3) pair, the fraction of states preferring action 0 vs 1.

    Enumerates all states [den0, den1, den2, den3] with each density in 0..24,
    predicts with agent 0's model, and colors each (den1, den3) grid cell by
    the ratio of action-0 (red channel) to action-1 (green channel) winners.

    FIX: when a cell contained no action-0 or action-1 winners the original
    divided by zero; cells with no winners are now drawn black and skipped —
    the same guard the sibling draw_predictions variant already uses. The
    local ``all`` was also renamed to ``total`` to stop shadowing the builtin.

    :param no: suffix for the output file 'predictions<no>.png'.
    """
    agents = get_LearnSmartAgents()
    to_predict = []
    for den0 in range(25):
        for den1 in range(25):
            for den2 in range(25):
                for den3 in range(25):
                    to_predict.append([den0, den1, den2, den3])
    predictions = agents[0].model.predict(np.array(to_predict))
    dots_action_0 = []
    dots_action_1 = []
    dots_action_orange = []
    for i in range(len(predictions)):
        pred = predictions[i]
        to_predict_state = to_predict[i]
        best_action_predicted = np.argmax(pred)
        if best_action_predicted == 0:
            dots_action_0.append(to_predict_state)
        if best_action_predicted == 1:
            dots_action_1.append(to_predict_state)
        if best_action_predicted == 2:
            dots_action_orange.append(to_predict_state)
    # x axis: density from the top, y axis: density from the bottom
    fig, ax = plt.subplots()
    for den1 in range(25):
        for den3 in range(25):
            actions_0_better = len([
                den for den in dots_action_0
                if den[1] == den1 and den[3] == den3
            ])
            actions_1_better = len([
                den for den in dots_action_1
                if den[1] == den1 and den[3] == den3
            ])
            total = actions_0_better + actions_1_better
            if total == 0:
                # FIX: no winners in this cell — draw black instead of dividing by zero.
                ax.plot(den1, den3, 'o', color=(0, 0, 0))
                continue
            red = actions_0_better / total
            green = actions_1_better / total
            ax.plot(den1, den3, 'o', color=(red, green, 0))
    fig.savefig('predictions' + str(no) + '.png')
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1)):
    """Run fully random exploration epochs and persist the resulting batches.

    Every agent's memory is wiped first; each epoch is run with epsilon
    forced to 1 (pure exploration), rewards are reshaped and remembered,
    and the accumulated batches are saved at the end.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param save_front_json: unused in this variant; kept for signature parity.
    :param epochs: iterable of epoch indices to run.
    :return: the agents with their collected memories.
    """
    cars_outs = []
    rewards = []
    rewards_mean = []  # collected nowhere in this variant; kept for parity
    agents: List[SmartAgent] = (get_LearnSmartAgents()
                                if learntAgents else get_SmartAgents())
    for current_agent in agents:
        current_agent.memories = []
    for _epoch_index in epochs:
        Globals().epsilon = 1  # epsilon=1 -> every action is random
        env: Env = epoch(agents,
                         u=Globals().get_u(Globals().vp().max_time_learn),
                         time=Globals().vp().max_time_learn)
        for current_agent in env.agents:
            current_agent.reshape_rewards()
        env.remember_memory()
        Globals().actual_epoch_index += 1
    save_batches(agents)
    return agents
def run_learnt_greedy(saveJson=True):
    """Run one greedy evaluation epoch on the single-agent env1 network.

    Loads the saved agent-0 model, runs a greedy epoch, optionally exports
    the run to JSON, bumps the global greedy-run counter and prints a summary.

    :param saveJson: when True, export the epoch's memories to JSON.
    :return: (rewards_mean, rewards_sum, cars_out)
    """
    greedy_agents = get_LearnSmartAgents(['static_files/model-agent0.h5'])
    environment = Env(greedy_agents)
    epoch_greedy(environment)
    rewards_sum, rewards_mean = count_rewards(environment)
    cars_out = environment.cars_out
    if saveJson:
        export = ExportData(learningMethod='DQN',
                            learningEpochs=0,
                            nets=environment.global_memories,
                            netName='env1',
                            densityName='learnt_' + str(Globals().greedy_run_no))
        export.saveToJson()
    Globals().greedy_run_no += 1
    print(
        f'gready run - rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out}'
    )
    return rewards_mean, rewards_sum, cars_out
def train(learntAgents=True, max_time_learn=60, agents=None):
    """Train agent 0's model, restoring the best-validation weights at the end.

    Fits in chunks of ``Globals().epochs_learn`` epochs until the wall-clock
    budget expires or validation loss worsens 3 times; records the prediction
    for a monitored state, restores the best-seen weights and saves the model.

    :param learntAgents: load learnt agents when ``agents`` is not supplied.
    :param max_time_learn: wall-clock training budget in seconds.
    :param agents: optional pre-built agents; fetched when None.
    """
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    models = [agent.model for agent in agents]
    batches = get_batches()
    for i in range(1):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10 ** 10  # sentinel "worse than anything"
        val_loss_best = 10 ** 10
        escape_flag = False
        escape_val = 0  # count of worsening validation results
        while timer() - start_time < max_time_learn and not escape_flag:
            # NOTE(review): here `vp` is used as an attribute while sibling
            # variants call `vp()` — confirm which one Globals provides.
            res = model.fit(x_batch,
                            y_batch,
                            batch_size=Globals().vp.batch_size,
                            initial_epoch=Globals().epochs_done,
                            epochs=Globals().epochs_done + Globals().epochs_learn,
                            validation_split=0.2,
                            verbose=0)
            # callbacks=[Globals().tensorboard,agents[i].weights_history_callback]
            Globals().epochs_done += Globals().epochs_learn
            if res.history['val_loss'][-1] < val_loss_best:
                # New best validation loss — remember these weights.
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] > val_loss:
                escape_val += 1
                if escape_val > 2:
                    escape_flag = True
                val_loss = 10 ** 10
            else:
                val_loss = res.history['val_loss'][-1]
        # NOTE(review): reconstructed as after the while loop (one recorded
        # prediction per train() call) — confirm against the original layout.
        if i == 0:
            # for phase 0 - better if it actually switched to light 1
            x = [0, 0, 10, 15, 1, 0]
            pred = model.predict(np.array([x]))
            Globals().pred_plot_memory.append(pred)
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
def run_learnt_greedy(saveJson=False):
    """Run one greedy evaluation epoch on the single-agent net11 network.

    Loads the saved agent-0 model, runs a greedy epoch, optionally exports
    the run to JSON and bumps the global greedy-run counter.

    :param saveJson: when True, export the epoch's memories to JSON.
    :return: (rewards_mean, rewards_sum, cars_out)
    """
    # saveJson = True
    learnt_agents = get_LearnSmartAgents(['static_files/model-agent0.h5'])
    # print('weights!',learnt_agents[0].model.weights[0])
    environment = Env(learnt_agents)
    epoch_greedy(environment)
    # environment.update_memory_rewards()  # TODO: can this be uncommented?
    rewards_sum, rewards_mean = count_rewards(environment)
    cars_out = environment.cars_out
    if saveJson:
        export = ExportData(learningMethod='DQN',
                            learningEpochs=0,
                            nets=environment.global_memories,
                            netName='net11',
                            densityName='learnt_' + str(Globals().greedy_run_no))
        export.saveToJson()
    Globals().greedy_run_no += 1
    # print(f'gready run - rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out}')
    return rewards_mean, rewards_sum, cars_out
def draw_weights():
    """Render the weight plot of every learnt agent."""
    for learnt_agent in get_LearnSmartAgents():
        learnt_agent.plot_weights()
actual_number=actual_number) train(max_time_learn=Globals().vp().max_time_learn, actual_number=actual_number) # result = run_learnt_greedy() maximum_possible_cars_out = Globals().u_value * Globals().vp( ).max_time_greedy * 3 # print('max possible', maximum_possible_cars_out) pred_array = np.array([[ 90, 0, 0, 0, 0, 1, ]]) agent_0 = get_LearnSmartAgents()[0] pred_history.append(agent_0.model.predict(pred_array)[0]) print(f'{actual_number} pred {pred_history[-1]}') batches = get_batches(get_LearnSmartAgents(), actual_number) x_batch = batches[0]['x_batch'] y_batch = batches[0]['y_batch'] y_batch_history.append(y_batch[19]) # print(f'x_batch {x_batch} y_batch {y_batch}') draw_y_history(y_batch_history) indexes = [i for i in range(len(x_batch)) if x_batch[i][-1] == 0] # if result[2] > maximum_possible_cars_out * 0.93: # cars_out # print('u przed',Globals().u_value) # Globals().u_value=Globals().u_value*1.2 # print('u po',Globals().u_value) # results.append(result)
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1),
                           plotting=False,
                           reshaping=False):
    """Run random-exploration epochs on net14 and dump per-agent batch files.

    Each epoch uses the randomized inflow env_settings.get_u_under_x_random(8),
    optionally reshapes rewards, exports JSON twice (before and after
    remember_memory()) and collects plotting stats. Afterwards each agent's
    full batch is written to static_files/{x,y}_batch_agent_<i>.txt and
    optional summary plots are saved.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param save_front_json: export each epoch's memories for the frontend.
    :param epochs: iterable of epoch indices to run.
    :param plotting: collect and save cars-out / reward plots.
    :param reshaping: reshape each agent's rewards after every epoch.
    """
    # save_front_json = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        # print('epoch!!!!!',e)
        Globals().epsilon = 1  # epsilon=1 -> every action is random
        env: Env = epoch(agents, u=env_settings.get_u_under_x_random(8))
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        # Diagnostic split of first-step rewards by the action taken.
        action_0_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [0]
        ]
        action_1_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [1]
        ]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('kupa')
        # else:
        #     print('ok')
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net14',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        # Recomputed after remember_memory(); results unused (debug leftovers).
        action_0_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [0]
        ]
        action_1_rewards = [
            net.rewards[0] for net in env.global_memories
            if net.actions == [1]
        ]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards)>np.mean(action_1_rewards):
        #     print('kupa')
        # else:
        #     print('ok')
    for i in range(len(agents)):
        filename = 'static_files/x_batch_agent_' + str(i) + '.txt'
        x_batch, y_batch = agents[i].full_batch()
        np.savetxt(filename, x_batch, delimiter=',')
        filename = 'static_files/y_batch_agent_' + str(i) + '.txt'
        np.savetxt(filename, y_batch, delimiter=',')
    if plotting:
        plt.plot(cars_outs)
        plt.title('Ilość pojazdów opuszczających układ - losowe akcje')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Średnia nagroda za akcję - losowe akcje')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Suma nagród - losowe akcje')
        plt.savefig('img_rewards_random.png')
        plt.close()
from services.parser import get_G

# Actions are plain ints; alias used only for annotation readability.
ActionInt = int


def epoch():
    """Run a single greedy evaluation epoch over the module-level ``agents``.

    NOTE(review): relies on module-level names ``agents`` (defined below,
    before this is called) and ``max_time`` (not defined in this chunk —
    presumably set elsewhere in the full file; confirm, otherwise this raises
    NameError).
    """
    Globals().time = 0
    env = Env(agents)
    for t in range(max_time):
        actions: List[ActionInt] = [
            agent.get_action(agent.local_state) for agent in agents
        ]
        env.step(actions)
    # NOTE(review): placement reconstructed as after the loop (one increment
    # per epoch) — confirm against the original layout.
    Globals().epochs_done += 1
    return env


Globals().max_epsilon = 0  # epsilon 0 -> no exploration: evaluation run
agents: List[SmartAgent] = get_LearnSmartAgents()
env: Env = epoch()  # :1
rewards = nested_sum(env.global_rewards)
print('rewards', rewards)
print('carsout', env.cars_out)
exportData = ExportData(learningMethod='Monte Carlo TODO',
                        learningEpochs=0,
                        nets=env.global_memories,
                        netName='net4',
                        densityName='last_epoch')
exportData.saveToJson()
def generate_random_epochs(learntAgents=False,
                           save_front_json=False,
                           epochs=range(1),
                           plotting=False,
                           reshaping=False,
                           actions=None,
                           clear_memory=True):
    """Run random-exploration epochs on the 'politechnika' network and save batches.

    NOTE(review): ``reshaping`` is forcibly set to True at entry, overriding
    the caller's argument. ``actions`` is unused in this body.

    :param learntAgents: load previously learnt agents instead of fresh ones.
    :param save_front_json: export each epoch's memories for the frontend.
    :param epochs: iterable of epoch indices to run.
    :param plotting: collect and save cars-out / reward plots.
    :param reshaping: (overridden to True) reshape rewards after each epoch.
    :param actions: unused; kept for signature parity with siblings.
    :param clear_memory: wipe every agent's memories before running.
    :return: the agents with their collected memories.
    """
    # save_front_json = True
    reshaping = True
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    if clear_memory:
        for agent in agents:
            agent.memories = []
    # print(agents[0].orange_phase_duration)
    for e in epochs:
        Globals().epsilon = 1  # epsilon=1 -> every action is random
        env: Env = epoch(agents,
                         u=Globals().get_u(Globals().vp().max_time_learn),
                         time=Globals().vp().max_time_learn)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        # Diagnostic split of first-step rewards by the action taken (unused).
        action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards) > np.mean(action_1_rewards):
        #     print('kupa')
        # else:
        #     print('ok')
        # [x[:][1] for x in self.A[0]]
        if save_front_json:
            exportData = ExportData(learningMethod='DQN',
                                    learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='politechnika',
                                    densityName='random_now' + str(Globals().greedy_run_no))
            exportData.saveToJson()
            # NOTE(review): print reconstructed as inside the export branch — confirm.
            print('doneeeeeee', e)
        env.remember_memory()
        if plotting:
            cars_outs.append(env.cars_out)
            rewards.append(env.count_summed_rewards()[0])
            rewards_mean.append(env.count_summed_rewards()[1])
        Globals().actual_epoch_index += 1
        # action_0_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [0]]
        # action_1_rewards = [net.rewards[0] for net in env.global_memories if net.actions == [1]]
        # print('mean 0', np.mean(action_0_rewards))
        # print('mean 1', np.mean(action_1_rewards))
        # if np.mean(action_0_rewards)>np.mean(action_1_rewards):
        #     print('kupa')
        # else:
        #     print('ok')
    # print('cars_out random', env.cars_out)
    # for agent in agents:
    #     highest_x=max([max(mem.state.to_learn_array(agent)[0]) for mem in agent.memories])
    #     print('highest_x',highest_x)
    save_batches(agents)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Ilość pojazdów opuszczających układ - losowe akcje')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Średnia nagroda za akcję - losowe akcje')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Suma nagród - losowe akcje')
        plt.savefig('img_rewards_random.png')
        plt.close()
    # if any(x for x in [mem.reward for mem in agents[0].memories] if x > 10.1):
    #     print("weeeeeeeeeeee")
    return agents
def generate_random_epochs(learntAgents=False, save_front_json=False, epochs=range(1), plotting=False,
                           reshaping=False, u=env_settings.u, actions=None):
    """Run simulation epochs with fully random actions using a fixed input profile.

    NOTE(review): this redefinition shadows the earlier ``generate_random_epochs``
    in this module; only this version is visible to importers — confirm which one
    is intended to survive.

    :param learntAgents: load pre-trained agents instead of fresh ones
    :param save_front_json: export the memories to front-end JSON (twice per epoch,
        as in the original: once as 'random_updated<e>' and once as 'random_<e>')
    :param epochs: iterable of epoch indices to run
    :param plotting: collect per-epoch stats and write summary PNG plots
    :param reshaping: NOTE(review): immediately overridden to True below; kept as-is
    :param u: traffic-input profile passed straight to ``epoch``
    :param actions: unused; kept for interface compatibility
    :return: the agents with their collected memories
    """
    reshaping = True  # deliberately forces reshaping regardless of the parameter
    cars_outs = []
    rewards = []
    rewards_mean = []
    if learntAgents:
        agents: List[SmartAgent] = get_LearnSmartAgents()
    else:
        agents: List[SmartAgent] = get_SmartAgents()
    for e in epochs:
        Globals().epsilon = 1  # epsilon == 1 -> purely random action selection
        env: Env = epoch(agents, u=u)
        if reshaping:
            for agent in env.agents:
                agent.reshape_rewards()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net15',
                                    densityName='random_updated' + str(e))
            exportData.saveToJson()
        env.remember_memory()
        if save_front_json:
            exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                                    nets=env.global_memories,
                                    netName='net15',
                                    densityName='random_' + str(e))
            exportData.saveToJson()
        if plotting:
            cars_outs.append(env.cars_out)
            # compute the (sum, mean) pair once instead of calling the method twice
            rewards_sum, rewards_avg = env.count_summed_rewards()
            rewards.append(rewards_sum)
            rewards_mean.append(rewards_avg)
        Globals().actual_epoch_index += 1
    save_batches(agents)
    if plotting:
        plt.plot(cars_outs)
        plt.title('Ilość pojazdów opuszczających układ - losowe akcje')
        plt.savefig('img_cars_out_random.png')
        plt.close()
        plt.plot(rewards_mean)
        plt.title('Średnia nagroda za akcję - losowe akcje')
        plt.savefig('img_rewards_mean_random.png')
        plt.close()
        plt.plot(rewards)
        plt.title('Suma nagród - losowe akcje')
        plt.savefig('img_rewards_random.png')
        plt.close()
    if any(mem.reward > 10 for mem in agents[0].memories):
        print("weeeeeeeeeeee")
    return agents
def train(learntAgents=True, max_time_learn=60, agents=None, shuffle=True, batches=None, actual_number=''):
    """Fit each agent's model on its prepared batch, keeping the best weights.

    Repeatedly fits in short rounds until ``max_time_learn`` seconds elapse or
    validation loss fails to improve for more than 10 rounds, then restores the
    weights from the round with the lowest validation loss and saves the model
    to ``static_files/model-agent<i>.h5``.

    Bug fixed: ``Globals().last_weights == model.get_weights()`` used ``==``
    (a no-op comparison); an assignment was clearly intended.

    :param learntAgents: when ``agents`` is None, load pre-trained agents
    :param max_time_learn: wall-clock budget (seconds) per agent's training loop
    :param agents: agents to train; fetched when None
    :param shuffle: unused; kept for interface compatibility
    :param batches: list of {'x_batch', 'y_batch'} dicts; fetched when None
    :param actual_number: forwarded to ``get_batches`` when fetching batches
    """
    if agents is None:
        if not learntAgents:
            agents = get_SmartAgents()
        else:
            agents = get_LearnSmartAgents()
    if batches is None:
        batches = get_batches(agents, actual_number)
    models = [agent.model for agent in agents]
    for i in range(len(agents)):
        start_time = timer()
        x_batch = batches[i]['x_batch']
        y_batch = batches[i]['y_batch']
        model = models[i]
        weights_best = model.get_weights()
        val_loss = 10 ** 10       # validation loss threshold from the previous round
        val_loss_best = 10 ** 10  # best validation loss seen so far
        escape_flag = False
        escape_val = 0            # rounds without validation-loss improvement
        inne = 0                  # rounds where the weights changed
        te_same = 0               # rounds where the weights stayed identical
        start_flag = True
        while timer() - start_time < max_time_learn and not escape_flag:
            wagi_przed_uczeniem = model.get_weights()
            res = model.fit(x_batch, y_batch, batch_size=Globals().vp().batch_size,
                            initial_epoch=Globals().epochs_learn_done,
                            epochs=Globals().epochs_learn_done + Globals().vp().epochs_learn,
                            validation_split=0.2,
                            verbose=0)
            # Sanity check: did fit actually move the first layer's weights?
            same = True
            porownanie = wagi_przed_uczeniem[0] == model.get_weights()[0]
            for porownanie_warstwa in porownanie:
                if any([s == False for s in porownanie_warstwa]):
                    same = False
            if same:
                te_same += 1
                print(f'wagi te same {te_same} inne {inne}')
            else:
                inne += 1
            Globals().epochs_learn_done += Globals().vp().epochs_learn
            if start_flag:
                start_flag = False
            if res.history['val_loss'][-1] < val_loss_best:
                # new best round: remember these weights
                val_loss_best = res.history['val_loss'][-1]
                weights_best = model.get_weights()
            if res.history['val_loss'][-1] >= val_loss:
                escape_val += 1
                if escape_val > 10:
                    escape_flag = True  # too many rounds without improvement
                val_loss = 10 ** 10
            else:
                val_loss = res.history['val_loss'][-1]
        if not (timer() - start_time < max_time_learn and not escape_flag):
            print('loss', res.history['val_loss'][-1])
        # was `==` (no-op comparison); assignment intended
        Globals().last_weights = model.get_weights()
        model.set_weights(weights_best)
        model.save('static_files/model-agent' + str(i) + '.h5')
def count_rewards(env):
    """Return ``(sum, mean)`` of rewards over all memories of all agents in ``env``.

    Robustness fix: returns ``(0, 0)`` when no agent holds any memories
    (previously raised ZeroDivisionError on the mean).

    :param env: environment whose ``agents`` each expose a ``memories`` list of
        objects with a numeric ``reward`` attribute
    """
    memsum = 0
    count = 0
    for agent in env.agents:
        for mem in agent.memories:
            count += 1
            memsum += mem.reward
    if count == 0:
        return 0, 0  # no memories recorded
    return memsum, memsum / count


# --- Module-level evaluation script: runs one greedy epoch with the learnt
# --- models and exports the result (executes on import; preserved as-is).
model_file_names = [
    'static_files/model-agent0.h5',
    'static_files/model-agent1.h5',
    'static_files/model-agent2.h5'
]
agents = get_LearnSmartAgents(model_file_names)
env = Env(agents)
epoch_greedy(env)
env.update_memory_rewards()
rewards_sum, rewards_mean = count_rewards(env)
cars_out = env.cars_out
exportData = ExportData(learningMethod='DQN', learningEpochs=0,
                        nets=env.global_memories,
                        netName='net4', densityName='learnt1')
exportData.saveToJson()
print(
    f'rewards_mean:{rewards_mean} rewards_sum:{rewards_sum} cars_out:{cars_out}'
)