from datetime import datetime
from multiprocessing import Pool


def learn():
    # top up the gene pool to 30 chromosomes
    while len(Chromosome.all()) < 30:
        Chromosome().create()
    test_set = Match.test_set()
    if not test_set:
        print('There are no matches to learn from')
        return
    agent_pool = [Agent(c) for c in Chromosome.all()]
    reserve = []
    n = 1
    with Pool(4) as pool:
        while True:
            try:
                print('Starting Generation {}'.format(n))
                start = datetime.now()
                for n2, agent in enumerate(agent_pool):
                    print('Running Agent {}'.format(n2))
                    guesses = pool.map(agent.guess, test_set)
                    agent.right = guesses.count(True)
                    agent.wrong = guesses.count(False)
                    agent.chromosome.agent_score = agent.accuracy
                    agent.chromosome.save()
                agent_pool.extend(reserve)
                agent_pool.sort(key=get_score, reverse=True)
                print('Generation took {}'.format(datetime.now() - start))
                print('Best Agent: {}% accurate'.format(agent_pool[0].accuracy * 100))
                Chromosome.delete_many(
                    ids=[agent.chromosome.id for agent in agent_pool[15:]])
                # keep the top 15 parents in reserve and breed the next
                # generation from mutated copies of them
                reserve = agent_pool[:15]
                agent_pool = [Agent(agent.chromosome.mutate()) for agent in reserve]
                n += 1
            except KeyboardInterrupt:
                Chromosome.clean()  # keep best 15
                break
    print('Finished learning')
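# `get_score` is used as the sort key above but is not defined in this
# listing. A minimal sketch, assuming agents are ranked by the accuracy
# just written to their chromosome (the exact attribute is an assumption):
def get_score(agent):
    # higher accuracy sorts first because the caller passes reverse=True
    return agent.chromosome.agent_score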
def test_agent_collisions2(self):
    world = World(5, 5)
    # agent moving north
    agentN = Agent(0)
    agentN.direction = Direction.NORTH
    agentN.move = True
    # agent moving south
    agentS = Agent(1)
    agentS.direction = Direction.SOUTH
    agentS.move = True
    # agent moving east
    agentE = Agent(2)
    agentE.direction = Direction.EAST
    agentE.move = True
    # agent moving west
    agentW = Agent(3)
    agentW.direction = Direction.WEST
    agentW.move = True
    # setup
    world.agents[(2, 0)] = agentN
    world.agents[(2, 4)] = agentS
    world.agents[(1, 1)] = agentE
    world.agents[(3, 1)] = agentW
    # update & test
    world.update()
    self.assertEqual(world.agents[(2, 1)], agentN,
                     '(1) world incorrectly updated north agent')
    self.assertEqual(world.agents[(2, 3)], agentS,
                     '(1) world incorrectly updated south agent')
    self.assertEqual(world.agents[(1, 1)], agentE,
                     '(1) world incorrectly updated east agent')
    self.assertEqual(world.agents[(3, 1)], agentW,
                     '(1) world incorrectly updated west agent')
    world.update()
    self.assertEqual(world.agents[(2, 2)], agentN,
                     '(2) world incorrectly updated north agent')
    self.assertEqual(world.agents[(2, 3)], agentS,
                     '(2) world incorrectly updated south agent')
    self.assertEqual(world.agents[(2, 1)], agentE,
                     '(2) world incorrectly updated east agent')
    self.assertEqual(world.agents[(3, 1)], agentW,
                     '(2) world incorrectly updated west agent')
    world.update()
    self.assertEqual(world.agents[(2, 2)], agentN,
                     '(3) world incorrectly updated north agent')
    self.assertEqual(world.agents[(2, 3)], agentS,
                     '(3) world incorrectly updated south agent')
    self.assertEqual(world.agents[(2, 1)], agentE,
                     '(3) world incorrectly updated east agent')
    self.assertEqual(world.agents[(3, 1)], agentW,
                     '(3) world incorrectly updated west agent')
    self.assertEqual(len(world.agents), 4,
                     'World has incorrect number of agents')
def main():
    # Initialize world
    world = World(5, 5)
    world.agents[(0, 0)] = Agent()
    world.agents[(0, 0)].move = True
    world.agents[(0, 0)].direction = Direction.EAST
    world.agents[(3, 3)] = Agent()
    world.agents[(3, 3)].move = True
    print('0', world)
    # Run simulation
    for t in range(1, 10):
        world.update()
        print(t, world)
def test_agent_move_east(self):
    world = World(5, 10)
    agent = Agent(0)
    agent.direction = Direction.EAST
    agent.move = True
    world.agents[(0, 0)] = agent
    self.assertEqual(world.agents[(0, 0)], agent,
                     '(0) world incorrectly set agent position')
    world.update()
    self.assertTrue((0, 0) not in world.agents,
                    'agent not moved from previous location')
    self.assertEqual(world.agents[(1, 0)], agent,
                     '(1) world incorrectly updated to move agent East')
    world.update()
    self.assertEqual(world.agents[(2, 0)], agent,
                     '(2) world incorrectly updated to move agent East')
    world.update()
    self.assertEqual(world.agents[(3, 0)], agent,
                     '(3) world incorrectly updated to move agent East')
    world.update()
    self.assertEqual(world.agents[(4, 0)], agent,
                     '(4) world incorrectly updated to move agent East')
    world.update()
    self.assertEqual(world.agents[(0, 0)], agent,
                     '(5) world incorrectly updated to move agent East (wrap)')
    self.assertEqual(len(world.agents), 1,
                     'World has incorrect number of agents')
def get_command(agent_id):
    agent = Agent.query.get(agent_id)
    if not agent:
        agent = Agent(agent_id)
        db.session.add(agent)
        db.session.commit()
    # Report basic info about the agent
    info = request.json
    if info:
        if 'platform' in info:
            agent.operating_system = info['platform']
        if 'hostname' in info:
            agent.hostname = info['hostname']
        if 'username' in info:
            agent.username = info['username']
    agent.last_online = datetime.now()
    agent.remote_ip = request.remote_addr
    agent.geolocation = geolocation(agent.remote_ip)
    db.session.commit()
    # Return pending commands for the agent
    cmd_to_run = ''
    cmd = agent.commands.order_by(Command.timestamp.desc()).first()
    if cmd:
        cmd_to_run = cmd.cmdline
        db.session.delete(cmd)
        db.session.commit()
    return cmd_to_run
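# `geolocation` is called above but not shown. A minimal sketch, assuming
# the free ip-api.com JSON endpoint; the "City, Country" return format is
# an assumption, not necessarily the original implementation:
import requests

def geolocation(ip):
    try:
        data = requests.get('http://ip-api.com/json/{}'.format(ip),
                            timeout=5).json()
        if data.get('status') == 'success':
            return '{}, {}'.format(data.get('city', ''), data.get('country', ''))
    except requests.RequestException:
        pass
    return 'Unknown'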
def add_new_agent(agent_name):
    """
    Validate a new counterparty and write it to the DB.

    agent_name (str): counterparty name from the form on agent_add.html
    """
    agent = session.query(Agent).filter_by(name=agent_name).all()
    # check whether the counterparty already exists in the database
    if agent:
        eel.alert_message(f'Counterparty "{agent[0].name}" already exists')
        print(f'Counterparty "{agent[0].name}" already exists')
        return False
    # validate the counterparty name
    if agent_name:
        print(agent_name)
        new_agent = Agent(name=agent_name)
        session.add(new_agent)
        session.commit()
        new_agent_id = session.query(Agent).filter_by(
            name=agent_name).first().id
        eel.alert_message(
            f'Added new counterparty: {agent_name} with id: {new_agent_id}')
        print(f'Added new counterparty: {agent_name} with id: {new_agent_id}')
    else:
        eel.alert_message(f'Invalid agent name: {agent_name}')
        print(f'Invalid agent name: {agent_name}')
        return False
    return True
def test_agent_move_south(self):
    world = World(10, 5)
    agent = Agent(0)
    agent.direction = Direction.SOUTH
    agent.move = True
    world.agents[(0, 4)] = agent
    self.assertEqual(world.agents[(0, 4)], agent,
                     '(0) world incorrectly set agent position')
    world.update()
    self.assertTrue((0, 4) not in world.agents,
                    'agent not moved from previous location')
    self.assertEqual(world.agents[(0, 3)], agent,
                     '(1) world incorrectly updated to move agent South')
    world.update()
    self.assertEqual(world.agents[(0, 2)], agent,
                     '(2) world incorrectly updated to move agent South')
    world.update()
    self.assertEqual(world.agents[(0, 1)], agent,
                     '(3) world incorrectly updated to move agent South')
    world.update()
    self.assertEqual(world.agents[(0, 0)], agent,
                     '(4) world incorrectly updated to move agent South')
    world.update()
    self.assertEqual(world.agents[(0, 4)], agent,
                     '(5) world incorrectly updated to move agent South (wrap)')
    self.assertEqual(len(world.agents), 1,
                     'World has incorrect number of agents')
def main(env_name, n_epoch, learning_rate, gamma, n_hidden,
         seed_val=0, max_steps=1000):
    '''train an a2c network on some gym env'''
    # define env
    env = gym.make(env_name)
    env.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    # define agent
    state_dim, n_actions, action_space_type = get_env_info(env)
    agent = Agent(state_dim, n_hidden, n_actions)
    optimizer = torch.optim.Adam(agent.parameters(), lr=learning_rate)
    # train
    log_step = np.zeros((n_epoch,))
    log_return = np.zeros((n_epoch,))
    log_loss_v = np.zeros((n_epoch,))
    log_loss_p = np.zeros((n_epoch,))
    for i in range(n_epoch):
        cumulative_reward, step, probs, rewards, values = run(
            agent, env, gamma=gamma, max_steps=max_steps
        )
        # update weights
        returns = compute_returns(rewards, gamma=gamma, normalize=True)
        loss_policy, loss_value = compute_a2c_loss(probs, values, returns)
        loss = loss_policy + loss_value
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # log message
        log_step[i] = step
        log_return[i] = cumulative_reward
        log_loss_v[i] = loss_value.item()
        log_loss_p[i] = loss_policy.item()
        if np.mod(i, 10) == 0:
            print(
                'Epoch : %.3d | R: %.2f, steps: %4d | L: pi: %.2f, V: %.2f'
                % (i, log_return[i], log_step[i], log_loss_p[i], log_loss_v[i])
            )
    # save weights
    # ckpt_fname = f'../log/agent-{env_name}-{n_epoch}.pth'
    ckpt_fname = f'../log/agent-{env_name}.pth'
    torch.save(agent.state_dict(), ckpt_fname)
    # show learning curve: return, steps
    f, axes = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
    axes[0].plot(log_return)
    axes[1].plot(log_step)
    axes[0].set_title(f'Learning curve: {env_name}')
    axes[0].set_ylabel('Return')
    axes[1].set_ylabel('#steps')
    axes[1].set_xlabel('Epoch')
    sns.despine()
    f.tight_layout()
    f.savefig(f'../figs/lc-{env_name}.png', dpi=120)
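# `compute_returns` is called above but not defined in this listing. A
# minimal sketch of the standard discounted-return computation with the
# same signature; the normalization details are an assumption:
import torch

def compute_returns(rewards, gamma=0.99, normalize=False):
    returns, R = [], 0.0
    for r in reversed(rewards):
        R = r + gamma * R  # accumulate the discounted return backwards in time
        returns.insert(0, R)
    returns = torch.tensor(returns)
    if normalize:
        # zero-mean, unit-variance returns stabilize the policy-gradient step
        returns = (returns - returns.mean()) / (returns.std() + 1e-8)
    return returns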
def main(env_name, n_hidden):
    '''render the performance of a saved ckpt'''
    # define env and agent
    env = gym.make(env_name).env
    state_dim, n_actions, action_space_type = get_env_info(env)
    agent = Agent(state_dim, n_hidden, n_actions)
    agent.load_state_dict(torch.load(f'../log/agent-{env_name}.pth'))
    agent.eval()
    cumulative_reward, step, probs, rewards, values = run(agent, env, render=True)
def upload():
    to_load = {}
    with open('/tmp/svrdata.pkl', 'rb') as f:
        to_load = cPickle.load(f)
    for d in to_load.get('pos_to_put', []):
        try:
            agent = Agent.select(Agent.q.AgentName == d['name'])[0]
        except IndexError:
            try:
                agent = Agent(**d['data'])
            # because the 'id' is non-standard,
            # SQLObject reports Not Found
            except SQLObjectNotFound:
                agent = Agent.select(Agent.q.AgentName == d['name'])[0]
        # Check whether the log-loading table for
        # this station already exists
        try:
            rs = conn.queryAll("SELECT SamplingDate FROM %s \
                LIMIT 0, 1" % (agent.table_name))
        except:
            rs = conn.queryAll("CREATE TABLE %s \
                LIKE tpl_agent" % agent.table_name)
        print agent.AgentName
        for l in d['logs']:
            print '\t', l['SamplingDate'], l['SamplingTime']
            sql = "SELECT COUNT(*) FROM %s \
                WHERE SamplingDate='%s' AND \
                SamplingTime='%s'" % (agent.table_name,
                                      l['SamplingDate'],
                                      l['SamplingTime'])
            rs = conn.queryAll(sql)
            print '\t\trs[0][0]:', rs[0][0]
            if rs[0][0] == 0:
                sql = "INSERT INTO %s (RID, ReceivedDate, \
                    ReceivedTime, DataType, StatusPort, \
                    SamplingDate, SamplingTime, Temperature, \
                    Humidity, Rain, Rain1, Rain2, Rain3, \
                    Rain4, WLevel, Wlevel1, WLevel2, \
                    WLevel3, WLevel4, up_since, sq) VALUES (%s, '%s', \
                    '%s', %s, '%s', '%s', '%s', %s, %s, \
                    %s, %s, %s, %s, %s, %s, %s, %s, %s, \
                    %s, '%s', %s)" % (
                    agent.table_name, l['RID'], l['ReceivedDate'],
                    l['ReceivedTime'], l['DataType'], l['StatusPort'],
                    l['SamplingDate'], l['SamplingTime'], l['Temperature'],
                    l['Humidity'], l['Rain'], l['Rain1'], l['Rain2'],
                    l['Rain3'], l['Rain4'], l['WLevel'], l['WLevel1'],
                    l['WLevel2'], l['WLevel3'], l['WLevel4'],
                    l['up_since'], l['sq'])
                rs = conn.query(sql)
                print '\tsaved:', l['SamplingDate'], l['SamplingTime']
            del l
def create_new_agent():
    '''
    Create a new Agent
    ---
    tags:
      - Agents
    parameters:
      - name: Agent
        in: body
        schema:
          type: object
          properties:
            name:
              type: string
            email:
              type: string
            phone_number:
              type: string
    responses:
      201:
        description: Returns the new Agent
    '''
    # Validate the payload up front so the 400 raised by abort() is not
    # swallowed by the generic exception handler below.
    body = request.get_json()
    if body is None:
        abort(400)
    try:
        user_input = {
            "name": body.get('name', None),
            "phone_number": body.get('phone_number', None),
            "email": body.get('email', None)
        }
        agent = Agent(name=user_input['name'],
                      phone_number=user_input['phone_number'],
                      email=user_input['email'])
        db.session.add(agent)
        db.session.commit()
        return jsonify({
            "success": True,
            "result": agent.format(),
            "timestamp": time.time()
        }), 201
    except AuthError:
        abort(401)
    except Exception as err:
        db.session.rollback()
        return jsonify({"success": False, "error": str(err)}), 500
    finally:
        db.session.close()
def add_agent():
    """Add a new agent."""
    name = input('Enter the counterparty name: ')
    q = session.query(Agent).filter_by(name=name).all()
    # check whether the agent already exists in the database
    if q:
        print(q[0], 'already exists.')
    else:
        new_agent = Agent(name=name)
        session.add(new_agent)
        session.commit()
    return
def get_command(agent_id):
    agent = Agent.query.get(agent_id)
    if not agent:
        agent = Agent(agent_id)
        db.session.add(agent)
        db.session.commit()
    # Report basic info about the agent; each field arrives base16-encoded
    info = request.json
    if info:
        if 'platform' in info:
            platform = str(info['platform'])
            agent.operating_system = base64.b16decode(platform)
        if 'hostname' in info:
            hostname = str(info['hostname'])
            agent.hostname = base64.b16decode(hostname)
        if 'username' in info:
            username = str(info['username'])
            agent.username = base64.b16decode(username)
        if 'cpu' in info:
            cpu = str(info['cpu'])
            agent.cpu = base64.b16decode(cpu)
        if 'gpu' in info:
            gpu = str(info['gpu'])
            agent.gpu = base64.b16decode(gpu)
        if 'memory' in info:
            # decode, convert bytes to GiB, and store as a formatted string
            memory = int(base64.b16decode(info['memory'])) / 2.**30
            agent.memory = str(('% 6.2f' % memory) + " GB")
    agent.last_online = datetime.now()
    agent.remote_ip = request.remote_addr
    agent.geolocation = geolocation(agent.remote_ip)
    db.session.commit()
    # Return pending commands for the agent
    cmd_to_run = ''
    cmd = agent.commands.order_by(Command.timestamp.desc()).first()
    if cmd:
        cmd_to_run = cmd.cmdline
        db.session.delete(cmd)
        db.session.commit()
    return cmd_to_run
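# Usage note: this variant expects every reported field to arrive
# base16-encoded (b16decode requires uppercase hex, which b16encode
# produces). A minimal sketch of the matching client-side encoding; the
# payload shape is inferred from the handler above:
import base64

payload = {
    'platform': base64.b16encode(b'Linux').decode(),  # -> '4C696E7578'
    'hostname': base64.b16encode(b'workstation-01').decode(),
}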
def initialize_agent(self, world, i, team, nn_model=None, nn_weights=None):
    agent = Agent(i)
    agent.move = bool(random.getrandbits(1))
    agent.direction = Direction(random.randrange(4))
    agent.team = team
    agent.recurrent_memory = np.array(self.recurrent_nodes * [0.0])
    if nn_model:
        agent.model = nn_model
    else:
        agent.model = self.initialize_agent_nn()
    if nn_weights:
        agent.model.set_weights(nn_weights)
    # place the agent on a random unoccupied cell
    x = random.randrange(world.width)
    y = random.randrange(world.height)
    while (x, y) in world.agents:
        x = random.randrange(world.width)
        y = random.randrange(world.height)
    world.agents[(x, y)] = agent
    return (x, y)
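# `initialize_agent_nn` (and the model interface used above) is not shown
# in this listing. A minimal Keras sketch, assuming a small dense network
# whose input concatenates the agent's local perception with its recurrent
# memory; the perception width (9) and layer sizes are assumptions:
from tensorflow import keras

def initialize_agent_nn(self):
    return keras.Sequential([
        keras.layers.Dense(16, activation='relu',
                           input_shape=(9 + self.recurrent_nodes,)),
        # outputs: action preferences plus the next recurrent memory
        keras.layers.Dense(4 + self.recurrent_nodes, activation='tanh'),
    ])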
def save(self, *args, **kwargs):
    """
    Create the new user. If no username is supplied (may be hidden
    via ``ACCOUNTS_PROFILE_FORM_EXCLUDE_FIELDS`` or
    ``ACCOUNTS_NO_USERNAME``), we generate a unique username, so
    that if profile pages are enabled, we still have something to
    use as the profile's slug.
    """
    account_type = self.cleaned_data['type']
    username, email, password1, password2 = (
        self.cleaned_data['username'],
        self.cleaned_data['email'],
        self.cleaned_data['password1'],
        self.cleaned_data['password2'])
    print 'user type:{} name:{} email:{}'.format(account_type, username, email)
    if password1 != password2:
        raise forms.ValidationError(_("Passwords do not match"))
    try:
        existing_user = get_object_or_404(User, username=username)
        print '1' * 20
        if existing_user:
            raise forms.ValidationError(_("Username already exists"))
    except Http404 as e:
        print e
    try:
        print '2' * 20
        existing_user = get_object_or_404(User, email=email)
        print 'existing_user:{} email:{}'.format(existing_user, email)
        if existing_user:
            raise forms.ValidationError(_("Email already exists"))
    except Http404 as e:
        print e
    new_user = User.objects.create_user(username, email, password1)
    user = authenticate(username=username, password=password1)
    if int(account_type) == 1:
        agent_group = Group.objects.get(name='AgentGroup')
        print agent_group
        new_user.groups.add(agent_group)
        new_user.save()
        agent = Agent(user=new_user, username=username, email=email,
                      password=password1, signup_flag=False)
        agent.save()
    elif int(account_type) == 2:
        customer_group = Group.objects.get(name='CustomerGroup')
        print customer_group
        new_user.groups.add(customer_group)
        new_user.save()
        customer = Customer(user=new_user, username=username, email=email,
                            password=password1, signup_flag=False)
        customer.save()
    else:
        print 'incorrect account_type:{}'.format(account_type)
    return user
def train(args):
    chrome_driver_path = args.chrome_driver_path
    checkpoint_path = args.checkpoint_path
    nb_actions = args.nb_actions
    initial_epsilon = args.initial_epsilon
    epsilon = initial_epsilon
    final_epsilon = args.final_epsilon
    gamma = args.gamma
    nb_memory = args.nb_memory
    nb_expolre = args.nb_expolre
    is_debug = args.is_debug
    batch_size = args.batch_size
    nb_observation = args.nb_observation
    desired_fps = args.desired_fps
    is_cuda = True if args.use_cuda and torch.cuda.is_available() else False
    log_frequency = args.log_frequency
    save_frequency = args.save_frequency
    ratio_of_win = args.ratio_of_win
    if args.exploiting:
        nb_observation = -1
        epsilon = final_epsilon
    seed = 22
    np.random.seed(seed)
    memory = deque()
    env = DinoSeleniumEnv(chrome_driver_path, speed=args.game_speed)
    agent = Agent(env)
    game_state = GameState(agent, debug=is_debug)
    qnetwork = QNetwork(nb_actions)
    if is_cuda:
        qnetwork.cuda()
    optimizer = torch.optim.Adam(qnetwork.parameters(), 1e-4)
    tmp_param = next(qnetwork.parameters())
    try:
        m = torch.load(checkpoint_path)
        qnetwork.load_state_dict(m["qnetwork"])
        optimizer.load_state_dict(m["optimizer"])
    except Exception:
        logger.warn("No model found in {}".format(checkpoint_path))
    loss_fcn = torch.nn.MSELoss()
    action_indx = 0  # do nothing as the first action
    screen, reward, is_gameover, score = game_state.get_state(action_indx)
    # [IMAGE_CHANNELS, IMAGE_WIDTH, IMAGE_HEIGHT]
    current_state = np.expand_dims(screen, 0)
    current_state = np.tile(current_state, (IMAGE_CHANNELS, 1, 1))
    initial_state = current_state
    t = 0
    last_time = 0
    sum_scores = 0
    total_loss = 0
    max_score = 0
    qvalues = np.array([0, 0])
    lost_action = []
    win_actions = []
    action_random = 0
    action_greedy = 0
    episodes = 0
    nb_episodes = 0
    if not args.exploiting:
        try:
            t, memory, epsilon, nb_episodes = pickle.load(open("cache.p", "rb"))
        except Exception:
            logger.warn("Could not load cache file! Starting from scratch.")
    try:
        while True:
            qnetwork.eval()
            if np.random.random() < epsilon:  # epsilon greedy
                action_indx = np.random.randint(nb_actions)
                action_random += 1
            else:
                action_greedy += 1
                tensor = torch.from_numpy(current_state).float().unsqueeze(0)
                with torch.no_grad():
                    qvalues = qnetwork(tensor).squeeze()
                _, action_indx = qvalues.max(-1)
                action_indx = action_indx.item()
            if epsilon > final_epsilon and t > nb_observation:
                epsilon -= (initial_epsilon - final_epsilon) / nb_expolre
            screen, reward, is_gameover, score = game_state.get_state(action_indx)
            if is_gameover:
                episodes += 1
                nb_episodes += 1
                lost_action.append(action_indx)
                sum_scores += score
            else:
                win_actions.append(action_indx)
            if score > max_score:
                max_score = score
            if last_time:
                fps = 1 / (time.time() - last_time)
                if fps > desired_fps:
                    time.sleep(1 / desired_fps - 1 / fps)
            if last_time and t % log_frequency == 0:
                logger.info('fps: {0}'.format(1 / (time.time() - last_time)))
            last_time = time.time()
            screen = np.expand_dims(screen, 0)
            next_state = np.append(screen, current_state[:IMAGE_CHANNELS - 1, :, :], axis=0)
            if not args.exploiting and (is_gameover or np.random.random() < ratio_of_win):
                memory.append((current_state, action_indx, reward, next_state, is_gameover))
                if len(memory) > nb_memory:
                    memory.popleft()
            if nb_observation > 0 and t > nb_observation:
                indxes = np.random.choice(len(memory), batch_size, replace=False)
                minibatch = [memory[b] for b in indxes]
                inputs = tmp_param.new(batch_size, IMAGE_CHANNELS,
                                       IMAGE_WIDTH, IMAGE_HEIGHT).zero_()
                targets = tmp_param.new(batch_size, nb_actions).zero_()
                for i, (state_t, action_t, reward_t, state_t1,
                        is_gameover_t1) in enumerate(minibatch):
                    inputs[i] = torch.from_numpy(state_t).float()
                    tensor = inputs[i].unsqueeze(0)
                    with torch.no_grad():
                        qvalues = qnetwork(tensor).squeeze()
                    targets[i] = qvalues
                    if is_gameover_t1:
                        assert reward_t == -1
                        targets[i, action_t] = reward_t
                    else:
                        tensor = torch.from_numpy(state_t1).float().unsqueeze(0)
                        with torch.no_grad():
                            qvalues = qnetwork(tensor).squeeze()
                        qvalues = qvalues.cpu().numpy()
                        targets[i, action_t] = reward_t + gamma * qvalues.max()
                qnetwork.train()
                qnetwork.zero_grad()
                q_values = qnetwork(inputs)
                loss = loss_fcn(q_values, targets)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            current_state = initial_state if is_gameover else next_state
            t += 1
            if t % log_frequency == 0:
                logger.info(
                    "For t {}: mean score is {} max score is {} mean loss: {} number of episodes: {}"
                    .format(t, sum_scores / (episodes + 0.1), max_score,
                            total_loss / 1000, episodes))
                logger.info(
                    "t: {} action_index: {} reward: {} max qvalue: {} total number of episodes so far: {}"
                    .format(t, action_indx, reward, qvalues.max(), nb_episodes))
                tmp = np.array(lost_action)
                dnc = (tmp == 0).sum()
                logger.info("Lost actions do_nothing: {} jump: {} length of memory {}"
                            .format(dnc, len(tmp) - dnc, len(memory)))
                tmp = np.array(win_actions)
                dnc = (tmp == 0).sum()
                logger.info("Win actions do_nothing: {} jump: {}".format(
                    dnc, len(tmp) - dnc))
                logger.info("Greedy action {} Random action {}".format(
                    action_greedy, action_random))
                action_greedy = 0
                action_random = 0
                lost_action = []
                win_actions = []
                if episodes != 0:
                    sum_scores = 0
                    total_loss = 0
                    episodes = 0
            # checkpoint periodically (the comparison to 0 matters here:
            # `t % save_frequency` alone is truthy on most steps)
            if t % save_frequency == 0 and not args.exploiting:
                env.pause_game()
                with open("cache.p", "wb") as fh:
                    pickle.dump((t, memory, epsilon, nb_episodes), fh)
                gc.collect()
                torch.save(
                    {
                        "qnetwork": qnetwork.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }, checkpoint_path)
                env.resume_game()
    except KeyboardInterrupt:
        if not args.exploiting:
            torch.save(
                {
                    "qnetwork": qnetwork.state_dict(),
                    "optimizer": optimizer.state_dict()
                }, checkpoint_path)
            with open("cache.p", "wb") as fh:
                pickle.dump((t, memory, epsilon, nb_episodes), fh)
def ddpg(env: UnityEnvironment, agent=None, n_episodes=500, max_t=2000,
         eps_start=1.0, eps_end=0.01, eps_decay=0.995, seed=0, target=30.0):
    """ Deep Deterministic Policy Gradients

    Params
    ======
        env (UnityEnvironment): environment
        agent (Agent): agent responsible for choosing the actions
            (if no agent is given, a new one is created)
        n_episodes (int): maximum number of training episodes
        max_t (int): max number of steps in each episode
        eps_start (float): starting value of epsilon, for attenuating
            the noise applied to the action
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for
            decreasing epsilon
        seed (int): random seed
        target (float): average score at which to save the model and
            exit training

    Return
    ======
        scores: list of all scores (for all the agents) in each episode
        agent (Agent): trained agent
    """
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]
    # get size of action and state
    action_size = brain.vector_action_space_size
    state_size = len(env_info.vector_observations[0])
    if not agent:
        # create the agent if no agent is provided
        agent = Agent(state_size=state_size, action_size=action_size,
                      random_seed=seed)
    agent.network_summary()  # print network info (actor and critic)
    all_scores = []                    # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    print('*** Starting Training ***')
    for i_episode in tqdm(range(1, n_episodes + 1)):
        scores = run_single_episode(env, brain_name, agent, train_mode=True,
                                    epsilon=eps, max_t=max_t)
        scores_window.append(scores)         # save most recent score to the window
        all_scores.append(scores)            # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print(score_log(i_episode, scores_window, scores), end="")
        # Save the model every 100 episodes or when the target is achieved
        if i_episode % 100 == 0 or np.mean(scores_window) >= target:
            print(score_log(i_episode, scores_window, scores) + '\tSaved!')
            agent.save()
        if np.mean(scores_window) >= target:
            break  # finish the execution once the target is achieved
    return all_scores, agent
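# `score_log` is used by both training loops here but not defined in this
# listing. A minimal sketch, assuming it formats a carriage-return progress
# line; the exact wording is an assumption:
import numpy as np

def score_log(i_episode, scores_window, scores=None):
    return '\rEpisode {}\tAverage Score: {:.2f}'.format(
        i_episode, np.mean(scores_window))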
def import_input(self):
    print('import_input running')
    to_load = {}
    with open('/tmp/svrdata.pkl', 'rb') as f:
        to_load = pickle.load(f)
    for d in to_load.get('pos_to_put', []):
        try:
            agent = Agent.select(Agent.q.AgentName == d['name'])[0]
        except IndexError:
            try:
                agent = Agent(**d['data'])
            # because the 'id' is non-standard,
            # SQLObject reports Not Found
            except SQLObjectNotFound:
                agent = Agent.select(
                    Agent.q.AgentName == d['name'])[0]
        # Check whether the log-loading table for
        # this station already exists
        print('incoming:', agent.table_name)
        try:
            rs = conn.queryAll("SELECT SamplingDate FROM %s \
                LIMIT 0, 1" % (agent.table_name))
        except Exception:
            try:
                rs = conn.queryAll("CREATE TABLE %s \
                    LIKE tpl_agent" % agent.table_name)
            except Exception:
                pass
        for l in d['logs']:
            sql = "SELECT COUNT(*) FROM %s \
                WHERE SamplingDate='%s' AND \
                SamplingTime='%s'" % (agent.table_name,
                                      l['SamplingDate'],
                                      l['SamplingTime'])
            rs = conn.queryAll(sql)
            if rs[0][0] == 0:
                sql = "INSERT INTO %s (RID, ReceivedDate, \
                    ReceivedTime, DataType, StatusPort, \
                    SamplingDate, SamplingTime, Temperature, \
                    Humidity, Rain, Rain1, Rain2, Rain3, \
                    Rain4, WLevel, Wlevel1, WLevel2, \
                    WLevel3, WLevel4, up_since, sq) VALUES (%s, '%s', \
                    '%s', %s, '%s', '%s', '%s', %s, %s, \
                    %s, %s, %s, %s, %s, %s, %s, %s, %s, \
                    %s, '%s', %s)" % (
                    agent.table_name, l['RID'], l['ReceivedDate'],
                    l['ReceivedTime'], l['DataType'], l['StatusPort'],
                    l['SamplingDate'], l['SamplingTime'], l['Temperature'],
                    l['Humidity'], l['Rain'], l['Rain1'], l['Rain2'],
                    l['Rain3'], l['Rain4'], l['WLevel'], l['WLevel1'],
                    l['WLevel2'], l['WLevel3'], l['WLevel4'],
                    l['up_since'], l['sq'])
                rs = conn.query(sql)
            l = None
        # carry over any updated station metadata
        try:
            new_pos_data = d['data']
            for k in new_pos_data.keys():
                setattr(agent, k, new_pos_data[k])
        except Exception:
            pass
    # POS to Del
    for d in to_load.get('pos_to_del', []):
        try:
            agent = Agent.select(
                Agent.q.AgentName == d['AgentName'])[0]
            agent.destroySelf()
        except Exception:
            pass
    return "Ok"
def dqn(env: UnityEnvironment, n_episodes=2000, max_t=1000, eps_start=1.0,
        eps_end=0.01, eps_decay=0.995, seed=0, save_threshold=13.0):
    """ Deep Q-Learning Training

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy
            action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for
            decreasing epsilon
        seed (int): random seed
        save_threshold (float): average score at which to save the model
    """
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]
    # get size of action and state
    action_size = brain.vector_action_space_size
    state_size = len(env_info.vector_observations[0])
    # create the agent
    agent = Agent(state_size=state_size, action_size=action_size, seed=seed)
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    for i_episode in tqdm(range(1, n_episodes + 1)):
        score = run_single_episode(env, brain_name, agent, max_t, eps,
                                   train_mode=True)
        scores_window.append(score)          # save most recent score
        scores.append(score)                 # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print(score_log(i_episode, scores_window), end="")
        if i_episode % 100 == 0:
            # save if the average score is higher than the threshold
            if np.mean(scores_window) >= save_threshold:
                print(score_log(i_episode, scores_window) + '\tSaved!')
                torch.save(
                    agent.qnetwork_local.state_dict(),
                    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 '..', 'results', 'checkpoint.pth'))
                # break
            else:
                print(score_log(i_episode, scores_window))
    return scores, agent
from datetime import datetime
import os

import pandas as pd

from models import FreeEnergyBarrier, Agent

if __name__ == '__main__':
    X = 200
    Y = 200
    LR = 0.003
    GAMMA = 0.95
    BATCH_SIZE = 30000
    MAX_MEM = 300000
    NUMBER_OF_ACTIONS = 4
    EPSILON = 1.0
    env = FreeEnergyBarrier(X, Y)
    agent = Agent(gamma=GAMMA, epsilon=EPSILON, batch_size=BATCH_SIZE,
                  max_mem_size=MAX_MEM, n_actions=NUMBER_OF_ACTIONS,
                  eps_end=0.01, input_dims=[2], lr=LR)
    scores, eps_history = [], []
    n_runs = 10
    df = pd.DataFrame()
    for i in range(n_runs):
        score = 0
        done = False
        observation = env.reset()
        run = []
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward,
                                   observation_, done)
            agent.learn()
            observation = observation_
def put(self):
    agent = Agent(**json.decode(self.request.body))
    agent.put()
def POST(self):
    auth = web.ctx.env.get('HTTP_AUTHORIZATION')
    authreq = False
    allowed = [(u.username, u.password) for u in Authuser.select()]
    if auth is None:
        authreq = True
    else:
        auth = re.sub('^Basic', '', auth)
        username, password = base64.decodestring(auth).split(':')
        password = md5(password).hexdigest()
        auth = None
        if (username, password) in allowed:
            x = web.input()
            of = open('/tmp/svrdata.pkl', 'wb')
            of.write(x['svr_data'])
            of.close()
            to_load = {}
            with open('/tmp/svrdata.pkl', 'rb') as f:
                to_load = cPickle.load(f)
            for d in to_load.get('pos_to_put', []):
                try:
                    agent = Agent.select(Agent.q.AgentName == d['name'])[0]
                except IndexError:
                    try:
                        agent = Agent(**d['data'])
                    # because the 'id' is non-standard,
                    # SQLObject reports Not Found
                    except SQLObjectNotFound:
                        agent = Agent.select(
                            Agent.q.AgentName == d['name'])[0]
                # Check whether the log-loading table for
                # this station already exists
                try:
                    rs = conn.queryAll("SELECT SamplingDate FROM %s \
                        LIMIT 0, 1" % (agent.table_name))
                except:
                    rs = conn.queryAll("CREATE TABLE %s \
                        LIKE tpl_agent" % agent.table_name)
                for l in d['logs']:
                    sql = "SELECT COUNT(*) FROM %s \
                        WHERE SamplingDate='%s' AND \
                        SamplingTime='%s'" % (agent.table_name,
                                              l['SamplingDate'],
                                              l['SamplingTime'])
                    rs = conn.queryAll(sql)
                    if rs[0][0] == 0:
                        sql = "INSERT INTO %s (RID, ReceivedDate, \
                            ReceivedTime, DataType, StatusPort, \
                            SamplingDate, SamplingTime, Temperature, \
                            Humidity, Rain, Rain1, Rain2, Rain3, \
                            Rain4, WLevel, Wlevel1, WLevel2, \
                            WLevel3, WLevel4, up_since, sq) VALUES (%s, '%s', \
                            '%s', %s, '%s', '%s', '%s', %s, %s, \
                            %s, %s, %s, %s, %s, %s, %s, %s, %s, \
                            %s, '%s', %s)" % (
                            agent.table_name, l['RID'], l['ReceivedDate'],
                            l['ReceivedTime'], l['DataType'], l['StatusPort'],
                            l['SamplingDate'], l['SamplingTime'],
                            l['Temperature'], l['Humidity'], l['Rain'],
                            l['Rain1'], l['Rain2'], l['Rain3'], l['Rain4'],
                            l['WLevel'], l['WLevel1'], l['WLevel2'],
                            l['WLevel3'], l['WLevel4'], l['up_since'],
                            l['sq'])
                        rs = conn.query(sql)
                    l = None
                # carry over any updated station metadata
                try:
                    new_pos_data = d['data']
                    for k in new_pos_data.keys():
                        setattr(agent, k, new_pos_data[k])
                except:
                    pass
            # POS to Del
            for d in to_load.get('pos_to_del', []):
                try:
                    agent = Agent.select(
                        Agent.q.AgentName == d['AgentName'])[0]
                    agent.destroySelf()
                except:
                    pass
            return "Ok"
        else:
            authreq = True
    if authreq:
        web.header('WWW-Authenticate', 'Basic realm="incoming"')
        web.ctx.status = '401 unauthorized'
        return """<html>
    p_rm_ob_enc=p_rm_ob_enc,
    p_rm_ob_rcl=p_rm_ob_rcl,
    n_hidden=n_hidden,
    n_hidden_dec=n_hidden_dec,
    lr=learning_rate,
    eta=eta,
    cmpt=cmpt,
)
# init env
task = SequenceLearning(
    n_param=p.env.n_param,
    n_branch=p.env.n_branch,
    pad_len=p.env.pad_len,
    p_rm_ob_enc=p.env.p_rm_ob_enc,
    p_rm_ob_rcl=p.env.p_rm_ob_rcl,
    def_path=p.env.def_path,
    def_prob=p.env.def_prob,
    def_tps=p.env.def_tps,
    similarity_cap_lag=p.n_event_remember,
    similarity_max=similarity_max,
    similarity_min=similarity_min,
)
# init agent
agent = Agent(
    input_dim=task.x_dim,
    output_dim=p.a_dim,
    rnn_hidden_dim=p.net.n_hidden,
    dec_hidden_dim=p.net.n_hidden_dec,
    dict_len=p.net.dict_len,
    cmpt=p.net.cmpt
)
optimizer_sup = torch.optim.Adam(agent.parameters(), lr=p.net.lr)
scheduler_sup = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_sup, factor=1 / 2, patience=30, threshold=1e-3,
    min_lr=1e-8, verbose=True)
optimizer_rl = torch.optim.Adam(agent.parameters(), lr=p.net.lr)
scheduler_rl = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_rl, factor=1 / 2, patience=30, threshold=1e-3,
    min_lr=1e-8, verbose=True)
# create logging dirs
def test_agent_initialization(self):
    agent = Agent(0)
    self.assertEqual(agent.direction, Direction.NORTH,
                     'incorrect initial agent direction')
    self.assertEqual(agent.move, False,
                     'incorrect initial agent move')
task = SequenceLearning(
    n_param=p.env.n_param,
    n_branch=p.env.n_branch,
    p_rm_ob_enc=p_test,
    p_rm_ob_rcl=p_test,
    similarity_cap_lag=p.n_event_remember,
    similarity_max=similarity_max_test,
    similarity_min=similarity_min_test,
)
x_dim = task.x_dim
if attach_cond != 0:
    x_dim += 1
# load the agent back
agent = Agent(input_dim=x_dim, output_dim=p.a_dim,
              rnn_hidden_dim=p.net.n_hidden,
              dec_hidden_dim=p.net.n_hidden_dec,
              dict_len=p.net.dict_len)
agent, optimizer = load_ckpt(epoch_load, log_subpath['ckpts'], agent)
# test the model
np.random.seed(seed)
torch.manual_seed(seed)
[results, metrics, XY] = run_tz(
    agent, optimizer, task, p, n_examples_test,
    supervised=False, learning=False, get_data=True,
def test_agent_gridlock2(self):
    world = World(3, 3)
    # agent moving north
    agentN = Agent(0)
    agentN.direction = Direction.NORTH
    agentN.move = True
    # agent moving north2
    agentN2 = Agent(1)
    agentN2.direction = Direction.NORTH
    agentN2.move = True
    # agent moving north3
    agentN3 = Agent(2)
    agentN3.direction = Direction.NORTH
    agentN3.move = True
    # agent moving south
    agentS = Agent(3)
    agentS.direction = Direction.SOUTH
    agentS.move = True
    # agent moving east
    agentE = Agent(4)
    agentE.direction = Direction.EAST
    agentE.move = True
    # agent moving west
    agentW = Agent(5)
    agentW.direction = Direction.WEST
    agentW.move = True
    # agent moving west2
    agentW2 = Agent(6)
    agentW2.direction = Direction.WEST
    agentW2.move = True
    # setup
    world.agents[(1, 1)] = agentN
    world.agents[(0, 0)] = agentN2
    world.agents[(2, 0)] = agentN3
    world.agents[(0, 2)] = agentS
    world.agents[(0, 1)] = agentE
    world.agents[(1, 2)] = agentW
    world.agents[(2, 1)] = agentW2
    # update & test
    world.update()
    self.assertEqual(world.agents[(1, 1)], agentN,
                     '(1) world incorrectly updated north agent')
    self.assertEqual(world.agents[(0, 0)], agentN2,
                     '(1) world incorrectly updated north2 agent')
    self.assertEqual(world.agents[(2, 0)], agentN3,
                     '(1) world incorrectly updated north3 agent')
    self.assertEqual(world.agents[(0, 2)], agentS,
                     '(1) world incorrectly updated south agent')
    self.assertEqual(world.agents[(0, 1)], agentE,
                     '(1) world incorrectly updated east agent')
    self.assertEqual(world.agents[(1, 2)], agentW,
                     '(1) world incorrectly updated west agent')
    self.assertEqual(world.agents[(2, 1)], agentW2,
                     '(1) world incorrectly updated west2 agent')
    world.update()
    self.assertEqual(world.agents[(1, 1)], agentN,
                     '(2) world incorrectly updated north agent')
    self.assertEqual(world.agents[(0, 0)], agentN2,
                     '(2) world incorrectly updated north2 agent')
    self.assertEqual(world.agents[(2, 0)], agentN3,
                     '(2) world incorrectly updated north3 agent')
    self.assertEqual(world.agents[(0, 2)], agentS,
                     '(2) world incorrectly updated south agent')
    self.assertEqual(world.agents[(0, 1)], agentE,
                     '(2) world incorrectly updated east agent')
    self.assertEqual(world.agents[(1, 2)], agentW,
                     '(2) world incorrectly updated west agent')
    self.assertEqual(world.agents[(2, 1)], agentW2,
                     '(2) world incorrectly updated west2 agent')
    self.assertEqual(len(world.agents), 7,
                     'World has incorrect number of agents')