def transaction__purge_accounts(self, args):
    """Exercise Transaction.purge_accounts and print the result for inspection.

    args: sequence of three strings —
        args[0]: environment directory (used only for the log message)
        args[1]: run identifier (used to name the log file)
        args[2]: log directory

    Builds a Config with two fresh agents, books two transactions between
    them (one with amount 0, one with amount 1), prints both agents, purges
    the accounts, and prints both agents again so the effect of
    purge_accounts can be checked by eye.
    """
    # Local imports keep the test self-contained; the unused `import os`
    # from the original has been dropped.
    from sample_agent import Agent
    from sample_config import Config
    from sample_transaction import Transaction

    text = "This test checks transaction.purge_accounts \n"
    self.print_info(text)

    #
    # INITIALIZATION
    #
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])

    # Configure logging parameters so we get output while the program runs
    # (relies on `logging` being imported at module level).
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log",
                        level=logging.INFO)
    logging.info('START logging for test transaction__purge_accounts in run: %s',
                 environment_directory + identifier + ".xml")

    # Construct a config holding two freshly created agents.
    config = Config()
    agent_one = Agent("agent_one", {"test": "parameters"}, {"test": "variables"})
    agent_two = Agent("agent_two", {"test": "parameters"}, {"test": "variables"})
    config.agents = []
    config.agents.append(agent_one)
    config.agents.append(agent_two)

    #
    # TESTING
    #
    print("Before purging the accounts")
    # First transaction has amount 0 — presumably the case purge_accounts
    # is expected to remove; confirm against Transaction.purge_accounts.
    transaction = Transaction()
    transaction.this_transaction("type", "asset", "agent_one", "agent_two", 0, 2, 3, 4)
    transaction.add_transaction(config)
    transaction = Transaction()
    transaction.this_transaction("type", "asset", "agent_one", "agent_two", 1, 2, 3, 4)
    transaction.add_transaction(config)
    print(config.get_agent_by_id("agent_one"))
    print(config.get_agent_by_id("agent_two"))
    print("After clearing one bank's accounts")
    transaction.purge_accounts(config)
    print(config.get_agent_by_id("agent_one"))
    print(config.get_agent_by_id("agent_two"))
def transaction__write_transaction(self, args):
    """Exercise Transaction.write_transaction and print its output.

    args: sequence of three strings —
        args[0]: environment directory (used only for the log message)
        args[1]: run identifier (used to name the log file)
        args[2]: log directory

    Builds a Config with two fresh agents, books one transaction between
    them, then prints the serialized form produced by write_transaction.
    """
    # Local imports keep the test self-contained; the unused `import os`
    # from the original has been dropped.
    from sample_agent import Agent
    from sample_config import Config
    from sample_transaction import Transaction

    text = "This test checks transaction.write_transaction \n"
    self.print_info(text)

    #
    # INITIALIZATION
    #
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])

    # Configure logging parameters so we get output while the program runs
    # (relies on `logging` being imported at module level).
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log",
                        level=logging.INFO)
    logging.info('START logging for test transaction__write_transaction in run: %s',
                 environment_directory + identifier + ".xml")

    # Construct a config holding two freshly created agents.
    config = Config()
    agent_one = Agent("agent_one", {"test": "parameters"}, {"test": "variables"})
    agent_two = Agent("agent_two", {"test": "parameters"}, {"test": "variables"})
    config.agents = []
    config.agents.append(agent_one)
    config.agents.append(agent_two)

    #
    # TESTING
    #
    print("Creating a transaction")
    transaction = Transaction()
    print("Assigning values")
    transaction.this_transaction("type", "asset", "agent_one", "agent_two", 1, 2, 3, 4)
    print("Adding the transaction to the books")
    transaction.add_transaction(config)
    print("Printing transaction:")
    print(transaction.write_transaction())
track_number = 0 # track_number = 0,1,2,...,18 episode_count = 10 max_steps = 50 reward = 0 done = False step = 0 # Generate a Torcs environment env = TorcsEnv(vision=vision, throttle=throttle, brake=brake, gear_change=gear_change, port=port, track_number=track_number) agent = Agent(env) print("TORCS Experiment Start.") for i in range(episode_count): print("Episode : " + str(i)) if np.mod(i, 3) == 0: # Sometimes you need to relaunch TORCS because of the memory leak error ob = env.reset(relaunch=True) else: ob = env.reset() total_reward = 0. for j in range(max_steps): action = agent.act(ob)
from gym_torcs import TorcsEnv
from sample_agent import Agent
import numpy as np

# Experiment configuration: vision-based TORCS run, steering only.
vision = True
episode_count = 10
max_steps = 50
reward = 0
done = False
step = 0

# Generate a Torcs environment
env = TorcsEnv(vision=vision, throttle=False)
agent = Agent(1)  # steering only

print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))
    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        # env.reset() returns the current state/observation
        ob = env.reset()

    total_reward = 0.
    for j in range(max_steps):
        # (Loop body continues beyond this chunk.)
# -*- coding: utf-8 -*- from Gym_LineTracer import LineTracerEnv, APPWINDOW from sample_agent import Agent import wx from threading import Thread # Define Agent And Environment agent = Agent(2) env = LineTracerEnv() class SimulationLoop(Thread): def __init__(self): Thread.__init__(self) self.start() # start the thread def run(self): observation = env.reset() for t in range(1000): env.render() print(observation) action = agent.act() observation, reward, done, info = env.step(action) if done: print("Episode finished after {} timesteps".format(t + 1)) break wx.Yield() env.monitor.close()
def initialize_agents(self):
    """Populate ``self.agents`` with the configured number of blank agents.

    Reads ``num_agents`` from ``self.model_parameters``; each new agent is
    identified by its creation index (as a string) and starts with empty
    parameter and variable dicts.
    """
    num_agents = int(self.model_parameters['num_agents'])
    self.agents.extend(
        Agent(str(index), {}, {}) for index in range(num_agents)
    )
reward = 0
done = False

# change directory to file path
os.chdir(os.path.dirname(os.path.abspath(__file__)))

if __name__ == "__main__":
    # NOTE(review): `max_episodes`, `max_steps`, `SimstarEnv`, `Agent`,
    # `os`, `time`, and `logging` are referenced here but defined/imported
    # outside this chunk — confirm upstream.
    log_level = logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
                        level=log_level,
                        datefmt='%Y-%m-%d %H:%M:%S')
    logging.info("simstar env init")
    env = SimstarEnv(synronized_mode=True, speed_up=5, hz=1)
    agent = Agent(dim_action=3)
    logging.info("entering main loop")
    for ee in range(max_episodes):
        episode_total_reward = 0
        logging.info("reset environment. eposde number: %d", ee + 1)
        # Give the simulator time to settle before resetting.
        time.sleep(2)
        observation = env.reset()
        for ii in range(max_steps):
            action = agent.act(observation, reward, done)
            # Zero out the third action component (presumably brake —
            # TODO confirm against SimstarEnv's action layout).
            action[2] = 0.0
            print(ii, action)
            debug_action = [0.0, 1.0, 0.0]
            # NOTE(review): the agent's action is unconditionally replaced
            # by a fixed debug action — looks like leftover debugging code;
            # confirm before relying on agent behavior.
            action = debug_action
            observation, reward, done, _ = env.step(action)
def compute_gradient(traj, baseline, max_current_steps):
    # Policy-gradient estimate averaged over `avg_episode` trajectories.
    # `traj` rows are indexed from 1 (row 0 is the dummy initializer
    # created below).  Assumed per-row layout — TODO confirm where traj
    # is filled: traj[i][0][j] is the per-step score/feature term,
    # traj[i][3] the trajectory length, traj[i][4][j] the per-step return.
    # `n_action`, `n_states`, and `avg_episode` are module-level globals
    # defined outside this chunk.
    gradient = np.zeros((n_action, n_states))
    for i in range(avg_episode):
        single_gradient = np.zeros((n_action, n_states))
        for j in range(max_current_steps):
            # Only steps that actually occurred in this trajectory
            # contribute (j beyond the episode length is skipped).
            if j < traj[i+1][3]:
                single_traj = traj[i+1]
                # Baseline-subtracted contribution of step j.
                single_gradient = single_gradient + single_traj[0][j] * (single_traj[4][j] - baseline[j])
        gradient = gradient + single_gradient
    return gradient / avg_episode


# Generate a Torcs environment
print ("Creating Torcs environment")
env = TorcsEnv(vision=vision, throttle=False)
print("Torcs env created--------------------")
agent = Agent(3)  # now we use steering only, but we might use throttle and gear

# Init theta (policy parameter) vector
theta = np.random.normal(0, 0.01, (n_action, n_states))
performance = np.array([0])

# Baselines sum (numerator/denominator accumulators)
baseline_n = 0
baseline_d = 0

# max of steps per trajectory
max_current_steps = 0

# Vector to compute gradient; the all-zero first row is a dummy so that
# compute_gradient can index trajectories starting at row 1.
traj = np.array([[0, 0, 0, 0, 0]])