def simulate(num_epochs: int):
    """
    Runs our simulation with 2 actors.

    :param num_epochs: the number of iterations we want to run
    :type num_epochs: int
    :return: None
    """
    data = {"actor1_money": [], "actor2_money": [],
            "actor1_choice": [], "actor2_choice": []}
    player1 = Actor(avaliable_funds=10, lower_threshhold=5, betrayal_rate=.03)
    player2 = Actor(avaliable_funds=10, lower_threshhold=5, betrayal_rate=.90)
    for i in range(num_epochs):
        choice1 = player1.make_choice()
        choice2 = player2.make_choice()
        processChoice(choice1, choice2, player1, player2)
        data["actor1_money"].append(player1.avaliable_funds)
        data["actor2_money"].append(player2.avaliable_funds)
        data["actor1_choice"].append(choice1)
        data["actor2_choice"].append(choice2)
        player1.update()
        player2.update()
    df = pd.DataFrame(data)
    print(df)
    plt.plot(range(num_epochs), df['actor1_money'])
    plt.plot(range(num_epochs), df['actor2_money'])
    plt.show()
    plt.clf()
    counts1 = df.groupby(['actor1_choice'])['actor1_choice'].count()
    counts2 = df.groupby(['actor2_choice'])['actor2_choice'].count()
    print(counts1)
    print(counts2)
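For reference, a minimal sketch of the two-player Actor this simulation calls into. Only the constructor keywords and the make_choice()/update() names are taken from the call sites above; the cooperate/betray logic is an assumption:

import random

class Actor:
    """Hypothetical sketch; the payoff handling lives in processChoice()."""

    def __init__(self, avaliable_funds, lower_threshhold, betrayal_rate):
        self.avaliable_funds = avaliable_funds  # spelling matches the call sites
        self.lower_threshhold = lower_threshhold
        self.betrayal_rate = betrayal_rate

    def make_choice(self):
        # Betray with probability betrayal_rate, otherwise cooperate (assumed).
        return "betray" if random.random() < self.betrayal_rate else "cooperate"

    def update(self):
        # Per-epoch bookkeeping hook; a no-op in this sketch.
        pass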
def __init__(self, state_size, action_size, seed=0):
    '''Initialize the Agent.

    Parameters
    ----------
    state_size : int
        The dimension of each state
    action_size : int
        The dimension of each action
    seed : int
        The random seed used to generate random numbers.
    '''
    self.state_size = state_size
    self.action_size = action_size
    random.seed(seed)
    # Actor gives the best action for a given state.
    self.actor_local = Actor(state_size, action_size, seed).to(device)
    self.actor_target = Actor(state_size, action_size, seed).to(device)
    # Critic evaluates the action.
    self.critic_local = Critic(state_size, action_size, seed).to(device)
    self.critic_target = Critic(state_size, action_size, seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                      lr=ACTOR_LEARNING_RATE)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=CRITIC_LEARNING_RATE,
                                       weight_decay=WEIGHT_DECAY)
    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
    # Noise process
    self.noise = OUNoise(action_size, seed)
    self.t_step = 0
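The t_step counter initialized above suggests a learn-every-N-steps schedule. A sketch of the step() method that pattern usually implies; UPDATE_EVERY, GAMMA, and the learn() signature are assumptions here:

def step(self, state, action, reward, next_state, done):
    # Save the experience, then learn every UPDATE_EVERY time steps
    # once the buffer holds at least one batch (constants assumed).
    self.memory.add(state, action, reward, next_state, done)
    self.t_step = (self.t_step + 1) % UPDATE_EVERY
    if self.t_step == 0 and len(self.memory) > BATCH_SIZE:
        experiences = self.memory.sample()
        self.learn(experiences, GAMMA)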
def setUpClass(cls):
    cls.world_one = World(7, 9)
    cls.world_two = World(10, 15)
    cls.actor_one = Actor()
    cls.actor_two = Actor()
    cls.actor_three = Actor()
    cls.world_two.addObject(cls.actor_two, 5, 10)
def __init__(self, state_size, action_size, random_seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    random.seed(random_seed)  # random.seed() returns None, so don't store its result

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Noise process
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
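All of these local/target network pairs imply the standard DDPG soft update; a minimal sketch, assuming the usual tau-blend of parameters:

def soft_update(local_model, target_model, tau):
    # theta_target = tau * theta_local + (1 - tau) * theta_target
    for target_param, local_param in zip(target_model.parameters(),
                                         local_model.parameters()):
        target_param.data.copy_(tau * local_param.data +
                                (1.0 - tau) * target_param.data)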
def __init__(self, state_size, action_size):
    self.epsilon = 0.8
    self.state_size = state_size
    self.action_size = action_size

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size)
    self.actor_target = Actor(self.state_size, self.action_size)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process (currently disabled)
    # self.exploration_mu = 0
    # self.exploration_theta = 0.15
    # self.exploration_sigma = 0.2
    # self.noise = OUNoise(self.action_size, self.exploration_mu,
    #                      self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 20000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.95  # discount factor
    self.tau = 0.002   # for soft update of target parameters
    self.stats = np.array([])
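The commented-out noise block above fixes mu=0, theta=0.15, sigma=0.2. A minimal Ornstein-Uhlenbeck process consistent with those values; the class body itself is an assumption, not the project's actual OUNoise:

import copy
import numpy as np

class OUNoise:
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Return the internal state to the mean.
        self.state = copy.copy(self.mu)

    def sample(self):
        # Mean-reverting drift plus Gaussian noise.
        dx = (self.theta * (self.mu - self.state)
              + self.sigma * np.random.standard_normal(len(self.state)))
        self.state = self.state + dx
        return self.state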
def __init__(self, eventQueue, commentatorQueue):
    Thread.__init__(self)
    self.eventQueue = eventQueue
    self.commentatorQueue = commentatorQueue
    self.commentators = []
    self.commentators.append(Actor("Rivington the 4th", "Brian", 0))
    self.commentators.append(
        Actor(
            random.choice([
                "Robo shocks", "the queen of twitch", "Your mom",
                "Pissed off", "undefined"
            ]), "Salli", 1))
    riv_bot = self.commentators[0]
    salli_bot = self.commentators[1]
    self.reset()
    with open("Config.json") as configFile:
        config = json.load(configFile)
    self.numberTh = [
        "first", "second", "third", "fourth", "fifth", "sixth",
        "seventh", "eighth", "ninth", "tenth", "eleventh"
    ]
def __init__(self, gamma, memory, s, a, tau, learningRate=1e-3,
             criticpath=None, actorpath=None):
    self.gamma = gamma
    self.memory = ReplayMemory(memory)
    self.actor = Actor(state=s, actions=a)
    self.critic = Critic(state=s, actions=a)
    if criticpath is not None:
        self.critic.load_state_dict(torch.load(criticpath))
    if actorpath is not None:
        self.actor.load_state_dict(torch.load(actorpath))
    self.targetActor = Actor(state=s, actions=a)
    self.targetActor.load_state_dict(self.actor.state_dict())
    self.targetCritic = Critic(state=s, actions=a)
    self.targetCritic.load_state_dict(self.critic.state_dict())
    self.tau = tau
    self.actorOptimizer = optim.Adam(self.actor.parameters(), learningRate)
    self.criticOptimizer = optim.Adam(self.critic.parameters(), learningRate)
    # s and a are dimensionalities here, not concrete states/actions.
    self.state = s
    self.action = a
    self.OUarray = np.zeros((1000, self.action), dtype="f")
    self.step = 0
def __init__(self, state_size, action_size, num_agents):
    """
    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        num_agents (int): number of agents in the environment
    """
    random_seed = 10  # a seed should be an int; random.seed() returns None
    random.seed(random_seed)
    self.state_size = state_size
    self.action_size = action_size
    self.random_seed = random_seed
    self.num_agents = num_agents

    # Replay memory
    self.memory = ReplayBuf(action_size, BUFFER_SIZE, BATCH_SIZE, self.random_seed)

    # Actor Networks
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Make sure the Actor Target Network has the same weight values as the Local Network
    for target, local in zip(self.actor_target.parameters(),
                             self.actor_local.parameters()):
        target.data.copy_(local.data)

    # Critic Network (w/ Target Network); inputs are concatenated across agents
    self.critic_local = Critic(state_size * num_agents,
                               action_size * num_agents, random_seed).to(device)
    self.critic_target = Critic(state_size * num_agents,
                                action_size * num_agents, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Single-agent critic variant kept for reference:
    # self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    # self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    # self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
    #                                    lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Make sure the Critic Target Network has the same weight values as the Local Network
    for target, local in zip(self.critic_target.parameters(),
                             self.critic_local.parameters()):
        target.data.copy_(local.data)

    self.noise = Ornstein_Uhlenbeck_Noise(action_size, random_seed)
def load_weights(self, option=None):
    # The two branches only differ in the weights file, so build the Actor once.
    self.trained = Actor(self.state_size, self.action_size, self.action_low,
                         self.action_high, self.actor_lr, self.network)
    if option is None:
        self.trained.model.load_weights('model_weights.h5')
    else:
        self.trained.model.load_weights('weights-best.hdf5')
    print(self.trained.model.summary())
def __init__(self, transactionXmlNode, simulation, tid=None, ppid=None,
             entitiesXmlNode=None, actor=None, entities=None, xcontext=None):
    super().__init__(sim=simulation)
    self.simulation = simulation
    try:
        self.entitiesXmlNode = (entitiesXmlNode if entitiesXmlNode is not None
                                else XmlSource())
        self.template = transactionXmlNode.get("id")
        self.pid = self.simulation.getTId()
        self.id = tid if tid is not None else self.pid
        self.ppid = ppid
        if actor is not None:
            self.actor = actor
        else:
            path, base = transactionXmlNode.getWithBase("actor")
            if path is not None:
                self.actor = Actor(self.simulation, xmlLoader(path, base=base),
                                   extraProperties=True)
            else:
                self.actor = Actor(self.simulation, XmlSource())
        self.startTime = None
        if xcontext is None:
            self.xcontext = XValueContext(
                lambda: self.simulation.now() - self.startTime)
        else:
            self.xcontext = xcontext
        self.t = self.xcontext.t
        path, base = transactionXmlNode.getWithBase("entities")
        if path is not None:
            self.entitiesXmlNode.append(xmlLoader(path, base=base))
        if entities is None:
            self.factory = EntityFactory(entitiesXmlNode)
            self.entities = populateEntities(self.factory, self, transactionXmlNode)
        else:
            for entity in entities:
                entity.setTransaction(self)
            self.entities = entities
    except Exception as e:
        print(e)
        traceback.print_exc(file=sys.stderr)
def main():
    world = World(7, 9)
    print("Number of objects in cell(4,4) = %d" % world.addObject(Actor(), 4, 4))
    print("Number of objects in cell(2,3) = %d" % world.addObject(Actor(), 2, 3))
    print("Number of objects in cell(4,4) = %d" % world.addObject(Actor(), 4, 4))
    print("Number of objects in cell(4,4) = %d" % world.addObject(Actor(), 4, 4))
    print(world)
    print("%r" % world)
def test_exceptions(self):
    with self.assertRaises(NameError):
        self.world_one.addObject(None, 6, 5)
    with self.assertRaises(ValueError):
        self.world_one.addObject(Actor(), 10, 5)
    with self.assertRaises(ValueError):
        self.world_one.addObject(Actor(), -1, 5)
    with self.assertRaises(ValueError):
        self.world_one.addObject(Actor(), 6, 10)
    with self.assertRaises(ValueError):
        self.world_one.addObject(Actor(), 10, -1)
    with self.assertRaises(SyntaxError):
        for ind in range(6):
            self.world_one.addObject(Actor(), 6, 5)
def consume(self):
    """
    Finds the IMDb id for a film if possible.

    Reads one film from self.input_q and pushes results to self.output_q
    and self.actor_ins (the docstring previously described parameters this
    method does not take).

    :return: Nothing
    """
    film_todo = self.input_q.get()
    self.input_q.task_done()
    film_todo.set_non_aggregate_fields()
    self.output_q.put(film_todo, film_todo.id)
    for a in film_todo.get_actors():
        self.actor_ins.put(Actor(a, False), a)
    self.actor_ins.put(Actor(film_todo.director, True),
                       "director-{0}".format(film_todo.director))
def __init__(self, state_size, action_size, num_agents, random_seed):
    """
    Initialize an Agent

    Params
    ======
        state_size (int): state dimension
        action_size (int): action dimension
        num_agents (int): simultaneous running agents
        random_seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    random.seed(random_seed)

    # Actor Network and its target network
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network and its target network
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Noise object
    self.noise = OUNoise((num_agents, action_size), random_seed)

    # Replay Memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                               EXPERIENCES_PER_SAMPLING, device, random_seed)

    # Time steps for the three update schedules
    self.t_step_nn = 0       # update networks every UPDATE_NN_EVERY steps
    self.t_step_mem_par = 0  # update memory parameters every UPDATE_MEM_PAR_EVERY steps
    self.t_step_mem = 0      # update memory every UPDATE_MEM_EVERY steps
def __init__(self, sess, scale_u, params):
    self.sess = sess
    self.scale_u = scale_u
    self.__dict__.update(params)

    # CREATE INPUT PLACEHOLDERS
    self.create_input_placeholders()

    # INITIALIZE ACTOR & CRITIC MODELS
    self.agents = [Actor(self.sess, self.inputs, i, **self.actor_params)
                   for i in [1, 2, 3]]
    self.critic = Critic(self.sess, self.inputs, **self.critic_params)

    # INITIALIZE EXPLORATION MODEL
    self.noise_params = {k: np.fromstring(v, sep=",", dtype="f")
                         for k, v in self.noise_params.items()}
    self.noise = [Noise(**self.noise_params) for _ in range(3)]

    # INITIALIZE REPLAY BUFFER
    self.memory = Memory(self.memory_size)

    # AVERAGE AGENT POLICIES
    avg_pi = [tf.reduce_mean(i, axis=0)
              for i in zip(*[x.pi.net_params for x in self.agents])]
    self.avg_op = [tf.assign(i, j)
                   for x in self.agents
                   for i, j in zip(x.pi.net_params, avg_pi)]
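For clarity, the fromstring conversion above expects each noise parameter as a comma-separated string. An illustrative (assumed) input and result:

# Illustrative noise_params input (keys and values assumed):
#   {"mu": "0.0,0.0", "sigma": "0.2,0.2", "theta": "0.15,0.15"}
# After the dict comprehension each value becomes a float32 array:
#   {"mu": array([0., 0.], dtype=float32), ...}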
def read_csv_file(self):
    with open(self._file_name, mode='r', encoding='utf-8-sig') as csvfile:
        movie_file_reader = csv.DictReader(csvfile)
        for row in movie_file_reader:
            # Add title
            title = row['Title']
            release_year = int(row['Year'])
            new_movie = Movie(title, release_year)
            self.dataset_of_movies.add(new_movie)

            # Add actors
            actors_list = row['Actors'].split(",")
            for person in actors_list:
                new_actor = Actor(person.strip())
                if new_actor not in self.dataset_of_actors:
                    self.dataset_of_actors.add(new_actor)

            # Add director
            new_director = Director(row['Director'])
            if new_director not in self.dataset_of_directors:
                self.dataset_of_directors.add(new_director)

            # Add genres
            genre_list = row['Genre'].split(",")
            for g in genre_list:
                new_genre = Genre(g.strip())
                if new_genre not in self.dataset_of_genres:
                    self.dataset_of_genres.add(new_genre)
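A usage sketch for the reader above; the owning class name and file name are assumptions, but the column names are the ones the loop reads (Title, Year, Actors, Director, Genre):

# Class and file names here are hypothetical.
reader = MovieFileCSVReader("movies.csv")
reader.read_csv_file()
print(len(reader.dataset_of_movies), "movies loaded")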
def _actor(self, match):
    actorName = match.group(1)
    parentName = match.group(2)
    replaces = match.group(3)
    newActor = Actor(actorName, parentName, replaces)
    self.actors.append(newActor)
    self.actorMap[actorName] = newActor
def __fill_actors(raw_actors: str) -> List[Actor]:
    actors: List[Actor] = []
    parsed = json.loads(raw_actors)  # don't rebind the str parameter to a list
    for actor in parsed:
        actors.append(Actor(actor))
    return actors
def makeDefault(self, meshSrc="Empty"):
    """Create an Actor with a default set of components, and specified mesh."""
    actor = Actor(self.renderer)
    # NOTE: Meshes are currently shared, therefore not linked to individual actors.
    actor.components['Mesh'] = Mesh.getMesh(meshSrc)
    actor.components['Transform'] = Transform(actor=actor)
    actor.components['Material'] = Material(actor=actor)
    return actor
def read_csv_file(self):
    with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
        movie_file_reader = csv.DictReader(csvfile)
        for row in movie_file_reader:
            movie = Movie(row["Title"], int(row["Year"]))
            movie.description = row["Description"]

            movie.runtime_minutes = int(row["Runtime (Minutes)"])
            self.__total_runtime_minutes += int(row["Runtime (Minutes)"])
            self.__runtime_minutes_number_of_movies += 1

            # Optional numeric fields: skip "N/A" and keep running totals
            # plus per-field counts so averages can be computed later.
            if row["Rating"] != "N/A":
                movie.rating = float(row["Rating"])
                self.__total_rating += float(row["Rating"])
                self.__rating_number_of_movies += 1
            if row["Votes"] != "N/A":
                movie.votes = int(row["Votes"])
                self.__total_votes += int(row["Votes"])
                self.__votes_number_of_movies += 1
            if row["Revenue (Millions)"] != "N/A":
                movie.revenue_millions = float(row["Revenue (Millions)"])
                self.__total_revenue_millions += float(row["Revenue (Millions)"])
                self.__revenue_millions_number_of_movies += 1
            if row["Metascore"] != "N/A":
                movie.metascore = int(row["Metascore"])
                self.__total_metascore += int(row["Metascore"])
                self.__metascore_number_of_movies += 1

            self.__dataset_of_movies.append(movie)
            self.__dataset_of_directors.add(Director(row["Director"]))
            for actor in row["Actors"].split(","):
                self.__dataset_of_actors.add(Actor(actor.strip()))
            for genre in row["Genre"].split(","):
                self.__dataset_of_genres.add(Genre(genre.strip()))
def __init__(self, state_size, action_size, random_seed, num_agents, device, hps):
    self.noise = OUNoise(action_size, random_seed)
    self.state_size = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    self.count = 0

    # Hyperparameters
    self.batch_size = hps.batch_size
    self.tau = hps.tau
    self.lr_actor = hps.lr_actor
    self.lr_critic = hps.lr_critic
    self.update_every = hps.update_every

    # Shared replay buffer
    self.memory = ReplayBuffer(BUFFER_SIZE, self.batch_size, random_seed)

    # Critic networks - one (local + target) pair per agent
    self.critics = [Critic(state_size, action_size, random_seed,
                           self.lr_critic, WEIGHT_DECAY, device)
                    for _ in range(num_agents)]

    # Actor networks - one (local + target) pair per agent
    self.actors = [Actor(state_size, action_size, random_seed,
                         self.lr_actor, self.noise, device)
                   for _ in range(num_agents)]
def start(GAME_NAME, MAX_EPISODE):
    env = gym.make(GAME_NAME)  # create environment
    actor = Actor(env.observation_space, env.action_space)    # create actor
    critic = Critic(env.observation_space, env.action_space)  # create critic
    reward_per_epi = []
    durations_per_epi = []
    l_A = []  # actor losses
    l_C = []  # critic losses
    RENDER = False
    MAX_EP_STEPS = 1000
    for i_episode in range(MAX_EPISODE):
        s = env.reset()
        critic.reset()
        actor.reset()
        track_r = []
        for t in count():
            if RENDER:
                env.render()
            a = actor.choose_action(s)
            s_, r, done, info = env.step(a)
            # if done: r = -20  # penalty on termination (disabled)
            track_r.append(r)
            td_error, abs_error = critic.learn(s, r, s_)  # Critic learns
            actor.learn(s, a, td_error)                   # Actor learns
            s = s_
            if is_ipython:
                display.clear_output(wait=True)
                display.display(plt.gcf())
            if done or t >= MAX_EP_STEPS:
                # Episode finished; record results.
                ep_rs_sum = sum(track_r)
                reward_per_epi.append(ep_rs_sum)
                durations_per_epi.append(t)
                l_A.append(np.mean(actor._loss_))
                l_C.append(np.mean(critic._loss_))
                break
    return reward_per_epi, durations_per_epi, l_A, l_C
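An illustrative call; the environment id and episode count are assumptions, not values from the source:

# "CartPole-v0" is a placeholder for whatever env this project targets.
rewards, durations, actor_loss, critic_loss = start("CartPole-v0", 500)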
def add_actor_to_map(self, tile, tile_pos, add_to_enemy_list=False):
    # Random animation delay between 0.080 and 0.195 s in 5 ms steps.
    delay = choice([x / 1000. for x in range(80, 200, 5)])
    actor = Actor(tile.col, tile.row,
                  pos=(tile_pos[0] + 10, tile_pos[1] + 253),
                  anim_delay=delay)
    # NOTE: the actor is always appended to enemy_list; the flag only
    # controls whether it is also added as a widget.
    self.enemy_list.append(actor)
    if add_to_enemy_list:
        self.add_widget(actor)
def DoEpisodes(episodes, boardSize, maxRemovePegs, boardType, epsilon=0.5,
               learningRate=0.9, policyTable={}, valueTable={}):
    # NOTE: the mutable default tables persist across calls.
    TotalError = 0
    stepsTaken = 1
    actor = Actor(0.9, learningRate, epsilon, policyTable)
    critic = Critic(0.9, learningRate, valueTable)
    for i in range(episodes):
        world = GetRandomizedBoard(boardSize, maxRemovePegs, boardType)
        actor.resetEligibility()
        critic.resetEligibility()
        critic.tdError = 0
        reward = 0
        state = world.stateToHash()
        chosenAction = actor.ChooseActionByPolicy(world)
        while True:
            reward = world.makeAction(chosenAction)
            nextAction = actor.ChooseActionByPolicy(world)
            nextState = world.stateToHash()
            actor.eligibility[state + str(chosenAction)] = 1
            critic.updateTDError(reward, state, nextState)
            critic.eligibility[state] = 1
            TotalError += abs(critic.tdError)
            for SAP in world.getGameLog():
                critic.updateValue(SAP)
                critic.decayEligibility(SAP)
                actor.updatePolicy(SAP, critic.tdError)
                actor.decayEligibility(SAP)
            if reward == 10:
                updateSolvableStates(boardType + str(boardSize),
                                     world.startRemoveLocations)
            if chosenAction is None:
                break
            chosenAction = nextAction
            state = nextState
            stepsTaken += 1
        print('Episode:', i, 'MeanError', TotalError / stepsTaken)
    WriteTables(critic.getValueTable(), actor.getPolicyTable(), boardType, boardSize)
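An illustrative invocation; all argument values here are assumptions (boardType is concatenated with str(boardSize) above, so it is a string tag):

# Hypothetical board parameters.
DoEpisodes(episodes=500, boardSize=5, maxRemovePegs=2, boardType="triangle")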
def CreateActors(self, actorNumber):
    self.actorList = numpy.zeros(0, dtype=Actor)
    for name in range(0, actorNumber):
        # Each Actor takes its name (an index here) plus the shared
        # market/resource settings and price/amount bounds.
        self.actorList = numpy.append(self.actorList, [
            Actor(name, self.market, self.resource,
                  self.min_init_amountToSell, self.max_init_amountToSell,
                  self.min_init_priceDivergence, self.max_init_priceDivergence,
                  self.currency)
        ])
def __init__(self):
    tf.reset_default_graph()
    self.sess = tf.Session()
    n_features = Config.PLAYER_DIMENSION * (Config.DEFENDER_COUNT +
                                            Config.INTRUDER_COUNT)
    self.actor = Actor(self.sess, n_features=n_features,
                       lr=Config.LEARNING_RATE_START,
                       action_bound=[-math.pi, math.pi])
    self.critic = Critic(self.sess, n_features=n_features,
                         lr=Config.LEARNING_RATE_START)
    self.sess.run(tf.global_variables_initializer())
def __init__(self, env, batchSize=10, bufferSize=100, gamma=0.98,
             actorLR=1e-4, criticLR=1e-3, maxSteps=200, targetUpdate=1e-3,
             epsilon=1, decay=0.99, rewardScale=1e-3, logFile='run.log'):
    self.env = env
    self.gamma = gamma
    self.batchSize = batchSize
    self.bufferSize = bufferSize
    self.maxSteps = maxSteps + 1
    self.rewardScale = rewardScale
    self.epsilon = epsilon
    self.decay = decay

    # Useful helpers.
    self.actionDim = self.env.action_space.shape[0]
    self.stateDim = self.env.observation_space.shape[0]
    self.featureDim = self.actionDim + self.stateDim
    self.minAction = self.env.action_space.low
    self.maxAction = self.env.action_space.high

    # For scaling output action values.
    self.actionBiasZeroOne = self.minAction
    self.actionScaleZeroOne = self.maxAction - self.minAction
    self.actionBiasTanH = (self.maxAction + self.minAction) / 2.0
    self.actionScaleTanH = self.maxAction - self.actionBiasTanH

    # Initialize noise process.
    self.noise = OUNoise(self.actionDim)

    # Initialize replay buffer.
    self.buffer = ReplayBuffer(self.bufferSize)

    # Initialize logging.
    logging.basicConfig(filename=logFile, level=logging.INFO,
                        format='[%(asctime)s] %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.info('Initializing DRPG agent with passed settings.')

    # Tensorflow GPU optimization: allocate GPU memory on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(self.sess)

    # Make actor network (creates target model internally).
    self.actor = Actor(self.sess, self.maxSteps, self.featureDim,
                       self.actionDim, self.batchSize, targetUpdate,
                       actorLR, self.actionScaleTanH, self.actionBiasTanH)

    # Make critic network (creates target model internally).
    # Bug fix: the original passed actorLR here, leaving criticLR unused.
    self.critic = Critic(self.sess, self.maxSteps, self.featureDim,
                         self.actionDim, self.batchSize, targetUpdate,
                         criticLR)
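The TanH bias/scale terms above exist to map a network output in [-1, 1] onto [minAction, maxAction]; a one-line sketch of how they are meant to be applied (the helper name is hypothetical):

def scale_tanh_action(tanh_out, scale, bias):
    # tanh_out = +1 -> maxAction, tanh_out = -1 -> minAction
    return tanh_out * scale + bias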
def __init__(self, params):
    self.action_size = params['action_size']
    self.state_size = params['state_size']
    self.num_agents = params['num_agents']
    self.buffer_size = params['buffer_size']
    self.batch_size = params['batch_size']
    self.__gamma = params['gamma']
    self.__tau = params['tau']
    self.__update_every = params['update_every']
    self.__save_to = params['save_to']
    self.__memory = ReplayBuffer(self.buffer_size, self.batch_size)
    self.__lr = params['lr']
    self.noise_type = params['noise_type']

    actor_params = dict()
    actor_params['arch_params_actor'] = params['arch_params_actor']
    actor_params['action_size'] = self.action_size
    actor_params['state_size'] = self.state_size
    actor_params['eps'] = params['eps']
    actor_params['eps_decay'] = params['eps_decay']
    actor_params['eps_min'] = params['min_eps']
    actor_params['noise_type'] = params['noise_type']
    self.actor = Actor(actor_params)
    self.actor_target = Actor(actor_params)
    self.optimizer_actor = optim.Adam(self.actor.parameters(), lr=self.__lr)
    self.scheduler_actor = optim.lr_scheduler.StepLR(self.optimizer_actor,
                                                     step_size=100, gamma=0.95)

    critic_params = dict()
    critic_params['arch_params_critic'] = params['arch_params_critic']
    critic_params['action_size'] = self.action_size
    critic_params['state_size'] = self.state_size
    self.critic = Critic(critic_params)
    self.critic_target = Critic(critic_params)
    self.optimizer_critic = optim.Adam(self.critic.parameters(), lr=self.__lr)
    # Bug fix: the critic scheduler must wrap the critic optimizer;
    # the original wrapped optimizer_actor a second time.
    self.scheduler_critic = optim.lr_scheduler.StepLR(self.optimizer_critic,
                                                      step_size=100, gamma=0.95)

    self.__t = 0
def __init__(self, state_size, action_size, action_low, action_high):
    # self.task = task
    self.state_size = state_size
    self.action_size = action_size
    self.action_low = action_low
    self.action_high = action_high

    # Learning rates
    self.lr_actor = 1e-4
    self.lr_critic = 1e-3

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.lr_actor)
    self.actor_target = Actor(self.state_size, self.action_size, self.lr_actor)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, self.lr_critic)
    self.critic_target = Critic(self.state_size, self.action_size, self.lr_critic)

    # Store model architecture of actor and critic locally
    # keras.utils.plot_model(self.actor_local.model, '/home/danie/catkin_ws/src/ddpg/src/actor.png', show_shapes=True)
    # keras.utils.plot_model(self.critic_local.model, '/home/danie/catkin_ws/src/ddpg/src/critic.png', show_shapes=True)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Initialize OU noise
    self.noise = OUNoise(action_size=self.action_size)
    # Currently testing with Gaussian noise instead of OU; its parameters:
    self.noise_mean = 0.0
    self.noise_stddev = 0.2

    # Initialize replay buffer (the size must be an int, not the float 1e6)
    self.buffer_size = int(1e6)
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Parameters for DDPG
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters
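Several of these agents construct ReplayBuffer(buffer_size, batch_size); a minimal deque-backed sketch matching that two-argument signature (the experience field names are assumptions):

import random
from collections import deque, namedtuple

Experience = namedtuple("Experience",
                        ["state", "action", "reward", "next_state", "done"])

class ReplayBuffer:
    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=int(buffer_size))  # oldest entries fall off
        self.batch_size = batch_size

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        # Uniform random minibatch.
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)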
def main(args=None):
    numItr = 5
    if args is not None and len(args) > 1:
        numItr = int(args[1])  # command-line arguments arrive as strings

    print('Simulation of MyWorld')
    world = MyWorld()
    for x in range(numItr):
        world.act()
        obj = world.getObjects()
        for each in obj:
            each.act()

    print('Simulation of World')
    world = World(100, 100)
    world.addObject(Actor(), 10, 10)
    world.addObject(Actor(), 90, 90)
    for x in range(numItr):
        world.act()
        obj = world.getObjects()
        for each in obj:
            each.act()