def simulate(num_epochs:int):
    '''
    runs our simulation with 2 actors
    :param num_epochs: the number of iterations we want to run
    :type num_epochs: int
    :return: None
    '''
    data = {"actor1_money":[], "actor2_money":[], "actor1_choice":[], "actor2_choice":[]}

    player1 = Actor(avaliable_funds = 10, lower_threshhold=5, betrayal_rate=.03)
    player2 = Actor(avaliable_funds = 10, lower_threshhold=5, betrayal_rate=.90)

    for i in range(num_epochs):
        choice1 = player1.make_choice()
        choice2 = player2.make_choice()
        processChoice(choice1, choice2, player1, player2)
        data["actor1_money"].append(player1.avaliable_funds)
        data["actor2_money"].append(player2.avaliable_funds)
        data["actor1_choice"].append(choice1)
        data["actor2_choice"].append(choice2)
        player1.update()
        player2.update()

    df = pd.DataFrame(data)
    print(df)
    plt.plot(range(num_epochs), df['actor1_money'])
    plt.plot(range(num_epochs), df['actor2_money'])
    plt.show()

    plt.clf()
    counts1 = df.groupby(['actor1_choice'])['actor1_choice'].count()
    counts2 = df.groupby(['actor2_choice'])['actor2_choice'].count()

    print(counts1)
    print(counts2)
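A minimal usage sketch for the function above, assuming Actor and processChoice are defined in the same module and that pandas and matplotlib are available:

import pandas as pd
import matplotlib.pyplot as plt

simulate(num_epochs=100)  # run 100 rounds, print the DataFrame, and plot each actor's funds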
Example #2
	def __init__(self, state_size, action_size, seed=0):
		'''Initialize the Agent.
		
		Parameters
		----------
		state_size : int
			The dimension of each state
		
		action_size : int
			The dimension of each action
		
		seed : int
			The random seed used to generate random numbers.
		'''
		self.state_size = state_size
		self.action_size = action_size
		random.seed(seed)

		#actor gives the best action for given state
		self.actor_local = Actor(state_size, action_size, seed).to(device)
		self.actor_target = Actor(state_size, action_size, seed).to(device)

		#evaluates the action
		self.critic_local = Critic(state_size, action_size, seed).to(device)
		self.critic_target = Critic(state_size, action_size, seed).to(device)

		self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=ACTOR_LEARNING_RATE)
		self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=CRITIC_LEARNING_RATE, weight_decay=WEIGHT_DECAY)

		#Replay Memory
		self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

		#Noise
		self.noise = OUNoise(action_size,seed)
		self.t_step = 0
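The local/target network pairs above are normally blended with a soft update after each learning step; a minimal sketch of that step, assuming a TAU constant is defined alongside the other hyperparameters (BUFFER_SIZE, ACTOR_LEARNING_RATE, ...):

def soft_update(local_model, target_model, tau=TAU):
    # theta_target <- tau * theta_local + (1 - tau) * theta_target
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)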
Example #3
 def setUpClass(cls):
     cls.world_one = World(7, 9)
     cls.world_two = World(10, 15)
     cls.actor_one = Actor()
     cls.actor_two = Actor()
     cls.actor_three = Actor()
     cls.world_two.addObject(cls.actor_two, 5, 10)
Example #4
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random_seed
        random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
Example #5
    def __init__(self, state_size, action_size):
        self.epsilon = 0.8
        self.state_size = state_size
        self.action_size = action_size

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size)
        self.actor_target = Actor(self.state_size, self.action_size)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        #         self.exploration_mu = 0
        #         self.exploration_theta = 0.15
        #         self.exploration_sigma = 0.2
        #         self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 20000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.95  # discount factor
        self.tau = 0.002  # for soft update of target parameters

        self.stats = np.array([])
Example #6
    def __init__(self, eventQueue, commentatorQueue):
        Thread.__init__(self)

        self.eventQueue = eventQueue
        self.commentatorQueue = commentatorQueue

        self.commentators = []
        self.commentators.append(Actor("Rivington the 4th", "Brian", 0))
        self.commentators.append(
            Actor(
                random.choice([
                    "Robo shocks", "the queen of twitch", "Your mom",
                    "Pissed off", "undefined"
                ]), "Salli", 1))

        riv_bot = self.commentators[0]
        salli_bot = self.commentators[1]

        self.reset()

        with open("Config.json") as configFile:
            config = json.load(configFile)

        self.numberTh = [
            "first", "second", "third", "forth", "fifth", "sixth", "seventh",
            "eigth", "ninth", "tenth", "eleventh"
        ]
Example #7
    def __init__(self,
                 gamma,
                 memory,
                 s,
                 a,
                 tau,
                 learningRate=1e-3,
                 criticpath=None,
                 actorpath=None):
        self.gamma = gamma
        self.memory = ReplayMemory(memory)
        self.actor = Actor(state=s, actions=a)
        self.critic = Critic(state=s, actions=a)
        if criticpath is not None:
            self.critic.load_state_dict(torch.load(criticpath))
        if actorpath is not None:
            self.actor.load_state_dict(torch.load(actorpath))
        self.targetActor = Actor(state=s, actions=a)
        self.targetActor.load_state_dict(self.actor.state_dict())
        self.targetCritic = Critic(state=s, actions=a)
        self.targetCritic.load_state_dict(self.critic.state_dict())
        self.tau = tau

        self.actorOptimizer = optim.Adam(self.actor.parameters(), learningRate)
        self.criticOptimizer = optim.Adam(self.critic.parameters(),
                                          learningRate)
        #more a dimensionality thing
        self.state = s
        self.action = a
        self.OUarray = np.zeros((1000, self.action), dtype="f")
        self.step = 0
Example #8
    def __init__(self, state_size, action_size, num_agents):
        """
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_agents (int): number of agents in the environment
        """
        random_seed = 10
        self.state_size = state_size
        self.action_size = action_size
        self.random_seed = random_seed
        random.seed(random_seed)
        self.num_agents = num_agents

        # Replay memory
        self.memory = ReplayBuf(action_size, BUFFER_SIZE, BATCH_SIZE,
                                self.random_seed)

        # Actor Networks
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Make sure the Actor Target Network has the same weight values as the Local Network
        for target, local in zip(self.actor_target.parameters(),
                                 self.actor_local.parameters()):
            target.data.copy_(local.data)

        # Critic Network (w/ Target Network)

        self.critic_local = Critic(state_size * num_agents,
                                   action_size * num_agents,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size * num_agents,
                                    action_size * num_agents,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)
        """
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)
        """

        # Make sure the Critic Target Network has the same weight values as the Local Network
        for target, local in zip(self.critic_target.parameters(),
                                 self.critic_local.parameters()):
            target.data.copy_(local.data)

        self.noise = Ornstein_Uhlenbeck_Noise(action_size, random_seed)
Example #9
 def load_weights(self, option=None):
     if option is None:
         self.trained = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high,
                              self.actor_lr, self.network)
         self.trained.model.load_weights('model_weights.h5')
     else:
         self.trained = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high,
                              self.actor_lr, self.network)
         self.trained.model.load_weights('weights-best.hdf5')
         print(self.trained.model.summary())
Example #10
    def __init__(self,
                 transactionXmlNode,
                 simulation,
                 tid=None,
                 ppid=None,
                 entitiesXmlNode=None,
                 actor=None,
                 entities=None,
                 xcontext=None):
        super().__init__(sim=simulation)
        self.simulation = simulation
        try:
            self.entitiesXmlNode = entitiesXmlNode if entitiesXmlNode is not None else XmlSource(
            )
            self.template = transactionXmlNode.get("id")
            self.pid = self.simulation.getTId()
            self.id = tid if tid is not None else self.pid
            self.ppid = ppid

            if actor is not None:
                self.actor = actor
            else:
                path, base = transactionXmlNode.getWithBase("actor")
                if path is not None:
                    self.actor = Actor(self.simulation,
                                       xmlLoader(path, base=base),
                                       extraProperties=True)
                else:
                    self.actor = Actor(self.simulation, XmlSource())

            self.startTime = None
            if xcontext is None:
                self.xcontext = XValueContext(
                    lambda: self.simulation.now() - self.startTime)
            else:
                self.xcontext = xcontext
            self.t = self.xcontext.t

            path, base = transactionXmlNode.getWithBase("entities")
            if path is not None:
                self.entitiesXmlNode.append(xmlLoader(path, base=base))

            if entities is None:
                self.factory = EntityFactory(entitiesXmlNode)
                self.entities = populateEntities(self.factory, self,
                                                 transactionXmlNode)
            else:
                for entity in entities:
                    entity.setTransaction(self)
                self.entities = entities
        except Exception as e:
            print(e)
            traceback.print_exc(file=sys.stderr)
Example #11
def main():
    world = World(7, 9)
    print("Number of objects in cell(4,4) = %d" %
          world.addObject(Actor(), 4, 4))
    print("Number of objects in cell(2,3) = %d" %
          world.addObject(Actor(), 2, 3))
    print("Number of objects in cell(4,4) = %d" %
          world.addObject(Actor(), 4, 4))
    print("Number of objects in cell(4,4) = %d" %
          world.addObject(Actor(), 4, 4))
    print(world)
    print("%r" % world)
Example #12
 def test_exceptions(self):
     with self.assertRaises(NameError):
         self.world_one.addObject(None, 6, 5)
     with self.assertRaises(ValueError):
         self.world_one.addObject(Actor(), 10, 5)
     with self.assertRaises(ValueError):
         self.world_one.addObject(Actor(), -1, 5)
     with self.assertRaises(ValueError):
         self.world_one.addObject(Actor(), 6, 10)
     with self.assertRaises(ValueError):
         self.world_one.addObject(Actor(), 10, -1)
     with self.assertRaises(SyntaxError):
         for ind in range(6):
             self.world_one.addObject(Actor(), 6, 5)
 def consume(self):
     """
     Finds the IMDb id for a film if possible
     :param input_q: queue of input items
     :param output_q: queue of output items
     :return: Nothing
     """
     film_todo = self.input_q.get()
     self.input_q.task_done()
     film_todo.set_non_aggregate_fields()
     self.output_q.put(film_todo, film_todo.id)
     for a in film_todo.get_actors():
         self.actor_ins.put(Actor(a, False), a)
     self.actor_ins.put(Actor(film_todo.director, True),
                        "director-{0}".format(film_todo.director))
    def __init__(self, state_size, action_size, num_agents, random_seed):
        """
        Initialize an Agent

        Params
        ======
            state_size (int): state dimension
            action_size (int): action dimension
            num_agents (int): simultaneous running agents
            random_seed (int): random seed
        """

        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        random.seed(random_seed)

        # Actor Network and its target network
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network and its target network
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise object
        self.noise = OUNoise((num_agents, action_size), random_seed)

        # Replay Memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   EXPERIENCES_PER_SAMPLING, device,
                                   random_seed)

        # Initialize time step (for updating every UPDATE_NN_EVERY steps)
        self.t_step_nn = 0
        # Initialize time step (for updating every UPDATE_MEM_PAR_EVERY steps)
        self.t_step_mem_par = 0
        # Initialize time step (for updating every UPDATE_MEM_EVERY steps)
        self.t_step_mem = 0
Example #15
 def __init__(self, sess, scale_u, params):
     self.sess = sess
     self.scale_u = scale_u
     self.__dict__.update(params)
     # CREATE INPUT PLACEHOLDERS
     self.create_input_placeholders()
     # INITIALIZE ACTOR & CRITIC MODELS
     self.agents = [
         Actor(self.sess, self.inputs, i, **self.actor_params)
         for i in [1, 2, 3]
     ]
     self.critic = Critic(self.sess, self.inputs, **self.critic_params)
     # INITIALIZE EXPLORATION MODEL
     self.noise_params = {
         k: np.fromstring(v, sep=",", dtype="f")
         for k, v in self.noise_params.items()
     }
     self.noise = [Noise(**self.noise_params) for _ in range(3)]
     # INITIALIZE REPLAY BUFFER
     self.memory = Memory(self.memory_size)
     # AVERAGE AGENT POLICIES
     avg_pi = [
         tf.reduce_mean(i, axis=0)
         for i in zip(*[x.pi.net_params for x in self.agents])
     ]
     self.avg_op = [
         tf.assign(i, j) for x in self.agents
         for i, j in zip(x.pi.net_params, avg_pi)
     ]
    def read_csv_file(self):
        with open(self._file_name, mode='r', encoding='utf-8-sig') as csvfile:
            movie_file_reader = csv.DictReader(csvfile)

            for row in movie_file_reader:
                # Title Add
                title = row['Title']
                release_year = int(row['Year'])
                new_movie = Movie(title, release_year)
                self.dataset_of_movies.add(new_movie)

                # Actor Add
                actors = row['Actors']
                actors_list = actors.split(",")
                for people in actors_list:
                    new_actor = Actor(people.strip())
                    if new_actor not in self.dataset_of_actors:
                        self.dataset_of_actors.add(new_actor)

                # Director Add
                director = row['Director']
                new_director = Director(director)
                if new_director not in self.dataset_of_directors:
                    self.dataset_of_directors.add(new_director)

                # Genre Add
                genre = row['Genre']
                genre_list = genre.split(",")
                for g in genre_list:
                    new_genre = Genre(g.strip())
                    if new_genre not in self.dataset_of_genres:
                        self.dataset_of_genres.add(new_genre)
Example #17
 def _actor(self, match):
     actorName = match.group(1)
     parentName = match.group(2)
     replaces = match.group(3)
     newActor = Actor(actorName, parentName, replaces)
     self.actors += [newActor]
     self.actorMap[actorName] = newActor
Example #18
def __fill_actors(raw_actors: str) -> List[Actor]:
    actors: List[Actor] = []
    raw_actors = json.loads(raw_actors)
    for actor in raw_actors:
        actors.append(Actor(actor))

    return actors
Example #19
 def makeDefault(self, meshSrc="Empty"):
     """Create an Actor with a default set of components, and specified mesh."""
     actor = Actor(self.renderer)
     actor.components['Mesh'] = Mesh.getMesh(meshSrc)  # NOTE Meshes are currently shared, therefore not linked to individual actors
     actor.components['Transform'] = Transform(actor=actor)
     actor.components['Material'] = Material(actor=actor)
     return actor
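A short usage sketch for makeDefault, assuming an instance of the surrounding factory class (called factory here) and a mesh name that Mesh.getMesh can resolve:

player = factory.makeDefault(meshSrc="Cube")  # 'factory' and "Cube" are illustrative assumptions
print(list(player.components.keys()))  # expect Mesh, Transform and Material among the components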
Example #20
    def read_csv_file(self):
        with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
            movie_file_reader = csv.DictReader(csvfile)
            index = 0
            for row in movie_file_reader:
                movie = Movie(row["Title"], int(row["Year"]))
                movie.description = row["Description"]
                movie.runtime_minutes = int(row["Runtime (Minutes)"])
                self.__total_runtime_minutes += int(row["Runtime (Minutes)"])
                self.__runtime_minutes_number_of_movies += 1
                if row["Rating"] != "N/A":
                    movie.rating = float(row['Rating'])
                    self.__total_rating += float(row['Rating'])
                    self.__rating_number_of_movies += 1
                if row["Votes"] != "N/A":
                    movie.votes = int(row["Votes"])
                    self.__total_votes += int(row["Votes"])
                    self.__votes_number_of_movies += 1
                if row["Revenue (Millions)"] != "N/A":
                    movie.revenue_millions = float(row["Revenue (Millions)"])
                    self.__total_revenue_millions += float(
                        row["Revenue (Millions)"])
                    self.__revenue_millions_number_of_movies += 1
                if row["Metascore"] != "N/A":
                    movie.metascore = int(row["Metascore"])
                    self.__total_metascore += int(row["Metascore"])
                    self.__metascore_number_of_movies += 1

                self.__dataset_of_movies.append(movie)
                self.__dataset_of_directors.add(Director(row["Director"]))
                for actor in row["Actors"].split(","):
                    self.__dataset_of_actors.add(Actor(actor.strip()))
                for genre in row["Genre"].split(","):
                    self.__dataset_of_genres.add(Genre(genre.strip()))
                index += 1
Example #21
    def __init__(self, state_size, action_size, random_seed, num_agents,
                 device, hps):
        self.noise = OUNoise(action_size, random_seed)
        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.count = 0
        # setting the hyperparameters
        self.batch_size = hps.batch_size
        self.tau = hps.tau
        self.lr_actor = hps.lr_actor
        self.lr_critic = hps.lr_critic
        self.update_every = hps.update_every
        # shared replay buffer
        self.memory = ReplayBuffer(BUFFER_SIZE, self.batch_size, random_seed)

        # Critic networks - 1 network (local + target) per agent
        self.critics = [
            Critic(state_size, action_size, random_seed, self.lr_critic,
                   WEIGHT_DECAY, device) for i in range(num_agents)
        ]
        # Actor networks - 1 network (local + target) per agent
        self.actors = [
            Actor(state_size, action_size, random_seed, self.lr_actor,
                  self.noise, device) for i in range(num_agents)
        ]
def start(GAME_NAME, MAX_EPISODE):
    env = gym.make(GAME_NAME)  # create environment
    actor = Actor(env.observation_space, env.action_space)  # create actor
    critic = Critic(env.observation_space, env.action_space)  # create critic
    reward_per_epi = []
    durations_per_epi = []
    l_A = []
    l_C = []
    RENDER = False
    MAX_EP_STEPS = 1000
    #DISPLAY_REWARD_THRESHOLD=200

    #print ("begin.\n\n")
    for i_episode in range(MAX_EPISODE):
        s = env.reset()
        critic.reset()
        actor.reset()
        track_r = []
        for t in count():
            if RENDER: env.render()

            a = actor.choose_action(s)

            s_, r, done, info = env.step(a)
            #if done: r = -20             # Penalty if die
            track_r.append(r)

            td_error, abs_error = critic.learn(s, r, s_)  # Critic Learn
            actor.learn(s, a, td_error)  # Actor Learn

            s = s_

            #print ("... in episode (%d) step (%d)" % (i_episode+1,t))
            if is_ipython:
                display.clear_output(wait=True)
                display.display(plt.gcf())

            #env.render()

            if done or t >= MAX_EP_STEPS:  # Episode finished, print results
                ep_rs_sum = sum(track_r)
                #if 'running_reward' not in globals():
                #    running_reward = ep_rs_sum
                #else:
                #    running_reward = running_reward * 0.95 + ep_rs_sum * 0.05
                #if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True   # rendering
                running_reward_avg = ep_rs_sum / float(t)
                reward_per_epi.append(ep_rs_sum)
                durations_per_epi.append(t)
                l_A.append(np.mean(actor._loss_))
                l_C.append(np.mean(critic._loss_))
                #print("episode:", i_episode, "  reward:", ep_rs_sum)
                #plot(reward_per_epi, durations_per_epi, l_A, l_C)

                break

    return reward_per_epi, durations_per_epi, l_A, l_C
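A minimal way to drive the training loop above; the environment name and episode count are illustrative, and plt is assumed to be imported as in the loop body:

rewards, durations, actor_losses, critic_losses = start("CartPole-v1", 500)
plt.plot(rewards)  # reward per episode
plt.show()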
Example #23
 def add_actor_to_map(self, tile, tile_pos, add_to_enemy_list=False):
     delay = choice([x / 1000. for x in range(80, 200, 5)])
     actor = Actor(tile.col,
                   tile.row,
                   pos=(tile_pos[0] + 10, tile_pos[1] + 253),
                   anim_delay=delay)
     self.enemy_list.append(actor)
     if add_to_enemy_list:
         self.add_widget(actor)
Example #24
def DoEpisodes(episodes,
               boardSize,
               maxRemovePegs,
               boardType,
               epsilon=0.5,
               learningRate=0.9,
               policyTable={},
               valueTable={}):
    TotalError = 0
    stepsTaken = 1

    actor = Actor(0.9, learningRate, epsilon, policyTable)
    critic = Critic(0.9, learningRate, valueTable)

    for i in range(episodes):
        world = GetRandomizedBoard(boardSize, maxRemovePegs, boardType)

        actor.resetEligibility()
        critic.resetEligibility()
        critic.tdError = 0
        reward = 0
        state = world.stateToHash()

        chosenAction = actor.ChooseActionByPolicy(world)

        while True:
            reward = world.makeAction(chosenAction)
            nextAction = actor.ChooseActionByPolicy(world)
            nextState = world.stateToHash()

            actor.eligibility[state + str(chosenAction)] = 1
            critic.updateTDError(reward, state, nextState)
            critic.eligibility[state] = 1
            TotalError += abs(critic.tdError)
            for SAP in world.getGameLog():

                critic.updateValue(SAP)
                critic.decayEligibility(SAP)

                actor.updatePolicy(SAP, critic.tdError)
                actor.decayEligibility(SAP)

            if reward == 10:
                #print(world.startRemoveLocations, stepsTaken, world.getGameLog()[-1].stateHash)
                updateSolvableStates(boardType + str(boardSize),
                                     world.startRemoveLocations)
            if chosenAction is None:
                break
            chosenAction = nextAction
            state = nextState
            stepsTaken += 1

        print('Episode:', i, 'MeanError', TotalError / stepsTaken)

    WriteTables(critic.getValueTable(), actor.getPolicyTable(), boardType,
                boardSize)
Example #25
 def CreateActors(self, actorNumber):
     self.actorList = numpy.zeros(0, dtype=Actor)
     for name in range(0, actorNumber):
         #Actor obj takes name,
         self.actorList = numpy.append(self.actorList, [
             Actor(name, self.market, self.resource,
                   self.min_init_amountToSell, self.max_init_amountToSell,
                   self.min_init_priceDivergence,
                   self.max_init_priceDivergence, self.currency)
         ])
Example #26
 def __init__(self):
     tf.reset_default_graph()
     self.sess = tf.Session()
     self.actor = Actor(self.sess, \
                     n_features=Config.PLAYER_DIMENSION*(Config.DEFENDER_COUNT+Config.INTRUDER_COUNT), \
                     lr=Config.LEARNING_RATE_START, action_bound=[-math.pi, math.pi])
     self.critic = Critic(self.sess, \
                     n_features=Config.PLAYER_DIMENSION*(Config.DEFENDER_COUNT+Config.INTRUDER_COUNT), \
                     lr=Config.LEARNING_RATE_START)
     self.sess.run(tf.global_variables_initializer())
Example #27
    def __init__(self, env, batchSize = 10, bufferSize = 100,
                 gamma = 0.98, actorLR = 1e-4, criticLR = 1e-3,
                 maxSteps = 200, targetUpdate = 1e-3, epsilon = 1,
                 decay = 0.99, rewardScale = 1e-3, logFile = 'run.log'):
        self.env = env
        self.gamma = gamma
        self.batchSize = batchSize
        self.bufferSize = bufferSize
        self.maxSteps = maxSteps + 1
        self.rewardScale = rewardScale
        self.epsilon = epsilon
        self.decay = decay

        # Useful helpers.
        self.actionDim = self.env.action_space.shape[0]
        self.stateDim = self.env.observation_space.shape[0]
        self.featureDim = self.actionDim + self.stateDim
        self.minAction = self.env.action_space.low
        self.maxAction = self.env.action_space.high

        # For scaling output action values.
        self.actionBiasZeroOne = self.minAction
        self.actionScaleZeroOne = self.maxAction - self.minAction
        self.actionBiasTanH = (self.maxAction + self.minAction) / 2.0
        self.actionScaleTanH = self.maxAction - self.actionBiasTanH 

        # Initialize noise process.
        self.noise = OUNoise(self.actionDim)

        # Initialize replay buffer.
        self.buffer = ReplayBuffer(self.bufferSize)

        # Initialize logging.
        logging.basicConfig(filename = logFile,
                            level = logging.INFO,
                            format = '[%(asctime)s] %(message)s',
                            datefmt = '%m/%d/%Y %I:%M:%S %p')
        logging.info('Initializing DRPG agent with passed settings.')

        # Tensorflow GPU optimization.
        config = tf.ConfigProto() # GPU fix?
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config = config)
        from keras import backend as K
        K.set_session(self.sess)

        # Make actor network (creates target model internally).
        self.actor = Actor(self.sess, self.maxSteps, self.featureDim,
                           self.actionDim, self.batchSize, targetUpdate,
                           actorLR, self.actionScaleTanH, self.actionBiasTanH)

        # Make critic network (creates target model internally).
        self.critic = Critic(self.sess, self.maxSteps, self.featureDim,
                             self.actionDim, self.batchSize, targetUpdate,
                             criticLR)
    def __init__(self, params):
        self.action_size = params['action_size']
        self.state_size = params['state_size']
        self.num_agents = params['num_agents']
        self.buffer_size = params['buffer_size']
        self.batch_size = params['batch_size']
        self.__gamma = params['gamma']
        self.__tau = params['tau']
        self.__update_every = params['update_every']
        self.__save_to = params['save_to']
        self.__memory = ReplayBuffer(self.buffer_size, self.batch_size)
        self.__lr = params['lr']
        self.noise_type = params['noise_type']

        actor_params = dict()
        actor_params['arch_params_actor'] = params['arch_params_actor']
        actor_params['action_size'] = self.action_size
        actor_params['state_size'] = self.state_size
        actor_params['eps'] = params['eps']
        actor_params['eps_decay'] = params['eps_decay']
        actor_params['eps_min'] = params['min_eps']
        actor_params['noise_type'] = params['noise_type']
        self.actor = Actor(actor_params)
        self.actor_target = Actor(actor_params)
        self.optimizer_actor = optim.Adam(self.actor.parameters(),
                                          lr=self.__lr)
        self.scheduler_actor = optim.lr_scheduler.StepLR(self.optimizer_actor,
                                                         step_size=100,
                                                         gamma=0.95)

        critic_params = dict()
        critic_params['arch_params_critic'] = params['arch_params_critic']
        critic_params['action_size'] = self.action_size
        critic_params['state_size'] = self.state_size
        self.critic = Critic(critic_params)
        self.critic_target = Critic(critic_params)
        self.optimizer_critic = optim.Adam(self.critic.parameters(),
                                           lr=self.__lr)
        self.scheduler_critic = optim.lr_scheduler.StepLR(self.optimizer_critic,
                                                          step_size=100,
                                                          gamma=0.95)
        self.__t = 0
Example #29
    def __init__(self, state_size, action_size, action_low, action_high):
        # self.task = task
        self.state_size = state_size
        self.action_size = action_size
        self.action_low = action_low
        self.action_high = action_high

        # learning rates
        self.lr_actor = 1e-4
        self.lr_critic = 1e-3

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.lr_actor)
        self.actor_target = Actor(self.state_size, self.action_size, self.lr_actor)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size, self.lr_critic)
        self.critic_target = Critic(self.state_size, self.action_size, self.lr_critic)

        # store model architecture of actor and critic locally
        # keras.utils.plot_model(self.actor_local.model, '/home/danie/catkin_ws/src/ddpg/src/actor.png', show_shapes=True)        
        # keras.utils.plot_model(self.critic_local.model, '/home/danie/catkin_ws/src/ddpg/src/critic.png', show_shapes=True)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Initialize OU noise
        self.noise = OUNoise(action_size=self.action_size)

        # Currently testing with Gaussian noise instead of OU. Parameters for Gaussian follow
        self.noise_mean = 0.0
        self.noise_stddev = 0.2

        # Initialize replay buffer
        self.buffer_size = int(1e6)
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Parameters for DDPG
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters
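The comment above says Gaussian noise is being tried instead of OU; a hypothetical act() helper sketching how noise_mean and noise_stddev might be applied (the method name, the Keras-style predict call, and the numpy import are assumptions):

    def act(self, state):
        # Hypothetical sketch: query the local actor, then add Gaussian exploration noise
        state = np.reshape(state, [-1, self.state_size])
        action = self.actor_local.model.predict(state)[0]
        noise = np.random.normal(self.noise_mean, self.noise_stddev, self.action_size)
        return np.clip(action + noise, self.action_low, self.action_high)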
Example #30
def main(args=None):
    numItr = 5
    if args and len(args) > 1:
        numItr = int(args[1])
    print('Simulation of MyWorld')
    world = MyWorld()
    for x in range(numItr):
        world.act()
        obj = world.getObjects()
        for each in obj:
            each.act()
    print('Simulation of World')
    world = World(100, 100)
    world.addObject(Actor(), 10, 10)
    world.addObject(Actor(), 90, 90)
    for x in range(numItr):
        world.act()
        obj = world.getObjects()
        for each in obj:
            each.act()
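A minimal entry-point sketch for the script above, so that command-line arguments reach main():

if __name__ == '__main__':
    import sys
    main(sys.argv)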