Example #1
    def __init__(self, background, player, enemy):
        Environment.__init__(self, background)
        self.player = player
        self.enemy = enemy
        if isinstance(player, BattleGroup):
            self.players = sprite.Group(player.groupmembers)
        else:
            self.players = sprite.Group(player)
        if isinstance(enemy, BattleGroup):
            self.enemies = sprite.Group(enemy.groupmembers)
        else:
            self.enemies = sprite.Group(enemy)

        self.sprites = sprite.RenderUpdates(self.players, self.enemies)
        self.combatants = sprite.Group(self.players, self.enemies)

        self.alignCombatants(self.players, 608 - 16, Direction.LEFT)
        self.alignCombatants(self.enemies, 32 + 16, Direction.RIGHT)

        # TODO: Battlefield status needs to be updated
        self.statusBar = BattlefieldStatus(self.players.sprites()[0], self.enemies.sprites()[0])

        self.frameCount = settings.fps / 2

        self.battleQueue = [(c.speed, c) for c in self.combatants]
        self.battleQueue.sort(key=itemgetter(0))
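The queue pairs each combatant with its speed and is sorted ascending on the speed alone via itemgetter(0). A tiny, self-contained illustration of that pattern (the Combatant class and the names below are hypothetical, used only to show the resulting order):

from operator import itemgetter

class Combatant:
    def __init__(self, name, speed):
        self.name, self.speed = name, speed

combatants = [Combatant("slime", 3), Combatant("hero", 7), Combatant("bat", 5)]
battle_queue = [(c.speed, c) for c in combatants]
battle_queue.sort(key=itemgetter(0))      # ascending by speed; only the first tuple element is compared
print([c.name for _, c in battle_queue])  # ['slime', 'bat', 'hero']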
Example #2
	def __init__(self, env_name):
		Environment.__init__(self)

		self.conn, child_conn = Pipe()
		self.proc = Process(target=worker, args=(child_conn, env_name))
		self.proc.start()
		self.conn.recv()
		self.reset()
Example #3
   def __init__(self, background, player, location, walls=()):
      Environment.__init__(self, background)
      self.player = player
      self.playergroup = sprite.GroupSingle(player)
      self.walls = sprite.RenderPlain(walls)
      self.npcgroup = sprite.Group()

      self.sprites = sprite.RenderUpdates()
      self.sprites.add(self.player)

      self.statusBar = OverWorldStatus(self.player, location)
Example #4
 def __init__(self):
     # Read the enemy file to load all enemy names and their types.
     self._file = os.path.join(os.getcwd(), "frozen_enemy.txt")
     enemies = self.read_enemy_file(self._file)
     Environment.__init__(self, "Frozen Mountains of Absolute Doom",
                          enemies, 0.75, randint(1, 2))
     self.boss = Warrior("The Great Ice Giant, Halafor", False)
     self.boss.hero_stats.hit_points += 250
     self.boss.hero_stats.mana_points += 150
     self.boss.hero_stats.attack_power += 70
     self.boss.hero_stats.defense += 70
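The constructor relies on read_enemy_file to load enemy definitions from frozen_enemy.txt, but neither the method body nor the file format is shown. The sketch below is only one plausible parser; the "name,type"-per-line format and the helper body are assumptions, not taken from the snippet:

def read_enemy_file(path):
    # Hypothetical sketch only: the real format of frozen_enemy.txt is not shown,
    # so a plain "name,type" line format is assumed here.
    enemies = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            name, enemy_type = line.split(",", 1)
            enemies.append((name.strip(), enemy_type.strip()))
    return enemies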
Example #5
    def __init__(self, lake, slip, max_steps, seed=None):
        """
        lake: A matrix that represents the lake. For example:
         lake =  [['&', '.', '.', '.'],
                  ['.', '#', '.', '#'],
                  ['.', '.', '.', '#'],
                  ['#', '.', '.', '$']]
        slip: The probability that the agent will slip
        max_steps: The maximum number of time steps in an episode
        seed: A seed to control the random number generator (optional)
        """
        # start (&), frozen (.), hole (#), goal ($)
        # self.lake = np.zeros(np.array(lake).shape)
        self.lake = np.array(lake)
        self.lake_flat = self.lake.reshape(-1)
        self.slip = slip
        n_states = self.lake.size + 1
        n_actions = 4
        # Initial state distribution: the agent starts on the '&' cell with probability 1.
        pi = np.zeros(n_states, dtype=float)
        pi[np.where(self.lake_flat == '&')[0]] = 1.0
        self.absorbing_state = n_states - 1

        Environment.__init__(self, n_states, n_actions, max_steps, pi, seed)

        # Up, left, down, right.
        self.actions = [(-1, 0), (0, -1), (1, 0), (0, 1)]

        # Map between state indices and (row, col) grid coordinates.
        self.itos = list(
            product(range(self.lake.shape[0]), range(self.lake.shape[1])))
        self.stoi = {s: i for (i, s) in enumerate(self.itos)}

        # _p[next_state, state, action] holds the transition probabilities.
        self._p = np.zeros((n_states, n_states, n_actions))

        for state_index, state in enumerate(self.itos):
            for action_index, action in enumerate(self.actions):
                next_state = (state[0] + action[0], state[1] + action[1])
                # The hard-coded hole cells (5, 7, 11, 12) and the goal cell (15)
                # of the 4x4 lake are treated as absorbing states.
                if state_index in (5, 7, 11, 12, 15):
                    self._p[state_index, state_index, action_index] = 1.0
                else:
                    # The intended move succeeds with probability 1 - slip; with
                    # probability slip the agent moves in a uniformly random
                    # direction (which may coincide with the intended one).
                    next_state_index = self.stoi.get(next_state, state_index)
                    self._p[next_state_index, state_index,
                            action_index] = 1 - self.slip
                    for act in self.actions:
                        next_state_action = (state[0] + act[0],
                                             state[1] + act[1])
                        next_state_index = self.stoi.get(
                            next_state_action, state_index)
                        self._p[next_state_index, state_index,
                                action_index] += self.slip / 4
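A usage sketch for this constructor, using the lake layout from the docstring; the class name FrozenLake is an assumption (the snippet does not show it), and slip=0.1 / max_steps=100 are arbitrary example values:

lake = [['&', '.', '.', '.'],
        ['.', '#', '.', '#'],
        ['.', '.', '.', '#'],
        ['#', '.', '.', '$']]
env = FrozenLake(lake, slip=0.1, max_steps=100, seed=0)  # hypothetical class name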
Example #6
    def __init__(self, env_name):
        Environment.__init__(self)
        self.right_decision = None
        #self.env_instance = gym.make(env_name)
        #self.last_state=self.env_instance.reset()
        # Create a two-way pipe: self.conn is the parent end, child_conn the child end.
        self.conn, child_conn = Pipe()
        self.proc = Process(target=worker_without_instance,
                            args=(child_conn, env_name))
        #self.proc = Process(target=worker_with_instance, args=(child_conn, env_name,self.env_instance))

        #self.last_action = 0
        #self.last_reward = 0

        self.proc.start()
        self.conn.recv()  # block until the child process sends its initial message
        self.reset()
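This example (and Example #2 above) hands child_conn to a worker function that owns the actual environment in the child process. Below is a minimal sketch of such a worker, assuming a simple message protocol of ('reset'|'step'|'close', payload) tuples and a gym environment; neither the protocol nor the worker body is shown in the snippets:

import gym

def worker_without_instance(conn, env_name):
    # Hypothetical body: the real protocol is not shown in the examples above.
    env = gym.make(env_name)  # the child process owns the actual environment
    conn.send('ready')        # initial message consumed by self.conn.recv() in the parent
    while True:
        cmd, data = conn.recv()
        if cmd == 'reset':
            conn.send(env.reset())
        elif cmd == 'step':
            conn.send(env.step(data))
        elif cmd == 'close':
            conn.close()
            break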
Example #7
    # this network will not be trained
    target_net.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    agent = Agent(policy_net.to(device), target_net.to(device))

    # Initialize the state at the start of an episode.
    state = torch.zeros(INPUT_SIZE)
    # Replay memory stores up to 10000 experiences, each kept as a namedtuple.
    replay = ReplayMemory(10000)
    Experience = namedtuple('Experience',
                            ('State', 'Action', 'Reward', 'Next_State', 'Done'))
    rewards_per_episode = []
    rewards_episode = 0
    for episode in range(EPISODES):
        env.__init__()  # re-initialize the environment for a new episode
        alive = True
        print('Episode: {}, Rewards Last Episode: {}'.format(
            episode, rewards_episode))
        rewards_episode = 0
        while alive:
            # choose an action and perform it
            # action = 0
            action = agent.choose_action(state)
            done, next_state, reward = env.step(action)

            # For the next 4 frames we take no further action and simply observe
            # the effect of the chosen action. This gives the network more
            # informative input about what the action leads to in the game; the
            # observed frames are concatenated into a single state.
            for frame in range(4):
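The example is cut off inside the 4-frame observation loop. Below is one possible, self-contained shape of the idea the comment describes, where the chosen action is held for 4 frames and the observations are concatenated into one state; the no-op action value 0 and the torch.cat layout are assumptions for illustration only:

import torch

def observe_four_frames(env, no_op_action=0):
    # Hypothetical sketch of the frame stacking described in the comment above;
    # env.step is assumed to return (done, next_state, reward) as in the example.
    frames, total_reward, done = [], 0.0, False
    for _ in range(4):
        done, frame, reward = env.step(no_op_action)  # hold the action, just observe
        frames.append(torch.as_tensor(frame, dtype=torch.float32))
        total_reward += reward
    return torch.cat(frames), total_reward, done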
Example #8
 def __init__(self):
     Environment.__init__(self)
     self.loginfo = Environment()
     self.testcase = ''
 def __init__(self):
     Environment.__init__(self)