def __init__(self, background, player, enemy):
    Environment.__init__(self, background)
    self.player = player
    self.enemy = enemy
    if isinstance(player, BattleGroup):
        self.players = sprite.Group(player.groupmembers)
    else:
        self.players = sprite.Group(player)
    if isinstance(enemy, BattleGroup):
        self.enemies = sprite.Group(enemy.groupmembers)
    else:
        self.enemies = sprite.Group(enemy)
    self.sprites = sprite.RenderUpdates(self.players, self.enemies)
    self.combatants = sprite.Group(self.players, self.enemies)
    self.alignCombatants(self.players, 608 - 16, Direction.LEFT)
    self.alignCombatants(self.enemies, 32 + 16, Direction.RIGHT)
    # TODO: Battlefield status needs to be updated
    self.statusBar = BattlefieldStatus(self.players.sprites()[0],
                                       self.enemies.sprites()[0])
    self.frameCount = settings.fps / 2
    self.battleQueue = [(c.speed, c) for c in self.combatants]
    self.battleQueue.sort(key=itemgetter(0))
def __init__(self, env_name):
    Environment.__init__(self)
    # Create a two-way pipe and spawn a worker process for the environment.
    self.conn, child_conn = Pipe()
    self.proc = Process(target=worker, args=(child_conn, env_name))
    self.proc.start()
    # Block until the worker signals that it is ready.
    self.conn.recv()
    self.reset()
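# A minimal sketch of what the `worker` target above might look like. The
# original does not show its definition, so the use of gym and the message
# protocol (a "ready" handshake, then (command, payload) tuples) are
# assumptions for illustration only.
import gym

def worker(conn, env_name):
    env = gym.make(env_name)
    conn.send('ready')  # unblocks the parent's conn.recv()
    while True:
        cmd, data = conn.recv()
        if cmd == 'reset':
            conn.send(env.reset())
        elif cmd == 'step':
            conn.send(env.step(data))
        elif cmd == 'close':
            env.close()
            conn.close()
            break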
def __init__(self, background, player, location, walls=()):
    Environment.__init__(self, background)
    self.player = player
    self.playergroup = sprite.GroupSingle(player)
    self.walls = sprite.RenderPlain(walls)
    self.npcgroup = sprite.Group()
    self.sprites = sprite.RenderUpdates()
    self.sprites.add(self.player)
    self.statusBar = OverWorldStatus(self.player, location)
def __init__(self):
    # Read the name file to load all the enemy names and their types.
    self._file = os.path.join(os.getcwd(), "frozen_enemy.txt")
    enemies = self.read_enemy_file(self._file)
    Environment.__init__(self, "Frozen Mountains of Absolute Doom",
                         enemies, 0.75, randint(1, 2))
    self.boss = Warrior("The Great Ice Giant, Halafor", False)
    self.boss.hero_stats.hit_points += 250
    self.boss.hero_stats.mana_points += 150
    self.boss.hero_stats.attack_power += 70
    self.boss.hero_stats.defense += 70
def __init__(self, lake, slip, max_steps, seed=None):
    """
    lake: A matrix that represents the lake. For example:

        lake = [['&', '.', '.', '.'],
                ['.', '#', '.', '#'],
                ['.', '.', '.', '#'],
                ['#', '.', '.', '$']]

    slip: The probability that the agent will slip
    max_steps: The maximum number of time steps in an episode
    seed: A seed to control the random number generator (optional)
    """
    # start (&), frozen (.), hole (#), goal ($)
    # self.lake = np.zeros(np.array(lake).shape)
    self.lake = np.array(lake)
    self.lake_flat = self.lake.reshape(-1)
    self.slip = slip

    n_states = self.lake.size + 1
    n_actions = 4

    # The initial-state distribution puts all probability on the start cell.
    pi = np.zeros(n_states, dtype=float)
    pi[np.where(self.lake_flat == '&')[0]] = 1.0

    self.absorbing_state = n_states - 1

    # TODO:
    Environment.__init__(self, n_states, n_actions, max_steps, pi, seed)

    # Up, left, down, right.
    self.actions = [(-1, 0), (0, -1), (1, 0), (0, 1)]

    self.itos = list(product(range(self.lake.shape[0]),
                             range(self.lake.shape[1])))
    self.stoi = {s: i for (i, s) in enumerate(self.itos)}

    # Transition probabilities indexed as _p[next_state, state, action].
    self._p = np.zeros((n_states, n_states, n_actions))
    for state_index, state in enumerate(self.itos):
        for action_index, action in enumerate(self.actions):
            next_state = (state[0] + action[0], state[1] + action[1])
            # Holes and the goal of the 4x4 example lake (flat indices
            # 5, 7, 11, 12, 15) are terminal: the agent stays in place.
            if state_index in (5, 7, 11, 12, 15):
                self._p[state_index, state_index, action_index] = 1.0
            else:
                # With probability 1 - slip the intended move succeeds;
                # moves off the grid leave the agent where it is.
                next_state_index = self.stoi.get(next_state, state_index)
                self._p[next_state_index, state_index, action_index] = 1 - self.slip
                # With probability slip the agent moves in a uniformly
                # random direction instead.
                for act in self.actions:
                    next_state_action = (state[0] + act[0], state[1] + act[1])
                    next_state_index = self.stoi.get(next_state_action, state_index)
                    self._p[next_state_index, state_index, action_index] += self.slip / 4
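# A hedged usage sketch for the environment above. Only the __init__ body is
# shown in the original, so the class name `FrozenLake` is an assumption.
import numpy as np

lake = [['&', '.', '.', '.'],
        ['.', '#', '.', '#'],
        ['.', '.', '.', '#'],
        ['#', '.', '.', '$']]

env = FrozenLake(lake, slip=0.1, max_steps=100, seed=0)

# For every grid state and action, the outgoing transition probabilities
# should sum to 1 (the extra absorbing state is not wired into _p by this
# __init__, so its column stays zero).
assert np.allclose(env._p.sum(axis=0)[:16], 1.0)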
def __init__(self, env_name):
    Environment.__init__(self)
    self.right_decision = None
    #self.env_instance = gym.make(env_name)
    #self.last_state = self.env_instance.reset()

    # Create a two-way pipe: self.conn is the parent end, child_conn the child end.
    self.conn, child_conn = Pipe()
    self.proc = Process(target=worker_without_instance,
                        args=(child_conn, env_name))
    #self.proc = Process(target=worker_with_instance,
    #                    args=(child_conn, env_name, self.env_instance))
    #self.last_action = 0
    #self.last_reward = 0
    self.proc.start()
    # Wait for the worker to signal that it is ready before the first reset.
    self.conn.recv()
    self.reset()
# This network will not be trained; keep it in evaluation mode.
target_net.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(policy_net.to(device), target_net.to(device))

# Initialize the state at the start of an episode.
state = torch.zeros(INPUT_SIZE)

# Create a namedtuple for easy storage of experiences and reserve
# 10000 slots of replay memory.
replay = ReplayMemory(10000)
Experience = namedtuple('Experience',
                        ('State', 'Action', 'Reward', 'Next_State', 'Done'))

rewards_per_episode = []
rewards_episode = 0

for episode in range(EPISODES):
    env.__init__()
    alive = True
    print('Episode: {}, Rewards Last Episode: {}'.format(episode, rewards_episode))
    rewards_episode = 0

    while alive:
        # Choose an action and perform it.
        # action = 0
        action = agent.choose_action(state)
        done, next_state, reward = env.step(action)

        # For the 4 frames thereafter we don't act and only observe the
        # result of the action. This gives the network more informative
        # input about what the action leads to in the game; the frames
        # are concatenated into one state.
        for frame in range(4):
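# A minimal sketch of a ReplayMemory compatible with the loop above, assuming
# the usual ring-buffer design used in DQN implementations; the original class
# definition is not shown, so this is illustrative only.
import random
from collections import namedtuple

Experience = namedtuple('Experience',
                        ('State', 'Action', 'Reward', 'Next_State', 'Done'))

class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        # Overwrite the oldest experience once the buffer is full.
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Experience(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)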
def __init__(self):
    Environment.__init__(self)
    self.loginfo = Environment()
    self.testcase = ''
def __init__(self):
    Environment.__init__(self)