Пример #1
0
def main():
    """Run an actor-critic agent on level 1 of a GVGAI game, without end.

    Creates a TensorFlow session and registers it through ``K.set_session``
    (presumably the Keras backend -- confirm against the file's imports),
    builds the ``"gvgai-<game>-lvl1-v0"`` environment and then loops forever:
    render, choose an action, step, store the transition, train.

    Observations are converted to NumPy arrays and flattened to 1-D before
    they are handed to the agent. When an episode finishes, the environment
    is reset so the next step starts a fresh episode.

    Relies on the module-level names ``tf``, ``K``, ``gym_gvgai``, ``numpy``,
    ``ActorCritic`` and ``game``. Never returns.
    """
    sess = tf.Session()
    K.set_session(sess)
    env = gym_gvgai.make("gvgai-" + game + "-lvl1-v0")
    actor_critic = ActorCritic(env, sess)

    cur_state = numpy.array(env.reset()).flatten()
    while True:
        env.render()
        action = numpy.array(actor_critic.act(cur_state))

        new_state, reward, done, _ = env.step(action)
        new_state = numpy.array(new_state).flatten()

        actor_critic.remember(cur_state, action, reward, new_state, done)
        actor_critic.train()

        if done:
            # Stepping an environment after it reported `done` is undefined
            # under the Gym API; start a new episode instead.
            cur_state = numpy.array(env.reset()).flatten()
        else:
            cur_state = new_state
Пример #2
0
def make_env(env_name):
    """Create the ``gym_gvgai`` environment registered as ``env_name``.

    After the environment has been created, the ``IMAGE`` attribute of
    ``LEARNING_SSO_TYPE`` is rebound to the value of ``BOTH``. The assignment
    mutates the shared ``LEARNING_SSO_TYPE`` object, so it is visible to
    every later reader of ``IMAGE``, not only to the returned environment.

    Returns the newly created environment.
    """
    created_env = gym_gvgai.make(env_name)

    sso_types = gym_gvgai.envs.gvgai_env.gvgai.LEARNING_SSO_TYPE
    # NOTE(review): presumably this makes requests for IMAGE observations
    # behave like BOTH -- confirm against the gym_gvgai client code.
    sso_types.IMAGE = sso_types.BOTH

    return created_env
Пример #3
0
 def __init__(self, game, lvl):
     """Create the GVGAI environment for ``game``/``lvl`` and reset counters.

     The environment id is ``'gvgai-' + game + '-' + lvl + '-v0'``. The
     environment is reset once and the first observation is kept in
     ``self.stateObs``; its two leading dimensions decide ``self.transpose``
     and ``self.size``.
     """
     env_id = 'gvgai-' + game + '-' + lvl + '-v0'
     self.env = gvg.make(env_id)
     self.stateObs = self.env.reset()

     # Length of the observation and of its first row.
     outer = len(self.stateObs)
     inner = len(self.stateObs[0])

     # Flag observations whose first dimension is the shorter one.
     self.transpose = outer < inner
     # In both cases the stored size is the doubled observation size with
     # the longer side first.
     self.size = (2 * max(outer, inner), 2 * min(outer, inner))

     self.done = False
     self.score = 0
     self.frame = 0
     self.nAction = self.env.action_space.n
    def __init__(self,
                 game,
                 play_length,
                 shape,
                 path=None,
                 id=0,
                 reward_mode='time',
                 reward_scale=2,
                 elite_prob=0):
        """Wrap the level-0 GVGAI environment of ``game``.

        Author's notes on the wrapper as a whole (not implemented in this
        constructor): it returns a grid instead of pixels, sets the reward
        and generates a new level on reset; PPO wants to maximize, while the
        generator wants a score of 0.

        Args:
            game: Game name; stored as ``self.name`` and used to build the
                environment id ``'gvgai-<game>-lvl0-v<version>'``.
            play_length: Stored as ``self.play_length``.
            shape: Stored as ``self.shape`` and used as the shape of the
                advertised observation space.
            path: Stored as ``self.levels``.
            id: Stored as ``self.id``.
            reward_mode: Stored as ``self.rmode``.
            reward_scale: Stored as ``self.rscale``.
            elite_prob: Stored as ``self.elitep``.
        """
        # Identity and level bookkeeping.
        self.id, self.name, self.levels = id, game, path
        self.level_id = -1
        self.version = 1

        # Build the underlying environment, then initialise the gym wrapper
        # around it.
        env_id = 'gvgai-{}-lvl0-v{}'.format(game, self.version)
        self.env = gym_gvgai.make(env_id)
        gym.Wrapper.__init__(self, self.env)

        # Run-time state.
        self.compiles = False
        self.state = None
        self.steps = self.score = 0
        self.play_length = play_length
        self.shape = shape

        # Assigned after the wrapper initialisation so that this Box, with
        # values in [0, 1], is the observation space the wrapper exposes.
        unit_box = gym.spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
        self.observation_space = unit_box

        # Reward / selection settings.
        self.elitep = elite_prob
        self.rmode = reward_mode
        self.rscale = reward_scale
Пример #5
0
import gym
import gym_gvgai
import Agent as Agent
import json
import datetime

# Create the environment for level 0 of the "aai" game (the original note
# says this mirrors what testRLAgent.py does).
env = gym_gvgai.make('gvgai-aai-lvl0-v0')

# Build the training agent.
train_agent = Agent.trainAgent()
print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))

# Reset the environment and keep the first observation.
stateObs = env.reset()

# Fetch the action list from the underlying GVGAI client.
actions = env.env.GVGAI.actions()

# Initialise the agent from the first observation. Per the original note this
# sets up the Q-table and "lastState" -- the method itself is not visible here.
train_agent.inital(stateObs)

# Load a previously saved model from model.json and install it as the agent's
# Q-table (this replaces whatever value Qtable held before).
with open("model.json", 'r', encoding='utf-8') as json_file:
    train_agent.Qtable = json.load(json_file)

print("start training...")
# Wall-clock start time, kept only for timing the run.
start = datetime.datetime.now()

for i in range(10):
Пример #6
0
import gym_gvgai
import Agent as Agent

# Evaluation grid: every game is played on every validation level
# `totalTimes` times, with a fresh environment and a fresh agent per trial.
games = ['gvgai-testgame1', 'gvgai-testgame2', 'gvgai-testgame3']
validateLevels = ['lvl1-v0', 'lvl2-v0', 'lvl3-v0']
totalTimes = 20

# variables for recording the results
# results[game][level][trial] = [index of last tick played, total score, winner]
results = {}

for game in games:
    levelRecord = {}
    for level in validateLevels:
        timeRecord = {}
        for t in range(totalTimes):
            env = gym_gvgai.make(game + '-' + level)
            try:
                agent = Agent.Agent()
                print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))
                stateObs = env.reset()
                actions = env.unwrapped.get_action_meanings()
                totalScore = 0
                # Play at most 2000 ticks; stop early when the episode ends.
                for tick in range(2000):
                    action_id = agent.act(stateObs, actions)
                    stateObs, diffScore, done, debug = env.step(action_id)
                    totalScore += diffScore
                    print("Action " + str(action_id) + " tick " + str(tick+1) + " reward " + str(diffScore) + " win " + debug["winner"])
                    if done:
                        break
                # `tick` is the zero-based index of the last tick played.
                timeRecord[t] = [tick, totalScore, debug["winner"]]
            finally:
                # One environment is created per trial; close it so whatever
                # it holds is released instead of piling up over 180 trials.
                env.close()
        levelRecord[level] = timeRecord
    results[game] = levelRecord
Пример #7
0
    return action_id


# Identifier taken from the first command-line argument.
usr = sys.argv[1]
# Pass the value read above: the name `user` is not defined anywhere in the
# visible script, so using it here would raise NameError.
new_team = team(usr, [], [])

# Number of test runs per (game, level) pair.
testing_times = 20
# Predefined names referring to framework
games = ['golddigger', 'treasurekeeper', 'waterpuzzle']
test_levels = ['lvl2', 'lvl3', 'lvl4']

try:
    import Agent as Agent
    for game_name in games:
        for level in test_levels:
            env = gym_gvgai.make('gvgai-' + game_name + '-' + level + '-v0')
            agent = Agent.Agent()
            # print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))
            total_score = []  # record every testing score
            win_num = 0
            # reset environment
            actions = env.env.GVGAI.actions()
            state_obs = None
            INFO = game_name + level + "\n"
            print(game_name, level)
            for i in range(testing_times):  # testing 20 times
                current_score = 0  # record current testing round score
                state_obs = env.reset()
                for t in range(2000):
                    action_id = agent_action(state_obs, actions=actions)
                    state_obs, incre_score, done, debug = env.step(action_id)
Пример #8
0
#!/usr/bin/env python
import gym
import gym_gvgai
import Agent as Agent
import json

env = gym_gvgai.make('gvgai-aai-lvl0-v0')  # create the level-0 "aai" environment
agent = Agent.Agent()  # build the agent object
print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))
# Reset the environment and keep the first observation.
stateObs = env.reset()
actions = env.env.GVGAI.actions()  # action list from the underlying GVGAI client

# --- Load the trained model --------------------------------------------
with open("model.json", 'r', encoding='utf-8') as json_file:
    # Install the JSON content as the agent's Q-table.
    agent.Qtable = json.load(json_file)
# -----------------------------------------------------------------------

# --- Running total of the score ----------------------------------------
Score = 0
# -----------------------------------------------------------------------

# Play up to 1000 ticks.
# NOTE(review): in the lines visible here `Score` is never updated and `done`
# is never checked -- the loop body may continue beyond this excerpt; confirm.
for t in range(1000):

    # Choose an action from the loaded policy for the current observation.
    action_id = agent.act(stateObs, actions)

    # Apply the action; get the next observation, the score increment,
    # the episode-finished flag and the debug info.
    stateObs, increScore, done, debug = env.step(action_id)
Пример #9
0
            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Example for saving best model
                print("Saving new best model")
                _locals['self'].save(log_dir + 'best_model.pkl')
    n_steps += 1
    return True


# Create the log directory (no error if it already exists).
log_dir = args.log_path
os.makedirs(log_dir, exist_ok=True)

# Build the environment and stack the wrappers, innermost first.
env = gym_gvgai.make(args.env)
# NOTE(review): WarpFrame is presumably the frame-preprocessing wrapper from
# the RL library imported elsewhere in this file -- confirm.
env = WarpFrame(env)
# NOTE(review): Monitor presumably writes episode statistics under log_dir;
# the callback above this section reads results from log_dir -- confirm.
env = Monitor(env, log_dir, allow_early_resets=True)

# Optional video recording; an interval of 0 disables it.
if args.save_video_interval != 0:
    env = gym.wrappers.Monitor(
        env,
        os.path.join(log_dir, "videos"),
        # Record only episodes whose index is a multiple of the interval.
        video_callable=(lambda ep: ep % args.save_video_interval == 0),
        force=True)

model = DQN(CnnPolicy,
            env,
            verbose=1,
            exploration_fraction=args.exploration_fraction,
            exploration_final_eps=args.exploration_final_eps,
Пример #10
0
def gvgai_test_old():
    """Create and return the ``gvgai-sokoban-lvl0-v0`` environment.

    ``gym_gvgai`` is imported inside the function, so the package is only
    required when this factory is actually called.
    """
    import gym_gvgai

    build_env = gym_gvgai.make
    sokoban_env = build_env("gvgai-sokoban-lvl0-v0")
    return sokoban_env
 def restart(self, e, path):
     """Write a ``.no_compile`` marker for ``path`` and rebuild the environment.

     Args:
         e: Not used by the visible code (the logging call is commented out).
             NOTE(review): presumably the error that triggered the restart --
             confirm against the callers.
         path: Prefix of the marker file; an empty file named
             ``path + ".no_compile"`` is created (or truncated if present).
     """
     # Logging of `e` is currently disabled: self.log(e)
     # Opening in 'w' mode and closing immediately leaves an empty marker file.
     open(path + ".no_compile", 'w').close()
     # Replace the current environment with a fresh level-0 instance of the
     # same game and version.
     self.env = gym_gvgai.make('gvgai-{}-lvl0-v{}'.format(
         self.name, self.version))