示例#1
0
def run_bbox(verbose=False):
    """Run one learning pass of a Q-learner agent over the training level.

    Ensures the training level is loaded (resetting it when it already is),
    builds an ``ActionValueTable`` / ``Q`` / ``LearningAgent`` stack sized
    from the level, and alternates one interaction with one ``agent.learn()``
    call for as long as ``environment.finish_flag`` is truthy. Ends by
    calling ``bbox.finish``.

    :param verbose: accepted for interface compatibility; not read here.
    :return: None
    """
    if bbox.is_level_loaded():
        bbox.reset_level()
    else:
        bbox.load_level("../levels/train_level.data", verbose=1)

    # Query the dimensions after *either* branch. They used to be fetched
    # only on first load, so on the reset path the table below was built
    # with the -1 placeholders.
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()

    av_table = ActionValueTable(n_features, n_actions)
    av_table.initialize(0.2)
    # Parenthesised so the line prints the same under Python 2 and also
    # parses under Python 3.
    print(av_table._params)
    learner = Q(0.5, 0.1)
    learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(av_table, learner)
    environment = GameEnvironment()
    task = GameTask(environment)
    experiment = Experiment(task, agent)

    # NOTE(review): the loop runs while finish_flag is truthy; confirm
    # against GameEnvironment that this is the intended polarity.
    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()

    bbox.finish(verbose=1)
示例#2
0
def run_bbox():
    """Play the test level with a blend of two linear scorers, then finish.

    Each step computes a 4-entry score vector ``cum_sum`` from the
    ``lr_coefs_1`` predictions (entries 0-2 are rewritten every step, entry 3
    is only ever touched by the repeat penalty), mixes it with the
    ``lr_coefs_0`` linear scores using weight ``w0``, and performs the
    arg-max action. The coefficient arrays are module-level names defined
    outside this function.
    """
    f_35_penalty = 0.15
    k = 0
    w0 = 0.13
    crit_len = 150

    bbox.load_level("levels/test_level.data", verbose=0)

    predict = np.zeros(2)
    cum_sum = np.zeros(4)
    act = -1
    act_len = 0
    has_next = True

    while has_next:
        last_act = act
        state = bbox.get_state()
        f35 = state[35]
        predict[:2] = np.dot(lr_coefs_1, state[:-1]) + lr_free_coefs_1

        # The sign of feature 35 selects which prediction feeds entries 1/2.
        if f35 > 0:
            cum_sum[1], cum_sum[2] = predict[0] + k, -predict[0] + k
        elif f35 < 0:
            cum_sum[1], cum_sum[2] = -predict[1] + k, predict[1] + k
        elif f35 == 0:
            cum_sum[1], cum_sum[2] = predict[0] + k, predict[1] + k

        cum_sum[0] = (cum_sum[1] + cum_sum[2]) / 2 + k
        shift = f_35_penalty * f35
        cum_sum[1] -= shift
        cum_sum[2] += shift

        # Discourage repeating one action for more than crit_len steps.
        if act_len > crit_len:
            cum_sum[last_act] -= 0.0078125

        linear = np.dot(lr_coefs_0, state) + lr_free_coefs_0
        act = (w0 * linear / 6.366 + (1 - w0) * cum_sum).argmax()

        has_next = bbox.do_action(act)
        act_len = act_len + 1 if last_act == act else 0

    bbox.finish(verbose=1)
示例#3
0
	def reset(self):
		"""Bring bbox to a fresh level and cache its state on ``self.state``.

		The training level is loaded when no level is loaded yet; otherwise
		the loaded level is reset.
		"""
		if not bbox.is_level_loaded():
			bbox.load_level("../../../levels/train_level.data", verbose=1)
		else:
			bbox.reset_level()
		# Store whatever bbox reports as the state right after load/reset.
		self.state = bbox.get_state()
示例#4
0
 def reset(self):
     """Reset (or first load) the training level and refresh ``self.state``."""
     level_ready = bbox.is_level_loaded()
     if level_ready:
         bbox.reset_level()
     else:
         bbox.load_level("../../../levels/train_level.data", verbose=1)
     # Keep the state bbox reports immediately after the load/reset.
     self.state = bbox.get_state()
示例#5
0
def prepare_bbox():
    """Reset the loaded level, or load the test level and record its sizes.

    On first load the globals ``n_features`` and ``n_actions`` are set from
    bbox; on a plain reset they are left untouched.
    """
    global n_features, n_actions

    if bbox.is_level_loaded():
        bbox.reset_level()
        return

    bbox.load_level("../levels/test_level.data", verbose=1)
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()
示例#6
0
def prepare_bbox():
    """Make sure the test level is ready for a new run.

    Loads the level and fills the ``n_features`` / ``n_actions`` globals when
    nothing is loaded yet; otherwise only resets the level.
    """
    global n_features, n_actions

    if not bbox.is_level_loaded():
        bbox.load_level("../levels/test_level.data", verbose=1)
        n_features = bbox.get_num_of_features()
        n_actions = bbox.get_num_of_actions()
    else:
        bbox.reset_level()
示例#7
0
def prepare_bbox():
    """Reset the loaded level, or load the training level on first use.

    Only the first-load path updates the globals ``n_f`` (feature count),
    ``n_a`` (action count) and ``max_time``.
    """
    global n_f, n_a, max_time

    already_loaded = bbox.is_level_loaded()
    if already_loaded:
        bbox.reset_level()
        return

    bbox.load_level("../levels/train_level.data", verbose=1)
    n_f = bbox.get_num_of_features()
    n_a = bbox.get_num_of_actions()
    max_time = bbox.get_max_time()
示例#8
0
def prepare_bbox(train_level=True):
    """Load either the training level or the test level into bbox.

    The test level is meant for checking how well the bot generalizes.

    :param train_level: boolean, load the training level if True,
        the test level otherwise
    :return: None
    """
    level_file = ("../levels/train_level.data" if train_level
                  else "../levels/test_level.data")
    bbox.load_level(level_file, verbose=1)
0
def prepare_bbox(train_level=True):
  """
  Load a level into bbox: the training level by default, or the test
  level, which serves to validate the bot's generalization performance.
  :param train_level: boolean, load the training level if True
  :return: None
  """
  if not train_level:
    bbox.load_level("../levels/test_level.data", verbose=1)
    return
  bbox.load_level("../levels/train_level.data", verbose=1)
示例#10
0
def prepare_box():
    """Get the training level ready: reset if loaded, otherwise load it.

    Loading also stores the feature count, action count and max time in the
    globals ``n_features``, ``n_actions`` and ``max_time``.
    """
    global n_features, n_actions, max_time

    if not bbox.is_level_loaded():
        # First use: load the game level and remember its dimensions.
        bbox.load_level('levels/train_level.data', verbose=1)
        n_features = bbox.get_num_of_features()
        n_actions = bbox.get_num_of_actions()
        max_time = bbox.get_max_time()
    else:
        # Level already present: just return it to the initial state.
        bbox.reset_level()
示例#11
0
def prepare_bbox():
    """Reset the environment, loading the training level if none is loaded.

    The globals ``n_features``, ``n_actions`` and ``max_time`` are assigned
    only when the level is loaded here, not on a reset.
    """
    global n_features, n_actions, max_time

    if bbox.is_level_loaded():
        # Already loaded: go back to the initial state and keep the globals.
        bbox.reset_level()
        return

    # Nothing loaded yet: load the game level and cache its dimensions.
    bbox.load_level("../levels/train_level.data", verbose=1)
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()
    max_time = bbox.get_max_time()
示例#12
0
    def reset(self):
        """Reset bbox (loading ``self.level`` on first use) and clear episode state.

        When the level has to be loaded, ``n_features``, ``n_actions`` and
        ``max_time`` are read from bbox and stored on the instance. The
        per-episode fields are re-initialised on every call.
        """
        if not bbox.is_level_loaded():
            bbox.load_level(self.level, verbose=1)
            self.n_features = bbox.get_num_of_features()
            self.n_actions = bbox.get_num_of_actions()
            self.max_time = bbox.get_max_time()
        else:
            bbox.reset_level()

        # Fresh episode bookkeeping.
        self._steps = 0
        self._state = np.zeros((1, self.n_features))  # one row of zeros
        self._is_over = False
        self._prev_score = float('-inf')
        self._actions_log = []
示例#13
0
    def reset(self):
        """Start a new episode: reset or load the level, then zero the trackers."""
        level_is_loaded = bbox.is_level_loaded()
        if level_is_loaded:
            bbox.reset_level()
        else:
            # First call: load the level and remember its dimensions.
            bbox.load_level(self.level, verbose=1)
            self.n_features = bbox.get_num_of_features()
            self.n_actions = bbox.get_num_of_actions()
            self.max_time = bbox.get_max_time()

        self._steps = 0
        blank_state = np.zeros((1, self.n_features))
        self._state = blank_state
        self._is_over = False
        # Negative infinity: any real score compares greater than this.
        self._prev_score = -float('inf')
        self._actions_log = []
示例#14
0
文件: fribot.py 项目: Pyro2266/FriBOT
def prepare_bbox():
    """Reset/load the training level and (re)create the work buffers.

    On first load the globals ``n_features``, ``n_actions`` and ``max_time``
    are read from bbox. Every call then allocates a zeroed float32
    ``vectors`` array of shape ``(num_of_vectors, n_features)`` and a new
    ``multiprocessing.Pool`` stored in the global ``pool``.
    """
    global n_features, n_actions, max_time, vectors, pool, num_of_vectors

    if not bbox.is_level_loaded():
        bbox.load_level("../levels/train_level.data", verbose=1)
        n_features = bbox.get_num_of_features()
        n_actions = bbox.get_num_of_actions()
        max_time = bbox.get_max_time()
    else:
        bbox.reset_level()

    vectors = np.zeros((num_of_vectors, n_features), dtype=np.float32)
    print("preparing")
    # NOTE(review): a fresh Pool is created on every call and any pool
    # previously held in the global is not closed here; confirm the callers
    # only prepare once or clean up themselves.
    pool = multiprocessing.Pool(processes=processes)
示例#15
0
def run_bbox(verbose=False):
    """Play the training level greedily with pickled utility models and dump the run.

    Loads the models from ``utility_models.pkl``, and at every step performs
    the action whose model predicts the highest utility for the current
    state. After the level ends, the recorded scores, actions and states are
    written as raw arrays into the first unused ``run_<i>`` directory, then
    ``bbox.finish`` is called.

    :param verbose: when true, print ``(step, score)`` every 10000 steps.
    :return: None
    """
    bbox.load_level("../levels/train_level.data", verbose=True)

    states, actions, scores = [], [], []
    # NOTE: unpickling executes code from the file; only load a model file
    # you produced yourself.
    with open('utility_models.pkl', 'rb') as f:
        utility_models = pickle.load(f)

    step = 0
    has_next = 1
    while has_next:
        step += 1
        state = bbox.get_state()
        # One prediction per model; the index of the best one is the action.
        # (A random draw that used to precede this was overwritten at once,
        # so it has been removed.)
        utilities = [m.predict([state]) for m in utility_models]
        action = np.argmax(utilities)
        # Do action and bookkeeping
        has_next = bbox.do_action(action)
        states.append(np.array(state))  # copy of the state as observed
        actions.append(action)
        score = bbox.get_score()
        scores.append(score)
        if verbose and step % 10000 == 0:
            print(step, score)

    # Pick the first run_<i> directory name that does not exist yet.
    i = 1
    get_outdir = 'run_{}'.format
    outdir = get_outdir(i)
    while os.path.exists(outdir):
        i += 1
        outdir = get_outdir(i)
    os.mkdir(outdir)
    print('saving to {}'.format(outdir))
    scores = np.array(scores, dtype=np.float32)
    scores.tofile(os.path.join(outdir, 'scores'))
    actions = np.array(actions, dtype=np.int8)
    actions.tofile(os.path.join(outdir, 'actions'))
    states = np.array(states, dtype=np.float32)
    states.tofile(os.path.join(outdir, 'states'))

    bbox.finish(verbose=True)
示例#16
0
def prepare_bbox():
    """Reset or load the training level and start a new interaction log.

    ``interaction_list`` is emptied on every call. When the level has to be
    loaded, the dimension globals are filled in and a header row
    (``state_0`` .. ``state_<n-1>``, ``reward``, ``action``) is appended.
    """
    global n_features, n_actions, max_time
    ## TODO: Save the interactions with the environment as an output data frame
    global interaction_list
    interaction_list = []

    if bbox.is_level_loaded():
        ## Level already present: only return it to the initial state.
        ## NOTE(review): no header row is added on this path, so the log
        ## starts empty after a reset; confirm that is intended.
        bbox.reset_level()
        return

    ## Load the game level
    bbox.load_level("../levels/train_level.data", verbose=True)
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()
    max_time = bbox.get_max_time()

    ## Header row of the output data frame
    header_list = ['state_' + str(idx) for idx in range(n_features)]
    header_list.extend(['reward', 'action'])
    interaction_list.append(header_list)
示例#17
0
def prepare_bbox():
    '''
    Prepares the environment (learning/test data).

    Resets the level when one is loaded; otherwise loads the training level
    and stores its feature count, action count and max time in globals.
    '''
    global n_features, n_actions, max_time
    # Declared global as well, although this function does not assign them.
    global q_function, epsilon, gamma, alpha, valid_actions, init_value

    if not bbox.is_level_loaded():
        ## Load the training/test data
        bbox.load_level('../levels/train_level.data', verbose=True)
        n_features = bbox.get_num_of_features()
        n_actions = bbox.get_num_of_actions()
        max_time = bbox.get_max_time()
    else:
        ## Reset the environment to initial state
        bbox.reset_level()
示例#18
0
文件: bot.py 项目: etrushkin/bb
 def load_level(self):
     """Load the level named by ``self.level`` into bbox with ``verbose=0``."""
     level_path = self.level
     bbox.load_level(level_path, verbose=0)
示例#19
0
文件: bot.py 项目: wrwrwr/blackbox
#!/usr/bin/env python3

"""
A minimal bot player.

Loads the level and params and lets the bot act.
"""
from interface import (get_max_time, get_num_of_actions, get_num_of_features,
                       finish, load_level)
from numpy import get_include, load
from pyximport import install

install(setup_args={'include_dirs': get_include()}, reload_support=True)
from bot_wrapper import do_act

if __name__ == '__main__':
    # Load the training level before asking the interface about its size.
    load_level('../levels/train_level.data', verbose=1)
    # Level description handed to the bot: step budget, action count and
    # feature count, queried in that order.
    level = dict(
        steps=get_max_time(),
        actions=get_num_of_actions(),
        features=get_num_of_features(),
    )
    # Materialise every entry stored in params.npz as a plain dict.
    params = dict(load('params.npz'))
    do_act(level, params)
    finish(verbose=1)
示例#20
0
 def load_level(self):
     """Ask bbox to load ``self.level``, passing ``verbose=0``."""
     quiet = 0
     bbox.load_level(self.level, verbose=quiet)
示例#21
0
文件: bot.py 项目: wrwrwr/blackbox
#!/usr/bin/env python3
"""
A minimal bot player.

Loads the level and params and lets the bot act.
"""
from interface import (get_max_time, get_num_of_actions, get_num_of_features,
                       finish, load_level)
from numpy import get_include, load
from pyximport import install

install(setup_args={'include_dirs': get_include()}, reload_support=True)
from bot_wrapper import do_act

if __name__ == '__main__':
    load_level('../levels/train_level.data', verbose=1)

    # Describe the loaded level for the bot, one entry at a time.
    level = {}
    level['steps'] = get_max_time()
    level['actions'] = get_num_of_actions()
    level['features'] = get_num_of_features()

    # Bot parameters come from the params.npz archive.
    params = dict(load('params.npz'))

    # Let the bot play, then report the result.
    do_act(level, params)
    finish(verbose=1)
示例#22
0
def main():
    """Train a small dense Q-network on the training level and save its weights.

    Builds a three-layer ``Sequential`` model (two hidden ``Dense`` layers
    plus a linear output with one unit per action), then for ``epoch``
    passes over the level: chooses actions epsilon-greedily, stores each
    transition in an ``ExperienceReplay`` buffer, and runs ``mini_epoch``
    training batches per step. States are normalised with the scaler
    unpickled from ``scaler.pkl``. Weights are written to ``q_model.h5``.
    """
    epsilon = .1  # exploration
    num_actions = 4
    input_size = 36
    hidden_size = 24
    activation = 'relu'
    max_memory = 2000
    batch_size = 50
    mini_epoch = 5
    epoch = 10
    report_steps = 100

    model = Sequential()
    model.add(
        Dense(hidden_size, input_shape=[input_size], activation=activation))
    model.add(Dense(hidden_size, activation=activation))
    model.add(Dense(num_actions))
    model.compile('adam', 'mse')

    # model.load_weights('model.h5')

    # Define environment/game
    bbox.load_level('../levels/train_level.data', verbose=True)

    # Initialize experience replay object
    exp_replay = ExperienceReplay(max_memory=max_memory)

    # FIXME: scaler.pkl must already exist. It was originally produced by
    # fitting a preprocessing.StandardScaler on the states recorded in
    # 'run_random/states' (reshaped to [1214494, 36]) and pickling it.
    with open('scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)

    def get_state():
        # Scale the current bbox state; transform works on a batch, so wrap
        # the state in a 1-row array and unwrap the single result.
        return scaler.transform(np.array([bbox.get_state()]))[0]

    # Train
    for e in range(epoch):
        loss = 0.
        bbox.reset_level()
        game_over = False
        # get initial input
        input_t = get_state()
        score = 0
        step = 0

        while not game_over:
            step += 1
            input_tm1 = input_t
            # get next action
            if np.random.rand() <= epsilon:
                # Draw a scalar (no size=...) so the action has the same
                # kind as the argmax branch; the earlier size=1 draw gave a
                # 1-element array to do_action and the replay buffer.
                action = np.random.randint(0, num_actions)
            else:
                q = model.predict(np.array([input_tm1]))[0]
                action = np.argmax(q)

            # apply action, get rewards and new state
            game_over = not bbox.do_action(action)
            input_t = get_state()
            new_score = bbox.get_score()
            reward = new_score - score
            score = new_score

            # store experience
            exp_replay.remember([input_tm1, action, reward, input_t],
                                game_over)

            # adapt model
            for _ in range(mini_epoch):
                inputs, targets = exp_replay.get_batch(model,
                                                       batch_size=batch_size)
                # train_on_batch may return a bare scalar or a sequence
                # whose first entry is the loss; ravel handles both.
                batch_loss = model.train_on_batch(inputs, targets)
                loss += float(np.ravel(batch_loss)[0])

            if step % report_steps == 0:
                print('Step {:07d} | Loss {:.4f} | Score {}'.format(
                    step, loss / (report_steps * mini_epoch), score))
                loss = 0.

        print('Epoch {:03d}/{} | Score {}'.format(e, epoch - 1, score))

    # Save trained model weights
    model.save_weights('q_model.h5', overwrite=True)
示例#23
0
文件: naive_bot.py 项目: lopuhin/bbot
def run_bbox(verbose=False):
    """Play the training level while fitting one SGD utility model per action.

    The first ``random_steps`` steps are fully random. Afterwards, with
    probability 0.1 a random action is taken, otherwise the action with the
    highest predicted utility. After every step the model of the action
    taken ``n_past_act`` steps back is updated with the discounted sum of
    the rewards seen since. Scores, actions and states are finally dumped
    as raw arrays into the first unused ``run_<i>`` directory.

    :param verbose: when true, print ``(step, score)`` every 1000 steps.
    :return: None
    """
    bbox.load_level("../levels/train_level.data", verbose=True)

    states, actions, scores, rewards = [], [], [], []
    utility_models = [SGDRegressor(learning_rate='constant')
                      for _ in range(n_actions)]
    zero_utilities = np.zeros([n_actions])

    n_past_act = 1
    n_past_st = 0  # in addition to current
    discount = 0.9
    random_steps = 10000
    window = n_past_act + n_past_st

    step = 0
    has_next = 1
    while has_next:
        step += 1
        state = bbox.get_state()

        # Choose action using current utility_models
        utilities = zero_utilities
        if step > random_steps:
            if n_past_st:
                clf_state = np.concatenate(states[-n_past_st:] + [state])
            else:
                clf_state = state
            try:
                utilities = np.array(
                    [model.predict([clf_state])[0]
                     for model in utility_models])
            except NotFittedError:
                # Some model has not been trained yet: keep zero utilities.
                pass

        # The random draw happens on every step, before the step check.
        explore = np.random.rand() < 0.1 or step <= random_steps
        if explore:
            action = np.random.choice(n_actions)
        else:
            action = np.argmax(utilities)

        # Do action and bookkeeping
        has_next = bbox.do_action(action)
        states.append(np.array(state))
        actions.append(action)
        score = bbox.get_score()
        rewards.append((score - scores[-1]) if scores else score)
        scores.append(score)

        # Train the model of the action taken n_past_act steps ago
        if len(rewards) >= window:
            total_reward = sum(
                r * np.power(discount, i)
                for i, r in enumerate(rewards[-n_past_act:]))
            if n_past_act == 1:
                # An end index of 0 would give an empty slice, so slice to
                # the end of the list instead.
                history = states[-window:]
            else:
                history = states[-window:-n_past_act + 1]
            clf_state = np.concatenate(history)
            utility_models[actions[-n_past_act]].partial_fit(
                [clf_state], [total_reward])

        if verbose and step % 1000 == 0:
            print(step, score)

    # Find the first run_<i> directory that does not exist yet.
    run_index = 1
    while os.path.exists('run_{}'.format(run_index)):
        run_index += 1
    outdir = 'run_{}'.format(run_index)
    os.mkdir(outdir)
    print('saving to {}'.format(outdir))

    scores = np.array(scores, dtype=np.float32)
    scores.tofile(os.path.join(outdir, 'scores'))
    actions = np.array(actions, dtype=np.int8)
    actions.tofile(os.path.join(outdir, 'actions'))
    states = np.array(states, dtype=np.float32)
    states.tofile(os.path.join(outdir, 'states'))

    bbox.finish(verbose=True)