Example #1
def launch_rlglue_agent(parameters):
    """Start the rlglue agent.

    (This function is executed in a separate process using
    multiprocessing.)
    """
    import rl_glue_ale_agent
    agent = rl_glue_ale_agent.NeuralAgent(parameters.discount,
                                          parameters.learning_rate,
                                          parameters.rms_decay,
                                          parameters.rms_epsilon,
                                          parameters.momentum,
                                          parameters.epsilon_start,
                                          parameters.epsilon_min,
                                          parameters.epsilon_decay,
                                          parameters.phi_length,
                                          parameters.replay_memory_size,
                                          parameters.experiment_prefix,
                                          parameters.nn_file,
                                          parameters.pause,
                                          parameters.network_type,
                                          parameters.update_rule,
                                          parameters.batch_accumulator,
                                          parameters.freeze_interval,
                                          parameters.batch_size,
                                          parameters.replay_start_size,
                                          parameters.update_frequency,
                                          parameters.image_resize)
    AgentLoader.loadAgent(agent)
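The docstring above notes that this function is meant to run in its own process. A hedged illustration (not part of the original project) of how a launcher could spawn it with the standard-library multiprocessing module; the `parameters` namespace is assumed to come from whatever argparse parser the launcher defines:

import multiprocessing

def spawn_agent_process(parameters):
    # Hypothetical helper: run launch_rlglue_agent in a child process so the
    # parent can go on to start the environment and experiment programs.
    proc = multiprocessing.Process(target=launch_rlglue_agent, args=(parameters,))
    proc.start()
    return proc  # caller may proc.join() once the experiment has finished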
Example #2
def runAgent(agent_class):
    """Use the agent_parameters function to parse command line arguments
    and run the RL agent in network mode.
    """
    parser = argparse.ArgumentParser(parents=[agent_class.agent_parameters()], add_help=True)
    params = vars(parser.parse_args())
    AgentLoader.loadAgent(agent_class(**params))
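runAgent assumes the agent class exposes an agent_parameters() helper that returns an argparse parser usable as a parent, with option names matching the constructor's keyword arguments so that agent_class(**params) works. A minimal sketch of that contract (illustrative only; the real agent classes define their own options):

import argparse

class ExampleAgent(object):
    # Hypothetical agent used only to illustrate the agent_parameters() contract.
    def __init__(self, learning_rate=0.001, discount=0.99):
        self.learning_rate = learning_rate
        self.discount = discount

    @staticmethod
    def agent_parameters():
        # add_help=False is required for a parser that will be used as a parent.
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument("--learning-rate", dest="learning_rate", type=float, default=0.001)
        parser.add_argument("--discount", dest="discount", type=float, default=0.99)
        return parser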
Example #3
def runAgent(agent_class):
    """Use the agent_parameters function to parse command line arguments
    and run the RL agent in network mode.
    """
    parser = argparse.ArgumentParser(parents=[agent_class.agent_parameters()],
                                     add_help=True)
    params = vars(parser.parse_args())
    AgentLoader.loadAgent(agent_class(**params))
Example #4
def main(args):
    """
    Mostly just read command line arguments here. We do this here
    instead of agent_init to make it possible to use --help from
    the command line without starting an experiment.
    """

    from logutils import setupLogging

    # Handle command line argument:
    parser = addScriptArguments()

    # ignore unknowns
    parameters, _ = parser.parse_known_args(args)

    setupLogging(parameters.verbosity)

    if not parameters.recording:
        best_video = epoch_network = learning_log = inner_video = every_video = False
    else:
        best_video = parameters.video
        inner_video = parameters.inner_video
        every_video = parameters.every_video
        epoch_network = learning_log = True

    if parameters.nips:
        default_parameters = NIPSParameters
    else:
        default_parameters = DefaultParameters
        


    AgentLoader.loadAgent(NeuralAgent(parameters.game_name,
        network_size=default_parameters.get_default(parameters, 'network_size'),
        learning_rate=default_parameters.get_default(parameters, 'learning_rate'),
        batch_size=default_parameters.get_default(parameters, 'batch_size'),
        discount_rate=default_parameters.get_default(parameters, 'discount_rate'),
        momentum=default_parameters.get_default(parameters, 'momentum'),
        rms_decay=default_parameters.get_default(parameters, 'RMS_decay'),
        experiment_prefix=default_parameters.get_default(parameters, 'experiment_prefix'),
        experiment_directory=default_parameters.get_default(parameters, 'experiment_directory'),
        nn_file=default_parameters.get_default(parameters, 'nn_file'),
        pause=default_parameters.get_default(parameters, 'pause'),
        epsilon_start=default_parameters.get_default(parameters, 'epsilon_start'),
        epsilon_min=default_parameters.get_default(parameters, 'epsilon_min'),
        epsilon_decay=default_parameters.get_default(parameters, 'epsilon_decay'),
        testing_epsilon=default_parameters.get_default(parameters, 'testing_epsilon'),        
        history_length=default_parameters.get_default(parameters, 'history_length'),
        max_history=default_parameters.get_default(parameters, 'history_max'),
        best_video=best_video,
        every_video=every_video,
        inner_video=inner_video,
        keep_epoch_network=epoch_network,
        learning_log=learning_log,
        target_reset_frequency=default_parameters.get_default(parameters, 'target_reset_frequency')))
Example #5
def main(args):
    """
    Mostly just read command line arguments here. We do this here
    instead of agent_init to make it possible to use --help from
    the command line without starting an experiment.
    """

    from logutils import setupLogging

    # Handle command line argument:
    parser = argparse.ArgumentParser(description='Neural rl agent.')
    parser.add_argument("-v", "--verbose", dest="verbosity", default=0, action="count",
                      help="Verbosity.  Invoke many times for higher verbosity")
    parser.add_argument("-g", '--game-name', dest="game_name", default=None,
        help='Name of the game')
    parser.add_argument('-b', '--batch-size', dest="batch_size", type=int, default=TestingNeuralAgent.DefaultBatchSize,
        help='Batch size (default: %(default)s)')
    parser.add_argument('-e', '--experiment-directory', dest="experiment_directory", type=str, required=True,
        help='Directory where experiment details were saved')
    parser.add_argument('-t', '--test-epsilon', dest="testing_epsilon", type=float, default=TestingNeuralAgent.DefaultTestingEpsilon,
        help='Epsilon to use during testing (default: %(default)s)')    
    parser.add_argument("-p", '--pause', dest="pause", type=float, default=TestingNeuralAgent.DefaultPauseTime,
        help='Amount of time to pause display while testing. (default: %(default)s)')
    parser.add_argument("-hl", '--history-length', dest="history_length", type=int, default=TestingNeuralAgent.DefaultHistoryLength,
        help='History length (default: %(default)s)')
    parser.add_argument('--no-video', dest="video", default=True, action="store_false",
        help='Do not make a "video" record of the best run in each game')    
    parser.add_argument('--no-records', dest="recording", default=True, action="store_false",
        help='Do not record anything about the experiment (best games, epoch networks, test results, etc)')


    # ignore unknowns
    parameters, _ = parser.parse_known_args(args)

    setupLogging(parameters.verbosity)

    if not parameters.recording:
        best_video = learning_log = False
    else:
        best_video = parameters.video
        learning_log = True

    AgentLoader.loadAgent(TestingNeuralAgent(parameters.game_name,
        batch_size=parameters.batch_size,
        experiment_directory=parameters.experiment_directory,
        testing_epsilon=parameters.testing_epsilon,
        pause=parameters.pause,
        history_length=parameters.history_length,
        best_video=best_video,
        learning_log=learning_log))
Example #6
def launch_rlglue_agent(parameters):
    """Start the rlglue agent.

    (This function is executed in a separate process using
    multiprocessing.)
    """
    import rl_glue_ale_agent
    agent = rl_glue_ale_agent.NeuralAgent(
        parameters.discount, parameters.learning_rate, parameters.rms_decay,
        parameters.rms_epsilon, parameters.momentum, parameters.epsilon_start,
        parameters.epsilon_min, parameters.epsilon_decay,
        parameters.phi_length, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.nn_file, parameters.pause,
        parameters.network_type, parameters.update_rule,
        parameters.batch_accumulator, parameters.freeze_interval,
        parameters.batch_size, parameters.replay_start_size,
        parameters.update_frequency, parameters.image_resize)
    AgentLoader.loadAgent(agent)
Example #7
	def agent_start(self,observation):
		return Action()
	
	def agent_step(self,reward, observation):
		return Action()
	
	def agent_end(self,reward):
		pass
	
	def agent_cleanup(self):
		pass

	def agent_message(self,inMessage):
		if inMessage==None:
			return "null"

		if inMessage=="":
			return "empty"

		if inMessage=="null":
			return None

		if inMessage=="empty":
			return ""
		
		return inMessage;


if __name__=="__main__":
	AgentLoader.loadAgent(test_message_agent())
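The agent_message handler above implements a small echo protocol (swapping "null"/"empty" for the corresponding Python values). A hedged sketch of how an experiment program could exercise it, assuming the standard RL-Glue Python experiment codec is running:

import rlglue.RLGlue as RLGlue

RLGlue.RL_init()
assert RLGlue.RL_agent_message("") == "empty"
assert RLGlue.RL_agent_message("empty") == ""
assert RLGlue.RL_agent_message("hello") == "hello"  # anything else is echoed back
RLGlue.RL_cleanup()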
Example #8
        states = numpy.vstack([e.prev_state for e in self.experiences])
        actions = numpy.array([e.prev_action for e in self.experiences],dtype='int32')
        targets = numpy.zeros(len(self.experiences))
        costs = []
        for n in xrange(10):
            # Recompute target Q values with current estimate
            for i in xrange(len(self.experiences)-1):
                max_q = self.max_action(self.experiences[i].state)[0]
                targets[i] = self.experiences[i].reward + discount_factor*max_q
            targets[-1] = self.experiences[-1].reward  # terminal transition: no bootstrap

            cost = self.update(states,actions,targets)
            costs.append(cost)
        print 'Costs:',costs
        self.experiences = []

        self.p_exploration *= p_exploration_decay
        if self.p_exploration < 1:
            self.p_exploration = 0
        print 'p_exploration',self.p_exploration

    def agent_cleanup(self):
        pass

    def agent_message(self, message):
        pass

if __name__=="__main__":
    AgentLoader.loadAgent(mlp_agent())

Example #9
def main():
    AgentLoader.loadAgent(PredictiveMockAgent())
Example #10
            print "########### MODEL UPDATED ######################"
            self.DN.target_model_update()

        # Simple text based visualization
        print '  REWARD %.1f   / EPSILON  %.5f' % (np.sign(reward), self.epsilon)

        # Time count
        if self.policyFrozen is False:
            self.time += 1

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        if inMessage.startswith("freeze learning"):
            self.policyFrozen = True
            return "message understood, policy frozen"

        if inMessage.startswith("unfreeze learning"):
            self.policyFrozen = False
            return "message understood, policy unfrozen"

        if inMessage.startswith("save model"):
            serializers.save_npz('resume.model', self.DN.model) # save current model
            np.savez('stored_D012.npz', D0=self.DN.D[0], D1=self.DN.D[1], D2=self.DN.D[2])
            np.savez('stored_D34.npz', D3=self.DN.D[3], D4=self.DN.D[4])
            return "message understood, model saved"

if __name__ == "__main__":
    AgentLoader.loadAgent(dn_agent())
Example #11
                action = 'E'

            self.window.refresh()

        except KeyboardInterrupt:
            RLGlue.RL_cleanup()

        a = Action()

        if action:
            a.charArray = [action]

        return a

    # (double) -> void
    def agent_end(self, reward):
        pass

    # () -> void
    def agent_cleanup(self):
        curses.endwin()
        print 'BYE!'

    # (string) -> string
    def agent_message(self, message):
        pass


if __name__ == "__main__":
    AgentLoader.loadAgent(ManualAgent())
Example #12
        self.write_data(observation.doubleArray, "observation")
        return returnAction

    def agent_step(self, reward, observation):
        print "Observation: ", observation.doubleArray
        print "Reward: ", reward
        returnAction = Action()
        returnAction.doubleArray = self.agent_policy(observation)

        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)

        self.write_data(observation.doubleArray, "observation")
        self.write_data([reward], "reward")
        return returnAction

    def agent_end(self, reward):
        print "Agent Down!"

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        print inMessage

        return "Message received"


if __name__ == "__main__":
    AgentLoader.loadAgent(weak_baseline())
Example #13
def main():
    AgentLoader.loadAgent(CaclaAgentExperimenter())
Example #14
def main():
    AgentLoader.loadAgent(cacla_agent())
Example #15
        return returnAction
    
    def agent_step(self,reward, observation):
        #Generate random action, 0 or 1
        thisIntAction=self.randGenerator.randint(0,1)
        returnAction=Action()
        returnAction.intArray=[thisIntAction]
        
        last_action=copy.deepcopy(returnAction)
        last_observation=copy.deepcopy(observation)

        return returnAction
    
    def agent_end(self,reward):
        pass
    
    def agent_cleanup(self):
        pass
    
    def agent_message(self,inMessage):
        print inMessage


if __name__=="__main__":
    parser = argparse.ArgumentParser(description='Run DQN Recurrent experiment')
    parser.add_argument('--learn_start', metavar='L', type=int, default=5*10**4,
                        help='only start learning after an amount of steps in order to build a db')
    args = parser.parse_args()

    AgentLoader.loadAgent(QAgent(args))
Example #16
                action = 'E'
            
            self.window.refresh()
        
        except KeyboardInterrupt:
            RLGlue.RL_cleanup()
            
        
        a = Action()
        
        if action:
            a.charArray = [action]
        
        return a
    
    # (double) -> void
    def agent_end(self, reward):
        pass
    
    # () -> void
    def agent_cleanup(self):
        curses.endwin()
        print 'BYE!'

    # (string) -> string
    def agent_message(self, message):
        pass

if __name__=="__main__":
    AgentLoader.loadAgent(ManualAgent())
        """
        pass
        
    def save_params(self, filename="cnnparams.pkl"):
        the_file = open(filename, "wb")  # binary mode for the pickle protocol used below

        cPickle.dump(self.cnn, the_file, -1)
        the_file.close()

    def agent_message(self, in_message):
        """
        The experiment will cause this method to be called.  Used
        to save data to the indicated file. 
        """
        if in_message.startswith("save_data"):
            total_time = time.time() - self.start_time
            file_name=in_message.split(" ")[1]
            the_file = open(file_name, "w")
            all_data = (self.cnn.get_params())
            print "PICKLING: " + file_name
            #cPickle.dump(all_data, the_file, -1)
            #print "Simulated at a rate of {}/s".format(len(self.rewards) / 
            #                                           total_time)
            return "File saved successfully"

        else:
            return "I don't know how to respond to your message"


if __name__=="__main__":
    AgentLoader.loadAgent(NeuralQLearnAgent())
Example #18
import os, sys, time
import numpy as np
import scipy.misc as spm
from rlglue.agent import AgentLoader

sys.path.append(os.path.split(os.getcwd())[0])
from PIL import Image
from config import config
from agent import Agent

# Override config
config.apply_batchnorm = True
config.ale_actions = [0, 3, 4]
config.ale_screen_size = [210, 160]
config.ale_scaled_screen_size = [84, 84]
config.rl_replay_memory_size = 10**5
config.rl_replay_start_size = 10**4
config.q_conv_hidden_channels = [32, 64, 64]
config.q_conv_strides = [4, 2, 1]
config.q_conv_filter_sizes = [8, 4, 3]
config.q_conv_output_vector_dimension = 512
config.q_fc_hidden_units = [256, 128]


# Override agent
class PongAgent(Agent):
    pass


AgentLoader.loadAgent(PongAgent())
Example #19
        #	Message Description
        # unfreeze learning
        # Action: Set flag to resume updating policy
        #
        if inMessage.startswith("unfreeze learning"):
            self.policyFrozen = False
            return "message understood, policy unfrozen"

        #Message Description
        # freeze exploring
        # Action: Set flag to stop exploring (greedy actions only)
        #
        if inMessage.startswith("freeze exploring"):
            self.exploringFrozen = True
            return "message understood, exploring frozen"

        #Message Description
        # unfreeze exploring
        # Action: Set flag to resume exploring (e-greedy actions)
        #
        if inMessage.startswith("unfreeze exploring"):
            self.exploringFrozen = False
            return "message understood, exploring frozen"

        return "Invasive agent does not understand your message."


if __name__ == "__main__":
    AgentLoader.loadAgent(InvasiveAgent())
Example #20
        states, actions, rewards, next_states, next_actions = samples
        repeats = 1 if self.replay_size is not None else self.replay_times

        for _ in xrange(repeats):
            for i in xrange(sample_size):
                state, action, reward, next_state, next_action = states[i], \
                    actions[i], rewards[i], next_states[i], next_actions[i]
                n_rew = self.normalize_reward(reward)
                # assert np.unique(state), 'state contains duplicate values'
                delta = n_rew - self.get_value(state, action, self.sparse)
                assert not (np.any(np.isnan(delta)) or np.any(np.isinf(delta))), \
                        'delta is nan or infinite: %s' % str(delta)
                ns_values = self.get_all_values(next_state, self.sparse)
                # Here's the difference with Q-learning: next_action is used
                delta += self.gamma*ns_values[next_action]
                # Normalize alpha with # of active features
                alpha = self.alpha / float(np.sum(state!=0.))
                # TODO I might be missing out on something, compare formula
                # Maybe trace made up for the fact that a factor is missing
                self.theta += alpha * delta * self.trace
            
    def create_projector(self):
        return RAMALEFeatures()


if __name__=="__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Replay Agent')
    ALEReplayAgent.register_with_parser(parser)
    args = parser.parse_args()
    AgentLoader.loadAgent(ALEReplayAgent(args))
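The comment in the replay loop above points out the only difference from Q-learning: the bootstrap uses the value of next_action rather than the greedy maximum. A hedged, self-contained sketch of the two targets (names mirror the snippet; the helper itself is not from the original code):

import numpy as np

def td_target(reward, ns_values, next_action, gamma, sarsa=True):
    # ns_values: vector of Q values for the next state.
    if sarsa:
        # SARSA: bootstrap on the action actually taken next.
        return reward + gamma * ns_values[next_action]
    # Q-learning: bootstrap on the greedy action instead.
    return reward + gamma * np.max(ns_values)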
Example #21
def run_agent(agent=None):
    AgentLoader.loadAgent(agent)
Example #22
def main():
    AgentLoader.loadAgent(cacla_agent())
Example #23
File: basic.py Project: amoliu/hedgehog
    agent.agent_start(observation)
    agent.agent_train(False)

    for i in range(2, 256):
        print "Round %d" % i
        reward = float(i)
        color = i
        observation = Observation()
        observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
        observation.intArray *= color

        agent.agent_step(reward, observation)
        agent.agent_train(False)

    reward = float(i)
    color = i
    observation = Observation()
    observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
    observation.intArray *= color

    agent.agent_step(reward, observation)

    agent.agent_train(True)

    #ipdb.set_trace()

if __name__ == '__main__':
    #test_agent_step()
    agent = setup()
    AgentLoader.loadAgent(agent)
Example #24
	def agent_start(self,observation):
		return Action()
	
	def agent_step(self,reward, observation):
		return Action()
	
	def agent_end(self,reward):
		pass
	
	def agent_cleanup(self):
		pass

	def agent_message(self,inMessage):
		if inMessage==None:
			return "null"

		if inMessage=="":
			return "empty"

		if inMessage=="null":
			return None

		if inMessage=="empty":
			return ""
		
		return inMessage;


if __name__=="__main__":
	AgentLoader.loadAgent(test_message_agent())
Example #25
from rlglue.types import Observation
from rlglue.agent import AgentLoader as AgentLoader
from rlglue.agent.Agent import Agent
from rlglue.utils.TaskSpecVRLGLUE3 import TaskSpecParser
from LambdaSARSA import LambdaSARSA
import tool
import pickle
from ModelAgent import ModelAgent
if __name__=="__main__":        
    import atexit
    agent = tool.Load('mario_sarsa_981_0.04_0.db')
    #agent = LinearSarsaAgent()
    #atexit.register(lambda: saveObj(agent)) #workaround to the NoneType error in the destructor
    #agent = tool.Load("Speed.db")
    #AgentLoader.loadAgent(agent)

    #while True:
    
    AgentLoader.loadAgent(agent)
    #time.sleep(2)
Example #26
def main():
    AgentLoader.loadAgent(NeuralAgent())
Example #27
    def agent_step(self, reward, observation):
        # print "Observation: ",observation.doubleArray
        # print "Reward: ",reward
        returnAction = Action()
        returnAction.doubleArray = self.agent_policy(observation)

        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)

        self.write_data(observation.doubleArray, "observation")
        self.write_data([reward], "reward")
        return returnAction

    def agent_end(self, reward):
        self.episode += 1

        print "Agent Down!"

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        print inMessage

        return "Message received"


if __name__ == "__main__":
    AgentLoader.loadAgent(random_agent())
Example #28
    parser.add_argument('--potential', metavar='F', type=str, default='less_enemies',
                    help='potentials to use: less_enemies or lowest_enemy')
    parser.add_argument('--actions', metavar='C',type=int, default=None, 
                        nargs='*',help='list of allowed actions')

    args = parser.parse_args()
    
    act = None
    if args.actions is not None:
        act = np.array(args.actions)

    if args.potential in ('less_enemies', 'lowest_enemy'):
        # Both potentials are handled by the same agent class, which selects the
        # shaping potential from the selected_potential argument.
        AgentLoader.loadAgent(ALESarsaShapingAgent(agent_id=args.id,
                                                   alpha=args.alpha,
                                                   lambda_=args.lambda_,
                                                   eps=args.eps,
                                                   gamma=args.gamma,
                                                   save_path=args.savepath,
                                                   actions=act,
                                                   selected_potential=args.potential))
    else:
        print 'unknown potential type'
Example #29
def main():
    AgentLoader.loadAgent(CaclaAgentNolearn())
Example #30
            return "message understood, exploring frozen"

        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to 
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString=inMessage.split(" ");
            self.save_value_function(splitString[1]);
            print "Saved.";
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from 
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString=inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "SampleqAgent(Python) does not understand your message."



if __name__=="__main__":
    AgentLoader.loadAgent(q_agent())
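The "Message Description" comments above define a small string protocol driven from the experiment side. A hedged sketch of how an experiment program might use it, assuming the standard RL-Glue Python experiment codec (the file name is illustrative):

import rlglue.RLGlue as RLGlue

RLGlue.RL_init()
RLGlue.RL_episode(0)  # run one episode (0 = no step limit)
print RLGlue.RL_agent_message("freeze learning")
print RLGlue.RL_agent_message("save_policy value_function.dat")
RLGlue.RL_cleanup()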
Example #31
        a = np.asarray(a).astype(np.int32)
        r = np.asarray(r).astype(np.float32)
        s2 = np.asarray(s2).astype(np.float32)
        t = np.asarray(t).astype(np.float32)
        
        # Use the target network targetQ to estimate the Q values of s2
        s2 = chainer.Variable(self.xp.asarray(s2))
        Q = self.targetQ.value(s2)
        Q_data = Q.data
        
        if type(Q_data).__module__ == np.__name__:
            max_Q_data = np.max(Q_data, axis=1)
        else:
            max_Q_data = np.max(self.xp.asnumpy(Q_data).astype(np.float32), axis=1)
        
        # Build the training targets t from the Q values estimated by targetQ
        t = np.sign(r) + (1 - t)*self.gamma*max_Q_data
        
        self.optimizer.update(self.Q, s, a, t)
    
    
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Deep Q-Learning')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--size', '-s', default=6, type=int,
                        help='Reversi board size')
    args = parser.parse_args()
    
    AgentLoader.loadAgent(KmoriReversiAgent(args.gpu,args.size))
Example #32
		self.qvalues=pickle.load(theFile)
		theFile.close()
	
	def agent_message(self,inMessage):
		#Message Description
	 	# save_policy FILENAME
	 	# Action: Save current value function in binary format to 
		# file called FILENAME
		#
		if inMessage.startswith("save_policy"):
			splitString=inMessage.split(" ");
			self.save_value_function(splitString[1]);
			print "Saved.";
			return "message understood, saving policy"

		#Message Description
	 	# load_policy FILENAME
	 	# Action: Load value function in binary format from 
		# file called FILENAME
		#
		if inMessage.startswith("load_policy"):
			splitString=inMessage.split(" ")
			self.load_value_function(splitString[1])
			print "Loaded."
			return "message understood, loading policy"

		return "QLearnAgent(Python) does not understand your message."

if __name__=="__main__":
	AgentLoader.loadAgent(QLearnAgent())
Example #33
def main():
    AgentLoader.loadAgent(NeuralAgent())
Example #34
        F = self.gamma * (current_potential - self.last_potential)

        if not self.allow_negative_rewards:
            F = max(0, F)

        a_ns = self.step(reward,phi_ns)
        #log state data
        self.last_phi = copy.deepcopy(phi_ns)
        self.last_action = copy.deepcopy(a_ns)
        self.last_potential = current_potential
        
        return self.create_action(self.actions[a_ns])#create RLGLUE action

    def agent_end(self, reward):
        with open(self.shaping_data_filename, 'a') as f:
            for i in range(len(self.outstanding_shaping_data['alien_bonus'])):
                f.write(','.join(map(str, [
                    self.outstanding_shaping_data['alien_bonus'][i],
                    self.outstanding_shaping_data['laser_penalty'][i],
                    self.outstanding_shaping_data['shield_bonus'][i],
                    self.outstanding_shaping_data['lowest_enemy_penalty'][i]
                    ])) + '\n')
        super(ALEShapingAgent, self).agent_end(reward)


if __name__=="__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Agent')
    ALEShapingAgent.register_with_parser(parser)
    args = parser.parse_args()
    AgentLoader.loadAgent(ALEShapingAgent(args))
Example #35
	def agent_freeze(self):
		pass
	
	def agent_message(self,inMessage):
		return None
	
	def randomify(self):
		self.action.intArray = []
		self.action.doubleArray = []
		

		for min_action,max_action in self.int_action_ranges:					
				act = random.randrange(min_action,max_action+1)
				self.action.intArray.append(act)

		for min_action,max_action in self.double_action_ranges:					
				act = random.uniform(min_action,max_action)
				self.action.doubleArray.append(act)
				
		self.action.charArray   = GenPasswd2(self.action.numChars)
		#print self.action.intArray
		#print self.action.doubleArray
		#print self.action.charArray
		
		

			

if __name__=="__main__":        
	AgentLoader.loadAgent(RandomAgent())
Example #36
		maze = detect_maze(screen)
		self.image = pacman_image(maze)
		return_action = Action()
		action = randrange(self.numActions)
		return_action.intArray = [action]
		self.lastAction = copy.deepcopy(return_action)
		self.lastObservation = copy.deepcopy(observation)

		return return_action

	def agent_step(self, reward, observation):
		screen = observation.intArray[128:]
		screen = np.reshape(screen, (210, -1))
		self.image.new_image(screen)
		return_action = Action()
		action = randrange(self.numActions)
		return_action.intArray = [action]
		self.lastAction=copy.deepcopy(return_action)
		self.lastObservation=copy.deepcopy(observation)

		return return_action

	def agent_end(self, reward):
		pass

	def agent_cleanup(self):
		pass

if __name__=="__main__":
	AgentLoader.loadAgent(pacmanAgent())
Example #37
        if not self.allow_negative_rewards:
            F = max(0, F)

        a_ns = self.step(reward, phi_ns)
        #log state data
        self.last_phi = copy.deepcopy(phi_ns)
        self.last_action = copy.deepcopy(a_ns)
        self.last_potential = current_potential

        return self.create_action(self.actions[a_ns])  #create RLGLUE action

    def agent_end(self, reward):
        with open(self.shaping_data_filename, 'a') as f:
            for i in range(len(self.outstanding_shaping_data['alien_bonus'])):
                f.write(','.join(
                    map(str, [
                        self.outstanding_shaping_data['alien_bonus'][i],
                        self.outstanding_shaping_data['laser_penalty'][i],
                        self.outstanding_shaping_data['shield_bonus'][i], self.
                        outstanding_shaping_data['lowest_enemy_penalty'][i]
                    ])) + '\n')
        super(ALEShapingAgent, self).agent_end(reward)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Agent')
    ALEShapingAgent.register_with_parser(parser)
    args = parser.parse_args()
    AgentLoader.loadAgent(ALEShapingAgent(args))
Example #38
        # log state data
        self.last_phi = copy.deepcopy(phi_ns)
        self.last_action = copy.deepcopy(a_ns)

        return self.create_action(self.actions[a_ns])  # create RLGLUE action

    def agent_end(self, reward):
        super(ALESarsaAgent, self).agent_end(reward)
        self.step(reward)


class BasicALESarsaAgent(BasicALEAgent, ALESarsaAgent):
    pass


class RAMALESarsaAgent(RAMALEAgent, ALESarsaAgent):
    pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="run Sarsa Agent")
    parser.add_argument("--features", metavar="F", type=str, default="RAM", help="features to use: RAM or BASIC")
    ALESarsaAgent.register_with_parser(parser)
    args = parser.parse_args()
    if args.features == "RAM":
        AgentLoader.loadAgent(RAMALESarsaAgent(args))
    elif args.features == "BASIC":
        AgentLoader.loadAgent(BasicALESarsaAgent(args))
    else:
        raise Exception("unknown feature type")
Example #39
            return "message understood, exploring frozen";

        #Message Description
         # save_policy FILENAME
         # Action: Save current value function in binary format to 
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString=inMessage.split(" ");
            self.save_value_function(splitString[1]);
            print "Saved.";
            return "message understood, saving policy";

        #Message Description
         # load_policy FILENAME
         # Action: Load value function in binary format from 
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString=inMessage.split(" ");
            self.load_value_function(splitString[1]);
            print "Loaded.";
            return "message understood, loading policy";

        return "SampleSarsaAgent(Python) does not understand your message.";



if __name__=="__main__":
    AgentLoader.loadAgent(sarsa_agent())
Example #40
        self.nonEmptyAction.intArray = (0, 1, 2, 3, 4, 5, 6)
        self.nonEmptyAction.doubleArray = (0.0 / 3.0, 1.0 / 3.0, 2.0 / 3.0)
        self.nonEmptyAction.charArray = "a"

    def agent_start(self, observation):
        self.whichEpisode = self.whichEpisode + 1

        if self.whichEpisode % 2 == 0:
            return self.emptyAction
        else:
            return self.nonEmptyAction

    def agent_step(self, reward, observation):
        if self.whichEpisode % 2 == 0:
            return self.emptyAction
        else:
            return self.nonEmptyAction

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        return ""


if __name__ == "__main__":
    AgentLoader.loadAgent(test_empty_agent())
Example #41
            self.exploringFrozen = False
            return "message understood, exploring frozen"

        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString = inMessage.split(" ")
            self.save_value_function(splitString[1])
            print "Saved."
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString = inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "SampleSarsaAgent(Python) does not understand your message."


if __name__ == "__main__":
    AgentLoader.loadAgent(Qlearning_agent())
Example #42
                    
    parser.add_argument('--actions', metavar='C',type=int, default=None, 
                        nargs='*',help='list of allowed actions')

    args = parser.parse_args()
    
    act = None
    if not (args.actions is None):
        act = np.array(args.actions)

    if args.features == 'RAM':
        AgentLoader.loadAgent(RAMALEERSarsaAgent(agent_id=args.id,
                                     alpha =args.alpha,
                                     lambda_=args.lambda_,
                                     eps =args.eps,
                                     gamma=args.gamma, 
                                     save_path=args.savepath,
                                     actions = act,
                                     db_size= args.db_size,
                                     trajectory_length=args.trajectory_length,
                                     replays=args.replays))
    elif args.features == 'BASIC':
        AgentLoader.loadAgent(BasicALEERSarsaAgent(agent_id=args.id,
                                     alpha =args.alpha,
                                     lambda_=args.lambda_,
                                     eps =args.eps,
                                     gamma=args.gamma, 
                                     save_path=args.savepath,
                                     actions = act,
                                     db_size= args.db_size,
                                     trajectory_length=args.trajectory_length,
                                     replays=args.replays))
Example #43
def main():
    AgentLoader.loadAgent(CaclaAgentLasagne())
Example #44
            self.exploringFrozen = False
            return "message understood, exploring unfrozen"

        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString = inMessage.split(" ")
            self.save_value_function(splitString[1])
            print "Saved."
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString = inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "SampleSarsaAgent(Python) does not understand your message."


if __name__ == "__main__":
    AgentLoader.loadAgent(sarsa_agent())
Example #45
        return returnAction

    def agent_step(self, reward, observation):
        #print observation.intArray
        #Generate random action, 0 or 1
        thisIntAction = self.randGenerator.randint(0, 1)
        returnAction = Action()
        returnAction.intArray = [thisIntAction]

        lastAction = copy.deepcopy(returnAction)
        lastObservation = copy.deepcopy(observation)

        return returnAction

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is skeleton_agent, Python edition!"
        else:
            return "I don't know how to respond to your message"


if __name__ == "__main__":
    AgentLoader.loadAgent(skeleton_agent())
Example #46
            self.DQN.experienceReplay(self.time)

        # Simple text based visualization
        print '  REWARD %.1f   / EPSILON  %.5f' % (np.sign(reward),
                                                   self.epsilon)

        # Time count
        if not self.policyFrozen:
            self.time += 1

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        if inMessage.startswith("freeze learning"):
            self.policyFrozen = True
            return "message understood, policy frozen"

        if inMessage.startswith("unfreeze learning"):
            self.policyFrozen = False
            return "message understood, policy unfrozen"

        if inMessage.startswith("save model"):
            with open('dqn_model.dat', 'w') as f:
                pickle.dump(self.DQN.model, f)
            return "message understood, model saved"


if __name__ == "__main__":
    AgentLoader.loadAgent(dqn_agent())
Example #47
				if(reward > action_reward):
					action = randAction
					action_reward = reward
				 
			if(action_reward > numpy.inner(self.reward_weight,self.lastObservation.doubleArray)):
				#print "predicted state====", numpy.add(numpy.inner(self.value_function_weight, randAction), self.last_observation_list[-1])
				return action

	def randomAction(self):
		"""
			generate random action.--- test purpose

		"""
		 
		action = []
		action_length = len(self.rangeAction) 
		for i in range(0,action_length):
			action.append(self.randGenerator.uniform(self.rangeAction[i][0],self.rangeAction[i][1]))
		return action
if __name__=="__main__":
	AgentLoader.loadAgent(helicopter_agent())
Example #48
	def printQue(self):
		if self.strategyIndex ==0: #BFS
			h = Queue.Queue()
		elif self.strategyIndex == 1 or self.strategyIndex == 2: #DFS , ID
			h = Queue.LifoQueue()
		elif self.strategyIndex == 3 or self.strategyIndex == 4: #UCS, A* 
			h = Queue.PriorityQueue()
		
		while not self.heapQueue.empty():
			temp= self.heapQueue.get()
			h.put(temp)
			print temp[1],
		print 

	
	def agent_end(self, reward):
		print reward
	
	def agent_cleanup(self):
		pass
	
	def agent_message(self, inMessage):
		if inMessage == "what is your name?":
			return "my name is skeleton_agent, Python edition!";
		else:
			return "I don't know how to respond to your message";
	
if __name__ == "__main__":
	AgentLoader.loadAgent(skeleton_agent())

Example #49
    def agent_step(self,reward, observation):
        self.stepCount=self.stepCount+1
        action=Action()
        action.intArray=observation.intArray
        action.doubleArray=observation.doubleArray
        action.charArray=observation.charArray

        return action

    def agent_end(self,reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self,inMessage):
        timesToPrint=self.stepCount%3

        outMessage=inMessage+"|"
        for i in range(0, timesToPrint):
            outMessage=outMessage+"%d" % (self.stepCount)
            outMessage=outMessage+"."

        outMessage=outMessage+"|"+inMessage

        return outMessage


if __name__=="__main__":
    AgentLoader.loadAgent(test_1_agent())
Example #50
            plt.savefig('plots/%s_episode_qvals.png' % self.prefix)
            plt.close()
        except:
            print "Failed to render plots"

    def resize_image(self, observation):
        image = observation[128:].reshape(IMAGE_HEIGHT, IMAGE_WIDTH, 3)
        image = np.array(image, dtype='uint8')

        offset = 10  # remove ACTIVISION logo
        width = RESIZED_WIDTH
        height = int(round(float(IMAGE_HEIGHT) * RESIZED_HEIGHT / IMAGE_WIDTH))

        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        image = cv2.resize(image, (width, height),
                           interpolation=cv2.INTER_LINEAR)
        image = image[height - offset - RESIZED_HEIGHT:height - offset, :]

        if False:
            plt.figure()
            plt.imshow(image, cmap=cm.Greys_r)
            plt.show()

        return image


if __name__ == '__main__':
    prefix = sys.argv[1] if len(sys.argv) > 1 else ""
    network_file = sys.argv[2] if len(sys.argv) > 2 else None
    AgentLoader.loadAgent(DeepQLearningAgent(prefix, network_file))
Example #51
    def agent_step(self, reward, observation):
        self.stepCount = self.stepCount + 1
        action = Action()
        action.intArray = observation.intArray
        action.doubleArray = observation.doubleArray
        action.charArray = observation.charArray

        return action

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        timesToPrint = self.stepCount % 3

        outMessage = inMessage + "|"
        for i in range(0, timesToPrint):
            outMessage = outMessage + "%d" % (self.stepCount)
            outMessage = outMessage + "."

        outMessage = outMessage + "|" + inMessage

        return outMessage


if __name__ == "__main__":
    AgentLoader.loadAgent(test_1_agent())
Example #52
        theFile.close()

    def agent_message(self, inMessage):
        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString = inMessage.split(" ")
            self.save_value_function(splitString[1])
            print "Saved."
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString = inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "QLearnAgent(Python) does not understand your message."


if __name__ == "__main__":
    AgentLoader.loadAgent(QLearnAgent())
Example #53
                        default=None,
                        help='image height')
    parser.add_argument('--width',
                        metavar='W',
                        type=int,
                        default=None,
                        help='image width')
    parser.add_argument('--color',
                        metavar='L',
                        type=str,
                        default='ale',
                        help='frame color mode')
    parser.add_argument('--name',
                        metavar='N',
                        type=str,
                        default='frames',
                        help='output file name')

    args = parser.parse_args()
    if (args.width is None) or (args.height is None):
        resize = None
    else:
        resize = (args.height, args.width)
    print "Vision agent with capacity " + str(args.capacity)

    AgentLoader.loadAgent(
        ALEVisionAgent(mem_capacity=args.capacity,
                       resize=resize,
                       color_mode=args.color,
                       name=args.name))
Example #54
        print "--------------------------------------------------"
        s = hf.getOkolica(observation,ok,ok,ok,ok)
        print "step: %d     reward: %.2f   " % \
                (self.trial_steps, self.trial_reward)
        print "\n".join(["".join(i) for i in s])
        print "x: %2.2f    y: %2.2f    q-len: %d " % \
                (mario.x, mario.y, len(self.Q))
        print ""

    def print_stats(self):
        time_passed = time.time() - self.trial_start
        self.best_reward = max(self.best_reward,self.trial_reward)
        
        self.print_world()

        print "trial number:      %d -" % (self.trial_number)
        print "number of steps:   %d" % (self.trial_steps)
        print "steps per second:  %d" % (self.trial_steps/time_passed)
        print "trial reward pos:  %.2f" % (self.trial_reward_pos)
        print "trial reward neg:  %.2f" % (self.trial_reward_neg)
        print "trial reward:      %.2f" % (self.trial_reward)
        print "best score so far: %.2f" % (self.best_reward)
        print ""
        
       

if __name__=="__main__":        
    AgentLoader.loadAgent(FixedPolicyAgent())

Example #55
        alpha = self.alpha / float(np.sum(self.phi != 0.))
        self.theta += alpha * delta * self.trace
        if not greedy:
            self.trace *= 0.  #reset trace
        return a_ns  #a_ns is action index (not action value)


class BasicALEQLearningAgent(BasicALEAgent, ALEQLearningAgent):
    pass


class RAMALEQLearningAgent(RAMALEAgent, ALEQLearningAgent):
    pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Agent')
    parser.add_argument('--features',
                        metavar='F',
                        type=str,
                        default='RAM',
                        help='features to use: RAM or BASIC')
    ALEQLearningAgent.register_with_parser(parser)
    args = parser.parse_args()
    if args.features == 'RAM':
        AgentLoader.loadAgent(RAMALEQLearningAgent(args))
    elif args.features == 'BASIC':
        AgentLoader.loadAgent(BasicALEQLearningAgent(args))
    else:
        raise Exception('unknown feature type')
Example #56
                    help='features to use: RAM or BASIC')
                    
    parser.add_argument('--actions', metavar='C',type=int, default=None, 
                        nargs='*',help='list of allowed actions')

    args = parser.parse_args()
    
    act = None
    if not (args.actions is None):
        act = np.array(args.actions)

    if args.features == 'RAM':
        AgentLoader.loadAgent(RAMALESarsaAgent(agent_id=args.id,
                                     alpha =args.alpha,
                                     lambda_=args.lambda_,
                                     eps =args.eps,
                                     gamma=args.gamma, 
                                     save_path=args.savepath,
                                     actions = act))
    elif args.features == 'BASIC':
        AgentLoader.loadAgent(BasicALESarsaAgent(agent_id=args.id,
                                     alpha =args.alpha,
                                     lambda_=args.lambda_,
                                     eps =args.eps,
                                     gamma=args.gamma, 
                                     save_path=args.savepath,
                                     actions = act))
    else:
        print 'unknown feature type'
    
        
Example #57
        alpha = 0.1
        self.thetax[x][last_action] = self.thetax[x][
            last_action] + alpha * delta * self.ex[x]
        self.thetax[y][last_action] = self.thetax[y][
            last_action] + alpha * delta * self.ey[y]
        self.thetaxy[x][y][last_action] = self.thetaxy[x][y][
            last_action] + alpha * delta * self.exy[x][y]

        #print Reward,self.thetax,self.thetay

    def maxim(self, state):
        return max(self.qfunction[state])

    def epsilon_greedy(self, state):
        if random.random() < self.epsilon:
            return random.randint(0, 3)
        else:
            k = self.qfunction[state].index(max(self.qfunction[state]))
            #print k
            return k

    def agent_cleanup(self):
        pass

    def agent_message(self, Message):
        pass


if __name__ == "__main__":
    AgentLoader.loadAgent(q_agent())
    parser.add_argument("--alpha", metavar="A", type=float, default=0.5, help="learning rate")
    parser.add_argument("--lambda_", metavar="L", type=float, default=0.9, help="trace decay")
    parser.add_argument("--eps", metavar="E", type=float, default=0.05, help="exploration rate")
    parser.add_argument("--savepath", metavar="P", type=str, default=".", help="save path")
    parser.add_argument("--features", metavar="F", type=str, default="BASIC", help="features to use: RAM or BASIC")
    parser.add_argument("--actions", metavar="C", type=int, default=None, nargs="*", help="list of allowed actions")

    args = parser.parse_args()

    if args.features == "RAM":
        AgentLoader.loadAgent(
            RAMALEQlearningAgent(
                agent_id=args.id,
                alpha=args.alpha,
                lambda_=args.lambda_,
                eps=args.eps,
                gamma=args.gamma,
                save_path=args.savepath,
                actions=args.actions,
            )
        )
    elif args.features == "BASIC":
        AgentLoader.loadAgent(
            BasicALEQlearningAgent(
                agent_id=args.id,
                alpha=args.alpha,
                lambda_=args.lambda_,
                eps=args.eps,
                gamma=args.gamma,
                save_path=args.savepath,
                actions=args.actions,