Example #1
0
    def __init__(self,
                 n_input,
                 actions,
                 file,
                 epsilon=.05,
                 kappa=.1,
                 alpha=0.0002,
                 horizon=100,
                 gamma=0.98,
                 qlambda=0.8,
                 reward_as_input=False):
        self.epsilon = epsilon
        self.horizon = horizon

        self.alpha = alpha
        self.gamma = gamma
        self.qlambda = qlambda
        self.kappa = kappa
        CompleteLearner.__init__(self, actions, file)
        self.numActions = len(self.actions)
        self.laststate = None
        self.state = None
        self.dataset = {'state': [], 'action': [], 'reward': []}
        self.nn = None
        self.target_nn = None
        self.lastaction = None
        self.t = 0
        self.reward_as_input = reward_as_input
        self.n_input = n_input + 1 if self.reward_as_input else n_input
        self.next_advantages = [0 for i in range(self.numActions)]

        self.nn = self.buildLSTMNetwork()
        self.td_nn = self.buildTDnetwork()
        self.td_trainer = BackpropTrainer(self.td_nn)
        self.TDnext = [0]
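Example #1 wires a PyBrain BackpropTrainer around the separate TD network. The snippet below is only an illustrative sketch of how such a trainer is usually fed data in PyBrain; the learner instance, observation, and td_target names are placeholders, not values taken from these examples.

# Illustrative PyBrain usage; `learner`, `observation` and `td_target` are placeholders.
from pybrain.datasets import SupervisedDataSet

ds = SupervisedDataSet(learner.n_input, 1)   # input vectors of size n_input, one TD target each
ds.addSample(observation, [td_target])       # accumulate (input, target) training pairs
learner.td_trainer.setData(ds)               # point the BackpropTrainer at the dataset
error = learner.td_trainer.train()           # one epoch of backprop; returns the training error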
Example #2
0
 def __init__(self, n_input, actions, file, states=None, epsilon=.05, alpha=0.02,
              horizon=100, gamma=0.95, qlambda=0.9, network=False):
     self.epsilon = epsilon
     self.horizon = horizon
     self.n_input = n_input
     self.alpha = alpha
     self.gamma = gamma
     self.qlambda = qlambda
     CompleteLearner.__init__(self, actions, file)
     self.numActions = len(self.actions)
     self.laststate = None
     self.state = None
     self.dataset = {'laststate': [], 'state': [], 'action': [], 'reward': []}
     self.nn = None
     self.target_nn = None
     self.t = 0
     self.updates = []
     if network:
         self.nn, self.params = self.initNetwork()
         self.target_nn = self.nn
         self.target_params = self.params
     else:
         self.states = states
         self.Q = {}
         for state in self.states:
             self.Q[state] = []
             for a in range(self.numActions):
                 self.Q[state].append(self.gamma + random())
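When network is False, the tabular branch above fills self.Q with one small random value per action. A minimal sketch (not part of the original code) of epsilon-greedy selection over that dictionary, assuming a hypothetical choose_action method and `from random import random, randint`:

# Hypothetical helper, for illustration only.
def choose_action(self):
    if random() < self.epsilon:
        return randint(0, self.numActions - 1)      # explore: uniform random action index
    values = self.Q[self.state]
    return values.index(max(values))                # exploit: index of the highest Q-value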
Example #3
0
    def __init__(self, task_features, use_task_bias, use_task_gain, n_inputs, trace_length, actions, file, episodic,
                 loss=None, target_model=False, num_neurons=80, large_scale=False, terminal_known=False,
                 recurrent=True, agent=None, intervals=[], params={}):
        self.terminal_states_known = terminal_known
        CompleteLearner.__init__(self, actions, file, episodic)

        self.init_variables()
        self.action_size = len(actions)
        if agent is None:
            self.agent = PPO_Learner.init_agent(n_inputs, actions, trace_length, episodic,
                                                task_features, use_task_bias, use_task_gain,
                                                num_neurons, params, large_scale, recurrent)
        else:
            self.agent = agent
        self.state_size = PPO_Learner.set_state_size(n_inputs, trace_length, recurrent)
        self.continue_experiment(intervals)

        self.s_t = np.zeros(self.state_size)
        # self.model_objective = EWC_objective(lbda_task,learning_rate,batch_size,model,n_in, n_out,lbda,output_type=OutputType.linear,epochs=200,
        #          objective_type=ObjectiveType.EWC,task_weights=None)
        # self.target_model_objective = EWC_objective_linoutput(out,lbda)
        # agent.model.compile(loss=self.model_objective.objective)
        #
        # agent.target_model.compile()

        print(self.__dict__)
        print(self.agent.__dict__)
Example #4
0
 def __init__(self, actions, n_inputs, file=''):
     CompleteLearner.__init__(self, actions, file)
     self.n_inputs = n_inputs
     self.n_actions = len(actions)
     self.actions = actions
     self.units = [
         0.0 for i in self.sensoryIndexes() + self.actionIndexes()
     ]
Example #5
0
 def __init__(self, n_input, actions, file, states=None, alpha=0.02, horizon=100, gamma=0.95):
     # self.epsilon = epsilon
     self.horizon = horizon
     self.n_input = n_input
     self.alpha = alpha
     self.gamma = gamma
     CompleteLearner.__init__(self, actions, file)
     self.t = 0
     # Task, config_or_savefile and seed are not defined in this snippet; they must be
     # supplied by the surrounding code before PolicyGradient can be constructed.
     self.rpg = PolicyGradient(self, Task, config_or_savefile, seed, dt=None, load='best')
Example #6
0
    def setTime(self, t):
        increment = t - self.t
        if self.episodic:
            self.task_t += increment
        self.pols[self.current_pol].update_task_time(increment)
        CompleteLearner.setTime(self, t)

        if self.num_policies > 1:
            self.policy_chosen = False  # allow choose_policy again when time has passed
        if DEBUG_MODE:
            self.check_policy_variables()
Example #7
0
 def __init__(self,
              n_input,
              actions,
              file,
              states=None,
              epsilon=.05,
              alpha=0.02,
              horizon=100,
              gamma=0.95,
              qlambda=0.9,
              network=False,
              alpha_schedule=None,
              batch_size=1000,
              reward_as_input=False,
              stateful=False):
     self.epsilon = epsilon
     self.horizon = horizon
     self.batch_size = batch_size
     self.n_input = n_input
     self.alpha = alpha
     self.gamma = gamma
     self.qlambda = qlambda
     self.stateful = stateful
     CompleteLearner.__init__(self, actions, file)
     self.numActions = len(self.actions)
     self.laststate = None
     self.state = None
     self.reward_as_input = reward_as_input
     if self.reward_as_input:
         self.n_input += 1
     self.dataset = {'state': [], 'action': [], 'reward': []}
     self.nn = None
     self.target_nn = None
     self.t = 0
     self.states_batch = []
     self.targets_batch = []
     self.alpha_schedule = alpha_schedule
     if network:
         self.nn, self.params = self.initNetwork()
         self.target_nn = self.nn
         self.target_params = self.params
     else:
         self.states = states
         self.Q = {}
         for state in self.states:
             self.Q[state] = []
             for a in range(self.numActions):
                 self.Q[state].append(self.gamma + random())
Example #8
0
 def __init__(self,
              n_input,
              actions,
              file,
              states=None,
              epsilon=.05,
              alpha=0.02,
              horizon=100,
              gamma=0.95,
              qlambda=0.9,
              network=False):
     self.epsilon = epsilon
     self.horizon = horizon
     self.n_input = n_input
     self.alpha = alpha
     self.gamma = gamma
     self.qlambda = qlambda
     CompleteLearner.__init__(self, actions, file)
     self.numActions = len(self.actions)
     self.laststate = None
     self.state = None
     self.dataset = {
         'laststate': [],
         'state': [],
         'action': [],
         'reward': []
     }
     self.nn = None
     self.target_nn = None
     self.t = 0
     if network:
         self.batch_size = BATCH_SIZE
         self.replay_memory = REPLAY_MEMORY
         self.update_freq = UPDATE_FREQ
         if self.replay_memory:
             self.replay_buffer = ReplayBuffer(self.replay_memory,
                                               batch_size=self.batch_size,
                                               horizon=self.horizon)
         self.nn, self.params = self.initNetwork()
         self.target_nn = self.nn
         self.target_params = self.params
     else:
         self.states = states
         self.Q = {}
         for state in self.states:
             self.Q[state] = []
             for a in range(self.numActions):
                 self.Q[state].append(self.gamma + random())
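The network branch of Example #8 sets up the ingredients of a DQN-style update: a replay buffer, an online network, a target network, and an update frequency. The sketch below only illustrates that standard pattern; the sample/predict/fit/get_weights/set_weights method names are assumptions for illustration, not the actual ReplayBuffer or network API used in these examples.

# Illustrative DQN-style replay update; every method name below is assumed, not taken from this code.
def replay_update(self):
    for s, a, r, s_next, done in self.replay_buffer.sample():   # assumed: batch of transitions
        target = self.nn.predict(s)                             # current Q-value estimates for s
        if done:
            target[a] = r                                       # terminal step: no bootstrap
        else:
            target[a] = r + self.gamma * max(self.target_nn.predict(s_next))
        self.nn.fit(s, target)                                  # move Q(s, a) toward the TD target
    if self.t % self.update_freq == 0:
        self.target_nn.set_weights(self.nn.get_weights())       # periodic target-network sync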
Example #9
0
 def __init__(self,
              n_input,
              actions,
              file,
              states=None,
              epsilon=.05,
              alpha=0.02,
              horizon=100,
              gamma=0.95,
              qlambda=0.9,
              network=False,
              alpha_schedule=None):
     self.epsilon = epsilon
     self.horizon = horizon
     self.n_input = n_input
     self.alpha = alpha
     self.gamma = gamma
     self.qlambda = qlambda
     CompleteLearner.__init__(self, actions, file)
     self.numActions = len(self.actions)
     self.laststate = None
     self.state = None
     self.dataset = {
         'laststate': [],
         'state': [],
         'action': [],
         'reward': []
     }
     self.nn = None
     self.target_nn = None
     self.t = 0
     self.alpha_schedule = alpha_schedule
     if network:
         # input_nodes, output_nodes and num_candidate_nodes are not defined in this
         # snippet; they must be supplied by the surrounding code.
         self.net = CascadeNet(input_nodes, output_nodes,
                               num_candidate_nodes)
         self.net.learn_rate = 0.05
         self.net.momentum_coefficent = 0.0
         self.net.output_connection_dampening = 1.0
         self.net.use_quick_prop = True
     else:
         self.states = states
         self.Q = {}
         for state in self.states:
             self.Q[state] = []
             for a in range(self.numActions):
                 self.Q[state].append(self.gamma + random())
Example #10
0
    def printDevelopmentAtari(self, frames):
        CompleteLearner.printDevelopmentAtari(self, frames)
        if self.t % self.policy_space_picture_freq == 0:

            if self.do_taskdrift():
                self.stats.develop()
                self.get_performance_diversity()
                self.stats.diversity.append(self.get_diversity())

                self.stats.provision.append([prov for prov in self.provision])
                for task in self.task_coords:
                    self.stats.snapshot_taskcoords[task].append(self.task_coords[task])
                    for pol in range(self.num_policies):
                        self.stats.mindist_performances[task][pol].append(self.avg_velocities[task][pol])

            else:
                if self.num_policies > 1:
                    self.stats.diversity.append(self.get_diversity())
Example #11
0
    def __init__(self, episodic, actions, filename, pols, weights, decision_frequency=100, stepsize=0.10,
                 init_provision=1.0, consumption=.10, reward_scale=.50, initialise_unseen=False,
                 one_to_one=False, unadaptive=False, probability_rule=ProbabilityRule.epsilon):
        CompleteLearner.__init__(self, actions, filename)
        self.occurence_weights = weights
        self.pols = pols
        self.num_policies = len(pols)
        self.init_provision = init_provision * self.num_policies
        self.provision = [self.init_provision for _ in range(self.num_policies)]
        self.testing_ticks = 0
        self.consumption = consumption
        self.reward_scale = reward_scale * consumption * self.num_policies
        self.stepsize = stepsize
        self.decision_frequency = decision_frequency
        self.current_feature = None
        self.one_to_one = one_to_one
        self.unadaptive = unadaptive
        self.probability_rule = probability_rule
        if self.do_taskdrift():
            self.construct_policy_space()
        self.current_pol = 0  # convention
        self.policy_chosen = False
        self.episodic = episodic
        self.initialise_unseen = initialise_unseen
        if self.initialise_unseen:
            self.N_trainings = {}
        # self.offline_updates = offline_updates
        if self.episodic:
            self.task_t = 0
            self.task_R = 0.0
        if weights is not None:
            self.initialise_task_coords()
Example #12
0
 def __init__(self, n_input, actions, alpha=0.5, gamma=0.99, qlambda=0.9,
              explorer=EpsilonGreedyExplorer(epsilon=0.20, decay=1)):
     CompleteLearner.__init__(self, actions)
     controller = ActionValueNetwork(dimState=n_input, numActions=len(actions))
     learner = NFQ()
     learner.explorer = explorer
     self.learning_agent = LearningAgent(controller, learner)
Example #13
0
 def setReward(self, reward):
     CompleteLearner.setReward(self, reward)
     self.add_sample()
Example #14
0
 def setReward(self, reward):
     CompleteLearner.setReward(self, reward)
     self.pols[self.current_pol].update_task_reward(reward)
     if self.episodic:
         self.task_R += reward
Example #15
0
 def save_stats(self, filename):
     CompleteLearner.save_stats(self, filename)
     for pol in range(self.num_policies):
         self.pols[pol].save_stats(filename + 'pol%d' % pol)