def setReward(self, reward):
    SSA_with_WM.setReward(self, reward)
    self.Stack.task_Rs[self.current_feature] += reward
    if self.check_baseline_condition():
        t = self.get_time_passed()
        R = self.get_reward_passed()
        if t > 0:
            self.Stack.compute_baseline(R, t)
            self.after_estimation()
    if self.weighting_type == WeightingType.time_consumption and \
            self.Stack.task_ts[self.current_feature] % WEIGHT_FREQUENCY == 0:
        self.weights = self.get_task_weight_timeconsumption()
        self.after_estimation()

def printDevelopment(self):
    SSA_with_WM.printDevelopment(self)
    self.stats.num_checksOverTime.append(self.stats.num_checks)
    self.stats.num_evaluationsOverTime.append(self.stats.num_evaluations)
    self.stats.num_backchecksOverTime.append(self.stats.num_backchecks)
    self.stats.num_backtrackingOverTime.append(self.stats.num_backtracking)
    self.stats.num_earlystopsOverTime.append(self.stats.num_earlystops)
    self.stats.num_prestopsOverTime.append(self.stats.num_prestops)
    self.stats.num_checks = 0
    self.stats.num_evaluations = 0
    self.stats.num_backchecks = 0
    self.stats.num_backtracking = 0
    self.stats.num_earlystops = 0
    self.stats.num_prestops = 0
    if self.t > 0:
        # add two additional to verify it stopped
        if len(self.stats.baselineOverTime[self.current_feature]) < NUM_BASE_LINE_TRACKS:
            self.stats.baselineOverTime[self.current_feature].append(
                self.Stack.get_base(self.current_feature))

def __init__(self, weighting_type, absolute, SSA_with_WMparams):
    SSA_with_WM.__init__(self, **SSA_with_WMparams)
    self.current_feature = None
    self.seen_tasks = set()
    self.absolute = absolute
    self.V_old_second = float("inf")
    if weighting_type == "fixed":
        self.weighting_type = WeightingType.fixed
    elif weighting_type == "time_consumption":
        self.weighting_type = WeightingType.time_consumption
    else:
        raise Exception("unknown weighting type")
    self.stats.baselineOverTime = {}
    self.stats.num_checks = 0
    self.stats.num_checksOverTime = []
    self.stats.num_evaluations = 0
    self.stats.num_evaluationsOverTime = []
    self.stats.num_backchecks = 0
    self.stats.num_backchecksOverTime = []
    self.stats.num_backtracking = 0
    self.stats.num_backtrackingOverTime = []
    self.stats.num_earlystops = 0
    self.stats.num_earlystopsOverTime = []
    self.stats.num_prestops = 0
    self.stats.num_prestopsOverTime = []
    self.max_V = {}
    # self.t_M = {}
    self.first_task_instance = False
    self.set_sorted_firsts([0])
    print("initialised taskspecific with weighting type %s and absolute=%s criterion"
          % (weighting_type, str(absolute)))

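# The WeightingType enum used above is defined elsewhere in the project; a
# minimal sketch, assuming only the two members that actually appear in this
# file (in __init__ and setReward):
#
#   from enum import Enum
#
#   class WeightingType(Enum):
#       fixed = 0
#       time_consumption = 1
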
def main(arg_list):
    V1, V2, V3, V4 = calc_velocity_random()
    walltime = 60 * 3600  # 60 hours by default
    if args.walltime:
        # convert a "HH:MM:SS" walltime string to seconds, e.g. "60:00:00" -> 216000
        ftr = [3600, 60, 1]
        walltime = sum(a * b for a, b in zip(ftr, map(int, args.walltime.split(':'))))
    print(walltime)
    probabilistic = False
    probstring = ''
    if args.probabilistic:
        probabilistic = True
        probstring = 'prob'
    ffstring = ''
    ff = False
    if args.ff:
        ff = True
        ffstring = 'ff'
    actionstring = ''
    if args.actions == 'random':
        SSANeat_Params['internal_actionsNEAT'] = internalActionsNEATrandom
        actionstring = 'random'
    methodname = args.method if args.method is not None else ""
    run = args.run if args.run is not None else 0
    pacmanparams['seed'] = run
    wm = ''
    stoptime = STOPTIME if args.STOPTIME is None else args.STOPTIME
    # catastrophicforgetting_tasks(stoptime)
    features, weights, velocities = curriculum_tasks(stoptime)
    task_inputs = 0 if not pacmanparams['include_task_features'] else len(features[0])
    time_inputs = 0 if not pacmanparams['inform_task_time'] else 1
    pacmanparams['observation_length'] += task_inputs + time_inputs
    inputs = pacmanparams['observation_length']
    SSA_WM_Params['num_inputs'] = pacmanparams['observation_length']
    SSA_WM_Params['wm_cells'] = WORKING_CELLS
    SSA_WM_Params['num_program'] = PROGRAM_CELLS
    print(SSA_WM_Params)
    filename = 'POcman'
    if args.filename:
        filename = args.filename + '_' + methodname + actionstring + probstring \
                   + ffstring + str(run)
    if args.config_file is not None:
        configfile = str(os.environ['HOME']) \
            + '/PycharmProjects/PhD/Configs/IS-NEAT-Singlemaze_' + args.config_file + ".ini"
        parse_config_file(filename, configfile, pacmanparams)
        filename += args.config_file
    setSSA_WM_Params(SSA_WM_Params)
    setIS_NEAT_params(pacmanparams, SSANeat_Params, networktypeMap)
    print(pacmanparams)
    print(SSA_WM_Params)
    print(SSANeat_Params)
    internalActionsSSA_WM.update(incPset)
    num_PLAs = 0
    for key in internalActionsSSA_WM:
        if key in ['searchP', 'incP', 'decP', 'inc_means', 'dec_means', 'sample']:
            num_PLAs += 1
    visual = False
    if args.VISUAL:
        visual = args.VISUAL
    print(visual)
    pacmanparams['record_intervals'] = None
    if args.record_video:
        visual = True
        pacmanparams['record_intervals'] = get_record_intervals(STOPTIME)
    print(pacmanparams)
    print(internalActionsSSA_WM)
    environmentfile = None
    if args.environment_file:
        environmentfile = filename + '_environment'
    if args.run_type == "stats":
        print("reading environmentfile:" + str(environmentfile))
        with open(environmentfile, "rb") as f:
            e = pickle.load(f)
        print("file read")
        getStatistics(e, filename)
        return
    if environmentfile is not None:
        print("reading environment file")
        # Getting back the objects:
        with open(environmentfile, "rb") as f:
            e = pickle.load(f)
        if hasattr(e.agent.learner, "action_model"):
            e.agent.learner.action_model.load(filename + '_actionmodel.h5')
        if hasattr(e.agent.learner, "evalPol"):
            e.agent.learner.evalPol.load(filename + '_evalPol.h5')
        print("starting at " + str(e.t))
        e.start = time.time()
        if args.run_type == "test":
            print("preparing test run")
            # `switching`, `reward_func` and `default_task` are not set in this
            # main and are assumed to be module-level globals here.
            if switching:
                initializeSwitchingTasks(seed=run, stoptime=e.t + TEST_ITS,
                                         generate_new=GENERATE_NEW, start_time=e.t)
            else:
                initializeDefaultNavigationTask(
                    filename, default_task, pacmanparams, run,
                    SAMPLING_RATE if not pacmanparams['real_time'] else REAL_SAMPLING_RATE,
                    stoptime, reward_func)
            e.tasks = pacmanparams['tasks']
            if args.VISUAL:
                e.visual = True
                e.initVisualisation(pacmanparams['record_intervals'], filename + "_video")
        else:
            if not (switching and GENERATE_NEW) or e.interrupted:
                # else the new task is already in place
                if stoptime:  # assuming one task
                    print("a")
                    e.currentTask.end_time = stoptime
                    print(e.currentTask.end_time)
                else:
                    print("b")
                    e.currentTask.end_time = STOPTIME
                e.currentTask.initialized = True
                e.tasks = [e.currentTask] + e.tasks
            if args.VISUAL:
                e.visual = True
                e.initVisualisation(pacmanparams['record_intervals'], filename + "_video")
        print(e.start)
    else:
        # create environment
        n_input = inputs
        # create agent
        x = randint(0, MAPSIZEX - 1)
        y = randint(0, MAPSIZEY - 1)
        if methodname == 'SSA_WM':
            enhance_PLA = 20
            SSA_WM_Params['filename'] = filename
            SSA_WM_Params['enhance_PLA'] = \
                enhance_PLA / num_PLAs if not pacmanparams['real_time'] else 0
            print("enhance PLA = " + str(SSA_WM_Params['enhance_PLA']))
            method = SSA_with_WM(**SSA_WM_Params)
        elif methodname == 'RandomLearner':
            method = RandomLearner(externalActions, filename)
        elif methodname == "DRQN":
            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = [1]
            batch_size = 32
            n_input = 4
            trace_length = 1
            use_task_bias = True
            use_task_gain = True
            method = DRQN_Learner(task_features, use_task_bias, use_task_gain,
                                  batch_size, n_input, trace_length,
                                  externalActions, filename,
                                  episodic=False, loss=None)
        else:
            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = [1]
            batch_size = 32
            n_input = 4
            trace_length = 50
            use_task_bias = True
            use_task_gain = True
            # signature: (task_features, use_task_bias, use_task_gain, batch_size,
            #             n_inputs, trace_length, actions, file, episodic, loss=None)
            # batch_size was defined but missing from this call in the original;
            # it is passed here to match the signature documented above.
            method = DRQN_Learner(task_features, use_task_bias, use_task_gain,
                                  batch_size, n_input, trace_length,
                                  externalActions, filename,
                                  episodic=True, loss=None)
        e = POcman(PacmanAgent(method, pacmanparams), visual, pacmanparams)
    e.run(walltime)
    contin = continue_experiment(e.interrupt, arg_list)
    save_stats = not contin
    finalise_experiment(e, filename, arg_list, NO_SAVING, args, save_stats=save_stats)

def setTime(self, t):
    increment = t - self.t
    SSA_with_WM.setTime(self, t)
    self.Stack.task_ts[self.current_feature] += increment

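# Hedged sketch of how the bookkeeping above plausibly feeds the baseline:
# setReward accumulates Stack.task_Rs and setTime accumulates Stack.task_ts per
# task feature, so compute_baseline(R, t) presumably stores a reward-per-
# timestep rate that Stack.get_base(feature) later returns, e.g.:
#
#   def compute_baseline(self, R, t):
#       # assumption: a simple rate estimate; the real update may smooth/window
#       self.base[self.current_feature] = R / float(t)
#
# This is illustrative only; the actual Stack implementation lives elsewhere.
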
def main(arg_list):
    print(sys.version)
    freeze = False
    freeze_str = ''
    if args.freeze is not None:
        freeze = args.freeze
    if freeze:
        internalActionsSSA_WM['freeze'] = 2
    walltime = 60 * 3600  # 60 hours by default
    if args.walltime:
        # convert "HH:MM:SS" to seconds
        ftr = [3600, 60, 1]
        walltime = sum(a * b for a, b in zip(ftr, map(int, args.walltime.split(':'))))
    print(walltime)
    probabilistic = False
    probstring = ''
    if args.probabilistic:
        probabilistic = True
        probstring = 'prob'
    ffstring = ''
    ff = False
    if args.ff:
        ff = True
        ffstring = 'ff'
    actionstring = ''
    if args.actions == 'random':
        SSANeat_Params['internal_actionsNEAT'] = internalActionsNEATrandom
        actionstring = 'random'
    methodname = args.method
    filename = 'new'
    run = args.run if args.run is not None else 666
    switching = False
    if args.switch is not None:
        switching = args.switch
    inform_mazenum = False
    if args.inform_mazenum is not None:
        inform_mazenum = args.inform_mazenum
    difficulty = getDifficultySwitch() if switching else getDifficulty(run)
    wm = ''
    if methodname in ['SSA_WM', 'SSA_NEAT_WM', 'SSA_WM_FixedNN']:
        if args.working_memory and methodname in ['SSA_WM', 'SSA_NEAT_WM']:
            SSA_WM_Params['wm_cells'] = args.working_memory
        else:
            SSA_WM_Params['wm_cells'] = WORKING_CELLS[difficulty]
        wm = 'wm' + str(SSA_WM_Params['wm_cells'])
    p = ''
    # note: the second membership list was garbled in the original
    # ('SSA_NEAT_Environments/WM'); restored to match the list above
    if methodname in ['SSA_WM', 'SSA_NEAT_WM', 'SSA_WM_FixedNN']:
        if args.program_cells:
            SSA_WM_Params['num_program'] = args.program_cells
        else:
            SSA_WM_Params['num_program'] = PROGRAM_CELLS[difficulty]
        p = 'p' + str(SSA_WM_Params['num_program'])
    print(SSA_WM_Params)
    filename = ''
    if args.filename:
        filename = args.filename + '_' + methodname + wm + p + actionstring \
                   + probstring + ffstring + freeze_str + str(run)
    if args.config_file is not None:
        configfile = str(os.environ['HOME']) + '/PycharmProjects/PhD/Configs/' + args.config_file
        filename = parse_config_file(filename, configfile, defaultparams)
    setSSA_WM_Params(defaultparams, SSA_WM_Params)
    setIS_NEAT_params(defaultparams, SSANeat_Params, networktypeMap)
    print(defaultparams)
    print(SSA_WM_Params)
    print(SSANeat_Params)
    if SSA_WM_Params['eval']:
        internalActionsSSA_WM.update(searchPset)
    else:
        internalActionsSSA_WM.update(incPset)
    num_PLAs = 0
    for key in internalActionsSSA_WM:
        if key in ['searchP', 'incP', 'decP']:
            num_PLAs += 1
    visual = False
    if args.VISUAL:
        visual = args.VISUAL
    print(visual)
    print(defaultparams)
    print(internalActionsSSA_WM)
    environmentfile = None
    if args.environment_file:
        environmentfile = filename + '_environment'
    if environmentfile:
        print("reading environment file")
        # Getting back the objects ("rb" added for Python 3 pickling;
        # load/dump are assumed to be pickle's):
        e = load(open(environmentfile, "rb"))
        e.start = time.time()
        if args.test_run:
            print("preparing test run")
            if switching:
                initializeSwitchingTasks(seed=run, stoptime=e.t + TEST_ITS,
                                         generate_new=GENERATE_NEW, start_time=e.t)
            else:
                initializeDefaultTasks(run=run, stoptime=e.t + TEST_ITS,
                                       real_time=e.real_time)
            e.tasks = defaultparams['tasks']
            if args.VISUAL:
                e.visual = True
                e.initVisualisation()
        else:
            if not (switching and GENERATE_NEW) or e.interrupted:
                # else the new task is already in place
                if args.STOPTIME:  # assuming one task
                    e.currentTask.end_time = args.STOPTIME
                else:
                    e.currentTask.end_time = STOPTIMES[difficulty]
                e.currentTask.initialized = True
                e.tasks = [e.currentTask] + e.tasks
        print(e.start)
    else:
        # create environment
        defaultparams['seed'] = run
        n_input = inputs
        if switching:
            inform_mazenum = True
            initializeSwitchingTasks(seed=run, stoptime=args.STOPTIME,
                                     generate_new=GENERATE_NEW)
        else:
            initializeDefaultTasks(run=run, stoptime=args.STOPTIME,
                                   real_time=defaultparams['real_time'])
        # create agent
        x = randint(0, MAPSIZEX - 1)
        y = randint(0, MAPSIZEY - 1)
        if methodname == 'SSA_WM':
            SSA_WM_Params['filename'] = filename
            SSA_WM_Params['enhance_PLA'] = \
                20 / num_PLAs - 1 if not defaultparams['real_time'] else 0
            method = SSA_with_WM(**SSA_WM_Params)
        elif methodname == 'RandomLearner':
            method = RandomLearner(externalActions, filename)
        elif methodname == "DRQN":
            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = []
            # batch_size = 32
            n_input = inputs
            trace_length = 30
            use_task_bias = False
            use_task_gain = False
            epsilon_change = True
            method = DRQN_Learner(task_features, use_task_bias, use_task_gain,
                                  n_input, trace_length, externalActions, filename,
                                  episodic=True, loss=None, target_model=True,
                                  num_neurons=50, epsilon_change=epsilon_change)
        else:
            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = []
            # batch_size = 32
            n_input = inputs
            trace_length = 15
            use_task_bias = False
            use_task_gain = False
            epsilon_change = True
            method = DRQN_Learner(task_features, use_task_bias, use_task_gain,
                                  n_input, trace_length, externalActions, filename,
                                  episodic=True, loss=None, target_model=True,
                                  num_neurons=50, epsilon_change=epsilon_change)
        defaultparams["filename"] = filename
        e = TMaze(NavigationAgent(method, defaultparams), visual, switching, defaultparams)
        e.set_tasks(defaultparams['tasks'], statfreq=1 * 10 ** 6)
        if inform_mazenum:
            e.inform_mazenum = True
    print("starting from " + str(e.t))
    print("real time " + str(e.start))
    # run environment
    e.run(walltime)
    if args.test_run:
        return
    if not e.real_time and e.t < e.currentTask.end_time:
        # walltime was hit before the task finished: resubmit to continue later
        submit_job(arg_list)
    # Saving the objects ("wb" added for Python 3 pickling):
    begintime = time.time()
    dump(e, open(filename + '_environment', "wb"))
    time_passed = time.time() - begintime
    print("save time=%.3f" % (time_passed))

def main(arg_list):
    ms = args.ms if args.ms is not None else 10
    freeze = False
    freeze_str = ''
    if args.freeze is not None:
        freeze = args.freeze
    if freeze:
        internalActionsSSA_WM['freeze'] = 2
    walltime = 60 * 3600  # 60 hours by default
    if args.walltime:
        # convert "HH:MM:SS" to seconds
        ftr = [3600, 60, 1]
        walltime = sum(a * b for a, b in zip(ftr, map(int, args.walltime.split(':'))))
    print(walltime)
    probabilistic = False
    probstring = ''
    if args.probabilistic:
        probabilistic = True
        probstring = 'prob'
    ffstring = ''
    ff = True
    if args.ff:
        ff = True
        ffstring = 'ff'
    actionstring = ''
    if args.actions == 'random':
        SSANeat_Params['internal_actionsNEAT'] = internalActionsNEATrandom
        actionstring = 'random'
    methodname = args.method if args.method is not None else "DRQN"
    run = args.run if args.run is not None else 50
    switching = False
    if args.switch is not None:
        switching = args.switch
    inform_mazenum = False
    if args.inform_mazenum is not None:
        inform_mazenum = args.inform_mazenum
    difficulty = getDifficultySwitch() if switching else getDifficulty(run)
    statfreq = 1 * 10 ** 6 if difficulty == "Easy" else 16 * 10 ** 6
    wm = ''
    stoptime = STOPTIMES[difficulty] if args.STOPTIME is None else args.STOPTIME
    defaultparams['stoptime'] = stoptime
    if methodname.startswith("SSA"):
        if args.working_memory and methodname in ['SSA_WM', 'SSA_NEAT_WM']:
            SSA_WM_Params['wm_cells'] = args.working_memory
        else:
            SSA_WM_Params['wm_cells'] = WORKING_CELLS[difficulty]
        wm = 'wm' + str(SSA_WM_Params['wm_cells'])
    p = ''
    if methodname.startswith("SSA"):
        if args.program_cells:
            SSA_WM_Params['num_program'] = args.program_cells
        else:
            SSA_WM_Params['num_program'] = PROGRAM_CELLS[difficulty]
        p = 'p' + str(SSA_WM_Params['num_program'])
    print(SSA_WM_Params)
    filename = ''
    if args.filename:
        filename = get_filename(args.filename, methodname, wm, p, actionstring,
                                probstring, ffstring, freeze_str, run)
    if args.config_file is not None:
        configfile = str(os.environ['HOME']) \
            + '/PycharmProjects/PhD/Configs/IS-NEAT-Singlemaze_' + args.config_file + ".ini"
        parse_config_file(filename, configfile, defaultparams)
        filename += args.config_file
    setSSA_WM_Params(SSA_WM_Params)
    setIS_NEAT_params(defaultparams, SSANeat_Params, networktypeMap)
    print(defaultparams)
    print(SSA_WM_Params)
    print(SSANeat_Params)
    internalActionsSSA_WM.update(incPset)
    num_PLAs = 0
    for key in internalActionsSSA_WM:
        if key in ['searchP', 'incP', 'decP', 'inc_means', 'dec_means', 'sample']:
            num_PLAs += 1
    visual = False
    args.record_video = False  # hardcoded off in the original
    if args.VISUAL:
        visual = args.VISUAL
    print(visual)
    defaultparams['record_intervals'] = None
    if args.record_video:
        visual = True
        defaultparams['record_intervals'] = [[1 + 80 * 10 ** 6, 80.005 * 10 ** 6]]
        print('record intervals set')
    reward_func = reward_fun3 if defaultparams['real_time'] else reward_fun
    if args.l is not None:
        defaultparams['reset_period'] = args.l
        filename += "l" + str(args.l)
    print(defaultparams)
    print(internalActionsSSA_WM)
    environmentfile = None
    explor_schedule = False
    if args.environment_file:
        environmentfile = filename
    else:
        if filename.startswith("POmazeExploration"):
            environmentfile = get_filename("Exploration/POmaze30Mil_", methodname, wm, p,
                                           actionstring, probstring, ffstring, freeze_str, run)
            environmentfile += args.config_file
            explor_schedule = get_exploration_schedule(run)
            print(environmentfile)
        if filename.startswith("POmazeTest"):
            environmentfile = get_filename("POmazeFinal_", methodname, wm, p, actionstring,
                                           probstring, ffstring, freeze_str, run)
            environmentfile += args.config_file
    if args.run_type is not None and args.run_type.startswith("stats"):
        print("reading environmentfile:" + str(environmentfile))
        e = read_incremental("/media/david/BackupDrive/POmazeFinalData/"
                             + environmentfile + "_environment")
        print("file read")
        if args.run_type == "stats":
            getStatistics(e, filename)
            return
        elif args.run_type == "statsP":
            from StatsAndVisualisation.Statistics import PolType
            e.printStatistics(PolType.P_MATRIX)
            return
        elif args.run_type == "statsN":
            from StatsAndVisualisation.Statistics import PolType
            e.printStatistics(PolType.NETWORK)
            return
        elif args.run_type == "statsEpsilon":
            for interval in loss_intervals:
                recordings_file = '/home/david/PycharmProjects/PhD/Environments/' + environmentfile
                for stat in e.statistics.values():
                    stat.make_epsilon_map(
                        e, maze_dir='/home/david/PycharmProjects/PhD/Environments/Mazes/',
                        recordings_file=recordings_file, time_interval=interval)
                print("")
            return
        else:
            from StatsAndVisualisation.Statistics import PolType
            e.printStatistics(PolType.NETWORK)
    if environmentfile is not None:
        print("reading environment file")
        # Getting back the objects:
        e = read_incremental(environmentfile + '_environment')
        e.agent.learner.load(environmentfile)
        e.agent.learner.continue_experiment(intervals=loss_intervals)
        if difficulty == "Difficult":
            stoptime = 81 * 10 ** 6
        else:
            stoptime = 5 * 10 ** 6
        if filename.startswith("POmaze30Mil"):
            stoptime = 32.001 * 10 ** 6
        if explor_schedule:
            e.agent.learner.exploration_schedule = explor_schedule
            print("exploration schedule set:")
            print(explor_schedule)
        if filename.startswith("POmazeExploration"):
            stoptime = 40 * 10 ** 6
        if filename.startswith("POmazeTest"):
            stoptime = 82.1 * 10 ** 6
        e.stoptime = stoptime
        print("starting at " + str(e.t))
        e.start = time.time()
        e.set_tasks(defaultparams['tasks'], statfreq)
        if not (switching and GENERATE_NEW) or e.interrupt:
            # else the new task is already in place
            if stoptime:  # assuming one task
                print("a")
                e.currentTask.end_time = stoptime
                print(e.currentTask.end_time)
            else:
                print("b")
                e.currentTask.end_time = STOPTIMES[difficulty]
            e.currentTask.initialized = True
            e.set_tasks([e.currentTask] + e.tasks, statfreq)
        if args.VISUAL:
            # use these lines to convert an old-style stack:
            # from Stack import Stack
            # e.agent.learner.Stack = Stack.listToStack(e.agent.learner.Stack)
            for action in e.agent.learner.actions:
                print(action)
                print(action.n_args)
            e.rng = np.random.RandomState(run)
            e.visual = True
            print(defaultparams['record_intervals'])
            e.initVisualisation(defaultparams['record_intervals'], filename + "_video")
            if defaultparams['record_intervals']:
                e.vis.on = False
            print("video initialised")
        print(e.start)
    else:
        # create environment
        n_input = inputs
        if switching:
            inform_mazenum = True
            initializeSwitchingTasks(seed=run, stoptime=stoptime, generate_new=GENERATE_NEW)
        else:
            initializeDefaultNavigationTask(
                filename, default_task, defaultparams, run,
                SAMPLING_RATE if not defaultparams['real_time'] else REAL_SAMPLING_RATE,
                stoptime, reward_func)

        def configure_enhance_PLA(default_enhance):
            # Consolidates a setup block that was duplicated verbatim in every SSA
            # branch below. Note: most copies in the original used integer division
            # (//) and two used true division (/); standardised to // here.
            if filename.startswith("POmazeFinalPrepEval"):
                enhance = 0
                del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                SSA_WM_Params['internal_actionsSSA']['prepEval'] = 1
            elif filename.startswith("POmazeFinalNoDupl"):
                enhance = 0
            else:
                enhance = default_enhance
            SSA_WM_Params['filename'] = filename
            SSA_WM_Params['enhance_PLA'] = \
                enhance // num_PLAs if not defaultparams['real_time'] else 0
            print("enhance PLA = " + str(SSA_WM_Params['enhance_PLA']))

        trace_length = 40 if difficulty == "Difficult" else 25
        if methodname == 'SSA_WM':
            configure_enhance_PLA(20)
            method = SSA_with_WM(**SSA_WM_Params)
        elif methodname == 'SSA_gradientQ':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQ)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params)
        elif methodname == 'SSA_gradientQ2':
            configure_enhance_PLA(18)
            input_addresses = range(0, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQ)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params)
        elif methodname == 'SSA_gradientQsequence':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params,
                                   intervals=loss_intervals)
            method.Qlearner.batch_size = 32
            print("batch size:" + str(method.Qlearner.batch_size))
        elif methodname == 'SSA_gradientQsequence_greedy':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequencenoeps)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params)
        elif methodname == 'SSA_gradientQsequence_nomodification':
            configure_enhance_PLA(0)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
            del SSA_WM_Params['internal_actionsSSA']['incP']
            del SSA_WM_Params['internal_actionsSSA']['decP']
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params)
        elif methodname == 'SSA_gradientQsequence_fixedexperience':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(
                internalActionsGradientQsequence_fixedexperience)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params)
        elif methodname == 'SSA_gradientQsequence_internalgreedy':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequencenoeps)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params, Q_internal=True)
        elif methodname == 'SSA_gradientQsequence_internal':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.double_index, SSA_WM_Params, Q_internal=True)
        elif methodname == 'SSA_gradientQsequence_direct':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.direct, SSA_WM_Params)
        elif methodname == 'SSA_gradientQsequence_notrainreplay':
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(
                internalActionsGradientQsequence_notrainreplay)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.direct, SSA_WM_Params,
                                   fixed_training=True, intervals=loss_intervals)
            method.Qlearner.batch_size = 32
        elif methodname == 'RandomLearner':
            method = RandomLearner(externalActions, filename)
        elif methodname == "DRQN":
            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = []
            n_input = inputs
            use_task_bias = False
            use_task_gain = False
            epsilon_change = True
            method = DRQN_Learner(task_features, use_task_bias, use_task_gain, n_input,
                                  trace_length, externalActions, filename, episodic=False,
                                  loss=None, num_neurons=50, epsilon_change=epsilon_change,
                                  target_model=True)
            method.agent.batch_size = 32
            print("batch size:" + str(method.agent.batch_size))
        elif methodname == "A2C2":
            from Catastrophic_Forgetting_NNs.A2C_Learner2 import A2C_Learner
            settings = get_A2C_configs(inputs, externalActions, filename, True)
            method = A2C_Learner(**settings)
        else:
            if methodname != '':
                raise Exception("methodname %s not found" % (methodname))
            # empty methodname falls back to the no-train-replay gradient-Q variant
            configure_enhance_PLA(18)
            input_addresses = range(4, 8)
            from IS.SSA_gradientQ import ConversionType
            SSA_WM_Params['internal_actionsSSA'].update(
                internalActionsGradientQsequence_notrainreplay)
            method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                   ConversionType.direct, SSA_WM_Params,
                                   fixed_training=True, intervals=loss_intervals)
            method.Qlearner.batch_size = 32
        e = POmaze(NavigationAgent(method, defaultparams), visual, switching, defaultparams)
        e.set_tasks(defaultparams['tasks'], statfreq)
        e.agent.learner.set_tasks(defaultparams['tasks'])
    if args.run_type == "create_mazes":
        e.createMazes()
    e.run(walltime)
    if args.run_type == "test":
        return
    continue_experiment(e.interrupt, arg_list)
    save_stats = not e.interrupt
    print("save stats %s" % (save_stats))
    # finalise_experiment(e, filename, arg_list, no_saving, args,
    #                     save_stats=True, save_learner=True)
    finalise_experiment(e, filename, arg_list, NO_SAVING, args, save_stats=save_stats)
