Example #1
    def setReward(self, reward):
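        """Add the reward to the current task's tally, then update the
        baseline estimate and, for time-consumption weighting, refresh the
        task weights every WEIGHT_FREQUENCY time steps."""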
        SSA_with_WM.setReward(self, reward)
        self.Stack.task_Rs[self.current_feature] += reward

        if self.check_baseline_condition():
            t = self.get_time_passed()
            R = self.get_reward_passed()
            if t > 0:
                self.Stack.compute_baseline(R, t)
                self.after_estimation()
        if self.weighting_type == WeightingType.time_consumption and \
                self.Stack.task_ts[self.current_feature] % WEIGHT_FREQUENCY == 0:

            self.weights = self.get_task_weight_timeconsumption()
            self.after_estimation()
Example #2
    def printDevelopment(self):
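        """Snapshot the per-interval counters into their *OverTime series,
        reset them, and record the current task's baseline (capped at
        NUM_BASE_LINE_TRACKS entries)."""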

        SSA_with_WM.printDevelopment(self)
        # snapshot each per-interval counter into its *OverTime series, then reset it
        for name in ("num_checks", "num_evaluations", "num_backchecks",
                     "num_backtracking", "num_earlystops", "num_prestops"):
            getattr(self.stats, name + "OverTime").append(getattr(self.stats, name))
            setattr(self.stats, name, 0)
        if self.t > 0:  # keep a few extra baseline entries (capped at NUM_BASE_LINE_TRACKS) to verify it stopped changing
            if len(self.stats.baselineOverTime[
                    self.current_feature]) < NUM_BASE_LINE_TRACKS:
                self.stats.baselineOverTime[self.current_feature].append(
                    self.Stack.get_base(self.current_feature))
Example #3
    def __init__(self, weighting_type, absolute, SSA_with_WMparams):
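        """Task-specific extension of SSA_with_WM.

        weighting_type: 'fixed' or 'time_consumption' (mapped to WeightingType).
        absolute: flag for the absolute variant of the stopping criterion
            (interpretation assumed from the log message below).
        SSA_with_WMparams: keyword arguments forwarded to SSA_with_WM.__init__.
        """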

        SSA_with_WM.__init__(self, **SSA_with_WMparams)
        self.current_feature = None
        self.seen_tasks = set()
        self.absolute = absolute
        self.V_old_second = float("inf")

        if weighting_type == "fixed":
            self.weighting_type = WeightingType.fixed
        elif weighting_type == "time_consumption":
            self.weighting_type = WeightingType.time_consumption
        else:
            raise ValueError("unknown weighting type: %s" % weighting_type)

        self.stats.baselineOverTime = {}
        self.stats.num_checks = 0
        self.stats.num_checksOverTime = []
        self.stats.num_evaluations = 0
        self.stats.num_evaluationsOverTime = []
        self.stats.num_backchecks = 0
        self.stats.num_backchecksOverTime = []
        self.stats.num_backtracking = 0
        self.stats.num_backtrackingOverTime = []
        self.stats.num_earlystops = 0
        self.stats.num_earlystopsOverTime = []
        self.stats.num_prestops = 0
        self.stats.num_prestopsOverTime = []

        self.max_V = {}
        self.first_task_instance = False
        self.set_sorted_firsts([0])

        print(
            "initialised task-specific learner with weighting type %s and absolute=%s criterion"
            % (weighting_type, str(absolute)))
Example #4
def main(arg_list):
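    """Set up and run a POcman curriculum experiment: parse the CLI arguments
    into the parameter dictionaries, build the requested learner, then either
    resume a pickled environment or create a fresh one, and run it for the
    allotted walltime."""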

    V1, V2, V3, V4 = calc_velocity_random()

    walltime = 60 * 3600  # 60 hours by default
    if args.walltime:
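        # convert an HH:MM:SS walltime string into seconds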
        ftr = [3600, 60, 1]
        walltime = sum(
            [a * b for a, b in zip(ftr, map(int, args.walltime.split(':')))])

    print(walltime)

    probabilistic = False
    probstring = ''
    if args.probabilistic:
        probabilistic = True
        probstring = 'prob'
    ffstring = ''
    ff = False
    if args.ff:
        ff = True
        ffstring = 'ff'

    actionstring = ''
    if args.actions == 'random':
        SSANeat_Params['internal_actionsNEAT'] = internalActionsNEATrandom
        actionstring = 'random'

    methodname = args.method if args.method is not None else ""

    run = args.run if args.run is not None else 0
    pacmanparams['seed'] = run

    wm = ''
    switching = False  # assumed default; this entry point defines no --switch argument

    stoptime = STOPTIME if args.STOPTIME is None else args.STOPTIME

    #catastrophicforgetting_tasks(stoptime)
    features, weights, velocities = curriculum_tasks(stoptime)
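    # widen the observation vector with optional task-feature and task-time inputs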
    task_inputs = 0 if not pacmanparams['include_task_features'] else len(
        features[0])
    time_inputs = 0 if not pacmanparams['inform_task_time'] else 1
    pacmanparams['observation_length'] += task_inputs + time_inputs
    inputs = pacmanparams['observation_length']
    SSA_WM_Params['num_inputs'] = pacmanparams['observation_length']
    SSA_WM_Params['wm_cells'] = WORKING_CELLS
    SSA_WM_Params['num_program'] = PROGRAM_CELLS
    print(SSA_WM_Params)
    filename = 'POcman'
    if args.filename:
        filename = args.filename + '_' + methodname + actionstring + probstring + ffstring + str(
            run)

    if args.config_file is not None:

        configfile = str(
            os.environ['HOME']
        ) + '/PycharmProjects/PhD/Configs/IS-NEAT-Singlemaze_' + args.config_file + ".ini"
        parse_config_file(filename, configfile, pacmanparams)
        filename += args.config_file

    setSSA_WM_Params(SSA_WM_Params)

    setIS_NEAT_params(pacmanparams, SSANeat_Params, networktypeMap)
    print(pacmanparams)
    print(SSA_WM_Params)
    print(SSANeat_Params)
    internalActionsSSA_WM.update(incPset)
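    # count the PLAs (presumably the probability-modifying learning actions)
    # among the internal actions; used below to scale enhance_PLA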
    num_PLAs = 0
    for key in internalActionsSSA_WM:
        if key in [
                'searchP', 'incP', 'decP', 'inc_means', 'dec_means', 'sample'
        ]:
            num_PLAs += 1

    visual = False
    if args.VISUAL:
        visual = args.VISUAL
        print(visual)

    pacmanparams['record_intervals'] = None
    if args.record_video:
        visual = True
        pacmanparams['record_intervals'] = get_record_intervals(STOPTIME)

    print(pacmanparams)
    print(internalActionsSSA_WM)

    environmentfile = None
    if args.environment_file:
        environmentfile = filename + '_environment'
    if args.run_type == "stats":
        print("reading environmentfile:" + str(environmentfile))
        with open(environmentfile, "rb") as f:
            e = pickle.load(f)
        print("file read")
        getStatistics(e, filename)
        return

    if environmentfile is not None:
        print("reading environment file")

        # Getting back the objects:
        with open(environmentfile, "rb") as f:
            e = pickle.load(f)
            if hasattr(e.agent.learner, "action_model"):
                e.agent.learner.action_model.load(filename + '_actionmodel.h5')
            if hasattr(e.agent.learner, "evalPol"):
                e.agent.learner.evalPol.load(filename + '_evalPol.h5')

        print("starting at " + str(e.t))

        e.start = time.time()
        if args.run_type == "test":
            print("preparing test run")
            if switching:
                initializeSwitchingTasks(seed=run,
                                         stoptime=e.t + TEST_ITS,
                                         generate_new=GENERATE_NEW,
                                         start_time=e.t)
            else:
                initializeDefaultNavigationTask(
                    filename, default_task, pacmanparams, run, SAMPLING_RATE
                    if not pacmanparams['real_time'] else REAL_SAMPLING_RATE,
                    stoptime, reward_func)
            e.tasks = pacmanparams['tasks']
            if args.VISUAL:

                e.visual = True
                e.initVisualisation(pacmanparams['record_intervals'],
                                    filename + "_video")
        else:
            if not (switching and GENERATE_NEW
                    ) or e.interrupt:  # else the new task is already in place
                if stoptime:  #assuming one task
                    print("setting task end_time from stoptime")
                    e.currentTask.end_time = stoptime
                    print(e.currentTask.end_time)
                else:
                    print("falling back to STOPTIME for task end_time")
                    e.currentTask.end_time = STOPTIME
                    e.currentTask.initialized = True
                    e.tasks = [e.currentTask] + e.tasks
            if args.VISUAL:
                e.visual = True
                e.initVisualisation(pacmanparams['record_intervals'],
                                    filename + "_video")

        print(e.start)

    else:
        # create environment

        n_input = inputs

        # create agent
        x = randint(0, MAPSIZEX - 1)
        y = randint(0, MAPSIZEY - 1)
        if methodname == 'SSA_WM':
            enhance_PLA = 20
            SSA_WM_Params['filename'] = filename
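            # spread the PLA enhancement over the available PLAs; disabled in real time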
            SSA_WM_Params['enhance_PLA'] = (
                enhance_PLA / num_PLAs if not pacmanparams['real_time'] else 0)
            print("enhance PLA = " + str(SSA_WM_Params['enhance_PLA']))
            method = SSA_with_WM(**SSA_WM_Params)
        elif methodname == 'RandomLearner':
            method = RandomLearner(externalActions, filename)

        elif methodname == "DRQN":

            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = [1]
            batch_size = 32
            n_input = 4
            trace_length = 1
            use_task_bias = True
            use_task_gain = True
            method = DRQN_Learner(task_features,
                                  use_task_bias,
                                  use_task_gain,
                                  batch_size,
                                  n_input,
                                  trace_length,
                                  externalActions,
                                  filename,
                                  episodic=False,
                                  loss=None)
        else:

            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = [1]
            batch_size = 32
            n_input = 4
            trace_length = 50
            use_task_bias = True
            use_task_gain = True
            # (self, task_features, use_task_bias, use_task_gain, batch_size,n_inputs, trace_length, actions, file, episodic,
            #  loss = None)
            # include batch_size, as the signature noted above documents
            method = DRQN_Learner(task_features,
                                  use_task_bias,
                                  use_task_gain,
                                  batch_size,
                                  n_input,
                                  trace_length,
                                  externalActions,
                                  filename,
                                  episodic=True,
                                  loss=None)
        #
        e = POcman(PacmanAgent(method, pacmanparams), visual, pacmanparams)

    e.run(walltime)

    contin = continue_experiment(e.interrupt, arg_list)
    save_stats = not contin
    finalise_experiment(e,
                        filename,
                        arg_list,
                        NO_SAVING,
                        args,
                        save_stats=save_stats)
Example #5
    def setTime(self, t):
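        """Advance the clock and charge the elapsed increment to the current
        task's time counter."""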
        increment = t - self.t
        SSA_with_WM.setTime(self, t)

        self.Stack.task_ts[self.current_feature] += increment
Example #6
def main(arg_list):
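    """Set up and run a TMaze navigation experiment: parse the CLI arguments,
    build the requested learner, resume a pickled environment if one is given
    (otherwise create it), run for the allotted walltime, and pickle the
    resulting environment."""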
    print(sys.version)
    freeze = False
    freeze_str = ''
    if args.freeze is not None:
        freeze = args.freeze
    if freeze:
        internalActionsSSA_WM['freeze'] = 2
    # else:
    #     freeze_str='no_freeze'

    walltime = 60 * 3600  # 60 hours by default
    if args.walltime:
        ftr = [3600, 60, 1]
        walltime = sum(
            [a * b for a, b in zip(ftr, map(int, args.walltime.split(':')))])

    print(walltime)

    probabilistic = False
    probstring = ''
    if args.probabilistic:
        probabilistic = True
        probstring = 'prob'
    ffstring = ''
    ff = False
    if args.ff:
        ff = True
        ffstring = 'ff'

    actionstring = ''
    if args.actions == 'random':
        SSANeat_Params['internal_actionsNEAT'] = internalActionsNEATrandom
        actionstring = 'random'
    methodname = args.method

    run = args.run if args.run is not None else 666
    switching = False
    if args.switch is not None:
        switching = args.switch
    inform_mazenum = False
    if args.inform_mazenum is not None:
        inform_mazenum = args.inform_mazenum
    difficulty = getDifficultySwitch() if switching else getDifficulty(run)
    wm = ''

    if methodname in ['SSA_WM', 'SSA_NEAT_WM', 'SSA_WM_FixedNN']:
        if args.working_memory and methodname in ['SSA_WM', 'SSA_NEAT_WM']:
            SSA_WM_Params['wm_cells'] = args.working_memory
        else:
            SSA_WM_Params['wm_cells'] = WORKING_CELLS[difficulty]
        wm = 'wm' + str(SSA_WM_Params['wm_cells'])
    p = ''
    if methodname in ['SSA_WM', 'SSA_NEAT_WM', 'SSA_WM_FixedNN']:
        if args.program_cells:
            SSA_WM_Params['num_program'] = args.program_cells

        else:
            SSA_WM_Params['num_program'] = PROGRAM_CELLS[difficulty]
        p = 'p' + str(SSA_WM_Params['num_program'])
    print(SSA_WM_Params)
    filename = ''
    if args.filename:
        filename = args.filename + '_' + methodname + wm + p + actionstring + probstring + ffstring + freeze_str + str(
            run)

    if args.config_file is not None:
        configfile = str(os.environ['HOME']
                         ) + '/PycharmProjects/PhD/Configs/' + args.config_file
        filename = parse_config_file(filename, configfile, defaultparams)

        setSSA_WM_Params(defaultparams, SSA_WM_Params)

        setIS_NEAT_params(defaultparams, SSANeat_Params, networktypeMap)
    print(defaultparams)
    print(SSA_WM_Params)
    print(SSANeat_Params)
    if SSA_WM_Params['eval']:
        internalActionsSSA_WM.update(searchPset)
    else:
        internalActionsSSA_WM.update(incPset)
    num_PLAs = 0
    for key in internalActionsSSA_WM:
        if key in ['searchP', 'incP', 'decP']:
            num_PLAs += 1

    visual = False
    if args.VISUAL:
        visual = args.VISUAL
        print(visual)

    print(defaultparams)
    print(internalActionsSSA_WM)

    environmentfile = None
    if args.environment_file:
        environmentfile = filename + '_environment'

    if environmentfile:
        print("reading environment file")
        # Getting back the objects:
        with open(environmentfile, "rb") as f:  # pickle requires binary mode
            e = load(f)
        e.start = time.time()
        if args.test_run:
            print("preparing test run")
            if switching:
                initializeSwitchingTasks(seed=run,
                                         stoptime=e.t + TEST_ITS,
                                         generate_new=GENERATE_NEW,
                                         start_time=e.t)
            else:
                initializeDefaultTasks(run=run,
                                       stoptime=e.t + TEST_ITS,
                                       real_time=e.real_time)
            e.tasks = defaultparams['tasks']
            if args.VISUAL:

                e.visual = True
                e.initVisualisation()
        else:
            if not (switching and GENERATE_NEW
                    ) or e.interrupted:  #else the new task is already in place
                if args.STOPTIME:  #assuming one task
                    e.currentTask.end_time = args.STOPTIME
                else:
                    e.currentTask.end_time = STOPTIMES[difficulty]
                    e.currentTask.initialized = True
                    e.tasks = [e.currentTask] + e.tasks

        print(e.start)

    else:
        # create environment
        defaultparams['seed'] = run
        n_input = inputs
        if switching:
            inform_mazenum = True
            initializeSwitchingTasks(seed=run,
                                     stoptime=args.STOPTIME,
                                     generate_new=GENERATE_NEW)
        else:
            initializeDefaultTasks(run=run,
                                   stoptime=args.STOPTIME,
                                   real_time=defaultparams['real_time'])
        # create agent
        x = randint(0, MAPSIZEX - 1)
        y = randint(0, MAPSIZEY - 1)
        if methodname == 'SSA_WM':
            SSA_WM_Params['filename'] = filename
            SSA_WM_Params['enhance_PLA'] = (
                20 / num_PLAs - 1 if not defaultparams['real_time'] else 0)
            method = SSA_with_WM(**SSA_WM_Params)
        elif methodname == 'RandomLearner':
            method = RandomLearner(externalActions, filename)
        elif methodname == "DRQN":
            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = []
            # batch_size=32
            n_input = inputs
            trace_length = 30
            use_task_bias = False
            use_task_gain = False
            epsilon_change = True
            method = DRQN_Learner(task_features,
                                  use_task_bias,
                                  use_task_gain,
                                  n_input,
                                  trace_length,
                                  externalActions,
                                  filename,
                                  episodic=True,
                                  loss=None,
                                  target_model=True,
                                  num_neurons=50,
                                  epsilon_change=epsilon_change)
        else:
            from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
            task_features = []
            # batch_size=32
            n_input = inputs
            trace_length = 15
            use_task_bias = False
            use_task_gain = False
            epsilon_change = True
            method = DRQN_Learner(task_features,
                                  use_task_bias,
                                  use_task_gain,
                                  n_input,
                                  trace_length,
                                  externalActions,
                                  filename,
                                  episodic=True,
                                  loss=None,
                                  target_model=True,
                                  num_neurons=50,
                                  epsilon_change=epsilon_change)

        defaultparams["filename"] = filename

        e = TMaze(NavigationAgent(method, defaultparams), visual, switching,
                  defaultparams)
        e.set_tasks(defaultparams['tasks'], statfreq=1 * 10**6)
        if inform_mazenum:
            e.inform_mazenum = True
    print("starting from " + str(e.t))

    print("real time " + str(e.start))

    # run environment
    e.run(walltime)
    if args.test_run:
        return
    if not e.real_time and e.t < e.currentTask.end_time:
        submit_job(arg_list)

    # Saving the objects:
    begintime = time.time()
    with open(filename + '_environment', "wb") as f:  # pickle requires binary mode
        dump(e, f)
    time_passed = time.time() - begintime
    print("save time=%.3f" % (time_passed))
Example #7
def main(arg_list):
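    """Set up and run a POmaze experiment: parse the CLI arguments, handle the
    stats-only run types, resume a pickled environment or build one of the
    SSA/gradient-Q/DRQN/A2C learner variants, then run for the allotted
    walltime and finalise the experiment."""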
    ms = args.ms if args.ms is not None else 10
    freeze = False
    freeze_str = ''
    if args.freeze is not None:
        freeze = args.freeze
    if freeze:
        internalActionsSSA_WM['freeze'] = 2
    # else:
    #     freeze_str='no_freeze'

    walltime = 60 * 3600  # 60 hours by default
    if args.walltime:
        ftr = [3600, 60, 1]
        walltime = sum([a * b for a, b in zip(ftr, map(int, args.walltime.split(':')))])

    print(walltime)

    probabilistic = False
    probstring = ''
    if args.probabilistic:
        probabilistic = True
        probstring = 'prob'
    ffstring = ''
    ff = False  # assumed: the other entry points default ff to False
    if args.ff:
        ff = True
        ffstring = 'ff'

    actionstring = ''
    if args.actions == 'random':
        SSANeat_Params['internal_actionsNEAT'] = internalActionsNEATrandom
        actionstring = 'random'

    methodname = args.method if args.method is not None else "DRQN"


    run = args.run if args.run is not None else 50
    switching = False
    if args.switch is not None:
        switching = args.switch
    inform_mazenum = False
    if args.inform_mazenum is not None:
        inform_mazenum = args.inform_mazenum
    difficulty = getDifficultySwitch() if switching else getDifficulty(run)
    statfreq = 1 * 10 ** 6 if difficulty == "Easy" else 16 * 10 ** 6
    wm = ''

    stoptime = STOPTIMES[difficulty] if args.STOPTIME is None else args.STOPTIME
    defaultparams['stoptime'] = stoptime


    if methodname.startswith("SSA"):
        if args.working_memory and methodname in ['SSA_WM','SSA_NEAT_WM']:
            SSA_WM_Params['wm_cells'] = args.working_memory
        else:
            SSA_WM_Params['wm_cells'] = WORKING_CELLS[difficulty]
        wm = 'wm' + str(SSA_WM_Params['wm_cells'])
    p = ''
    if  methodname.startswith("SSA"):
        if args.program_cells:
            SSA_WM_Params['num_program'] = args.program_cells

        else:
            SSA_WM_Params['num_program'] = PROGRAM_CELLS[difficulty]
        p = 'p' + str(SSA_WM_Params['num_program'])
    print(SSA_WM_Params)

    filename=''

    if args.filename:
        filename=get_filename(args.filename,methodname,wm,p,actionstring,probstring,ffstring,freeze_str,run)

    if args.config_file is not None:

        configfile=str(os.environ['HOME']) + '/PycharmProjects/PhD/Configs/IS-NEAT-Singlemaze_'+args.config_file+".ini"
        parse_config_file(filename,configfile,defaultparams)
        filename+=args.config_file


    setSSA_WM_Params( SSA_WM_Params)

    setIS_NEAT_params(defaultparams, SSANeat_Params,networktypeMap)
    print(defaultparams)
    print(SSA_WM_Params)
    print(SSANeat_Params)
    internalActionsSSA_WM.update(incPset)
    num_PLAs = 0
    for key in internalActionsSSA_WM:
        if key in ['searchP', 'incP', 'decP', 'inc_means', 'dec_means', 'sample']:
            num_PLAs += 1

    visual = False
    args.record_video = False
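    # record_video is forced off just above, so the recording branch below never runs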
    if args.VISUAL:
        visual=args.VISUAL
        print(visual)

    defaultparams['record_intervals']=None
    if args.record_video:
        visual = True
        defaultparams['record_intervals'] = [[1+80 * 10 ** 6, 80.005 * 10 ** 6]]
        print('record intervals set')


    reward_func=reward_fun3 if defaultparams['real_time'] else reward_fun

    if args.l is not None:
        defaultparams['reset_period']=args.l
        filename+="l"+str(args.l)


    print(defaultparams)
    print(internalActionsSSA_WM)


    environmentfile=None
    explor_schedule = False
    if args.environment_file:
        environmentfile=filename

    else:
        if filename.startswith("POmazeExploration"):
            environmentfile=get_filename("Exploration/POmaze30Mil_",methodname,wm,p,actionstring,probstring,ffstring,freeze_str,run)
            environmentfile+=args.config_file
            explor_schedule=get_exploration_schedule(run)
            print(environmentfile)
        if filename.startswith("POmazeTest"):
            environmentfile=get_filename("POmazeFinal_",methodname,wm,p,actionstring,probstring,ffstring,freeze_str,run)
            environmentfile+=args.config_file


    if args.run_type is not None and args.run_type.startswith("stats"):
        print("reading environmentfile:"+str(environmentfile))
        e=read_incremental("/media/david/BackupDrive/POmazeFinalData/"+environmentfile+"_environment")
        print("file read")
        if args.run_type=="stats":
            getStatistics(e,filename)
            return
        elif args.run_type == "statsP":
            from StatsAndVisualisation.Statistics import PolType
            e.printStatistics(PolType.P_MATRIX)
            return
        elif args.run_type == "statsN":
            from StatsAndVisualisation.Statistics import PolType
            e.printStatistics(PolType.NETWORK)
            return
        elif args.run_type == "statsEpsilon":
            for interval in loss_intervals:
                recordings_file='/home/david/PycharmProjects/PhD/Environments/'+environmentfile
                for stat in e.statistics.values():
                    stat.make_epsilon_map(e,
                                     maze_dir='/home/david/PycharmProjects/PhD/Environments/Mazes/',
                                     recordings_file=recordings_file,time_interval=interval)
            print("")

            return
        else:
            from StatsAndVisualisation.Statistics import PolType
            e.printStatistics(PolType.NETWORK)






    if environmentfile is not None:
         print("reading environment file")

         # Getting back the objects:
         e=read_incremental(environmentfile+'_environment')
         e.agent.learner.load(environmentfile)
         e.agent.learner.continue_experiment(intervals=loss_intervals)
         if difficulty=="Difficult":
            stoptime=81*10**6
         else:
            stoptime=5*10**6
         if filename.startswith("POmaze30Mil"):
             stoptime=32.001*10**6
         if explor_schedule:
             e.agent.learner.exploration_schedule=explor_schedule
             print("exploration schedule set:")
             print(explor_schedule)
         if filename.startswith("POmazeExploration"):
             stoptime=40*10**6
         if filename.startswith("POmazeTest"):
             stoptime=82.1*10**6


         e.stoptime=stoptime




         print("starting at "+str(e.t))

         e.start = time.time()
         if not (switching and GENERATE_NEW) or e.interrupt: #else the new task is already in place
             if stoptime: #assuming one task
                 print("setting task end_time from stoptime")
                 e.currentTask.end_time=stoptime
                 print(e.currentTask.end_time)
             else:
                 print("falling back to STOPTIMES[difficulty] for end_time")
                 e.currentTask.end_time = STOPTIMES[difficulty]
                 e.currentTask.initialized=True

             e.set_tasks([e.currentTask] + e.tasks,statfreq)

         if args.VISUAL:
             # use these lines to convert old-style stack
             #from Stack import Stack
             #e.agent.learner.Stack = Stack.listToStack(e.agent.learner.Stack)
             for action in e.agent.learner.actions:
                 print(action)
                 print(action.n_args)
             e.rng = np.random.RandomState(run)
             e.visual = True
             print(defaultparams['record_intervals'])
             e.initVisualisation( defaultparams['record_intervals'], filename+"_video")
             if defaultparams['record_intervals']:
                 e.vis.on=False
             print("video initialised")



         print(e.start)

    else:
            # create environment
            n_input = inputs
            if switching:
                inform_mazenum=True
                initializeSwitchingTasks(seed=run, stoptime=stoptime,generate_new=GENERATE_NEW)
            else:
                initializeDefaultNavigationTask(filename,default_task, defaultparams, run,
                                                SAMPLING_RATE if not defaultparams['real_time'] else REAL_SAMPLING_RATE,
                                                stoptime, reward_func)



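            # Each branch below instantiates one learner variant; the
            # SSA_gradientQ variants differ only in enhance_PLA, the input
            # addresses, the conversion type, the internal action set, and a
            # few keyword flags.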
            if methodname=='SSA_WM':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA=0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval']=1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA=0
                else:
                    enhance_PLA=20
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                method = SSA_with_WM(**SSA_WM_Params)

            elif methodname=='SSA_gradientQ':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA=0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval']=1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA=0
                else:
                    enhance_PLA=18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQ)
                method = SSA_gradientQ(len(externalActions),trace_length,input_addresses,ConversionType.double_index,SSA_WM_Params)
            elif methodname=='SSA_gradientQ2':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA=0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval']=1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA=0
                else:
                    enhance_PLA=18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(0,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQ)
                method = SSA_gradientQ(len(externalActions),trace_length,input_addresses,ConversionType.double_index,SSA_WM_Params)
            elif methodname=='SSA_gradientQsequence':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA=0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval']=1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA=0
                else:
                    enhance_PLA=18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
                method = SSA_gradientQ(len(externalActions),trace_length,input_addresses,ConversionType.double_index,
                                       SSA_WM_Params,intervals=loss_intervals)
                method.Qlearner.batch_size=32
                print("batch size:"+str(method.Qlearner.batch_size))
            elif methodname=='SSA_gradientQsequence_greedy':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA=0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval']=1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA=0
                else:
                    enhance_PLA=18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequencenoeps)
                method = SSA_gradientQ(len(externalActions),trace_length,input_addresses,ConversionType.double_index,SSA_WM_Params)
            elif methodname=='SSA_gradientQsequence_nomodification':
                enhance_PLA=0
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
                del SSA_WM_Params['internal_actionsSSA']['incP']
                del SSA_WM_Params['internal_actionsSSA']['decP']
                method = SSA_gradientQ(len(externalActions),trace_length,input_addresses,ConversionType.double_index,SSA_WM_Params)
            elif methodname=='SSA_gradientQsequence_fixedexperience':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA=0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval']=1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA=0
                else:
                    enhance_PLA=18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence_fixedexperience)
                method = SSA_gradientQ(len(externalActions),trace_length,input_addresses,ConversionType.double_index,SSA_WM_Params)
            elif methodname=='SSA_gradientQsequence_internalgreedy':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA=0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval']=1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA=0
                else:
                    enhance_PLA=18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequencenoeps)
                method = SSA_gradientQ(len(externalActions),trace_length,
                                       input_addresses,ConversionType.double_index,SSA_WM_Params,Q_internal=True)
            elif methodname == 'SSA_gradientQsequence_internal':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA = 0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval'] = 1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA = 0
                else:
                    enhance_PLA = 18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA // num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = " + str(SSA_WM_Params['enhance_PLA']))
                input_addresses = range(4, 8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
                method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                       ConversionType.double_index,SSA_WM_Params,Q_internal=True)
            elif methodname == 'SSA_gradientQsequence_direct':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA = 0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval'] = 1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA = 0
                else:
                    enhance_PLA = 18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA // num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = " + str(SSA_WM_Params['enhance_PLA']))
                input_addresses = range(4, 8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence)
                method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                       ConversionType.direct,SSA_WM_Params)
            elif methodname == 'SSA_gradientQsequence_notrainreplay':
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA = 0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval'] = 1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA = 0
                else:
                    enhance_PLA = 18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence_notrainreplay)
                method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                       ConversionType.direct,SSA_WM_Params,fixed_training=True,intervals=loss_intervals)
                method.Qlearner.batch_size=32

            elif methodname=='RandomLearner':
                method=RandomLearner(externalActions,filename)

            elif methodname=="DRQN":

                from Catastrophic_Forgetting_NNs.DRQN_Learner import DRQN_Learner
                task_features=[]
                #batch_size=32
                n_input=inputs

                trace_length=40 if difficulty == "Difficult" else 25
                use_task_bias=False
                use_task_gain=False
                epsilon_change=True
                method = DRQN_Learner(task_features,use_task_bias,use_task_gain,n_input,trace_length,externalActions,
                                      filename,episodic=False,loss=None,num_neurons=50,epsilon_change=epsilon_change,target_model=True)
                method.agent.batch_size=32
                print("batch size:"+str(method.agent.batch_size))
            elif methodname == "A2C2":
                from Catastrophic_Forgetting_NNs.A2C_Learner2 import A2C_Learner
                settings = get_A2C_configs(inputs, externalActions, filename, True)
                method = A2C_Learner(**settings)
            else:
                if methodname != '':
                    raise ValueError("methodname %s not found" % methodname)
                if filename.startswith("POmazeFinalPrepEval"):
                    enhance_PLA = 0
                    del SSA_WM_Params['internal_actionsSSA']['endSelfMod']
                    SSA_WM_Params['internal_actionsSSA']['prepEval'] = 1
                elif filename.startswith("POmazeFinalNoDupl"):
                    enhance_PLA = 0
                else:
                    enhance_PLA = 18
                SSA_WM_Params['filename'] = filename
                SSA_WM_Params['enhance_PLA'] = enhance_PLA//num_PLAs if not defaultparams['real_time'] else 0
                print("enhance PLA = "+str(SSA_WM_Params['enhance_PLA']))
                input_addresses=range(4,8)
                from IS.SSA_gradientQ import ConversionType
                trace_length = 40 if difficulty == "Difficult" else 25
                SSA_WM_Params['internal_actionsSSA'].update(internalActionsGradientQsequence_notrainreplay)
                method = SSA_gradientQ(len(externalActions), trace_length, input_addresses,
                                       ConversionType.direct,SSA_WM_Params,fixed_training=True,intervals=loss_intervals)
                method.Qlearner.batch_size=32
            e = POmaze(NavigationAgent(method, defaultparams), visual, switching, defaultparams)
            e.set_tasks(defaultparams['tasks'], statfreq)
            e.agent.learner.set_tasks(defaultparams['tasks'])

    if args.run_type == "create_mazes":
        e.createMazes()

    e.run(walltime)
    if args.run_type == "test":
        return

    continue_experiment(e.interrupt, arg_list)
    save_stats = not e.interrupt
    print("save stats %s" % save_stats)
    #e, filename, arg_list, no_saving, args, save_stats = True, save_learner = True
    finalise_experiment(e, filename, arg_list, NO_SAVING, args, save_stats=save_stats)