Example #1
def symbol_defvar(fun, args):
	"""Sets a variable in the current stack frame"""

	symbol_name = args[0].data
	result = Atom.evaluate(args[1], fun)
	Environment.set_dynamic_symbol(symbol_name.upper(), result)
	return result
Example #2
def test():
    ale = ALEInterface()
    Environment.initializeALEParameters(ale, 1, 4, 0.00, False)
    # t = TransferTaskModule(ale, ["../../ALE/roms/boxing.bin", "../../ALE/roms/hero.bin", "../../ALE/roms/space_invaders.bin"], "0;0_-1;0_-1", "0;0_-1;0_-1")
    t = TransferTaskModule(ale, ["../../ALE/roms/pong.bin"], "0_-1", "0_-1")
    for x in range(t.getNumTasks()):
        print t.getTaskTuple(x)

    return t
Example #3
    def reset_rewards(env, spec, K=1):
        if spec is None:
            raise NotImplementedError
        else:
            extn = spec.split('.')[-1]
            if extn == "tsv":
                road_map = Rooms.make_map_from_tsv_file(spec)
            else:
                road_map = Rooms.make_map_from_txt_file(spec)

        goal = Rooms.get_random_goal(road_map)
        f, g = Rooms.create_bijection(road_map)
        state_idx = functools.partial(Rooms.state_idx, road_map, f)

        # Add rewards to all states that transit into the goal state
        R = {}
        start_set = None
        end_set = []

        for i in xrange(K):
            goal = Rooms.get_random_goal(road_map)
            g = state_idx(*goal)
            # Get reward for moving to s
            reward = np.random.normal(Rooms.REWARD_SUCCESS - Rooms.REWARD_BIAS,
                                      Rooms.REWARD_SUCCESS_VAR)
            for s_ in xrange(env.S):
                R[(s_, g)] = reward
            end_set.append(g)

        return Environment(Rooms, env.S, env.A, env.P, R, env.R_bias,
                           start_set, end_set)
Example #4
def main():
    args = parse_arguments()

    if args.simulations < 1:
        raise Exception('-s must be a numerical value > 0')
    if args.players + args.computers > 4 or args.players + args.computers < 2:
        raise Exception(
            'There cannot be more than 4 total players or less than 2')
    for i in range(args.simulations):
        players = []
        #initiate deck
        pile = deck(True)
        #initiate players
        for i in range(args.players):
            players.append(ChaseTheAcePlayer(False, i,
                                             name='Player ' + str(i)))
        for i in range(args.computers):
            players.append(
                ChaseTheAcePlayer(True,
                                  i + args.players,
                                  name='Computer ' + str(i)))

        #pass the above initiated objects into Environment
        env = Environment(players, pile, players[0])
        #initiate interface
        if args.showComputerScreen == 0:
            ui = Interface(False, env)
        else:
            ui = Interface(True, env)
        #create machine and let it run off into the sunset
        game = ChaseTheAceGame(env, ui)

    return 0
Example #5
def experiment(k):
    np.random.seed()

    # Agent initialization
    agent = AgentPrior(budgetTot=1000, deadline=deadline, nCampaigns=nCampaigns, nBudget=nIntervals, nBids=nBids, maxBudget=100.0,usePrior=False)
    agent.initGPs()
    print "Experiment3D : ",k

    if show:
        plotter = Plotter(agent, env)
        [trueClicks, trueBudgets] = plotter.trueSample(1.0, maxBudget=100.0, nsimul=400)
    if save:
        trueClicks = np.array([trueClicks])
        trueBudgets = np.array([trueBudgets])
        np.save(path + 'trueClicks', trueClicks)
        np.save(path + 'trueBudgets', trueBudgets)

    # Set the GPs hyperparameters
    for c in range(0,nCampaigns):
        agent.setGPKernel(c , oracle.gps3D[c].kernel_)

    # Init the Core and execute the experiment
    env = Environment(copy.copy(campaigns))
    core = Core(agent, copy.copy(env), deadline)

    core.runEpisode()
    np.save(path+"policy3D_" +str(k), [agent.prevBids,agent.prevBudgets])
    np.save(path+"experiment3D_" + str(k),np.sum(agent.prevConversions,axis=1))
    return np.sum(agent.prevConversions,axis=1)
Example #6
def fake_build_environment(info=True, flush=False):
    """create all the tasks for the project, but do not run the build
    return the build context in use"""
    bld = getattr(Utils.g_module, 'build_context', Utils.Context)()
    bld = Scripting.check_configured(bld)

    Options.commands['install'] = False
    Options.commands['uninstall'] = False
    Options.is_install = False

    bld.is_install = 0  # False

    try:
        proj = Environment.Environment(Options.lockfile)
    except IOError:
        raise Utils.WafError(
            "Project not configured (run 'waf configure' first)")

    bld.load_dirs(proj[SRCDIR], proj[BLDDIR])
    bld.load_envs()

    if info:
        Logs.info("Waf: Entering directory `%s'" % bld.bldnode.abspath())
    bld.add_subdirs([os.path.split(Utils.g_module.root_path)[0]])

    bld.pre_build()
    if flush:
        bld.flush()
    return bld
Example #7
def distclean(ctx=None):
    '''removes the build directory'''
    global commands
    lst = os.listdir('.')
    for f in lst:
        if f == Options.lockfile:
            try:
                proj = Environment.Environment(f)
            except:
                Logs.warn('could not read %r' % f)
                continue
            try:
                shutil.rmtree(proj[BLDDIR])
            except IOError:
                pass
            except OSError as e:
                if e.errno != errno.ENOENT:
                    Logs.warn('project %r cannot be removed' % proj[BLDDIR])
            try:
                os.remove(f)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    Logs.warn('file %r cannot be removed' % f)
        if not commands and f.startswith('.waf'):
            shutil.rmtree(f, ignore_errors=True)
Example #8
def train_q_learning(num_trials, num_episodes, lr, epsilon, gamma,
                     fourier_basis_order, total_trials):
    undiscounted_returns = np.zeros(shape=(total_trials, num_episodes))

    environment = Environment.Environment()
    # environment = gym.make("MountainCar-v0")
    agent = Agent.Agent(fourier_basis_order=fourier_basis_order,
                        epsilon=epsilon)
    for trial in xrange(num_trials[0], num_trials[1]):
        print "Trial: ", trial
        agent.reset()
        for episode in xrange(num_episodes):
            environment.reset()
            # current_state = [-0.5, 0]
            current_state = environment.current_state
            current_action = agent.get_action(current_state)
            while True:
                # environment.render()
                next_state, reward, done = environment.step(
                    Constants.action_representation[current_action])
                if done:
                    # print environment.time_step
                    break
                agent.q_learning_update(current_state, current_action, reward,
                                        next_state, lr, gamma)
                current_state = next_state
                current_action = agent.get_action(next_state)
            undiscounted_returns[trial][episode] = -1 * environment.time_step

    return undiscounted_returns
Example #9
def make_worlds():
    worlds = shelve.open("Environments")
    for count in range(50):
        env = Environment.Environment(random.randint(0, 100),
                                      random.randint(0, 100))
        env.create_env(env.environment_map[env.row][env.column])
        worlds[str(count)] = env
Example #10
 def create( spec ):
     """Create a taxi from @spec"""
     if spec is None:
         road_map, starts = Taxi.make_map_from_size( 5, 5 )
     else:
         road_map, starts = Taxi.make_map_from_file( spec )
     return Environment( Taxi, *Taxi.make_mdp( road_map, starts ) )
Example #11
def build_impl(bld):
    # compile the project and/or install the files
    try:
        proj = Environment.Environment(Options.lockfile)
    except IOError:
        raise Utils.WafError(
            "Project not configured (run 'waf configure' first)")

    bld.load_dirs(proj[SRCDIR], proj[BLDDIR])
    bld.load_envs()

    info("Waf: Entering directory `%s'" % bld.bldnode.abspath())
    bld.add_subdirs([os.path.split(Utils.g_module.root_path)[0]])

    # execute something immediately before the build starts
    bld.pre_build()

    try:
        bld.compile()
    finally:
        if Options.options.progress_bar: print('')
        info("Waf: Leaving directory `%s'" % bld.bldnode.abspath())

    # execute something immediately after a successful build
    bld.post_build()

    bld.install()
Example #12
def EXPAND_VARIABLES(ctx, varstr, vars=None):
    '''expand variables from a user supplied dictionary

    This is most useful when you pass vars=locals() to expand
    all your local variables in strings
    '''

    if isinstance(varstr, list):
        ret = []
        for s in varstr:
            ret.append(EXPAND_VARIABLES(ctx, s, vars=vars))
        return ret

    if not isinstance(varstr, str):
        return varstr

    import Environment
    env = Environment.Environment()
    ret = varstr
    # substitute on user supplied dict if available
    if vars is not None:
        for v in vars.keys():
            env[v] = vars[v]
        ret = SUBST_VARS_RECURSIVE(ret, env)

    # if anything left, subst on the environment as well
    if ret.find('${') != -1:
        ret = SUBST_VARS_RECURSIVE(ret, ctx.env)
    # make sure there is nothing left. Also check for the common
    # typo of $( instead of ${
    if ret.find('${') != -1 or ret.find('$(') != -1:
        Logs.error('Failed to substitute all variables in varstr=%s' % ret)
        sys.exit(1)
    return ret
Example #13
    def handle_series_start(self, unique_id, series_id, player_map, num_games,
                            game_params):
        """
        Set the player_number of our actor, so that we can tell our MCTS which actor we are.
        :param unique_id - integer identifier for the player within the whole tournament database
        :param series_id - (1 or 2) indicating which player this will be for the ENTIRE series
        :param player_map - a list of tuples: (unique-id series-id) for all players in a series
        :param num_games - number of games to be played in the series
        :param game_params - important game parameters.  For Hex = list with one item = board size (e.g. 5)
        :return

        """
        self.series_id = series_id
        self.board_size = game_params[0]
        #############################
        #
        #
        learning_rate = 0.001
        NN_structure = [128, 128, 128]
        optimizer_ = 'Adam'
        activation_function_ = 'sigmoid'
        self.game = Environment.HexGame("Diamond", game_params[0], 0)
        self.policy_network = Agent.Policy_Network(
            game_params[0],
            lr=learning_rate,
            nn_struct=NN_structure,
            activation_function=activation_function_,
            optimizer=optimizer_,
            conv_bool=False)
        self.policy_network.load_weights("./demo_agents/" +
                                         str(game_params[0]))
Example #14
def experiment(k):
    np.random.seed()
    print "Esperimento: ", k
    agentGPUCB = AgentFactored(1000, deadline, ncampaigns, nIntervals, nBids,
                               maxBudget, 1.0, "GPUCB")
    agentGPUCB.initGPs()
    envGPUCB = Environment(copy.copy(campaigns))
    coreGPUCB = Core(agentGPUCB, envGPUCB, deadline)
    meanConvGPUCB = np.zeros(deadline)

    # in this loop, store the mean conversions at each time step
    for t in range(deadline):
        print "Day: ", t + 1
        coreGPUCB.step()
        meanConvGPUCB[t] = lastMeanConv(agentGPUCB)

    # now store the instantaneous conversions instead
    instConvGPUCB = np.sum(agentGPUCB.prevConversions, axis=1)

    positionGPUCB1 = path_dati + "inst_conv_3camp_" + str(k)
    positionGPUCB2 = path_dati + "mean_conv_3camp_" + str(k)
    np.save(positionGPUCB1, instConvGPUCB)
    np.save(positionGPUCB2, meanConvGPUCB)

    return
Example #15
    def trainAll(self, opponent1, opponent2, opponent3):
        env = Environment.EnvironmentAll(self, opponent1, opponent2, opponent3)
        board = env.reset()
        for episodes in range(Constants.QEpisodes):
            print episodes
            past_state = None
            past_action = None
            while True:
                if past_state is None:
                    action = self.getAction(board)
                    past_state = ''.join(map(str, board.state))
                    past_action = action
                    self.q[(past_state, past_action)] = 0
                else:
                    self.learn(past_state, past_action, board)
                    action = self.getAction(board)
                    past_state = ''.join(map(str, board.state))
                    past_action = action

                board, reward, game_over = env.step(action)

                if game_over:
                    self.q[(past_state, past_action)] = reward
                    break
            board = env.reset()
            Constants.Epsilon = Constants.Epsilon + (1.3 / Constants.QEpisodes)
            Constants.Lr = Constants.Lr - (0.5 / Constants.QEpisodes)
        np.save('Qnormalall.npy', self.q)
        print len(self.q)
        self.findUnique()
Example #16
 def __init__(self, screen):
     super(Gameplay, self).__init__(screen)
     loader = TxtLevelLoader()
     loader.load("data/level0.txt")
     self._environment = Environment()
     self._environment.load(loader)
     self._player = self._environment.get_player(0)
Example #17
    def description(self, objects = None, index_map = None):
        if objects is None:
            objects = self
        attributes = {}
        for attr in dir(self):
            if attr[0] != '_':
                object = getattr(self, attr)
                if ChemicalObjects.isChemicalObject(object) \
                   or Environment.isEnvironmentObject(object):
                    attributes[object] = attr
        items = []
        for o in objects.objectList():
            attr = attributes.get(o, None)
            if attr is not None:
                items.append(repr(attr))
            items.append(o.description(index_map))
        for o in self._environment:
            attr = attributes.get(o, None)
            if attr is not None:
                items.append(repr(attr))
            items.append(o.description())
        s = 'c(%s,[%s])' % \
            (`self.__class__.__name__ + self._descriptionArguments()`,
             string.join(items, ','))
        return s
Example #18
def evaluate(atom, called_from="", return_object=False):
	if atom.quote:
		atom.quote -= 1
		return atom

	elif atom.type == Atom.KEYWORD:
		return atom.data

	elif atom.type == Atom.SYMBOL:
		value = Environment.get_symbol_value(atom.data)
		if value is not None:
			return value
		else:
			raise LispError("%s: Undefined symbol %s" % (called_from, atom.data))

	elif atom.type == Atom.CONS:
		if atom.data.quote:
			atom.data.quote -= 1
			return atom

		fun = atom.data.first.data
		if fun not in Functions.functions:
			raise LispError("%s: Undefined function %s" % (called_from, fun))

		args = cons_to_list(atom.data.second.data)
		result = Functions.functions[fun](fun, args)
		return result

	else:
		return atom if return_object else atom.data
Example #19
    def addObject(self, object):
        """Adds |object| to the universe. If |object| is a Collection,
        all elements of the Collection are added to the universe. An
        object can only be added to a universe if it is not already
        part of another universe."""
        if ChemicalObjects.isChemicalObject(object):
            if object.parent is not None:
                if isUniverse(object.parent):
                    raise ValueError, `object` + \
                          ' is already in another universe'
                else:
                    raise ValueError, `object` + ' is part of another object'
            object.parent = self
            self._objects.addObject(object)
            self._changed(1)
        elif Environment.isEnvironmentObject(object):
            for o in self._environment:
                o.checkCompatibilityWith(object)
            self._environment.append(object)
            self._changed(0)
        elif Collection.isCollection(object) \
             or Utility.isSequenceObject(object):
            for o in object:
                self.addObject(o)
        else:
            raise TypeError, repr(object) + ' cannot be added to a universe'
Example #20
def train_sarsa(num_trials, num_episodes, lr, epsilon, gamma,
                fourier_basis_order, total_trials):
    undiscounted_returns = np.zeros(shape=(total_trials, num_episodes))

    environment = Environment.Environment()
    agent = Agent.Agent(fourier_basis_order=fourier_basis_order,
                        epsilon=epsilon)

    for trial in xrange(num_trials[0], num_trials[1]):
        print "Trial: ", trial
        agent.reset()
        for episode in xrange(num_episodes):
            environment.reset()
            current_state = environment.current_state
            current_action = agent.get_action(current_state)
            for time_step in xrange(Constants.episode_end_time_step):
                next_state, reward, done = environment.step(current_action)
                if done:
                    # print time_step
                    break
                next_action = agent.get_action(next_state)
                agent.sarsa_update(current_state, current_action, reward,
                                   next_state, next_action, lr, gamma)
                current_state = next_state
                current_action = next_action

            undiscounted_returns[trial][episode] += -1 * environment.time_step

    return undiscounted_returns
Example #21
def test(actor_agent):
    actor_agent.env = Environment()  # DataGenerater(node_num))
    start_time = time.time()
    flow_number = 1
    info_record = {}
    while actor_agent.action():
        end_time = time.time()
        #print("flow_number", flow_number, "cycle", actor_agent.cycle, "time", end_time - start_time,
        #      "hop", len(actor_agent.path), "delay", actor_agent.delay, "usage", actor_agent.env.edge_usage())
        info_record[flow_number] = [
            flow_number, actor_agent.cycle, end_time - start_time,
            len(actor_agent.path), actor_agent.delay
        ]
        if flow_number == args.link_failure_pos:
            actor_agent.reschedule_start_time = time.time()
            edge = actor_agent.env.graph.edges[0]
            actor_agent.reschedule_cnt = len(
                actor_agent.env.edge_to_tt_flow[(edge.start_node.id,
                                                 edge.end_node.id)])
            # print(actor_agent.env.edge_to_tt_flow[(edge.start_node.id, edge.end_node.id)], actor_agent.reschedule_start_time)
            actor_agent.env.delete_edge((edge.start_node.id, edge.end_node.id))
        if actor_agent.env.reschedule == 2:
            actor_agent.reschedule_end_time = time.time()
        actor_agent.env.enforce_next_query()
        flow_number += 1
        start_time = time.time()

    return len(info_record), actor_agent.env.edge_usage(), \
           actor_agent.reschedule_end_time - actor_agent.reschedule_start_time, actor_agent.reschedule_cnt
Example #22
 def test_can_go_through_hole(self):
     model = Environment.GridWorldModel()
     for step in ['up', 'right', 'right', 'right', 'right', 'right', 'up', 'up', 'up', 'up']:
         model.take_action(actions[step])
     assert model.x == Environment.GOAL_X
     assert model.y == Environment.GOAL_Y
     assert model.is_terminal_state()
Example #23
    def visit_class_stmt(self, stmt : Stmt.Class):
        superclass = None
        if stmt.superclass != None:
            superclass = self.evaluate(stmt.superclass)
            if not isinstance(superclass, LoxClass.LoxClass):
                raise RuntimeError(stmt.superclass.name, "Superclass must be a class.")

        self.env.define(stmt.name.lexeme, None)

        if stmt.superclass != None:
            self.env = Environment.Environment(self.env)
            self.env.define("super", superclass)

        methods = {}

        for method in stmt.methods:
            function = LoxFunction.LoxFunction(method, self.env, method.name.lexeme == "init")
            methods[method.name.lexeme] = function

        klass = LoxClass.LoxClass(stmt.name.lexeme, superclass, methods)
        
        if superclass != None:
            self.env = self.env.enclosing

        self.env.assign(stmt.name, klass)
Example #24
 def load(self):
     try:
         env = Environment.Environment(
             os.path.join(self.cachedir, 'build.config.py'))
     except (IOError, OSError):
         pass
     else:
         if env['version'] < HEXVERSION:
             raise Utils.WafError(
                 'Version mismatch! reconfigure the project')
         for t in env['tools']:
             self.setup(**t)
     try:
         gc.disable()
         f = data = None
         Node.Nodu = self.node_class
         try:
             f = open(os.path.join(self.bdir, DBFILE), 'rb')
         except (IOError, EOFError):
             pass
         try:
             if f: data = cPickle.load(f)
         except AttributeError:
             if Logs.verbose > 1: raise
         if data:
             for x in SAVED_ATTRS:
                 setattr(self, x, data[x])
         else:
             debug('build: Build cache loading failed')
     finally:
         if f: f.close()
         gc.enable()
Example #25
def test():
    env = Environment.Env(4, 4, 0.15)
    obstacles = [(1, 1), (1, 3)]
    for obs in obstacles:
        env.addobstacle(obs)
    agent1 = Environment.agent(env)
    cat1 = Environment.agent(env, autopolicy=True)
    cat2 = Environment.agent(env, autopolicy=True)
    #Choose Policy
    with open(".\Revision1\policy_sto.pkl", "rb") as f:
        policy_act = pickle.load(f)

    with open(".\Revision1\policy_semi_2.pkl", "rb") as f:
        policy = pickle.load(f)

    agent_ini = (1, 0)
    cat1_ini = (3, 2)
    cat2_ini = (0, 3)
    state = (agent_ini, cat1_ini, cat2_ini)
    steplist = []
    trajlist = []
    sensorrewardlist = []
    penaltylist = []
    # for i in range(1000):
    #     stepcount, traj, penaltycount = simulate_2(agent1, cat1, agent_ini, cat1_ini, policy_act)
    #     steplist.append(stepcount)
    #     trajlist.append(trajlist)
    #     # costlist.append(cost)
    #     penaltylist.append(penaltycount)
    for i in range(1000):
        stepcount, traj, sensorreward, penaltycount = simulate_3(
            agent1, cat1, cat2, state, policy, policy_act)
        steplist.append(stepcount)
        trajlist.append(traj)
        sensorrewardlist.append(sensorreward)
        penaltylist.append(penaltycount)
    filename = ".\Revision1\steplist2.pkl"
    file = open(filename, "wb")
    pickle.dump(steplist, file)
    file.close()
    filename = ".\Revision1\\trajlist2.pkl"
    file = open(filename, "wb")
    pickle.dump(trajlist, file)
    file.close()
    print(sum(steplist) / 1000)
    print(sum(sensorrewardlist) / 1000)
    print(sum(penaltylist) / 1000)
Example #26
def train_all_types():
    game_type = [["Diamond", 4, 9], ["Diamond", 4, 6], ["Triangle", 5, 4],
                 ["Triangle", 5, 7], ["Triangle", 5, 8]]
    critic_type = ["NN", "Tabular"]
    for g in range(0, len(game_type)):
        for c in range(0, len(critic_type)):
            env = Environment.PegSolitaire(game_type[g][0], game_type[g][1], 1,
                                           [game_type[g][2]])
            obs_space = env.get_obs_space()
            agent = Agent.Agent(critic_type=critic_type[c],
                                NN_structure=[25, 25, 25],
                                obs_space=obs_space,
                                action_space=6,
                                critic_lerning_rate=CRITIC_LEARNING_RATE,
                                actor_learning_rate=ACTOR_LEARNING_RATE,
                                critic_e_decay_rate=CRITIC_E_DECAY_RATE,
                                actor_e_decay_rate=ACTOR_E_DECAY_RATE,
                                gamma=0.85)
            episode_num_pegs_left = []
            for e in range(0, NUM_EPISODES):
                if e % 100 == 0:
                    print("Episode number ", e, " finished.")
                env.reset(numHoles=len(PLACEMENT_HOLES),
                          placementHoles=PLACEMENT_HOLES)
                state = env.get_boardState()
                done = env.is_game_done()
                num_pegs_left = 0
                for s in range(0, NUM_STEPS):
                    if np.random.rand() < epsilon_greedy(e):
                        actions = env.get_legal_actions(obs_space)
                        #actions = env.get_action_space()
                        #action_idx = np.random.randint(0,len(actions))
                        idx = np.random.randint(0, len(actions))
                        #peg = np.random.randint(0,obs_space)
                        #action = actions[idx]
                        action = actions[idx]
                    else:
                        action = agent.get_action(state, 0.0)

                    move_done, reward, state_next, ended = env.move_peg(
                        action[0], action[1])
                    if not move_done:
                        agent.update_illegal_move(state, action)

                    else:
                        #returned += reward
                        agent.update_trajectories(state, action)
                        agent.update_agent(state, state_next, reward)
                    if ended:
                        episode_num_pegs_left.append(int(env.num_pegs_left()))
                        break
                    state = state_next
            name = game_type[g][0] + "_boardsize_" + str(
                game_type[g][1]) + "_" + str(
                    game_type[g][2]) + "_" + critic_type[c]
            np.save(str(name + "_learning_plot"), episode_num_pegs_left)
            agent.save_policy(
                str(game_type[g][0] + "_" + str(game_type[g][2])),
                str("_board_size") + str(game_type[g][1]), critic_type[c])
Example #27
 def make_bld(self):
     Options.commands['configure'] = False
     env = Environment.Environment()
     bld = Build.bld = Build.BuildContext()
     bld.set_env('default', env)
     blddir = os.path.join(self._test_dir_root, 'b')
     bld.load_dirs(self._test_dir_root, blddir)
     return bld
Example #28
def check_configured(bld):
	if not Configure.autoconfig:
		return bld
	conf_cls=getattr(Utils.g_module,'configure_context',Utils.Context)
	bld_cls=getattr(Utils.g_module,'build_context',Utils.Context)
	def reconf(proj):
		back=(Options.commands,Options.options.__dict__,Logs.zones,Logs.verbose)
		Options.commands=proj['commands']
		Options.options.__dict__=proj['options']
		conf=conf_cls()
		conf.environ=proj['environ']
		configure(conf)
		(Options.commands,Options.options.__dict__,Logs.zones,Logs.verbose)=back
	try:
		proj=Environment.Environment(Options.lockfile)
	except IOError:
		conf=conf_cls()
		configure(conf)
	else:
		try:
			bld=bld_cls()
			bld.load_dirs(proj[SRCDIR],proj[BLDDIR])
			bld.load_envs()
		except Utils.WafError:
			reconf(proj)
			return bld_cls()
	try:
		proj=Environment.Environment(Options.lockfile)
	except IOError:
		raise Utils.WafError('Auto-config: project does not configure (bug)')
	h=0
	try:
		for file in proj['files']:
			if file.endswith('configure'):
				h=hash((h,Utils.readf(file)))
			else:
				mod=Utils.load_module(file)
				h=hash((h,mod.waf_hash_val))
	except(OSError,IOError):
		warn('Reconfiguring the project: a file is unavailable')
		reconf(proj)
	else:
		if(h!=proj['hash']):
			warn('Reconfiguring the project: the configuration has changed')
			reconf(proj)
	return bld_cls()
Example #29
    def setUp(self):
        tfi = TextFileInterface.TextFileInterface(relative_directory="../UnitTests/TestDB/")
        tfi.clear_database()

        tfi.create_account("Instructor", "InstructorPassword", "instructor")

        environment = Environment.Environment(tfi, DEBUG=True)
        self.ui = UI.UI(environment)
Example #30
def main(args=None):
    env = Environment(input_size=XorTest.input_size,
                      output_size=XorTest.output_size,
                      init_population=args.pop,
                      max_generation=args.gen,
                      comp_threshold=args.thr,
                      avg_comp_num=args.cmp,
                      mating_prob=args.mat,
                      copy_mutate_pro=args.cpy,
                      self_mutate_pro=args.slf,
                      excess=args.exc,
                      disjoint=args.dsj,
                      weight=args.wgh,
                      survive=args.srv,
                      task=XorTest)

    # env.test()
    env.run(task=XorTest, showResult=True)
Example #31
class Gameplay(GameStage):
    def __init__(self, screen):
        super(Gameplay, self).__init__(screen)
        loader = TxtLevelLoader()
        loader.load("data/level0.txt")
        self._environment = Environment()
        self._environment.load(loader)
        self._player = self._environment.get_player(0)

    def on_event(self, event):
        return False

    def on_update(self, delta, current):
        # self._environment.update(delta, current)
        pass

    def on_redraw(self, surface, delta, current):
        self._environment.redraw(surface, vec2(0, 0), current, True)
Example #32
 def test_cannot_fall_off_right_side(self):
     model = Environment.GridWorldModel()
     model.x = Environment.MAX_X
     model.y = Environment.START_Y
     reward, state = model.take_action(actions['right'])
     assert model.x == Environment.MAX_X
     assert model.y == Environment.START_Y
     assert reward == 0
     assert state == Environment.GridWorldModel.state_from_xy(model.x, model.y)
Example #33
 def test_cannot_fall_off_top(self):
     model = Environment.GridWorldModel()
     model.x = Environment.START_X
     model.y = 0
     reward, state = model.take_action(actions['up'])
     assert model.x == Environment.START_X
     assert model.y == 0
     assert reward == 0
     assert state == Environment.GridWorldModel.state_from_xy(model.x, model.y)
Example #34
    def testCompare(self):
        '''Checks comparing configurations'''

        newConfiguration = Environment.Configuration()
        self.assertEqual(newConfiguration, self.configuration)

        #Compare to some other object

        self.assertNotEqual(None, self.configuration)
Example #36
def write_addrs_Json(data):
    ## Save our changes to JSON file
    PATH = ENV.setAddressFile()
    ##Create a new file if it does not exist, otherwise update the existing one
    action = "w+" if os.path.isfile(PATH) and os.access(PATH, os.R_OK) else "w"

    jsonFile = open(PATH, action)
    jsonFile.write(json.dumps(data))
    jsonFile.close()
Example #37
 def __init__(self):
     self.env = Env.Game()
     # Game environment
     self.actions = [0, 1, 2, 3]
     # 0-3 is N, W, S, E
     self.state = self.state_locator()
     self.obs = self.obs_locator()
     # self.qmatrix = np.zeros((8, 255, 4), dtype=int)
     self.qmatrix = np.zeros((8, 15, 4), dtype=int)
Example #38
def eval(args, called_from):
	if not isinstance(args, Atom.Atom) and not isinstance(args, Cons.Cons):
		return args

	if isinstance(args, Cons.Cons):
		args = args.first
	atom = Atom.evaluate(args, called_from)
	if isinstance(atom, Atom.Atom):
		if atom.type == Atom.Atom.SYMBOL:
			return eval(Environment.get_symbol_value(atom.data), called_from)
		atom = Atom.evaluate(atom, called_from)

	return atom
Example #39
 def startLoop(self):
     itemcount = 0
     count = random.uniform(1,50)
     random.seed()
     
     while(1):
         visual.rate(const.framerate) # Frame rate
         
         # check for events, drive actions; must be executed repeatedly in a loop
         self.cCtrl.ctrls.interact()
         
         # do multiple simulation steps per frame to prevent jittering due to
         # 'small' collisions
         n = 6
         
         if itemcount < count:
             itemcount += 1
             self.bodies.append(e.drop_object(self.cWorld.world, self.cRobot.center))
                 
         itemcount+=1
         
         if itemcount == 500:
             itemcount = 0
             for b in self.bodies:
                 for body in b.GetElementKeys():
                     b.RemoveElement(body)
             self.bodies = []
     
         for i in range(n):           
             # Simulation step
             self.cWorld.world.step(self.dt/n)
             
             # terrain for future implementation
             #self.Terrain.UpdateDisplay()
             
             if self.cRobot.bodyExists():
                 self.cRobot.refreshRobot(self.cCtrl.lBody)
             
                 if (self.cRobot.centerRobot):
                     self.cWorld.world._getScene().center = self.cRobot.center
                 
                 for leg in self.cRobot.tibia:
                     leg.UpdateDisplay()
                     
                 for leg in self.cRobot.femur:
                     leg.UpdateDisplay()
                     
                 for b in self.bodies:
                     b.UpdateDisplay()
Example #40
    def simulate_one_episode(self,init_state):
        state_seq = []
        action_seq = []
        reward = 0

        state = init_state
        while True:
            self.Ns[state] += 1
            eps_t = self.N0/(self.N0 + self.Ns[state])
            action  = super(MonteCarloLearner,self).eps_greedy(state,eps_t)
            state_seq.append(state)
            action_seq.append(action)
            state,reward = Environment.step(state,action)

            if state == TERMINAL:
                return state_seq,action_seq,reward
Example #41
    def removeObject(self, object):
        """Removes |object| from the universe. If |object| is a Collection,
        each of its elements is removed. The object to be removed must
        be in the universe."""
        if Collection.isCollection(object) or Utility.isSequenceObject(object):
            for o in object:
                self.removeObject(o)
        elif ChemicalObjects.isChemicalObject(object):
            if object.parent != self:
                raise ValueError, `object` + ' is not in this universe.'
            object.parent = None
            self._objects.removeObject(object)
            self._changed(1)
        elif Environment.isEnvironmentObject(object):
            self._environment.remove(object)
            self._changed(0)
        else:
            raise ValueError, `object` + ' is not in this universe.'
Example #42
 def __init__(self):
     '''Uses VPython to visualize, manipulate and simulate the Quadruped live.'''
     self.bodies = []
     self.cWorld = myWorld()
     self.createUI(self.cWorld.world._getScene())
     
     # terrain for future implementation
     #self.myWorld = e.Heightmap(self.cWorld.world)
     #self.Terrain = self.myWorld.makeWorld()
     
     self.cRobot = Robot(self.cWorld.world, vpyode._bigSpace, 50)
     self.cCtrl = ControlWindow(self.cWorld.world._getScene(), self.cRobot, self.cWorld)
     
     self.cRobot.dropRobot()
     
     self.dt = 1.0/const.framerate
     self.refresh = 0
     
     self.bodies.append(e.drop_object(self.cWorld.world, self.cRobot.center))
Example #43
def symbol_let(fun, args):
	Environment.enter_frame()
	params = Atom.cons_to_list(args[0].data)
	result = args[1]

	for pair in params:
		pair = pair.data
		symbol_name = pair.first.data
		symbol_value = Atom.evaluate(pair.second.data.first, fun)
		Environment.set_dynamic_symbol(symbol_name, symbol_value)

	result = Atom.evaluate(result, fun)
	Environment.leave_frame()
	return result
Example #44
def symbol_dolist(fun, args):
	pair = Atom.cons_to_list(args[0].data)
	symbol_name = pair[0].data
	list = Atom.cons_to_list(Atom.evaluate(pair[1]).data)

	Environment.enter_frame()

	body = args[1:]
	result = None
	for x in list:
		Environment.set_dynamic_symbol(symbol_name, x)
		for s in body:
			result = Atom.evaluate(s)

	Environment.leave_frame()
	return result
Example #45
    def __init__(self, screen):
        super(Editor, self).__init__(screen)
        
        self._editor_gui = EditorGUI(vec2(screen.get_size()))
        if self._editor_gui.get_edit_mode() != EDIT_MODE_STATIC:
            self._editor_gui.set_left_dynamic_icons(DYNAMIC_OBJECTS)

        self._viewport = vec2(screen.get_size())
        self._environment = Environment()
        self._position = vec2(10, 10)
        self._rmb_drag = False
        self._lmb_drag = False
        self._exit = False

        self._brush_visible = False
        self._brush_screen_pos = vec2(0, 0)
        self._brush_world_pos = vec2(0, 0)
        self._brush_size = 3
        self._brush_tint = create_color_mask(TILE_SIZE, (0, 0, 255, 127))

        self._terrain_grid_sprites = []
        self._static_object_sprites = []
Example #46
def evaluate_function(fun_name, args, params, results):
	Environment.enter_frame()

	# initialize parameters
	for i, arg in enumerate(args):
		param_name = params[i].data
		param_value = Atom.evaluate(arg, fun_name)  # eval(arg, fun_name)
		Environment.set_dynamic_symbol(param_name,
										param_value if isinstance(param_value, Atom.Atom) else Atom.make_atom(param_value))

	result = None
	for x in results:
		# evaluate result of function given current stack frame
		result = eval(x, fun_name)

	Environment.leave_frame()
	return result
Example #47
 def _load(self, filepath):
     dataPoints = StegImageReader.load(filepath)
     
     curFloor = -1
     
     for i in dataPoints:
         if i[0] == "ADDRESS":
             self._address = i[1]
         elif i[0] == "WEATHERNAME":
             self.setEnvironment(Environment._getEnvironment(i[1]))
         elif i[0] == "GAMEMODE":
             self.setGameMode(GameMode._getGameMode(i[1]))
         elif i[0] == "TOTALMONEY":
             pass
         elif i[0] == "LEGALCHARACTERS":
             for j in i[1]:
                 self.addCharacter(Characters._getCharacterByDataName(j))
         elif i[0] == "GOALTYPE":
             self.setGoalType(GoalType._getGoalType(i[1]))
         elif i[0] == "GOALICON":
             pass
         elif i[0] == "GOALTEXT":
             self.setGoalText(i[1])
         elif i[0] == "INFINITEMONEY":
             self._infiniteMoney = True
         elif i[0] == "DIALOGUE":
             pass
         elif i[0] == "MAPSTART":
             curFloor += 1
         elif 0 <= curFloor < 8:
             if i[0] == "MAPNAME":
                 self._floors[curFloor].setName(i[1])
             elif i[0] == "ROOMNAMES":
                 for j in range(16):
                     self._floors[curFloor].setRoomName(j, i[1][j])
             elif i[0] == "MAPDATA":
                 self._floors[curFloor]._dumpData(i[1])
Example #48
def brute_force(max_steps):
    best_action_num = 0
    best_clean_cells = 0

    actions = [1, 4, 4, 1, 2, 2, 2, 2]

    agent = FakeMemorylessAgent(actions)

    n = 4
    m = 4
    p = 1.0

    ## Set up world environment
    environment = Environment(n, m, p)

    ## Main loop
    MAX_ACTIONS = max_steps # prevent from running forever
    num_actions = 0
    num_clean_cells = [0] * max_steps
    running = True
    while (running and num_actions < MAX_ACTIONS):
        # print current world
        print "Action " + str(num_actions)
        environment.printCurrentWorld()

        # set up percept
        percept = environment.getPercept()

        # agent performs a step
        action = agent.takeStep(percept)

        # update environment and counters
        running = environment.updateWorld(action)

        # print num actions & num clean cells
        num_clean_cells[num_actions] = environment.getNumCleanCells()
        num_actions += 1
        print str(num_actions) + ", " + str(num_clean_cells)

    return num_clean_cells
Example #49
'''
0) environment create
1) player add-delete
2) forage add-delete
3) playerWindow
4) player move / 4 directions & eat player, ghost or forage / checking for obstacles that block movement / breaking the wall xD
5) get scoreboard
6) save & load

'''

#######################################
# environment create
#######################################

env = Environment()
env.mapGenerator()
env.getAllMap()

#######################################
# player add-delete
#######################################

#add player test: p1 cannot be added but p2 can be added
c1=Coordinate(1,1)
c2=Coordinate(0,0)
p1=PlayerFactory().new("p1","Pacman",0,1,c1) 
env.addPlayer(p1) 
env.getAllMap()
print
p2=PlayerFactory().new("p2","Ghost",0,-1,c2)
Example #50
import sys
from Observation import *
from Reward import *
from Action import *
from Agent import *
from Environment import *
from UnawareEnvironment import *
from HelplessEnvironment import *
from MatrixEnvironment import *
import numpy

# Set up environment
gridEnvironment = Environment()
gridEnvironment.randomStart = False
gridEnvironment.humanWander = False

# Set up agent
gridAgent = Agent(gridEnvironment)

# Training episodes
episodes = 10000

# This is where learning happens
for i in range(episodes):
	gridAgent.qLearn(gridAgent.initialObs)
	
	if i%1000 == 0:
		print i

# Use this to prompt user for the initial state (agent x,y and human x,y)
'''
Example #51
            action_seq.append(action)
            state,reward = Environment.step(state,action)

            if state == TERMINAL:
                return state_seq,action_seq,reward

    def learn(self,init_state):
        state_seq,action_seq,reward = self.simulate_one_episode(init_state)
        for state,action in zip(state_seq,action_seq):
            idx = state+(action,)
            self.Nsa[idx] += 1
            alpha_t = 1.0/self.Nsa[idx]
            self.qvalue_table[idx] += alpha_t * (reward - self.qvalue_table[idx])
        return reward

    def show_value(self):
        print("\nN0 is {}".format(self.N0))
        super(MonteCarloLearner,self).show_value()

if __name__ == "__main__":
    MC_learner = MonteCarloLearner(100)
    with open("MC_qtable/episode{}".format(MAX_ITER),'w') as q_file:
        for episode in range(MAX_ITER):
            sys.stdout.write('\rEpisode {}'.format((episode+1)))
            init_state =  Environment.init()
            MC_learner.learn(init_state)
        MC_learner.show_value()
        pickle.dump(MC_learner.get_q(),q_file)
    with open("MC_qtable/episode{}".format(MAX_ITER)) as q_file:
        test = pickle.load(q_file)
Example #52
episodes = 500

# how often to report training results
trainingReportRate = 100

# play the interactive game?
# 0: human does not play
# 1: human plays as the bot
# 2: human plays as the enemy
play = 2

#Max reward received in any iteration
maxr = None

# Set up environment for initial training
gridEnvironment = Environment()
gridEnvironment.randomStart = False
gridEnvironment.enemyMode = 2
gridEnvironment.verbose = 0

# Set up agent
gridAgent = Agent(gridEnvironment)
gridAgent.verbose = False

# This is where learning happens
for i in range(episodes):
	# Train
	gridAgent.agent_reset()
	gridAgent.qLearn(gridAgent.initialObs)
	# Test
	gridAgent.agent_reset()
Example #53
if n <= 0 or m <= 0:
	printUsage()
	sys.exit(3)

if p < 0.0 or p > 1.0:
	printUsage()
	sys.exit(4)

all_num_clean_cells = []
number_actions_to_take = []
for i in range(50):
    print "\n\n\n"
    print "******************************%d***************************************" % i
    ## Set up world environment
    environment = Environment(n, m, p)

    ## Main loop
    MAX_ACTIONS = 8000 # prevent from running forever
    num_actions = 0
    num_clean_cells = []
    running = True
    while (running and num_actions < MAX_ACTIONS):
        # print current world
        #print "Action " + str(num_actions)
        #environment.printCurrentWorld()

        # set up percept
        percept = environment.getPercept()

        # agent performs a step
Example #54
import random
import sys
import copy
import operator
from Observation import *
from Reward import *
from Action import *
from Environment import *
from Agent import *
from random import Random

# Make an agent
gridEnvironment = Environment()
gridAgent = Agent(gridEnvironment)

# How many states to make?
numStates = 10

states = []

# Make some states
for i in range(numStates):
	# Make a state
	state = [random.randint(1,gridEnvironment.width-1), random.randint(1,gridEnvironment.height-1), True, random.randint(1,gridEnvironment.width-1), random.randint(1,gridEnvironment.height-1), False, False, False]
	states.append(state)
	# Create an entry in v_table for state
	entry = []
	for j in range(gridAgent.numActions):
		entry.append((random.random()-0.5)*100.0)
	gridAgent.v_table[gridAgent.calculateFlatState(state)] = entry
print "v table:"
Example #55
def run_experiment(args):
    parameters = Parameters.processArguments(args, __doc__)

    #if the nnFile is a directory, check for a previous experiment run in it and start from there
    #load its parameters, append to its evalresults file, open its largest network file
    #If it is None, create an experiment directory: create a results file, save parameters, save network files there.

    experimentDirectory = parameters.rom + "_" + time.strftime("%d-%m-%Y-%H-%M") +"/"
    resultsFileName = experimentDirectory + "results.csv"
    startingEpoch = 1
    if parameters.nnFile is None or parameters.nnFile.endswith(".pkl"):
        #Create your experiment directory, results file, save parameters
        if not os.path.isdir(experimentDirectory):
            os.mkdir(experimentDirectory)

        resultsFile = open(resultsFileName, "a")
        resultsFile.write("Epoch,\tAverageReward,\tMean Q Value\n")
        resultsFile.close()

        parametersFile = open(experimentDirectory + "parameters.pkl" , 'wb', -1)
        cPickle.dump(parameters,parametersFile)
        parametersFile.close()


    if parameters.nnFile is not None and os.path.isdir(parameters.nnFile):
        #Found an experiment directory
        if not parameters.nnFile.endswith("/"):
            parameters.nnFile += "/"

        experimentDirectory = parameters.nnFile
        resultsFileName = experimentDirectory + "results.csv"

        if os.path.exists(experimentDirectory + "parameters.pkl"):
            parametersFile = open(experimentDirectory + "parameters.pkl" , 'rb')
            parameters = cPickle.load(parametersFile)
            parametersFile.close()
        else:
            parametersFile = open(experimentDirectory + "parameters.pkl" , 'wb', -1)
            cPickle.dump(parameters,parametersFile)
            parametersFile.close()

        contents = os.listdir(experimentDirectory)
        networkFiles = []
        for handle in contents:
            if handle.startswith("network") and handle.endswith(".pkl"):
                networkFiles.append(handle)

        if len(networkFiles) == 0:
            #Found a premature experiment that didn't finish a single training epoch
            parameters.nnFile = None
        else:
            #Found a previous experiment's network files, now find the highest epoch number
            highestNNFile = networkFiles[0]
            highestNetworkEpochNumber = int(highestNNFile[highestNNFile.index("_") + 1 : highestNNFile.index(".")])
            for networkFile in networkFiles:
                networkEpochNumber =  int(networkFile[networkFile.index("_") + 1 : networkFile.index(".")])
                if networkEpochNumber > highestNetworkEpochNumber:
                    highestNNFile = networkFile
                    highestNetworkEpochNumber = networkEpochNumber

            startingEpoch = highestNetworkEpochNumber + 1
            #don't use full exploration, it's not a good way to fill the replay memory when we already have a decent policy
            if startingEpoch > 1:
                parameters.epsilonStart = parameters.epsilonEnd

            parameters.nnFile = experimentDirectory + highestNNFile
            print "Loaded experiment: " + experimentDirectory + "\nLoaded network file:" + highestNNFile


    sys.setrecursionlimit(10000)
    ale = ALEInterface()

    Environment.initializeALEParameters(ale, parameters.seed, parameters.frameSkip, parameters.repeatActionProbability, parameters.displayScreen)
    ale.loadROM(parameters.fullRomPath)
    minimalActions = ale.getMinimalActionSet()


    agent = DQNAgent.DQNAgent(minimalActions, parameters.croppedHeight, parameters.croppedWidth, 
                parameters.batchSize, 
                parameters.phiLength,
                parameters.nnFile, 
                parameters.loadWeightsFlipped, 
                parameters.updateFrequency, 
                parameters.replayMemorySize, 
                parameters.replayStartSize,
                parameters.networkType, 
                parameters.updateRule, 
                parameters.batchAccumulator, 
                parameters.networkUpdateDelay,
                parameters.discountRate, 
                parameters.learningRate, 
                parameters.rmsRho, 
                parameters.rmsEpsilon, 
                parameters.momentum,
                parameters.epsilonStart, 
                parameters.epsilonEnd, 
                parameters.epsilonDecaySteps,
                parameters.evalEpsilon,
                parameters.useSARSAUpdate,
                parameters.kReturnLength)



    for epoch in xrange(startingEpoch, parameters.epochs + 1):
        agent.startTrainingEpoch(epoch)
        runTrainingEpoch(ale, agent, epoch, parameters.stepsPerEpoch)
        agent.endTrainingEpoch(epoch)

        networkFileName = experimentDirectory + "network_" + str(epoch) + ".pkl"
        DeepNetworks.saveNetworkParams(agent.network.qValueNetwork, networkFileName)

        if parameters.stepsPerTest > 0 and epoch % parameters.evaluationFrequency == 0:
            agent.startEvaluationEpoch(epoch)
            avgReward = runEvaluationEpoch(ale, agent, epoch, parameters.stepsPerTest)
            holdoutQVals = agent.computeHoldoutQValues(3200)

            resultsFile = open(resultsFileName, 'a')
            resultsFile.write(str(epoch) + ",\t" + str(round(avgReward, 4)) + ",\t\t" + str(round(holdoutQVals, 4)) + "\n")
            resultsFile.close()

            agent.endEvaluationEpoch(epoch)

    agent.agentCleanup()
Example #56
	sys.exit(2)

n = int(sys.argv[2]) if len(sys.argv) > 2 else 10 # columns
m = int(sys.argv[3]) if len(sys.argv) > 3 else 10 # rows
p = float(sys.argv[4]) if len(sys.argv) > 4 else 1.0 # probability of dirt

if n <= 0 or m <= 0:
	printUsage()
	sys.exit(3)

if p < 0.0 or p > 1.0:
	printUsage()
	sys.exit(4)

## Set up world environment
environment = Environment(n, m, p)

## Main loop
MAX_ACTIONS = 1000000 # prevent from running forever
num_actions = 0
num_clean_cells = []
running = True
while (running and num_actions < MAX_ACTIONS):
    # print current world
    environment.printCurrentWorld()

    # set up percept
    percept = environment.getPercept()

    # agent performs a step
    action = agent.takeStep(percept)
Example #57
def run_experiment(args):
    parameters = Parameters.processArguments(args, __doc__)

    #if the nnFile is a directory, check for a previous experiment run in it and start from there
    #load its parameters, append to its evalresults file, open its largest network file
    #If it is None, create an experiment directory: create a results file, save parameters, save network files there.

    experimentDirectory = parameters.rom + "_" + time.strftime("%d-%m-%Y-%H-%M") +"/"
    resultsFileName = experimentDirectory + "results.csv"
    startingEpoch = 0
    if parameters.nnFile is None or parameters.nnFile.endswith(".pkl"):
        #Create your experiment directory, results file, save parameters
        if not os.path.isdir(experimentDirectory):
            os.mkdir(experimentDirectory)

        resultsFile = open(resultsFileName, "a")
        resultsFile.write("Epoch,\tAverageReward,\tMean Q Value\n")
        resultsFile.close()

        parametersFile = open(experimentDirectory + "parameters.pkl" , 'wb', -1)
        cPickle.dump(parameters,parametersFile)
        parametersFile.close()


    if parameters.nnFile is not None and os.path.isdir(parameters.nnFile):
        #Found an experiment directory
        if not parameters.nnFile.endswith("/"):
            parameters.nnFile += "/"

        experimentDirectory = parameters.nnFile
        resultsFileName = experimentDirectory + "results.csv"

        if os.path.exists(experimentDirectory + "parameters.pkl"):
            parametersFile = open(experimentDirectory + "parameters.pkl" , 'rb')
            parameters = cPickle.load(parametersFile)
            parametersFile.close()
        else:
            parametersFile = open(experimentDirectory + "parameters.pkl" , 'wb', -1)
            cPickle.dump(parameters,parametersFile)
            parametersFile.close()

        contents = os.listdir(experimentDirectory)
        networkFiles = []
        for handle in contents:
            if handle.startswith("network") and handle.endswith(".pkl"):
                networkFiles.append(handle)

        if len(networkFiles) == 0:
            #Found a premature experiment that didn't finish a single training epoch
            parameters.nnFile = None
        else:
            #Found a previous experiment's network files, now find the highest epoch number
            highestNNFile = networkFiles[0]
            highestNetworkEpochNumber = int(highestNNFile[highestNNFile.index("_") + 1 : highestNNFile.index(".")])
            for networkFile in networkFiles:
                networkEpochNumber =  int(networkFile[networkFile.index("_") + 1 : networkFile.index(".")])
                if networkEpochNumber > highestNetworkEpochNumber:
                    highestNNFile = networkFile
                    highestNetworkEpochNumber = networkEpochNumber

            startingEpoch = highestNetworkEpochNumber + 1
            #don't use full exploration, it's not a good way to fill the replay memory when we already have a decent policy
            if startingEpoch > 4:
                parameters.epsilonStart = parameters.epsilonEnd

            parameters.nnFile = experimentDirectory + highestNNFile
            print "Loaded experiment: " + experimentDirectory + "\nLoaded network file:" + highestNNFile

    
    sys.setrecursionlimit(10000)
    ale = ALEInterface()

    Environment.initializeALEParameters(ale, parameters.seed, parameters.frameSkip, parameters.repeatActionProbability, parameters.displayScreen)

    # ale.loadROM(parameters.fullRomPath)

    # minimalActions = ale.getMinimalActionSet()

    # difficulties = ale.getAvailableDifficulties()
    # modes = ale.getAvailableModes()

    # maxNumFlavors = len(difficulties) * len(modes)

    # difficulties = createFlavorList(parameters.difficultyString, len(difficulties))
    # modes = createFlavorList(parameters.modeString, len(modes))

    # transferTaskModule = TransferTaskModule.TransferTaskModule(difficulties, modes)


    transferTaskModule = TransferTaskModule.TransferTaskModule(ale, parameters.roms, parameters.difficultyString, parameters.modeString, parameters.taskBatchFlag)
    numActionsToUse = transferTaskModule.getNumTotalActions()
    print "Number of total tasks:" + str(transferTaskModule.getNumTasks()) + " across " + str(transferTaskModule.getNumGames()) + " games."
    print "Actions List:" + str(transferTaskModule.getTotalActionsList())
    # print "Num difficulties: " + str(len(difficulties)) + " num modes: " + str(len(modes)) + " numtasks: " + str(transferTaskModule.getNumTasks())
    # print "Modes: " + str(modes)
    # print "Difficulties: " + str(difficulties)

    numTransferTasks = transferTaskModule.getNumTasks()

    if (parameters.reduceEpochLengthByNumFlavors):
        parameters.stepsPerEpoch = int(parameters.stepsPerEpoch / numTransferTasks)

    agent = DQTNAgent.DQTNAgent(transferTaskModule.getTotalActionsList(), parameters.croppedHeight, parameters.croppedWidth, 
                parameters.batchSize, 
                parameters.phiLength,
                parameters.nnFile, 
                parameters.loadWeightsFlipped, 
                parameters.updateFrequency, 
                parameters.replayMemorySize, 
                parameters.replayStartSize,
                parameters.networkType, 
                parameters.updateRule, 
                parameters.batchAccumulator, 
                parameters.networkUpdateDelay,
                transferTaskModule,
                parameters.transferExperimentType,
                numTransferTasks,
                parameters.discountRate, 
                parameters.learningRate, 
                parameters.rmsRho, 
                parameters.rmsEpsilon, 
                parameters.momentum,
                parameters.epsilonStart, 
                parameters.epsilonEnd, 
                parameters.epsilonDecaySteps,
                parameters.evalEpsilon,
                parameters.useSARSAUpdate,
                parameters.kReturnLength,
                parameters.deathEndsEpisode)



    for epoch in xrange(startingEpoch, parameters.epochs + 1):
        agent.startTrainingEpoch(epoch)
        runTrainingEpoch(ale, agent, epoch, parameters.stepsPerEpoch, transferTaskModule, parameters.frameSkip, parameters.maxNoActions)
        agent.endTrainingEpoch(epoch)

        networkFileName = experimentDirectory + "network_" + str(epoch) + ".pkl"
        DeepNetworks.saveNetworkParams(agent.network.qValueNetwork, networkFileName)

        print "Total number of samples seen per task: "
        print str(agent.trainingMemory.totalTaskSampleCount)

        if parameters.stepsPerTest > 0 and epoch % parameters.evaluationFrequency == 0:
            agent.startEvaluationEpoch(epoch)
            avgRewardPerTask = runEvaluationEpoch(ale, agent, epoch, parameters.stepsPerTest, transferTaskModule, parameters.frameSkip, parameters.maxNoActions)
            holdoutQVals = agent.computeHoldoutQValues(parameters.numHoldoutQValues)

            resultsFile = open(resultsFileName, 'a')
            resultsFile.write(str(epoch) + ",\t")
            resultsString = ""

            for avgReward in avgRewardPerTask:
                resultsString += str(round(avgReward, 4)) + ",\t"

            resultsFile.write(resultsString)
            resultsFile.write("\t" + str([round(x, 4) for x in holdoutQVals]) + "\n")
            resultsFile.close()

            agent.endEvaluationEpoch(epoch)

    agent.agentCleanup()
Example #58
import pickle

random.seed(2342040)
filePrefix = "cBigWorld"

print "\n----- Running World Simulation -----\n"


        
#World = Environment()
if (raw_input("(l)oad world or (c)reate new world?: ")=='l'):
    with open("save/"+raw_input("Enter World name:")) as f:
        World = pickle.load(f)
        print "World Loaded..."
else:
    World = Environment()
    for i in range(0,100):
        World.addCreature( randCreature() )
    print "Creatures Created..."
    filename = filePrefix+"0"
    print "\nSaving New World to file " +filename+"..."
    with open('save/'+filename,'w') as f:
        pickle.dump(World,f)
        print "World saved to file..."



World.printStatus()
n = 1000
print "\n----- World Simulation Starting for "+`n`+" steps -----\n"
Example #59
    def __setattr__(self, attr, value):
        self.__dict__[attr] = value
        if attr[0] != '_' and (ChemicalObjects.isChemicalObject(value)
                               or Environment.isEnvironmentObject(value)):
            self.addObject(value)
Example #60
	def __init__(self, maxfiles=[], cc='gcc'):
		self.MAXELEROSDIR = Environment.require("MAXELEROSDIR")
		self.MAXCOMPILERDIR = Environment.require("MAXCOMPILERDIR")
		self.MAXNETDIR = Environment.optional("MAXCOMPILERNETDIR")
		self.cc = cc
		self.maxfiles = maxfiles