Пример #1
0
def launch_environment(parameters):
    """Create a SumoEnvironment and pass it to the RL-Glue environment loader.

    Meant to be run in a separate process started through multiprocessing.
    The ``parameters`` argument is accepted but not used by this function.
    """
    # Imported at call time rather than at module import.
    from rl_glue_sumo_environment import SumoEnvironment
    EnvironmentLoader.loadEnvironment(SumoEnvironment())
def main():
    """Build an InvasiveEnvironment with fixed settings and load it into RL-Glue."""
    environment = InvasiveEnvironment(
        simulationParameterObj=None,
        actionParameterObj=None,
        Bad_Action_Penalty=-10000,
        fixedStartState=False,
        nbrReaches=REACHES,
        habitatSize=HABITATS,
        seed=1,
    )
    EnvironmentLoader.loadEnvironment(environment)
Пример #3
0
        description=
        'Run a POMDP problem file as a domain in RL-Glue in network mode.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--file",
        type=str,
        default=config_files[0],
        help="Run POMDP domain given the path to a POMDP problem file.")
    group.add_argument("--list",
                       action='store_true',
                       default=False,
                       help="List path to included POMDP problem files.")
    args = parser.parse_args()

    if args.list:
        print "Included POMDP problem files:"
        for file in config_files:
            print file
    else:
        EnvironmentLoader.loadEnvironment(
            POMDPEnvironment(spec_filename=args.file))

    parser = argparse.ArgumentParser(
        description='Run a specified POMDP in RL-Glue in network mode.')
    parser.add_argument("--pomdp_file",
                        type=str,
                        help="Filename for POMDP spec file to load and use.",
                        required=True)
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(POMDPEnvironment(args.pomdp_file))
Пример #4
0
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self,inMessage):
        """Return a fixed reply; the content of ``inMessage`` is ignored."""
        reply = "I don't know how to respond to your message"
        return reply


def _parse_bool_flag(text):
    """Convert a command-line token to a bool.

    ``type=bool`` would map every non-empty string (including "False") to
    True. Here the usual false spellings give False and any other value
    gives True, so previously working invocations keep their meaning.
    """
    return text.strip().lower() not in ("", "0", "false", "f", "no", "n", "off")


def addGridworldArgs(parser):
    """Register the gridworld command-line options on ``parser``.

    Adds --size_x, --size_y, --goal_x, --goal_y, --noise, --fudge and
    --random_restarts. ``parser`` is modified in place; nothing is returned.
    """
    parser.add_argument("--size_x", type=float, default=10, help="Size of the gridworld in the x (horizontal) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--size_y", type=float, default=10, help="Size of the gridworld in the y (vertical) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--goal_x", type=float, default=10, help="Goal x coordinate")
    parser.add_argument("--goal_y", type=float, default=10, help="Goal y coordinate")
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate")
    parser.add_argument("--fudge", type=float, default=1.4143, help="Distance from goal allowed before episode is counted as finished")
    # Parsed with an explicit converter so "--random_restarts False" is False.
    parser.add_argument("--random_restarts", type=_parse_bool_flag, default=False, help="Randomly assign x,y initial locations.")

if __name__ == "__main__":
    import argparse

    # Read the gridworld options from the command line, then serve the
    # resulting environment through the RL-Glue loader.
    parser = argparse.ArgumentParser(
        description='Run 2D Noisy Continuous Gridworld environment in network mode.')
    addGridworldArgs(parser)
    args = parser.parse_args()
    environment = Gridworld(
        size_x=args.size_x,
        size_y=args.size_y,
        goal_x=args.goal_x,
        goal_y=args.goal_y,
        noise=args.noise,
        random_start=args.random_restarts,
        fudge=args.fudge,
    )
    EnvironmentLoader.loadEnvironment(environment)
                        self.o.doubleArray=list(range(0,50000))
                        terminal=0
                        if self.stepCount==200:
                                terminal=1
                        ro=Reward_observation_terminal()
                        ro.r=1.0
                        ro.o=self.o
                        ro.terminal=terminal
                        return ro

                self.o.intArray=list(range(0,5))
                #cheating, might break something
                self.o.doubleArray=list(range(0,5))
                terminal=0
                if self.stepCount==5000:
                        terminal=1
                ro=Reward_observation_terminal()
                ro.r=1.0
                ro.o=self.o
                ro.terminal=terminal
                return ro
                
        def env_cleanup(self):
                """No-op: this environment performs no cleanup work."""

        def env_message(self,inMessage):
                """Ignore ``inMessage`` and return None."""
                return None

if __name__ == "__main__":
        # Script entry point: create the environment, then hand it to the loader.
        environment = test_speed_environment()
        EnvironmentLoader.loadEnvironment(environment)
		
	def printState(self):
		"""Print the agent position followed by a text rendering of ``self.map``.

		Each row is printed on its own line. The agent's cell is shown as "A";
		other cells are shown as "G", "M", "*" or " " depending on which of the
		WORLD_GOAL / WORLD_MINE / WORLD_OBSTACLE / WORLD_FREE constants the
		cell value equals.
		"""
		numRows=len(self.map)
		numCols=len(self.map[0])
		print "Agent is at: "+str(self.agentRow)+","+str(self.agentCol)
		# NOTE(review): this column legend is a fixed string and does not
		# depend on numCols.
		print "Columns:0-10                10-17"
		print "Col    ",
		# Column header: last digit of every column index, on one line.
		for col in range(0,numCols):
			print col%10,
			
		for row in range(0,numRows):
			# A bare print ends the previous output line before the row label.
			print
			print "Row: "+str(row)+" ",
			for col in range(0,numCols):
				if self.agentRow==row and self.agentCol==col:
					print "A",
				else:
					if self.map[row][col] == self.WORLD_GOAL:
						print "G",
					if self.map[row][col] == self.WORLD_MINE:
						print "M",
					if self.map[row][col] == self.WORLD_OBSTACLE:
						print "*",
					if self.map[row][col] == self.WORLD_FREE:
						print " ",
		# Terminate the final row.
		print
		

if __name__ == "__main__":
	# Script entry point: create the environment, then hand it to the loader.
	environment = mines_environment()
	EnvironmentLoader.loadEnvironment(environment)
Пример #7
0
        result = re.match('set (.+) (.+)', msg)
        if msg.startswith('set'):
            param, value = msg.split(None, 2)[1:]
            self.debug('set', param, value)
            
            self.env_message_set_param(param, value)
        
        elif msg.startswith('get'):
            param = msg.split(None, 1)[1]
            
            return self.env_message_get_param(param)
        
        else:
            return self.env_message_handler(msg)
    
    def debug(self, *args):
        """Print the given values, space-separated and prefixed with ``self.name``.

        Every argument is converted with ``str`` before joining.
        """
        # NOTE(review): inside a method named ``debug``, ``self.debug`` resolves
        # to this bound method (always truthy) unless an instance attribute of
        # the same name shadows it -- in which case calls to ``self.debug(...)``
        # would fail. The guard probably wants a separate flag; confirm.
        if self.debug:
            args = [str(a) for a in args]
            print "%s: %s" % (self.name, ' '.join(args))
    
    def step_out(self, *args):
        """Print the given values, space-separated, when ``self.output_steps`` is truthy."""
        if self.output_steps:
            # Convert everything to str so join() accepts non-string values.
            args = [str(a) for a in args]
            print ' '.join(args)
    
if __name__ == '__main__':
    # Script entry point: create the environment, then hand it to the loader.
    environment = PuddleEnvironment()
    EnvironmentLoader.loadEnvironment(environment)
Пример #8
0
                    return (-1 - i)

        if self.presentCol > 6999 and self.presentCol < 8000:
            if self.presentRow > 5999 and self.presentRow < 9000:
                return -1

        if self.presentCol > 5999 and self.presentCol < 7000:
            if self.presentRow > 4999 and self.presentRow < 8000:
                return -2

        #Reward
        if self.presentCol > 10999:
            if self.presentRow < 1000:
                return 10

        return 0

    #Checking if the current position is the goal state
    def goalcheck(self):
        """Return True when the current position lies in the single goal region.

        The goal region is ``presentCol > 10999`` together with
        ``presentRow < 1000``; any other position yields False.
        """
        in_goal = self.presentCol > 10999 and self.presentRow < 1000
        return True if in_goal else False


if __name__ == "__main__":
    # Script entry point: create the environment, then hand it to the loader.
    environment = puddle_world()
    EnvironmentLoader.loadEnvironment(environment)
Пример #9
0
        r = 0.0
        if np.any(self.state_ranges[:,0] > self.state[:]) or \
           np.any(self.state_ranges[:,1] < self.state[:]):
            #            r = -1
            r = -np.sum(3.0 * self.state_ranges[:, 1]**2)
            r *= 6000 - self.num_sim_steps
            terminate = True
        else:
            #            perr = np.linalg.norm(self.prevState[:2] - self.state_goal[:2])
            #            nerr = np.linalg.norm(self.state[:2] - self.state_goal[:2])
            #            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
            #                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))* \
            #                math.exp(-np.sum(abs(self.state[6:8]-self.state_goal[6:8])/(.1*(self.state_ranges[6:8,1]-self.state_ranges[6:8,0]))))
            #            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
            #                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))
            #            r -= (np.sum(((self.state[:2]-self.state_goal[:2])/(self.state_ranges[:2,1]-self.state_ranges[:2,0]))**2)+ \
            #                  np.sum(((self.state[3:5]-self.state_goal[3:5])/(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))**2))
            r -= (self.state[0] - self.state_goal[0])**2
            r -= (self.state[1] - self.state_goal[1])**2
            r -= self.state[3]**2
            r -= self.state[4]**2

            terminate = False

        print("reward " + str(r))
        return r, terminate


if __name__ == "__main__":
    # Script entry point: create the environment, then hand it to the loader.
    environment = vrep_environment()
    EnvironmentLoader.loadEnvironment(environment)
Пример #10
0
			theReward=-1
			episodeOver=1

		if self.currentState >= 20:
			self.currentState=20
			theReward=1
			episodeOver=1
		
		theObs=Observation()
		theObs.intArray=[self.currentState]
		
		returnRO=Reward_observation_terminal()
		returnRO.r=theReward
		returnRO.o=theObs
		returnRO.terminal=episodeOver
		
		return returnRO

	def env_cleanup(self):
		"""No-op: this environment performs no cleanup work."""

	def env_message(self,inMessage):
		"""Answer the name query; give a fixed fallback reply to anything else."""
		knows_answer = inMessage == "what is your name?"
		return ("my name is dqn_environment, Python edition!" if knows_answer
			else "I don't know how to respond to your message")


if __name__ == "__main__":
	# Script entry point: create the environment, then hand it to the loader.
	environment = dqn_environment()
	EnvironmentLoader.loadEnvironment(environment)
Пример #11
0
        default=0,
        help="Standard deviation of additive noise to generate")
    parser.add_argument(
        "--fudge",
        type=float,
        default=1.4143,
        help="Distance from goal allowed before episode is counted as finished"
    )


if __name__ == "__main__":
    import argparse

    # The description previously named the Gridworld environment (copied from
    # another script); it now names the environment this script launches.
    parser = argparse.ArgumentParser(
        description='Run Taxi environment in network mode.')
    addTaxiArgs(parser)
    args = parser.parse_args()

    # A negative first fuel coordinate means "no fuel location".
    fuelloc = None if args.fuel_loc[0] < 0 else args.fuel_loc
    # --wall / --landmark are None when never given on the command line.
    walls = numpy.array(args.wall) if args.wall is not None else None
    landmarks = numpy.array(
        args.landmark) if args.landmark is not None else None

    EnvironmentLoader.loadEnvironment(
        Taxi(args.size_x,
             args.size_y,
             walls=walls,
             landmarks=landmarks,
             fuel_loc=fuelloc,
             fickleness=args.fickleness,
             noise=args.noise,
             fudge=args.fudge))
Пример #12
0
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self,inMessage):
        """Return a fixed reply; the content of ``inMessage`` is ignored."""
        reply = "I don't know how to respond to your message"
        return reply

@register_environment
class MountainCar(MountainCarND):
    """MountainCarND specialisation that always sets ``dimension`` to 2."""

    name = "Mountain Car"

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base class with ``dimension=2``.

        A ``dimension`` value supplied by the caller is overridden.
        """
        settings = dict(kwargs, dimension=2)
        super(MountainCar, self).__init__(**settings)


if __name__=="__main__":
    import argparse

    def _parse_bool_flag(text):
        """Convert a command-line token to a bool.

        ``type=bool`` would turn every non-empty string (including "False")
        into True; here the usual false spellings give False and any other
        value gives True.
        """
        return text.strip().lower() not in ("", "0", "false", "f", "no", "n", "off")

    parser = argparse.ArgumentParser(description='Run Noisy Mountain Car environment in network mode.')
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate, affects the action effects.")
    parser.add_argument("--random_restarts", type=_parse_bool_flag, default=False, help="Restart the cart with a random location and velocity.")

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(MountainCar(noise=args.noise, random_start=args.random_restarts))

Пример #13
0
"""
MENACE agent environment wrapper
"""

import sys

# Ugly!
sys.path.append('../')

from agents.symmetry_agent import SymmetryAgent
from wrapper_environment import WrapperEnvironment
from rlglue.environment import EnvironmentLoader

class SymmetryEnvironment(WrapperEnvironment, SymmetryAgent):
    """Environment that combines WrapperEnvironment with SymmetryAgent."""

    name = 'symmetry_agent'
    player = 2

    def env_play(self):
        """Run one agent step on the current state and store the result.

        The ``intArray`` of the action returned by ``do_step`` becomes the
        new ``self.state``.
        """
        self.state = self.do_step(self.state).intArray
        

if __name__ == "__main__":
    # Script entry point: create the environment, then hand it to the loader.
    environment = SymmetryEnvironment()
    EnvironmentLoader.loadEnvironment(environment)
                                        self.map[i:i + self.n_cols])) + '\n'
        self.history.append(current_map)

        # 試合の様子を記録
        if rot.r == self.game.r_lose:
            f = open('history.txt', 'a')
            history = '\n'.join(self.history)
            f.writelines('# START\n' + history + '# END\n\n')
            f.close()

        # 決着がついた場合は agentのagent_end
        # 決着がついていない場合は agentのagent_step に続く
        return rot

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self, message):
        """Ignore ``message``; no reply is produced (returns None)."""


if __name__ == '__main__':
    # Read the board size from the command line (default 6) and serve a
    # reversi environment of that size through the RL-Glue loader.
    parser = argparse.ArgumentParser(description='Deep Q-Learning')
    parser.add_argument(
        '--size', '-s', default=6, type=int, help='Reversi board size')
    args = parser.parse_args()
    environment = KmoriReversiEnvironment(args.size)
    EnvironmentLoader.loadEnvironment(environment)
Пример #15
0
		episodeOver=0
		theReward=0

		theObs=Observation()
		theObs.intArray=np.zeros(50816)
		
		returnRO=Reward_observation_terminal()
		returnRO.r=theReward
		returnRO.o=theObs
		returnRO.terminal=episodeOver
		
		return returnRO

	def env_cleanup(self):
		"""No-op: this environment performs no cleanup work."""

	def env_message(self,inMessage):
		"""Answer the name query; give a fixed fallback reply to anything else."""
		knows_answer = inMessage == "what is your name?"
		return ("my name is dqn_environment, Python edition!" if knows_answer
			else "I don't know how to respond to your message")


if __name__ == '__main__':
	# NOTE(review): this instance is never used below -- a second TetrisApp is
	# constructed for the loader. Confirm whether a single instance was meant.
	App = TetrisApp()
	#App.run()
	# Hand a freshly constructed TetrisApp to the RL-Glue environment loader.
	EnvironmentLoader.loadEnvironment(TetrisApp())
	# Runs only after loadEnvironment() has returned.
	print('main start')

Пример #16
0
                        type=float,
                        default=5.,
                        help="Payment received per unit product sold.")
    parser.add_argument("--cost",
                        type=float,
                        default=2.,
                        help="Cost per unit product purchased.")
    parser.add_argument("--discount_factor",
                        type=float,
                        default=0.999,
                        help="Discount factor to learn over.")
    parser.add_argument(
        "--time_period",
        type=int,
        default=20,
        help="Time period for problem. (Number of steps to run)")
    parser.add_argument(
        "--noise",
        type=float,
        default=0,
        help="Standard deviation of additive noise to generate")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        BatchReplenishment(demand_mean=args.demand_mean,
                           demand_std=args.demand_std,
                           payoff=args.payoff,
                           cost=args.cost,
                           gamma=args.discount_factor,
                           time_period=args.time_period,
                           noise=args.noise))
Пример #17
0
    def checkCurrentTerminal(self):
        """Return ``checkTerminal`` evaluated at the agent's current row and column."""
        row, col = self.agentRow, self.agentCol
        return self.checkTerminal(row, col)

    def updatePosition(self, theAction):
        """Try to move FIXED_DISTANCE in direction ``theAction``.

        The row changes by ``FIXED_DISTANCE * cos(theAction)`` and the column
        by ``FIXED_DISTANCE * sin(theAction)``. If ``checkValid`` rejects the
        candidate position the agent stays where it is.

        Returns:
            True when the move was rejected, False when the agent moved.
        """
        candidateRow = self.agentRow + self.FIXED_DISTANCE * math.cos(theAction)
        candidateCol = self.agentCol + self.FIXED_DISTANCE * math.sin(theAction)

        # Rejected move: leave the position untouched and report the hit.
        if not self.checkValid(candidateRow, candidateCol):
            return True

        self.agentRow, self.agentCol = candidateRow, candidateCol
        return False

    def calculateReward(self, hitBoundary):
        """Return the reward for the current step.

        -0.5 when ``hitBoundary`` is truthy; otherwise 10.0 when the agent is
        closer than half of FIXED_DISTANCE (Euclidean) to END_STATE, else 0.0.
        """
        if hitBoundary:
            return -0.5
        gap = distance.euclidean([self.agentRow, self.agentCol], self.END_STATE)
        threshold = 0.5 * self.FIXED_DISTANCE
        return 10.0 if gap < threshold else 0.0


if __name__ == "__main__":
    # Script entry point: create the environment, then hand it to the loader.
    environment = threeroom_environment()
    EnvironmentLoader.loadEnvironment(environment)
Пример #18
0
        if self.reward_noise > 0:
            theReward += numpy.random.normal(scale=self.reward_noise)

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self,inMessage):
        """Return a fixed reply; the content of ``inMessage`` is ignored."""
        reply = "I don't know how to respond to your message"
        return reply


if __name__=="__main__":
    import argparse

    def _parse_bool_flag(text):
        """Convert a command-line token to a bool.

        ``type=bool`` would turn every non-empty string (including "False")
        into True; here the usual false spellings give False and any other
        value gives True.
        """
        return text.strip().lower() not in ("", "0", "false", "f", "no", "n", "off")

    parser = argparse.ArgumentParser(description='Run Noisy Acrobot environment in network mode.')
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate, affects the action effects.")
    parser.add_argument("--random_restarts", type=_parse_bool_flag, default=False, help="Restart the state with random values.")

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(Acrobot(noise=args.noise, random_start=args.random_restarts))

Пример #19
0
		return returnRO

	def env_cleanup(self):
		"""No-op: this environment performs no cleanup work."""

	def env_message(self,inMessage):
		"""Return a fixed reply; the content of ``inMessage`` is ignored."""
		reply = "I don't know how to respond to your message"
		return reply

def addTaxiArgs(parser):
	"""Register the taxi command-line options on ``parser``.

	Adds --size_x, --size_y, --landmark, --wall, --fuel_loc, --fickleness,
	--noise and --fudge, in that order. ``parser`` is modified in place.
	"""
	# One (flag, add_argument keywords) entry per option, in registration order.
	option_specs = (
		("--size_x", dict(type=float, default=5, help="Size of the gridworld in the x (horizontal) dimension, where 1.0 is the unit of movement.")),
		("--size_y", dict(type=float, default=5, help="Size of the gridworld in the y (vertical) dimension, where 1.0 is the unit of movement.")),
		("--landmark", dict(action="append", nargs=2, help="Add a landmark, give x y coordinates", type=float)),
		("--wall", dict(type=float, action="append", nargs=2, help="Add a wall, give x coordinate and size in y with sign indicating starting at the bottom (+) or top (-)")),
		("--fuel_loc", dict(type=float, default=[2.0, 1.0], nargs=2, help="x y coordinate of the fuel station")),
		("--fickleness", dict(type=float, default=0, help="Probability of the passenger changing their destination mid-route.")),
		("--noise", dict(type=float, default=0, help="Standard deviation of additive noise to generate")),
		("--fudge", dict(type=float, default=1.4143, help="Distance from goal allowed before episode is counted as finished")),
	)
	for flag, options in option_specs:
		parser.add_argument(flag, **options)


if __name__=="__main__":
	import argparse

	# The description previously named the Gridworld environment (copied from
	# another script); it now names the environment this script launches.
	parser = argparse.ArgumentParser(description='Run Taxi environment in network mode.')
	addTaxiArgs(parser)
	args = parser.parse_args()

	# A negative first fuel coordinate means "no fuel location".
	fuelloc = None if args.fuel_loc[0] < 0 else args.fuel_loc
	# --wall / --landmark are None when never given on the command line.
	walls = numpy.array(args.wall) if args.wall is not None else None
	landmarks = numpy.array(args.landmark) if args.landmark is not None else None
	EnvironmentLoader.loadEnvironment(Taxi(args.size_x, args.size_y, walls=walls, landmarks=landmarks, fuel_loc=fuelloc, fickleness=args.fickleness, noise=args.noise, fudge=args.fudge))

Пример #20
0
                      action='store',
                      type='string',
                      default='./rlglue_param.json',
                      help="json file of simulation parameters")
    parser.add_option('-d',
                      '--dst',
                      dest='savepath',
                      action='store',
                      type='string',
                      default='./result/data',
                      help="Save data path.")
    (options, args) = parser.parse_args()
    #print 'options', options

    if not options.test:
        EnvironmentLoader.loadEnvironment(ElecpyEnvironment(options))

    else:
        objEnv = ElecpyEnvironment(options)
        objEnv.env_init()
        for epi in range(3):
            print 'Episode {0}'.format(epi)
            objEnv.env_start()
            cnt_step = 0
            while True:
                cnt_step += 1
                action = Action(numInts=1)
                action.intArray = [0]
                rot = objEnv.env_step(action)
                if rot.terminal:
                    break
Пример #21
0
            base = self.var[0] if self.pos[1] <= 1.0 else self.var[1]
            a = self.var[2]
            return base - (int(self.pos[0]) % 5) * a
        elif intAction < 4:
            return -1.0
        elif intAction >= 4:
            return -1.4
        else:
            print "ERROR in FuelWorld.takeAction"


if __name__ == "__main__":
    import argparse

    # Gridworld options plus one extra flag for noise on the fuel expenditure.
    parser = argparse.ArgumentParser(
        description='Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.')
    gridworld.addGridworldArgs(parser)
    parser.add_argument(
        "--fuel_noise",
        type=float,
        default=0.0,
        help="If non-zero then gives the standard deviation of the additive Gaussian noise to add to the fuel expenditure.")
    args = parser.parse_args()

    # Only noise, fudge and fuel_noise are passed on to FuelWorld.
    environment = FuelWorld(
        noise=args.noise, fudge=args.fudge, fuel_noise=args.fuel_noise)
    EnvironmentLoader.loadEnvironment(environment)
Пример #22
0
            theReward=-1
            episodeOver=1

        if self.currentState >= 20:
            self.currentState=20
            theReward=1
            episodeOver=1

        theObs=Observation()
        theObs.intArray=[self.currentState]

        returnRO=Reward_observation_terminal()
        returnRO.r=theReward
        returnRO.o=theObs
        returnRO.terminal=episodeOver

        return returnRO

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self,inMessage):
        """Answer the name query; give a fixed fallback reply to anything else."""
        knows_answer = inMessage == "what is your name?"
        return ("my name is skeleton_environment, Python edition!" if knows_answer
                else "I don't know how to respond to your message")


if __name__ == "__main__":
    # Script entry point: create the environment, then hand it to the loader.
    environment = skeleton_environment()
    EnvironmentLoader.loadEnvironment(environment)
Пример #23
0
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self, inMessage):
        """Return a fixed reply; the content of ``inMessage`` is ignored."""
        reply = "I don't know how to respond to your message"
        return reply


if __name__ == "__main__":
    import argparse

    def _parse_bool_flag(text):
        """Convert a command-line token to a bool.

        ``type=bool`` would turn every non-empty string (including "False")
        into True; here the usual false spellings give False and any other
        value gives True.
        """
        return text.strip().lower() not in ("", "0", "false", "f", "no", "n",
                                            "off")

    parser = argparse.ArgumentParser(
        description='Run Noisy Acrobot environment in network mode.')
    parser.add_argument(
        "--noise",
        type=float,
        default=0,
        help=
        "Standard deviation of additive noise to generate, affects the action effects."
    )
    parser.add_argument("--random_restarts",
                        type=_parse_bool_flag,
                        default=False,
                        help="Restart the state with random values.")

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        Acrobot(noise=args.noise, random_start=args.random_restarts))
Пример #24
0
        pygame.display.flip()

    pygame.quit()


if __name__ == "__main__":
    # Command line: a required configuration file, optional screen size, and
    # a switch selecting RL-Glue mode instead of the viewer.
    parser = argparse.ArgumentParser(description='Pinball domain')
    parser.add_argument('configuration', help='the configuration file')
    parser.add_argument('--width',
                        action='store',
                        type=int,
                        default=500,
                        help='screen width (default: 500)')
    parser.add_argument('--height',
                        action='store',
                        type=int,
                        default=500,
                        help='screen height (default: 500)')
    parser.add_argument('-r',
                        '--rlglue',
                        action='store_true',
                        help='expose the environment through RL-Glue')
    args = parser.parse_args()

    if args.rlglue:
        # RL-Glue mode: width and height are not used on this path.
        print 'Starting rl-glue'
        EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration))
    else:
        # Viewer mode: run the pinball view with the requested screen size.
        run_pinballview(args.width, args.height, args.configuration)
Пример #25
0
            terminal = 0
            if self.stepCount == 200:
                terminal = 1
            ro = Reward_observation_terminal()
            ro.r = 1.0
            ro.o = self.o
            ro.terminal = terminal
            return ro

        self.o.intArray = range(0, 5)
        #cheating, might break something
        self.o.doubleArray = range(0, 5)
        terminal = 0
        if self.stepCount == 5000:
            terminal = 1
        ro = Reward_observation_terminal()
        ro.r = 1.0
        ro.o = self.o
        ro.terminal = terminal
        return ro

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self, inMessage):
        """Ignore ``inMessage``; no reply is produced (returns None)."""


if __name__ == "__main__":
    # Script entry point: create the environment, then hand it to the loader.
    environment = test_speed_environment()
    EnvironmentLoader.loadEnvironment(environment)
Пример #26
0
		returnRO.terminal = int(self.counter >= self.T)

		return returnRO

	def env_cleanup(self):
		"""No-op: this environment performs no cleanup work."""

	def env_message(self,inMessage):
		"""Return a fixed reply; the content of ``inMessage`` is ignored."""
		reply = "I don't know how to respond to your message"
		return reply


if __name__=="__main__":
	import argparse

	# The description previously named the Gridworld environment (copied from
	# another script); it now names the environment this script launches.
	parser = argparse.ArgumentParser(description='Run Batch Replenishment environment in network mode.')
	parser.add_argument("--demand_mean", type=float, default=10., help="Mean demand for the product.")
	parser.add_argument("--demand_std", type=float, default=1., help="Standard deviation of demand for the product.")
	parser.add_argument("--payoff", type=float, default=5., help="Payment received per unit product sold.")
	parser.add_argument("--cost", type=float, default=2., help="Cost per unit product purchased.")
	parser.add_argument("--discount_factor", type=float, default=0.999, help="Discount factor to learn over.")
	parser.add_argument("--time_period", type=int, default=20, help="Time period for problem. (Number of steps to run)")
	parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate")
	args = parser.parse_args()

	# --discount_factor is passed to the environment as ``gamma``.
	EnvironmentLoader.loadEnvironment(BatchReplenishment(
		demand_mean=args.demand_mean,
		demand_std=args.demand_std,
		payoff=args.payoff,
		cost=args.cost,
		gamma=args.discount_factor,
		time_period=args.time_period,
		noise=args.noise))
Пример #27
0
        rot.o = observation

        current_map = 'map\n'
        for i in range(0, len(self.map), self.n_cols):
            current_map += ' '.join(map(str,
                                        self.map[i:i + self.n_cols])) + '\n'
            if (i % 16 == 0):
                current_map += "\n"

        self.history.append(current_map)

        if rot.r == -1:
            f = open('history.txt', 'a')
            history = '\n'.join(self.history)
            f.writelines('# START\n' + history + '# END\n\n')
            f.close()

        # 決着がついた場合は agentのagent_end
        # 決着がついていない場合は agentのagent_step に続く
        return rot

    def env_cleanup(self):
        """No-op: this environment performs no cleanup work."""

    def env_message(self, message):
        """Ignore ``message``; no reply is produced (returns None)."""


if __name__ == '__main__':
    # Script entry point: create the environment, then hand it to the loader.
    environment = MarubatsuEnvironment()
    EnvironmentLoader.loadEnvironment(environment)
Пример #28
0
"""
Random player environment
"""

import random
from rlglue.environment import EnvironmentLoader
from wrapper_environment import WrapperEnvironment

class RandomEnvironment(WrapperEnvironment):
    """Environment that answers with a randomly chosen free spot."""

    name = 'random'

    def env_play(self):
        """
        Pick one of the free spots at random, and play there.

        A spot counts as free when its entry in ``self.state`` equals 0; the
        chosen entry is set to ``self.color``.  When no spot is free the
        state is left untouched instead of letting ``random.choice`` raise
        IndexError on an empty list.
        """
        open_spots = [i for i, cell in enumerate(self.state) if cell == 0]
        if not open_spots:
            # Full board: there is nothing to play.
            return
        self.state[random.choice(open_spots)] = self.color

# Script entry point: pass a new RandomEnvironment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(RandomEnvironment())
Пример #29
0
            1, 1, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0,
            0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0
        ]
        numRows = len(self.worldmap)
        numCols = len(self.worldmap[0])
        for row in range(0, numRows):
            print
            print "Row: " + str(row) + " ",
            for col in range(0, numCols):
                if self.checkValid(row, col):
                    flat = self.calculateFlatState(row, col)
                    flat_i = self.validstates.index(flat)
                    print unicode(argmaxes[flat_i]),
                else:
                    print "X",
                """
                if self.map[row][col] == self.GOAL:
                    print "G",
                if self.map[row][col] == self.WALL:
                    print "X",
                if self.map[row][col] == self.START:
                    print "S",
                if self.map[row][col] == self.FREE:
                    print " ",
                """
        print


# Script entry point: pass a new threeroom_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(threeroom_environment())
Пример #30
0
        if inMessage.startswith("set-start-state"):
            splitString = inMessage.split(" ")
            self.state = array(eval(splitString[1]))
            self.fixedStartState = True
            return "Message understood.  Using fixed start state."

        return "InvasiveEnvironment(Python) does not respond to messages."

    def setAgentState(self, S):
        """Install ``S`` as the current state.

        Asserts that ``len(S)`` equals habitatSize * nbrReaches (both read
        from ``self.simulationParameterObj``), stores ``S`` in ``self.state``
        and always returns True.
        """
        params = self.simulationParameterObj
        expected_length = params.habitatSize * params.nbrReaches
        assert len(S) == expected_length
        self.state = S
        return True

    def setRandomState(self):
        """Replace the state with random integers drawn from 1..3.

        nbrReaches * habitatSize values are drawn (sizes read from
        ``self.simulationParameterObj``), wrapped with ``array`` and installed
        through ``setAgentState``.
        """
        params = self.simulationParameterObj
        entry_count = params.nbrReaches * params.habitatSize
        S = array([random.randint(1, 3) for _ in xrange(entry_count)])
        self.setAgentState(S)

    def checkValid(self, S):
        """Report ``S`` as valid; no check is made and True is always returned."""
        return True

    def printState(self):
        """Print ``self.state`` prefixed with "Agent is at: "."""
        message = "Agent is at: " + str(self.state)
        # A single parenthesised expression prints identically as a Python 2
        # print statement.
        print(message)

# Script entry point: build an InvasiveEnvironment with the fixed settings
# below and pass it to EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(
        InvasiveEnvironment(simulationParameterObj=None, actionParameterObj=None, Bad_Action_Penalty=-10000,fixedStartState=False, nbrReaches=7,
            habitatSize=4, seed=1))
Пример #31
0
    def printState(self):
        numRows = len(self.map)
        numCols = len(self.map[0])
        print "Agent is at: " + str(self.agentRow) + "," + str(self.agentCol)
        print "Columns:0-10                10-17"
        print "Col    ",
        for col in range(0, numCols):
            print col % 10,

        for row in range(0, numRows):
            print
            print "Row: " + str(row) + " ",
            for col in range(0, numCols):
                if self.agentRow == row and self.agentCol == col:
                    print "A",
                else:
                    if self.map[row][col] == self.WORLD_GOAL:
                        print "G",
                    if self.map[row][col] == self.WORLD_MINE:
                        print "M",
                    if self.map[row][col] == self.WORLD_OBSTACLE:
                        print "*",
                    if self.map[row][col] == self.WORLD_FREE:
                        print " ",
        print


# Script entry point: pass a new mines_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(mines_environment())
Пример #32
0
    # (string) -> string

    def env_message(self, inMessage):
        """Switch ``self.toprint`` on or off according to ``inMessage``.

        A message starting with "print" sets ``toprint`` to 1, one starting
        with "stop print" sets it to 0; each returns an acknowledgement
        string.  Any other message leaves ``toprint`` alone and returns the
        "does not understand" reply.
        """
        commands = (
            ("print", 1, "message understood, print"),
            ("stop print", 0, "message understood, stop print"),
        )
        for prefix, flag, reply in commands:
            if inMessage.startswith(prefix):
                self.toprint = flag
                return reply
        return "RmaxAgent(Python) does not understand your message."

    def clearscreen(self, numlines=100):
        """Clear the console.

        Runs ``clear`` when ``os.name`` is "posix" and ``CLS`` when it is
        "nt", "dos" or "ce".  On any other platform ``numlines`` newlines are
        printed instead; ``numlines`` is not used otherwise.
        """
        # Thanks to Steven D'Aprano, http://www.velocityreviews.com/forums
        platform = os.name
        if platform == "posix":
            # Unix/Linux/MacOS/BSD/etc
            command = 'clear'
        elif platform in ("nt", "dos", "ce"):
            # DOS/Windows
            command = 'CLS'
        else:
            command = None

        if command is None:
            # Fallback for other operating systems.
            print('\n' * numlines)
        else:
            os.system(command)


# Script entry point: pass a new My_Environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(My_Environment())
Пример #33
0
        if msg.startswith('set'):
            param, value = msg.split(None, 2)[1:]
            self.debug('set', param, value)

            self.env_message_set_param(param, value)

        elif msg.startswith('get'):
            param = msg.split(None, 1)[1]

            return self.env_message_get_param(param)

        else:
            return self.env_message_handler(msg)

    def debug(self, *args):
        """ Print a debug msg: ``self.name``, a colon, then ``args`` joined by spaces. """
        # NOTE(review): ``self.debug`` looked up here may be this very method,
        # which is always truthy -- confirm which flag was meant to gate output.
        if not self.debug:
            return
        text = ' '.join(str(a) for a in args)
        print("%s: %s" % (self.name, text))

    def step_out(self, *args):
        """Print ``args`` joined by single spaces when ``self.output_steps`` is truthy."""
        if not self.output_steps:
            return
        print(' '.join(str(a) for a in args))


# Script entry point: pass a new PuddleEnvironment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == '__main__':
    #p = PuddleEnvironment()
    #p.env_start()
    EnvironmentLoader.loadEnvironment(PuddleEnvironment())
		self.nonEmptyObservation.charArray=['a','b','c','d','e']
		return ""

	def env_start(self):
		"""Advance the episode counter and return the observation for it.

		``self.whichEpisode`` is incremented first; an even count yields
		``self.emptyObservation``, an odd one ``self.nonEmptyObservation``.
		"""
		self.whichEpisode += 1
		even_episode = self.whichEpisode % 2 == 0
		return self.emptyObservation if even_episode else self.nonEmptyObservation
	
	def env_step(self,action):
		"""Return a new Reward_observation_terminal carrying this episode's observation.

		``action`` is not used.  Only the ``o`` field is assigned: the empty
		observation when ``self.whichEpisode`` is even, the non-empty one
		otherwise.
		"""
		result = Reward_observation_terminal()
		even_episode = self.whichEpisode % 2 == 0
		result.o = self.emptyObservation if even_episode else self.nonEmptyObservation
		return result

	def env_cleanup(self):
		"""Do nothing; this environment performs no work on cleanup."""
		pass

	def env_message(self,inMessage):
		"""Ignore ``inMessage`` and return None."""
		return None
	
# Script entry point: pass a new test_empty_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(test_empty_environment())
Пример #35
0
		return returnRO

	def env_cleanup(self):
		"""Do nothing; this environment performs no work on cleanup."""
		pass

	def env_message(self,inMessage):
		"""Return the same fixed reply for every ``inMessage``."""
		reply = "I don't know how to respond to your message"
		return reply

if __name__=="__main__":
    import argparse

    # Problem files are looked up under configs/pomdps next to this module.
    path_to_problems = os.path.join(os.path.dirname(__file__), 'configs', 'pomdps', '*')
    config_files = glob.glob(path_to_problems)

    parser = argparse.ArgumentParser(description='Run a POMDP problem file as a domain in RL-Glue in network mode.')
    # Exactly one of --file / --list must be given.
    group = parser.add_mutually_exclusive_group(required=True)
    # "--pomdp_file" is accepted as an alias of "--file".  It replaces the
    # second parser that used to run after this one: that parser required
    # --pomdp_file and so always aborted with a usage error once the first
    # branch had finished.
    group.add_argument("--file", "--pomdp_file", dest="file", type=str,
                       # Guarded so an empty glob result no longer raises IndexError.
                       default=config_files[0] if config_files else None,
                       help="Run POMDP domain given the path to a POMDP problem file.")
    group.add_argument("--list", action='store_true', default=False, help="List path to included POMDP problem files.")
    args = parser.parse_args()

    if args.list:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Included POMDP problem files:")
        for config_path in config_files:
            print(config_path)
    else:
        EnvironmentLoader.loadEnvironment(POMDPEnvironment(spec_filename=args.file))
Пример #36
0
        #print("newState: "+str(self.state))
        r = 0.0
        if np.any(self.state_ranges[:,0] > self.state[:]) or \
           np.any(self.state_ranges[:,1] < self.state[:]):
#            r = -1
            r = -np.sum(3.0 * self.state_ranges[:,1]**2)
            r *= 6000-self.num_sim_steps
            terminate = True
        else:
#            perr = np.linalg.norm(self.prevState[:2] - self.state_goal[:2])
#            nerr = np.linalg.norm(self.state[:2] - self.state_goal[:2])
#            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
#                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))* \
#                math.exp(-np.sum(abs(self.state[6:8]-self.state_goal[6:8])/(.1*(self.state_ranges[6:8,1]-self.state_ranges[6:8,0]))))
#            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
#                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))
#            r -= (np.sum(((self.state[:2]-self.state_goal[:2])/(self.state_ranges[:2,1]-self.state_ranges[:2,0]))**2)+ \
#                  np.sum(((self.state[3:5]-self.state_goal[3:5])/(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))**2))
            r -= (self.state[0]-self.state_goal[0])**2
            r -= (self.state[1]-self.state_goal[1])**2
            r -= self.state[3]**2
            r -= self.state[4]**2
            
            terminate = False

        print("reward "+str(r))
        return r,terminate
		
# Script entry point: pass a new vrep_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(vrep_environment())
            self.fixedStartState = True
            return "Message understood.  Using fixed start state."

        return "InvasiveEnvironment(Python) does not respond to messages."

    def setAgentState(self, S):
        """Install ``S`` as the current state.

        Asserts that ``len(S)`` equals habitatSize * nbrReaches (both read
        from ``self.simulationParameterObj``), stores ``S`` in ``self.state``
        and always returns True.
        """
        params = self.simulationParameterObj
        expected_length = params.habitatSize * params.nbrReaches
        assert len(S) == expected_length
        self.state = S
        return True

    def setRandomState(self):
        """Replace the state with random integers drawn from 1..3.

        nbrReaches * habitatSize values are drawn (sizes read from
        ``self.simulationParameterObj``), wrapped with ``array`` and installed
        through ``setAgentState``.
        """
        params = self.simulationParameterObj
        entry_count = params.nbrReaches * params.habitatSize
        S = array([random.randint(1, 3) for _ in xrange(entry_count)])
        self.setAgentState(S)

    def checkValid(self, S):
        """Report ``S`` as valid; no check is made and True is always returned."""
        return True

    def printState(self):
        """Print ``self.state`` prefixed with "Agent is at: "."""
        message = "Agent is at: " + str(self.state)
        # A single parenthesised expression prints identically as a Python 2
        # print statement.
        print(message)

# ============================ PARAMETERS =====================================

# Script entry point: build an InvasiveEnvironment with the fixed settings
# below and pass it to EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(
        InvasiveEnvironment(simulationParameterObj=None, actionParameterObj=None, Bad_Action_Penalty=-10000,fixedStartState=False, nbrReaches=7,
            habitatSize=4, seed=1))
            
# ============================ PARAMETERS =====================================
Пример #38
0
                #return max(outcomes)
                max_element = -1
                for o in outcomes:
                    if o == +1:
                        return o
                    max_element = max(o,max_element)
                return max_element

        finally:
            board.undoMove(move)

    moves = [(move, evaluateMove(move)) for move in board.getValidMoves()]
    random.shuffle(moves)
    moves.sort(key = lambda (move, winner): winner)
    board.makeMove(moves[-1][0], player)

class MiniMaxEnvironment(WrapperEnvironment):
    """Environment whose move is chosen by ``computerPlayer``."""

    name = 'minimax'

    def env_play(self):
        """Play one move as ``Player_X``.

        Builds a Board from ``self.state``, lets ``computerPlayer`` move on
        it, calls the board's ``output()`` and stores the board's ``pieces``
        back into ``self.state``.
        """
        board = Board(self.state)
        computerPlayer(board, Player_X)
        board.output()
        self.state = board.pieces


# Script entry point: pass a new MiniMaxEnvironment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    #game()
    EnvironmentLoader.loadEnvironment(MiniMaxEnvironment())
Пример #39
0
        self.nonEmptyObservation.charArray = ['a', 'b', 'c', 'd', 'e']
        return ""

    def env_start(self):
        """Advance the episode counter and return the observation for it.

        ``self.whichEpisode`` is incremented first; an even count yields
        ``self.emptyObservation``, an odd one ``self.nonEmptyObservation``.
        """
        self.whichEpisode += 1
        even_episode = self.whichEpisode % 2 == 0
        return self.emptyObservation if even_episode else self.nonEmptyObservation

    def env_step(self, action):
        """Return a new Reward_observation_terminal carrying this episode's observation.

        ``action`` is not used.  Only the ``o`` field is assigned: the empty
        observation when ``self.whichEpisode`` is even, the non-empty one
        otherwise.
        """
        result = Reward_observation_terminal()
        even_episode = self.whichEpisode % 2 == 0
        result.o = self.emptyObservation if even_episode else self.nonEmptyObservation
        return result

    def env_cleanup(self):
        """Do nothing; this environment performs no work on cleanup."""
        pass

    def env_message(self, inMessage):
        """Ignore ``inMessage`` and return None."""
        return None


# Script entry point: pass a new test_empty_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_empty_environment())
"""
Environment which always plays the first free spot available on the board.
"""

from rlglue.environment import EnvironmentLoader
from wrapper_environment import WrapperEnvironment


class FirstFreeEnvironment(WrapperEnvironment):
    """Environment that always claims the lowest-indexed free spot."""

    name = 'first_free'

    def env_play(self):
        """
        Scan ``self.state`` from index 0 and set the first entry equal to 0
        to ``self.color``.  Nothing changes when no entry is 0.
        """
        for index, cell in enumerate(self.state):
            if cell == 0:
                self.state[index] = self.color
                break


# Script entry point: pass a new FirstFreeEnvironment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(FirstFreeEnvironment())
Пример #41
0
            if event.type == pygame.QUIT:
                done = True
            if event.type == pygame.KEYUP or event.type == pygame.KEYDOWN:
                user_action = actions.get(event.key, PinballModel.ACC_NONE)

	if environment.take_action(user_action) == environment.END_EPISODE:
	    done = True

        environment_view.blit()

        pygame.display.flip()

    pygame.quit()

# Script entry point.  Command line: a required configuration file, optional
# window width/height, and -r/--rlglue to choose the RL-Glue environment
# over the pygame viewer.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Pinball domain')
    parser.add_argument('configuration', help='the configuration file')
    parser.add_argument('--width', action='store', type=int,
                        default=500, help='screen width (default: 500)')
    parser.add_argument('--height', action='store', type=int,
                        default=500, help='screen height (default: 500)')
    parser.add_argument('-r', '--rlglue', action='store_true', help='expose the environment through RL-Glue')
    args = parser.parse_args()

    # Both branches are indented with spaces only; the former tab-indented
    # branch is a TabError on Python 3 and depended on tab width on Python 2.
    if args.rlglue:
        print('Starting rl-glue')
        EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration))
    else:
        run_pinballview(args.width, args.height, args.configuration)

Пример #42
0
def main():
    """Build a SumoEnvironment from the first command-line argument and pass
    it to EnvironmentLoader.loadEnvironment."""
    environment = SumoEnvironment(sys.argv[1])
    EnvironmentLoader.loadEnvironment(environment)
Пример #43
0
	returnObs.intArray=np.append(np.zeros(128), [ item for innerlist in arr for item in innerlist ])
        scipy.misc.imsave('screen.png', arr)

	returnRO=Reward_observation_terminal()
	returnRO.r=theReward
	returnRO.o=returnObs
	returnRO.terminal=episodeOver

        # Event handling
        for event in pygame.event.get():
            if event.type == QUIT:  # 終了イベント
                sys.exit()
		
	return returnRO
		
    def env_cleanup(self):
        """Do nothing; this environment performs no work on cleanup."""
        # Body indented with spaces to match the ``def`` line; the former tab
        # indentation is a TabError on Python 3.
        pass

    def env_message(self,inMessage):
        """Answer the one recognised question and give a fallback otherwise.

        Returns the environment's name sentence when ``inMessage`` equals
        "what is your name?", and the "don't know how to respond" sentence
        for every other message.
        """
        # Body indented with spaces to match the ``def`` line; the former tab
        # indentation is a TabError on Python 3.
        if inMessage == "what is your name?":
            return "my name is dqn_environment, Python edition!"
        return "I don't know how to respond to your message"


# Script entry point: pass a new EdgeTracer instance to
# EnvironmentLoader.loadEnvironment, then print 'main start' once that call
# has returned.
if __name__ == "__main__":
	# NOTE(review): App is not used below; the loader receives a second,
	# separately constructed EdgeTracer -- confirm this is intended.
	App = EdgeTracer()
	#App.run()
	EnvironmentLoader.loadEnvironment(EdgeTracer())
	print('main start')
Пример #44
0
		
		if self.inFuelCell(self.pos):
			self.fuel += 20.0
		if self.fuel > 60.0:
			self.fuel = 60.0

		if gridworld.Gridworld.isAtGoal(self):
			return 0.0
		elif self.fuel < 0:
			return -400.0
		elif self.inFuelCell(self.pos): # Fuel costs
			base = self.var[0] if self.pos[1] <= 1.0 else self.var[1]
			a = self.var[2]
			return base - (int(self.pos[0]) % 5)*a
		elif intAction < 4:
			return -1.0
		elif intAction >= 4:
			return -1.4
		else:
			print "ERROR in FuelWorld.takeAction"


# Script entry point: parse the command line and pass a FuelWorld built from
# it to EnvironmentLoader.loadEnvironment.
if __name__=="__main__":
	import argparse
	parser = argparse.ArgumentParser(description='Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.')
	# args.noise and args.fudge used below are not declared in this block;
	# presumably addGridworldArgs registers them -- TODO confirm.
	gridworld.addGridworldArgs(parser)
	parser.add_argument("--fuel_noise", type=float, default=0.0, 
			    help="If non-zero then gives the standard deviation of the additive Gaussian noise to add to the fuel expenditure.")
	args = parser.parse_args()
	EnvironmentLoader.loadEnvironment(FuelWorld(noise=args.noise, fudge=args.fudge, fuel_noise=args.fuel_noise))
Пример #45
0
                    "the puddle's depth.")
    parser.add_argument("--puddle_penalty", type=float, default=-100,
                help="The reward penalty scale for walking through puddles.")
    args = parser.parse_args()
    kwargs = {}
    if args.puddle is not None:
        means = []
        covs = []
        for puddle in args.puddle:
            means.append(tuple(puddle[:2]))
            covs.append(tuple(puddle[2:]))
        kwargs['puddle_means'] = means
        kwargs['puddle_var'] = covs

    if args.size_x:
        kwargs['size_x'] = args.size_x
    if args.size_y:
        kwargs['size_y'] = args.size_y
    if args.goal_x:
        kwargs['goal_x'] = args.goal_x
    if args.goal_y:
        kwargs['goal_y'] = args.goal_y
    if args.noise:
        kwargs['noise'] = args.noise
    if args.fudge:
        kwargs['fudge'] = args.fudge
    if args.random_restarts:
        kwargs['random_start'] = args.random_restarts

    EnvironmentLoader.loadEnvironment(PuddleWorld(**kwargs))
Пример #46
0
      '-t','--test', 
      dest='test', action='store_true',default=False,
      help="test mode")
  parser.add_option(
      '-p','--param_file', 
      dest='param_file', action='store', type='string', default='./rlglue_param.json',
      help="json file of simulation parameters")
  parser.add_option(
      '-d','--dst', 
      dest='savepath', action='store', type='string', default='./result/data',
      help="Save data path.")
  (options, args) = parser.parse_args()
  #print 'options', options

  if not options.test:
    EnvironmentLoader.loadEnvironment(ElecpyEnvironment(options))

  else:
    objEnv = ElecpyEnvironment(options)
    objEnv.env_init()
    for epi in range(3):
      print 'Episode {0}'.format(epi)
      objEnv.env_start()
      cnt_step = 0
      while True:
        cnt_step += 1
        action = Action(numInts=1)
        action.intArray = [0]
        rot = objEnv.env_step(action)
        if rot.terminal:
          break
	def env_init(self):  
		"""Return an empty string."""
		return ""

	def env_start(self):
		"""Return a newly constructed Observation."""
		return Observation()
	
	def env_step(self,action):
		"""Ignore ``action`` and return a newly constructed Reward_observation_terminal."""
		return Reward_observation_terminal()

	def env_cleanup(self):
		"""Do nothing; this environment performs no work on cleanup."""
		pass
	
	def env_message(self,inMessage):
		"""Echo ``inMessage``, swapping four special values.

		None -> "null", "" -> "empty", "null" -> None, "empty" -> "".
		Any other message is returned unchanged.
		"""
		# Checked in order with ==; the first match decides the reply.
		swaps = (
			(None, "null"),
			("", "empty"),
			("null", None),
			("empty", ""),
		)
		for received, reply in swaps:
			if inMessage == received:
				return reply
		return inMessage

# Script entry point: pass a new test_message_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(test_message_environment())
Пример #48
0
            theReward = -1
            episodeOver = 1

        if self.currentState >= 20:
            self.currentState = 20
            theReward = 1
            episodeOver = 1

        theObs = Observation()
        theObs.intArray = [self.currentState]

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        """Do nothing; this environment performs no work on cleanup."""
        pass

    def env_message(self, inMessage):
        """Answer the one recognised question and give a fallback otherwise.

        Returns the environment's name sentence when ``inMessage`` equals
        "what is your name?", and the "don't know how to respond" sentence
        for every other message.
        """
        asked_for_name = inMessage == "what is your name?"
        return ("my name is skeleton_environment, Python edition!"
                if asked_for_name
                else "I don't know how to respond to your message")


# Script entry point: pass a new skeleton_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(skeleton_environment())
Пример #49
0
    parser = argparse.ArgumentParser(
        description=
        'Run Noisy Cart Pole Balancing or Swing Up environment in network mode.'
    )
    parser.add_argument(
        "--noise",
        type=float,
        default=0,
        help=
        "Standard deviation of additive noise to generate, affects the action effects."
    )
    parser.add_argument(
        "--random_restarts",
        type=bool,
        default=False,
        help="Restart the cart with a random location and velocity.")
    parser.add_argument(
        "--mode",
        choices=["easy", "hard", "swingup"],
        default="easy",
        type=str,
        help=
        "Choose the type of cart pole domain. Easy/hard balancing, or swing up."
    )

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        CartPole(mode=args.mode,
                 noise=args.noise,
                 random_start=args.random_restarts))
Пример #50
0
        type=float,
        default=-100,
        help="The reward penalty scale for walking through puddles.")
    args = parser.parse_args()
    kwargs = {}
    if args.puddle is not None:
        means = []
        covs = []
        for puddle in args.puddle:
            means.append(tuple(puddle[:2]))
            covs.append(tuple(puddle[2:]))
        kwargs['puddle_means'] = means
        kwargs['puddle_var'] = covs

    if args.size_x:
        kwargs['size_x'] = args.size_x
    if args.size_y:
        kwargs['size_y'] = args.size_y
    if args.goal_x:
        kwargs['goal_x'] = args.goal_x
    if args.goal_y:
        kwargs['goal_y'] = args.goal_y
    if args.noise:
        kwargs['noise'] = args.noise
    if args.fudge:
        kwargs['fudge'] = args.fudge
    if args.random_restarts:
        kwargs['random_start'] = args.random_restarts

    EnvironmentLoader.loadEnvironment(PuddleWorld(**kwargs))
Пример #51
0
			ro.r=1.0

		else:
			self.o.doubleArray=[0.0078125,-0.0078125,0.0,0.0078125e150,-0.0078125e150]
			self.o.charArray=['g','F','?',' ','&']
			self.o.intArray=[173,-173,2147483647,0,-2147483648]

			ro.r=-2.0

		ro.o=self.o
		ro.terminal=terminal
		return ro	

	def env_cleanup(self):
		"""Do nothing; this environment performs no work on cleanup."""
		pass

	def env_message(self,inMessage):
		"""Wrap a step-count marker between two copies of ``inMessage``.

		The result is ``inMessage``, "|", the text "<stepCount>." repeated
		``self.stepCount % 3`` times, "|", and ``inMessage`` again.
		"""
		step = self.stepCount
		marker = "".join("%d." % step for _ in range(step % 3))
		return inMessage + "|" + marker + "|" + inMessage
	
# Script entry point: pass a new test_1_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(test_1_environment())
Пример #52
0
# Script entry point.  Optional positional arguments:
#   argv[1]  the literal string 'True' selects the EnvironmentLoader path
#   argv[2]  integer type of player 1 (default 0)
#   argv[3]  integer type of player 2 (default 1)
if __name__ == "__main__":
    pygame.init()
    pygame.freetype.init()
    gameArea = pygame.Rect([0, 0], size)
    if len(sys.argv) > 1:
        # Any value other than the exact string 'True' leaves useGlue False.
        useGlue = (sys.argv[1] == 'True')
    else:
        useGlue = False
    black = 0, 0, 0
    # The window is 4/3 as tall as size[1].
    screen = pygame.display.set_mode([size[0],
                                      int(size[1] * (4 / 3.0))
                                      ])  #,pygame.FULLSCREEN)
    count = 0
    if len(sys.argv) > 2:
        p1Type = int(sys.argv[2])
    else:
        p1Type = 0
    if len(sys.argv) > 3:
        p2Type = int(sys.argv[3])
    else:
        p2Type = 1
    if useGlue:
        # NOTE(review): the second player type is the constant 2 here and
        # p2Type is ignored on this path -- confirm this is intended.
        EnvironmentLoader.loadEnvironment(World(p1Type, 2))
    else:
        world = World(p1Type, p2Type)
        world.start()
        # NOTE(review): `True or ...` is always true, so the count < 300
        # limit never takes effect and the loop only ends if step() raises
        # or exits -- confirm which behaviour was meant.
        while True or count < 300:
            world.step()
            count += 1
    def env_init(self):
        """Return an empty string."""
        return ""

    def env_start(self):
        """Return a newly constructed Observation."""
        return Observation()

    def env_step(self, action):
        """Ignore ``action`` and return a newly constructed Reward_observation_terminal."""
        return Reward_observation_terminal()

    def env_cleanup(self):
        """Do nothing; this environment performs no work on cleanup."""
        pass

    def env_message(self, inMessage):
        """Echo ``inMessage``, swapping four special values.

        None -> "null", "" -> "empty", "null" -> None, "empty" -> "".
        Any other message is returned unchanged.
        """
        # Checked in order with ==; the first match decides the reply.
        swaps = (
            (None, "null"),
            ("", "empty"),
            ("null", None),
            ("empty", ""),
        )
        for received, reply in swaps:
            if inMessage == received:
                return reply
        return inMessage


# Script entry point: pass a new test_message_environment instance to
# EnvironmentLoader.loadEnvironment.
if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_message_environment())
Пример #54
0
	def __init__(self, size_x=10, size_y=10, goal_x=10, goal_y=10, wind_center=7., wind_stdev=1.0, wind_power=2.0, noise=0.0, random_start=False, fudge=1.4143):
		"""Set up the base Gridworld, then record the wind parameters.

		size_x, size_y, goal_x, goal_y, noise, random_start and fudge are
		forwarded unchanged to ``gridworld.Gridworld.__init__``.
		wind_center, wind_stdev and wind_power are stored on the instance
		and ``domain_name`` is set to the windy-gridworld title.
		"""
		gridworld.Gridworld.__init__(
			self,
			size_x=size_x,
			size_y=size_y,
			goal_x=goal_x,
			goal_y=goal_y,
			noise=noise,
			random_start=random_start,
			fudge=fudge,
		)
		self.domain_name = "Continuous Windy Gridworld by Will Dabney"
		self.wind_center, self.wind_stdev, self.wind_power = wind_center, wind_stdev, wind_power
		
	def reset(self):
		"""Place the agent at its starting position.

		Without ``self.random_start`` the position is x = 0.0 and
		y = half of ``self.size[1]``; with it, two uniform random numbers
		from [0, 1) are scaled by ``self.size``.
		"""
		if not self.random_start:
			self.pos = numpy.array([0.0, self.size[1]*0.5])
			return
		self.pos = numpy.random.random((2,)) * self.size
	
	def takeAction(self, action):
		"""Push the agent along the second coordinate, then apply ``action``.

		The push is ``norm.pdf`` evaluated at ``self.pos[0]`` with
		``self.wind_center`` and ``self.wind_stdev``, multiplied by
		``self.wind_power``.  The action itself is handled by
		``gridworld.Gridworld.takeAction``, whose result is returned.
		"""
		gust = norm.pdf(self.pos[0], self.wind_center, self.wind_stdev) * self.wind_power
		self.pos[1] += gust
		return gridworld.Gridworld.takeAction(self, action)


# Script entry point: parse the command line and pass a WindyGridworld built
# from it to EnvironmentLoader.loadEnvironment.
if __name__=="__main__":
	import argparse
	parser = argparse.ArgumentParser(description='Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.')
	# size_x, size_y, goal_x, goal_y, noise, random_restarts and fudge used
	# below are not declared in this block; presumably addGridworldArgs
	# registers them -- TODO confirm.
	gridworld.addGridworldArgs(parser)
	parser.add_argument("--wind_center", type=float, default=7, help="Center, or strongest point, in the x-direction of the wind")
	parser.add_argument("--wind_scale", type=float, default=1.0, help="Scale, or width, of the wind effects around the center.")
	parser.add_argument("--wind_power", type=float, default=2.0, help="The power, or strength, of the wind.")
	args = parser.parse_args()
	# --wind_scale is passed as the wind_stdev constructor argument.
	EnvironmentLoader.loadEnvironment(
		WindyGridworld(args.size_x, args.size_y, args.goal_x, args.goal_y, wind_center=args.wind_center, 
			       wind_stdev=args.wind_scale, wind_power=args.wind_power, noise=args.noise, 
			       random_start=args.random_restarts, fudge=args.fudge))