Example #1
 def test_getStateVars2(self):
     simulatedError = 0.01
     simWidth = 550.0
     simHeight = 357.0
     ballPos = (1/3 * simWidth,1/6 * simHeight)
     c = (simWidth /2.0,simHeight/2.0)
     a1 = agent.agent((1/3 * simWidth,1/6 * simHeight),simulatedError,"Keeper",ballPos)
     a2 = agent.agent((2/3 * simWidth,1/7 * simHeight),simulatedError,"Keeper",ballPos)
     a3 = agent.agent((2/5 * simWidth,6/7 * simHeight),simulatedError,"Keeper",ballPos)
     keepers = [a1,a2,a3]
     t1 = agent.agent((1/2 * simWidth,5/12 * simHeight),simulatedError,"Taker",ballPos)
     t2 = agent.agent((2/5 * simWidth,7/12 * simHeight),simulatedError,"Taker",ballPos)
     takers = [t1,t2]
     testOut = getStateVarsKeepers(keepers, takers, c)
     actualOut = [kUtil.getDist((550/3, 59.5), c),
                  kUtil.getDist((550/3 * 2, 51), c),
                  kUtil.getDist((220, 306), c),
                  kUtil.getDist((275, 148.75), c),
                  kUtil.getDist((220, 208.25), c),
                  kUtil.getDist((550/3,59.5), (550/3*2, 51)),
                  kUtil.getDist((550/3,59.5), (220,306)),
                  kUtil.getDist((550/3,59.5), (275, 148.75)),
                  kUtil.getDist((550/3,59.5), (220, 208.25)),
                  min( kUtil.getDist((550/3*2,51), (220, 208.25)), kUtil.getDist((550/3*2,51), (275, 148.75)) ),
                  min( kUtil.getDist((220,306), (220, 208.25)), kUtil.getDist((220,306), (275, 148.75)) ),
                  max(kUtil.cosTheta((550/3*2, 51), (550/3,59.5), (275,148.75)), 
                      kUtil.cosTheta((550/3*2, 51), (550/3,59.5), (220,208.25))),
                  max(kUtil.cosTheta((220,306), (550/3,59.5), (275,148.75)), 
                      kUtil.cosTheta((220,306), (550/3,59.5), (220,208.25))),
                  ]
     for i in range(len(testOut)):
         self.assertAlmostEqual(testOut[i], actualOut[i], 1,"Failed on index: %d" % i)
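The assertions above rely on two helpers from kUtil that are not shown on this page. A minimal sketch of what they are assumed to compute (Euclidean distance, and the cosine of the angle at the middle point) is:

import math

def getDist(p1, p2):
    # Euclidean distance between two 2D points (assumed behaviour of kUtil.getDist)
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])

def cosTheta(a, vertex, b):
    # cosine of the angle at `vertex` between the rays vertex->a and vertex->b
    # (assumed behaviour of kUtil.cosTheta, used here for the passing-lane angles)
    v1 = (a[0] - vertex[0], a[1] - vertex[1])
    v2 = (b[0] - vertex[0], b[1] - vertex[1])
    dot = v1[0] * v2[0] + v1[1] * v2[1]
    return dot / (math.hypot(*v1) * math.hypot(*v2))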
Example #2
    def test_getStateVars(self):
        # self, worldRef, pos, sigma, agentType, trueBallPos, maxPlayerSpeed, maxBallSpeed, inPossession = False
        import keepAway

        keepAwayWorld = keepAway.keepAway()
        ballPos = (0, 0)
        center = (0, 0)
        simulatedError = 0.01
        a1 = agent.agent(
            keepAwayWorld,
            (10, 0),
            simulatedError,
            "Keeper",
            ballPos,
            keepAwayWorld.maxPlayerSpeed,
            keepAwayWorld.maxBallSpeed,
        )
        a2 = agent.agent(
            keepAwayWorld,
            (0, 0),
            simulatedError,
            "Keeper",
            ballPos,
            keepAwayWorld.maxPlayerSpeed,
            keepAwayWorld.maxBallSpeed,
        )
        a3 = agent.agent(
            keepAwayWorld,
            (0, 5),
            simulatedError,
            "Keeper",
            ballPos,
            keepAwayWorld.maxPlayerSpeed,
            keepAwayWorld.maxBallSpeed,
        )
        keepers = [a1, a2, a3]
        t1 = agent.agent(
            keepAwayWorld,
            (5, 5),
            simulatedError,
            "Taker",
            ballPos,
            keepAwayWorld.maxPlayerSpeed,
            keepAwayWorld.maxBallSpeed,
        )
        t2 = agent.agent(
            keepAwayWorld,
            (5, 0),
            simulatedError,
            "Taker",
            ballPos,
            keepAwayWorld.maxPlayerSpeed,
            keepAwayWorld.maxBallSpeed,
        )
        takers = [t1, t2]
        testOut = getStateVarsKeepers(keepers, takers, center)
        actualOut = [0, 5, 10, 5, math.sqrt(50), 5, 10, 5, math.sqrt(50), 5, 5, math.cos(math.pi / 4.0), 1]
        for i in range(len(testOut)):
            self.assertAlmostEqual(testOut[i], actualOut[i], 1)
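For this layout the values in actualOut can be checked by hand, assuming the state variables are ordered as in Example #1. The keepers sorted by distance to the ball at (0, 0) are (0, 0), (0, 5) and (10, 0), giving distances 0, 5 and 10 to the center, and the takers (5, 0) and (5, 5) give 5 and sqrt(50). From the keeper at (0, 0), the distances to its teammates are 5 and 10, and to the takers 5 and sqrt(50); the nearest taker is 5 away from both (0, 5) and (10, 0). For the passing angles at (0, 0), the takers subtend 45 and 90 degrees against the teammate at (0, 5), so the largest cosine is cos(pi/4), while the taker at (5, 0) lies directly on the line to the teammate at (10, 0), giving a largest cosine of 1.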
Example #3
    def __init__(self):
        self.mode = None # will be set to either Monte Carlo, Q-learning, SARSA, or manual control in the intro
        #RGB color
        self.white = (255,255,255) 
        self.black = (0,0,0)
        self.red = (255,0,0)
        self.green = (0,155,0)
        self.blue = (0,0,255)
            
        #give the game a title
        pygame.display.set_caption('Keepaway')
        self.keeperScore = 0
        self.displayGraphics = True
        
        #these images are effectively global resources shared across the game
        self.worldImage = pygame.image.load('images/soccer_field.png')
        self.ballImage = pygame.image.load('images/ball.png')
        self.keeperImage = pygame.image.load('images/keeper.png')
        self.takerImage = pygame.image.load('images/taker.png')
        #block sizes are used for collision detection
        #only 1 size per element because all blocks are squares. block size = side length
        self.agent_block_size = 23
        self.ball_block_size = 12

        self.maxBallSpeed= 3
        self.maxPlayerSpeed = 2

        
        #dimensions of the game are the same as the soccer field image
        self.display_width = 550
        self.display_height = 357
        self.field_center = (self.display_width / 2 , self.display_height / 2)
        #gameDisplay is a pygame.surface object. it's your screen
        self.gameDisplay = pygame.display.set_mode((self.display_width,self.display_height))
        self.fps = 60 
        self.clock = pygame.time.Clock()
        
        types = ["keeper", "taker"]
        agentSigmaError = .01
        
        #start the ball close to the keeper in the upper-left corner
        self.fieldBall = ball.ball( (self.field_center[0]/4, self.field_center[1]/4), self.maxBallSpeed)
        
        #set up the initial keepers and takers; each starts at a different field position,
        #so they are declared individually rather than in a loop
        self.keeperArray = []
        self.keeperArray.append(agent.agent(self, (12.5, 12.5), agentSigmaError, types[0], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed))
        self.keeperArray.append(agent.agent(self, (25,  self.display_width - 37.5), agentSigmaError, types[0], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed))
        self.keeperArray.append(agent.agent(self, (self.display_height - 37.5,  self.display_width - 37.5), agentSigmaError, types[0], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed))
        self.takerArray = []
        self.takerArray.append(agent.agent(self, (self.display_height - 25,  25), agentSigmaError, types[1], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed))
        self.takerArray.append(agent.agent(self, (self.display_height - 37.5,  50), agentSigmaError, types[1], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed))
        
        #four font sizes
        self.smallfont = pygame.font.SysFont("comicsansms",25) # 25 is the font size
        self.medfont = pygame.font.SysFont("comicsansms",50) 
        self.largefont = pygame.font.SysFont("comicsansms",80) 
        self.verysmallfont = pygame.font.SysFont("comicsansms", 12)
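The agent_block_size and ball_block_size attributes set above are the side lengths used for collision detection between square blocks. A minimal sketch of such an overlap test, assuming each block is addressed by its top-left (x, y) corner (this helper is illustrative, not the project's actual routine):

def blocks_overlap(pos_a, size_a, pos_b, size_b):
    # axis-aligned overlap test for two square blocks with the given side lengths
    ax, ay = pos_a
    bx, by = pos_b
    return (ax < bx + size_b and bx < ax + size_a and
            ay < by + size_b and by < ay + size_a)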
Example #4
 def test_getStateVars(self):
     ballPos = (0,0)
     center = (0,0)
     simulatedError = 0.01
     a1 = agent.agent((10,0),simulatedError,"Keeper",ballPos)
     a2 = agent.agent((0,0),simulatedError,"Keeper",ballPos)
     a3 = agent.agent((0,5),simulatedError,"Keeper",ballPos)
     keepers = [a1,a2,a3]
     t1 = agent.agent((5,5),simulatedError,"Taker",ballPos)
     t2 = agent.agent((5,0),simulatedError,"Taker",ballPos)
     takers = [t1,t2]
     testOut = getStateVarsKeepers(keepers, takers, center)
     actualOut = [0,5,10,5,math.sqrt(50),5,10,5,math.sqrt(50),5,5,math.cos(math.pi / 4.0),1]
     for i in range(len(testOut)):
         self.assertAlmostEqual(testOut[i], actualOut[i], 1)
Example #5
def start_from_terminal(app):
    parser = optparse.OptionParser()
    parser.add_option("-p", "--port", help="which port to serve content on", type="int", default=port)

    opts, args = parser.parse_args()
    net_args = {
        "model_def_file": model_def_file,
        "pretrained_model_file": pretrained_model_file,
        "gpu_mode": gpu_mode,
        "device_id": 1,
        "image_dim": image_dim,
        "raw_scale": raw_scale,
    }
    # Initialize classifier
    app.agent = agent(**net_args)
    logging.info("Initialize vision model done")
    # warm start by forward for allocation
    app.agent.net.forward()
    logging.info("Net forward done")

    app.indexer = indexer()
    for category_id in CATEGORY_NAME:
        app.indexer.load(category_id, DATABASE_FILENAME % category_id)
        logging.info("Loading indexer for {}".format(category_id))
    logging.info("Initialize indexer done")
    # app.indexer.load(DATABASE_FILE)

    start_tornado(app, opts.port)
Example #6
 def test_distCenter(self):
     a1 = agent.agent((0, 0), self.unitTestSigma, "Keeper", (0, 0))
     self.assertAlmostEqual(__distCenter(a1, (0, 0)), 0, 1)
     self.assertAlmostEqual(__distCenter(a1, (0, 10)), 10, 1)
     self.assertAlmostEqual(__distCenter(a1, (10, 0)), 10, 1)
     self.assertAlmostEqual(__distCenter(a1, (1, 1)), math.sqrt(2), 1)
     self.assertAlmostEqual(__distCenter(a1, (10, 10)), math.sqrt(200), 1)
Example #7
def getListOfAgents(version):
	worldFile = getWorldFile(version)
	if(worldFile == 0):
		return []
	listOfAgents = []
	for line in worldFile.readlines():
		a = agent.agent(line)
		listOfAgents.append(a)
	return listOfAgents
Example #8
File: mcloc.py Project: ZiweiXU/mcloc
def setup():
    size(768, 768)
    frameRate(fps)

    global g
    g = grid.generate()

    global a
    a = agent()
Example #9
File: sessions.py Project: BwRy/FuzzLabs
    def fuzz (self, this_node=None, path=[]):
        '''
        Call this routine to get the ball rolling. No arguments are necessary as they are
        both utilized internally during the recursive traversal of the session graph.

        @type  this_node: request (node)
        @param this_node: (Optional, def=None) Current node that is being fuzzed.
        @type  path:      List
        @param path:      (Optional, def=[]) Nodes along the path to the current one.
        '''

        # if no node is specified, we start from root and initialize the session.
        if not this_node:
            # we can't fuzz if we don't have at least one target and one request.
            if not self.target:
                syslog.syslog(syslog.LOG_ERR, self.session_id + 
                                  ": no target specified for session")
                return

            if not self.edges_from(self.root.id):
                syslog.syslog(syslog.LOG_ERR, self.session_id + 
                                  ": no request specified for session")
                return

            this_node = self.root

            self.total_mutant_index  = 0
            self.total_num_mutations = self.num_mutations()

        # If no errors above and not already connected to the agent, initialize the
        # agent connection.
        # If the agent cannot be initialized make sure the user is aware of it.

        if self.agent == None and self.agent_settings != None:
            try:
                self.agent = agent(self.config, self.session_id, self.agent_settings)
                self.agent.connect()
            except Exception, ex:
                syslog.syslog(syslog.LOG_ERR, self.session_id +
                              ": failed to establish agent connection (%s)" % str(ex))
                self.finished_flag = True
                self.stop_flag = True
                return

        # Get the agent to execute 
            try:
                self.agent.start()
            except Exception, ex:
                syslog.syslog(syslog.LOG_ERR, self.session_id +
                              ": agent failed to execute command (%s)" % str(ex))
                self.finished_flag = True
                self.stop_flag = True
                return
Example #10
File: gen.py Project: brhoades/irps
    def initialize( self ):
        self.inds = []

        delprn( "Creating Trees\t\t", 2 )
        #Set up random trees
        for i in range(0,self.mu):
            delprn( str(perStr( i/self.mu )), 3 )
            self.inds.append(agent( self ) )
        
        delprn( "Calc. Initial Fitness\t", 2 )
        #Do our initial run
        for i in range(0,len(self.inds)):
            delprn( str(perStr( i/len(self.inds) )), 3 )
            self.inds[i].fitness( )
Example #11
File: gen.py Project: brhoades/irps
 def recombination( self ):
     parents = self.parentSelection( )
     kids = []
     delprn( "Creating Children\t", 2 )
     for i in range(0,len(parents)):
         delprn( str(perStr( i/self.lamb )), 3 )
         pair = parents[i]
         p1 = pair[0]
         p2 = pair[1]
         #We're just doing cross over for now, so hardcode this in:
         
         #Create two kids, one from each parent
         kid1 = agent( self, copy=p1 )
         kid2 = agent( self, copy=p2 )
         
         #And sample for a random crossover point from both kids
         kid1pt = random.sample(kid1.tree.nodes, 1)[0]
         kid2pt = random.sample(kid2.tree.nodes, 1)[0]
         
         #Now swap subtrees
         tree.swapsubtree( kid1.tree, kid1pt, kid2.tree, kid2pt )
         
         #Mutate them
         kid1.mutate( )
         kid2.mutate( )
         
         kids.append(kid1)
         kids.append(kid2)
         
     if self.strat == PLUS:
         for ind in kids:
             self.inds.append( ind ) 
     elif self.strat == COMMA:
         for ind in self.inds:
             ind.delete( )
         self.inds = kids
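The comments above describe the crossover as picking one random node in each child's tree and swapping the subtrees rooted there. A minimal, self-contained sketch of that operation on a generic node structure (the project's tree.swapsubtree may differ in detail):

import random

class Node:
    def __init__(self, value, children=None):
        self.value = value
        self.children = children or []
        self.parent = None
        for c in self.children:
            c.parent = self

def all_nodes(root):
    # collect every node in the tree rooted at `root`
    out, stack = [], [root]
    while stack:
        n = stack.pop()
        out.append(n)
        stack.extend(n.children)
    return out

def swap_subtrees(n1, n2):
    # exchange the subtrees rooted at n1 and n2 by re-linking their parents
    p1, p2 = n1.parent, n2.parent
    if p1 is not None:
        p1.children[p1.children.index(n1)] = n2
    if p2 is not None:
        p2.children[p2.children.index(n2)] = n1
    n1.parent, n2.parent = p2, p1

def crossover(root1, root2):
    # pick a random crossover point in each tree and swap the subtrees there
    swap_subtrees(random.choice(all_nodes(root1)), random.choice(all_nodes(root2)))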
Example #12
  def __init__(self, port):

    util_root = '/works/demon_11st/utils'
    sys.path.insert(0, util_root)
    from exifutil import exifutil
    app.exifutils = exifutil()

    #import pdb; pdb.set_trace()
    agent_root = '/works/demon_11st/agent/detection'
    sys.path.insert(0, agent_root)
    import _init_paths
    from conf import conf
    from agent import agent
    yaml_file = '/storage/product/detection/11st_All/cfg/faster_rcnn_end2end_test.yml'
    conf = conf(yaml_file, 0)
    app.agent = agent()

    from korean_url_handler import korean_url_handler
    app.korean_url_handler = korean_url_handler()

    # start web server
    web_server.__init__(self, app, port)
Example #13
  def __init__(self, 
    port, 
    net_args, oversample, 
    category_no, max_num_items, database_filename):
    self.net_args = net_args
    self.database_filename = database_filename
    # Initialize classifier
    app.oversample = oversample
    app.agent = agent(**self.net_args)
    logging.info('Initialize vision model done')
    app.agent.net.forward()
    logging.info('Net forward done')
    # Initialize indexer
    app.indexer = indexer(category_no, max_num_items)
    app.indexer.load_category(database_filename)
    logging.info('Initialize indexer done')
    # get parser_utils
    app.parser_utils = parser_utils()
    app.korean_url_handler = korean_url_handler() 

    # start web server
    web_server.__init__(self, app, port)
Example #14
File: log.py Project: brhoades/irps
    def absBestFinish( self, cfg, best ):
        self.res.write( "\nTree with the Global Best Fitness\n" )

        #Mock container generation
        generation = gen( cfg )

        #Avoiding errors
        best.gen = generation

        self.res.write( "\nRandom GP Performance\n" )
        self.res.write( "Global best's gen #: " + str(best.gennum) + "\n" )

        #Clear old payoffs
        best.payoffs = []

        #Randomly make many individuals to face.
        for i in range(30):
            generation.inds.append( agent( generation ) )

        for opp in generation.inds:
            beforepayoff = best.mem*2
            for j in range(0,generation.seqs):
                tmoves = opp.mymoves
                oppres = opp.run( best.mymoves )
                myres = best.run( opp.mymoves )

                if j > beforepayoff:
                    best.upres( myres, oppres )
                    opp.upres( oppres, myres )

        avg = 0
        for i in best.payoffs:
            avg += i
        avg /= len(best.payoffs)
        self.res.write( "Random fit: " + str(avg) + "\n" )

        self.csv.write( "\n\n" + "Global Best Gen #,avgabsfit,lastwinfit,csv,random fit" + "\n" )
        self.csv.write( str(best.gennum) + "," + str(best.fit) + "," + str(best.fits[0]) + "," + str(best.fits[1]) + "," + str(avg) + "\n" )
Example #15
    def __init__(self, _num_adv, _num_agents, _num_drones):

        g_var.num_of_adverseries = _num_adv
        g_var.num_of_agents = _num_agents
        g_var.num_of_drones = _num_drones

        # re-initialization of result variables

        g_var.arrested_poachers = 0
        g_var.fled_poachers = 0
        g_var.resource_poached = 0
        g_var.resource_recovered = 0
        g_var.distance_travelled = 0

        self.refresh_counter = 0
        self.refresh_limit = 40

        print "Parameters: adversaries: " + str(g_var.num_of_adverseries),\
        ", agents: " + str(g_var.num_of_agents),\
        ", drones: " + str(g_var.num_of_drones)

        self.root = Tk()
        self.root.title("Green Security Game")
        self.root.geometry('640x480+620+120')
        self.canvas = Canvas(self.root, bg="#333333", height=480, width=640)
        self.canvas.pack()
        Frame.__init__(self)

        self.agent_pos = [[0 for i in range(g_var.dimension)]
                          for j in range(g_var.dimension)]
        #self.cell_resources = [[random.randint(10,50) for i in range(global_var.dimension)] for j in range(global_var.dimension)]
        self.adv_pos = [[0 for i in range(g_var.dimension)]
                        for j in range(g_var.dimension)]
        self.drone_pos = [[0 for i in range(g_var.dimension)]
                          for j in range(g_var.dimension)]
        self.drone_signal = [[0 for i in range(g_var.dimension)]
                             for j in range(g_var.dimension)]
        self.target_pos = []
        self.round_marking = []
        self.cell_resources = [[4, 9, 6, 7, 0, 2, 1, 6, 7, 0],
                               [13, 50, 0, 0, 50, 0, 50, 0, 0, 21],
                               [14, 0, 19, 13, 24, 23, 36, 17, 0, 11],
                               [17, 50, 40, 10, 50, 50, 50, 6, 0, 6],
                               [10, 31, 20, 13, 50, 0, 0, 10, 50, 3],
                               [9, 34, 30, 10, 50, 50, 50, 10, 0, 5],
                               [11, 37, 10, 22, 17, 15, 12, 10, 0, 6],
                               [13, 0, 50, 14, 33, 17, 50, 32, 26, 11],
                               [7, 0, 0, 50, 0, 0, 0, 50, 13, 23],
                               [11, 12, 31, 10, 9, 8, 11, 13, 14, 21]]

        for i in range(g_var.dimension):
            for j in range(g_var.dimension):
                if self.cell_resources[i][j] > 0:
                    self.target_pos.append((i, j))
                if self.cell_resources[i][j] == -1:
                    self.round_marking.append((i, j))
        self.round_marking.append((5, 5))  # temporary dummy

        self.cell_coord = [[
            i.__str__() + "," + j.__str__() for i in range(g_var.dimension)
        ] for j in range(g_var.dimension)]

        self.label_poacher_num = Label(self.root,
                                       text="Number of Total \nPoachers:\n" +
                                       str(g_var.num_of_adverseries))
        self.label_poacher_num.place(relx=0.78, rely=0.05)

        self.label_arrest = Label(self.root, text=g_var.arrested_poachers)
        self.label_arrest.place(relx=0.78, rely=0.2)
        self.label_fled = Label(self.root, text=g_var.fled_poachers)
        self.label_fled.place(relx=0.78, rely=0.3)
        self.label_sack = Label(self.root, text=g_var.resource_poached)
        self.label_sack.place(relx=0.78, rely=0.4)
        self.label_recovered = Label(self.root, text=g_var.resource_recovered)
        self.label_recovered.place(relx=0.78, rely=0.5)
        self.label_travelled = Label(self.root, text=g_var.distance_travelled)
        self.label_travelled.place(relx=0.78, rely=0.6)

        self.label_agent_num = Label(self.root,
                                     text="Number of Agents:\n" +
                                     str(g_var.num_of_agents))
        self.label_agent_num.place(relx=0.78, rely=0.7)
        self.label_drone_num = Label(self.root,
                                     text="Number of Drones:\n" +
                                     str(g_var.num_of_drones))
        self.label_drone_num.place(relx=0.78, rely=0.8)

        self.refresh()
        self.canvas.create_rectangle(0,
                                     0,
                                     g_var.dimension * g_var.block_size,
                                     g_var.dimension * g_var.block_size,
                                     fill=g_var.bg_color)

        # for ONE TIME labelling *******************************************
        for i in range(g_var.dimension):
            for j in range(g_var.dimension):
                self.coord_label = Label(self.root,
                                         text=self.cell_coord[i][j],
                                         bg="black",
                                         fg="white")
                self.coord_label.place(x=i * g_var.block_size + 2,
                                       y=j * g_var.block_size + 18)

        for i in range(g_var.dimension + 1):
            for j in range(g_var.dimension + 1):
                self.canvas.create_rectangle(i * g_var.block_size,
                                             j * g_var.block_size,
                                             g_var.block_size,
                                             g_var.block_size,
                                             outline="grey")

        for i in range(g_var.num_of_agents):
            agent_obj = agent(self.canvas, self.root, self.agent_pos,
                              self.cell_resources, self.target_pos,
                              self.round_marking, self.drone_signal)
            agent_obj.move_spec_guard()

        for i in range(g_var.num_of_drones):
            drone_obj = drone(self.canvas, self.root, self.drone_pos,
                              self.drone_signal, self.adv_pos)
            drone_obj.move_drone()

        for i in range(g_var.num_of_adverseries):
            adv_obj = adv(self.canvas, self.root, self.agent_pos,
                          self.drone_pos, self.cell_resources, self.target_pos,
                          self.adv_pos)
            adv_obj.operate_adv()

        self.root.mainloop()
Example #16
File: main.py Project: aldopareja/thesis
import edge   # implied by the edge.edge(...) calls below; not shown in the original excerpt
import agent  # implied by the agent.agent(...) calls below; not shown in the original excerpt
from simulGivenRoutes import execute
from getRoutes import getRoutes
from itertools import product
from copy import deepcopy
from ARoptimization import ARoptim
from ExpOptim import updatePosterior
from ExpOptim import generateDraws
#network creation
network=[ [None] * 4 for i in range(4)]
network[0][1]=edge.edge('x')
network[1][2]=edge.edge('x')
network[1][3]=edge.edge('x')
network[3][2]=edge.edge('x')
#agent creation
agents=list()
agents.append(agent.agent(0,2,1))
agents.append(agent.agent(1,2,2))
agents.append(agent.agent(1,2,3))
agents.append(agent.agent(1,2,1))
#%%
#Experience
#sensitivity to New information
delta=0.1
#getting routes and costs
routesAndCosts=getRoutes(network, agents, 10, cost=True)
#calculate the first probability distribution
posterior=updatePosterior(routesAndCosts['costs'])
prior=posterior
#get a route draw for each agent
Example #17
while True:
    for i in range(3000):
        f, ax = plt.subplots(1, 1)
        ax.plot(reward_ts)

        plt.show()

    else:

        if args.weight_sharing:
            model = rnn.RDQN_multi(args)
            target = rnn.RDQN_multi(args)
            optimizer = optim.RMSprop(model.parameters(), lr=args.learning_rate, momentum=args.learning_momentum)

        game = env.env_multi(args)
        players = []
        for i in range(args.n_agents):
            player = agent.agent(args, agent_id=i)
            if args.weight_sharing:
                player.setup_models(model, target)
            else:
                model = rnn.RDQN_multi(args)
                target = rnn.RDQN_multi(args)
                optimizer = optim.RMSprop(model.parameters(), lr=args.learning_rate, momentum=args.learning_momentum)
                player.setup_models(model, target)
                player.set_optimizer(optimizer)

            players.append(player)

        criterion = nn.MSELoss()
        average_loss = 0
        average_reward = 0
        loss_record = []
Example #18
class Matris(object):
    board = agent.board()
    agent_mode = True  #used to check if agent is playing. Causes hard-drops to always happen.
    if agent_mode == True:
        if (sys.argv[1] == "-hh"):
            #Creates an agent that takes column differences, holes and height of the tallest column as inputs
            agent = agent.agent([],
                                int(sys.argv[2]),
                                random_moves=False,
                                rewards_as_lines=True,
                                epsilon=1,
                                epsilon_decay=0.01,
                                epsilon_minimum=0.01,
                                memory_size=1000,
                                sample_size=32,
                                reset_steps=1000,
                                height=True,
                                holes=True)
        elif (sys.argv[1] == "-ho"):
            #Creates an agent that takes column differences and holes as inputs
            agent = agent.agent([],
                                int(sys.argv[2]),
                                random_moves=False,
                                rewards_as_lines=True,
                                epsilon=1,
                                epsilon_decay=0.01,
                                epsilon_minimum=0.01,
                                memory_size=1000,
                                sample_size=32,
                                reset_steps=1000,
                                holes=True)
        elif (sys.argv[1] == "-hi"):
            #Creates an agent that takes column differences and height of the tallest column as inputs
            agent = agent.agent([],
                                int(sys.argv[2]),
                                random_moves=False,
                                rewards_as_lines=True,
                                epsilon=1,
                                epsilon_decay=0.01,
                                epsilon_minimum=0.01,
                                memory_size=1000,
                                sample_size=32,
                                reset_steps=1000,
                                height=True)
        elif (sys.argv[1] == "-no"):
            #Creates an agent that takes column differences as inputs only
            agent = agent.agent([],
                                int(sys.argv[2]),
                                random_moves=False,
                                rewards_as_lines=True,
                                epsilon=1,
                                epsilon_decay=0.01,
                                epsilon_minimum=0.01,
                                memory_size=1000,
                                sample_size=32,
                                reset_steps=1000)
        elif (sys.argv[1] == "-ra"):
            #Creates an agent that plays randomly
            agent = agent.agent([], int(sys.argv[2]), random_moves=True)
        elif (sys.argv[1] == "-lo"):
            #Loads an agent that has previously been trained in MaTris. Loads .obj file.
            agent = agent.agent([],
                                int(sys.argv[2]),
                                random_moves=False,
                                rewards_as_lines=True,
                                epsilon=1,
                                epsilon_decay=0.01,
                                epsilon_minimum=0.01,
                                memory_size=1000,
                                sample_size=32,
                                reset_steps=1000,
                                filepath=sys.argv[3])
        elif (sys.argv[1] == "-lt"):
            #Loads an agent that has previously been trained using supervised learning in MaTris-O. Loads .obj file.
            agent = agent.agent([],
                                int(sys.argv[2]),
                                random_moves=False,
                                rewards_as_lines=True,
                                epsilon=1,
                                epsilon_decay=0.01,
                                epsilon_minimum=0.01,
                                memory_size=1000,
                                sample_size=32,
                                reset_steps=1000,
                                filepath=sys.argv[3],
                                supervised=True)

        else:
            raise Exception(
                "\n\nError inputting command line arguments\nUsage:\n[mode] [number of episodes]\nmode:\n\t-hh - holes and height and column differences\n\t-ho - holes and column differences\n\t-hi - height and column differences\n\t-no - column differences only\n\tLoad ANN\nSecond argument should be number of episodes\n third argument should be filepath if file is being loaded."
            )
    seed = agent.load_new_seed()
    random.seed(seed)
    tetromino_placement = None

    def __init__(self):
        self.surface = screen.subsurface(
            Rect((MATRIS_OFFSET + BORDERWIDTH, MATRIS_OFFSET + BORDERWIDTH),
                 (MATRIX_WIDTH * BLOCKSIZE, (MATRIX_HEIGHT - 2) * BLOCKSIZE)))

        self.matrix = dict()
        for y in range(MATRIX_HEIGHT):
            for x in range(MATRIX_WIDTH):
                self.matrix[(y, x)] = None
        """
        `self.matrix` is the current state of the tetris board, that is, it records which squares are
        currently occupied. It does not include the falling tetromino. The information relating to the
        falling tetromino is managed by `self.set_tetrominoes` instead. When the falling tetromino "dies",
        it will be placed in `self.matrix`.
        """

        self.next_tetromino = random.choice(list_of_tetrominoes)
        self.set_tetrominoes()

        if self.agent_mode == True:
            #Creates a representation of the initial board
            self.board.update_board_representation(
                self.create_board_representation())
            self.board.set_board_height()
            self.board.set_holes()
            self.board.set_column_differences()
            print(str(self.board))
            print("Column Height Differences:" +
                  str(self.board.get_column_differences()))

            #Set up the agent
            self.agent.set_current_board(self.board)
            self.agent.set_agent_tetromino(self.current_tetromino)

        self.tetromino_rotation = 0
        self.downwards_timer = 0
        self.base_downwards_speed = 0.4  # Move down every 400 ms

        self.movement_keys = {'left': 0, 'right': 0}
        self.movement_keys_speed = 0.05
        self.movement_keys_timer = (-self.movement_keys_speed) * 2

        self.level = 1
        self.score = 0
        self.lines = 0

        self.combo = 1  # Combo will increase when you clear lines with several tetrominos in a row

        self.paused = False

        self.highscore = load_score()
        self.played_highscorebeaten_sound = False

        self.levelup_sound = get_sound("levelup.wav")
        self.gameover_sound = get_sound("gameover.wav")
        self.linescleared_sound = get_sound("linecleared.wav")
        self.highscorebeaten_sound = get_sound("highscorebeaten.wav")

        if self.agent_mode == True:
            #Agent's first move
            self.tetromino_placement = self.agent.make_move()
            self.tetromino_position = (0, self.tetromino_placement[2])
            for rotations in range(self.tetromino_placement[0]):
                self.request_rotation()

    def set_tetrominoes(self):
        """
        Sets information for the current and next tetrominos
        """
        self.current_tetromino = self.next_tetromino
        self.next_tetromino = random.choice(list_of_tetrominoes)
        self.surface_of_next_tetromino = self.construct_surface_of_next_tetromino(
        )
        self.tetromino_position = (0, 4) if len(
            self.current_tetromino.shape) == 2 else (0, 3)
        self.tetromino_rotation = 0
        self.tetromino_block = self.block(self.current_tetromino.color)
        self.shadow_block = self.block(self.current_tetromino.color,
                                       shadow=True)

    def hard_drop(self):
        """
        Instantly places tetrominos in the cells below
        """
        amount = 0
        while self.request_movement('down'):
            amount += 1
        self.score += 10 * amount

        self.lock_tetromino()

    def update(self, timepassed):
        """
        Main game loop
        """
        try:
            self.needs_redraw = False

            if self.agent_mode == True:
                self.hard_drop()

            else:
                #Handles player input
                pressed = lambda key: event.type == pygame.KEYDOWN and event.key == key
                unpressed = lambda key: event.type == pygame.KEYUP and event.key == key

                events = pygame.event.get()
                #Controls pausing and quitting the game.
                for event in events:
                    if pressed(pygame.K_p):
                        self.surface.fill((0, 0, 0))
                        self.needs_redraw = True
                        self.paused = not self.paused
                    elif event.type == pygame.QUIT:
                        self.gameover(full_exit=True)
                    elif pressed(pygame.K_ESCAPE):
                        self.gameover()

                if self.paused:
                    return self.needs_redraw

                for event in events:
                    #Handles player input
                    #Controls movement of the tetromino
                    if pressed(pygame.K_SPACE):
                        self.hard_drop()
                    elif pressed(pygame.K_UP) or pressed(pygame.K_w):
                        self.request_rotation()
                    elif pressed(pygame.K_LEFT) or pressed(pygame.K_a):
                        self.request_movement('left')
                        self.movement_keys['left'] = 1
                    elif pressed(pygame.K_RIGHT) or pressed(pygame.K_d):
                        self.request_movement('right')
                        self.movement_keys['right'] = 1

                    elif unpressed(pygame.K_LEFT) or unpressed(pygame.K_a):
                        self.movement_keys['left'] = 0
                        self.movement_keys_timer = (
                            -self.movement_keys_speed) * 2
                    elif unpressed(pygame.K_RIGHT) or unpressed(pygame.K_d):
                        self.movement_keys['right'] = 0
                        self.movement_keys_timer = (
                            -self.movement_keys_speed) * 2

                    self.downwards_speed = self.base_downwards_speed**(
                        1 + self.level / 10.)

                    self.downwards_timer += timepassed
                    downwards_speed = self.downwards_speed * 0.10 if any([
                        pygame.key.get_pressed()[pygame.K_DOWN],
                        pygame.key.get_pressed()[pygame.K_s]
                    ]) else self.downwards_speed
                    if self.downwards_timer > downwards_speed:
                        if not self.request_movement(
                                'down'
                        ):  #Places tetromino if it cannot move further down
                            self.lock_tetromino()

                        self.downwards_timer %= downwards_speed

                    if any(self.movement_keys.values()):
                        self.movement_keys_timer += timepassed
                    if self.movement_keys_timer > self.movement_keys_speed:
                        self.request_movement(
                            'right' if self.movement_keys['right'] else 'left')
                        self.movement_keys_timer %= self.movement_keys_speed

        except:
            print("Error in agent running")
            print(
                "Manually causing gameover. Preserves continuation of agent running with minor potential impediment on learning."
            )
            self.gameover()
            self.needs_redraw = True
        return self.needs_redraw

    def draw_surface(self):
        """
        Draws the image of the current tetromino
        """
        with_tetromino = self.blend(matrix=self.place_shadow())

        for y in range(MATRIX_HEIGHT):
            for x in range(MATRIX_WIDTH):

                # hide the first 2 rows by drawing them outside of the surface
                block_location = Rect(x * BLOCKSIZE,
                                      (y * BLOCKSIZE - 2 * BLOCKSIZE),
                                      BLOCKSIZE, BLOCKSIZE)
                if with_tetromino[(y, x)] is None:
                    self.surface.fill(BGCOLOR, block_location)
                else:
                    if with_tetromino[(y, x)][0] == 'shadow':
                        self.surface.fill(BGCOLOR, block_location)

                    self.surface.blit(with_tetromino[(y, x)][1],
                                      block_location)

    def gameover(self, full_exit=False):
        """
        Gameover occurs when a new tetromino does not fit after the old one has died, either
        after a "natural" drop or a hard drop by the player. That is why `self.lock_tetromino`
        is responsible for checking if it's game over.
        """

        write_score(self.score)

        if full_exit:
            if self.agent_mode == True:
                print("Runs completed.")
                self.serialize_agent()
            exit()
        else:
            if self.agent_mode == True:
                self.agent.complete_episode()
                #Manages the starting of a new game
                if self.agent.get_current_episode(
                ) < self.agent.get_number_of_episodes():
                    #Resets the board
                    self.matrix = dict()
                    for y in range(MATRIX_HEIGHT):
                        for x in range(MATRIX_WIDTH):
                            self.matrix[(y, x)] = None
                    self.score = 0
                    self.lines = 0
                    self.board = agent.board(
                        self.create_board_representation())
                    self.board.set_board_height()
                    self.board.set_holes()
                    self.board.set_column_differences()
                    self.agent.set_current_board(self.board)
                    print(str(self.board))
                    new_seed = self.agent.load_new_seed()
                    if new_seed == None:
                        try:
                            raise ValueError(
                                "Not enough seeds for current experiment!")
                        except:
                            print(
                                "\nNot enough seeds for current experiment!\nExiting Matris..."
                            )
                            exit()
                    print("Generating new game with seed: " + str(new_seed))
                    random.seed(new_seed)
                    self.set_tetrominoes()
                    self.next_tetromino = random.choice(list_of_tetrominoes)
                    self.agent.set_agent_tetromino(self.current_tetromino)

                    #Agent's first move of the new game
                    self.tetromino_placement = self.agent.make_move()
                    self.tetromino_position = (0, self.tetromino_placement[2])
                    for rotations in range(self.tetromino_placement[0]):
                        self.request_rotation()

                else:
                    print("Runs completed.")
                    self.serialize_agent()
                    exit()
            else:
                raise GameOver("Sucker!")

    def place_shadow(self):
        """
        Draws shadow of tetromino so player can see where it will be placed
        """
        posY, posX = self.tetromino_position
        while self.blend(position=(posY, posX)):
            posY += 1

        position = (posY - 1, posX)

        return self.blend(position=position, shadow=True)

    def fits_in_matrix(self, shape, position):
        """
        Checks if tetromino fits on the board
        """
        posY, posX = position
        for x in range(posX, posX + len(shape)):
            for y in range(posY, posY + len(shape)):
                if self.matrix.get((y, x), False) is False and shape[y - posY][
                        x - posX]:  # outside matrix
                    return False

        return position

    def request_rotation(self):
        """
        Checks if tetromino can rotate
        Returns the tetromino's rotation position if possible
        """
        rotation = (self.tetromino_rotation + 1) % 4
        shape = self.rotated(rotation)

        y, x = self.tetromino_position

        position = (self.fits_in_matrix(shape, (y, x))
                    or self.fits_in_matrix(shape, (y, x + 1))
                    or self.fits_in_matrix(shape, (y, x - 1))
                    or self.fits_in_matrix(shape, (y, x + 2))
                    or self.fits_in_matrix(shape, (y, x - 2)))
        # ^ That's how wall-kick is implemented

        if position and self.blend(shape, position):
            self.tetromino_rotation = rotation
            self.tetromino_position = position

            self.needs_redraw = True
            return self.tetromino_rotation
        else:
            return False

    def request_movement(self, direction):
        """
        Checks if the tetromino can move in the given direction and returns its new position if movement is possible
        """
        posY, posX = self.tetromino_position
        if direction == 'left' and self.blend(position=(posY, posX - 1)):
            self.tetromino_position = (posY, posX - 1)
            self.needs_redraw = True
            return self.tetromino_position
        elif direction == 'right' and self.blend(position=(posY, posX + 1)):
            self.tetromino_position = (posY, posX + 1)
            self.needs_redraw = True
            return self.tetromino_position
        elif direction == 'up' and self.blend(position=(posY - 1, posX)):
            self.needs_redraw = True
            self.tetromino_position = (posY - 1, posX)
            return self.tetromino_position
        elif direction == 'down' and self.blend(position=(posY + 1, posX)):
            self.needs_redraw = True
            self.tetromino_position = (posY + 1, posX)
            return self.tetromino_position
        else:
            return False

    def rotated(self, rotation=None):
        """
        Rotates tetromino
        """
        if rotation is None:
            rotation = self.tetromino_rotation
        return rotate(self.current_tetromino.shape, rotation)

    def block(self, color, shadow=False):
        """
        Sets visual information for tetromino
        """
        colors = {
            'blue': (105, 105, 255),
            'yellow': (225, 242, 41),
            'pink': (242, 41, 195),
            'green': (22, 181, 64),
            'red': (204, 22, 22),
            'orange': (245, 144, 12),
            'cyan': (10, 255, 226)
        }

        if shadow:
            end = [90]  # end is the alpha value
        else:
            end = [
            ]  # Adding this to the end will not change the array, thus no alpha value

        border = Surface((BLOCKSIZE, BLOCKSIZE), pygame.SRCALPHA, 32)
        border.fill(list(map(lambda c: c * 0.5, colors[color])) + end)

        borderwidth = 2

        box = Surface(
            (BLOCKSIZE - borderwidth * 2, BLOCKSIZE - borderwidth * 2),
            pygame.SRCALPHA, 32)
        boxarr = pygame.PixelArray(box)
        for x in range(len(boxarr)):
            for y in range(len(boxarr)):
                boxarr[x][y] = tuple(
                    list(
                        map(
                            lambda c: min(255, int(c * random.uniform(
                                0.8, 1.2))), colors[color])) + end)

        del boxarr  # delete boxarr first, otherwise the box surface stays locked and won't blit
        border.blit(box, Rect(borderwidth, borderwidth, 0, 0))

        return border

    def lock_tetromino(self):
        """
        This method is called whenever the falling tetromino "dies". `self.matrix` is updated,
        the lines are counted and cleared, and a new tetromino is chosen.
        """
        self.matrix = self.blend()

        lines_cleared = self.remove_lines()

        if lines_cleared == -1:  #Indicates that clearing the lines failed. This is due to the tetromino reaching higher than 2 above the skyline.
            """
            End episode:
                game will be in a terminal state as the skyline was occupied 3 cells high
                however MaTris can only handle the skyline being occupied by 2 cells high.

            This causes the memory to be stored as if it were a terminal state.
            The board is then cleared, and a new episode restarted.
            """
            self.agent.remember_state_action(self.agent.previous_state,
                                             self.agent.previous_action, -1000,
                                             self.agent.get_current_board(),
                                             True)
            self.agent.update_approximater()
            self.agent.reset_approximaters()
            self.gameover()

        else:
            self.lines += lines_cleared

            if lines_cleared:
                self.score += 100 * (lines_cleared**2) * self.combo

                if not self.played_highscorebeaten_sound and self.score > self.highscore:
                    self.played_highscorebeaten_sound = True

            if self.lines >= self.level * 10:
                self.level += 1

                self.combo = self.combo + 1 if lines_cleared else 1

        self.set_tetrominoes()

        if not self.blend() and lines_cleared != -1:
            self.gameover()

        self.needs_redraw = True

        if self.agent_mode == True:
            #Collects information from the board.
            self.board.update_board_representation(
                self.create_board_representation())
            self.board.set_board_height()
            self.board.set_holes()
            self.board.set_column_differences()
            print(str(self.board))
            print("Column Height Differences:" +
                  str(self.board.get_column_differences()))
            if self.agent.holes == True:
                print("Holes: " + str(self.board.get_holes()))
            if self.agent.height == True:
                print("Height: " + str(self.board.get_board_height()))
            print(str(self.tetromino_placement))
            print("\nTetromino:")
            for line in range(0, len(self.agent.agent_tetromino[0])):
                print(str(self.agent.agent_tetromino[0][line]))
            print("Epsilon: " + str(self.agent.epsilon))
            reward = self.agent.update_score_and_lines(self.score, self.lines)
            print("Score: " + str(self.agent.score))
            print("Lines Cleared: " + str(self.agent.lines_cleared))
            print("Current Episode number: " +
                  str(self.agent.current_episode + 1) + " / " +
                  str(self.agent.number_of_episodes))
            print("**********************************")

            #Passes tetromino and board information to the agent.
            self.agent.set_agent_tetromino(self.current_tetromino)
            self.agent.set_current_board(self.board)

            #Remembers previous S,A,R,S
            if self.agent.check_game_over(
            ) and lines_cleared != -1:  #Ends episode if previous turn was terminal
                #End of episode
                if self.agent.random_moves == False:
                    self.agent.remember_state_action(
                        self.agent.previous_state, self.agent.previous_action,
                        -1000, self.agent.get_current_board(), True)
                    self.agent.update_approximater()
                    self.agent.reset_approximaters()
                self.gameover()
            else:  #Continue episode as not in terminal state
                self.tetromino_placement = self.agent.make_move()

                if self.tetromino_placement == False:
                    #Tetromino placed in state that causes a game over
                    if self.agent.random_moves == False:
                        #Tetromino placed in state that causes a game over
                        self.agent.remember_state_action(
                            self.agent.previous_state,
                            self.agent.previous_action, -1000,
                            self.agent.get_current_board(), True)
                        self.agent.update_approximater()
                        self.agent.reset_approximaters()
                    self.gameover()
                else:
                    #Tetromino placed in a non-terminal state.
                    if self.agent.random_moves == False:
                        self.agent.remember_state_action(
                            self.agent.previous_state,
                            self.agent.previous_action, reward,
                            self.agent.get_current_board(), False)
                        self.agent.update_approximater()
                        self.agent.reset_approximaters()
                    self.tetromino_position = (0, self.tetromino_placement[2])
                    for rotations in range(self.tetromino_placement[0]):
                        self.request_rotation()

    def remove_lines(self):
        """
        Removes lines from the board
        """
        try:
            lines = []
            for y in range(MATRIX_HEIGHT):
                #Checks, for each row, whether it is full
                line = (y, [])
                for x in range(MATRIX_WIDTH):
                    if self.matrix[(y, x)]:
                        line[1].append(x)
                if len(line[1]) == MATRIX_WIDTH:
                    lines.append(y)

            for line in sorted(lines):
                #Moves lines down one row
                for x in range(MATRIX_WIDTH):
                    self.matrix[(line, x)] = None
                for y in range(0, line + 1)[::-1]:
                    for x in range(MATRIX_WIDTH):
                        self.matrix[(y, x)] = self.matrix.get((y - 1, x), None)

            return len(lines)
        except:
            print("ERROR REMOVING LINES:\t DEBUG INFORMATION")
            print(self.tetromino_placement)
            print(self.board.board_representation)
            return -1

    def blend(self, shape=None, position=None, matrix=None, shadow=False):
        """
        Does `shape` at `position` fit in `matrix`? If so, return a new copy of `matrix` where all
        the squares of `shape` have been placed in `matrix`. Otherwise, return False.

        This method is often used simply as a test, for example to see if an action by the player is valid.
        It is also used in `self.draw_surface` to paint the falling tetromino and its shadow on the screen.
        """
        if shape is None:
            shape = self.rotated()
        if position is None:
            position = self.tetromino_position

        copy = dict(self.matrix if matrix is None else matrix)
        posY, posX = position
        for x in range(posX, posX + len(shape)):
            for y in range(posY, posY + len(shape)):
                if (copy.get((y, x), False) is False and
                        shape[y - posY][x -
                                        posX]  # shape is outside the matrix
                        or  # coordinate is occupied by something else which isn't a shadow
                        copy.get((y, x)) and shape[y - posY][x - posX]
                        and copy[(y, x)][0] != 'shadow'):

                    return False  # Blend failed; `shape` at `position` breaks the matrix

                elif shape[y - posY][x - posX]:
                    copy[(y, x)] = ('shadow',
                                    self.shadow_block) if shadow else (
                                        'block', self.tetromino_block)

        return copy

    def construct_surface_of_next_tetromino(self):
        """
        Draws the image of the next tetromino
        """
        shape = self.next_tetromino.shape
        surf = Surface((len(shape) * BLOCKSIZE, len(shape) * BLOCKSIZE),
                       pygame.SRCALPHA, 32)

        for y in range(len(shape)):
            for x in range(len(shape)):
                if shape[y][x]:
                    surf.blit(self.block(self.next_tetromino.color),
                              (x * BLOCKSIZE, y * BLOCKSIZE))
        return surf

    def create_board_representation(self):
        lines = []
        for y in range(MATRIX_HEIGHT):
            #Checks, for each row, whether it is full
            line = (y, [])
            for x in range(MATRIX_WIDTH):
                if self.matrix[(y, x)]:
                    line[1].append(1)
                else:
                    line[1].append(0)
            lines.append(line[1])
        board = []
        for i in range(len(lines)):
            board.append(lines[i])

        return board

    def serialize_agent(self):
        """
        Serializes the agent.
        This saves the epsilon value, whether holes or height was used and the current ANN of the agent.
        """
        agent_information = [
            self.agent.epsilon, self.agent.holes, self.agent.height,
            self.agent.current_net
        ]
        handler = open(self.agent.file_path + ".obj", 'wb')
        pickle.dump(agent_information, handler)
        handler.close()
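The agent variants configured at the top of this example differ only in which board features they see: column height differences, the number of holes, and the height of the tallest column. A hedged sketch of how these features can be derived from the 0/1 board produced by create_board_representation (the project's board class presumably implements something equivalent):

def column_heights(board):
    # board is a list of rows, top row first, with 0/1 cell occupancy
    rows, cols = len(board), len(board[0])
    heights = []
    for x in range(cols):
        h = 0
        for y in range(rows):
            if board[y][x]:
                h = rows - y  # height measured from the bottom of the board
                break
        heights.append(h)
    return heights

def column_differences(board):
    # absolute height difference between each pair of adjacent columns
    h = column_heights(board)
    return [abs(h[i + 1] - h[i]) for i in range(len(h) - 1)]

def count_holes(board):
    # a hole is an empty cell with at least one filled cell above it in the same column
    rows, cols = len(board), len(board[0])
    holes = 0
    for x in range(cols):
        seen_block = False
        for y in range(rows):
            if board[y][x]:
                seen_block = True
            elif seen_block:
                holes += 1
    return holes

def board_height(board):
    # height of the tallest column
    return max(column_heights(board))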
Example #19
    def gen(self):
        fh = open(self.package_name + ".sv", "w")
        fh.write(self.header.replace("file_name", self.package_name + ".sv"))
        fh.write("`ifndef _%s_\n" % (self.package_name.upper()))
        fh.write("`define _%s_\n" % (self.package_name.upper()))
        fh.write("\n")
        fh.write("package %s;\n" % (self.package_name))
        fh.write("  import uvm_pkg::*;\n")
        fh.write("\n")
        fh.write("  `include \"%s.sv\"\n" % (self.defines_name))
        fh.write("  `include \"%s.sv\"\n" % (self.config_name))
        fh.write("  `include \"%s.sv\"\n" % (self.transaction_name))
        fh.write("  `include \"%s.sv\"\n" % (self.config_name))
        fh.write("  `include \"%s.sv\"\n" % (self.callback_name))
        fh.write("  `include \"%s.sv\"\n" % (self.cov_callback_name))
        fh.write("  `include \"%s.sv\"\n" % (self.master_driver_name))
        fh.write("  `include \"%s.sv\"\n" % (self.master_sequencer_name))
        fh.write("  `include \"%s.sv\"\n" % (self.master_sequence_name))
        fh.write("  `include \"%s.sv\"\n" % (self.slave_driver_name))
        fh.write("  `include \"%s.sv\"\n" % (self.slave_sequencer_name))
        fh.write("  `include \"%s.sv\"\n" % (self.slave_sequence_name))
        fh.write("  `include \"%s.sv\"\n" % (self.monitor_name))
        fh.write("  `include \"%s.sv\"\n" % (self.master_agent_name))
        fh.write("  `include \"%s.sv\"\n" % (self.slave_agent_name))
        fh.write("\n")
        fh.write("endpackage: %s\n" % (self.package_name))
        fh.write("\n")
        fh.write("`endif //_%s_\n" % (self.package_name.upper()))
        fh.close()

        #Generate agent components
        agent_defines = defines.defines(self.header, self.agent_setting)
        agent_defines.gen()

        agent_interface = interface.interface(self.header, self.agent_setting)
        agent_interface.gen()

        agent_cfg = cfg.cfg(self.header, self.agent_setting)
        agent_cfg.gen()

        agent_transaction = transaction.transaction(self.header,
                                                    self.agent_setting)
        agent_transaction.gen()

        agent_sequencer = sequencer.sequencer(self.header, self.agent_setting)
        agent_sequencer.sequencer_gen()

        agent_sequence = sequence.sequence(self.header, self.agent_setting)
        agent_sequence.sequence_gen()

        agent_drv = driver.driver(self.header, self.agent_setting)
        agent_drv.master_driver_gen()
        agent_drv.slave_driver_gen()

        agent_mon = monitor.monitor(self.header, self.agent_setting)
        agent_mon.monitor_gen()

        agent_callback = callback.callback(self.header, self.agent_setting)
        agent_callback.callback_gen()
        agent_callback.cov_callback_gen()

        agent_agent = agent.agent(self.header, self.agent_setting)
        agent_agent.agent_gen()
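
The run of `include writes in gen() above could equally be driven by a list of the generated file names; a small helper sketch (not part of the original generator, attribute names assumed from the calls above):

def write_includes(fh, file_names):
    # Helper sketch: emit one `include line per generated .sv file.
    for name in file_names:
        fh.write("  `include \"%s.sv\"\n" % name)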
Example #20
0
def genesis(size):
    population = [agent() for _ in range(size)]
    population = genes(population)
    return population
Example #21
0
from arguments import get_args
from baselines.common.atari_wrappers import make_atari
from baselines import bench
from baselines import logger
from baselines.common.atari_wrappers import wrap_deepmind
from agent import agent
import os

if __name__ == '__main__':
    if not os.path.exists('logs/'):
        os.mkdir('logs/')
    envArgs = get_args()
    logAddress = 'logs/' + envArgs.env_name + '/'
    if not os.path.exists(logAddress):
        os.mkdir(logAddress)
    logger.configure(logAddress)
    # start to create the environment
    environment = make_atari(envArgs.env_name)
    environment = wrap_deepmind(environment, frame_stack=True)
    environment = bench.Monitor(environment, logger.get_dir())
    # train the agent
    trainer = agent(environment, envArgs)
    trainer.learn()
    environment.close()
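
get_args here comes from the project's arguments module, which is not shown; a minimal stand-in covering only the env_name attribute used above might look like this (flag name and default value are assumptions):

import argparse

def get_args():
    # Hypothetical stand-in for arguments.get_args; only env_name is used above.
    parser = argparse.ArgumentParser()
    parser.add_argument('--env-name', dest='env_name', type=str,
                        default='PongNoFrameskip-v4',
                        help='Atari environment id passed to make_atari')
    return parser.parse_args()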
Example #22
0
def init_agents():

    for k in range(2, 26):
        soc = random.normal(g.average_soc, g.sd_average_soc, 1)
        g.agents[k] = agent(soc, k, g.time)
        g.agents[k].copy_all = g.copy_al
Example #23
0
    def __init__(self,
                 name=None,
                 discount=0.9,
                 lr=None,
                 alpha=1,
                 policy_type='greedy',
                 policy_param={
                     'eps': 0.05,
                     'min_eps': 0.01,
                     'eps_decay': 0.9999
                 },
                 env=hb,
                 suits='rgbyp',
                 players=3,
                 mode='standard',
                 hidden_layers=[200, 200, 200, 150, 100],
                 batch_size=512,
                 l1=0,
                 optimizer='adagrad',
                 mem_size=2000,
                 max_steps=130,
                 plot_frequency=1,
                 discrete_agents=True,
                 Double_DQN_version=1,
                 accelerated=True,
                 games_per_epoch=100):

        self.name = name
        self.weights_dir = model_directory
        # if self.name == None:
        #   date = str(time.strftime('%m%d-%H%M'))
        #   self.name = f'{date}-{mode}-{suits}'
        if self.name != None:
            self.model_file = os.path.join(self.weights_dir, self.name + '.h5')
        self.env = hb.hanabi_env(players, suits, mode)
        self.iterations_done = 0
        self.gamma = discount
        self.learning_rate = lr
        self.alpha = alpha
        self.max_steps = max_steps
        self.policy_param = policy_param
        self.hidden_layers = hidden_layers
        self.discrete_agents = discrete_agents
        self.epoch = 0
        self.epoch_size = games_per_epoch
        self.epoch_history = {}
        self.epoch_history['steps'] = []
        self.epoch_history['rewards'] = []
        self.epoch_history['discounted_rewards'] = []
        self.epoch_history['rps'] = []
        self.epoch_history['loss'] = []
        self.batch_size = batch_size
        self.plot_frequency = plot_frequency
        self.suits = suits
        self.mem_size = mem_size
        self.players = players
        self.mode = mode
        self.l1 = l1
        self.Double_DQN_version = Double_DQN_version
        self.optimizer = get_optimizer(optimizer, lr)
        self.action_map = self._create_action_map()
        self.action_space = len(self.action_map)
        self.action_totals = [0] * self.action_space
        self.accelerated = accelerated
        move_func = self._create_valid_moves_function()
        self.policy = BehaviorPolicy(self.action_space,
                                     move_func,
                                     policy_type=policy_type,
                                     param=policy_param)
        if self.name != None and os.path.exists(self.model_file):
            self.online_model = models.load_model(self.model_file)
            self.target_model = models.load_model(self.model_file)
            self.target_model.name = 'target_' + self.target_model.name
        else:
            self.online_model = create_Q_model(self.env, self.action_space,
                                               self.optimizer,
                                               self.hidden_layers,
                                               self.learning_rate, self.l1,
                                               'online_model')
            self.online_model.name = 'online_model'
            self.target_model = create_Q_model(self.env, self.action_space,
                                               self.optimizer,
                                               self.hidden_layers,
                                               self.learning_rate, self.l1,
                                               'target_model')
            self.target_model.name = 'target_model'
        self._freeze_target_model()
        if self.accelerated:
            self.training_model = training_strategy.build_accelerated_model(
                self.Double_DQN_version, self.env.get_input_dim(),
                self.online_model, self.target_model,
                self.batch_size * self.players, self.optimizer,
                self.learning_rate, self.gamma)
            self._update_online_model = training_strategy.get_accelerated_update_strategy(
                self.action_space,
                training_model=self.training_model,
            )
        else:
            self._update_online_model = training_strategy.get_CPU_update_strategy(
                alpha, self.gamma, Double_DQN_version, self.online_model,
                self.target_model)
        self.player = []
        for playerID in range(self.players):
            self.player.append(
                agent(self.env, self.online_model, self.policy.choose_action,
                      self.mem_size, self.action_map, playerID))
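
The constructor references _freeze_target_model, but the periodic target sync itself is not shown in this snippet; a generic sketch of the usual Double-DQN weight copy, assuming the Keras models created above (the method name _sync_target_model is illustrative, not from the original class):

    def _sync_target_model(self):
        # Hypothetical helper: copy the online network's weights into the
        # frozen target network, as is standard Double-DQN practice.
        self.target_model.set_weights(self.online_model.get_weights())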
Example #24
0
    def fuzz(self, this_node=None, path=[]):
        '''
        Call this routine to get the ball rolling. No arguments are necessary as they are
        both utilized internally during the recursive traversal of the session graph.

        @type  this_node: request (node)
        @param this_node: (Optional, def=None) Current node that is being fuzzed.
        @type  path:      List
        @param path:      (Optional, def=[]) Nodes along the path to the current one.
        '''

        # if no node is specified, we start from root and initialize the session.
        if not this_node:
            # we can't fuzz if we don't have at least one target and one request.
            if not self.transport_media.media_target():
                self.database.log("error",
                                  "no target specified for job %s" %\
                                  self.session_id)
                return

            if not self.edges_from(self.root.id):
                self.database.log("error",
                                  "no request specified for job %s" %\
                                  self.session_id)
                return

            this_node = self.root

            self.total_mutant_index = 0
            self.total_num_mutations = self.num_mutations()

        # If no errors above and not already connected to the agent, initialize the
        # agent connection.
        # If the agent cannot be initialized make sure the user is aware of it.

        if self.agent == None and self.agent_settings != None:
            try:
                self.agent = agent(self.root_dir, self.config, self.session_id,
                                   self.agent_settings)
                self.agent.connect()
            except Exception, ex:
                self.database.log("error",
                                  "failed to establish agent connection for job %s" %\
                                  self.session_id,
                                  str(ex))

                self.finished_flag = True
                self.stop_flag = True
                self.save_status()
                return

            # Get the agent to execute
            try:
                self.agent.start()
            except Exception, ex:
                self.database.log("error",
                                  "agent failed to execute command for job %s" %\
                                  self.session_id,
                                  str(ex))

                self.finished_flag = True
                self.stop_flag = True
                self.save_status()
                return
Example #25
0
#
# MAC0425/5730 - Artificial Intelligence - EP1 @ 2013.2
# Author: Bruno Nunes Leal Faria - nUSP: 8765551
#
# FILE: environment.py
#
import agent
import search
import time

a = agent.agent()

# environment class
# @map - multi dimensional array to hold mine map
# @agent - search type to execute
class environment:
	def __init__(self):
		self.map = [[]]
		self.graph = {}
		self.size = 0
		self.gold_count = 0
		self.gold_left = []
		self.gold_locations = []
		self.search_type = None
	
	# creates matrix from file
	def create_matrix(self, stream):
		x = 0
		y = 0
		# read dimension
		c = stream.readline()
Example #26
0
    def __init__(self, inputAgentSigmaNoise=0.1):
        pygame.init()
        # RGB color
        self.__white = (255, 255, 255)
        self.__black = (0, 0, 0)
        self.__red = (255, 0, 0)
        self.__green = (0, 155, 0)
        self.__blue = (0, 0, 255)

        # give the game a title
        pygame.display.set_caption("Keepaway")
        self.keeperScore = 0

        # these are more or less global variables..
        # I'm not sure if this is bad or not.
        self.__worldImage = pygame.image.load("images/soccer_field.png")
        self.__ballImage = pygame.image.load("images/ball.png")
        self.__keeperImage = pygame.image.load("images/keeper.png")
        self.__takerImage = pygame.image.load("images/taker.png")
        self.__predictedImage = pygame.image.load("images/x.png")
        self.__debugYellowDotImage = pygame.image.load("images/yellow_dot.png")
        self.__debugRedDotImage = pygame.image.load("images/red_dot.png")
        # block sizes are used for collision detection
        # only 1 size per element because all blocks are squares. block size = side length
        self.__agent_block_size = 23
        self.ball_block_size = 12

        self.maxBallSpeed = 4
        self.maxPlayerSpeed = 2

        # dimensions of the game are the same as the soccer field image
        self.__display_width = 550
        self.display_height = 357
        self.__field_center = (self.__display_width / 2, self.display_height / 2)
        # gameDisplay is a pygame.surface object. it's your screen
        self.gameDisplay = pygame.display.set_mode((self.__display_width, self.display_height))
        self.test_fps = 60
        self.train_fps = 10000
        self.clock = pygame.time.Clock()

        # start the ball kinda close to the keeper in the upper left corner
        self.fieldBall = ball.ball((self.__field_center[0] / 4, self.__field_center[1] / 4), self.maxBallSpeed)

        # setup all the initial keepers and takers. They are all starting at different field positions, which is why
        # you can't have a for loop just iterate and declare all of them
        types = ["keeper", "taker"]
        self.agentSigmaError = inputAgentSigmaNoise
        self.keeperArray = []
        self.keeperTruePosArray = []
        self.keeperTruePosArray.append((12.5, 12.5))
        self.keeperTruePosArray.append((25, self.__display_width - 37.5))
        self.keeperTruePosArray.append((self.display_height - 37.5, self.__display_width - 37.5))
        self.keeperArray.append(
            agent.agent(
                self,
                0,
                kUtil.getNoisyVals(self.keeperTruePosArray[0], self.agentSigmaError),
                self.agentSigmaError,
                types[0],
                kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError),
                self.maxPlayerSpeed,
                self.maxBallSpeed,
            )
        )
        self.keeperArray.append(
            agent.agent(
                self,
                1,
                kUtil.getNoisyVals(self.keeperTruePosArray[1], self.agentSigmaError),
                self.agentSigmaError,
                types[0],
                kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError),
                self.maxPlayerSpeed,
                self.maxBallSpeed,
            )
        )
        self.keeperArray.append(
            agent.agent(
                self,
                2,
                kUtil.getNoisyVals(self.keeperTruePosArray[2], self.agentSigmaError),
                self.agentSigmaError,
                types[0],
                kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError),
                self.maxPlayerSpeed,
                self.maxBallSpeed,
            )
        )

        self.takerArray = []
        self.takerTruePosArray = []
        self.takerTruePosArray.append((self.display_height - 25, 25))
        self.takerTruePosArray.append((self.display_height - 37.5, 50))
        self.takerArray.append(
            agent.agent(
                self,
                0,
                self.takerTruePosArray[0],
                self.agentSigmaError,
                types[1],
                kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError),
                self.maxPlayerSpeed,
                self.maxBallSpeed,
            )
        )
        self.takerArray.append(
            agent.agent(
                self,
                1,
                self.takerTruePosArray[1],
                self.agentSigmaError,
                types[1],
                kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError),
                self.maxPlayerSpeed,
                self.maxBallSpeed,
            )
        )

        # four different font sizes
        self.smallfont = pygame.font.SysFont("comicsansms", 25)  # 25 is the font size
        self.medfont = pygame.font.SysFont("comicsansms", 50)
        self.largefont = pygame.font.SysFont("comicsansms", 80)
        self.verysmallfont = pygame.font.SysFont("comicsansms", 12)
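
kUtil.getNoisyVals is used throughout to blur true positions with the sigma passed in as inputAgentSigmaNoise; a plausible implementation, assuming zero-mean Gaussian noise added per coordinate (the real kUtil module is not shown in this example):

import random

def getNoisyVals(trueVals, sigma):
    # Sketch of kUtil.getNoisyVals: perturb each coordinate with zero-mean
    # Gaussian noise of standard deviation sigma.
    return tuple(v + random.gauss(0.0, sigma) for v in trueVals)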
Example #27
0
File: main.py Project: afcarl/Gridgame
from grid import grid
from gridQController import gridQController
from agent import agent
import matplotlib.pyplot as plt

gridSize = 5

grid = grid(gridSize)
controller = gridQController(gridSize)
agentSmith = agent(grid, controller)

iterations = 200000

rewards = [0] * iterations

for i in range(0, iterations):
    agentSmith.step()
    if i > 100000:
        controller.setGreed(1)
    rewards[i] = agentSmith.getReward()

plt.plot(rewards)
plt.show()
print("Total reward: " + str(agentSmith.getReward()) + " Iterations: " +
      str(iterations) + " Success rate: " +
      str(agentSmith.getReward() / iterations))
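
The script above flips the controller to fully greedy behaviour after 100000 steps; assuming setGreed accepts any value in [0, 1], a gradual ramp would be a drop-in alternative for the main loop (a sketch, not part of the original script):

decay_steps = 100000
for i in range(iterations):
    # Ramp greediness linearly from 0 to 1 over the first decay_steps steps.
    controller.setGreed(min(i / float(decay_steps), 1.0))
    agentSmith.step()
    rewards[i] = agentSmith.getReward()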
Example #28
0
 def __init__(self, phone_number, pin):
     self.agent = agent()
     self.agent.autho(phone_number, pin)
     self.steps = []
Example #29
0
	def __init__(self, inputAgentSigmaNoise = .1, alreadyTrained = True, bevCustomTileSize = None):
		pygame.init()
		#RGB color
		self.__white = (255,255,255) 
		self.__black = (0,0,0)
		self.__red = (255,0,0)
		self.__green = (0,155,0)
		self.__blue = (0,0,255)
		
		#give the game a title
		pygame.display.set_caption('Keepaway')
		self.keeperScore = 0
		
		#these are more or less global variables..
		#I'm not sure if this is bad or not. 
		self.__worldImage = pygame.image.load('images/soccer_field.png')
		self.__ballImage = pygame.image.load('images/ball.png')
		self.__keeperImage = pygame.image.load('images/keeper.png')
		self.__keeperGoldImage = pygame.image.load('images/keeperGold.png')
		self.__takerImage = pygame.image.load('images/taker.png')
		self.__predictedImage = pygame.image.load('images/x.png')
		self.__debugYellowDotImage = pygame.image.load('images/yellow_dot.png')
		self.__debugRedDotImage = pygame.image.load('images/red_dot.png')
		self.__debugBlackDotImage = pygame.image.load('images/black_dot.png')
		self.__debugWhiteDotImage = pygame.image.load('images/white_dot.png')
		self.__debugBlueDotImage = pygame.image.load('images/blue_dot.png')
		self.__debugTakerPathTile = pygame.image.load('images/takerPathSquare.png')
		self.__debugKeeperPathTile = pygame.image.load('images/keeperPathSquare.png')
		self.__debugKeeperTile = pygame.image.load('images/keeperSquare.png')
		self.__debugTakerTile = pygame.image.load('images/takerSquare.png')
		self.__debugEmptyTile = pygame.image.load('images/emptySquare.png')
		self.__debugTakerPathTileTwo = pygame.image.load('images/takerPathSquare2.png')
		self.__debugKeeperPathTileTwo = pygame.image.load('images/keeperPathSquare2.png')
		#block sizes are used for collision detection
		#only 1 size per element because all blocks are squares. block size = side length
		self.__agent_block_size = 23
		self.__ball_block_size = 12

		self.maxBallSpeed= 4
		self.maxPlayerSpeed = 2
		#self.rDecision = None

		
		#dimensions of the game are the same as the soccer field image
		self.__display_width = 550
		self.__display_height = 357
		self.displayGraphics = True
		self.__field_center = (self.__display_width / 2 , self.__display_height / 2)
		#gameDisplay is a pygame.surface object. it's your screen
		self.gameDisplay = pygame.display.set_mode((self.__display_width,self.__display_height))
		self.test_fps = 60
		self.train_fps = 10000
		self.clock = pygame.time.Clock()
		
		
		#start the ball kinda close to the keeper in the upper left corner
		self.fieldBall = ball.ball( (self.__field_center[0]/4, self.__field_center[1]/4), self.maxBallSpeed)
		
		#the simple state variables for agents like NEAT, novelty search, and maybe sarsa
		self.simpleStateVars = None
		
		self.alreadyTrained = alreadyTrained  #False if you want agent to learn and True if you want to demo
		
		#setup all the initial keepers and takers. They are all starting at different field positions, which is why
		#you can't have a for loop just iterate and declare all of them
		types = ["keeper", "taker"]
		self.agentSigmaError = inputAgentSigmaNoise
		self.keeperArray = []
		self.keeperTruePosArray = []
		self.keeperTruePosArray.append((12.5, 12.5))
		self.keeperTruePosArray.append((25,  self.__display_width - 37.5))
		self.keeperTruePosArray.append((self.__display_height - 37.5,  self.__display_width - 37.5))
		self.keeperArray.append(agent.agent(self, 0, kUtil.getNoisyVals( self.keeperTruePosArray[0], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed))
		self.keeperArray.append(agent.agent(self, 1, kUtil.getNoisyVals( self.keeperTruePosArray[1], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed))
		self.keeperArray.append(agent.agent(self, 2, kUtil.getNoisyVals( self.keeperTruePosArray[2], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed))
		
		self.takerArray = []
		self.takerTruePosArray = []
		self.takerTruePosArray.append((self.__display_height - 25,  25))
		self.takerTruePosArray.append((self.__display_height - 37.5,  50))
		self.takerArray.append(agent.agent(self, 0, self.takerTruePosArray[0], self.agentSigmaError, types[1], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed))
		self.takerArray.append(agent.agent(self, 1, self.takerTruePosArray[1], self.agentSigmaError, types[1], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed))
		
		# four different font sizes
		self.smallfont = pygame.font.SysFont("comicsansms",25) #25 is the font size
		self.medfont = pygame.font.SysFont("comicsansms",50) 
		self.largefont = pygame.font.SysFont("comicsansms",80) 
		self.verysmallfont = pygame.font.SysFont("comicsansms", 12)
		
		#birdsEyeView generator for agents like hyperNEAT:
		if bevCustomTileSize == None:
			bevCustomTileSize = self.__agent_block_size
		self.bev = birdsEyeView.birdsEyeView(self.__display_width, self.__display_height, bevCustomTileSize, self.__ball_block_size )
		self.bev_grid_as_grid = self.bev.getBirdsEyeView(self.keeperArray, self.takerArray);
		self.bev_grid_as_list = self.bev.getBirdsEyeViewAsList(self.keeperArray, self.takerArray);
		self.bev_substrate = self.bev.getSubstrate(self.keeperArray, self.takerArray);
		self.bev_keeper_sub_index = self.bev.getBallHolderTile(self.keeperArray)
Example #30
0
    sigmaM = 10.
    sigmaN = 1.

# time length for ticks per time period
deltaT = 100
# holder list for agent objects
agentList = []
# price, return, and volume time series
price = pf * np.ones(Tmax + 1)
ret = np.zeros(Tmax + 1)
totalV = np.zeros(Tmax + 1)
rprice = np.zeros((Tmax + 1) // 100)

# create agents in list of objects
for i in range(nAgents):
    agentList.append(agent(sigmaF, sigmaM, sigmaN, kMax, Lmin, Lmax))
# create set of forecasts
forecastSet = forecasts(Lmax, pf, sigmae)
# create order book
marketBook = orderBook(600., 1400., deltaP)
# set up initial prices

price[0:Tinit] = pf * (1. + 0.001 * np.random.randn(Tinit))
ret[0:Tinit] = 0.001 * np.random.randn(Tinit)

for t in range(Tinit, Tmax):
    # update all forecasts
    forecastSet.updateForecasts(t, price[t], ret)
    tradePrice = -1
    # draw a random agent (note: randint(1, nAgents) samples indices 1..nAgents-1, so agent 0 is never drawn)
    randomAgent = agentList[np.random.randint(1, nAgents)]
Example #31
0
File: regul.py Project: CACNTAP/dangdang
def agent_ip():
    agentout = open('agent_ip.txt','w')
    ag = agent()
    ag.get_ip(agentout)
    agentout.close()
Example #32
0
    parser.add_argument("--exploration", type=float, default=0.2)
    parser.add_argument("--save_freq", type=int, default=100)
    parser.add_argument("--save_folder", type=str, default="model")
    parser.add_argument("--reload", type=str, default=None)
    args = parser.parse_args()

    gmm = gameMgr.tetris(20, 10)
    epoch = 0
    write_epoch = 100
    reward_history = collections.deque(maxlen=1000)
    loss_history = collections.deque(maxlen=1000)
    agt = agent.agent(gmm.getActionList(),
                      gmm.getStateSize(),
                      n_batch=args.batch_size,
                      replay_size=args.replay_size,
                      learning_rate=args.learn_rate,
                      discountRate=args.discount_rate,
                      saveFreq=args.save_freq,
                      saveFolder=args.save_folder,
                      memoryLimit=args.memory_limit)
    if args.reload: agt.load(args.reload)

    fig = plt.figure(figsize=(gmm.getScreenSize()[0], gmm.getScreenSize()[1]))
    fig.canvas.set_window_title("TeTris")
    setFile = open(os.path.join(args.save_folder, "settings.dat"), "w")
    setFile.write(str(args))
    setFile.close()
    logFile = open(os.path.join(args.save_folder, "log.dat"), "w")
    logCSV = csv.writer(logFile)
    logCSV.writerow([
        "epoch", "last_loss", "loss_mean", "last_reward", "mean_reward",
Example #33
0
def parameter_camp_test(parameter_list):
    """
    This function takes a camp ID, trains an agent for that specific campaign,
    and then tests the agent on that campaign. We start by defining the hyper-parameters.
    It (currently) treats the whole campaign as an episode.
    """

    epsilon_max = 0.9
    epsilon_min = 0.05
    discount_factor = 1
    batch_size = 32
    memory_cap = 100000
    update_frequency = 100
    episode_length = 96

    camp_id = parameter_list[0]
    budget_scaling = parameter_list[1]
    initial_Lambda = parameter_list[2]
    epsilon_decay_rate = parameter_list[3]
    budget_init_var = parameter_list[4] * budget_scaling
    step_length = parameter_list[5]
    learning_rate = parameter_list[6]
    seed = parameter_list[7]

    action_size = 7
    state_size = 5
    tf.reset_default_graph()
    np.random.seed(seed)
    tf.set_random_seed(seed)
    sess = tf.Session()
    rtb_agent = agent(epsilon_max, epsilon_min, epsilon_decay_rate,
                      discount_factor, batch_size, memory_cap, state_size,
                      action_size, learning_rate, sess)

    camp_n = [
        '1458', '2259', '2997', '2821', '3358', '2261', '3386', '3427', '3476'
    ]
    train_file_dict, test_file_dict = get_data(camp_n)
    test_file_dict = test_file_dict[camp_id]
    total_budget = 0
    total_impressions = 0
    global_step_counter = 0

    for i in camp_n:
        rtb_environment = RTB_environment(train_file_dict[i], episode_length,
                                          step_length)
        total_budget += train_file_dict[i]['budget']
        total_impressions += train_file_dict[i]['imp']
        while rtb_environment.data_count > 0:
            episode_size = min(episode_length * step_length,
                               rtb_environment.data_count)
            budget = train_file_dict[i]['budget'] * min(rtb_environment.data_count, episode_size) \
                     / train_file_dict[i]['imp'] * budget_scaling
            budget = np.random.normal(budget, budget_init_var)

            state, reward, termination = rtb_environment.reset(
                budget, initial_Lambda)
            while not termination:
                action, _, _ = rtb_agent.action(state)
                next_state, reward, termination = rtb_environment.step(action)

                memory_sample = (action, state, reward, next_state,
                                 termination)
                rtb_agent.replay_memory.store_sample(memory_sample)
                rtb_agent.q_learning()
                if global_step_counter % update_frequency == 0:
                    rtb_agent.target_network_update()

                rtb_agent.e_greedy_policy.epsilon_update(global_step_counter)
                state = next_state
                global_step_counter += 1

    epsilon = rtb_agent.e_greedy_policy.epsilon
    budget = total_budget / total_impressions * test_file_dict[
        'imp'] * budget_scaling
    imp, click, cost, wr, ecpc, ecpi, camp_info = drlb_test(
        test_file_dict, budget, initial_Lambda, rtb_agent, episode_length,
        step_length)
    sess.close()
    lin_bid_result = list(
        lin_bidding_test(train_file_dict[camp_id], test_file_dict, budget,
                         'historical'))
    rand_bid_result = list(
        rand_bidding_test(train_file_dict[camp_id], test_file_dict, budget,
                          'uniform'))

    result_dict = {
        'camp_id': camp_id,
        'parameters': parameter_list[1:],
        'epsilon': epsilon,
        'total budget': budget,
        'auctions': test_file_dict['imp'],
        'camp_result': np.array([imp, click, cost, wr, ecpc, ecpi]).tolist(),
        'budget': camp_info[0],
        'lambda': camp_info[1],
        'unimod': camp_info[2],
        'action values': camp_info[3],
        'lin_bid_result': lin_bid_result,
        'rand_bid_result': rand_bid_result
    }
    return result_dict
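
The positional meaning of parameter_list is fixed by the unpacking at the top of the function; an illustrative call (every numeric value below is a placeholder, not a tuned setting from the original experiments):

example_parameters = [
    '1458',    # camp_id (one of the ids in camp_n)
    0.03125,   # budget_scaling
    0.0001,    # initial_Lambda
    0.00005,   # epsilon_decay_rate
    0.1,       # budget_init_var multiplier (scaled by budget_scaling inside)
    96,        # step_length
    0.001,     # learning_rate
    1,         # seed
]
result = parameter_camp_test(example_parameters)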
Example #34
0
SAVE_FREQ = 50 # episodes between saving the network
SAVE_PATH = "ai.data"

MIN_NO_OPS = 4 # lower limit of the no-ops inserted at the start of each episode
MAX_NO_OPS = 30 # upper limit of the no-ops inserted at the start of each episode


"""
The main training loop.
A random number of no-ops is inserted at the start of each episode.
The DQN is saved periodically.
"""

def training_loop(env, ai, total_frames):
    while(env.frame_count < total_frames):
        t0 = time.time()
        no_ops = random.randrange(MIN_NO_OPS, MAX_NO_OPS + 1)
        reward = env.run_episode(ai, no_ops)
        print(env.episode, env.frame_count, reward, time.time()-t0)
        
        if (env.episode % SAVE_FREQ) == SAVE_FREQ - 1:
            ai.save(SAVE_PATH)


# Running the training

env = enviroment(GAME)
ai = agent(env.n_actions)
training_loop(env, ai, TOTAL_FRAMES)
Example #35
0
	if gpus:
		try:
			# Allow two TensorFlow instances at the same time (tensorboard & model)
			for gpu in gpus:
				tf.config.experimental.set_memory_growth(gpu, True)
				
			# List logical GPUs
			logical_gpus = tf.config.experimental.list_logical_devices('GPU')
			print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
			
		except RuntimeError as e:
			print(e)

    
	# Create the agent and the environment
	agent = agent(MODEL_NAME, RESTART)
	env = environement(IP, PORT, S_PER_EPISODE)
	env.startClient()

	
	# Start the training thread
	trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
	trainer_thread.start()
	
	# Wait until the training thread is initialized
	while not agent.training_initialized:
		time.sleep(0.01)


	# Run one prediction up front, since the first one is always slow
	print(agent.get_qs(np.ones((1, 16))))
Example #36
0
def get_agent(features, players, args):
    agent = ag.agent(features, players)
    if args.load:
        agent.model = load_model(args.load)
    return agent
Example #37
0
def run(rule, attacker, epochs):
    torch.manual_seed(1)

    start_time = time.time()

    N = 10
    N1 = 5
    tr_split_len1 = 2000
    te_split_len1 = 400
    tr_split_len2 = 2000
    te_split_len2 = 400
    A = []
    train_data1, test_data1 = readData_mnist()
    train_data2, test_data2 = readData_synthetic_digits()
    remaining_tr, remaining_te = train_data2, test_data2

    Parameters = []

    # attacker_num = 2
    # attacker = [2, 7]

    attacker_num = len(attacker)
    # Accumulated_Loss = np.zeros((N, N))
    Accumulated_Loss = np.ones((N, N))

    average_train_loss, average_train_acc = [], []
    average_test_loss, average_test_acc = [], []

    individual_average_train_loss, individual_average_train_acc = np.zeros(
        (epochs, N)), np.zeros((epochs, N))
    individual_average_test_loss, individual_average_test_acc = np.zeros(
        (epochs, N)), np.zeros((epochs, N))

    for k in range(0, N):
        net = Net().to(device)
        # print(net)
        # summary(net, (1,28,28), batch_size=-1)
        a = agent(net)
        A.append(a)
        Parameters.append({})

        for name, param in a.net.named_parameters():
            if param.requires_grad:
                Parameters[k][name] = param.data

    for epoch in range(epochs):
        print('epoch {}'.format(epoch + 1))
        Train_loader_iter = []
        Test_loader = []
        total_train_loss = 0.
        total_train_acc = 0.
        total_eval_loss = 0.
        total_eval_acc = 0.
        remaining_tr, remaining_te = train_data2, test_data2

        Count = np.zeros((N, ))

        ave_train_loss = 0.
        ave_train_acc = 0.
        ave_eval_loss = 0.
        ave_eval_acc = 0.
        nanCount = 0

        for k in range(0, N):
            a = A[k]
            a.train_loss = 0.
            a.train_acc = 0.

            if k < N1:
                train_loader_no, test_loader_no = generateData_mnist(
                    train_data1, test_data1, tr_split_len1, te_split_len1, k)
            else:
                train_loader_no, test_loader_no, remaining_tr, remaining_te = generateData_synthetic_digits(
                    remaining_tr, remaining_te, tr_split_len2, te_split_len2)

            Train_loader_iter.append(iter(train_loader_no))
            Test_loader.append(test_loader_no)

        # for iteration in range(0, tr_split_len//64):
        # for k in range(0, N):
        # training-----------------------------
        try:
            while True:
                A_last = deepcopy(A)
                Batch_X, Batch_Y = {}, {}
                for k in range(0, N):
                    batch_x, batch_y = next(Train_loader_iter[k])
                    Batch_X[k] = batch_x.to(device)
                    Batch_Y[k] = batch_y.to(device)
                    if k in attacker:
                        continue
                    # 5 agents, get access to 1, 1/2, 1/3, 1/5, 1/10 data, so their models have different accuracy
                    if k % 5 == 0:
                        if random.randint(0, 1) in [0]:
                            continue
                    if k % 5 == 1:
                        if random.randint(0, 2) in [0, 1]:
                            continue
                    if k % 5 in [2, 3]:
                        if random.randint(0, 3) in [0, 1, 2]:
                            continue
                    # if k % 5 == 3:
                    #     if random.randint(0, 9) in [0,1,2,3,4,5,6,7,8]:
                    #         continue
                    a = A[k]
                    loss, acc = a.optimize(batch_x.to(device),
                                           batch_y.to(device))
                    total_train_loss += loss
                    total_train_acc += acc
                    Count[k] += len(batch_x)

                A, Accumulated_Loss = cooperation(A, A_last, Batch_X, Batch_Y,
                                                  Accumulated_Loss, rule,
                                                  attacker)
                # print(Accumulated_Loss)

        except StopIteration:
            # print(iteration)
            Eval_count = np.zeros((N, ))
            for k in range(0, N):
                if k in attacker:
                    continue
                print('Agent: {:d}, Train Loss: {:.6f}, Acc: {:.6f}'.format(
                    k, A[k].train_loss / Count[k], A[k].train_acc / Count[k]))
                individual_average_train_loss[epoch,
                                              k] = A[k].train_loss / Count[k]
                individual_average_train_acc[epoch,
                                             k] = A[k].train_acc / Count[k]

                if not (math.isnan(A[k].train_loss / Count[k])
                        or math.isnan(A[k].train_acc / Count[k])):
                    ave_train_loss += A[k].train_loss / Count[k]
                    ave_train_acc += A[k].train_acc / Count[k]
                else:
                    nanCount += 1

                # evaluation--------------------------------
                A[k].net.eval()
                eval_loss = 0.
                eval_acc = 0.
                for batch_x, batch_y in Test_loader[k]:
                    batch_x, batch_y = Variable(
                        batch_x, volatile=True).to(device), Variable(
                            batch_y, volatile=True).to(device)
                    out = A[k].net(batch_x)
                    loss_func = torch.nn.CrossEntropyLoss()
                    loss = loss_func(out, batch_y)
                    eval_loss += loss.item()
                    total_eval_loss += loss.item()
                    pred = torch.max(out, 1)[1]
                    num_correct = (pred == batch_y).sum()
                    eval_acc += num_correct.item()
                    total_eval_acc += num_correct.item()
                    Eval_count[k] += len(batch_x)

                if not (math.isnan(eval_loss / Eval_count[k])
                        or math.isnan(eval_acc / Eval_count[k])):
                    ave_eval_loss += eval_loss / Eval_count[k]
                    ave_eval_acc += eval_acc / Eval_count[k]
                print('Agent: {:d}, Test Loss: {:.6f}, Acc: {:.6f}'.format(
                    k, eval_loss / Eval_count[k], eval_acc / Eval_count[k]))
                individual_average_test_loss[epoch,
                                             k] = eval_loss / Eval_count[k]
                individual_average_test_acc[epoch,
                                            k] = eval_acc / Eval_count[k]

        # print('Total Average Train Loss: {:.6f}, Train Acc: {:.6f}'.format(total_train_loss / sum(Count),
        #                                                                    total_train_acc / sum(Count)))
        # average_train_loss.append(total_train_loss / sum(Count))
        # average_train_acc.append(total_train_acc / sum(Count))
        # print('Total Average Test Loss: {:.6f}, Test Acc: {:.6f}'.format(total_eval_loss / sum(Eval_count),
        #                                                                  total_eval_acc / sum(Eval_count)))
        #
        # print('Training time by far: {:.2f}s'.format(time.time() - start_time))
        # average_test_loss.append(total_eval_loss / sum(Eval_count))
        # average_test_acc.append(total_eval_acc / sum(Eval_count))

        print('Total Average Train Loss: {:.6f}, Train Acc: {:.6f}'.format(
            ave_train_loss / (N - nanCount - attacker_num),
            ave_train_acc / (N - nanCount - attacker_num)))
        average_train_loss.append(ave_train_loss /
                                  (N - nanCount - attacker_num))
        average_train_acc.append(ave_train_acc / (N - nanCount - attacker_num))
        print('Total Average Test Loss: {:.6f}, Test Acc: {:.6f}'.format(
            ave_eval_loss / (N - attacker_num),
            ave_eval_acc / (N - attacker_num)))

        print('Training time by far: {:.2f}s'.format(time.time() - start_time))
        average_test_loss.append(ave_eval_loss / (N - attacker_num))
        average_test_acc.append(ave_eval_acc / (N - attacker_num))

        if epoch % 10 == 0 or epoch == epochs - 1:
            if attacker_num == 0:
                try:
                    os.makedirs("results")
                except OSError:
                    print("Creation of the directory %s failed")
                np.save('results/average_train_loss_%s.npy' % rule,
                        average_train_loss)
                np.save('results/average_train_acc_%s.npy' % rule,
                        average_train_acc)
                np.save('results/average_test_loss_%s.npy' % rule,
                        average_test_loss)
                np.save('results/average_test_acc_%s.npy' % rule,
                        average_test_acc)
                np.save('results/individual_average_train_loss_%s.npy' % rule,
                        individual_average_train_loss)
                np.save('results/individual_average_train_acc_%s.npy' % rule,
                        individual_average_train_acc)
                np.save('results/individual_average_test_loss_%s.npy' % rule,
                        individual_average_test_loss)
                np.save('results/individual_average_test_acc_%s.npy' % rule,
                        individual_average_test_acc)
            else:
                try:
                    os.makedirs("results/attacked/%d" % attacker_num)
                except OSError:
                    print("Creation of the directory %s failed")
                np.save(
                    'results/attacked/%d/average_train_loss_%s.npy' %
                    (attacker_num, rule), average_train_loss)
                np.save(
                    'results/attacked/%d/average_train_acc_%s.npy' %
                    (attacker_num, rule), average_train_acc)
                np.save(
                    'results/attacked/%d/average_test_loss_%s.npy' %
                    (attacker_num, rule), average_test_loss)
                np.save(
                    'results/attacked/%d/average_test_acc_%s.npy' %
                    (attacker_num, rule), average_test_acc)
                np.save(
                    'results/attacked/%d/individual_average_train_loss_%s.npy'
                    % (attacker_num, rule), individual_average_train_loss)
                np.save(
                    'results/attacked/%d/individual_average_train_acc_%s.npy' %
                    (attacker_num, rule), individual_average_train_acc)
                np.save(
                    'results/attacked/%d/individual_average_test_loss_%s.npy' %
                    (attacker_num, rule), individual_average_test_loss)
                np.save(
                    'results/attacked/%d/individual_average_test_acc_%s.npy' %
                    (attacker_num, rule), individual_average_test_acc)
Example #38
0
File: main.py Project: andresbayuelo1/RL
import pygame
from Grid import Grid
from consts import SCREEN_SIZE
import agent

pygame.init()
pygame.display.set_caption('2048')
pygame.font.init()
clock = pygame.time.Clock()
clock.tick(60)

screen = pygame.display.set_mode([SCREEN_SIZE, SCREEN_SIZE])
screen.fill((127, 127, 127))

g = Grid(pygame, screen)
a = agent.agent()
running = True
agent_playing = False
g.render()
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_i:
                agent_playing = not agent_playing
                g.reset(hardreset=True)
                break
            if event.key == pygame.K_r:
                g.reset()
Example #39
0
 def test_angleAgent(self):
     a1 = agent.agent((0.010631645330612073, 5.000750148780534), self.unitTestSigma, "Keeper", (0, 0))
     a2 = agent.agent((-0.008793653992994898, -0.0003569779220770502), self.unitTestSigma, "Taker", (0, 0))
     a3 = agent.agent((5.000443882611892, -0.017223221164217175), self.unitTestSigma, "Keeper", (0, 0))
     self.assertAlmostEqual(__getCosAngle(a1, a2, a3), 0, 1)
Example #40
0
File: game.py Project: ShaggO/xmp
    'cin':   pChans[6].reader(),
    'cout':  pChans[7].writer(),
    'cnote': pChans[8].writer(),
    'items': []})

pObjects.append({
    'name':  'Agent #2',
    'cin':   pChans[9].reader(),
    'cout':  pChans[10].writer(),
    'cnote': pChans[11].writer(),
    'items': []})

# Initialize client/player processes
pAgents = []
pAgents.append(player.player(-pChans[0], +pChans[1], +pChans[2]))
pAgents.append(agent.agent(-pChans[3], +pChans[4], +pChans[5], 'Agent #0'))
pAgents.append(agent.agent(-pChans[6], +pChans[7], +pChans[8], 'Agent #1'))
pAgents.append(agent.agent(-pChans[9], +pChans[10], +pChans[11], 'Agent #2'))


##### Initialize world
# Room channels
rChans = Channel() * 4
# Room layout:
#  0  1
#  3  2

# Room 0
# Add all player objects
rooms = []
rooms.append(room.room(
Example #41
0
def run(rule, attacker, epochs):
    torch.manual_seed(0)

    start_time = time.time()

    N = 30
    A = []
    batch_size = 10
    train_data, test_data = readData()

    Parameters = []

    attacker_num = len(attacker)

    # Accumulated_Loss = np.zeros((N, N))
    Accumulated_Loss = np.ones((N, N))
    middle1_neurons = 50

    Train_loader, Test_loader = [], []
    Val_loader_iter = []
    Val_loader = []

    average_train_loss, average_train_acc = [], []
    average_test_loss, average_test_acc = [], []

    individual_average_train_loss, individual_average_train_acc = np.zeros(
        (epochs, N)), np.zeros((epochs, N))
    individual_average_test_loss, individual_average_test_acc = np.zeros(
        (epochs, N)), np.zeros((epochs, N))

    for k in range(0, N):
        # net = Net(n_feature=561, n_hidden1=middle1_neurons, n_output=6)
        net = linearRegression(561, 6)
        a = agent(net)
        A.append(a)

        train_loader_no, val_loader_no, test_loader_no = generateData(
            train_data, test_data, k + 1, batch_size)
        Train_loader.append(train_loader_no)
        Test_loader.append(test_loader_no)
        Val_loader.append(val_loader_no)
        Val_loader_iter.append(iter(val_loader_no))

    for epoch in range(epochs):
        print('epoch {}'.format(epoch + 1))
        Train_loader_iter = []
        total_train_loss = 0.
        total_train_acc = 0.
        total_eval_loss = 0.
        total_eval_acc = 0.

        Count = np.zeros((N, ))

        ave_train_loss = 0.
        ave_train_acc = 0.
        ave_eval_loss = 0.
        ave_eval_acc = 0.
        nanCount = 0

        for k in range(0, N):
            a = A[k]
            a.train_loss = 0.
            a.train_acc = 0.
            Train_loader_iter.append(iter(Train_loader[k]))

        try:
            while True:
                A_last = deepcopy(A)
                Batch_X, Batch_Y = {}, {}
                for k in range(0, N):
                    # if k in attacker:
                    #     continue
                    batch_x, batch_y = next(Train_loader_iter[k])
                    Batch_X[k] = batch_x
                    Batch_Y[k] = batch_y
                    # only process 1/10 data for 1/3 of agents
                    if k % 3 == 0:
                        if random.randint(
                                0, 10) in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
                            continue

                    # if k % 3 == 0:
                    #     train_loader = Train_loader_iter[k].next()
                    #     batch_x, batch_y = (train_loader[0]).narrow(0,0,1), (train_loader[1]).narrow(0,0,1)
                    # else:
                    #     batch_x, batch_y = Train_loader_iter[k].next()
                    #
                    # Batch_X.append(batch_x)
                    # Batch_Y.append(batch_y)

                    # if k % 3 == 0:
                    #     if random.randint(0, 5) == 1:
                    #         pass
                    # batch_x = gaussian(batch_x, 5, 5)
                    # batch_y = torch.LongTensor(np.random.randint(6, size=batch_size))
                    # if random.randint(0, 2) == 1:
                    #     batch_y = torch.LongTensor(np.random.randint(6, size=batch_size))
                    # if (k+1) % 5 == 0:
                    #     try:
                    #         batch_x, batch_y = Train_loader_iter[k].next()
                    #     except:
                    #         Train_loader_iter[k] = iter(Train_loader[k])
                    #         batch_x, batch_y = Train_loader_iter[k].next()
                    # else:
                    #     batch_x, batch_y = Train_loader_iter[k].next()
                    a = A[k]
                    loss, acc = a.optimize(batch_x, batch_y)
                    if math.isnan(loss) or math.isnan(acc):
                        continue
                    total_train_acc += acc
                    # try:
                    #     val_x, val_y = Val_loader_iter[k].next()
                    # except:
                    #     Val_loader_iter[k] = iter(Val_loader[k])
                    #     val_x, val_y = Val_loader_iter[k].next()
                    # Batch_X.append(val_x)
                    # Batch_Y.append(val_y)
                    Count[k] += len(batch_x)
                A, Accumulated_Loss = cooperation(A, A_last, Batch_X, Batch_Y,
                                                  Accumulated_Loss, rule,
                                                  attacker)
                # print(Accumulated_Loss)

        except StopIteration:
            # print(iteration)
            Eval_count = np.zeros((N, ))
            for k in range(0, N):
                if k in attacker:
                    continue
                print('Agent: {:d}, Train Loss: {:.6f}, Acc: {:.6f}'.format(
                    k, A[k].train_loss / Count[k], A[k].train_acc / Count[k]))
                individual_average_train_loss[epoch,
                                              k] = A[k].train_loss / Count[k]
                individual_average_train_acc[epoch,
                                             k] = A[k].train_acc / Count[k]

                if not (math.isnan(A[k].train_loss / Count[k])
                        or math.isnan(A[k].train_acc / Count[k])):
                    ave_train_loss += A[k].train_loss / Count[k]
                    ave_train_acc += A[k].train_acc / Count[k]
                else:
                    nanCount += 1

                # evaluation--------------------------------
                A[k].net.eval()
                eval_loss = 0.
                eval_acc = 0.
                for batch_x, batch_y in Test_loader[k]:
                    batch_x, batch_y = Variable(
                        batch_x, volatile=True), Variable(batch_y,
                                                          volatile=True)
                    out = A[k].net(batch_x)
                    loss_func = torch.nn.CrossEntropyLoss()
                    loss = loss_func(out, batch_y)
                    pred = torch.max(out, 1)[1]
                    num_correct = (pred == batch_y).sum()
                    if math.isnan(loss) or math.isnan(num_correct):
                        continue
                    eval_loss += loss.item()
                    eval_acc += num_correct.item()
                    total_eval_loss += loss.item()
                    total_eval_acc += num_correct.item()
                    Eval_count[k] += len(batch_x)

                if not (math.isnan(eval_loss / Eval_count[k])
                        or math.isnan(eval_acc / Eval_count[k])):
                    ave_eval_loss += eval_loss / Eval_count[k]
                    ave_eval_acc += eval_acc / Eval_count[k]
                print('Agent: {:d}, Test Loss: {:.6f}, Acc: {:.6f}'.format(
                    k, eval_loss / Eval_count[k], eval_acc / Eval_count[k]))
                individual_average_test_loss[epoch,
                                             k] = eval_loss / Eval_count[k]
                individual_average_test_acc[epoch,
                                            k] = eval_acc / Eval_count[k]

        try:
            print('Total Average Train Loss: {:.6f}, Train Acc: {:.6f}'.format(
                ave_train_loss / (N - nanCount - attacker_num),
                ave_train_acc / (N - nanCount - attacker_num)))
            average_train_loss.append(ave_train_loss /
                                      (N - nanCount - attacker_num))
            average_train_acc.append(ave_train_acc /
                                     (N - nanCount - attacker_num))
            print('Total Average Test Loss: {:.6f}, Test Acc: {:.6f}'.format(
                ave_eval_loss / (N - attacker_num),
                ave_eval_acc / (N - attacker_num)))
        except:
            pass

        print('Training time by far: {:.2f}s'.format(time.time() - start_time))
        average_test_loss.append(ave_eval_loss / (N - attacker_num))
        average_test_acc.append(ave_eval_acc / (N - attacker_num))

        if epoch % 10 == 0 or epoch == epochs - 1:
            if attacker_num == 0:
                try:
                    os.makedirs("results")
                except OSError:
                    print("Creation of the directory %s failed")
                np.save('results/average_train_loss_%s.npy' % rule,
                        average_train_loss)
                np.save('results/average_train_acc_%s.npy' % rule,
                        average_train_acc)
                np.save('results/average_test_loss_%s.npy' % rule,
                        average_test_loss)
                np.save('results/average_test_acc_%s.npy' % rule,
                        average_test_acc)
                np.save('results/individual_average_train_loss_%s.npy' % rule,
                        individual_average_train_loss)
                np.save('results/individual_average_train_acc_%s.npy' % rule,
                        individual_average_train_acc)
                np.save('results/individual_average_test_loss_%s.npy' % rule,
                        individual_average_test_loss)
                np.save('results/individual_average_test_acc_%s.npy' % rule,
                        individual_average_test_acc)
            else:
                try:
                    os.makedirs("results/attacked/%d" % attacker_num)
                except OSError:
                    print("Creation of the directory %s failed")
                np.save(
                    'results/attacked/%d/average_train_loss_%s.npy' %
                    (attacker_num, rule), average_train_loss)
                np.save(
                    'results/attacked/%d/average_train_acc_%s.npy' %
                    (attacker_num, rule), average_train_acc)
                np.save(
                    'results/attacked/%d/average_test_loss_%s.npy' %
                    (attacker_num, rule), average_test_loss)
                np.save(
                    'results/attacked/%d/average_test_acc_%s.npy' %
                    (attacker_num, rule), average_test_acc)
                np.save(
                    'results/attacked/%d/individual_average_train_loss_%s.npy'
                    % (attacker_num, rule), individual_average_train_loss)
                np.save(
                    'results/attacked/%d/individual_average_train_acc_%s.npy' %
                    (attacker_num, rule), individual_average_train_acc)
                np.save(
                    'results/attacked/%d/individual_average_test_loss_%s.npy' %
                    (attacker_num, rule), individual_average_test_loss)
                np.save(
                    'results/attacked/%d/individual_average_test_acc_%s.npy' %
                    (attacker_num, rule), individual_average_test_acc)
Example #42
0
    def simulate(self):
        a = agent()
        a.load_memory()
        s = snake()
        offset = 50
        pygame.init()
        screen = pygame.display.set_mode((400,450))
        head_color = self.pink
        snake_color = self.green
        pygame.display.set_caption("snake.ai")
        self.my_font = pygame.font.SysFont("arial", 24)
        self.surface = self.my_font.render(self.intro, True, self.blue, self.pink)
        
        running = True
        screen.fill(self.white)
        pygame.draw.rect(screen, self.green, [0, 200, 40, 40], 3) 
        x = 0.
        direction = "right"
        while running:
            time.sleep(0.5)
            for event in pygame.event.get():
                if event.type == KEYDOWN:
                    if event.key == K_SPACE:
                        self.is_agent = not self.is_agent
                        self.update_word()
                    if event.key == K_LEFT:
                        direction = "left"
                    elif event.key == K_RIGHT:
                        direction = "right"
                    elif event.key == K_UP:
                        direction = "down"
                    elif event.key == K_DOWN:
                        direction = "up"
                if event.type == QUIT:
                    print "quit"
                    a.output()
                    pygame.quit()
                    exit()
            if not self.is_agent:
                snake_color = self.green
                ans = s.run(direction)
            else:
                snake_color = self.blue
                state = a.build_state()
                a.create_Q(state)                 # Create 'state' in Q-table
                direction = a.choose_action(state)
                ans = a.s.run(direction)
            if ans == False:
                s.reset()
                direction = "right"
                continue
            if ans[1]:
                if self.is_agent and a.learning:
                    self.train_time += 1
                    self.update_word()
                    if self.train_time == 10000:
                        a.output()
                        a.learning = False
            screen.fill(self.white)
            for index, item in enumerate(ans[0]):
                if index == 0:
                    color = head_color
                else:
                    # prev = ans[0][index - 1]
                    color = snake_color
                pygame.draw.rect(screen, color, [item[0] * 40 + 2,item[1] * 40 + offset + 2, 36, 36], 0)
            food = s.get_food()
            pygame.draw.rect(screen, self.red, [food[0] * 40, food[1] * 40 + offset, 40, 40], 0)
            screen.blit(self.surface,(0,0))
            pygame.display.update()
Example #43
0
def main(args):
    env_name = args.env
    env = gym.make(env_name)
    a2c_agent = agent(env)
    a2c_agent.train(env_name)
Example #44
0
File: run.py Project: bhamrick/ai2
import sys
from world import world
from agent import agent
import random
from PyQt4 import QtGui, QtCore
from map import map

app = QtGui.QApplication(sys.argv)
inname = sys.argv[1]

mp = map(inname)

wrld = world(mp)
for i in range(mp.mwidth*mp.mheight/8):
	agent(wrld,i)
wrld.show()
sys.exit(app.exec_())
Example #45
0
File: main.py Project: zerbeln/ROB521
def qLearn():
    a = agent.agent()
    t = agent.target()
    ql = QLearner.QLearner()

    # Initialize vectors and starting coordinates for agents and targets
    ql.reset_qTable()

    # Create output files
    learning = open('BestFit_QL.txt', 'w')  # Records best fitnesses
    perf = open('SystemReward_QL.txt', 'w')
    rel = open('Reliability_QL.txt',
               'w')  # Records how successful the trained Q-table is using the "best" policy
    eff = open('Alg_Time_QL.txt', 'w')
    stp = open('Steps_Taken_QL.txt', 'w')

    for srun in range(p.stat_runs):
        print('current stat run: ', srun)
        a.assign_acoords(p.x_dim, p.y_dim)
        t.assign_tcoords(p.x_dim, p.y_dim, a.ax_init, a.ay_init)
        time_begin = process_time()
        ql.reset_qTable()

        for ep in range(p.episodes):

            k = 0
            while k < p.steps:
                ql.update_prev_state(a.agent_x, a.agent_y)
                act = ql.epsilon_select()
                a.agent_move(act)
                ql.update_curr_state(a.agent_x, a.agent_y)
                a.update_reward_QL(t.tx, t.ty)
                ql.update_qTable(a.agent_reward, act)

                if a.goal_captured:
                    k = p.steps  # Stop iterating if target is captured
                k += 1

            a.reset_agent()
            learning.write('%f' % np.max(ql.qtable[:, :]))
            learning.write('\t')  # Records max reward in Qtable

        time_end = process_time()
        total_time = time_end - time_begin
        eff.write('%f' % total_time)
        eff.write('\n')

        # Test Best Policy Found
        a.reset_agent()
        k = 0

        while k < p.steps:
            ql.update_prev_state(a.agent_x, a.agent_y)
            a.update_state_vec(t.tx, t.ty)
            act = ql.greedy_select()
            a.agent_move(act)
            a.update_reward_QL(t.tx, t.ty)

            if a.goal_captured:
                stp.write('%f' % k)
                stp.write('\n')
                k = p.steps  # Stop iterating if target is captured
            k += 1

        if a.goal_captured:  # Record reliability of agent
            rel.write('%d' % 1)
            rel.write('\t')
        else:
            rel.write('%d' % 0)
            rel.write('\t')

        system_reward = a.agent_reward  # Record system performance for stat run
        perf.write('%f' % system_reward)
        perf.write('\t')
        learning.write('\n')
        perf.write('\n')
        rel.write('\n')  # New line for new stat run
    learning.close()
    perf.close()
    rel.close()
    eff.close()
    stp.close()
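
The QLearner class driven by qLearn() is not included in this snippet. As a rough reference only, a minimal epsilon-greedy tabular learner with a similar interface is sketched below; the flat state index (rather than the (x, y) pair used above), the table shape, and the hyperparameters are assumptions, not the project's actual QLearner.

import numpy as np

class TabularQ:
    """Illustrative epsilon-greedy tabular Q-learner (not the project's QLearner)."""

    def __init__(self, n_states, n_actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.q = np.zeros((n_states, n_actions))
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.prev_state = self.curr_state = 0

    def update_prev_state(self, s):
        self.prev_state = s

    def update_curr_state(self, s):
        self.curr_state = s

    def epsilon_select(self):
        # Explore with probability epsilon, otherwise act greedily on the previous state.
        if np.random.rand() < self.epsilon:
            return int(np.random.randint(self.q.shape[1]))
        return int(np.argmax(self.q[self.prev_state]))

    def update_qTable(self, reward, action):
        # One-step Q-learning backup: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
        best_next = np.max(self.q[self.curr_state])
        td_error = reward + self.gamma * best_next - self.q[self.prev_state, action]
        self.q[self.prev_state, action] += self.alpha * td_error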
Example #46
0
File: epi.py Project: SuBoYu/CRS_Chatbot
def run_one_episode(FM_model, user_id, busi_id, MAX_TURN, do_random, write_fp,
                    strategy, TopKTaxo, PN_model, gamma, trick, mini,
                    optimizer1_fm, optimizer2_fm, alwaysupdate, start_facet,
                    mask, sample_dict):
    # _______ initialize user and agent _______

    # Initialize the user
    the_user = env.user(user_id, busi_id)
    # Initialize done

    numpy_list = list()
    log_prob_list, reward_list = Variable(torch.Tensor()), list()
    action_tracker, candidate_length_tracker = list(), list()

    the_agent = agent.agent(FM_model, user_id, busi_id, do_random, write_fp,
                            strategy, TopKTaxo, numpy_list, PN_model,
                            log_prob_list, action_tracker,
                            candidate_length_tracker, mini, optimizer1_fm,
                            optimizer2_fm, alwaysupdate, sample_dict)

    # _______ chat history _______
    chat_history = dict()

    # _______ initialize start message _______
    data = dict()
    # data['facet'] = choose_start_facet(busi_id)
    data['facet'] = start_facet
    # print('Starting facet is : {}'.format(data['facet']))
    start_signal = message(cfg.AGENT, cfg.USER, cfg.EPISODE_START, data)

    agent_utterance = None
    while the_agent.turn_count < MAX_TURN:
        if the_agent.turn_count == 0:
            user_utterance = the_user.response(start_signal)
        else:
            user_utterance = the_user.response(agent_utterance)
        # print('The user utterance in #{} turn, type: {}, data: {}\n'.format(the_agent.turn_count, user_utterance.message_type, user_utterance.data))
        with open(write_fp, 'a') as f:
            f.write(
                'The user utterance in #{} turn, type: {}, data: {}\n'.format(
                    the_agent.turn_count, user_utterance.message_type,
                    user_utterance.data))

        if user_utterance.message_type == cfg.ACCEPT_REC:
            the_agent.history_list.append(2)
            print('Rec Success! in Turn: {}.'.format(the_agent.turn_count))

            rewards = get_reward(the_agent.history_list, gamma, trick,
                                 user_utterance.data)
            if cfg.purpose == 'pretrain':
                return numpy_list
            else:
                return (the_agent.log_prob_list, rewards,
                        the_agent.history_list)

        agent_utterance = the_agent.response(user_utterance)

        the_agent.turn_count += 1

        if the_agent.turn_count == MAX_TURN:
            the_agent.history_list.append(-2)
            print('Max turn quit...')
            rewards = get_reward(the_agent.history_list, gamma, trick)
            if cfg.purpose == 'pretrain':
                return numpy_list
            else:
                return (the_agent.log_prob_list, rewards,
                        the_agent.history_list)
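
get_reward() is defined elsewhere in this project; the gamma argument suggests a discounted return computed over the turn history. A generic version of that computation is sketched below purely as an illustration, not as the project's get_reward.

def discounted_returns(history, gamma):
    """Back-to-front discounted returns over per-turn scores
    (e.g. the +2 / -2 markers appended to history_list above)."""
    returns, running = [], 0.0
    for r in reversed(history):
        running = r + gamma * running
        returns.insert(0, running)
    return returns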
Example #47
0
from environment import environment
from agent import agent
import numpy as np

agent_o = agent('agent_o','O')
agent_x = agent('agent_x','X')
env = environment(agent_o,agent_x)
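# Render a hard-coded 9-cell board (presumably a 3x3 tic-tac-toe grid in row-major order).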
env.render(['X','O','X','O','X','O','X','O','X'])
Example #48
0
 def test__distAgent(self):
     a1 = agent.agent((0, 0), self.unitTestSigma, "Keeper", (0, 0))
     a2 = agent.agent((10, 10), self.unitTestSigma, "Taker", (0, 0))
     self.assertAlmostEqual(__distAgent(a1, a2), math.sqrt(200), 1)
Example #49
0
  '11st_julia', category_no[current_category])
OUTPUT_FILENAME = \
  '/home/taey16/storage/product/11st_julia/demo_{}.txt.wrap_size0.oversampleFalse.pickle'.format(
    category_no[current_category])


if __name__ == '__main__':
  print('Start indexing for {}'.format(INPUT_FILENAME))
  print('Output will be saved in {}'.format(OUTPUT_FILENAME))

  #import pdb; pdb.set_trace()
  meta_filename = '{}/{}'.format(DATASET_ROOT, INPUT_FILENAME)
  parser = parser_utils()
  input = parser.parse(meta_filename)

  agent = agent(**net_args)
  agent.net.forward()
  indexer = indexer(category_no, max_num_items)

  item_counter = 0
  for item in input:
    try:
      prd_no = item['__prd_no__']
      fname  = \
        '/userdata2/index_11st_20151020/october_11st_imgdata/{}.jpg'.format(prd_no)
      object_roi = item['__object_roi__'].strip().split(',')
      category_id = item['__mctgr_no__']
      roi = parser.get_roi_meta_dic(object_roi)
      start_loading = time.time()
      image  = agent.load_image_roi(fname, roi, 0)
      elapsed_loading = time.time() - start_loading
Example #50
0
from environment import environment
from agent import agent
from player import player

# agent plays with human player
agent_o = agent('agent_o', 'O')
player_x = player('player_x', 'X')

env = environment(agent_o, player_x)

# X plays first
env.play(player_x)

env.close()
Example #51
0
import agent as ag
from threading import Thread
from rgui import *
from bot import *
import time


def printTime():
    while True:
        print(time.ctime(time.time()))

if __name__ == '__main__':
    ob = ag.agent()
    bb = bot()
    #bb.ru()
    Thread(target=bb.ru).start()
    a = GUI(ob, bb)
    Thread(target=a.run).start()
    #thread.start_new_thread(bb.ru)
    #thread.start_new_thread(a.run)
    #bb.ru()
    #a.run()

Example #52
0
canvas.create_oval((W/2.-4, H/2.-4, W/2.+4, H/2.+4), fill='goldenrod')
canvas.create_oval((W/2.-2, H/2.-2, W/2.+2, H/2.+2), fill='orange red')
for i in range(300):
    x = random.random()*W
    y = random.random()*H
    canvas.create_oval((x-1, y-1, x+1, y+1), fill='white')
canvas.pack()

items = []
normeSpeed = 60
distToCenter = 250
for i in range(2000):
    posRandom = 2*3.14*random.random()
    items.append(agent(canvas,
                        Vec2d(W/2. + distToCenter*cos(posRandom) + 10*(2*random.random()-1),
                              H/2. + distToCenter*sin(posRandom) + 10*(2*random.random()-1)),
                        Vec2d(normeSpeed*sin(posRandom) + 10*(2*random.random()-1),
                                -normeSpeed*cos(posRandom) + 10*(2*random.random()-1))))
                        #Vec2d(normeSpeed*(2*random.random()-1), normeSpeed*(2*random.random() - 1))))
    #items.append(agent(canvas,
                        #Vec2d(520, 300),
                        #Vec2d(0, 0)))

root.update() # fix geometry

# loop over items

try:
    while 1:
        t1 = time.time()
        for agent in items:
Example #53
0
from environment import environment
from agent import agent

agent_o = agent('agent_o', 'O', exp_rate=0.3)
agent_x = agent('agent_x', 'X', exp_rate=0.3)

env = environment(agent_o, agent_x)

#rounds = 9*8*7*6*5*4*3*2*1 * 10
rounds = 9
print(len(agent_o.Q))
env.train(agent_x, rounds)
print(len(agent_o.Q))
env.close()
Example #54
0
            plt.clf()

            if agent_.env.maze.ravel()[s_] != 0:
                break


if __name__ == '__main__':

    train_settings()

    seed()

    brain_ = brain(size=args.arena_size, gamma=0.9, l_r=0.9)
    env_ = env(size=args.arena_size, cat_r=[-10, -20], cheese_r=[10, 20])

    agent_ = agent(env=env_, brain=brain_)

    plt.imshow(env_.maze)
    plt.pause(1)

    for i in range(args.random_steps):

        agent_.step()

        if i % 10 == 0:

            plt.imshow(agent_.brain.q_mat)
            plt.pause(0.01)
            plt.clf()

    animate(agent_)
Example #55
0
File: main.py Project: zerbeln/ROB521
def evo_net():
    nn = neuralnet.NN()
    g = GA.GA()
    a = agent.agent()
    t = agent.target()

    # Initialize vectors and starting coordinates for agents and targets
    nn.create_NN(2, 3, 4)  # (n_inputs, n_outputs, hidden layer size)

    # Create output files
    learning = open('BestFit_NN.txt', 'w')  # Records best fitnesses
    perf = open('SystemReward_NN.txt', 'w')
    rel = open('Reliability_NN.txt',
               'w')  # Records how successful trained NN is using "best" policy
    eff = open('Alg_Time_NN.txt', 'w')
    stp = open('Steps_Taken_NN.txt', 'w')

    for srun in range(p.stat_runs):
        print('current stat run: ', srun)
        a.assign_acoords(p.x_dim, p.y_dim)
        t.assign_tcoords(p.x_dim, p.y_dim, a.ax_init, a.ay_init)
        time_begin = process_time()
        g.create_pop()  # (policy_size)

        for j in range(g.population_size):  # Evaluate the initial population
            nn.get_weights(g.population[j])
            a.reset_agent()
            k = 0

            while k < p.steps:  # Move around for certain number of steps unless target is captured
                a.update_state_vec(t.tx, t.ty)  # Updates state input to NN
                nn.get_inputs(a.state_vector)
                act = nn.get_ouput()  # Get output from NN
                a.agent_move(act)  # Agent moves
                a.update_reward_NN(t.tx, t.ty)

                if a.goal_captured:
                    k = p.steps  # Stop iterating, target is captured
                k += 1

            g.pop_fit[j] = a.agent_reward  # Fitness is sum of agent rewards

        learning.write('%f' % max(g.pop_fit))
        learning.write('\t')

        # Train the weights of the neural network
        for i in range(p.generations - 1):
            g.crossover()
            g.mutate()  # Create new population for testing
            for j in range(g.population_size):  # Test offspring population
                nn.get_weights(g.offspring_pop[j])
                a.reset_agent()
                k = 0

                while k < p.steps:  # Move around for certain number of steps unless target is captured
                    a.update_state_vec(t.tx, t.ty)  # Updates state input to NN
                    nn.get_inputs(a.state_vector)
                    act = nn.get_ouput()  # Get output from NN
                    a.agent_move(act)  # Agent moves
                    a.update_reward_NN(t.tx, t.ty)

                    if a.goal_captured:
                        k = p.steps  # Stop iterating, target is captured
                    k += 1

                g.pop_fit[j] = a.agent_reward

            g.down_select()  # Establish new parent population

            learning.write('%f' % g.pop_fit[0])
            learning.write('\t')
        time_end = process_time()
        total_time = time_end - time_begin
        eff.write('%f' % total_time)
        eff.write('\n')

        # Test Best Policy Found
        nn.get_weights(g.population[0])
        a.reset_agent()
        k = 0
        best_fitn = max(g.pop_fit)
        assert (best_fitn == g.pop_fit[0])

        while k < p.steps:
            a.update_state_vec(t.tx, t.ty)
            nn.get_inputs(a.state_vector)
            act = nn.get_ouput()
            a.agent_move(act)
            a.update_reward_NN(t.tx, t.ty)

            if a.goal_captured:
                stp.write('%f' % k)
                stp.write('\n')
                k = p.steps  # Stop iterating if target is captured
            k += 1

        if a.goal_captured:
            rel.write('%d' % 1)
            rel.write('\t')
        else:
            rel.write('%d' % 0)
            rel.write('\t')

        system_reward = a.agent_reward
        perf.write('%f' % system_reward)
        perf.write('\t')
        learning.write('\n')
        perf.write('\n')
        rel.write('\n')  # New line for new stat run
    learning.close()
    perf.close()
    rel.close()
    eff.close()
    stp.close()
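
The GA operations driven above (create_pop, crossover, mutate, down_select) live in the project's GA module and are not shown. As a rough reference only, a generic generational loop of that shape is sketched below; the real-valued genome encoding, tournament selection, and Gaussian mutation are assumptions, not the project's GA. In the snippet above the fitness of a genome is the agent's accumulated reward after one rollout, so evaluate() would wrap a single episode of the gridworld task.

import random

def evolve(evaluate, pop_size=20, genome_len=10, generations=50,
           mutation_rate=0.1, mutation_scale=0.5):
    """Generic generational GA sketch: real-valued genomes, tournament parent
    selection, uniform crossover, Gaussian mutation, truncation down-select."""
    pop = [[random.uniform(-1, 1) for _ in range(genome_len)] for _ in range(pop_size)]
    fits = [evaluate(g) for g in pop]
    for _ in range(generations):
        offspring = []
        for _ in range(pop_size):
            # Tournament selection of two parents from the current population.
            i1 = max(random.sample(range(pop_size), 2), key=lambda i: fits[i])
            i2 = max(random.sample(range(pop_size), 2), key=lambda i: fits[i])
            # Uniform crossover followed by per-gene Gaussian mutation.
            child = [random.choice(pair) for pair in zip(pop[i1], pop[i2])]
            child = [g + random.gauss(0.0, mutation_scale)
                     if random.random() < mutation_rate else g for g in child]
            offspring.append(child)
        # Down-select: keep the best pop_size individuals of parents + offspring.
        scored = list(zip(pop + offspring, fits + [evaluate(c) for c in offspring]))
        scored.sort(key=lambda gf: gf[1], reverse=True)
        pop = [g for g, _ in scored[:pop_size]]
        fits = [f for _, f in scored[:pop_size]]
    return pop[0], fits[0]  # best genome found and its fitness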