def test_getStateVars2(self): simulatedError = 0.01 simWidth = 550.0 simHeight = 357.0 ballPos = (1/3 * simWidth,1/6 * simHeight) c = (simWidth /2.0,simHeight/2.0) a1 = agent.agent((1/3 * simWidth,1/6 * simHeight),simulatedError,"Keeper",ballPos) a2 = agent.agent((2/3 * simWidth,1/7 * simHeight),simulatedError,"Keeper",ballPos) a3 = agent.agent((2/5 * simWidth,6/7 * simHeight),simulatedError,"Keeper",ballPos) keepers = [a1,a2,a3] t1 = agent.agent((1/2 * simWidth,5/12 * simHeight),simulatedError,"Taker",ballPos) t2 = agent.agent((2/5 * simWidth,7/12 * simHeight),simulatedError,"Taker",ballPos) takers = [t1,t2] testOut = getStateVarsKeepers(keepers, takers, c) actualOut = [kUtil.getDist((550/3, 59.5), c), kUtil.getDist((550/3 * 2, 51), c), kUtil.getDist((220, 306), c), kUtil.getDist((275, 148.75), c), kUtil.getDist((220, 208.25), c), kUtil.getDist((550/3,59.5), (550/3*2, 51)), kUtil.getDist((550/3,59.5), (220,306)), kUtil.getDist((550/3,59.5), (275, 148.75)), kUtil.getDist((550/3,59.5), (220, 208.25)), min( kUtil.getDist((550/3*2,51), (220, 208.25)), kUtil.getDist((550/3*2,51), (275, 148.75)) ), min( kUtil.getDist((220,306), (220, 208.25)), kUtil.getDist((220,306), (275, 148.75)) ), max(kUtil.cosTheta((550/3*2, 51), (550/3,59.5), (275,148.75)), kUtil.cosTheta((550/3*2, 51), (550/3,59.5), (220,208.25))), max(kUtil.cosTheta((220,306), (550/3,59.5), (275,148.75)), kUtil.cosTheta((220,306), (550/3,59.5), (220,208.25))), ] for i in range(len(testOut)): self.assertAlmostEqual(testOut[i], actualOut[i], 1,"Failed on index: %d" % i)
def test_getStateVars(self): # self, worldRef, pos, sigma, agentType, trueBallPos, maxPlayerSpeed, maxBallSpeed, inPossession = False import keepAway keepAwayWorld = keepAway.keepAway() ballPos = (0, 0) center = (0, 0) simulatedError = 0.01 a1 = agent.agent( keepAwayWorld, (10, 0), simulatedError, "Keeper", ballPos, keepAwayWorld.maxPlayerSpeed, keepAwayWorld.maxBallSpeed, ) a2 = agent.agent( keepAwayWorld, (0, 0), simulatedError, "Keeper", ballPos, keepAwayWorld.maxPlayerSpeed, keepAwayWorld.maxBallSpeed, ) a3 = agent.agent( keepAwayWorld, (0, 5), simulatedError, "Keeper", ballPos, keepAwayWorld.maxPlayerSpeed, keepAwayWorld.maxBallSpeed, ) keepers = [a1, a2, a3] t1 = agent.agent( keepAwayWorld, (5, 5), simulatedError, "Taker", ballPos, keepAwayWorld.maxPlayerSpeed, keepAwayWorld.maxBallSpeed, ) t2 = agent.agent( keepAwayWorld, (5, 0), simulatedError, "Taker", ballPos, keepAwayWorld.maxPlayerSpeed, keepAwayWorld.maxBallSpeed, ) takers = [t1, t2] testOut = getStateVarsKeepers(keepers, takers, center) actualOut = [0, 5, 10, 5, math.sqrt(50), 5, 10, 5, math.sqrt(50), 5, 5, math.cos(math.pi / 4.0), 1] for i in range(len(testOut)): self.assertAlmostEqual(testOut[i], actualOut[i], 1)
def __init__(self): mode = None #will be set to either monte carlo, q learning, sarsa, or manual control in the intro #RGB color self.white = (255,255,255) self.black = (0,0,0) self.red = (255,0,0) self.green = (0,155,0) self.blue = (0,0,255) #give the game a title pygame.display.set_caption('Keepaway') self.keeperScore = 0 self.displayGraphics = True #these are more or less global variables.. #I'm not sure if this is bad or not. self.worldImage = pygame.image.load('images/soccer_field.png') self.ballImage = pygame.image.load('images/ball.png') self.keeperImage = pygame.image.load('images/keeper.png') self.takerImage = pygame.image.load('images/taker.png') #block sizes are used for collision detection #only 1 size per element because all blocks are squares. block size = side length self.agent_block_size = 23 self.ball_block_size = 12 self.maxBallSpeed= 3 self.maxPlayerSpeed = 2 #dimensions of the game are the same as the soccer field image self.display_width = 550 self.display_height = 357 self.field_center = (self.display_width / 2 , self.display_height / 2) #gameDisplay is a pygame.surface object. it's your screen self.gameDisplay = pygame.display.set_mode((self.display_width,self.display_height)) self.fps = 60 self.clock = pygame.time.Clock() types = ["keeper", "taker"] agentSigmaError = .01 #start the ball kinda close to the keeper in the upper left corner self.fieldBall = ball.ball( (self.field_center[0]/4, self.field_center[1]/4), self.maxBallSpeed) #setup all the initial keepers and takers. They are all starting at different field positions, which is why #you can't have a for loop just iterate and declare all of them self.keeperArray = [] self.keeperArray.append(agent.agent(self, (12.5, 12.5), agentSigmaError, types[0], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed)) self.keeperArray.append(agent.agent(self, (25, self.display_width - 37.5), agentSigmaError, types[0], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed)) self.keeperArray.append(agent.agent(self, (self.display_height - 37.5, self.display_width - 37.5), agentSigmaError, types[0], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed)) self.takerArray = [] self.takerArray.append(agent.agent(self, (self.display_height - 25, 25), agentSigmaError, types[1], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed)) self.takerArray.append(agent.agent(self, (self.display_height - 37.5, 50), agentSigmaError, types[1], self.field_center, self.maxPlayerSpeed, self.maxBallSpeed)) #3 different font sizes self.smallfont = pygame.font.SysFont("comicsansms",25) #25 is font sizes self.medfont = pygame.font.SysFont("comicsansms",50) self.largefont = pygame.font.SysFont("comicsansms",80) self.verysmallfont = pygame.font.SysFont("comicsansms", 12)
def test_getStateVars(self): ballPos = (0,0) center = (0,0) simulatedError = 0.01 a1 = agent.agent((10,0),simulatedError,"Keeper",ballPos) a2 = agent.agent((0,0),simulatedError,"Keeper",ballPos) a3 = agent.agent((0,5),simulatedError,"Keeper",ballPos) keepers = [a1,a2,a3] t1 = agent.agent((5,5),simulatedError,"Taker",ballPos) t2 = agent.agent((5,0),simulatedError,"Taker",ballPos) takers = [t1,t2] testOut = getStateVarsKeepers(keepers, takers, center) actualOut = [0,5,10,5,math.sqrt(50),5,10,5,math.sqrt(50),5,5,math.cos(math.pi / 4.0),1] for i in range(len(testOut)): self.assertAlmostEqual(testOut[i], actualOut[i], 1)
def start_from_terminal(app): parser = optparse.OptionParser() parser.add_option("-p", "--port", help="which port to serve content on", type="int", default=port) opts, args = parser.parse_args() net_args = { "model_def_file": model_def_file, "pretrained_model_file": pretrained_model_file, "gpu_mode": gpu_mode, "device_id": 1, "image_dim": image_dim, "raw_scale": raw_scale, } # Initialize classifier app.agent = agent(**net_args) logging.info("Initialize vision model done") # warm start by forward for allocation app.agent.net.forward() logging.info("Net forward done") app.indexer = indexer() for category_id in CATEGORY_NAME: app.indexer.load(category_id, DATABASE_FILENAME % category_id) logging.info("Loading indexer for {}".format(category_id)) logging.info("Initialize indexer done") # app.indexer.load(DATABASE_FILE) start_tornado(app, opts.port)
def test_distCenter(self): a1 = agent.agent((0, 0), self.unitTestSigma, "Keeper", (0, 0)) self.assertAlmostEqual(__distCenter(a1, (0, 0)), 0, 1) self.assertAlmostEqual(__distCenter(a1, (0, 10)), 10, 1) self.assertAlmostEqual(__distCenter(a1, (10, 0)), 10, 1) self.assertAlmostEqual(__distCenter(a1, (1, 1)), math.sqrt(2), 1) self.assertAlmostEqual(__distCenter(a1, (10, 10)), math.sqrt(200), 1)
def getListOfAgents(version): worldFile = getWorldFile(version) if(worldFile == 0): return [] listOfAgents = [] for line in worldFile.readlines(): a = agent.agent(line) listOfAgents.append(a) return listOfAgents
def setup(): size(768, 768) frameRate(fps) global g g = grid.generate() global a a = agent()
def fuzz (self, this_node=None, path=[]): ''' Call this routine to get the ball rolling. No arguments are necessary as they are both utilized internally during the recursive traversal of the session graph. @type this_node: request (node) @param this_node: (Optional, def=None) Current node that is being fuzzed. @type path: List @param path: (Optional, def=[]) Nodes along the path to the current one. ''' # if no node is specified, we start from root and initialize the session. if not this_node: # we can't fuzz if we don't have at least one target and one request. if not self.target: syslog.syslog(syslog.LOG_ERR, self.session_id + ": no target specified for session") return if not self.edges_from(self.root.id): syslog.syslog(syslog.LOG_ERR, self.session_id + ": no request specified for session") return this_node = self.root self.total_mutant_index = 0 self.total_num_mutations = self.num_mutations() # If no errors above and not already connected to the agent, initialize the # agent connection. # If the agent cannot be initialized make sure the user is aware of it. if self.agent == None and self.agent_settings != None: try: self.agent = agent(self.config, self.session_id, self.agent_settings) self.agent.connect() except Exception, ex: syslog.syslog(syslog.LOG_ERR, self.session_id + ": failed to establish agent connection (%s)" % str(ex)) self.finished_flag = True self.stop_flag = True return # Get the agent to execute try: self.agent.start() except Exception, ex: syslog.syslog(syslog.LOG_ERR, self.session_id + ": agent failed to execute command (%s)" % str(ex)) self.finished_flag = True self.stop_flag = True return
def initialize( self ): self.inds = [] delprn( "Creating Trees\t\t", 2 ) #Set up random trees for i in range(0,self.mu): delprn( str(perStr( i/self.mu )), 3 ) self.inds.append(agent( self ) ) delprn( "Calc. Inital Fitness\t", 2 ) #Do our initial run for i in range(0,len(self.inds)): delprn( str(perStr( i/len(self.inds) )), 3 ) self.inds[i].fitness( )
def recombination( self ): parents = self.parentSelection( ) kids = [] delprn( "Creating Children\t", 2 ) for i in range(0,len(parents)): delprn( str(perStr( i/self.lamb )), 3 ) pair = parents[i] p1 = pair[0] p2 = pair[1] #We're just doing cross over for now, so hardcode this in: #Create two kids, one from each parent kid1 = agent( self, copy=p1 ) kid2 = agent( self, copy=p2 ) #And sample for a random crossover point from both kids kid1pt = random.sample(kid1.tree.nodes, 1)[0] kid2pt = random.sample(kid2.tree.nodes, 1)[0] #Now swap subtrees tree.swapsubtree( kid1.tree, kid1pt, kid2.tree, kid2pt ) #Mutate them kid1.mutate( ) kid2.mutate( ) kids.append(kid1) kids.append(kid2) if self.strat == PLUS: for ind in kids: self.inds.append( ind ) elif self.strat == COMMA: for ind in self.inds: ind.delete( ) self.inds = kids
def __init__(self, port): util_root = '/works/demon_11st/utils' sys.path.insert(0, util_root) from exifutil import exifutil app.exifutils = exifutil() #import pdb; pdb.set_trace() agent_root = '/works/demon_11st/agent/detection' sys.path.insert(0, agent_root) import _init_paths from conf import conf from agent import agent yaml_file = '/storage/product/detection/11st_All/cfg/faster_rcnn_end2end_test.yml' conf = conf(yaml_file, 0) app.agent = agent() from korean_url_handler import korean_url_handler app.korean_url_handler = korean_url_handler() # start web server web_server.__init__(self, app, port)
def __init__(self, port, net_args, oversample, category_no, max_num_items, database_filename): self.net_args = net_args self.database_filename = database_filename # Initialize classifier app.oversample = oversample app.agent = agent(**self.net_args) logging.info('Initialize vision model done') app.agent.net.forward() logging.info('Net forward done') # Initialize indexer app.indexer = indexer(category_no, max_num_items) app.indexer.load_category(database_filename) logging.info('Initialize indexer done') # get parser_utils app.parser_utils = parser_utils() app.korean_url_handler = korean_url_handler() # start web server web_server.__init__(self, app, port)
def absBestFinish( self, cfg, best ): self.res.write( "\nTree with the Global Best Fitness\n" ) #Mock container generation generation = gen( cfg ) #Avoiding errors best.gen = generation self.res.write( "\nRandom GP Performance\n" ) self.res.write( "Global best's gen #: " + str(best.gennum) + "\n" ) #Clear old payoffs best.payoffs = [] #Randomly make many individuals to face. for i in range(30): generation.inds.append( agent( generation ) ) for opp in generation.inds: beforepayoff = best.mem*2 for j in range(0,generation.seqs): tmoves = opp.mymoves oppres = opp.run( best.mymoves ) myres = best.run( opp.mymoves ) if j > beforepayoff: best.upres( myres, oppres ) opp.upres( oppres, myres ) avg = 0 for i in best.payoffs: avg += i avg /= len(best.payoffs) self.res.write( "Random fit: " + str(avg) + "\n" ) self.csv.write( "\n\n" + "Global Best Gen #,avgabsfit,lastwinfit,csv,random fit" + "\n" ) self.csv.write( str(best.gennum) + "," + str(best.fit) + "," + str(best.fits[0]) + "," + str(best.fits[1]) + "," + str(avg) + "\n" )
def __init__(self, _num_adv, _num_agents, _num_drones): g_var.num_of_adverseries = _num_adv g_var.num_of_agents = _num_agents g_var.num_of_drones = _num_drones # re-initialization of result variables g_var.arrested_poachers = 0 g_var.fled_poachers = 0 g_var.resource_poached = 0 g_var.resource_recovered = 0 g_var.distance_travelled = 0 self.refresh_counter = 0 self.refresh_limit = 40 print "Parameters: adversaries: " + str(g_var.num_of_adverseries),\ ", agents: " + str(g_var.num_of_agents),\ ", drones: " + str(g_var.num_of_drones) self.root = Tk() self.root.title("Green Security Game") self.root.geometry('640x480+620+120') self.canvas = Canvas(self.root, bg="#333333", height=480, width=640) self.canvas.pack() Frame.__init__(self) self.agent_pos = [[0 for i in range(g_var.dimension)] for j in range(g_var.dimension)] #self.cell_resources = [[random.randint(10,50) for i in range(global_var.dimension)] for j in range(global_var.dimension)] self.adv_pos = [[0 for i in range(g_var.dimension)] for j in range(g_var.dimension)] self.drone_pos = [[0 for i in range(g_var.dimension)] for j in range(g_var.dimension)] self.drone_signal = [[0 for i in range(g_var.dimension)] for j in range(g_var.dimension)] self.target_pos = [] self.round_marking = [] self.cell_resources = [[4, 9, 6, 7, 0, 2, 1, 6, 7, 0], [13, 50, 0, 0, 50, 0, 50, 0, 0, 21], [14, 0, 19, 13, 24, 23, 36, 17, 0, 11], [17, 50, 40, 10, 50, 50, 50, 6, 0, 6], [10, 31, 20, 13, 50, 0, 0, 10, 50, 3], [9, 34, 30, 10, 50, 50, 50, 10, 0, 5], [11, 37, 10, 22, 17, 15, 12, 10, 0, 6], [13, 0, 50, 14, 33, 17, 50, 32, 26, 11], [7, 0, 0, 50, 0, 0, 0, 50, 13, 23], [11, 12, 31, 10, 9, 8, 11, 13, 14, 21]] for i in range(g_var.dimension): for j in range(g_var.dimension): if self.cell_resources[i][j] > 0: self.target_pos.append((i, j)) if self.cell_resources[i][j] == -1: self.round_marking.append((i, j)) self.round_marking.append((5, 5)) # temporary dummy self.cell_coord = [[ i.__str__() + "," + j.__str__() for i in range(g_var.dimension) ] for j in range(g_var.dimension)] self.label_poacher_num = Label(self.root, text="Number of Total \nPoachers:\n" + str(g_var.num_of_adverseries)) self.label_poacher_num.place(relx=0.78, rely=0.05) self.label_arrest = Label(self.root, text=g_var.arrested_poachers) self.label_arrest.place(relx=0.78, rely=0.2) self.label_fled = Label(self.root, text=g_var.fled_poachers) self.label_fled.place(relx=0.78, rely=0.3) self.label_sack = Label(self.root, text=g_var.resource_poached) self.label_sack.place(relx=0.78, rely=0.4) self.label_recovered = Label(self.root, text=g_var.resource_poached) self.label_recovered.place(relx=0.78, rely=0.5) self.label_travelled = Label(self.root, text=g_var.distance_travelled) self.label_travelled.place(relx=0.78, rely=0.6) self.label_agent_num = Label(self.root, text="Number of Agents:\n" + str(g_var.num_of_agents)) self.label_agent_num.place(relx=0.78, rely=0.7) self.label_drone_num = Label(self.root, text="Number of Drones:\n" + str(g_var.num_of_drones)) self.label_drone_num.place(relx=0.78, rely=0.8) self.refresh() self.canvas.create_rectangle(0, 0, g_var.dimension * g_var.block_size, g_var.dimension * g_var.block_size, fill=g_var.bg_color) # for ONE TIME labelling ******************************************* for i in range(g_var.dimension): for j in range(g_var.dimension): self.coord_label = Label(self.root, text=self.cell_coord[i][j], bg="black", fg="white") self.coord_label.place(x=i * g_var.block_size + 2, y=j * g_var.block_size + 18) for i in range(g_var.dimension + 1): for j in 
range(g_var.dimension + 1): self.canvas.create_rectangle(i * g_var.block_size, j * g_var.block_size, g_var.block_size, g_var.block_size, outline="grey") for i in range(g_var.num_of_agents): agent_obj = agent(self.canvas, self.root, self.agent_pos, self.cell_resources, self.target_pos, self.round_marking, self.drone_signal) agent_obj.move_spec_guard() for i in range(g_var.num_of_drones): drone_obj = drone(self.canvas, self.root, self.drone_pos, self.drone_signal, self.adv_pos) drone_obj.move_drone() for i in range(g_var.num_of_adverseries): adv_obj = adv(self.canvas, self.root, self.agent_pos, self.drone_pos, self.cell_resources, self.target_pos, self.adv_pos) adv_obj.operate_adv() self.root.mainloop()
from simulGivenRoutes import execute from getRoutes import getRoutes from itertools import product from copy import deepcopy from ARoptimization import ARoptim from ExpOptim import updatePosterior from ExpOptim import generateDraws #network creation network=[ [None] * 4 for i in range(4)] network[0][1]=edge.edge('x') network[1][2]=edge.edge('x') network[1][3]=edge.edge('x') network[3][2]=edge.edge('x') #agent creation agents=list() agents.append(agent.agent(0,2,1)) agents.append(agent.agent(1,2,2)) agents.append(agent.agent(1,2,3)) agents.append(agent.agent(1,2,1)) #%% #Experience #sensitivity to New information delta=0.1 #getting routes and costs routesAndCosts=getRoutes(network, agents, 10, cost=True) #calculate the first probability distribution posterior=updatePosterior(routesAndCosts['costs']) prior=posterior #get a route draw for each agent while True: for i in range(3000):
f, ax = plt.subplots(1, 1) ax.plot(reward_ts) plt.show() else: if args.weight_sharing: model = rnn.RDQN_multi(args) target = rnn.RDQN_multi(args) optimizer = optim.RMSprop(model.parameters(), lr=args.learning_rate, momentum=args.learning_momentum) game = env.env_multi(args) players = [] for i in range(args.n_agents): player = agent.agent(args, agent_id=i) if args.weight_sharing: player.setup_models(model, target) else: model = rnn.RDQN_multi(args) target = rnn.RDQN_multi(args) optimizer = optim.RMSprop(model.parameters(), lr=args.learning_rate, momentum=args.learning_momentum) player.setup_models(model, target) player.set_optimizer(optimizer) players.append(player) criterion = nn.MSELoss() average_loss = 0 average_reward = 0 loss_record = []
class Matris(object): board = agent.board() agent_mode = True #used to check if agent is playing. Causes hard-drops to always happen. if agent_mode == True: if (sys.argv[1] == "-hh"): #Creates an agent that takes column differences, holes and height of the tallest column as inputs agent = agent.agent([], int(sys.argv[2]), random_moves=False, rewards_as_lines=True, epsilon=1, epsilon_decay=0.01, epsilon_minimum=0.01, memory_size=1000, sample_size=32, reset_steps=1000, height=True, holes=True) elif (sys.argv[1] == "-ho"): #Creates an agent that takes column differences and holes as inputs agent = agent.agent([], int(sys.argv[2]), random_moves=False, rewards_as_lines=True, epsilon=1, epsilon_decay=0.01, epsilon_minimum=0.01, memory_size=1000, sample_size=32, reset_steps=1000, holes=True) elif (sys.argv[1] == "-hi"): #Creates an agent that takes column differences and height of the tallest column as inputs agent = agent.agent([], int(sys.argv[2]), random_moves=False, rewards_as_lines=True, epsilon=1, epsilon_decay=0.01, epsilon_minimum=0.01, memory_size=1000, sample_size=32, reset_steps=1000, height=True) elif (sys.argv[1] == "-no"): #Creates an agent that takes column differences as inputs only agent = agent.agent([], int(sys.argv[2]), random_moves=False, rewards_as_lines=True, epsilon=1, epsilon_decay=0.01, epsilon_minimum=0.01, memory_size=1000, sample_size=32, reset_steps=1000) elif (sys.argv[1] == "-ra"): #Creates an agent that plays randomly agent = agent.agent([], int(sys.argv[2]), random_moves=True) elif (sys.argv[1] == "-lo"): #Loads an agent that has previously been trained in MaTris. Loads .obj file. agent = agent.agent([], int(sys.argv[2]), random_moves=False, rewards_as_lines=True, epsilon=1, epsilon_decay=0.01, epsilon_minimum=0.01, memory_size=1000, sample_size=32, reset_steps=1000, filepath=sys.argv[3]) elif (sys.argv[1] == "-lt"): #Loads an agent that has previously been trained using supervised learning in MaTris-O. Loads .obj file. agent = agent.agent([], int(sys.argv[2]), random_moves=False, rewards_as_lines=True, epsilon=1, epsilon_decay=0.01, epsilon_minimum=0.01, memory_size=1000, sample_size=32, reset_steps=1000, filepath=sys.argv[3], supervised=True) else: raise Exception( error_message= "\n\nError inputting command line arguments\nUsage:\n[mode] [number of episodes]\nmode:\n\t-hh - holes and height and column differences\n\t-ho - holes and column differences\n\t-hi - height and column differences\n\t-no - column differences only\n\tLoad ANN\nSecond argument should be number of episodes\n third argument should be filepath if file is being loaded." ) seed = agent.load_new_seed() random.seed(seed) tetromino_placement = None def __init__(self): self.surface = screen.subsurface( Rect((MATRIS_OFFSET + BORDERWIDTH, MATRIS_OFFSET + BORDERWIDTH), (MATRIX_WIDTH * BLOCKSIZE, (MATRIX_HEIGHT - 2) * BLOCKSIZE))) self.matrix = dict() for y in range(MATRIX_HEIGHT): for x in range(MATRIX_WIDTH): self.matrix[(y, x)] = None """ `self.matrix` is the current state of the tetris board, that is, it records which squares are currently occupied. It does not include the falling tetromino. The information relating to the falling tetromino is managed by `self.set_tetrominoes` instead. When the falling tetromino "dies", it will be placed in `self.matrix`. 
""" self.next_tetromino = random.choice(list_of_tetrominoes) self.set_tetrominoes() if self.agent_mode == True: #Creates a representation of the initial board self.board.update_board_representation( self.create_board_representation()) self.board.set_board_height() self.board.set_holes() self.board.set_column_differences() print(str(self.board)) print("Column Height Differences:" + str(self.board.get_column_differences())) #Set up the the agent self.agent.set_current_board(self.board) self.agent.set_agent_tetromino(self.current_tetromino) self.tetromino_rotation = 0 self.downwards_timer = 0 self.base_downwards_speed = 0.4 # Move down every 400 ms self.movement_keys = {'left': 0, 'right': 0} self.movement_keys_speed = 0.05 self.movement_keys_timer = (-self.movement_keys_speed) * 2 self.level = 1 self.score = 0 self.lines = 0 self.combo = 1 # Combo will increase when you clear lines with several tetrominos in a row self.paused = False self.highscore = load_score() self.played_highscorebeaten_sound = False self.levelup_sound = get_sound("levelup.wav") self.gameover_sound = get_sound("gameover.wav") self.linescleared_sound = get_sound("linecleared.wav") self.highscorebeaten_sound = get_sound("highscorebeaten.wav") if self.agent_mode == True: #Agent's first move self.tetromino_placement = self.agent.make_move() self.tetromino_position = (0, self.tetromino_placement[2]) for rotations in range(self.tetromino_placement[0]): self.request_rotation() def set_tetrominoes(self): """ Sets information for the current and next tetrominos """ self.current_tetromino = self.next_tetromino self.next_tetromino = random.choice(list_of_tetrominoes) self.surface_of_next_tetromino = self.construct_surface_of_next_tetromino( ) self.tetromino_position = (0, 4) if len( self.current_tetromino.shape) == 2 else (0, 3) self.tetromino_rotation = 0 self.tetromino_block = self.block(self.current_tetromino.color) self.shadow_block = self.block(self.current_tetromino.color, shadow=True) def hard_drop(self): """ Instantly places tetrominos in the cells below """ amount = 0 while self.request_movement('down'): amount += 1 self.score += 10 * amount self.lock_tetromino() def update(self, timepassed): """ Main game loop """ try: self.needs_redraw = False if self.agent_mode == True: self.hard_drop() else: #Handles player input pressed = lambda key: event.type == pygame.KEYDOWN and event.key == key unpressed = lambda key: event.type == pygame.KEYUP and event.key == key events = pygame.event.get() #Controls pausing and quitting the game. 
for event in events: if pressed(pygame.K_p): self.surface.fill((0, 0, 0)) self.needs_redraw = True self.paused = not self.paused elif event.type == pygame.QUIT: self.gameover(full_exit=True) elif pressed(pygame.K_ESCAPE): self.gameover() if self.paused: return self.needs_redraw for event in events: #Handles player input #Controls movement of the tetromino if pressed(pygame.K_SPACE): self.hard_drop() elif pressed(pygame.K_UP) or pressed(pygame.K_w): self.request_rotation() elif pressed(pygame.K_LEFT) or pressed(pygame.K_a): self.request_movement('left') self.movement_keys['left'] = 1 elif pressed(pygame.K_RIGHT) or pressed(pygame.K_d): self.request_movement('right') self.movement_keys['right'] = 1 elif unpressed(pygame.K_LEFT) or unpressed(pygame.K_a): self.movement_keys['left'] = 0 self.movement_keys_timer = ( -self.movement_keys_speed) * 2 elif unpressed(pygame.K_RIGHT) or unpressed(pygame.K_d): self.movement_keys['right'] = 0 self.movement_keys_timer = ( -self.movement_keys_speed) * 2 self.downwards_speed = self.base_downwards_speed**( 1 + self.level / 10.) self.downwards_timer += timepassed downwards_speed = self.downwards_speed * 0.10 if any([ pygame.key.get_pressed()[pygame.K_DOWN], pygame.key.get_pressed()[pygame.K_s] ]) else self.downwards_speed if self.downwards_timer > downwards_speed: if not self.request_movement( 'down' ): #Places tetromino if it cannot move further down self.lock_tetromino() self.downwards_timer %= downwards_speed if any(self.movement_keys.values()): self.movement_keys_timer += timepassed if self.movement_keys_timer > self.movement_keys_speed: self.request_movement( 'right' if self.movement_keys['right'] else 'left') self.movement_keys_timer %= self.movement_keys_speed except: print("Error in agent running") print( "Manually causing gameover. Preserves continuation of agent running with minor potential impediment on learning." ) self.gameover() self.needs_redraw = True return self.needs_redraw def draw_surface(self): """ Draws the image of the current tetromino """ with_tetromino = self.blend(matrix=self.place_shadow()) for y in range(MATRIX_HEIGHT): for x in range(MATRIX_WIDTH): # I hide the 2 first rows by drawing them outside of the surface block_location = Rect(x * BLOCKSIZE, (y * BLOCKSIZE - 2 * BLOCKSIZE), BLOCKSIZE, BLOCKSIZE) if with_tetromino[(y, x)] is None: self.surface.fill(BGCOLOR, block_location) else: if with_tetromino[(y, x)][0] == 'shadow': self.surface.fill(BGCOLOR, block_location) self.surface.blit(with_tetromino[(y, x)][1], block_location) def gameover(self, full_exit=False): """ Gameover occurs when a new tetromino does not fit after the old one has died, either after a "natural" drop or a hard drop by the player. That is why `self.lock_tetromino` is responsible for checking if it's game over. 
""" write_score(self.score) if full_exit: if self.agent_mode == True: print("Runs completed.") self.serialize_agent() exit() else: if self.agent_mode == True: self.agent.complete_episode() #Manages the starting of a new game if self.agent.get_current_episode( ) < self.agent.get_number_of_episodes(): #Resets the board self.matrix = dict() for y in range(MATRIX_HEIGHT): for x in range(MATRIX_WIDTH): self.matrix[(y, x)] = None self.score = 0 self.lines = 0 self.board = agent.board( self.create_board_representation()) self.board.set_board_height() self.board.set_holes() self.board.set_column_differences() self.agent.set_current_board(self.board) print(str(self.board)) new_seed = self.agent.load_new_seed() if new_seed == None: try: raise ValueError( "Not enough seeds for current experiment!") except: print( "\nNot enough seeds for current experiment!\nExiting Matris..." ) exit() print("Generating new game with seed: " + str(new_seed)) random.seed(new_seed) self.set_tetrominoes() self.next_tetromino = random.choice(list_of_tetrominoes) self.agent.set_agent_tetromino(self.current_tetromino) #Agent's first move of the new game self.tetromino_placement = self.agent.make_move() self.tetromino_position = (0, self.tetromino_placement[2]) for rotations in range(self.tetromino_placement[0]): self.request_rotation() else: print("Runs completed.") self.serialize_agent() exit() else: raise GameOver("Sucker!") def place_shadow(self): """ Draws shadow of tetromino so player can see where it will be placed """ posY, posX = self.tetromino_position while self.blend(position=(posY, posX)): posY += 1 position = (posY - 1, posX) return self.blend(position=position, shadow=True) def fits_in_matrix(self, shape, position): """ Checks if tetromino fits on the board """ posY, posX = position for x in range(posX, posX + len(shape)): for y in range(posY, posY + len(shape)): if self.matrix.get((y, x), False) is False and shape[y - posY][ x - posX]: # outside matrix return False return position def request_rotation(self): """ Checks if tetromino can rotate Returns the tetromino's rotation position if possible """ rotation = (self.tetromino_rotation + 1) % 4 shape = self.rotated(rotation) y, x = self.tetromino_position position = (self.fits_in_matrix(shape, (y, x)) or self.fits_in_matrix(shape, (y, x + 1)) or self.fits_in_matrix(shape, (y, x - 1)) or self.fits_in_matrix(shape, (y, x + 2)) or self.fits_in_matrix(shape, (y, x - 2))) # ^ That's how wall-kick is implemented if position and self.blend(shape, position): self.tetromino_rotation = rotation self.tetromino_position = position self.needs_redraw = True return self.tetromino_rotation else: return False def request_movement(self, direction): """ Checks if teteromino can move in the given direction and returns its new position if movement is possible """ posY, posX = self.tetromino_position if direction == 'left' and self.blend(position=(posY, posX - 1)): self.tetromino_position = (posY, posX - 1) self.needs_redraw = True return self.tetromino_position elif direction == 'right' and self.blend(position=(posY, posX + 1)): self.tetromino_position = (posY, posX + 1) self.needs_redraw = True return self.tetromino_position elif direction == 'up' and self.blend(position=(posY - 1, posX)): self.needs_redraw = True self.tetromino_position = (posY - 1, posX) return self.tetromino_position elif direction == 'down' and self.blend(position=(posY + 1, posX)): self.needs_redraw = True self.tetromino_position = (posY + 1, posX) return self.tetromino_position else: return False def 
rotated(self, rotation=None): """ Rotates tetromino """ if rotation is None: rotation = self.tetromino_rotation return rotate(self.current_tetromino.shape, rotation) def block(self, color, shadow=False): """ Sets visual information for tetromino """ colors = { 'blue': (105, 105, 255), 'yellow': (225, 242, 41), 'pink': (242, 41, 195), 'green': (22, 181, 64), 'red': (204, 22, 22), 'orange': (245, 144, 12), 'cyan': (10, 255, 226) } if shadow: end = [90] # end is the alpha value else: end = [ ] # Adding this to the end will not change the array, thus no alpha value border = Surface((BLOCKSIZE, BLOCKSIZE), pygame.SRCALPHA, 32) border.fill(list(map(lambda c: c * 0.5, colors[color])) + end) borderwidth = 2 box = Surface( (BLOCKSIZE - borderwidth * 2, BLOCKSIZE - borderwidth * 2), pygame.SRCALPHA, 32) boxarr = pygame.PixelArray(box) for x in range(len(boxarr)): for y in range(len(boxarr)): boxarr[x][y] = tuple( list( map( lambda c: min(255, int(c * random.uniform( 0.8, 1.2))), colors[color])) + end) del boxarr # deleting boxarr or else the box surface will be 'locked' or something like that and won't blit. border.blit(box, Rect(borderwidth, borderwidth, 0, 0)) return border def lock_tetromino(self): """ This method is called whenever the falling tetromino "dies". `self.matrix` is updated, the lines are counted and cleared, and a new tetromino is chosen. """ self.matrix = self.blend() lines_cleared = self.remove_lines() if lines_cleared == -1: #Indicates that clearing the lines failed. This is due to the tetromino reaching higher than 2 above the skyline. """ End episode: game will be in a terminal state as the skyline was occupied 3 cells high however MaTris can only handle the skyline being occupied by 2 cells high. This causes the memory to be stored as if it were a terminal state. The board is then cleared, and a new episode restarted. """ self.agent.remember_state_action(self.agent.previous_state, self.agent.previous_action, -1000, self.agent.get_current_board(), True) self.agent.update_approximater() self.agent.reset_approximaters() self.gameover() else: self.lines += lines_cleared if lines_cleared: self.score += 100 * (lines_cleared**2) * self.combo if not self.played_highscorebeaten_sound and self.score > self.highscore: self.played_highscorebeaten_sound = True if self.lines >= self.level * 10: self.level += 1 self.combo = self.combo + 1 if lines_cleared else 1 self.set_tetrominoes() if not self.blend() and lines_cleared != -1: self.gameover() self.needs_redraw = True if self.agent_mode == True: #Collects information from the board. 
self.board.update_board_representation( self.create_board_representation()) self.board.set_board_height() self.board.set_holes() self.board.set_column_differences() print(str(self.board)) print("Column Height Differences:" + str(self.board.get_column_differences())) if self.agent.holes == True: print("Holes: " + str(self.board.get_holes())) if self.agent.height == True: print("Height: " + str(self.board.get_board_height())) print(str(self.tetromino_placement)) print("\nTetromino:") for line in range(0, len(self.agent.agent_tetromino[0])): print(str(self.agent.agent_tetromino[0][line])) print("Epsilon: " + str(self.agent.epsilon)) reward = self.agent.update_score_and_lines(self.score, self.lines) print("Score: " + str(self.agent.score)) print("Lines Cleared: " + str(self.agent.lines_cleared)) print("Current Episode number: " + str(self.agent.current_episode + 1) + " / " + str(self.agent.number_of_episodes)) print("**********************************") #Passes tetromino and board information to the agent. self.agent.set_agent_tetromino(self.current_tetromino) self.agent.set_current_board(self.board) #Remembers previous S,A,R,S if self.agent.check_game_over( ) and lines_cleared != -1: #Ends episode if previous turn was terminal #End of episode if self.agent.random_moves == False: self.agent.remember_state_action( self.agent.previous_state, self.agent.previous_action, -1000, self.agent.get_current_board(), True) self.agent.update_approximater() self.agent.reset_approximaters() self.gameover() else: #Continue episode as not in terminal state self.tetromino_placement = self.agent.make_move() if self.tetromino_placement == False: #Tetromino placed in state that causes a game over if self.agent.random_moves == False: #Tetromino placed in state that causes a game over self.agent.remember_state_action( self.agent.previous_state, self.agent.previous_action, -1000, self.agent.get_current_board(), True) self.agent.update_approximater() self.agent.reset_approximaters() self.gameover() else: #Tetromino placed in a non-terminal state. if self.agent.random_moves == False: self.agent.remember_state_action( self.agent.previous_state, self.agent.previous_action, reward, self.agent.get_current_board(), False) self.agent.update_approximater() self.agent.reset_approximaters() self.tetromino_position = (0, self.tetromino_placement[2]) for rotations in range(self.tetromino_placement[0]): self.request_rotation() def remove_lines(self): """ Removes lines from the board """ try: lines = [] for y in range(MATRIX_HEIGHT): #Checks if row if full, for each row line = (y, []) for x in range(MATRIX_WIDTH): if self.matrix[(y, x)]: line[1].append(x) if len(line[1]) == MATRIX_WIDTH: lines.append(y) for line in sorted(lines): #Moves lines down one row for x in range(MATRIX_WIDTH): self.matrix[(line, x)] = None for y in range(0, line + 1)[::-1]: for x in range(MATRIX_WIDTH): self.matrix[(y, x)] = self.matrix.get((y - 1, x), None) return len(lines) except: print("ERROR REMOVING LINES:\t DEBUG INFORMATION") print(self.tetromino_placement) print(self.board.board_representation) return -1 def blend(self, shape=None, position=None, matrix=None, shadow=False): """ Does `shape` at `position` fit in `matrix`? If so, return a new copy of `matrix` where all the squares of `shape` have been placed in `matrix`. Otherwise, return False. This method is often used simply as a test, for example to see if an action by the player is valid. It is also used in `self.draw_surface` to paint the falling tetromino and its shadow on the screen. 
""" if shape is None: shape = self.rotated() if position is None: position = self.tetromino_position copy = dict(self.matrix if matrix is None else matrix) posY, posX = position for x in range(posX, posX + len(shape)): for y in range(posY, posY + len(shape)): if (copy.get((y, x), False) is False and shape[y - posY][x - posX] # shape is outside the matrix or # coordinate is occupied by something else which isn't a shadow copy.get((y, x)) and shape[y - posY][x - posX] and copy[(y, x)][0] != 'shadow'): return False # Blend failed; `shape` at `position` breaks the matrix elif shape[y - posY][x - posX]: copy[(y, x)] = ('shadow', self.shadow_block) if shadow else ( 'block', self.tetromino_block) return copy def construct_surface_of_next_tetromino(self): """ Draws the image of the next tetromino """ shape = self.next_tetromino.shape surf = Surface((len(shape) * BLOCKSIZE, len(shape) * BLOCKSIZE), pygame.SRCALPHA, 32) for y in range(len(shape)): for x in range(len(shape)): if shape[y][x]: surf.blit(self.block(self.next_tetromino.color), (x * BLOCKSIZE, y * BLOCKSIZE)) return surf def create_board_representation(self): lines = [] for y in range(MATRIX_HEIGHT): #Checks if row if full, for each row line = (y, []) for x in range(MATRIX_WIDTH): if self.matrix[(y, x)]: line[1].append(1) else: line[1].append(0) lines.append(line[1]) board = [] for i in range(len(lines)): board.append(lines[i]) return board def serialize_agent(self): """ Serializes the agent. This saves the epsilon value, whether holes or height was used and the current ANN of the agent. """ agent_information = [ self.agent.epsilon, self.agent.holes, self.agent.height, self.agent.current_net ] handler = open(self.agent.file_path + ".obj", 'wb') pickle.dump(agent_information, handler) handler.close()
def gen(self): fh = open(self.package_name + ".sv", "w") fh.write(self.header.replace("file_name", self.package_name + ".sv")) fh.write("`ifndef _%s_\n" % (self.package_name.upper())) fh.write("`define _%s_\n" % (self.package_name.upper())) fh.write("\n") fh.write("package %s;\n" % (self.package_name)) fh.write(" import uvm_pkg::*;\n") fh.write("\n") fh.write(" `include \"%s.sv\"\n" % (self.defines_name)) fh.write(" `include \"%s.sv\"\n" % (self.config_name)) fh.write(" `include \"%s.sv\"\n" % (self.transaction_name)) fh.write(" `include \"%s.sv\"\n" % (self.config_name)) fh.write(" `include \"%s.sv\"\n" % (self.callback_name)) fh.write(" `include \"%s.sv\"\n" % (self.cov_callback_name)) fh.write(" `include \"%s.sv\"\n" % (self.master_driver_name)) fh.write(" `include \"%s.sv\"\n" % (self.master_sequencer_name)) fh.write(" `include \"%s.sv\"\n" % (self.master_sequence_name)) fh.write(" `include \"%s.sv\"\n" % (self.slave_driver_name)) fh.write(" `include \"%s.sv\"\n" % (self.slave_sequencer_name)) fh.write(" `include \"%s.sv\"\n" % (self.slave_sequence_name)) fh.write(" `include \"%s.sv\"\n" % (self.monitor_name)) fh.write(" `include \"%s.sv\"\n" % (self.master_agent_name)) fh.write(" `include \"%s.sv\"\n" % (self.slave_agent_name)) fh.write("\n") fh.write("endpackage: %s\n" % (self.package_name)) fh.write("\n") fh.write("`endif //_%s_\n" % (self.package_name.upper())) fh.close() #Generate agent components agent_defines = defines.defines(self.header, self.agent_setting) agent_defines.gen() agent_interface = interface.interface(self.header, self.agent_setting) agent_interface.gen() agent_cfg = cfg.cfg(self.header, self.agent_setting) agent_cfg.gen() agent_transaction = transaction.transaction(self.header, self.agent_setting) agent_transaction.gen() agent_sequencer = sequencer.sequencer(self.header, self.agent_setting) agent_sequencer.sequencer_gen() agent_sequence = sequence.sequence(self.header, self.agent_setting) agent_sequence.sequence_gen() agent_drv = driver.driver(self.header, self.agent_setting) agent_drv.master_driver_gen() agent_drv.slave_driver_gen() agent_mon = monitor.monitor(self.header, self.agent_setting) agent_mon.monitor_gen() agent_callback = callback.callback(self.header, self.agent_setting) agent_callback.callback_gen() agent_callback.cov_callback_gen() agent_agent = agent.agent(self.header, self.agent_setting) agent_agent.agent_gen()
def genesis(size): population = [agent() for _ in range(size)] population = genes(population) return population
from arguments import get_args from baselines.common.atari_wrappers import make_atari from baselines import bench from baselines import logger from baselines.common.atari_wrappers import wrap_deepmind from agent import agent import os if __name__ == '__main__': if not os.path.exists('logs/'): os.mkdir('logs/') envArgs = get_args() logAddress = 'logs/' + envArgs.env_name + '/' if not os.path.exists(logAddress): os.mkdir(logAddress) logger.configure(logAddress) # start to create the environment environment = make_atari(envArgs.env_name) environment = wrap_deepmind(environment, frame_stack=True) environment = bench.Monitor(environment, logger.get_dir()) # train the agent trainer = agent(environment, envArgs) trainer.learn() environment.close()
def init_agents(): for k in range(2, 26): soc = random.normal(g.average_soc, g.sd_average_soc, 1) g.agents[k] = agent(soc, k, g.time) g.agents[k].copy_all = g.copy_al
def __init__(self, name=None, discount=0.9, lr=None, alpha=1, policy_type='greedy', policy_param={ 'eps': 0.05, 'min_eps': 0.01, 'eps_decay': 0.9999 }, env=hb, suits='rgbyp', players=3, mode='standard', hidden_layers=[200, 200, 200, 150, 100], batch_size=512, l1=0, optimizer='adagrad', mem_size=2000, max_steps=130, plot_frequency=1, discrete_agents=True, Double_DQN_version=1, accelerated=True, games_per_epoch=100): self.name = name self.weights_dir = model_directory # if self.name == None: # date = str(time.strftime('%m%d-%H%M')) # self.name = f'{date}-{mode}-{suits}' if self.name != None: self.model_file = os.path.join(self.weights_dir, self.name + '.h5') self.env = hb.hanabi_env(players, suits, mode) self.iterations_done = 0 self.gamma = discount self.learning_rate = lr self.alpha = alpha self.max_steps = max_steps self.policy_param = policy_param self.hidden_layers = hidden_layers self.discrete_agents = discrete_agents self.epoch = 0 self.epoch_size = games_per_epoch self.epoch_history = {} self.epoch_history['steps'] = [] self.epoch_history['rewards'] = [] self.epoch_history['discounted_rewards'] = [] self.epoch_history['rps'] = [] self.epoch_history['loss'] = [] self.batch_size = batch_size self.plot_frequency = plot_frequency self.suits = suits self.mem_size = mem_size self.players = players self.mode = mode self.l1 = l1 self.Double_DQN_version = Double_DQN_version self.optimizer = get_optimizer(optimizer, lr) self.action_map = self._create_action_map() self.action_space = len(self.action_map) self.action_totals = [0] * self.action_space self.accelerated = accelerated move_func = self._create_valid_moves_function() self.policy = BehaviorPolicy(self.action_space, move_func, policy_type=policy_type, param=policy_param) if self.name != None and os.path.exists(self.model_file): self.online_model = models.load_model(self.model_file) self.target_model = models.load_model(self.model_file) self.target_model.name = 'target_' + self.target_model.name else: self.online_model = create_Q_model(self.env, self.action_space, self.optimizer, self.hidden_layers, self.learning_rate, self.l1, 'online_model') self.online_model.name = 'online_model' self.target_model = create_Q_model(self.env, self.action_space, self.optimizer, self.hidden_layers, self.learning_rate, self.l1, 'target_model') self.target_model.name = 'target_model' self._freeze_target_model() if self.accelerated: self.training_model = training_strategy.build_accelerated_model( self.Double_DQN_version, self.env.get_input_dim(), self.online_model, self.target_model, self.batch_size * self.players, self.optimizer, self.learning_rate, self.gamma) self._update_online_model = training_strategy.get_accelerated_update_strategy( self.action_space, training_model=self.training_model, ) else: self._update_online_model = training_strategy.get_CPU_update_strategy( alpha, self.gamma, Double_DQN_version, self.online_model, self.target_model) self.player = [] for playerID in range(self.players): self.player.append( agent(self.env, self.online_model, self.policy.choose_action, self.mem_size, self.action_map, playerID))
def fuzz(self, this_node=None, path=[]): ''' Call this routine to get the ball rolling. No arguments are necessary as they are both utilized internally during the recursive traversal of the session graph. @type this_node: request (node) @param this_node: (Optional, def=None) Current node that is being fuzzed. @type path: List @param path: (Optional, def=[]) Nodes along the path to the current one. ''' # if no node is specified, we start from root and initialize the session. if not this_node: # we can't fuzz if we don't have at least one target and one request. if not self.transport_media.media_target(): self.database.log("error", "no target specified for job %s" %\ self.session_id) return if not self.edges_from(self.root.id): self.database.log("error", "no request specified for job %s" %\ self.session_id) return this_node = self.root self.total_mutant_index = 0 self.total_num_mutations = self.num_mutations() # If no errors above and not already connected to the agent, initialize the # agent connection. # If the agent cannot be initialized make sure the user is aware of it. if self.agent == None and self.agent_settings != None: try: self.agent = agent(self.root_dir, self.config, self.session_id, self.agent_settings) self.agent.connect() except Exception, ex: self.database.log("error", "failed to establish agent connection for job %s" %\ self.session_id, str(ex)) self.finished_flag = True self.stop_flag = True self.save_status() return # Get the agent to execute try: self.agent.start() except Exception, ex: self.database.log("error", "agent failed to execute command for job %s" %\ self.session_id, str(ex)) self.finished_flag = True self.stop_flag = True self.save_status() return
# # MAC0425/5730 - Inteligencia Artificial - EP1 @ 2013.2 # Autor: Bruno Nunes Leal Faria - nUSP: 8765551 # # FILE: environment.py # import agent import search import time a = agent.agent() # enviroment class # @map - multi dimensional array to hold mine map # @agent - search type to execute class environment: def __init__(self): self.map = [[]] self.graph = {} self.size = 0 self.gold_count = 0 self.gold_left = [] self.gold_locations = [] self.search_type = None # creates matrix from file def create_matrix(self, stream): x = 0 y = 0 # read dimension c = stream.readline()
def __init__(self, inputAgentSigmaNoise=0.1): pygame.init() # RGB color self.__white = (255, 255, 255) self.__black = (0, 0, 0) self.__red = (255, 0, 0) self.__green = (0, 155, 0) self.__blue = (0, 0, 255) # give the game a title pygame.display.set_caption("Keepaway") self.keeperScore = 0 # these are more or less global variables.. # I'm not sure if this is bad or not. self.__worldImage = pygame.image.load("images/soccer_field.png") self.__ballImage = pygame.image.load("images/ball.png") self.__keeperImage = pygame.image.load("images/keeper.png") self.__takerImage = pygame.image.load("images/taker.png") self.__predictedImage = pygame.image.load("images/x.png") self.__debugYellowDotImage = pygame.image.load("images/yellow_dot.png") self.__debugRedDotImage = pygame.image.load("images/red_dot.png") # block sizes are used for collision detection # only 1 size per element because all blocks are squares. block size = side length self.__agent_block_size = 23 self.ball_block_size = 12 self.maxBallSpeed = 4 self.maxPlayerSpeed = 2 # dimensions of the game are the same as the soccer field image self.__display_width = 550 self.display_height = 357 self.__field_center = (self.__display_width / 2, self.display_height / 2) # gameDisplay is a pygame.surface object. it's your screen self.gameDisplay = pygame.display.set_mode((self.__display_width, self.display_height)) self.test_fps = 60 self.train_fps = 10000 self.clock = pygame.time.Clock() # start the ball kinda close to the keeper in the upper left corner self.fieldBall = ball.ball((self.__field_center[0] / 4, self.__field_center[1] / 4), self.maxBallSpeed) # setup all the initial keepers and takers. They are all starting at different field positions, which is why # you can't have a for loop just iterate and declare all of them types = ["keeper", "taker"] self.agentSigmaError = inputAgentSigmaNoise self.keeperArray = [] self.keeperTruePosArray = [] self.keeperTruePosArray.append((12.5, 12.5)) self.keeperTruePosArray.append((25, self.__display_width - 37.5)) self.keeperTruePosArray.append((self.display_height - 37.5, self.__display_width - 37.5)) self.keeperArray.append( agent.agent( self, 0, kUtil.getNoisyVals(self.keeperTruePosArray[0], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed, ) ) self.keeperArray.append( agent.agent( self, 1, kUtil.getNoisyVals(self.keeperTruePosArray[1], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed, ) ) self.keeperArray.append( agent.agent( self, 2, kUtil.getNoisyVals(self.keeperTruePosArray[2], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed, ) ) self.takerArray = [] self.takerTruePosArray = [] self.takerTruePosArray.append((self.display_height - 25, 25)) self.takerTruePosArray.append((self.display_height - 37.5, 50)) self.takerArray.append( agent.agent( self, 0, self.takerTruePosArray[0], self.agentSigmaError, types[1], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed, ) ) self.takerArray.append( agent.agent( self, 1, self.takerTruePosArray[1], self.agentSigmaError, types[1], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed, ) ) # 3 different font sizes 
self.smallfont = pygame.font.SysFont("comicsansms", 25) # 25 is font sizes self.medfont = pygame.font.SysFont("comicsansms", 50) self.largefont = pygame.font.SysFont("comicsansms", 80) self.verysmallfont = pygame.font.SysFont("comicsansms", 12)
from grid import grid from gridQController import gridQController from agent import agent import matplotlib.pyplot as plt gridSize = 5 grid = grid(gridSize) controller = gridQController(gridSize) agentSmith = agent(grid, controller) iterations = 200000 rewards = [0] * iterations for i in range(0, iterations): agentSmith.step() if i > 100000: controller.setGreed(1) rewards[i] = agentSmith.getReward() plt.plot(rewards) plt.show() print("Total reward: " + str(agentSmith.getReward()) + " Iterations: " + str(iterations) + " Success rate: " + str(agentSmith.getReward() / iterations))
def __init__(self, phone_number, pin): self.agent = agent() self.agent.autho(phone_number, pin) self.steps = []
def __init__(self, inputAgentSigmaNoise = .1, alreadyTrained = True, bevCustomTileSize = None): pygame.init() #RGB color self.__white = (255,255,255) self.__black = (0,0,0) self.__red = (255,0,0) self.__green = (0,155,0) self.__blue = (0,0,255) #give the game a title pygame.display.set_caption('Keepaway') self.keeperScore = 0 #these are more or less global variables.. #I'm not sure if this is bad or not. self.__worldImage = pygame.image.load('images/soccer_field.png') self.__ballImage = pygame.image.load('images/ball.png') self.__keeperImage = pygame.image.load('images/keeper.png') self.__keeperGoldImage = pygame.image.load('images/keeperGold.png') self.__takerImage = pygame.image.load('images/taker.png') self.__predictedImage = pygame.image.load('images/x.png') self.__debugYellowDotImage = pygame.image.load('images/yellow_dot.png') self.__debugRedDotImage = pygame.image.load('images/red_dot.png') self.__debugBlackDotImage = pygame.image.load('images/black_dot.png') self.__debugWhiteDotImage = pygame.image.load('images/white_dot.png') self.__debugBlueDotImage = pygame.image.load('images/blue_dot.png') self.__debugTakerPathTile = pygame.image.load('images/takerPathSquare.png') self.__debugKeeperPathTile = pygame.image.load('images/keeperPathSquare.png') self.__debugKeeperTile = pygame.image.load('images/keeperSquare.png') self.__debugTakerTile = pygame.image.load('images/takerSquare.png') self.__debugEmptyTile = pygame.image.load('images/emptySquare.png') self.__debugTakerPathTileTwo = pygame.image.load('images/takerPathSquare2.png') self.__debugKeeperPathTileTwo = pygame.image.load('images/keeperPathSquare2.png') #block sizes are used for collision detection #only 1 size per element because all blocks are squares. block size = side length self.__agent_block_size = 23 self.__ball_block_size = 12 self.maxBallSpeed= 4 self.maxPlayerSpeed = 2 #self.rDecision = None #dimensions of the game are the same as the soccer field image self.__display_width = 550 self.__display_height = 357 self.displayGraphics = True self.__field_center = (self.__display_width / 2 , self.__display_height / 2) #gameDisplay is a pygame.surface object. it's your screen self.gameDisplay = pygame.display.set_mode((self.__display_width,self.__display_height)) self.test_fps = 60 self.train_fps = 10000 self.clock = pygame.time.Clock() #start the ball kinda close to the keeper in the upper left corner self.fieldBall = ball.ball( (self.__field_center[0]/4, self.__field_center[1]/4), self.maxBallSpeed) #the simple state variables for agents like NEAT, novelty search, and maybe sarsa self.simpleStateVars = None self.alreadyTrained = alreadyTrained #False if you want agent to learn and True if you want to demo #setup all the initial keepers and takers. 
They are all starting at different field positions, which is why #you can't have a for loop just iterate and declare all of them types = ["keeper", "taker"] self.agentSigmaError = inputAgentSigmaNoise self.keeperArray = [] self.keeperTruePosArray = [] self.keeperTruePosArray.append((12.5, 12.5)) self.keeperTruePosArray.append((25, self.__display_width - 37.5)) self.keeperTruePosArray.append((self.__display_height - 37.5, self.__display_width - 37.5)) self.keeperArray.append(agent.agent(self, 0, kUtil.getNoisyVals( self.keeperTruePosArray[0], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed)) self.keeperArray.append(agent.agent(self, 1, kUtil.getNoisyVals( self.keeperTruePosArray[1], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed)) self.keeperArray.append(agent.agent(self, 2, kUtil.getNoisyVals( self.keeperTruePosArray[2], self.agentSigmaError), self.agentSigmaError, types[0], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed)) self.takerArray = [] self.takerTruePosArray = [] self.takerTruePosArray.append((self.__display_height - 25, 25)) self.takerTruePosArray.append((self.__display_height - 37.5, 50)) self.takerArray.append(agent.agent(self, 0, self.takerTruePosArray[0], self.agentSigmaError, types[1], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed)) self.takerArray.append(agent.agent(self, 1, self.takerTruePosArray[1], self.agentSigmaError, types[1], kUtil.getNoisyVals(self.fieldBall.trueBallPos, self.agentSigmaError), self.maxPlayerSpeed, self.maxBallSpeed)) #3 different font sizes self.smallfont = pygame.font.SysFont("comicsansms",25) #25 is font sizes self.medfont = pygame.font.SysFont("comicsansms",50) self.largefont = pygame.font.SysFont("comicsansms",80) self.verysmallfont = pygame.font.SysFont("comicsansms", 12) #birdsEyeView generator for agents like hyperNEAT: if bevCustomTileSize == None: bevCustomTileSize = self.__agent_block_size self.bev = birdsEyeView.birdsEyeView(self.__display_width, self.__display_height, bevCustomTileSize, self.__ball_block_size ) self.bev_grid_as_grid = self.bev.getBirdsEyeView(self.keeperArray, self.takerArray); self.bev_grid_as_list = self.bev.getBirdsEyeViewAsList(self.keeperArray, self.takerArray); self.bev_substrate = self.bev.getSubstrate(self.keeperArray, self.takerArray); self.bev_keeper_sub_index = self.bev.getBallHolderTile(self.keeperArray)
sigmaM = 10.
sigmaN = 1.
# tick length per time period
deltaT = 100
# holder list for agent objects
agentList = []
# price, return, and volume time series
price = pf * np.ones(Tmax + 1)
ret = np.zeros(Tmax + 1)
totalV = np.zeros(Tmax + 1)
rprice = np.zeros((Tmax + 1) // 100)  # integer division: np.zeros rejects float lengths
# create agents in a list of objects
for i in range(nAgents):
    agentList.append(agent(sigmaF, sigmaM, sigmaN, kMax, Lmin, Lmax))
# create set of forecasts
forecastSet = forecasts(Lmax, pf, sigmae)
# create order book
marketBook = orderBook(600., 1400., deltaP)
# set up initial prices
price[0:Tinit] = pf * (1. + 0.001 * np.random.randn(Tinit))
ret[0:Tinit] = 0.001 * np.random.randn(Tinit)
for t in range(Tinit, Tmax):
    # update all forecasts
    forecastSet.updateForecasts(t, price[t], ret)
    tradePrice = -1
    # draw a random agent (start at 0 so the first agent can be selected too)
    randomAgent = agentList[np.random.randint(0, nAgents)]
def agent_ip():
    # Write this machine's IP address, as reported by the agent, to a file.
    with open('agent_ip.txt', 'w') as agentout:
        ag = agent()
        ag.get_ip(agentout)
parser.add_argument("--exploration", type=float, default=0.2)
parser.add_argument("--save_freq", type=int, default=100)
parser.add_argument("--save_folder", type=str, default="model")
parser.add_argument("--reload", type=str, default=None)
args = parser.parse_args()

gmm = gameMgr.tetris(20, 10)
epoch = 0
write_epoch = 100
reward_history = collections.deque(maxlen=1000)
loss_history = collections.deque(maxlen=1000)
agt = agent.agent(gmm.getActionList(), gmm.getStateSize(),
                  n_batch=args.batch_size, replay_size=args.replay_size,
                  learning_rate=args.learn_rate, discountRate=args.discount_rate,
                  saveFreq=args.save_freq, saveFolder=args.save_folder,
                  memoryLimit=args.memory_limit)
if args.reload:
    agt.load(args.reload)
fig = plt.figure(figsize=(gmm.getScreenSize()[0], gmm.getScreenSize()[1]))
fig.canvas.set_window_title("TeTris")

# persist the run settings and open the training log (open() instead of the Python 2-only file())
with open(os.path.join(args.save_folder, "settings.dat"), "w") as setFile:
    setFile.write(str(args))
logFile = open(os.path.join(args.save_folder, "log.dat"), "w")
logCSV = csv.writer(logFile)
logCSV.writerow(["epoch", "last_loss", "loss_mean", "last_reward", "mean_reward",
def parameter_camp_test(parameter_list): """ This function should take a camp ID, train an agent for that specific campaign and then test the agent for that campaign. We start by defining the hyper-parameters. It (currently) takes the whole campaign as an episode. """ epsilon_max = 0.9 epsilon_min = 0.05 discount_factor = 1 batch_size = 32 memory_cap = 100000 update_frequency = 100 episode_length = 96 camp_id = parameter_list[0] budget_scaling = parameter_list[1] initial_Lambda = parameter_list[2] epsilon_decay_rate = parameter_list[3] budget_init_var = parameter_list[4] * budget_scaling step_length = parameter_list[5] learning_rate = parameter_list[6] seed = parameter_list[7] action_size = 7 state_size = 5 tf.reset_default_graph() np.random.seed(seed) tf.set_random_seed(seed) sess = tf.Session() rtb_agent = agent(epsilon_max, epsilon_min, epsilon_decay_rate, discount_factor, batch_size, memory_cap, state_size, action_size, learning_rate, sess) camp_n = [ '1458', '2259', '2997', '2821', '3358', '2261', '3386', '3427', '3476' ] train_file_dict, test_file_dict = get_data(camp_n) test_file_dict = test_file_dict[camp_id] total_budget = 0 total_impressions = 0 global_step_counter = 0 for i in camp_n: rtb_environment = RTB_environment(train_file_dict[i], episode_length, step_length) total_budget += train_file_dict[i]['budget'] total_impressions += train_file_dict[i]['imp'] while rtb_environment.data_count > 0: episode_size = min(episode_length * step_length, rtb_environment.data_count) budget = train_file_dict[i]['budget'] * min(rtb_environment.data_count, episode_size) \ / train_file_dict[i]['imp'] * budget_scaling budget = np.random.normal(budget, budget_init_var) state, reward, termination = rtb_environment.reset( budget, initial_Lambda) while not termination: action, _, _ = rtb_agent.action(state) next_state, reward, termination = rtb_environment.step(action) memory_sample = (action, state, reward, next_state, termination) rtb_agent.replay_memory.store_sample(memory_sample) rtb_agent.q_learning() if global_step_counter % update_frequency == 0: rtb_agent.target_network_update() rtb_agent.e_greedy_policy.epsilon_update(global_step_counter) state = next_state global_step_counter += 1 epsilon = rtb_agent.e_greedy_policy.epsilon budget = total_budget / total_impressions * test_file_dict[ 'imp'] * budget_scaling imp, click, cost, wr, ecpc, ecpi, camp_info = drlb_test( test_file_dict, budget, initial_Lambda, rtb_agent, episode_length, step_length) sess.close() lin_bid_result = list( lin_bidding_test(train_file_dict[camp_id], test_file_dict, budget, 'historical')) rand_bid_result = list( rand_bidding_test(train_file_dict[camp_id], test_file_dict, budget, 'uniform')) result_dict = { 'camp_id': camp_id, 'parameters': parameter_list[1:], 'epsilon': epsilon, 'total budget': budget, 'auctions': test_file_dict['imp'], 'camp_result': np.array([imp, click, cost, wr, ecpc, ecpi]).tolist(), 'budget': camp_info[0], 'lambda': camp_info[1], 'unimod': camp_info[2], 'action values': camp_info[3], 'lin_bid_result': lin_bid_result, 'rand_bid_result': rand_bid_result } return result_dict
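# The per-episode budget line in parameter_camp_test scales the campaign's total budget by the
# share of impressions covered in the current episode and then adds Gaussian noise. A small worked
# example of that arithmetic as a sketch; all numbers below are made up for illustration and
# step_length = 500 is a hypothetical value, not taken from the data.
import numpy as np

camp_budget = 1_000_000        # hypothetical total training budget for campaign i
camp_imp = 400_000             # hypothetical total impressions in campaign i
episode_size = 96 * 500        # episode_length * step_length = 48,000 auctions
budget_scaling = 0.5
budget_init_var = 0.1 * camp_budget * budget_scaling

episode_budget = camp_budget * episode_size / camp_imp * budget_scaling   # 60,000.0
episode_budget = np.random.normal(episode_budget, budget_init_var)        # noisy episode budget
print(episode_budget)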
SAVE_FREQ = 50    # episodes between saving the network
SAVE_PATH = "ai.data"
MIN_NO_OPS = 4    # lower limit of the no-ops inserted at the start of each episode
MAX_NO_OPS = 30   # upper limit of the no-ops inserted at the start of each episode

"""
The main training loop. A random number of no-ops is inserted at the start of each episode.
The DQN is saved periodically.
"""
def training_loop(env, ai, total_frames):
    while env.frame_count < total_frames:
        t0 = time.time()
        no_ops = random.randrange(MIN_NO_OPS, MAX_NO_OPS + 1)
        reward = env.run_episode(ai, no_ops)
        print(env.episode, env.frame_count, reward, time.time() - t0)
        if (env.episode % SAVE_FREQ) == SAVE_FREQ - 1:
            ai.save(SAVE_PATH)

# Running the training
env = enviroment(GAME)
ai = agent(env.n_actions)
training_loop(env, ai, TOTAL_FRAMES)
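# For context, a minimal sketch of how an episode driver could apply the no-op phase described
# above. It assumes a hypothetical Gym-style API with reset()/step() returning (state, reward,
# done) and a no-op action index of 0; the project's actual enviroment.run_episode may differ.
import random

NOOP_ACTION = 0  # hypothetical index of the "do nothing" action

def run_episode_sketch(env, ai, no_ops):
    """Illustrative only: burn `no_ops` frames with the no-op action, then let the agent act."""
    state = env.reset()
    total_reward = 0.0
    for _ in range(no_ops):                       # randomize the start state
        state, reward, done = env.step(NOOP_ACTION)
        total_reward += reward
        if done:
            return total_reward
    done = False
    while not done:
        action = ai.act(state)                    # assumed agent API
        state, reward, done = env.step(action)
        total_reward += reward
    return total_reward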
if gpus:
    try:
        # Allow two TensorFlow processes to share the GPU at the same time (TensorBoard & the model)
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # List the logical GPUs
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

# Create the agent and the environment
agent = agent(MODEL_NAME, RESTART)
env = environement(IP, PORT, S_PER_EPISODE)
env.startClient()

# Start the training thread
trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
trainer_thread.start()

# Wait until the thread is initialized
while not agent.training_initialized:
    time.sleep(0.01)

# Run one prediction up front, since the first one is always slow
print(agent.get_qs(np.ones((1, 16))))
def get_agent(features, players, args):
    agent = ag.agent(features, players)
    if args.load:
        agent.model = load_model(args.load)
    return agent
def run(rule, attacker, epochs): torch.manual_seed(1) start_time = time.time() N = 10 N1 = 5 tr_split_len1 = 2000 te_split_len1 = 400 tr_split_len2 = 2000 te_split_len2 = 400 A = [] train_data1, test_data1 = readData_mnist() train_data2, test_data2 = readData_synthetic_digits() remaining_tr, remaining_te = train_data2, test_data2 Parameters = [] # attacker_num = 2 # attacker = [2, 7] attacker_num = len(attacker) # Accumulated_Loss = np.zeros((N, N)) Accumulated_Loss = np.ones((N, N)) average_train_loss, average_train_acc = [], [] average_test_loss, average_test_acc = [], [] individual_average_train_loss, individual_average_train_acc = np.zeros( (epochs, N)), np.zeros((epochs, N)) individual_average_test_loss, individual_average_test_acc = np.zeros( (epochs, N)), np.zeros((epochs, N)) for k in range(0, N): net = Net().to(device) # print(net) # summary(net, (1,28,28), batch_size=-1) a = agent(net) A.append(a) Parameters.append({}) for name, param in a.net.named_parameters(): if param.requires_grad: Parameters[k][name] = param.data for epoch in range(epochs): print('epoch {}'.format(epoch + 1)) Train_loader_iter = [] Test_loader = [] total_train_loss = 0. total_train_acc = 0. total_eval_loss = 0. total_eval_acc = 0. remaining_tr, remaining_te = train_data2, test_data2 Count = np.zeros((N, )) ave_train_loss = 0. ave_train_acc = 0. ave_eval_loss = 0. ave_eval_acc = 0. nanCount = 0 for k in range(0, N): a = A[k] a.train_loss = 0. a.train_acc = 0. if k < N1: train_loader_no, test_loader_no = generateData_mnist( train_data1, test_data1, tr_split_len1, te_split_len1, k) else: train_loader_no, test_loader_no, remaining_tr, remaining_te = generateData_synthetic_digits( remaining_tr, remaining_te, tr_split_len2, te_split_len2) Train_loader_iter.append(iter(train_loader_no)) Test_loader.append(test_loader_no) # for iteration in range(0, tr_split_len//64): # for k in range(0, N): # training----------------------------- try: while True: A_last = deepcopy(A) Batch_X, Batch_Y = {}, {} for k in range(0, N): batch_x, batch_y = next(Train_loader_iter[k]) Batch_X[k] = batch_x.to(device) Batch_Y[k] = batch_y.to(device) if k in attacker: continue # 5 agents, get access to 1, 1/2, 1/3, 1/5, 1/10 data, so their models have different accuracy if k % 5 == 0: if random.randint(0, 1) in [0]: continue if k % 5 == 1: if random.randint(0, 2) in [0, 1]: continue if k % 5 in [2, 3]: if random.randint(0, 3) in [0, 1, 2]: continue # if k % 5 == 3: # if random.randint(0, 9) in [0,1,2,3,4,5,6,7,8]: # continue a = A[k] loss, acc = a.optimize(batch_x.to(device), batch_y.to(device)) total_train_loss += loss total_train_acc += acc Count[k] += len(batch_x) A, Accumulated_Loss = cooperation(A, A_last, Batch_X, Batch_Y, Accumulated_Loss, rule, attacker) # print(Accumulated_Loss) except StopIteration: # print(iteration) Eval_count = np.zeros((N, )) for k in range(0, N): if k in attacker: continue print('Agent: {:d}, Train Loss: {:.6f}, Acc: {:.6f}'.format( k, A[k].train_loss / Count[k], A[k].train_acc / Count[k])) individual_average_train_loss[epoch, k] = A[k].train_loss / Count[k] individual_average_train_acc[epoch, k] = A[k].train_acc / Count[k] if not (math.isnan(A[k].train_loss / Count[k]) or math.isnan(A[k].train_acc / Count[k])): ave_train_loss += A[k].train_loss / Count[k] ave_train_acc += A[k].train_acc / Count[k] else: nanCount += 1 # evaluation-------------------------------- A[k].net.eval() eval_loss = 0. eval_acc = 0. 
for batch_x, batch_y in Test_loader[k]: batch_x, batch_y = Variable( batch_x, volatile=True).to(device), Variable( batch_y, volatile=True).to(device) out = A[k].net(batch_x) loss_func = torch.nn.CrossEntropyLoss() loss = loss_func(out, batch_y) eval_loss += loss.item() total_eval_loss += loss.item() pred = torch.max(out, 1)[1] num_correct = (pred == batch_y).sum() eval_acc += num_correct.item() total_eval_acc += num_correct.item() Eval_count[k] += len(batch_x) if not (math.isnan(eval_loss / Eval_count[k]) or math.isnan(eval_acc / Eval_count[k])): ave_eval_loss += eval_loss / Eval_count[k] ave_eval_acc += eval_acc / Eval_count[k] print('Agent: {:d}, Test Loss: {:.6f}, Acc: {:.6f}'.format( k, eval_loss / Eval_count[k], eval_acc / Eval_count[k])) individual_average_test_loss[epoch, k] = eval_loss / Eval_count[k] individual_average_test_acc[epoch, k] = eval_acc / Eval_count[k] # print('Total Average Train Loss: {:.6f}, Train Acc: {:.6f}'.format(total_train_loss / sum(Count), # total_train_acc / sum(Count))) # average_train_loss.append(total_train_loss / sum(Count)) # average_train_acc.append(total_train_acc / sum(Count)) # print('Total Average Test Loss: {:.6f}, Test Acc: {:.6f}'.format(total_eval_loss / sum(Eval_count), # total_eval_acc / sum(Eval_count))) # # print('Training time by far: {:.2f}s'.format(time.time() - start_time)) # average_test_loss.append(total_eval_loss / sum(Eval_count)) # average_test_acc.append(total_eval_acc / sum(Eval_count)) print('Total Average Train Loss: {:.6f}, Train Acc: {:.6f}'.format( ave_train_loss / (N - nanCount - attacker_num), ave_train_acc / (N - nanCount - attacker_num))) average_train_loss.append(ave_train_loss / (N - nanCount - attacker_num)) average_train_acc.append(ave_train_acc / (N - nanCount - attacker_num)) print('Total Average Test Loss: {:.6f}, Test Acc: {:.6f}'.format( ave_eval_loss / (N - attacker_num), ave_eval_acc / (N - attacker_num))) print('Training time by far: {:.2f}s'.format(time.time() - start_time)) average_test_loss.append(ave_eval_loss / (N - attacker_num)) average_test_acc.append(ave_eval_acc / (N - attacker_num)) if epoch % 10 == 0 or epoch == epochs - 1: if attacker_num == 0: try: os.makedirs("results") except OSError: print("Creation of the directory %s failed") np.save('results/average_train_loss_%s.npy' % rule, average_train_loss) np.save('results/average_train_acc_%s.npy' % rule, average_train_acc) np.save('results/average_test_loss_%s.npy' % rule, average_test_loss) np.save('results/average_test_acc_%s.npy' % rule, average_test_acc) np.save('results/individual_average_train_loss_%s.npy' % rule, individual_average_train_loss) np.save('results/individual_average_train_acc_%s.npy' % rule, individual_average_train_acc) np.save('results/individual_average_test_loss_%s.npy' % rule, individual_average_test_loss) np.save('results/individual_average_test_acc_%s.npy' % rule, individual_average_test_acc) else: try: os.makedirs("results/attacked/%d" % attacker_num) except OSError: print("Creation of the directory %s failed") np.save( 'results/attacked/%d/average_train_loss_%s.npy' % (attacker_num, rule), average_train_loss) np.save( 'results/attacked/%d/average_train_acc_%s.npy' % (attacker_num, rule), average_train_acc) np.save( 'results/attacked/%d/average_test_loss_%s.npy' % (attacker_num, rule), average_test_loss) np.save( 'results/attacked/%d/average_test_acc_%s.npy' % (attacker_num, rule), average_test_acc) np.save( 'results/attacked/%d/individual_average_train_loss_%s.npy' % (attacker_num, rule), individual_average_train_loss) 
np.save( 'results/attacked/%d/individual_average_train_acc_%s.npy' % (attacker_num, rule), individual_average_train_acc) np.save( 'results/attacked/%d/individual_average_test_loss_%s.npy' % (attacker_num, rule), individual_average_test_loss) np.save( 'results/attacked/%d/individual_average_test_acc_%s.npy' % (attacker_num, rule), individual_average_test_acc)
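# The cooperation(...) call inside the training loop above is where each agent mixes its
# neighbours' models. As a rough sketch, one plausible rule under these assumptions is to weight
# each non-attacker neighbour's parameters by the inverse of its accumulated loss and load the
# mixture back into agent k's network. This is illustrative only, not the project's exact rule,
# and cooperation_sketch is a hypothetical helper name.
import torch

def cooperation_sketch(A, A_last, accumulated_loss, k, attacker):
    """Illustrative only: agent k averages neighbours' parameters, weighted by 1 / accumulated loss."""
    weights, states = [], []
    for j, neighbour in enumerate(A_last):
        if j in attacker:
            continue                                   # ignore models from known attackers
        weights.append(1.0 / (accumulated_loss[k][j] + 1e-8))
        states.append(neighbour.net.state_dict())
    total = sum(weights)
    mixed = {name: sum(w * sd[name] for w, sd in zip(weights, states)) / total
             for name in states[0]}
    A[k].net.load_state_dict(mixed)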
import pygame
from Grid import Grid
from consts import SCREEN_SIZE
import agent

pygame.init()
pygame.display.set_caption('2048')
pygame.font.init()
clock = pygame.time.Clock()
clock.tick(60)
screen = pygame.display.set_mode([SCREEN_SIZE, SCREEN_SIZE])
screen.fill((127, 127, 127))

g = Grid(pygame, screen)
a = agent.agent()
running = True
agent_playing = False
g.render()

while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_i:
                agent_playing = not agent_playing
                g.reset(hardreset=True)
                break
            if event.key == pygame.K_r:
                g.reset()
def test_angleAgent(self):
    a1 = agent.agent((0.010631645330612073, 5.000750148780534), self.unitTestSigma, "Keeper", (0, 0))
    a2 = agent.agent((-0.008793653992994898, -0.0003569779220770502), self.unitTestSigma, "Taker", (0, 0))
    a3 = agent.agent((5.000443882611892, -0.017223221164217175), self.unitTestSigma, "Keeper", (0, 0))
    self.assertAlmostEqual(__getCosAngle(a1, a2, a3), 0, 1)
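# The angle tests rely on a cosine helper with the vertex as the middle argument. A minimal sketch
# of what kUtil.cosTheta(a, vertex, b) is assumed to compute (the cosine of the angle at `vertex`
# between the rays toward a and b, via the dot product); the real utility may differ.
import math

def cos_theta_sketch(a, vertex, b):
    """Illustrative only: cosine of the angle at `vertex` formed by points a and b."""
    v1 = (a[0] - vertex[0], a[1] - vertex[1])
    v2 = (b[0] - vertex[0], b[1] - vertex[1])
    dot = v1[0] * v2[0] + v1[1] * v2[1]
    norm = math.hypot(*v1) * math.hypot(*v2)
    return dot / norm if norm else 0.0

# Three roughly perpendicular points give a cosine near 0, matching test_angleAgent above.
print(cos_theta_sketch((0, 5), (0, 0), (5, 0)))  # ~0.0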
                 'cin': pChans[6].reader(),
                 'cout': pChans[7].writer(),
                 'cnote': pChans[8].writer(),
                 'items': []})
pObjects.append({'name': 'Agent #2',
                 'cin': pChans[9].reader(),
                 'cout': pChans[10].writer(),
                 'cnote': pChans[11].writer(),
                 'items': []})

# Initialize client/player processes
pAgents = []
pAgents.append(player.player(-pChans[0], +pChans[1], +pChans[2]))
pAgents.append(agent.agent(-pChans[3], +pChans[4], +pChans[5], 'Agent #0'))
pAgents.append(agent.agent(-pChans[6], +pChans[7], +pChans[8], 'Agent #1'))
pAgents.append(agent.agent(-pChans[9], +pChans[10], +pChans[11], 'Agent #2'))

##### Initialize world
# Room channels
rChans = Channel() * 4
# Room layout:
# 0 1
# 3 2

# Room 0
# Add all player objects
rooms = []
rooms.append(room.room(
def run(rule, attacker, epochs): torch.manual_seed(0) start_time = time.time() N = 30 A = [] batch_size = 10 train_data, test_data = readData() Parameters = [] attacker_num = len(attacker) # Accumulated_Loss = np.zeros((N, N)) Accumulated_Loss = np.ones((N, N)) middle1_neurons = 50 Train_loader, Test_loader = [], [] Val_loader_iter = [] Val_loader = [] average_train_loss, average_train_acc = [], [] average_test_loss, average_test_acc = [], [] individual_average_train_loss, individual_average_train_acc = np.zeros( (epochs, N)), np.zeros((epochs, N)) individual_average_test_loss, individual_average_test_acc = np.zeros( (epochs, N)), np.zeros((epochs, N)) for k in range(0, N): # net = Net(n_feature=561, n_hidden1=middle1_neurons, n_output=6) net = linearRegression(561, 6) a = agent(net) A.append(a) train_loader_no, val_loader_no, test_loader_no = generateData( train_data, test_data, k + 1, batch_size) Train_loader.append(train_loader_no) Test_loader.append(test_loader_no) Val_loader.append(val_loader_no) Val_loader_iter.append(iter(val_loader_no)) for epoch in range(epochs): print('epoch {}'.format(epoch + 1)) Train_loader_iter = [] total_train_loss = 0. total_train_acc = 0. total_eval_loss = 0. total_eval_acc = 0. Count = np.zeros((N, )) ave_train_loss = 0. ave_train_acc = 0. ave_eval_loss = 0. ave_eval_acc = 0. nanCount = 0 for k in range(0, N): a = A[k] a.train_loss = 0. a.train_acc = 0. Train_loader_iter.append(iter(Train_loader[k])) try: while True: A_last = deepcopy(A) Batch_X, Batch_Y = {}, {} for k in range(0, N): # if k in attacker: # continue batch_x, batch_y = Train_loader_iter[k].next() Batch_X[k] = batch_x Batch_Y[k] = batch_y # only process 1/10 data for 1/3 of agents if k % 3 == 0: if random.randint( 0, 10) in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: continue # if k % 3 == 0: # train_loader = Train_loader_iter[k].next() # batch_x, batch_y = (train_loader[0]).narrow(0,0,1), (train_loader[1]).narrow(0,0,1) # else: # batch_x, batch_y = Train_loader_iter[k].next() # # Batch_X.append(batch_x) # Batch_Y.append(batch_y) # if k % 3 == 0: # if random.randint(0, 5) == 1: # pass # batch_x = gaussian(batch_x, 5, 5) # batch_y = torch.LongTensor(np.random.randint(6, size=batch_size)) # if random.randint(0, 2) == 1: # batch_y = torch.LongTensor(np.random.randint(6, size=batch_size)) # if (k+1) % 5 == 0: # try: # batch_x, batch_y = Train_loader_iter[k].next() # except: # Train_loader_iter[k] = iter(Train_loader[k]) # batch_x, batch_y = Train_loader_iter[k].next() # else: # batch_x, batch_y = Train_loader_iter[k].next() a = A[k] loss, acc = a.optimize(batch_x, batch_y) if math.isnan(loss) or math.isnan(acc): continue total_train_acc += acc # try: # val_x, val_y = Val_loader_iter[k].next() # except: # Val_loader_iter[k] = iter(Val_loader[k]) # val_x, val_y = Val_loader_iter[k].next() # Batch_X.append(val_x) # Batch_Y.append(val_y) Count[k] += len(batch_x) A, Accumulated_Loss = cooperation(A, A_last, Batch_X, Batch_Y, Accumulated_Loss, rule, attacker) # print(Accumulated_Loss) except StopIteration: # print(iteration) Eval_count = np.zeros((N, )) for k in range(0, N): if k in attacker: continue print('Agent: {:d}, Train Loss: {:.6f}, Acc: {:.6f}'.format( k, A[k].train_loss / Count[k], A[k].train_acc / Count[k])) individual_average_train_loss[epoch, k] = A[k].train_loss / Count[k] individual_average_train_acc[epoch, k] = A[k].train_acc / Count[k] if not (math.isnan(A[k].train_loss / Count[k]) or math.isnan(A[k].train_acc / Count[k])): ave_train_loss += A[k].train_loss / Count[k] ave_train_acc += 
A[k].train_acc / Count[k] else: nanCount += 1 # evaluation-------------------------------- A[k].net.eval() eval_loss = 0. eval_acc = 0. for batch_x, batch_y in Test_loader[k]: batch_x, batch_y = Variable( batch_x, volatile=True), Variable(batch_y, volatile=True) out = A[k].net(batch_x) loss_func = torch.nn.CrossEntropyLoss() loss = loss_func(out, batch_y) pred = torch.max(out, 1)[1] num_correct = (pred == batch_y).sum() if math.isnan(loss) or math.isnan(num_correct): continue eval_loss += loss.item() eval_acc += num_correct.item() total_eval_loss += loss.item() total_eval_acc += num_correct.item() Eval_count[k] += len(batch_x) if not (math.isnan(eval_loss / Eval_count[k]) or math.isnan(eval_acc / Eval_count[k])): ave_eval_loss += eval_loss / Eval_count[k] ave_eval_acc += eval_acc / Eval_count[k] print('Agent: {:d}, Test Loss: {:.6f}, Acc: {:.6f}'.format( k, eval_loss / Eval_count[k], eval_acc / Eval_count[k])) individual_average_test_loss[epoch, k] = eval_loss / Eval_count[k] individual_average_test_acc[epoch, k] = eval_acc / Eval_count[k] try: print('Total Average Train Loss: {:.6f}, Train Acc: {:.6f}'.format( ave_train_loss / (N - nanCount - attacker_num), ave_train_acc / (N - nanCount - attacker_num))) average_train_loss.append(ave_train_loss / (N - nanCount - attacker_num)) average_train_acc.append(ave_train_acc / (N - nanCount - attacker_num)) print('Total Average Test Loss: {:.6f}, Test Acc: {:.6f}'.format( ave_eval_loss / (N - attacker_num), ave_eval_acc / (N - attacker_num))) except: pass print('Training time by far: {:.2f}s'.format(time.time() - start_time)) average_test_loss.append(ave_eval_loss / (N - attacker_num)) average_test_acc.append(ave_eval_acc / (N - attacker_num)) if epoch % 10 == 0 or epoch == epochs - 1: if attacker_num == 0: try: os.makedirs("results") except OSError: print("Creation of the directory %s failed") np.save('results/average_train_loss_%s.npy' % rule, average_train_loss) np.save('results/average_train_acc_%s.npy' % rule, average_train_acc) np.save('results/average_test_loss_%s.npy' % rule, average_test_loss) np.save('results/average_test_acc_%s.npy' % rule, average_test_acc) np.save('results/individual_average_train_loss_%s.npy' % rule, individual_average_train_loss) np.save('results/individual_average_train_acc_%s.npy' % rule, individual_average_train_acc) np.save('results/individual_average_test_loss_%s.npy' % rule, individual_average_test_loss) np.save('results/individual_average_test_acc_%s.npy' % rule, individual_average_test_acc) else: try: os.makedirs("results/attacked/%d" % attacker_num) except OSError: print("Creation of the directory %s failed") np.save( 'results/attacked/%d/average_train_loss_%s.npy' % (attacker_num, rule), average_train_loss) np.save( 'results/attacked/%d/average_train_acc_%s.npy' % (attacker_num, rule), average_train_acc) np.save( 'results/attacked/%d/average_test_loss_%s.npy' % (attacker_num, rule), average_test_loss) np.save( 'results/attacked/%d/average_test_acc_%s.npy' % (attacker_num, rule), average_test_acc) np.save( 'results/attacked/%d/individual_average_train_loss_%s.npy' % (attacker_num, rule), individual_average_train_loss) np.save( 'results/attacked/%d/individual_average_train_acc_%s.npy' % (attacker_num, rule), individual_average_train_acc) np.save( 'results/attacked/%d/individual_average_test_loss_%s.npy' % (attacker_num, rule), individual_average_test_loss) np.save( 'results/attacked/%d/individual_average_test_acc_%s.npy' % (attacker_num, rule), individual_average_test_acc)
def simulate(self):
    a = agent()
    a.load_memory()
    s = snake()
    offset = 50
    pygame.init()
    screen = pygame.display.set_mode((400, 450))
    head_color = self.pink
    snake_color = self.green
    pygame.display.set_caption("snake.ai")
    self.my_font = pygame.font.SysFont("arial", 24)
    self.surface = self.my_font.render(self.intro, True, self.blue, self.pink)
    running = True
    screen.fill(self.white)
    pygame.draw.rect(screen, self.green, [0, 200, 40, 40], 3)
    x = 0.
    direction = "right"
    while running:
        time.sleep(0.5)
        for event in pygame.event.get():
            if event.type == KEYDOWN:
                if event.key == K_SPACE:
                    self.is_agent = not self.is_agent
                    self.update_word()
                if event.key == K_LEFT:
                    direction = "left"
                elif event.key == K_RIGHT:
                    direction = "right"
                elif event.key == K_UP:
                    direction = "down"
                elif event.key == K_DOWN:
                    direction = "up"
            if event.type == QUIT:
                print("quit")
                a.output()
                pygame.quit()
                exit()
        if not self.is_agent:
            snake_color = self.green
            ans = s.run(direction)
        else:
            snake_color = self.blue
            state = a.build_state()
            a.create_Q(state)  # Create 'state' in Q-table
            direction = a.choose_action(state)
            ans = a.s.run(direction)
        if ans == False:
            s.reset()
            direction = "right"
            continue
        if ans[1]:
            if self.is_agent and a.learning:
                self.train_time += 1
                self.update_word()
                if self.train_time == 10000:
                    a.output()
                    a.learning = False
        screen.fill(self.white)
        for index, item in enumerate(ans[0]):
            if index == 0:
                color = head_color
            else:
                # prev = ans[0][index - 1]
                color = snake_color
            pygame.draw.rect(screen, color, [item[0] * 40 + 2, item[1] * 40 + offset + 2, 36, 36], 0)
        food = s.get_food()
        pygame.draw.rect(screen, self.red, [food[0] * 40, food[1] * 40 + offset, 40, 40], 0)
        screen.blit(self.surface, (0, 0))
        pygame.display.update()
def main(args):
    env_name = args.env
    env = gym.make(env_name)
    a2c_agent = agent(env)
    a2c_agent.train(env_name)
import sys
from world import world
from agent import agent
import random
from PyQt4 import QtGui, QtCore
from map import map

app = QtGui.QApplication(sys.argv)
inname = sys.argv[1]
mp = map(inname)
wrld = world(mp)
# spawn one agent per eight map cells (integer division keeps range() happy)
for i in range(mp.mwidth * mp.mheight // 8):
    agent(wrld, i)
wrld.show()
sys.exit(app.exec_())
def qLearn():
    a = agent.agent()
    t = agent.target()
    ql = QLearner.QLearner()  # Initialize vectors and starting coordinates for agents and targets
    ql.reset_qTable()

    # Create output files
    learning = open('BestFit_QL.txt', 'w')  # Records best fitnesses
    perf = open('SystemReward_QL.txt', 'w')
    rel = open('Reliability_QL.txt', 'w')  # Records how successful the trained policy is
    eff = open('Alg_Time_QL.txt', 'w')
    stp = open('Steps_Taken_QL.txt', 'w')

    for srun in range(p.stat_runs):
        print('current stat run: ', srun)
        a.assign_acoords(p.x_dim, p.y_dim)
        t.assign_tcoords(p.x_dim, p.y_dim, a.ax_init, a.ay_init)
        time_begin = process_time()
        ql.reset_qTable()

        for ep in range(p.episodes):
            k = 0
            while k < p.steps:
                ql.update_prev_state(a.agent_x, a.agent_y)
                act = ql.epsilon_select()
                a.agent_move(act)
                ql.update_curr_state(a.agent_x, a.agent_y)
                a.update_reward_QL(t.tx, t.ty)
                ql.update_qTable(a.agent_reward, act)
                if a.goal_captured == True:
                    k = p.steps  # Stop iterating if target is captured
                k += 1
            a.reset_agent()
            learning.write('%f' % np.max(ql.qtable[:, :]))
            learning.write('\t')  # Records max reward in Q-table

        time_end = process_time()
        total_time = time_end - time_begin
        eff.write('%f' % total_time)
        eff.write('\n')

        # Test the best policy found
        a.reset_agent()
        k = 0
        while k < p.steps:
            ql.update_prev_state(a.agent_x, a.agent_y)
            a.update_state_vec(t.tx, t.ty)
            act = ql.greedy_select()
            a.agent_move(act)
            a.update_reward_QL(t.tx, t.ty)
            if a.goal_captured == True:
                stp.write('%f' % k)
                stp.write('\n')
                k = p.steps  # Stop iterating if target is captured
            k += 1

        if a.goal_captured == True:  # Record reliability of agent
            rel.write('%d' % 1)
            rel.write('\t')
        else:
            rel.write('%d' % 0)
            rel.write('\t')

        system_reward = a.agent_reward  # Record system performance for this stat run
        perf.write('%f' % system_reward)
        perf.write('\t')

        learning.write('\n')
        perf.write('\n')
        rel.write('\n')  # New line for new stat run

    learning.close()
    perf.close()
    rel.close()
    stp.close()
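# For reference, a minimal sketch of the tabular update that ql.update_qTable(reward, act) is
# assumed to perform: standard Q-learning with learning rate alpha and discount gamma. The actual
# QLearner class and its state encoding may differ; TabularQSketch is a hypothetical name.
import numpy as np

class TabularQSketch:
    """Illustrative only: assumed shape of the QLearner update, not the project's implementation."""
    def __init__(self, n_states, n_actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.q = np.zeros((n_states, n_actions))
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.prev_state = self.curr_state = 0

    def epsilon_select(self):
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.q.shape[1])        # explore
        return int(np.argmax(self.q[self.curr_state]))       # exploit

    def update_qTable(self, reward, act):
        best_next = np.max(self.q[self.curr_state])
        td_target = reward + self.gamma * best_next
        self.q[self.prev_state, act] += self.alpha * (td_target - self.q[self.prev_state, act])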
def run_one_episode(FM_model, user_id, busi_id, MAX_TURN, do_random, write_fp,
                    strategy, TopKTaxo, PN_model, gamma, trick, mini,
                    optimizer1_fm, optimizer2_fm, alwaysupdate, start_facet,
                    mask, sample_dict):
    # _______ initialize user and agent _______
    # Initialize the user
    the_user = env.user(user_id, busi_id)
    # Initialize done
    numpy_list = list()
    log_prob_list, reward_list = Variable(torch.Tensor()), list()
    action_tracker, candidate_length_tracker = list(), list()
    the_agent = agent.agent(FM_model, user_id, busi_id, do_random, write_fp,
                            strategy, TopKTaxo, numpy_list, PN_model,
                            log_prob_list, action_tracker, candidate_length_tracker,
                            mini, optimizer1_fm, optimizer2_fm, alwaysupdate, sample_dict)

    # _______ chat history _______
    chat_history = dict()

    # _______ initialize start message _______
    data = dict()
    # data['facet'] = choose_start_facet(busi_id)
    data['facet'] = start_facet
    # print('Starting facet is : {}'.format(data['facet']))
    start_signal = message(cfg.AGENT, cfg.USER, cfg.EPISODE_START, data)

    agent_utterance = None
    while the_agent.turn_count < MAX_TURN:
        if the_agent.turn_count == 0:
            user_utterance = the_user.response(start_signal)
        else:
            user_utterance = the_user.response(agent_utterance)

        with open(write_fp, 'a') as f:
            f.write('The user utterance in #{} turn, type: {}, data: {}\n'.format(
                the_agent.turn_count, user_utterance.message_type, user_utterance.data))

        if user_utterance.message_type == cfg.ACCEPT_REC:
            the_agent.history_list.append(2)
            print('Rec Success! in Turn: {}.'.format(the_agent.turn_count))
            rewards = get_reward(the_agent.history_list, gamma, trick, user_utterance.data)
            if cfg.purpose == 'pretrain':
                return numpy_list
            else:
                return (the_agent.log_prob_list, rewards, the_agent.history_list)

        agent_utterance = the_agent.response(user_utterance)
        the_agent.turn_count += 1

        if the_agent.turn_count == MAX_TURN:
            the_agent.history_list.append(-2)
            print('Max turn quit...')
            rewards = get_reward(the_agent.history_list, gamma, trick)
            if cfg.purpose == 'pretrain':
                return numpy_list
            else:
                return (the_agent.log_prob_list, rewards, the_agent.history_list)
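# A minimal sketch of what a get_reward(history_list, gamma, ...) style helper could look like:
# turning per-turn history markers into a discounted return for REINFORCE-style training. This is
# an assumption for illustration; the project's actual reward shaping (and the `trick` flag) may
# differ.
import torch

def get_reward_sketch(history_list, gamma):
    """Illustrative only: discounted return computed backwards over per-turn rewards."""
    rewards = []
    running = 0.0
    for r in reversed(history_list):
        running = r + gamma * running
        rewards.insert(0, running)
    return torch.tensor(rewards, dtype=torch.float32)

# e.g. a 3-turn episode ending in a successful recommendation (+2)
print(get_reward_sketch([0, 0, 2], gamma=0.9))  # tensor([1.62, 1.80, 2.00])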
from environment import environment
from agent import agent
import numpy as np

agent_o = agent('agent_o', 'O')
agent_x = agent('agent_x', 'X')
env = environment(agent_o, agent_x)
env.render(['X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X'])
def test__distAgent(self):
    a1 = agent.agent((0, 0), self.unitTestSigma, "Keeper", (0, 0))
    a2 = agent.agent((10, 10), self.unitTestSigma, "Taker", (0, 0))
    self.assertAlmostEqual(__distAgent(a1, a2), math.sqrt(200), 1)
                          '11st_julia', category_no[current_category])
OUTPUT_FILENAME = \
    '/home/taey16/storage/product/11st_julia/demo_{}.txt.wrap_size0.oversampleFalse.pickle'.format(
        category_no[current_category])

if __name__ == '__main__':
    print('Start to indexing for {}'.format(INPUT_FILENAME))
    print('output will be saved in {}'.format(OUTPUT_FILENAME))
    #import pdb; pdb.set_trace()

    meta_filename = '{}/{}'.format(DATASET_ROOT, INPUT_FILENAME)
    parser = parser_utils()
    input = parser.parse(meta_filename)

    agent = agent(**net_args)
    agent.net.forward()
    indexer = indexer(category_no, max_num_items)

    item_counter = 0
    for item in input:
        try:
            prd_no = item['__prd_no__']
            fname = \
                '/userdata2/index_11st_20151020/october_11st_imgdata/{}.jpg'.format(prd_no)
            object_roi = item['__object_roi__'].strip().split(',')
            category_id = item['__mctgr_no__']
            roi = parser.get_roi_meta_dic(object_roi)
            start_loading = time.time()
            image = agent.load_image_roi(fname, roi, 0)
            elapsed_loading = time.time() - start_loading
from environment import environment
from agent import agent
from player import player

# agent plays against a human player
agent_o = agent('agent_o', 'O')
player_x = player('player_x', 'X')
env = environment(agent_o, player_x)
# X plays first
env.play(player_x)
env.close()
import agent as ag
from threading import Thread
from rgui import *
from bot import *
import time


def printTime():
    while True:
        print(time.ctime(time.time()))


if __name__ == '__main__':
    ob = ag.agent()
    bb = bot()
    #bb.ru()
    Thread(target=bb.ru).start()
    a = GUI(ob, bb)
    Thread(target=a.run).start()
    #thread.start_new_thread(bb.ru)
    #thread.start_new_thread(a.run)
    #bb.ru()
    #a.run()
canvas.create_oval((W/2.-4, H/2.-4, W/2.+4, H/2.+4), fill='goldenrod')
canvas.create_oval((W/2.-2, H/2.-2, W/2.+2, H/2.+2), fill='orange red')
for i in range(300):
    x = random.random()*W
    y = random.random()*H
    canvas.create_oval((x-1, y-1, x+1, y+1), fill='white')
canvas.pack()

items = []
normeSpeed = 60
distToCenter = 250
for i in range(2000):
    posRandom = 2*3.14*random.random()
    items.append(agent(canvas,
                       Vec2d(W/2. + distToCenter*cos(posRandom) + 10*(2*random.random()-1),
                             H/2. + distToCenter*sin(posRandom) + 10*(2*random.random()-1)),
                       Vec2d(normeSpeed*sin(posRandom) + 10*(2*random.random()-1),
                             -normeSpeed*cos(posRandom) + 10*(2*random.random()-1))))
    #Vec2d(normeSpeed*(2*random.random()-1), normeSpeed*(2*random.random() - 1))))
#items.append(agent(canvas,
#                   Vec2d(520, 300),
#                   Vec2d(0, 0)))

root.update()  # fix geometry

# loop over items
try:
    while 1:
        t1 = time.time()
        for agent in items:
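# The initial velocity above is chosen perpendicular to the radius vector, so (before the random
# jitter) each agent starts roughly on a circular orbit around the centre. A quick check of that
# construction as a self-contained sketch; the canvas size below is an assumption for illustration.
import math
import random

normeSpeed, distToCenter = 60, 250

theta = 2 * math.pi * random.random()
pos = (distToCenter * math.cos(theta), distToCenter * math.sin(theta))   # offset from centre
vel = (normeSpeed * math.sin(theta), -normeSpeed * math.cos(theta))      # tangential direction

# dot product is zero: velocity is perpendicular to the radius, i.e. tangent to the circle
print(abs(pos[0] * vel[0] + pos[1] * vel[1]) < 1e-9)   # True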
from environment import environment
from agent import agent

agent_o = agent('agent_o', 'O', exp_rate=0.3)
agent_x = agent('agent_x', 'X', exp_rate=0.3)
env = environment(agent_o, agent_x)

#rounds = 9*8*7*6*5*4*3*2*1 * 10
rounds = 9
print(len(agent_o.Q))
env.train(agent_x, rounds)
print(len(agent_o.Q))
env.close()
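# The exp_rate argument above suggests epsilon-greedy move selection during self-play. A minimal
# sketch of that policy over a tabular value dictionary Q; this is an assumption for illustration,
# not the project's exact agent, and choose_move_sketch is a hypothetical helper.
import random

def choose_move_sketch(Q, board_state, legal_moves, exp_rate=0.3):
    """Illustrative only: explore with probability exp_rate, otherwise pick the best-known move."""
    if random.random() < exp_rate:
        return random.choice(legal_moves)
    # default unseen (state, move) pairs to 0 so new positions can still be compared
    return max(legal_moves, key=lambda m: Q.get((board_state, m), 0.0))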
        plt.clf()
        if agent_.env.maze.ravel()[s_] != 0:
            break


if __name__ == '__main__':
    train_settings()
    seed()
    brain_ = brain(size=args.arena_size, gamma=0.9, l_r=0.9)
    env_ = env(size=args.arena_size, cat_r=[-10, -20], cheese_r=[10, 20])
    agent_ = agent(env=env_, brain=brain_)
    plt.imshow(env_.maze)
    plt.pause(1)
    for i in range(args.random_steps):
        agent_.step()
        if i % 10 == 0:
            plt.imshow(agent_.brain.q_mat)
            plt.pause(0.01)
            plt.clf()
    animate(agent_)
def evo_net(): nn = neuralnet.NN() g = GA.GA() a = agent.agent() t = agent.target() # Initialize vectors and starting coordinates for agents and targets nn.create_NN(2, 3, 4) # (n_inputs, n_outputs, hidden layer size) # Create output files learning = open('BestFit_NN.txt', 'w') # Records best fitnesses perf = open('SystemReward_NN.txt', 'w') rel = open('Reliability_NN.txt', 'w') # Records how successful trained NN is using "best" policy eff = open('Alg_Time_NN.txt', 'w') stp = open('Steps_Taken_NN.txt', 'w') for srun in range(p.stat_runs): print('current stat run: ', srun) a.assign_acoords(p.x_dim, p.y_dim) t.assign_tcoords(p.x_dim, p.y_dim, a.ax_init, a.ay_init) time_begin = process_time() g.create_pop() # (policy_size) for j in range(g.population_size): # Evaluate the initial population nn.get_weights(g.population[j]) a.reset_agent() k = 0 while k < p.steps: # Move around for certain number of steps unless target is captured a.update_state_vec(t.tx, t.ty) # Updates state input to NN nn.get_inputs(a.state_vector) act = nn.get_ouput() # Get output from NN a.agent_move(act) # Agent moves a.update_reward_NN(t.tx, t.ty) if a.goal_captured == True: k = p.steps # Stop iterating, target is captured k += 1 g.pop_fit[j] = a.agent_reward # Fitness is sum of agent rewards learning.write('%f' % max(g.pop_fit)) learning.write('\t') # Train weights or neural network for i in range(p.generations - 1): g.crossover() g.mutate() # Create new population for testing for j in range(g.population_size): # Test offspring population nn.get_weights(g.offspring_pop[j]) a.reset_agent() k = 0 while k < p.steps: # Move around for certain number of steps unless target is captured a.update_state_vec(t.tx, t.ty) # Updates state input to NN nn.get_inputs(a.state_vector) act = nn.get_ouput() # Get output from NN a.agent_move(act) # Agent moves a.update_reward_NN(t.tx, t.ty) if a.goal_captured == True: k = p.steps # Stop iterating, target is captured k += 1 g.pop_fit[j] = a.agent_reward g.down_select() # Establish new parent population learning.write('%f' % g.pop_fit[0]) learning.write('\t') time_end = process_time() total_time = time_end - time_begin eff.write('%f' % total_time) eff.write('\n') # Test Best Policy Found nn.get_weights(g.population[0]) a.reset_agent() k = 0 best_fitn = max(g.pop_fit) assert (best_fitn == g.pop_fit[0]) while k < p.steps: a.update_state_vec(t.tx, t.ty) nn.get_inputs(a.state_vector) act = nn.get_ouput() a.agent_move(act) a.update_reward_NN(t.tx, t.ty) if a.goal_captured == True: stp.write('%f' % k) stp.write('\n') k = p.steps # Stop iterating if target is captured k += 1 if a.goal_captured == True: rel.write('%d' % 1) rel.write('\t') else: rel.write('%d' % 0) rel.write('\t') system_reward = a.agent_reward perf.write('%f' % system_reward) perf.write('\t') learning.write('\n') perf.write('\n') rel.write('\n') # New line for new stat run learning.close() perf.close() rel.close() eff.close() stp.close()
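# In evo_net above, g.down_select() establishes the next parent population from parents plus
# offspring. A minimal sketch of an elitist down-selection under that assumption (the project's
# GA class may implement it differently); keeping the population sorted by fitness would also
# leave pop_fit[0] as the best individual, consistent with the assertion in the test block.
def down_select_sketch(population, offspring, pop_fit, off_fit):
    """Illustrative only: keep the best len(population) individuals from parents + offspring."""
    combined = list(zip(population, pop_fit)) + list(zip(offspring, off_fit))
    combined.sort(key=lambda pair: pair[1], reverse=True)   # higher fitness is better
    survivors = combined[:len(population)]
    new_pop = [ind for ind, _ in survivors]
    new_fit = [fit for _, fit in survivors]
    return new_pop, new_fit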