class Server:
    """
    Bookkeeping record for one remote object (alone or within a grid).

    A Server ties together the proxy that wraps the remote instance,
    the id of the service the instance belongs to and the host it runs
    on.  It also carries the statistics used for load balancing.

    Attributes set by the constructor:

    1) self.service_id
    2) self.proxy
    3) self.host
    4) self.debug, self.debug_out   (debug flag, stream used by dp())
    5) self.time, self.jobs, self.acquired   (load balancing / logging)

    terminate() is abstract here; subclasses have to implement it.
    """

    def __init__(self, proxy, service_id, host, debug):
        """
        @param proxy: proxy object wrapping the remote instance
        @param service_id: id of the service this server belongs to
        @param host: host description; __str__ reads its 'name'
        @param debug: debug flag, stored as self.debug
        """
        self.debug = debug
        self.debug_out = sys.stdout

        self.proxy = proxy
        self.service_id = service_id
        self.host = host

        ## register itself (server) in the proxy; written through
        ## __dict__ directly -- presumably to bypass a custom
        ## __setattr__ of the proxy (TODO confirm)
        self.proxy.__dict__['_server'] = self

        ## variables for load balancing
        self.time = Pipe(50)    ## history of timings, seeded with 0.
        self.time.put(0.)
        self.jobs = 0           ## (log) number of jobs done by the Server
        self.acquired = False   ## (log)

    def dp(self, str):
        """
        Debug print: write 'str' plus a newline to self.debug_out and
        flush the stream immediately.

        The parameter name shadows the builtin; it is kept so that
        existing keyword callers do not break.
        """
        ## stream.write() instead of 'print >>stream' so that the
        ## method works unchanged on Python 2 and Python 3
        self.debug_out.write('%s\n' % (str,))
        self.debug_out.flush()

    def terminate(self):
        """
        It terminates self.proxy if needed.

        @raise NotImplementedError: always; to be overridden
        """
        raise NotImplementedError

    def __str__(self):
        """
        One-line summary: class name, busy flag, average of self.time,
        number of jobs, host name and proxy.
        """
        ## the closing parenthesis was missing in the original format
        s = '%s(busy=%s, <time>=%.2f, jobs=%d, host="%s", proxy="%s")' \
            % (self.__class__.__name__, self.acquired,
               average(self.time), self.jobs,
               self.host.name, self.proxy)
        return s

    __repr__ = __str__
def randomSteps(env, steps, dqn):
    """
    Fill the replay memory of ``dqn`` by playing ``steps`` steps in ``env``.

    Every episode starts with a number of no-op actions (drawn once, from
    ``[4, NO_OP_MAX)``) that only fill the frame stack.  After that, uniformly
    random actions are taken and each transition is handed to
    ``dqn.storeExperience`` as
    ``(state, one_hot_action, reward, next_state, done)``.

    Args:
        env: environment exposing ``reset()`` and
            ``step(action) -> (observation, reward, done, info)``.
        steps: total number of environment steps to perform
            (no-op steps are included in the count).
        dqn: object exposing ``storeExperience(...)`` and ``replayMemory``.
    """
    t0 = time.time()
    env.reset()
    i = 0
    frame_stack = Pipe(4)
    initial_no_op = np.random.randint(4, NO_OP_MAX)
    stacked_shape = (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)

    for _ in range(steps):
        if i < initial_no_op:
            # WE PERFORM A RANDOM NUMBER OF NO_OP ACTIONS
            state, reward, done, info = env.step(NO_OP_CODE)
            frame_stack.push(downSample(rgb2gray(state)))
            i += 1
        else:
            state = np.stack(frame_stack.items, axis=2).reshape(stacked_shape)

            action = np.random.randint(0, len(ACTIONS))
            actionH = np.zeros(len(ACTIONS))  # one-hot encoding of the action
            actionH[action] = 1

            # Fix: the terminal flag used to be stored in ``game_over`` while
            # the reset check below tested ``done``, so it saw a stale value.
            next_state, reward, done, info = env.step(action)
            frame_stack.push(downSample(rgb2gray(next_state)))
            next_state = np.stack(frame_stack.items, axis=2).reshape(stacked_shape)

            # NOTE(review): ``type`` is presumably a module-level dtype that
            # shadows the builtin -- confirm against the rest of the file.
            dqn.storeExperience(
                state.astype(type),
                actionH,
                reward,
                next_state.astype(type),
                done)

        if done:
            # Start a new episode: repeat the no-op phase with a fresh stack.
            env.reset()
            i = 0
            # Fix: this used to be ``[]``, which has no ``push`` method and
            # made the next step fail with AttributeError.
            frame_stack = Pipe(4)

    t1 = time.time()
    print("Fullfilling replay memory operation took:", t1 - t0,)
    # sys.getsizeof is shallow: it does not include the stored experiences.
    print('Size of replay memory %s bytes and has %s elements'
          % ((sys.getsizeof(dqn.replayMemory)), len(dqn.replayMemory)))
    # Fix: a bare ``print`` is a no-op on Python 3; emit the blank line.
    print('')
def __init__(self, proxy, service_id, host, debug):
    """
    Store the proxy, service id and host of a served object, attach
    this server to the proxy and reset the load-balancing statistics.

    @param proxy: proxy object wrapping the remote instance
    @param service_id: id of the service this server belongs to
    @param host: host the remote instance runs on
    @param debug: debug flag, stored as self.debug
    """
    ## what is being served, and where
    self.proxy = proxy
    self.service_id = service_id
    self.host = host

    ## debugging: flag and output stream
    self.debug = debug
    self.debug_out = sys.stdout

    ## make the server reachable from its proxy as proxy._server;
    ## the entry is written into the instance __dict__ directly
    proxy.__dict__['_server'] = self

    ## load balancing: timing history (seeded with 0.) and counters
    timings = Pipe(50)
    timings.put(0.)
    self.time = timings
    self.jobs = 0           ## (log) number of jobs done by the Server
    self.acquired = False   ## (log)
def run(self):
    """
    Message loop of the grid: receive and dispatch messages until the
    stop flag is set.

    Each pass first checks for a file named 'quit' in the temp path
    of the first host; if present, every known server is sent
    MSG_TERMINATE and the grid terminates.  Otherwise one message is
    received and handled:

     - MSG_INIT_DONE: a handler is ready (reported in debug mode)
     - MSG_CALL_METHOD_RESULT: store the value in the pending Result,
       set its event, record the elapsed time and release the server
       if the proxy asked for it
     - MSG_CLIENT_CRASHED: terminate the service the client belongs to

    On exit the current stop flag is copied into the stopped flag.

    @raise RuntimeError: if several servers share the sender's tid or
                         a crashed client's tid is unknown
    """
    ## keys of results that were already delivered and removed
    self.removed = Pipe(100000)

    while not self.__stop:

        if os.path.exists(os.path.join(self.hosts[0].temp_path, 'quit')):
            ## fix: the original indexed self.servers[service_id] here
            ## although 'service_id' had not been bound yet; notify
            ## the servers of every service instead
            for server_list in self.servers.values():
                for server in server_list:
                    self.send(server.url, MSG_TERMINATE)
            #self.halt()
            self.terminate()
            break

        recv = self.recv(-1, -1)
        if recv is None:
            continue

        tid, msg, data = recv

        if msg == MSG_INIT_DONE:

            for service_id, server_list in self.servers.items():
                server = [s for s in server_list if s.url == tid]
                if server:
                    if len(server) > 1:
                        ## fix: string exceptions are not valid
                        raise RuntimeError('Inconsistency')
                    if self.debug:
                        print('%s ready.' % (server[0].host.name,))

        elif msg == MSG_CALL_METHOD_RESULT:

            try:
                key, value, elapsed = data
            except (TypeError, ValueError):
                ## fix: the original fell through after a failed
                ## unpack and then used an unbound (or stale) 'key'
                print('Malformed result message: %r' % (data,))
                continue

            if key in self.results:
                result = self.results[key]
                result.value = value
                result.event.set()

                self.add_time(result.proxy, elapsed)

                if result.proxy._selfrelease:
                    if self.debug:
                        self.dp(
                            'FileBasedGrid.run: releasing server %s for the grid' \
                            % result.proxy._server)
                    result.proxy._selfrelease = False
                    self._release_service(result.proxy)

                del self.results[key]
                self.removed.put(key)

            else:
                print('Result object, key %d, not known.' % key)
                if key in self.removed.pipe:
                    print('Key has already been deleted.')

        elif msg == MSG_CLIENT_CRASHED:

            ## find service to which the crashed client belongs
            for service_id, server_list in self.servers.items():
                crashed = [server for server in server_list
                           if server.url == tid]
                if crashed:
                    break
            else:
                raise RuntimeError('Inconsistency: TID %d not known.' % tid)

            notice = 'Client on host "%s" inaccessible. Attempting shutdown...'
            print(notice % crashed[0].host.name)

            self.terminate(service_id)

        else:
            print('Unknown message %s %s' % (tid, msg))

    self.__stopped = self.__stop
class FileBasedGrid(AbstractGrid):
    """
    Grid whose master and remote handlers talk to each other through a
    FileBasedCommunicator rooted in the temp path of the first host.

    In FileBasedGrid there is a one to one correspondence between a
    Server and a Handler (note from the original authors).
    """

    def __init__(self, hosts, src_path, display, X11_delay,
                 debug, verbose, nfs_care=False):
        """
        Set up the base grid (with a shared temp path), initialise the
        remote side, select the file based loader and create the
        communicator.

        @param nfs_care: passed on to FileBasedCommunicator

        All other parameters are passed on to AbstractGrid.__init__.
        """
        AbstractGrid.__init__(self, hosts, src_path, display, X11_delay,
                              debug, verbose, shared_temp_path=True)

        #copy all files in current dir to remote host.
        if self.debug:
            print("initialising")
        self.initialise(src_path, ['filebased_loader', 'ro',
                                   'FileBasedGrid', 'OrderedDict'])

        if self.debug:
            print("setting filebased_loader")
        self.set_loader(src_path, 'filebased_loader')

        if self.debug:
            print("creating communicator")
        self.create_communicator(nfs_care)

        self.results = {}       ## pending Result objects, by key
        self.key = 0            ## next key handed out for a Result

        self.__stop = False     ## set by terminate(); ends run()
        self.__stopped = False  ## set once run() has left its loop

        if self.debug:
            print('FileBasedGrid created: tid =  %s'
                  % (self.communicator.tid,))

    def set_debug(self, debug):
        """Set the debug flag on the grid and on its communicator."""
        AbstractGrid.set_debug(self, debug)
        self.communicator.debug = debug

    def create_communicator(self, nfs_care):
        """Create self.communicator in the temp path of the first host."""
        self.communicator = FileBasedCommunicator(self.hosts[0].temp_path,
                                                  debug=self.debug,
                                                  nfs_care=nfs_care)

    def publish(self, instance):
        """
        Make 'instance' available on every host of self.hosts.

        For each host:
         - create a proxy for the instance (this spawns a handler on
           the host and sends it the instance, see create_proxy)
         - wrap the proxy in a FileBasedServer and store the proxy's
           url as server.url
         - register the server via self.add_server
         - if a display is used and X11_delay is set, sleep for
           X11_delay seconds before moving on

        @return: the service id associated to this instance
        """
        if self.debug:
            try:
                print("publishing instance %s" % \
                      instance.__class__.__name__)
            except Exception:
                print("publishing instance")

        if self.debug:
            print(" creating sevice id")
        service_id = self.create_service_id(instance)

        for host in self.hosts:

            if self.debug:
                print(' host  %s' % (host.name,))
                print(" creating proxy")
            proxy = self.create_proxy(instance, host, self.display,
                                      daemon=1)

            if self.debug:
                print(" creating FileBasedServer")
            server = FileBasedServer(proxy, service_id, host, self.debug)

            if self.debug:
                print(" proxy._get_url()")
            server.url = proxy._get_url()

            if self.debug:
                print(" adding server")
            self.add_server(server)

            if self.display and self.X11_delay is not None:
                time.sleep(self.X11_delay)

        return service_id

    def create_proxy(self, instance, host, display=0, daemon=0):
        """
        Spawn a handler for 'instance' on 'host' and return a
        FileBasedRemoteObject bound to that handler.

        (copied from Grid, called from AbstractISDGrid.create_server)
        """
        if self.debug:
            print(" creating handler")
        handler_tid = self.create_handler(instance, host, display, daemon)

        if self.debug:
            print(" creating proxy")
        proxy = FileBasedRemoteObject(instance, handler_tid, manager=self)

        if self.debug:
            print('Connected: tid=%d' % handler_tid)

        return proxy

    def create_handler(self, instance, host, display, daemon):
        """
        Spawn a handler on 'host' and send it MSG_INIT together with
        the instance and the daemon flag.

        @return: tid of the spawned handler
        """
        d = {'object': instance,
             'daemon': daemon}

        handler_tid = self.spawn_handler(host, display)

        if self.debug:
            print('Initialising service on host "%s"' % host.name)

        self.send(handler_tid, MSG_INIT, d)

        if self.debug:
            print('MSG_INIT sent.')

        return handler_tid

    def spawn_handler(self, host, display):
        """
        Launch the loader script on 'host' through the communicator.

        The command line starts with host.init_cmd (if not empty):
         - ending in ';' : used as is
         - ending in '!' : the '!' is stripped, no ';' is appended
         - otherwise     : a ';' is appended

        With a display the script is run by 'host.python -i' inside an
        xterm; without one it is run by host.python with the output
        sent to /dev/null.

        @return: tid returned by communicator.spawn
        """
        handler_script = os.path.join(self.hosts[0].temp_path,
                                      self.loader_filename)

        ## built before 'display' is possibly rewritten below
        init_data = {'niceness': host.niceness,
                     'display': display}

        argv = [handler_script]

        #add required init commands prior to launching anything else
        #on the target host.
        init_cmd = host.init_cmd
        if init_cmd != '':
            stripped = init_cmd.rstrip()
            if stripped.endswith(';'):
                command = init_cmd
            elif stripped.endswith('!'):
                command = stripped[:-1]
            else:
                command = init_cmd + ';'
        else:
            command = ''

        if display:
            ## an integer display is replaced by display :0.0 of the
            ## master (exact type test on purpose: bools are excluded)
            if type(display) is type(0):
                master_name = socket.gethostname()
                if host.name == master_name:
                    display = ':0.0'
                else:
                    display = master_name + ':0.0'

            command += 'xterm'
            argv = ['-title', host.name,
                    '-geometry', self.window_size,
                    '-hold',
                    '-e',
                    host.python, '-i'] + argv
            pipe = ''
        else:
            command += host.python
            pipe = '> /dev/null'

        if self.debug:
            print('Spawning service on host "%s"' % host.name)

        X_forward = display

        tid = self.communicator.spawn(host.name, command, argv,
                                      init_data, pipe, X_forward)

        if self.debug:
            print('Service spawned: tid =  %s' % (tid,))

        return tid

    def recv(self, tid, msg):
        """Delegate to self.communicator.recv and return its result."""
        return self.communicator.recv(tid, msg)

    def send(self, tid, msg, value=None):
        """Delegate to self.communicator.send."""
        self.communicator.send(tid, msg, value)

    def create_result_object(self, proxy):
        """
        Create a Result for 'proxy' under a fresh key.

        Results have to be temporarily stored in the Grid until their
        values are calculated.  A clash with an existing key is only
        reported; the old entry is then overwritten.
        """
        key = self.key
        self.key += 1

        if key in self.results:
            print('Result object for key %d already exists.' % key)

        #result = Result(tid, key, self)
        result = Result(proxy)
        result.key = key

        self.results[key] = result

        return result

    def run(self):
        """
        Message loop of the grid: receive and dispatch messages until
        the stop flag is set.

        Each pass first checks for a file named 'quit' in the temp
        path of the first host; if present, every known server is sent
        MSG_TERMINATE and the grid terminates.  Otherwise one message
        is received and handled:

         - MSG_INIT_DONE: a handler is ready (reported in debug mode)
         - MSG_CALL_METHOD_RESULT: store the value in the pending
           Result, set its event, record the elapsed time and release
           the server if the proxy asked for it
         - MSG_CLIENT_CRASHED: terminate the service the client
           belongs to

        On exit the current stop flag is copied into the stopped flag
        reported by ishalted().

        @raise RuntimeError: if several servers share the sender's tid
                             or a crashed client's tid is unknown
        """
        ## keys of results that were already delivered and removed
        self.removed = Pipe(100000)

        while not self.__stop:

            if os.path.exists(os.path.join(self.hosts[0].temp_path,
                                           'quit')):
                ## fix: the original indexed self.servers[service_id]
                ## here although 'service_id' had not been bound yet;
                ## notify the servers of every service instead
                for server_list in self.servers.values():
                    for server in server_list:
                        self.send(server.url, MSG_TERMINATE)
                #self.halt()
                self.terminate()
                break

            recv = self.recv(-1, -1)
            if recv is None:
                continue

            tid, msg, data = recv

            if msg == MSG_INIT_DONE:

                for service_id, server_list in self.servers.items():
                    server = [s for s in server_list if s.url == tid]
                    if server:
                        if len(server) > 1:
                            ## fix: string exceptions are not valid
                            raise RuntimeError('Inconsistency')
                        if self.debug:
                            print('%s ready.' % (server[0].host.name,))

            elif msg == MSG_CALL_METHOD_RESULT:

                try:
                    key, value, elapsed = data
                except (TypeError, ValueError):
                    ## fix: the original fell through after a failed
                    ## unpack and then used an unbound (or stale) key
                    print('Malformed result message: %r' % (data,))
                    continue

                if key in self.results:
                    result = self.results[key]
                    result.value = value
                    result.event.set()

                    self.add_time(result.proxy, elapsed)

                    if result.proxy._selfrelease:
                        if self.debug:
                            self.dp(
                                'FileBasedGrid.run: releasing server %s for the grid' \
                                % result.proxy._server)
                        result.proxy._selfrelease = False
                        self._release_service(result.proxy)

                    del self.results[key]
                    self.removed.put(key)

                else:
                    print('Result object, key %d, not known.' % key)
                    if key in self.removed.pipe:
                        print('Key has already been deleted.')

            elif msg == MSG_CLIENT_CRASHED:

                ## find service to which the crashed client belongs
                for service_id, server_list in self.servers.items():
                    crashed = [server for server in server_list
                               if server.url == tid]
                    if crashed:
                        break
                else:
                    raise RuntimeError(
                        'Inconsistency: TID %d not known.' % tid)

                notice = ('Client on host "%s" inaccessible. '
                          'Attempting shutdown...')
                print(notice % crashed[0].host.name)

                self.terminate(service_id)

            else:
                print('Unknown message %s %s' % (tid, msg))

        self.__stopped = self.__stop

    def ishalted(self):
        """Return the stopped flag that run() sets when it exits."""
        return self.__stopped

    def terminate(self, service_id=None):
        """
        Terminate the given service (default None is passed on to
        AbstractGrid.terminate), halt the communicator and set the
        stop flag so that run() leaves its loop.
        """
        AbstractGrid.terminate(self, service_id)
        self.communicator.halt()
        self.__stop = True
        if self.debug:    ## please keep debug statement!
            print('FileBasedGrid: terminated.')

    def __del__(self):
        """Terminate the grid when the object is collected."""
        self.terminate()

    #### YS: a few additions

    def broadcast(self, sfo_id, funcname, *args, **kw):
        """
        Call method 'funcname' with the same arguments on the proxy of
        every server of service 'sfo_id'.

        @return: list with what each call returned, in server order
        """
        return [getattr(server.proxy, funcname)(*args, **kw)
                for server in self.servers[sfo_id]]

    def scatter(self, sfo_id, funcname, arglist, kwlist=None):
        """
        Call method 'funcname' on the proxies of service 'sfo_id',
        giving the i-th server the i-th entry of 'arglist' (and of
        'kwlist', if given).

        If the first entry of 'arglist' is not iterable (or is a
        string), every entry is treated as a single positional
        argument.  Servers and argument sets are paired with zip, so
        the shorter sequence decides how many calls are made.

        @return: list with what each call returned, in server order
        """
        if kwlist is None:
            kwlist = [{} for _ in arglist]

        ## fix: guard against an empty arglist (the original raised
        ## IndexError on arglist[0]); strings are checked explicitly
        ## because they only lack '__iter__' on Python 2
        if arglist:
            first = arglist[0]
            if isinstance(first, (str, bytes)) \
               or not hasattr(first, '__iter__'):
                arglist = [[item] for item in arglist]

        results = []
        for server, args, kw in zip(self.servers[sfo_id], arglist, kwlist):
            func = getattr(server.proxy, funcname)
            results.append(func(*args, **kw))
        return results

    def gather(self, results):
        """
        Call get() on every entry of 'results' (as returned by
        broadcast or scatter) and return the values in order.
        """
        return [pending.get() for pending in results]
#Saving and loading networks saver = tf.train.Saver() checkpoint = tf.train.get_checkpoint_state("saved_networks") if checkpoint and checkpoint.model_checkpoint_path and LOAD_NETWORK: saver.restore(sess, checkpoint.model_checkpoint_path) print("Successfully loaded:", checkpoint.model_checkpoint_path) game = int(re.match('.*?([0-9]+)$', checkpoint.model_checkpoint_path).group(1)) else: print("Could not find old network weights") sess.run(tf.global_variables_initializer()) game = 0 game_scores =[] initial_no_op = np.random.randint(4, NO_OP_MAX) i=0 frame_stack=Pipe(4) score=0 summary_writer = tf.summary.FileWriter('logs',sess.graph) summary_writer = tf.summary.FileWriter(LOG_DIRECTORY + RUN_STRING, sess.graph) print('Started training') for step in range(STEPS): if i < initial_no_op: # WE PERFORM A RANDOM NUMBER OF NO_OP ACTIONS action = NO_OP_CODE state, reward, done, info = env.step(action) greyObservation = rgb2gray(state) state = downSample(greyObservation) frame_stack.push(state) i+=1 else: