def V(S, model_fn):
    """Value function evaluated on a serialized tf model."""
    with tf.Session() as sess:
        # restore the graph definition and its weights from the checkpoint files
        saver = tf.train.import_meta_graph(model_fn + '.meta')
        saver.restore(sess, model_fn)
        approx = Approximator(sess)
        # featurize every state and flatten into a 2D batch of feature vectors
        x = np.array([faster_featurize(s) for s in S])
        print x.shape
        x = np.reshape(x, (x.shape[0], x.shape[-1]))
        print x.shape
        v = approx.value(x)
        return v
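# Illustrative usage sketch (the checkpoint path below is an assumption for
# illustration, not a file produced by this project): evaluate a batch of
# FEN positions against a previously stored model.
if __name__ == '__main__':
    positions = ['6k1/8/8/5K2/8/8/8/8 w - -',
                 '8/6k1/8/8/3K4/8/8/8 w - -']
    values = V(positions, 'store/example_run/model')
    print values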
def run(self):
    np.random.seed()
    saver = tf.train.import_meta_graph(self.prev_model + '.meta')
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        saver.restore(sess, self.prev_model)
        approx = Approximator(sess)
        with sess.as_default():
            forward = tf.get_collection('forward')[0]
            X_pl = tf.get_collection('X_pl')[0]
            error = tf.get_collection('loss')
            self.init_cond.acquire()
            self.init_cond.notify_all()
            self.init_cond.release()
            X_s = faster_featurize('6k1/8/8/5K2/8/8/8/8 w - -')
            while True:  # waiting for an external signal to terminate
                print 'BEFORE VALUE ITERATION: ', approx.value(X_s)
                while not self.stop_play.is_set():
                    try:
                        (proc_name, X) = self.ep_task_q.get(timeout=0.01)
                    except:
                        continue
                    Y = approx.value(X)
                    self.conns[proc_name].send(Y)
                self.play_finished.acquire()
                self.play_finished.notify_all()
                self.play_finished.release()
                self.fit_cond.acquire()
                self.fit_cond.wait()
                (X, Y) = self.fit_q.get()
                if self.internal_flag:
                    approx.fit(X, Y, self.X_train, self.Y_train, saver, fp=self.fp)
                    print 'AFTER VALUE ITERATION: ', approx.value(X_s)
                    print saver.last_checkpoints[-1]
                    self.internal_flag = False
                else:
                    self.X_train = X
                    self.Y_train = Y
                    self.internal_flag = True
                self.fit_cond.release()
                self.stop_play.clear()
                self.fit_done.acquire()
                self.fit_done.notify_all()
                self.fit_done.release()
def get_av_pairs(self, env):
    """Get the action-value (AV) pairs corresponding to the Environment."""
    as_pairs = env.get_as_pairs()
    S = [t[1] for t in as_pairs]
    N = len(S)
    S = np.array([faster_featurize(s) for s in S])
    S = np.reshape(S, (S.shape[0], S.shape[-1]))
    v = self.approx.value(S)
    # zero-sum game: invert the value when black is to move
    v = map_side_to_int(env.get_turn()) * v
    av = [(as_pairs[i][0], v[i, 0]) for i in xrange(N)]
    return av
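# Illustrative sketch (not part of the original class): picking the greedy
# move from the (action, value) pairs above. `agent` and `env` are assumed
# to be an instance of this class and an Environment, respectively. Since
# the values were already flipped towards the side to move, taking the
# maximum is appropriate for both white and black.
av = agent.get_av_pairs(env)
best_action, best_value = max(av, key=lambda t: t[1])
print 'greedy move:', best_action, 'estimated value:', best_value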
def run(self):
    # reseed numpy's RNG in this worker process
    np.random.seed()
    with tf.Session(graph=self.graph,
                    config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        approx = Approximator(sess)
        with sess.as_default():
            saver = tf.train.Saver()
            sess.run(tf.initialize_all_variables())
            forward = tf.get_collection('forward')[0]
            X_pl = tf.get_collection('X_pl')[0]
            error = tf.get_collection('loss')
            # signal the main process that initialization is done
            self.init_cond.acquire()
            self.init_cond.notify_all()
            self.init_cond.release()
            # example batch for checking functionality during the run
            X_s = faster_featurize('6k1/8/8/5K2/8/8/8/8 w - -')
            while True:  # waiting for an external signal to terminate
                print 'BEFORE VALUE ITERATION: ', approx.value(X_s)
                while not self.stop_play.is_set():
                    # fetch tasks and calculate approximations
                    try:
                        (proc_name, X) = self.ep_task_q.get(timeout=0.01)
                    except:  # queue empty; keep polling
                        continue
                    Y = approx.value(X)
                    self.conns[proc_name].send(Y)
                # synchronization with finished EpisodeProcesses -> start fitting
                self.play_finished.acquire()
                self.play_finished.notify_all()
                self.play_finished.release()
                self.fit_cond.acquire()
                self.fit_cond.wait()
                (X, Y) = self.fit_q.get()
                if self.internal_flag:
                    # retrieved to learn the model
                    approx.fit(X, Y, self.X_train, self.Y_train, saver, fp=self.fp)
                    print 'AFTER VALUE ITERATION: ', approx.value(X_s)
                    print saver.last_checkpoints[-1]
                    self.internal_flag = False
                else:
                    # retrieved for validation
                    self.X_train = X
                    self.Y_train = Y
                    self.internal_flag = True
                # more synchronization before the new iteration
                self.fit_cond.release()
                self.stop_play.clear()
                self.fit_done.acquire()
                self.fit_done.notify_all()
                self.fit_done.release()
def run(self, I, N, graph_f, kwargs, name='', state=None, fn=None, prev_model=None):
    """
    The core method of the Supervisor. Runs an entire simulation.

    params:
        I: number of iterations
        N: number of episodes per iteration
        graph_f: function that builds the tensorflow graph
        kwargs: arguments for graph_f
        state: starting position of the episodes
        fn: where to store the final model
    """
    self.meta['kwargs'] = kwargs
    self.meta['state'] = state
    X = faster_featurize('6k1/8/8/5K2/8/8/8/8 w - -')
    pool_size = mp.cpu_count() - 2
    self.meta['cpus'] = pool_size
    mn = self.get_name() + '_' + name + '_' + time.strftime('_%d_%m', time.gmtime(time.time()))
    directory = os.path.join(Supervisor.STORE_DIR, mn)
    os.mkdir(directory)
    fp = os.path.join(directory, 'model')
    tim = time.time()

    # Creation of synchronization variables
    fit_q = mp.Queue()
    init_cond = mp.Condition()
    new_iter_cond = mp.Condition()
    stop_play = mp.Event()
    eps_change_ev = mp.Event()
    task_q = mp.JoinableQueue()
    q_lock = mp.RLock()
    res_q = mp.Queue()
    res_lock = mp.RLock()
    ep_task_q = mp.Queue()
    ep_task_lock = mp.RLock()
    print 'ep task queue {} created'.format(id(ep_task_q))

    # spawn the network process and one episode process per available cpu
    n_process = NetworkProcess(fit_q, graph_f, kwargs, init_cond,
                               os.path.join(directory, mn), ep_task_q,
                               ep_task_lock, stop_play)
    ep_proc = []
    for i in xrange(pool_size):
        n = 'EpisodeProcess-' + str(i)
        a, b = mp.Pipe()
        ep_proc.append(self.create_proc(task_q, q_lock, state, new_iter_cond,
                                        res_q, res_lock, ep_task_q, ep_task_lock,
                                        eps_change_ev, name=n))
        ep_proc[i].set_conn(a)
        n_process.register_conn(n, b)
    n_process.start()
    for proc in ep_proc:
        proc.start()

    # wait until the network process has finished initializing
    init_cond.acquire()
    init_cond.wait()
    init_cond.release()
    print 'Number of cpus used: {}'.format(pool_size)

    for i in xrange(2 * I):
        print 'Iteration {}'.format(i)
        eps_change_ev.set()
        W = 0
        ctr = 0
        while W < 50:
            ctr += 1
            start = time.time()
            for _ in xrange(N):
                task_q.put(1)
            for _ in xrange(pool_size):
                task_q.put(None)
            new_iter_cond.acquire()
            new_iter_cond.notify_all()
            new_iter_cond.release()
            task_q.join()
            end = time.time()

            # assimilate the data produced by the episode processes
            data = []
            while res_q.qsize() > 0:
                it = res_q.get()
                data.append(it)
            rewards = [t[0] for t in data]
            lengths = [t[1] for t in data]
            data = [t[2] for t in data]
            r_w = sum([r[0] for r in rewards]) / float(N)
            r_b = sum([r[1] for r in rewards]) / float(N)
            avg_len = sum(lengths) / float(N)
            outcomes = sum([dt.get_outcome() for dt in data
                            if not np.isnan(dt.get_outcome())])
            wins = sum([np.abs(dt.get_outcome()) for dt in data
                        if not np.isnan(dt.get_outcome())])
            avg_outcome = outcomes / float(N)
            win_rate = wins / float(N)
            W += wins
            mps = sum(lengths) / (end - start)

            starttime = time.time()
            for dt in data:
                self.dm.update(dt)
            avg_time = (time.time() - starttime) / float(len(data))
            print 'Glambda calculation TIME avg: {}\t size data:{}'.format(avg_time, self.dm.Y.shape)

            self.meta['sim_time'] += (end - start)
            self.meta['r_lists'][0].append(r_w)
            self.meta['r_lists'][1].append(r_b)
            self.meta['w_list'].append(wins)
            self.meta['avg_len'].append(avg_len)
            self.meta['mps'].append(mps)
            self.meta['eps'].append(self.pol.eps)
            self.meta['episodes'] += N
            self.meta['outcomes'].append(outcomes)
            self.meta['N'].append(N)
            print('I: {} episodes, {} wins in iteration {}, MPS: {}'.format(N * ctr, wins, i, mps))
        eps_change_ev.clear()
        self.pol.update()
        mps = sum(self.meta['mps'][-ctr:]) / ctr
        win_rate = sum(self.meta['w_list'][-ctr:]) / ctr
        outcome_rate = sum(self.meta['outcomes'][-ctr:]) / ctr
        avg_len = sum(self.meta['avg_len'][-ctr:]) / ctr
        print('I: MPS: {}\tWIN RATE: {}\tOUTCOME RATE: {}\tAVG LENGTH: {}'.format(
            mps, win_rate, outcome_rate, avg_len))
        print 'I: fitting data after {} episodes'.format(ctr * N)
        stop_play.set()
        n_process.play_f_wait()
        self.dm.write_out_windata('data_visualized.txt')
        self.dm.clean()
        X, Y = self.dm.get_data()
        #X,Y=self.dm.get_balanced_data()
        print 'Dimensions Data: ', X.shape, Y.shape
        fit_q.put((X, Y))
        # Synchronization between the NetworkProcess and the main process
        n_process.fit_notify()
        n_process.fit_wait()
        self.pol.eps = max([0.05, self.pol.eps - 0.05])
        print 'New Epsilon: {}'.format(self.pol.eps)

    # simulation finished: tear down the worker processes and store metadata
    for p in ep_proc:
        p.terminate()
    n_process.terminate()
    self.meta['elapsed_time'] += time.time() - tim
    print 'I: NetworkProcess terminated'
    self.store(os.path.join(directory, name + '_meta.sv'))
    print 'I: metadata stored in {}'.format(os.path.join(directory, name + '_meta.sv'))
settings.init()
settings.params['USE_DSET'] = args.dset
settings.params['OC_DEPTH'] = args.ocd
settings.params['MK'] = args.mk
change_PL(list(args.piece_conf))
settings.params['RAND'] = args.R
print settings.params['PL']
if settings.params['USE_DSET']:
    load_DS(args.ds_file)

# epsilon decay schedule for the exploration policy
if args.eps_factor == 0:
    decay_f = lambda n: 1 - 0.01 * n
else:
    decay_f = lambda n: (n + 1) ** (-args.eps_factor)

if args.old_model is None:
    D = faster_featurize('8/6k1/8/8/3K4/8/8/8 w - -').shape[1]
    print D
    M = [int(m) for m in args.M.split()]
    if args.cnn:
        c0 = len(args.piece_conf) * 2
        if args.cnn_f:
            kwargs = {
                'D': D,
                'M': M,
                'learning_rate': args.alpha,
                'F': [(1, 1), (2, 2), (3, 3), (3, 3)],
                'c0': c0,
                'C': [(c0,), (c0,), (c0,), (2 * c0,)]
            }
        else:
            kwargs = {
import multiprocessing as mp
import time

from learn.preprocessing import faster_featurize

S = ['2k5/8/3K4/8/8/3R4/8/8 w - -', '8/8/1k6/8/5K2/3R4/8/8 w - -']

# featurize the positions with a process pool
s = time.time()
p = mp.Pool(5)
r = p.map(faster_featurize, S)
e = time.time() - s
print 'Parallelized: ', e

# featurize the same positions sequentially for comparison
s = time.time()
r = [faster_featurize(st) for st in S]
e = time.time() - s
print 'Serialized: ', e
def put_and_get(self, s):
    # memoize feature vectors so each position is featurized only once
    if s not in self.f:
        self.f[s] = faster_featurize(s)
    return self.f[s]
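# Usage sketch for the memoization above (the wrapper class name
# `FeatureCache` is an assumption for illustration; the project class that
# holds `self.f` may look different):
class FeatureCache(object):
    def __init__(self):
        self.f = {}

    def put_and_get(self, s):
        if s not in self.f:
            self.f[s] = faster_featurize(s)
        return self.f[s]

cache = FeatureCache()
x1 = cache.put_and_get('6k1/8/8/5K2/8/8/8/8 w - -')  # featurized and stored
x2 = cache.put_and_get('6k1/8/8/5K2/8/8/8/8 w - -')  # returned from the cache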