Example #1
def V(S, model_fn):
    """Value function of states S under a serialized tf model checkpoint."""
    with tf.Session() as sess:
        # rebuild the graph from the .meta file, then restore the weights
        saver = tf.train.import_meta_graph(model_fn + '.meta')
        saver.restore(sess, model_fn)
        approx = Approximator(sess)
        # featurize every state and flatten to (n_states, n_features)
        x = np.array([faster_featurize(s) for s in S])
        print x.shape  # debug: raw featurized shape
        x = np.reshape(x, (x.shape[0], x.shape[-1]))
        print x.shape  # debug: flattened shape
        v = approx.value(x)
    return v
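# --- Usage sketch (not from the original source): assumes states are FEN
# strings, as in the other examples, and that a TF 1.x checkpoint exists
# under the hypothetical prefix below (i.e. model-1000.meta plus weight files).
states = ['6k1/8/8/5K2/8/8/8/8 w - -',
          '8/6k1/8/8/3K4/8/8/8 w - -']
values = V(states, 'store/krk_run/model-1000')
print values  # one approximate state value per FEN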
Example #2
    def run(self):
        np.random.seed()  # reseed this child process with fresh OS entropy
        saver=tf.train.import_meta_graph(self.prev_model+'.meta')
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)) as sess:
            saver.restore(sess,self.prev_model)
            approx=Approximator(sess)
            with sess.as_default():
                forward=tf.get_collection('forward')[0]
                X_pl=tf.get_collection('X_pl')[0]
                error=tf.get_collection('loss')

                self.init_cond.acquire()
                self.init_cond.notify_all()
                self.init_cond.release()
                X_s=faster_featurize('6k1/8/8/5K2/8/8/8/8 w - -')            
                while True:
                    # waiting for an external signal to terminate

                    print 'BEFORE VALUE ITERATION: ',approx.value(X_s) 
                    while not self.stop_play.is_set():
                        try:
                            (proc_name,X)=self.ep_task_q.get(timeout=0.01)
                        except Exception:
                            # queue empty within the timeout; poll again
                            continue
                        Y=approx.value(X)
                        self.conns[proc_name].send(Y)
                    
                    self.play_finished.acquire()
                    self.play_finished.notify_all()
                    self.play_finished.release()

                    self.fit_cond.acquire()
                    self.fit_cond.wait()

                    (X,Y)=self.fit_q.get()
                    if self.internal_flag:
                        approx.fit(X,Y,self.X_train,self.Y_train,saver,fp=self.fp)
                        print 'AFTER VALUE ITERATION: ',approx.value(X_s) 
                        print saver.last_checkpoints[-1]
                        self.internal_flag=False
                    else:
                        self.X_train=X
                        self.Y_train=Y
                        self.internal_flag=True

                    self.fit_cond.release()

                    self.stop_play.clear()

                    self.fit_done.acquire()
                    self.fit_done.notify_all()
                    self.fit_done.release()
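# --- Sketch (not from the original source): this method and Example #4 read
# 'forward', 'X_pl' and 'loss' out of graph collections after restoring, which
# only works if the save side registered them. A minimal save-side counterpart
# with a placeholder graph and path (not the project's actual graph builder):
import tensorflow as tf

X_pl = tf.placeholder(tf.float32, [None, 3], name='X_pl')
W = tf.Variable(tf.zeros([3, 1]))
forward = tf.matmul(X_pl, W)
tf.add_to_collection('X_pl', X_pl)        # read back via get_collection('X_pl')
tf.add_to_collection('forward', forward)  # read back via get_collection('forward')

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    saver = tf.train.Saver()
    saver.save(sess, 'store/run/model')   # writes model.meta plus weight files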
Example #3
    def get_av_pairs(self, env):
        """
        get action value (AV) pairs corresponding with Environment 
        """
        as_pairs = env.get_as_pairs()
        # need to take into account that it's a zero sum game
        # invert value if black
        S = [t[1] for t in as_pairs]
        N = len(S)
        S = np.array([faster_featurize(s) for s in S])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))

        v = self.approx.value(S)
        v = map_side_to_int(env.get_turn()) * v

        av = [(as_pairs[i][0], v[i, 0]) for i in xrange(N)]
        return av
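# --- Sketch (not from the original source): map_side_to_int is not shown, but
# given the zero-sum comment above it presumably maps white to +1 and black to
# -1. Assuming env.get_turn() returns 'w' or 'b':
def map_side_to_int(side):
    # White keeps the network's value; Black negates it, so both sides
    # can pick the action that maximizes the same scalar.
    return 1 if side == 'w' else -1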
Example #4
    def run(self):
        # reseed this process; seed() without an argument draws fresh OS
        # entropy, so each worker gets its own random stream
        np.random.seed()

        with tf.Session(graph=self.graph,config=tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)) as sess:
            approx=Approximator(sess)
            with sess.as_default():
                saver=tf.train.Saver()
                sess.run(tf.initialize_all_variables())

                forward=tf.get_collection('forward')[0]
                X_pl=tf.get_collection('X_pl')[0]
                error=tf.get_collection('loss')

                self.init_cond.acquire()
                self.init_cond.notify_all()
                self.init_cond.release()

                # fixed example position used as a sanity probe during the run
                X_s=faster_featurize('6k1/8/8/5K2/8/8/8/8 w - -')            

                while True:
                    # waiting for an external signal to terminate

                    print 'BEFORE VALUE ITERATION: ',approx.value(X_s) 
                    while not self.stop_play.is_set():
                        # fetch tasks and calculate approximations
                        try:
                            (proc_name,X)=self.ep_task_q.get(timeout=0.01)
                        except Exception:
                            # queue empty within the timeout; poll again
                            continue
                        Y=approx.value(X)
                        self.conns[proc_name].send(Y)
                    
                    # Synchronization with finished EpisodeProcesses -> start
                    # fitting
                    self.play_finished.acquire()
                    self.play_finished.notify_all()
                    self.play_finished.release()
                    self.fit_cond.acquire()
                    self.fit_cond.wait()

                    (X,Y)=self.fit_q.get()
                    if self.internal_flag:
                        # second batch of the pair: fit the model, using the
                        # previously stored batch as validation data
                        approx.fit(X,Y,self.X_train,self.Y_train,saver,fp=self.fp)
                        print 'AFTER VALUE ITERATION: ',approx.value(X_s) 
                        print saver.last_checkpoints[-1]
                        self.internal_flag=False
                    else:
                        # first batch of the pair: held out as validation
                        # data for the next fit
                        self.X_train=X
                        self.Y_train=Y
                        self.internal_flag=True

                    # more synchronization before new iteration
                    self.fit_cond.release()
                    self.stop_play.clear()
                    self.fit_done.acquire()
                    self.fit_done.notify_all()
                    self.fit_done.release()
Example #5
    def run(self,
            I,
            N,
            graph_f,
            kwargs,
            name='',
            state=None,
            fn=None,
            prev_model=None):
        """ the core method of the Supervisor. Runs an entire simulation.
         params:
             I: number of iterations
             N: number of episodes every iteration
             graph_f: how to build a tensorflow graph
             kwargs: arguments for graph_f
             state: starting position of the episodes
             fn: where to store the final model
        """

        self.meta['kwargs'] = kwargs
        self.meta['state'] = state

        X = faster_featurize('6k1/8/8/5K2/8/8/8/8 w - -')

        pool_size = mp.cpu_count() - 2
        self.meta['cpus'] = pool_size

        mn = self.get_name() + '_' + name + '_' + time.strftime(
            '_%d_%m', time.gmtime(time.time()))
        directory = os.path.join(Supervisor.STORE_DIR, mn)
        os.mkdir(directory)
        fp = os.path.join(directory, 'model')

        tim = time.time()

        # Creation of synchronization variables
        fit_q = mp.Queue()
        init_cond = mp.Condition()
        new_iter_cond = mp.Condition()
        stop_play = mp.Event()
        eps_change_ev = mp.Event()

        task_q = mp.JoinableQueue()
        q_lock = mp.RLock()
        res_q = mp.Queue()
        res_lock = mp.RLock()
        ep_task_q = mp.Queue()
        ep_task_lock = mp.RLock()
        print 'ep task queue {} created'.format(id(ep_task_q))

        n_process = NetworkProcess(fit_q, graph_f, kwargs, init_cond,
                                   os.path.join(directory, mn), ep_task_q,
                                   ep_task_lock, stop_play)
        ep_proc = []
        for i in xrange(pool_size):
            n = 'EpisodeProcess-' + str(i)
            a, b = mp.Pipe()
            ep_proc.append(
                self.create_proc(task_q,
                                 q_lock,
                                 state,
                                 new_iter_cond,
                                 res_q,
                                 res_lock,
                                 ep_task_q,
                                 ep_task_lock,
                                 eps_change_ev,
                                 name=n))
            ep_proc[i].set_conn(a)
            n_process.register_conn(n, b)

        n_process.start()
        for proc in ep_proc:
            proc.start()

        init_cond.acquire()
        init_cond.wait()
        init_cond.release()

        print 'Number of cpus used: {}'.format(pool_size)

        # two loop passes per value iteration: the NetworkProcess alternates
        # between stashing validation data and fitting (see internal_flag)
        for i in xrange(2 * I):
            print 'Iteration {}'.format(i)
            #print 'BEFORE SELF PLAY: ',sess.run(forward,{X_pl:X})
            eps_change_ev.set()

            W = 0
            ctr = 0
            while W < 50:
                ctr += 1

                start = time.time()

                for _ in xrange(N):
                    task_q.put(1)
                for _ in xrange(pool_size):
                    task_q.put(None)

                #print 'All processes: {}'.format(mp.active_children())
                new_iter_cond.acquire()
                new_iter_cond.notify_all()
                #print 'I: {} notifying all threads: new iteration'.format(mp.current_process().name)
                new_iter_cond.release()

                task_q.join()

                end = time.time()

                # TODO here: assimilate data
                data = []
                while res_q.qsize() > 0:
                    it = res_q.get()
                    data.append(it)

                rewards = [t[0] for t in data]
                lengths = [t[1] for t in data]
                data = [t[2] for t in data]

                #[rewards,lengths,data]=map(list,zip(*data))
                r_w = sum([r[0] for r in rewards]) / float(N)
                r_b = sum([r[1] for r in rewards]) / float(N)
                avg_len = sum(lengths) / float(N)
                outcomes = sum([
                    dt.get_outcome() for dt in data
                    if not np.isnan(dt.get_outcome())
                ])
                wins = sum([
                    np.abs(dt.get_outcome()) for dt in data
                    if not np.isnan(dt.get_outcome())
                ])
                avg_outcome = outcomes / float(N)
                win_rate = wins / float(N)
                #print wins
                W += wins

                #print avg_len, win_rate, r_w, r_b
                mps = sum(lengths) / (end - start)

                starttime = time.time()
                for dt in data:
                    self.dm.update(dt)
                avg_time = (time.time() - starttime) / float(len(data))
                print 'Glambda calculation TIME avg: {}\t size data:{}'.format(
                    avg_time, self.dm.Y.shape)

                self.meta['sim_time'] += (end - start)
                self.meta['r_lists'][0].append(r_w)
                self.meta['r_lists'][1].append(r_b)
                self.meta['w_list'].append(wins)
                self.meta['avg_len'].append(avg_len)
                self.meta['mps'].append(mps)
                self.meta['eps'].append(self.pol.eps)
                self.meta['episodes'] += N
                self.meta['outcomes'].append(outcomes)
                self.meta['N'].append(N)

                print('I: {} episodes, {} wins in iteration {}, MPS: {}'.format(
                    N * ctr, wins, i, mps))
                eps_change_ev.clear()

            self.pol.update()
            mps = sum(self.meta['mps'][-ctr:]) / ctr
            win_rate = sum(self.meta['w_list'][-ctr:]) / ctr
            outcome_rate = sum(self.meta['outcomes'][-ctr:]) / ctr
            avg_len = sum(self.meta['avg_len'][-ctr:]) / ctr
            print('I: MPS: {}\tWIN RATE: {}\tOUTCOME RATE: {}\tAVG LENGTH: {}'.
                  format(mps, win_rate, outcome_rate, avg_len))
            print 'I: fitting data after {} episodes'.format(ctr * N)

            #print 'ID stop_play main {}'.format(id(stop_play))
            stop_play.set()
            n_process.play_f_wait()

            self.dm.write_out_windata('data_visualized.txt')

            self.dm.clean()
            X, Y = self.dm.get_data()
            #X,Y=self.dm.get_balanced_data()
            print 'Dimensions Data: ', X.shape, Y.shape

            fit_q.put((X, Y))

            # Synchronization between NetworkProcess & mainProcess
            n_process.fit_notify()
            n_process.fit_wait()

            self.pol.eps = max([0.05, self.pol.eps - 0.05])
            print 'New Epsilon: {}'.format(self.pol.eps)

        for p in ep_proc:
            p.terminate()
        n_process.terminate()
        self.meta['elapsed_time'] += time.time() - tim
        print 'I: NetworkProcess terminated'
        self.store(os.path.join(directory, name + '_meta.sv'))
        print 'I: metadata stored in {}'.format(
            os.path.join(directory, name + '_meta.sv'))
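# --- Sketch (not from the original source): the play_finished / fit_cond /
# fit_done handshakes above depend on careful sequencing, because a notify_all
# that fires before the peer reaches wait() is lost. Pairing each Condition
# with an explicit flag makes the pattern robust; a stripped-down, runnable
# illustration with hypothetical names:
import multiprocessing as mp

def worker(cond, done, go, finished):
    cond.acquire()
    while not go.value:          # predicate loop: an early notify is not lost
        cond.wait()
    cond.release()
    # ... fitting would happen here ...
    done.acquire()
    finished.value = 1
    done.notify_all()
    done.release()

if __name__ == '__main__':
    cond, done = mp.Condition(), mp.Condition()
    go, finished = mp.Value('i', 0), mp.Value('i', 0)
    p = mp.Process(target=worker, args=(cond, done, go, finished))
    p.start()
    cond.acquire()
    go.value = 1
    cond.notify_all()            # hand work to the worker
    cond.release()
    done.acquire()
    while not finished.value:
        done.wait()              # block until the worker reports back
    done.release()
    p.join()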
Example #6
    settings.init()
    settings.params['USE_DSET'] = args.dset
    settings.params['OC_DEPTH'] = args.ocd
    settings.params['MK'] = args.mk
    change_PL(list(args.piece_conf))
    settings.params['RAND'] = args.R
    print settings.params['PL']
    if settings.params['USE_DSET']:
        load_DS(args.ds_file)
    if args.eps_factor == 0:
        decay_f = lambda n: 1 - 0.01 * n
    else:
        decay_f = lambda n: (n + 1)**(-args.eps_factor)

    if args.old_model is None:
        D = faster_featurize('8/6k1/8/8/3K4/8/8/8 w - -').shape[1]
        print D  # dimensionality of the feature vector
        M = [int(m) for m in args.M.split()]
        if args.cnn:
            c0 = len(args.piece_conf) * 2
            if args.cnn_f:
                kwargs = {
                    'D': D,
                    'M': M,
                    'learning_rate': args.alpha,
                    'F': [(1, 1), (2, 2), (3, 3), (3, 3)],
                    'c0': c0,
                    'C': [(c0, ), (c0, ), (c0, ), (2 * c0, )]
                }
            else:
                kwargs = {
Example #7
import multiprocessing as mp
import time
from learn.preprocessing import faster_featurize

S=['2k5/8/3K4/8/8/3R4/8/8 w - -','8/8/1k6/8/5K2/3R4/8/8 w - -']

s=time.time()
p=mp.Pool(5)
r=p.map(faster_featurize,S)  # featurize across a pool of 5 worker processes
e=time.time()-s
print 'Parallelized: ', e

s=time.time()
r=[faster_featurize(st) for st in S]  # same work in a single process
e=time.time()-s
print 'Sequential: ', e
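
# --- Sketch (not from the original source): with only two positions the pool
# timing above is dominated by forking five workers and pickling arguments, so
# the sequential loop can win. Amortizing startup over a larger batch is a
# fairer comparison; the repetition factor is arbitrary.
S_big = S * 500
s = time.time()
r = p.map(faster_featurize, S_big, 50)   # chunksize 50 reduces IPC overhead
print 'Parallelized (large batch): ', time.time() - s
p.close()
p.join()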

Example #8
    def put_and_get(self, s):
        """Featurize state s, caching the result in self.f."""
        if s not in self.f:
            self.f[s] = faster_featurize(s)
        return self.f[s]
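
# --- Sketch (not from the original source): put_and_get evidently lives on a
# class that keeps a dict cache in self.f. A minimal self-contained wrapper
# with a hypothetical class name:
class FeatureCache(object):
    """Memoizes faster_featurize results per FEN string."""

    def __init__(self):
        self.f = {}  # FEN string -> feature vector

    def put_and_get(self, s):
        if s not in self.f:
            self.f[s] = faster_featurize(s)
        return self.f[s]

cache = FeatureCache()
x = cache.put_and_get('6k1/8/8/5K2/8/8/8/8 w - -')  # computed and cached
x = cache.put_and_get('6k1/8/8/5K2/8/8/8/8 w - -')  # served from the cache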