Example #1
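These snippets are excerpts from a single training script, so the module-level imports and constants they rely on were dropped by the excerpting. Below is a minimal preamble sketch reconstructed from the names the code uses; every value marked "assumed" is a guess, not the original setting.

# Assumed preamble for the examples on this page (a sketch, not the original
# file header). Example #4 additionally uses distorted_inputs(), a
# CIFAR-style input pipeline defined elsewhere in the project.
import random
import threading
import time

import numpy as np
import tensorflow as tf            # TF 1.x API (tf.placeholder, tf.Session, ...)
from PIL import Image
from scipy.misc import imsave      # removed in modern SciPy; imageio.imwrite is the usual replacement
import matplotlib.pyplot as plt
from matplotlib import animation

import pyosr                       # off-screen renderer used by these examples
import rldriver                    # RL driver wrapping renderer + network
import config                      # provides SV_VISCFG / MV_VISCFG
import util                        # provides mkdir_p
from rmsprop_applier import RMSPropApplier   # module path assumed

MODELS = ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj']
init_state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
view_config = [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)]
RMSP_ALPHA = 0.99                  # assumed
RMSP_EPSILON = 1e-10               # assumed
GRAD_NORM_CLIP = 40.0              # assumed
AA_OUTPUT_NUMBER = 6               # assumed (used by Example #10)
device = '/cpu:0'                  # assumed
THREAD = 4                         # assumed worker count
POLICIES = [0.05, 0.2, 0.5, 0.95]  # assumed epsilon choices
ckpt_dir = './ttorus/ckpt/'        # assumed
ckpt_prefix = 'torus-vs'           # assumed
graph_completes = [threading.Event() for _ in range(THREAD)]
init_done = threading.Event()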
def test_rldriver_main():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                init_state,
                view_config,
                config.SV_VISCFG,
                config.MV_VISCFG,
                use_rgb=True)
        driver = rldriver.RLDriver(MODELS,
                    init_state,
                    view_config,
                    config.SV_VISCFG,
                    config.MV_VISCFG,
                    use_rgb=True,
                    master_driver=masterdriver,
                    grads_applier=grad_applier)
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = learning_rate_input
        driver.a3c_local_t = 32
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step, 1, name='increment_global_step')
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored! global_step {}'.format(epoch))
            while epoch < 100 * 1000:
                driver.train_a3c(sess)
                epoch += 1
                sess.run(increment_global_step)
                if epoch % 1000 == 0 or time.time() - last_time >= 10 * 60:
                    print("Saving checkpoint")
                    fn = saver.save(sess, ckpt_dir + ckpt_prefix, global_step=global_step)
                    print("Saved checkpoint to {}".format(fn))
                    last_time = time.time()
                print("Epoch {}".format(epoch))
        # NOTE: an earlier revision of this function is preserved below as
        # dead code inside a triple-quoted string.
        '''
def test_rldriver_main():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            init_state,
            view_config,
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True)
        driver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            init_state,
            view_config,
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True,
            master_driver=masterdriver,
            grads_applier=grad_applier)
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = learning_rate_input
        driver.a3c_local_t = 2
        global_step = tf.contrib.framework.get_or_create_global_step()
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Restored!')
            epoch = 0
            while True:
                driver.train_a3c(sess)
                epoch += 1
                if epoch % 1000 == 0 or time.time() - last_time >= 10:
                    print("Saving checkpoint")
                    saver.save(sess, ckpt_dir, global_step=global_step)
                    last_time = time.time()
                print("Epoch {}".format(epoch))
        '''
def test_rldriver_main():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    init_state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5],
                          dtype=np.float32)
    g = tf.Graph()
    with g.as_default():
        driver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5],
                     dtype=np.float32), [(30.0, 12), (-30.0, 12), (0, 4),
                                         (90, 1), (-90, 1)],
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
    thread = threading.Thread(target=test_rldriver_worker,
                              args=(dpy, glctx, driver, g))
    thread.start()
    r = driver.renderer
    w = r.pbufferWidth
    h = r.pbufferHeight
    r.state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
    r.render_mvrgbd()
    img = r.mvrgb.reshape(w * r.views.shape[0], h, 3)
    dep = r.mvdepth.reshape(w * r.views.shape[0], h)
    depimg = Image.fromarray(dep)
    imsave('mvrgb-master.png', img)
    depimg.save('mvdepth-master.tiff')
    thread.join()
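A note on the two image dumps above: r.mvrgb saves directly as PNG, but r.mvdepth is presumably float32, so Image.fromarray() yields a mode-'F' image that PNG cannot hold, hence the TIFF. If an 8-bit depth preview is wanted instead, the buffer has to be normalized first; a small sketch under the same layout assumptions:

# Hypothetical helper: scale the float depth buffer into [0, 255] so it can
# be written as an ordinary 8-bit PNG preview.
def save_depth_preview(dep, path='mvdepth-preview.png'):
    lo, hi = float(dep.min()), float(dep.max())
    scaled = np.zeros_like(dep) if hi == lo else (dep - lo) / (hi - lo)
    Image.fromarray((scaled * 255.0).astype(np.uint8)).save(path)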
Example #4
def __init__(self, global_step, ckpt_dir='./cat/ckpt', data_dir='./cat/depth_data'):
    # A method of an enclosing trainer class; the ``class`` statement itself
    # is not shown in this example.
    self.ckpt_dir = ckpt_dir
    self.data_dir = data_dir
    self.images, self.labels = distorted_inputs(self.data_dir)
    self.driver = rldriver.RLDriver(['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32),
            [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)],
            config.SV_VISCFG,
            config.MV_VISCFG,  # the original passed config.SV_VISCFG twice; every other call site pairs SV with MV
            58,                # positionally this is output_number (cf. aa_train_main)
            input_tensor=self.images)
    print('sv_colorfv = {}'.format(self.driver.sv_colorfv.shape))
    print('mv_colorfv = {}'.format(self.driver.mv_colorfv.shape))
    print('final = {}'.format(self.driver.final.shape))
    self.logits = self.driver.final
    self.global_step = global_step
    print('(sparse) labels {}'.format(self.labels.shape))
    print('logits {}'.format(self.logits.shape))
    self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.labels, logits=self.logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(self.cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    opt = tf.train.AdamOptimizer(1e-4)
    self.train_op = opt.minimize(loss=self.loss, global_step=self.global_step)
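A hedged sketch of how this constructor's train_op might be driven; the class name Trainer is hypothetical, and the Coordinator/queue-runner plumbing assumes distorted_inputs() builds a TF1 queue-based input pipeline:

# Build the trainer and run a few optimization steps (a sketch, not the
# project's actual training entry point).
with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()
    trainer = Trainer(global_step)          # hypothetical enclosing class
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for _ in range(100):
            _, loss = sess.run([trainer.train_op, trainer.loss])
        coord.request_stop()
        coord.join(threads)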
def torus_master():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step,
                                              1,
                                              name='increment_global_step')
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                                         init_state,
                                         view_config,
                                         config.SV_VISCFG,
                                         config.MV_VISCFG,
                                         use_rgb=True)
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        with tf.Session() as sess:
            threads = []
            for i in range(THREAD):
                thread_args = (i, dpy, glctx, masterdriver, g, grad_applier,
                               learning_rate_input, global_step,
                               increment_global_step, sess, saver)
                thread = threading.Thread(target=torus_worker,
                                          args=thread_args)
                thread.start()
                graph_completes[i].wait()
                threads.append(thread)
            '''
            We need to run the initializer here because only the master's
            variables are stored in the checkpoint.
            '''
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored! global_step {}'.format(epoch))

            init_done.set()
            for thread in threads:
                thread.join()
            print("Saving final checkpoint")
            fn = saver.save(sess,
                            ckpt_dir + ckpt_prefix,
                            global_step=global_step)
            print("Saved checkpoint to {}".format(fn))
Example #6
def train_puzzle():
    pyosr.init()
    pyosr.create_gl_context(pyosr.create_display()) # FIXME: Each thread has one ctx
    init_state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
    with tf.Graph().as_default():
        driver = rldriver.RLDriver(['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
                np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32),
                [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)],
                config.SV_VISCFG,
                config.MV_VISCFG,
                use_rgb=True)
def test_rldriver_worker(dpy, glctx, masterdriver, tfgraph):
    pyosr.create_gl_context(dpy, glctx)  # OpenGL context for current thread.
    with tfgraph.as_default():
        driver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5],
                     dtype=np.float32), [(30.0, 12), (-30.0, 12), (0, 4),
                                         (90, 1), (-90, 1)],
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True,
            master_driver=masterdriver)
        sync_op = driver.get_sync_from_master_op()
    r = driver.renderer
    w = r.pbufferWidth
    h = r.pbufferHeight
    r.state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
    r.render_mvrgbd()
    print('worker mvrgb shape {}'.format(r.mvrgb.shape))
    img = r.mvrgb.reshape(w * r.views.shape[0], h, 3)
    dep = r.mvdepth.reshape(w * r.views.shape[0], h)
    depimg = Image.fromarray(dep)
    imsave('mvrgb-worker.png', img)
    depimg.save('mvdepth-worker.tiff')
    with tfgraph.as_default():
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            value_before = np.array(sess.run(driver.get_nn_args()[0][0][0]),
                                    dtype=np.float32)
            print('Before {}'.format(value_before))
            sess.run(sync_op)
            value_after = np.array(sess.run(driver.get_nn_args()[0][0][0]),
                                   dtype=np.float32)
            print('After {}'.format(value_after))
            print('Delta {}'.format(np.linalg.norm(value_before -
                                                   value_after)))
def torus_worker(index, dpy, glctx, masterdriver, tfgraph, grad_applier,
                 lrtensor, global_step, increment_global_step, sess, saver):
    global graph_completes
    global init_done
    pyosr.create_gl_context(dpy, glctx)  # OpenGL context for current thread.
    with tfgraph.as_default():
        driver = rldriver.RLDriver(MODELS,
                                   init_state,
                                   view_config,
                                   config.SV_VISCFG,
                                   config.MV_VISCFG,
                                   use_rgb=True,
                                   master_driver=masterdriver,
                                   grads_applier=grad_applier,
                                   worker_thread_index=index)
        print("THREAD {} DRIVER CREATED".format(index))
        driver.epsilon = 1.0 - (index + 1) * (1.0 / (THREAD + 1))
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = lrtensor
        # driver.a3c_local_t = 32
        graph_completes[index].set()
        init_done.wait()
        '''
        if index == 0:
            for i in range(1,4):
                graph_completes[i].wait()
                print("Graph {} waited".format(i))
            sess.run(tf.global_variables_initializer())
            init_done.set()
        else:
            graph_completes[index].set()
            print("Graph {} Set".format(index))
            init_done.wait()
            print("Init_done on thread {}, continuing".format(index))
        '''
        last_time = time.time()
        # FIXME: checkpoints under ttorus/ckpt-mt-5 do not load global_step back into epoch
        epoch = 0
        driver.verbose_training = True
        while epoch < 100 * 1000:
            #while epoch < 2 * 1000:
            driver.epsilon = random.choice(POLICIES)
            driver.train_a3c(sess)
            epoch += 1
            sess.run(increment_global_step)
            if index == 0 and time.time() - last_time >= 60 * 10:
                print("Saving checkpoint")
                fn = saver.save(sess,
                                ckpt_dir + ckpt_prefix,
                                global_step=global_step)
                print("Saved checkpoint to {}".format(fn))
                last_time = time.time()
            print("[{}] Epoch {}, global_step {}".format(
                index, epoch, sess.run(global_step)))
            if index == 0:
                sess.run(driver.get_sync_from_master_op())
                driver.renderer.state = init_state
                _, value, _, _ = driver.evaluate(sess)
                print("Master Driver V for init_state {}".format(value))
                # Choose some random initial conf for better training
                if random.random() > driver.epsilon:
                    driver.restart_epoch()
            else:
                driver.restart_epoch()
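The line driver.epsilon = 1.0 - (index + 1) * (1.0 / (THREAD + 1)) staggers the initial exploration rate evenly across workers, although the training loop then overrides it every step with random.choice(POLICIES). With the assumed THREAD = 4 the initial schedule works out to:

# Initial per-worker epsilon under the assumed THREAD = 4.
for index in range(4):
    print(index, round(1.0 - (index + 1) * (1.0 / (4 + 1)), 2))
# 0 0.8
# 1 0.6
# 2 0.4
# 3 0.2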
def show_torus_ring():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                init_state,
                view_config,
                config.SV_VISCFG,
                config.MV_VISCFG,
                use_rgb=True)
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step, 1, name='increment_global_step')
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            policy_before, value_before, _, _ = masterdriver.evaluate(sess)
            #print("Last b before {}".format(sess.run(masterdriver.get_nn_args()[-2])))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored! global_step {}'.format(epoch))
            else:
                print('Cannot find checkpoint at {}'.format(ckpt_dir))
                return
            policy_after, value_after, _, _ = masterdriver.evaluate(sess)
            print("Value Before Restoring {} and After {}".format(value_before, value_after))
            # print("Last b {}".format(sess.run(masterdriver.get_nn_args()[-2])))
            driver = masterdriver
            r = masterdriver.renderer
            fig = plt.figure()
            class ReAnimator(object):
                reaching_terminal = False
                driver = None
                im = None
                sess = None

                def __init__(self, driver, sess):
                    self.driver = driver
                    self.sess = sess

                def perform(self, framedata):
                    driver = self.driver
                    r = driver.renderer
                    sess = self.sess
                    if not self.reaching_terminal:
                        policy, value, img, dep = driver.evaluate(sess)
                        policy = policy.reshape(driver.action_size)
                        action = driver.make_decision(policy, sess)
                        nstate, reward, self.reaching_terminal = driver.get_reward(action)
                        valid = r.is_valid_state(nstate)
                        print('Current Value {} Policy {} Action {} Reward {}'.format(value, policy, action, reward))
                        print('\tNew State {} Collision Free ? {}'.format(nstate, valid))
                        # print('Action {}, New State {}'.format(action, nstate))
                        rgb = np.squeeze(img[0, 0, :, :, :], axis=[0, 1])
                        if self.im is None:
                            print('rgb {}'.format(rgb.shape))
                            self.im = plt.imshow(rgb)
                        else:
                            self.im.set_array(rgb)
                        r.state = nstate
            ra = ReAnimator(driver, sess)
            ani = animation.FuncAnimation(fig, ra.perform)
            plt.show()
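animation.FuncAnimation as called above re-invokes ra.perform for each drawn frame until the window is closed; perform() keeps stepping the policy and writing the new state back into the renderer, so the plot replays the learned behaviour live. A hedged variant that bounds the replay using real FuncAnimation parameters (the values are assumptions):

# Bounded replay: 200 frames, one every 50 ms, no looping.
ani = animation.FuncAnimation(fig, ra.perform, frames=200, interval=50,
                              repeat=False)
plt.show()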
Example #10
def aa_train_main(args):
    ckpt_dir = args.ckptdir
    ckpt_prefix = args.ckptprefix
    device = args.device
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                                         init_state,
                                         view_config,
                                         config.SV_VISCFG,
                                         config.MV_VISCFG,
                                         output_number=AA_OUTPUT_NUMBER,
                                         use_rgb=True,
                                         continuous_policy_loss=True)
        driver = rldriver.RLDriver(MODELS,
                                   init_state,
                                   view_config,
                                   config.SV_VISCFG,
                                   config.MV_VISCFG,
                                   output_number=AA_OUTPUT_NUMBER,
                                   use_rgb=True,
                                   master_driver=masterdriver,
                                   grads_applier=grad_applier,
                                   continuous_policy_loss=True)
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = learning_rate_input
        driver.a3c_local_t = 32
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step,
                                              1,
                                              name='increment_global_step')
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored! global_step {}'.format(epoch))
            while epoch < args.iter:
                fn = "{}/{}{:06d}.npz".format(args.path, args.prefix,
                                              epoch % args.gtnumber)
                dic = np.load(fn)
                driver.train_from_gt(sess, dic['KEYS'], dic['TR'], dic['ROT'],
                                     dic['DIST'])
                epoch += 1
                sess.run(increment_global_step)
                if epoch % 1000 == 0 or time.time() - last_time >= 10 * 60:
                    print("Saving checkpoint")
                    fn = saver.save(sess,
                                    ckpt_dir + ckpt_prefix,
                                    global_step=global_step)
                    print("Saved checkpoint to {}".format(fn))
                    last_time = time.time()
                print("Epoch {}".format(epoch))