Example #1
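(Fragment of an episode loop from the initialD training script; it assumes numpy as np, cv2, and the project's initialD_input module are imported, and that env, sess, pi, images, ACTIONS, n_ep, and all_scenes are defined earlier in the file.)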
            n_ep += 1
            cum_reward = 0.0
            n_steps = 0
            cum_td_loss = 0.0
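            # env.reset() returns the observation image together with the rule-based action hint.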
            img, rule_action = env.reset()
            # print "state shape: {}".format(state.shape)
            # print "state type: {}".format(type(state))
            # resize maybe different from tf.resize
            # tensor_state = tf.convert_to_tensor(state)
            # img = np.array([img])
            # tens_img = tf.image.resize_images(img, [224, 224])
            # img = tf.image.convert_image_dtype(img, tf.float32)
            # tens_img = initialD_input.preprocess_image(tens_img)
            # np_img = sess.run(tens_img)
            img = cv2.resize(img, (224, 224))
            np_img = initialD_input.preprocess_image(img)
            print "=========img shape: {}".format(img.shape) + "=========\n"

            using_learning_agent = True

            # Evaluate the policy head and act greedily on its output.
            np_probs = sess.run(pi, feed_dict={images: np.array([np_img])})
            action = np.argmax(np_probs)
            all_scenes.append([np.copy(img), action, np_probs])
            next_state, reward, done, info = env.step(ACTIONS[action])
            next_img, next_rule_action = next_state
            while True:
                n_steps += 1
Example #2
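(Larger excerpt from the same training script: a supervisor-managed session drives the agent's act/step loop; it assumes numpy as np, cv2, tensorflow as tf, and initialD_input are imported, and that sv, agent, env, config, summary_writer, record, ACTIONS, and n_ep are defined earlier in the file.)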
try:
    # config is assumed to be a tf.ConfigProto created earlier in the file.
    with sv.managed_session(config=config) as sess:
        agent.set_session(sess)
        n_steps = 0
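        # Outer loop: one iteration per episode until the process is killed.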
        while True:
            n_ep += 1
            cum_reward = 0.0
            cum_td_loss = 0.0
            cum_spg_loss = 0.0
            all_scenes = []
            state = env.reset()
            print "========reset======\n" * 5
            img = cv2.resize(state, (224, 224))
            pr_img = initialD_input.preprocess_image(img)
            while True:
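                # Inner loop: act, step the environment, then learn from the transition.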
                action = agent.act(state=pr_img, evaluate=False, sess=sess)
                print "action: ", action
                all_scenes.append([np.copy(img), action])
                next_state, reward, done, info = env.step(ACTIONS[action])
                next_img = cv2.resize(next_state, (224, 224))
                pr_next_img = initialD_input.preprocess_image(next_img)
                info = agent.step(state=pr_img, action=action, reward=reward, next_state=pr_next_img, episode_done=done)
                record(summary_writer, n_steps, info)
                n_steps += 1
                if done:  # avoid "is True"; done may be a numpy bool, not the bool singleton
                    print "========Run Done=======\n" * 5
                    break
                img = next_img
                pr_img = pr_next_img
        # print "========\n"*5
        # lr = graph.get_operation_by_name('lr').outputs[0]
        while True:
            n_ep += 1
            cum_reward = 0.0
            n_steps = 0
            cum_td_loss = 0.0
            img, rule_action = env.reset()
            # print "state shape: {}".format(state.shape)
            # print "state type: {}".format(type(state))
            # resize maybe different from tf.resize
            # tensor_state = tf.convert_to_tensor(state)
            # img = np.array([img])
            tens_img = tf.image.resize_images(img, [224, 224])
            # img = tf.image.convert_image_dtype(img, tf.float32)
            tens_img = initialD_input.preprocess_image(tens_img)
            np_img = sess.run(tens_img)

            print "=========img shape: {}".format(img.shape)+"=========\n"


            using_learning_agent = True

            # A single run fetches both the greedy action and the class probabilities.
            actions, np_probs = sess.run([network_train.preds, probs], feed_dict={
                network_train._images: np.array([np_img]),
                network_train.is_train: False})
            action = actions[0]

            all_scenes.append([np.copy(img), action, rule_action, np_probs])
            if action != rule_action:
                print "not equal, sl: ", action, " rule: ", rule_action