Example #1
def enjoyPrius(args):
    prius = simulator.Prius(args)
    time.sleep(0.5)
    while True:
        if args.mode == 0:
            # drive with throttle args.value and a fixed steering command of 0.2
            prius.control_prius(args.value, 0.2, 0)
        if args.mode == 1:
            prius.control_world()
            break
        # log the vehicle's current orientation
        print prius.pose().orientation.x
        print '===================='
    pass
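
The args object used above is never shown in these snippets. The sketch below is one minimal, hypothetical way to build it with argparse; every flag name and default here is an assumption for illustration, not the project's actual command line.

# Hypothetical argument parser for these examples; flag names and defaults are assumptions.
import argparse

def parseArgs():
    parser = argparse.ArgumentParser(description="Prius simulator demo (assumed flags)")
    parser.add_argument("--mode", type=int, default=0,
                        help="0: drive the Prius, 1: hand control back to the world")
    parser.add_argument("--value", type=float, default=0.2,
                        help="throttle pedal percentage passed to control_prius")
    parser.add_argument("--road", default="default_road",
                        help="road description consumed by En.Env in the later examples")
    parser.add_argument("--vehicle", default="prius")
    parser.add_argument("--track", default="default_track")
    return parser.parse_args()
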
Example #2
def enjoyPrius(args):
    prius = simulator.Prius(args)
    env = En.Env(args.road, args.vehicle, args.track) 

    while True:
        # repeatedly check whether the current collision state ends the episode
        print(env._terminal(prius.collisions()))
        print '===================='
    pass
Example #3
def enjoyPrius(args):
    # controlling loop
    global going_on
    signal.signal(signal.SIGINT, handler)
    going_on = True

    # the Prius and its environment
    prius = simulator.Prius(args)
    thread_update_pos = threading.Thread(target=updatePriusPos, args=(prius,))
    thread_update_collisions = threading.Thread(target=updatePriusCollision, args=(prius,))
    thread_update_pos.start()
    thread_update_collisions.start()
    env = En.Env(args.road, args.vehicle, args.track) 

    # network session and its parameters
    sess = tf.InteractiveSession()
    inputState, outputQ, h_fc1 = createNetwork()
    
    # action chosen by the policy pi
    action = tf.placeholder("float", [None, ACTIONS])
    # target Q value for the actions just taken
    target_q = tf.placeholder("float", [None])
    # predicted Q value of the actions actually taken
    action_q = tf.reduce_sum(tf.multiply(outputQ, action), reduction_indices=1)
    # minimize the squared error between predicted and target Q
    cost = tf.reduce_mean(tf.square(target_q - action_q))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    while terminal:
        prius.reset()
        time.sleep(0.2)
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    # drive the Prius with 20% throttle, zero steering, and no brake
    prius.control_prius(0.2, 0, 0)
    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    state_t = np.stack((x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t), axis=2)
    state_t1 = state_t

    epsilon = INITIAL_EPSILON
    step = 1

    store = deque()
    simTime = time.time()

    while going_on:
        # always run at high frequency
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
        # train when step over observation stage
        if step > OBSERVE:
            # sample a minibatch to train
            minibatch = random.sample(store, BATCH)
            # get the batch variables
            state_j_batch = [d[0] for d in minibatch]
            action_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            state_j1_batch = [d[3] for d in minibatch]
            target_q_batch = []
            q_action1_batch = outputQ.eval(feed_dict = {inputState: state_j1_batch})
            for i in range(0, len(minibatch)):
                terminal_j = minibatch[i][4]
                if terminal_j:
                    target_q_batch.append(reward_batch[i])
                else:
                    # the 20 Q outputs are treated as 10 consecutive pairs of actions;
                    # each pair's best Q value contributes to the target
                    target_q_batch.append(reward_batch[i] + GAMMA * \
                                      ( np.max(q_action1_batch[i][0:2] ) + \
                                        np.max(q_action1_batch[i][2:4] ) + \
                                        np.max(q_action1_batch[i][4:6] ) + \
                                        np.max(q_action1_batch[i][6:8] ) + \
                                        np.max(q_action1_batch[i][8:10] ) + \
                                        np.max(q_action1_batch[i][10:12]) +\
                                        np.max(q_action1_batch[i][12:14]) +\
                                        np.max(q_action1_batch[i][14:16]) +\
                                        np.max(q_action1_batch[i][16:18]) +\
                                        np.max(q_action1_batch[i][18:20])
                                     )  )
            train_step.run(feed_dict = {
                target_q: target_q_batch,
                action: action_batch,
                inputState: state_j_batch }
                )
        # save progress every 10000 iterations
        if step % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = step)
        # step > OBSERVE end train_step

        # control at a frequency of 10 Hz
        if (time.time() - simTime < 0.098) and (not terminal):
            continue
        simTime = time.time()

        # current Q values for the stacked state
        qV_t = outputQ.eval(feed_dict={inputState: [state_t]})[0]
        # epsilon-greedy action selection
        action_array_t = np.zeros([ACTIONS])
        action_angle_t = 0
        # scale down epsilon
        if epsilon > FINAL_EPSILON and step > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        if random.random() < epsilon:
            action_array_t, action_angle_t = getRandomAction()
        else:
            action_array_t, action_angle_t = getAction(qV_t)

        prius.control_prius(0.2, -1*action_angle_t, 0)
        x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
        ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
        x_t = np.reshape(x_t, (160, 160, 1))
        state_t1 = np.append(x_t, state_t[:, :, :9], axis=2)
        store.append((state_t, action_array_t, reward, state_t1, terminal))
        if len(store) > REPLAY_MEMORY:
            store.popleft()
        # print the step index and current reward
        line1 = "step :====================== ",step,"======================== "
        line3 = "reward                       ",reward,"                      "
        if terminal:
            printRed(line1)
            printRed(line3)
        elif reward < 0.04:
            printYellow(line1)
            printYellow(line3)
        elif reward < 0.1:
            printCyan(line1)
            printCyan(line3)
        else:
            printGreen(line1)
            printGreen(line3)
        # if terminal, restart
        if terminal:
            prius.reset()
            time.sleep(0.2)
            x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
            x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
            ret , x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
            state_t1 = np.stack((x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t), axis=2)
        state_t = state_t1
        step += 1
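
Example #3 performs the same image preprocessing in three separate places: the rendered frame is resized to 160x160, converted to grayscale, binarized, and stacked with the previous nine frames to form the network input. The helper below is a small sketch that factors this out; preprocessFrame, initialState, and pushFrame are hypothetical names, not functions from the original code.

import cv2
import numpy as np

def preprocessFrame(frame):
    # resize to 160x160, convert to grayscale, and binarize, as in the example
    gray = cv2.cvtColor(cv2.resize(frame, (160, 160)), cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    return binary

def initialState(frame):
    # the initial state repeats the same frame ten times along the channel axis
    binary = preprocessFrame(frame)
    return np.stack([binary] * 10, axis=2)

def pushFrame(state, frame):
    # prepend the newest frame and drop the oldest, keeping ten channels
    binary = np.reshape(preprocessFrame(frame), (160, 160, 1))
    return np.append(binary, state[:, :, :9], axis=2)
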
Example #4
def enjoyPrius(args):
    # controlling loop
    global going_on
    signal.signal(signal.SIGINT, handler)
    going_on = True

    env = En.Env(args.road, args.vehicle, args.track)

    # network session and its parameters
    sess = tf.InteractiveSession()
    inputState, outputQ, h_fc1 = createNetwork()

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("zero_networks_add")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    # a second session and network used for the obstacle-avoidance policy
    sessAvoid = tf.InteractiveSession()
    sessAvoid.run(tf.initialize_all_variables())
    inputStateAvoid, outputQAvoid, h_fc1Avoid = createNetwork()
    action = tf.placeholder("float", [None, ACTIONS])
    target_q = tf.placeholder("float", [None])
    action_q = tf.reduce_sum(tf.multiply(outputQ, action), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(target_q - action_q))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
    saverAvoid = tf.train.Saver()
    sessAvoid.run(tf.initialize_all_variables())
    checkpointAvoid = tf.train.get_checkpoint_state("avoid_networks_add")
    if checkpointAvoid and checkpointAvoid.model_checkpoint_path:
        saverAvoid.restore(sessAvoid, checkpointAvoid.model_checkpoint_path)
        print("Successfully loaded:", checkpointAvoid.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    prius = simulator.Prius(args)
    thread_update_pos = threading.Thread(target=updatePriusPos, args=(prius, ))
    thread_update_collisions = threading.Thread(target=updatePriusCollision,
                                                args=(prius, ))
    thread_update_velocity = threading.Thread(target=updatePriusVelocity,
                                              args=(prius, ))
    thread_update_pos.start()
    thread_update_collisions.start()
    thread_update_velocity.start()
    prius.reset()
    time.sleep(0.5)
    prius.control_world()
    x_t, reward, terminal, intersection, errorYaw = env.render(
        prius.collisions(), prius.pose(), prius.velocity())
    terminal = prius.terminal() or terminal
    if terminal:
        reward = -1
    while terminal and going_on:
        print(terminal)
        prius.reset()
        time.sleep(0.5)
        x_t, reward, terminal, intersection, errorYaw = env.render(
            prius.collisions(), prius.pose(), prius.velocity())
        terminal = prius.terminal() or terminal
        if terminal:
            reward = -1
    # drive the Prius with 40% throttle, zero steering, and no brake
    prius.control_prius(0.4, 0, 0)
    x_t, reward, terminal, intersection, errorYaw = env.render(
        prius.collisions(), prius.pose(), prius.velocity())
    reward = round(reward, 2)
    terminal = prius.terminal() or terminal
    if terminal:
        reward = -1
    x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    state_t = np.stack((x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t),
                       axis=2)
    state_t1 = state_t

    epsilon = INITIAL_EPSILON
    step = INITIAL_STEP

    store = deque()
    simTime = time.time()

    action_array_t = np.zeros([ACTIONS])
    action_angle_t = 0

    while going_on:
        # always run at high frequency
        x_t, reward, terminal, intersection, errorYaw = env.render(
            prius.collisions(), prius.pose(), prius.velocity())
        reward = round(reward, 2)
        terminal = prius.terminal() or terminal
        if terminal:
            reward = -1
        if step > OBSERVE + INITIAL_STEP:
            # sample a minibatch to train
            minibatch = random.sample(store, BATCH)
            # get the batch variables
            state_j_batch = [d[0] for d in minibatch]
            action_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            state_j1_batch = [d[3] for d in minibatch]
            target_q_batch = []
            # next-state Q values come from the avoidance network
            q_action1_batch = outputQAvoid.eval(
                feed_dict={inputStateAvoid: state_j1_batch})
            for i in range(0, len(minibatch)):
                terminal_j = minibatch[i][4]
                if terminal_j:
                    target_q_batch.append(reward_batch[i])
                else:
                    target_q_batch.append(reward_batch[i] + GAMMA *
                                          (np.max(q_action1_batch[i])))
            train_step.run(
                feed_dict={
                    target_q: target_q_batch,
                    action: action_batch,
                    inputState: state_j_batch
                })
        # save progress every 10000 iterations
        if step % 10000 == 0:
            saverAvoid.save(sessAvoid,
                            'avoid_networks_add/' + GAME + '-dqn',
                            global_step=step)
        # step > OBSERVE end train_step

        # control at a frequency of 10 Hz
        if step < 50000 + INITIAL_STEP and (time.time() - simTime <
                                            0.098) and (not terminal):
            continue
        simTime = time.time()

        # current Q values from the base network and the avoidance network
        qV_t = outputQ.eval(feed_dict={inputState: [state_t]})[0]
        qV_tAvoid = outputQAvoid.eval(
            feed_dict={inputStateAvoid: [state_t]})[0]
        # greedy actions from both networks (epsilon exploration is applied below)
        action_array_t, action_angle_t = getAction(qV_t)
        action_array_tAvoid, action_angle_tAvoid = getAction(qV_tAvoid)
        base_angle_t = math.sin(
            max(-1 * math.pi * 0.5, min(math.pi * 0.5,
                                        errorYaw * 2.5))) * 7 + action_angle_t
        base_angle_t = round(base_angle_t, 2)
        if step % 5 == 0 or terminal:
            printPink("   action from q value : " + str(action_angle_tAvoid))
            print("base angle is : " + str(base_angle_t))
            print("sum angle is : " + str(base_angle_t + action_angle_tAvoid))
        # scale down epsilon
        if epsilon > FINAL_EPSILON and step > 50000 + INITIAL_STEP:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        if random.random() < epsilon:
            action_array_t, action_angle_t = getRandomAction(qV_tAvoid)
            action_angle_t = round(action_angle_t, 2)
            if step % 5 == 0 or terminal:
                printYellow("   action from random : " +
                            str(action_angle_tAvoid))
                print("base angle is : " + str(base_angle_t))
                print("sum angle is : " + str(base_angle_t + action_angle_t))

        prius.control_prius(0.6, base_angle_t + action_angle_tAvoid, 0)
        x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
        ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
        x_t = np.reshape(x_t, (160, 160, 1))
        state_t1 = np.append(x_t, state_t[:, :, :9], axis=2)
        store.append((state_t, action_array_t, reward, state_t1, terminal))
        if len(store) > REPLAY_MEMORY:
            store.popleft()
        # print the step index and current reward
        line1 = str(step) + " \tstep and reward is :" + str(reward)
        if step % 5 == 0 or terminal:
            print(round(base_angle_t, 2), '\t')
            print(round(action_angle_t, 2))
            if terminal:
                printRed(line1)
            elif reward < 0.15:
                printYellow(line1)
            elif reward < 0.3:
                printCyan(line1)
            else:
                printGreen(line1)
        # if terminal, restart
        if terminal:
            prius.reset()
            time.sleep(0.5)
            x_t, reward, terminal, intersection, errorYaw = env.render(
                prius.collisions(), prius.pose(), prius.velocity())
            terminal = prius.terminal() or terminal
            if terminal:
                reward = -1
            x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
            ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
            state_t1 = np.stack(
                (x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t), axis=2)
        state_t = state_t1
        step += 1
    while going_on:
        time.sleep(2)
        print("dafdfweewq")
    prius.control_world()
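
Examples #3 and #4 register a SIGINT handler and loop on the global going_on flag, but the handler itself is outside these snippets. A minimal sketch, assuming its only job is to let the loops exit cleanly on Ctrl-C, could look like this:

import signal

going_on = True

def handler(signum, frame):
    # assumed behavior: clear the flag so the "while going_on" loops terminate
    global going_on
    going_on = False
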
Example #5
def enjoyPrius(args):
    # the Prius and its environment
    prius = simulator.Prius(args)
    prius.reset()
    time.sleep(2)