# Imports needed by the enjoyPrius variants below. `simulator` and `En` are
# this project's own modules; the hyperparameter constants (GAME, ACTIONS,
# GAMMA, OBSERVE, EXPLORE, INITIAL_EPSILON, FINAL_EPSILON, INITIAL_STEP,
# REPLAY_MEMORY, BATCH) are defined elsewhere in the module.
import math
import random
import signal
import threading
import time
from collections import deque

import cv2
import numpy as np
import tensorflow as tf

import En
import simulator


def enjoyPrius(args):
    # Early variant: open-loop test drive. Mode 0 holds a fixed steering
    # command at args.value throttle; mode 1 toggles the simulated world
    # and exits.
    prius = simulator.Prius(args)
    time.sleep(0.5)
    #while True:
    #    if args.mode == 0:
    #        prius.control_prius(args.value, -prius.pose().position.y / 80, 0)
    #    if args.mode == 1:
    #        prius.control_world()
    #        break
    #    time.sleep(0.1)
    while True:
        #prius.control_world(False)
        #time.sleep(5)
        #print(str(prius.pose()))
        if args.mode == 0:
            #prius.control_prius(args.value, -prius.pose().position.y / 80, 0)
            #prius.control_prius(args.value, -prius.pose().position.y / 20, 0)
            prius.control_prius(args.value, 0.2, 0)
        if args.mode == 1:
            prius.control_world()
            break
        print(prius.pose().orientation.x)
        #print(str(prius.collisions()))
        #print(prius.collisions().position.x)
        #print(str(prius.velocity()))
        print('====================')
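# The stub below is a hedged, hypothetical stand-in for the simulator.Prius
# wrapper used throughout this file. Method names and arguments are inferred
# purely from the call sites here; the real simulator API may differ.
class PriusStub(object):
    def __init__(self, args):
        self.args = args  # CLI options (mode, value, road, vehicle, track, ...)

    def control_prius(self, throttle, steering, brake):
        """Send a drive command: pedal percents plus a hand-wheel angle."""

    def control_world(self, paused=True):
        """Pause or unpause the simulated world."""

    def pose(self):
        """Latest vehicle pose (position and orientation)."""

    def collisions(self):
        """Latest collision/contact readings."""

    def velocity(self):
        """Latest vehicle velocity."""

    def terminal(self):
        """True once the vehicle has crashed or left the track."""
        return False

    def reset(self):
        """Respawn the vehicle at its start pose."""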
def enjoyPrius(args):
    # Variant: polls the environment's terminal test against the latest
    # collision readings; the pose-sweep render test is kept commented out.
    prius = simulator.Prius(args)
    env = En.Env(args.road, args.vehicle, args.track)
    while True:
        print(env._terminal(prius.collisions()))
        #env.testRender(pos)
        #time.sleep(0.5)
        #for i in range(0, 90):
        #    pos.position.x = 42 + i / 30
        #    pos.position.y = i / 30
        #    pos.orientation.x = i / 90 * math.pi / 2
        #    print(str(pos))
        #    print(env.testRender(pos))
        #    #env.testRender(pos)
        #    time.sleep(0.05)
        #time.sleep(0.5)
        #while True:
        #    if args.mode == 0:
        #        prius.control_prius(args.value, -prius.pose().position.y / 80, 0)
        #    if args.mode == 1:
        #        prius.control_world()
        #        break
        #    time.sleep(0.1)
        #while True:
        #    #prius.control_world(False)
        #    #time.sleep(5)
        #    #print(str(prius.pose()))
        #    if args.mode == 0:
        #        #prius.control_prius(args.value, -prius.pose().position.y / 80, 0)
        #        #prius.control_prius(args.value, -prius.pose().position.y / 20, 0)
        #        prius.control_prius(args.value, 0.2, 0)
        #    if args.mode == 1:
        #        prius.control_world()
        #        break
        #print(prius.pose().orientation.x)
        #print(str(prius.collisions()))
        #print(prius.collisions().position.x)
        #print(str(prius.velocity()))
        print('====================')
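# En.Env is the project's environment/reward module; nothing in this file
# defines it. The stub below is a hedged reconstruction from usage only:
# later variants unpack render() into (image, reward, terminal, intersection,
# errorYaw), earlier ones into (image, reward, terminal). The signatures and
# the placeholder return values are assumptions, not the real API.
class EnvStub(object):
    def __init__(self, road, vehicle, track):
        self.road, self.vehicle, self.track = road, vehicle, track

    def _terminal(self, collisions):
        # crash / off-track test used by the variant above
        return bool(collisions)

    def testRender(self, pose):
        """Debug render of a hand-built pose (see the commented sweep above)."""

    def render(self, collisions, pose, velocity=None):
        # observation image, reward, terminal flag, lane intersection, yaw error
        x_t = np.zeros((480, 640, 3), np.uint8)
        return x_t, 0.0, self._terminal(collisions), 0.0, 0.0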
def enjoyPrius(args):
    # Variant: single-network DQN training loop with a 20-output Q head
    # treated as ten independent action pairs.
    # controlling loop
    global going_on
    signal.signal(signal.SIGINT, handler)
    going_on = True

    # prius and its environment
    prius = simulator.Prius(args)
    thread_update_pos = threading.Thread(target=updatePriusPos, args=(prius,))
    thread_update_collisions = threading.Thread(target=updatePriusCollision, args=(prius,))
    thread_update_pos.start()
    thread_update_collisions.start()
    env = En.Env(args.road, args.vehicle, args.track)

    # network session and its parameters
    sess = tf.InteractiveSession()
    inputState, outputQ, h_fc1 = createNetwork()
    # action chosen by policy pi
    action = tf.placeholder("float", [None, ACTIONS])
    # target q value for the actions just taken
    target_q = tf.placeholder("float", [None])
    # predicted q value of the selected actions
    action_q = tf.reduce_sum(tf.multiply(outputQ, action), reduction_indices=1)
    # drive the predicted q toward the target q*
    cost = tf.reduce_mean(tf.square(target_q - action_q))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    while terminal:
        prius.reset()
        time.sleep(0.2)
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())

    # control prius: throttle pedal at 20 percent, steering 0, brake 0
    prius.control_prius(0.2, 0, 0)
    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    # initial state: ten copies of the first frame
    state_t = np.stack([x_t] * 10, axis=2)
    state_t1 = state_t
    epsilon = INITIAL_EPSILON
    step = 1
    store = deque()
    simTime = time.time()
    while going_on:
        # always run at high frequency
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
        # train once past the observation stage
        if step > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(store, BATCH)
            # get the batch variables
            state_j_batch = [d[0] for d in minibatch]
            action_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            state_j1_batch = [d[3] for d in minibatch]
            # qV_batch = []
            target_q_batch = []
            q_action1_batch = outputQ.eval(feed_dict={inputState: state_j1_batch})
            for i in range(0, len(minibatch)):
                terminal_j = minibatch[i][4]
                if terminal_j:
                    target_q_batch.append(reward_batch[i])
                else:
                    # the 20 Q outputs form ten pairs; the target sums the
                    # max of each pair
                    target_q_batch.append(reward_batch[i] + GAMMA * sum(
                        np.max(q_action1_batch[i][2 * k:2 * k + 2])
                        for k in range(10)))

            train_step.run(feed_dict={
                target_q: target_q_batch,
                action: action_batch,
                inputState: state_j_batch})

            # save progress every 10000 iterations
            if step % 10000 == 0:
                saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step=step)
        # step > OBSERVE end train_step

        # control at a frequency of 10 Hz
        if (time.time() - simTime < 0.098) and (not terminal):
            continue
        simTime = time.time()

        ## current q value
        #readout_t = readout.eval(feed_dict={s: [s_t]})[0]
        qV_t = outputQ.eval(feed_dict={inputState: [state_t]})[0]

        ## epsilon-greedy policy
        action_array_t = np.zeros([ACTIONS])
        action_angle_t = 0
        # scale down epsilon
        if epsilon > FINAL_EPSILON and step > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        if random.random() < epsilon:
            action_array_t, action_angle_t = getRandomAction()
        else:
            action_array_t, action_angle_t = getAction(qV_t)

        prius.control_prius(0.2, -1 * action_angle_t, 0)

        x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
        ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
        x_t = np.reshape(x_t, (160, 160, 1))
        # shift the newest frame into the ten-frame state stack
        state_t1 = np.append(x_t, state_t[:, :, :9], axis=2)
        store.append((state_t, action_array_t, reward, state_t1, terminal))
        if len(store) > REPLAY_MEMORY:
            store.popleft()

        # print the current reward
        line1 = "step :====================== " + str(step) + " ======================== "
        line3 = "reward " + str(reward) + " "
        if terminal:
            printRed(line1)
            printRed(line3)
        elif reward < 0.04:
            printYellow(line1)
            printYellow(line3)
        elif reward < 0.1:
            printCyan(line1)
            printCyan(line3)
        else:
            printGreen(line1)
            printGreen(line3)

        # if terminal, restart
        if terminal:
            prius.reset()
            time.sleep(0.2)
            x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
            x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
            ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
            state_t1 = np.stack([x_t] * 10, axis=2)
        state_t = state_t1
        step += 1
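# createNetwork() is defined elsewhere in the project. The sketch below is
# only a plausible reconstruction consistent with how it is used in this
# file: a 160x160x10 frame stack in, ACTIONS Q-values out, and the last
# fully connected hidden layer returned alongside them, in the same TF1
# style as the rest of the file. All layer sizes are assumptions.
def createNetworkSketch():
    def weight(shape):
        return tf.Variable(tf.truncated_normal(shape, stddev=0.01))

    def bias(shape):
        return tf.Variable(tf.constant(0.01, shape=shape))

    inputState = tf.placeholder("float", [None, 160, 160, 10])

    # conv stack shrinks the image 160 -> 40 -> 20 -> 10, then flattens
    h_conv1 = tf.nn.relu(tf.nn.conv2d(inputState, weight([8, 8, 10, 32]),
                                      strides=[1, 4, 4, 1], padding="SAME") + bias([32]))
    h_conv2 = tf.nn.relu(tf.nn.conv2d(h_conv1, weight([4, 4, 32, 64]),
                                      strides=[1, 2, 2, 1], padding="SAME") + bias([64]))
    h_conv3 = tf.nn.relu(tf.nn.conv2d(h_conv2, weight([3, 3, 64, 64]),
                                      strides=[1, 2, 2, 1], padding="SAME") + bias([64]))
    h_flat = tf.reshape(h_conv3, [-1, 10 * 10 * 64])

    h_fc1 = tf.nn.relu(tf.matmul(h_flat, weight([10 * 10 * 64, 512])) + bias([512]))
    outputQ = tf.matmul(h_fc1, weight([512, ACTIONS])) + bias([ACTIONS])
    return inputState, outputQ, h_fc1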
def enjoyPrius(args):
    # Variant: two networks. A frozen "zero" network supplies a base steering
    # policy; an "avoid" network is trained on top of a yaw-error feedforward
    # term to correct it.
    # controlling loop
    global going_on
    signal.signal(signal.SIGINT, handler)
    going_on = True

    env = En.Env(args.road, args.vehicle, args.track)

    # network session and its parameters
    sess = tf.InteractiveSession()
    inputState, outputQ, h_fc1 = createNetwork()
    ## action chosen by policy pi
    #action = tf.placeholder("float", [None, ACTIONS])
    ## target q value for the actions just taken
    #target_q = tf.placeholder("float", [None])
    ## predicted q value of the selected actions
    #action_q = tf.reduce_sum(tf.multiply(outputQ, action), reduction_indices=1)
    ## drive the predicted q toward the target q*
    #cost = tf.reduce_mean(tf.square(target_q - action_q))
    #train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("zero_networks_add")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    sessAvoid = tf.InteractiveSession()
    sessAvoid.run(tf.initialize_all_variables())
    inputStateAvoid, outputQAvoid, h_fc1Avoid = createNetwork()
    action = tf.placeholder("float", [None, ACTIONS])
    target_q = tf.placeholder("float", [None])
    action_q = tf.reduce_sum(tf.multiply(outputQ, action), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(target_q - action_q))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
    saverAvoid = tf.train.Saver()
    sessAvoid.run(tf.initialize_all_variables())
    checkpointAvoid = tf.train.get_checkpoint_state("avoid_networks_add")
    if checkpointAvoid and checkpointAvoid.model_checkpoint_path:
        saverAvoid.restore(sessAvoid, checkpointAvoid.model_checkpoint_path)
        print("Successfully loaded:", checkpointAvoid.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    prius = simulator.Prius(args)
    thread_update_pos = threading.Thread(target=updatePriusPos, args=(prius,))
    thread_update_collisions = threading.Thread(target=updatePriusCollision, args=(prius,))
    thread_update_velocity = threading.Thread(target=updatePriusVelocity, args=(prius,))
    thread_update_pos.start()
    thread_update_collisions.start()
    thread_update_velocity.start()

    prius.reset()
    time.sleep(0.5)
    prius.control_world()
    x_t, reward, terminal, intersection, errorYaw = env.render(
        prius.collisions(), prius.pose(), prius.velocity())
    terminal = prius.terminal() or terminal
    if terminal:
        reward = -1
    while terminal and going_on:
        print(terminal)
        prius.reset()
        time.sleep(0.5)
        #prius.update_velocity()
        #prius.update_collisions()
        #prius.update_pos()
        x_t, reward, terminal, intersection, errorYaw = env.render(
            prius.collisions(), prius.pose(), prius.velocity())
        terminal = prius.terminal() or terminal
        if terminal:
            reward = -1
    #x_t, reward, terminal, intersection = env.render(prius.conllided(), prius.pose(), prius.velocity())

    # control prius: throttle pedal at 40 percent, steering 0, brake 0
    prius.control_prius(0.4, 0, 0)
    #prius.update_velocity()
    #prius.update_collisions()
    #prius.update_pos()
    x_t, reward, terminal, intersection, errorYaw = env.render(
        prius.collisions(), prius.pose(), prius.velocity())
    reward = round(reward, 2)
    terminal = prius.terminal() or terminal
    if terminal:
        reward = -1
    #x_t, reward, terminal, intersection = env.render(prius.conllided(), prius.pose(), prius.velocity())
    x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    # initial state: ten copies of the first frame
    state_t = np.stack([x_t] * 10, axis=2)
    state_t1 = state_t
    epsilon = INITIAL_EPSILON
    step = INITIAL_STEP
    store = deque()
    simTime = time.time()
    action_array_t = np.zeros([ACTIONS])
    action_angle_t = 0

    while going_on:
        # always run at high frequency
        #prius.update_velocity()
        #prius.update_collisions()
        #prius.update_pos()
        x_t, reward, terminal, intersection, errorYaw = env.render(
            prius.collisions(), prius.pose(), prius.velocity())
        reward = round(reward, 2)
        terminal = prius.terminal() or terminal
        if terminal:
            reward = -1

        # train once past the observation stage
        #if len(store) > BATCH:
        if step > OBSERVE + INITIAL_STEP:
            # sample a minibatch to train on
            minibatch = random.sample(store, BATCH)
            # get the batch variables
            state_j_batch = [d[0] for d in minibatch]
            action_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            state_j1_batch = [d[3] for d in minibatch]
            # qV_batch = []
            target_q_batch = []
            #q_action1_batch = outputQ.eval(feed_dict={inputState: state_j1_batch})
            q_action1_batch = outputQAvoid.eval(
                feed_dict={inputStateAvoid: state_j1_batch})
            for i in range(0, len(minibatch)):
                terminal_j = minibatch[i][4]
                if terminal_j:
                    target_q_batch.append(reward_batch[i])
                else:
                    target_q_batch.append(
                        reward_batch[i] + GAMMA * np.max(q_action1_batch[i]))

            train_step.run(feed_dict={
                target_q: target_q_batch,
                action: action_batch,
                inputState: state_j_batch})

            # save progress every 10000 iterations
            if step % 10000 == 0:
                saverAvoid.save(sessAvoid, 'avoid_networks_add/' + GAME + '-dqn',
                                global_step=step)
        # step > OBSERVE end train_step

        #prius.control_prius(0.3, -1 * action_angle_t, 0)
        # control at a frequency of 10 Hz
        if step < 50000 + INITIAL_STEP and (time.time() - simTime < 0.098) \
                and (not terminal):
            continue
        simTime = time.time()

        ## current q values of both networks
        #readout_t = readout.eval(feed_dict={s: [s_t]})[0]
        qV_t = outputQ.eval(feed_dict={inputState: [state_t]})[0]
        qV_tAvoid = outputQAvoid.eval(feed_dict={inputStateAvoid: [state_t]})[0]

        ## epsilon-greedy policy
        action_array_t, action_angle_t = getAction(qV_t)
        action_array_tAvoid, action_angle_tAvoid = getAction(qV_tAvoid)
        # feedforward steering from the clipped yaw error, plus the base
        # policy's angle
        #base_angle_t = math.sin(max(-1 * math.pi * 0.5, min(math.pi * 0.5, errorYaw * 2.0))) * 7.4
        base_angle_t = math.sin(
            max(-1 * math.pi * 0.5, min(math.pi * 0.5, errorYaw * 2.5))) * 7 + action_angle_t
        base_angle_t = round(base_angle_t, 2)
        if step % 5 == 0 or terminal:
            printPink(" action from q value : " + str(action_angle_tAvoid))
            print("base angle is : " + str(base_angle_t))
            print("sum angle is : " + str(base_angle_t + action_angle_tAvoid))

        # scale down epsilon
        if epsilon > FINAL_EPSILON and step > 50000 + INITIAL_STEP:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        if random.random() < epsilon:
            action_array_t, action_angle_t = getRandomAction(qV_tAvoid)
            action_angle_t = round(action_angle_t, 2)
            if step % 5 == 0 or terminal:
                printYellow(" action from random : " + str(action_angle_tAvoid))
                print("base angle is : " + str(base_angle_t))
                print("sum angle is : " + str(base_angle_t + action_angle_t))

        prius.control_prius(0.6, base_angle_t + action_angle_tAvoid, 0)
        #prius.control_prius(0.6, base_angle_t, 0)

        x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
        ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
        x_t = np.reshape(x_t, (160, 160, 1))
        # shift the newest frame into the ten-frame state stack
        state_t1 = np.append(x_t, state_t[:, :, :9], axis=2)
        store.append((state_t, action_array_t, reward, state_t1, terminal))
        if len(store) > REPLAY_MEMORY:
            store.popleft()

        # print the current reward
        line1 = str(step) + " \tstep and reward is :" + str(reward)
        if step % 5 == 0 or terminal:
            print(round(base_angle_t, 2), '\t')
            print(round(action_angle_t, 2))
            if terminal:
                printRed(line1)
            elif reward < 0.15:
                printYellow(line1)
            elif reward < 0.3:
                printCyan(line1)
            else:
                printGreen(line1)

        # if terminal, restart
        if terminal:
            prius.reset()
            time.sleep(0.5)
            #prius.update_velocity()
            #prius.update_collisions()
            #prius.update_pos()
            x_t, reward, terminal, intersection, errorYaw = env.render(
                prius.collisions(), prius.pose(), prius.velocity())
            terminal = prius.terminal() or terminal
            if terminal:
                reward = -1
            #x_t, reward, terminal, intersection = env.render(prius.conllided(), prius.pose(), prius.velocity())
            x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
            ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
            state_t1 = np.stack([x_t] * 10, axis=2)
        state_t = state_t1
        step += 1

    # idle loop (never entered once going_on has been cleared), then pause
    # the world before exiting
    while going_on:
        time.sleep(2)
        print("dafdfweewq")
    prius.control_world()
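# handler, getAction/getRandomAction and the colored print helpers are
# defined elsewhere in the project, as are the updatePriusPos /
# updatePriusCollision / updatePriusVelocity thread targets (presumably
# pollers that keep the cached pose, collision and velocity readings fresh).
# The sketches below only mirror how they are called in this file; the even
# bin-to-angle mapping over a +/-MAX_ANGLE hand-wheel range is an assumed
# discretization, not necessarily the project's.
MAX_ANGLE = 7.0  # hypothetical steering range for the sketches below

def handler(signum, frame):
    # SIGINT handler: ask every loop watching going_on to finish
    global going_on
    going_on = False

def getAction(qV):
    # greedy choice: one-hot vector over ACTIONS bins plus the matching angle
    index = int(np.argmax(qV))
    action_array = np.zeros([ACTIONS])
    action_array[index] = 1
    angle = -MAX_ANGLE + index * 2.0 * MAX_ANGLE / (ACTIONS - 1)
    return action_array, angle

def getRandomAction(qV=None):
    # exploratory choice: a uniformly random bin (qV is accepted to match
    # the call sites above but unused in this sketch)
    index = random.randrange(ACTIONS)
    action_array = np.zeros([ACTIONS])
    action_array[index] = 1
    angle = -MAX_ANGLE + index * 2.0 * MAX_ANGLE / (ACTIONS - 1)
    return action_array, angle

def _printColored(code, msg):
    # minimal ANSI escape colouring, one plausible form of the print helpers
    print("\033[" + code + "m" + str(msg) + "\033[0m")

def printRed(msg):    _printColored("31", msg)
def printGreen(msg):  _printColored("32", msg)
def printYellow(msg): _printColored("33", msg)
def printPink(msg):   _printColored("35", msg)
def printCyan(msg):   _printColored("36", msg)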
def enjoyPrius(args):
    # Minimal variant: spawn the prius in its environment, reset it, and
    # give the simulator time to settle.
    prius = simulator.Prius(args)
    prius.reset()
    time.sleep(2)
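# A minimal entry-point sketch showing how enjoyPrius might be invoked. The
# flag names are inferred from the attributes read above (args.mode,
# args.value, args.road, args.vehicle, args.track); types and defaults are
# assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='DQN driving on the Prius simulator')
    parser.add_argument('--mode', type=int, default=0,
                        help='0: drive with a fixed command, 1: toggle the world')
    parser.add_argument('--value', type=float, default=0.2,
                        help='throttle pedal percent for mode 0')
    parser.add_argument('--road', default='road')        # hypothetical defaults
    parser.add_argument('--vehicle', default='prius')
    parser.add_argument('--track', default='track')
    enjoyPrius(parser.parse_args())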