def randomSteps(env, steps, dqn):
    """Populate the replay memory of ``dqn`` by acting randomly in ``env``.

    Each episode starts with a warm-up of ``initial_no_op`` NO_OP actions
    whose preprocessed frames are pushed onto the frame stack; no
    experience is stored during the warm-up.  Afterwards a uniformly random
    action is taken on every step and the resulting transition is handed to
    ``dqn.storeExperience``.

    Args:
        env: Environment exposing ``reset()`` and
            ``step(action) -> (observation, reward, done, info)``.
        steps: Total number of environment steps to perform (warm-up steps
            included).
        dqn: Agent exposing ``storeExperience(...)`` and ``replayMemory``.

    Returns:
        None.  Timing and replay-memory statistics are printed.
    """
    t0 = time.time()
    env.reset()
    i = 0  # number of warm-up NO_OP steps taken in the current episode
    frame_stack = Pipe(4)
    # The lower bound of 4 guarantees at least four frames are pushed before
    # the first stacked state is built.
    initial_no_op = np.random.randint(4, NO_OP_MAX)

    for _ in range(steps):
        if i < initial_no_op:
            # Warm-up: perform a random number of NO_OP actions and only
            # feed the frame stack.
            state, reward, done, info = env.step(NO_OP_CODE)
            frame_stack.push(downSample(rgb2gray(state)))
            i += 1
        else:
            state = np.stack(frame_stack.items, axis=2).reshape(
                (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            # Uniformly random action plus its one-hot encoding.
            action = np.random.randint(0, len(ACTIONS))
            actionH = np.zeros(len(ACTIONS))
            actionH[action] = 1

            # Bind the terminal flag to ``done`` here as well: the original
            # bound it to ``game_over`` in this branch, so the episode-end
            # check below kept reading a stale ``done`` from the warm-up.
            next_state, reward, done, info = env.step(action)
            frame_stack.push(downSample(rgb2gray(next_state)))
            next_state = np.stack(frame_stack.items, axis=2).reshape(
                (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            # NOTE(review): ``type`` is whatever that name resolves to at
            # module level -- presumably a dtype shadowing the builtin;
            # confirm against the rest of the file.
            dqn.storeExperience(
                state.astype(type),
                actionH,
                reward,
                next_state.astype(type),
                done)

        if done:
            # Episode over: restart the environment and redo the warm-up.
            # The stack must be a fresh Pipe, not a plain list, because the
            # loop keeps calling ``frame_stack.push`` / ``.items`` on it.
            env.reset()
            i = 0
            frame_stack = Pipe(4)

    t1 = time.time()
    print("Fullfilling replay memory operation took:", t1 - t0,)
    # sys.getsizeof is shallow: it reports the container itself, not the
    # objects stored inside it.
    print('Size of replay memory %s bytes and has %s elements' % ((sys.getsizeof(dqn.replayMemory)), len(dqn.replayMemory)))
    # Bare ``print`` kept from the original: it prints a blank line under
    # Python 2 and is a no-op expression under Python 3.
    print
initial_no_op = np.random.randint(4, NO_OP_MAX) i=0 frame_stack=Pipe(4) score=0 summary_writer = tf.summary.FileWriter('logs',sess.graph) summary_writer = tf.summary.FileWriter(LOG_DIRECTORY + RUN_STRING, sess.graph) print('Started training') for step in range(STEPS): if i < initial_no_op: # WE PERFORM A RANDOM NUMBER OF NO_OP ACTIONS action = NO_OP_CODE state, reward, done, info = env.step(action) greyObservation = rgb2gray(state) state = downSample(greyObservation) frame_stack.push(state) i+=1 else: state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)) action = dqn.selectAction(state,step) actionN = np.argmax(dqn.selectAction(state,step)) next_state, reward, game_over, info = env.step(actionN) greyObservation = rgb2gray(next_state) next_state = downSample(greyObservation) frame_stack.push(next_state) next_state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))