# Wall-clock reference captured once, before the first episode starts.
start_timer = time.time()

# Episodes are numbered from 1. itertools.count() has no upper bound, so any
# exit from this loop must come from code that is not visible in this excerpt.
for i_episode in itertools.count(1):
    # Zero every per-episode reward accumulator and the step counter.
    episode_reward = episode_peak_reward = episode_day_reward = 0
    episode_night_reward = episode_smooth_reward = 0
    episode_steps = 0
    done = False

    # Split the reset observation: the first three entries are kept as-is
    # (temporal_state), everything after them goes through encoder.encode_min,
    # and the two parts are then joined with `+` to form the working state.
    initial_obs = env.reset()
    temporal_state = initial_obs[:3]
    encoded_tail = encoder.encode_min(initial_obs[3:])
    state = temporal_state.tolist() + encoded_tail

    # Start each episode with four new, independent empty lists.
    grads_G1_daily, grads_G1_weekly = [], []
    grads_G2_daily, grads_G2_weekly = [], []

    # For every step
    while not done:
        # If learning hasn't started yet, sample random action
        if total_numsteps < args.start_steps:
            # action = env.action_space.sample()
            # NOTE(review): with the sampling line above commented out,
            # `action` is not assigned anywhere in the visible code before
            # it is appended here -- confirm it is bound elsewhere.
            agent.action_tracker.append(action)