Пример #1
0
# Wall-clock timestamp taken just before the episode loop begins.
start_timer = time.time()

# Episode loop: itertools.count(1) yields 1, 2, 3, ... without end, so any
# stopping condition must come from a break further down (not visible in
# this excerpt).
for i_episode in itertools.count(1):

    # Reset the per-episode accumulators and the termination flag.
    # NOTE(review): the peak/day/night/smooth counters are presumably
    # separate reward components tracked alongside the total — confirm
    # against the code that updates them.
    episode_reward = 0
    episode_peak_reward = 0
    episode_day_reward = 0
    episode_night_reward = 0
    episode_smooth_reward = 0
    episode_steps = 0
    done = False

    # Start a new episode and build the agent's input state:
    #   1. split off the first three entries of the observation,
    #   2. pass the remaining entries through encoder.encode_min,
    #   3. re-attach the three raw entries in front of the encoded part.
    # NOTE(review): the name suggests the first three entries are temporal
    # features; `.tolist()` implies the observation is array-like (e.g. a
    # NumPy array), and `+` assumes encode_min returns a list so the two
    # parts concatenate — TODO confirm.
    state = env.reset()
    temporal_state = state[:3]
    state = state[3:]
    state = encoder.encode_min(state)
    state = temporal_state.tolist() + state

    # Fresh, empty per-episode buffers.
    # NOTE(review): presumably gradient records for two components (G1,
    # G2) at daily and weekly granularity — verify where they are filled.
    grads_G1_daily = []
    grads_G1_weekly = []

    grads_G2_daily = []
    grads_G2_weekly = []

    # Step through the environment until the episode is flagged as done.
    while not done:
        # While the global step count (total_numsteps, defined outside this
        # excerpt) is still below args.start_steps, act randomly instead of
        # querying the agent.
        if args.start_steps > total_numsteps:
            # Draw a random action from the environment's action space and
            # record it in the agent's action tracker.
            action = env.action_space.sample()
            agent.action_tracker.append(action)