import numpy as np

# env, model, hp, Memory, init_map, render_map, pre_process, to_tensor,
# get_action and num_actions are defined elsewhere in the script.
memory = Memory(100000)          # replay buffer holding up to 100k transitions
if render_map:
    root, canvas = init_map()

steps = 0
scores = []
epsilon = 1.0                    # start fully exploratory

for episode in range(hp.num_episodes):
    state = env.reset()
    state = pre_process(state)
    # Build the initial 4-frame history by repeating the first frame.
    history = np.stack((state, state, state, state), axis=2)
    history = np.reshape([history], (84, 84, 4))

    # Warm up the history with three random steps.
    for i in range(3):
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        next_state = pre_process(next_state)
        next_state = np.reshape(next_state, (84, 84, 1))
        history = np.append(next_state, history[:, :, :3], axis=2)

    score = 0
    prev_life = 20               # full Minecraft health
    episode_len = 0

    while True:
        env.render(mode='rgb_array')
        steps += 1

        # Q-values for the current 4-frame history; epsilon-greedy action.
        qvalue = model(to_tensor(history).unsqueeze(0))
        action = get_action(epsilon, qvalue, num_actions)
        next_state, reward, done, info = env.step(action)
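# --- Hedged sketch: pre_process, to_tensor and get_action are used above but
# not defined in this excerpt. Minimal implementations consistent with the
# 84x84 grayscale frame stacking and epsilon-greedy selection used here might
# look like the following; treat them as assumptions, not the original code.
import random

import cv2
import torch


def pre_process(frame):
    # Convert an RGB observation to an 84x84 grayscale float image in [0, 1].
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
    return resized.astype(np.float32) / 255.0


def to_tensor(array):
    # HWC numpy history -> CHW float tensor, ready for unsqueeze(0) batching.
    return torch.from_numpy(np.transpose(array, (2, 0, 1))).float()


def get_action(epsilon, qvalue, num_actions):
    # Epsilon-greedy: random action with probability epsilon, otherwise the
    # greedy action from the (1, num_actions) Q-value tensor.
    if random.random() < epsilon:
        return random.randrange(num_actions)
    return int(qvalue.argmax(dim=1).item())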
import cv2
import numpy as np
import torch

# model, env, prep_image, write_results, inp_dim, CUDA, confidence,
# num_classes and nms_thesh are defined elsewhere in the script.
video = cv2.VideoWriter('record/mob-fun.avi',
                        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                        20, (800, 600))   # MJPG codec, 20 fps, 800x600 frames
done = False
write = False
batch_size = 1

for i in range(1):
    score = 0
    count = 0
    env.reset()
    while True:
        count += 1
        env.render(mode="rgb_array")

        # Random agent: sample an action and step the environment.
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        num_states = info['number_of_observations_since_last_state']
        num_rewards = info['number_of_rewards_since_last_state']
        observation = info['observation']
        # print(num_states, num_rewards)
        score += reward

        # Reshape the flat observation into a 600x800 RGB frame and run the
        # detector on it without tracking gradients.
        obs = np.reshape(obs, (600, 800, 3))
        img, origin_img, dim = prep_image(obs, inp_dim)
        with torch.no_grad():
            prediction = model(img, CUDA)
            prediction = write_results(prediction, confidence, num_classes,
                                       nms=True, nms_conf=nms_thesh)
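# --- Hedged sketch: prep_image is called above but not shown in this excerpt.
# A letterbox-style resize consistent with its (img, origin_img, dim) return
# signature could look like this; it is an assumption, not the original
# implementation. Whether the input frame needs a channel flip depends on
# whether the environment returns RGB or BGR.
def prep_image(frame, inp_dim):
    # Letterbox-resize a frame to inp_dim x inp_dim, keeping aspect ratio,
    # then convert it to a normalized (1, 3, inp_dim, inp_dim) float tensor.
    orig_img = frame
    dim = frame.shape[1], frame.shape[0]  # (width, height)
    w, h = dim
    scale = min(inp_dim / w, inp_dim / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # Pad the resized frame onto a gray canvas so the network input is square.
    canvas = np.full((inp_dim, inp_dim, 3), 128, dtype=np.uint8)
    top = (inp_dim - new_h) // 2
    left = (inp_dim - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized

    img = torch.from_numpy(canvas[:, :, ::-1].copy()).float().div(255.0)
    img = img.permute(2, 0, 1).unsqueeze(0)
    return img, orig_img, dim

# Note: the excerpt opens a VideoWriter but the visible loop body never writes
# a frame; presumably a video.write(...) call and a final video.release()
# follow in the omitted portion of the script.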