def __init__(self):
    """Create the experience sender, environment, oracle and send buffer.

    Attributes set here:
        serv_sender: ``ExperienceSender`` instance.
        record: recording flag, initially ``False``.
        env: environment built by ``make_imitation_env(skip=3)``.
        oracle: ``FGM`` instance.
        sender_buffer: bounded deque of observations waiting to be sent.
    """
    # Capacity of the outgoing observation queue; once it is full, each
    # new append on the right discards the oldest entry (deque semantics).
    max_buffered_obs = 100

    self.serv_sender = ExperienceSender()
    self.record = False
    self.env = make_imitation_env(skip=3)
    self.oracle = FGM()

    # Observations are parked here for the sender, which ships them off
    # to the server for training.
    self.sender_buffer = deque(maxlen=max_buffered_obs)
def __init__(self):
    """Build the driving model and the helpers used to collect experience.

    Attributes set here:
        model: ``NVIDIA_ConvNet`` placed on ``device`` and put in eval mode.
        oracle: ``FGM`` instance.
        serv_sender: ``ExperienceSender`` instance.
        repbuf: ``f110_ReplayBuffer`` instance.
        record, send: boolean flags, both initially ``False``.
        env: environment built by ``make_imitation_env()``.
    """
    network = NVIDIA_ConvNet()
    self.model = network.to(device)
    # update_nn() is defined elsewhere; presumably it refreshes the model
    # weights -- confirm. It runs before the switch to eval mode.
    self.update_nn()
    self.model.eval()

    self.oracle = FGM()
    self.serv_sender = ExperienceSender()
    self.repbuf = f110_ReplayBuffer()

    # Both flags start cleared.
    self.record = self.send = False

    self.env = make_imitation_env()
def run_policy(self):
    """Run the policy onboard and queue observations for the sender.

    Builds its own environment with ``make_imitation_env(skip=2)``, then
    loops forever: pick an action with ``self.get_action``, step the
    environment, and append the resulting observation to
    ``self.sender_buffer`` whenever ``info["record"]`` is truthy. The
    environment is reset each time an episode reports ``done``.

    NOTE(review): the earlier docstring said this uses ``self.oracle``;
    the oracle is not referenced here directly, so that presumably
    happens inside ``self.get_action`` -- confirm.

    This method never returns.
    """
    env = make_imitation_env(skip=2)
    current_obs = env.reset()
    # The very first observation is queued unconditionally; observations
    # produced by later resets are not.
    self.sender_buffer.append(current_obs)

    while True:
        action = self.get_action(current_obs)
        following_obs, _reward, done, info = env.step(action)

        if info.get("record"):
            self.sender_buffer.append(following_obs)

        # Start a fresh episode when this one ended, otherwise carry on
        # from the observation we just received.
        current_obs = env.reset() if done else following_obs
def main():
    """Drive the imitation environment with a constant action and show its camera.

    Steps the environment from ``make_imitation_env(skip=2)`` forever with a
    fixed steering angle and speed, displays the ``"img"`` entry of each
    observation in an OpenCV window named ``latestimg``, and prints
    ``ISDONE`` and resets the environment whenever an episode ends.

    This function never returns.
    """
    env = make_imitation_env(skip=2)
    # The observation returned by reset() is never displayed; only
    # observations coming back from step() are shown.
    env.reset()

    while True:
        # Constant command (not random). Rebuilt every step so the
        # environment always receives a fresh dict.
        fixed_action = {"angle": 0.2, "speed": 1.0}
        obs, _reward, done, _info = env.step(fixed_action)

        cv2.imshow('latestimg', obs["img"])
        # waitKey gives the GUI a short slice to process events so the
        # window actually repaints.
        cv2.waitKey(2)

        if done:
            print("ISDONE")
            env.reset()
def run_policy(self):
    """Run the policy onboard and add experiences to the replay buffer.

    Builds its own environment with ``make_imitation_env(skip=3)``, then
    loops forever:

    * the current observation is converted with
      ``self.gymobs_to_inputdict`` and passed to ``self.get_action``
      (presumably backed by ``self.model`` -- confirm);
    * while ``info["record"]`` is truthy, the pre-step observation is
      passed through ``self.oracle.fix`` and the result is stored in
      ``self.repbuf`` together with the action, reward and done flag;
    * ``self.record`` mirrors the record flag, and ``self.send`` mirrors
      the truthiness of the first entry of ``info["buttons"]``.

    A missing or empty ``"buttons"`` entry is treated as "not pressed"
    instead of raising.

    This method never returns.
    """
    env = make_imitation_env(skip=3)
    obs_dict = env.reset()

    while True:
        action = self.get_action(self.gymobs_to_inputdict(obs_dict))
        next_obs_dict, reward, done, info = env.step(action)

        # The flag is updated before the buffer is touched, matching the
        # order in which the attribute used to be written.
        self.record = bool(info.get("record"))
        if self.record:
            ret_dict = self.oracle.fix(obs_dict)
            self.repbuf.add(ret_dict, action, reward, done)

        # info.get() yields None when the key is absent; guard before
        # indexing so a missing/empty button list does not crash the loop.
        buttons = info.get("buttons")
        self.send = (
            buttons is not None
            and len(buttons) > 0
            and bool(buttons[0])
        )

        # Start a new episode when this one ended, otherwise advance.
        obs_dict = env.reset() if done else next_obs_dict
def run_policy(self):
    """Run the policy onboard and add experiences to the prioritized replay buffer.

    Builds its own environment with ``make_imitation_env()``, then loops
    forever:

    * the current observation is converted with
      ``self.gymobs_to_inputdict`` and passed to ``self.get_action``
      (presumably backed by ``self.model`` -- confirm);
    * while ``info["record"]`` is truthy, ``self.get_repbuf_entry`` builds
      an entry from the transition and it is unpacked into
      ``self.repbuf.add``;
    * ``self.record`` mirrors the record flag, and ``self.send`` is true
      exactly when the first entry of ``info["buttons"]`` equals ``1``.

    A missing or empty ``"buttons"`` entry is treated as "not pressed"
    instead of raising.

    This method never returns.
    """
    env = make_imitation_env()
    obs_dict = env.reset()

    while True:
        action = self.get_action(self.gymobs_to_inputdict(obs_dict))
        next_obs_dict, reward, done, info = env.step(action)

        # The flag is updated before the buffer is touched, matching the
        # order in which the attribute used to be written.
        self.record = bool(info.get("record"))
        if self.record:
            entry = self.get_repbuf_entry(
                obs_dict, action, next_obs_dict, reward, done, info
            )
            self.repbuf.add(*entry)

        # info.get() yields None when the key is absent; guard before
        # indexing so a missing/empty button list does not crash the loop.
        buttons = info.get("buttons")
        self.send = (
            buttons is not None
            and len(buttons) > 0
            and bool(buttons[0] == 1)
        )

        # Start a new episode when this one ended, otherwise advance.
        obs_dict = env.reset() if done else next_obs_dict