def get_trajectories(self, full_states):
    """Extract the head object's trajectory (and, unless the tail is
    "Action", the first tail object's trajectory) from a list of full
    states, and return the merged result as a CUDA-wrapped tensor.

    ``full_states`` is a sequence whose elements carry the object dump
    at index 1 — presumably (frame, obj_dump, ...); confirm with caller.
    """
    obj_dumps = [frame[1] for frame in full_states]
    head_traj = get_individual_data(self.head, obj_dumps, pos_val_hash=1)
    # TODO: automatically determine if correlate pos_val_hash is 1 or 2
    # TODO: multiple tail support
    # TODO: Separation of Interference and Contingent objects
    if self.tail[0] == "Action":
        # Action tail: no correlate trajectory is appended.
        merged = head_traj
    else:
        corr_traj = get_individual_data(self.tail[0], obj_dumps, pos_val_hash=1)
        merged = np.concatenate([head_traj, corr_traj], axis=1)
    # NOTE(review): unconditionally moves to GPU — callers must have CUDA.
    return pytorch_model.wrap(merged).cuda()
def get_state(self, state):
    """Single-state analogue of get_trajectories.

    Returns ``(merged, [len(hstate), len(corr_state)])`` where ``merged``
    is the head-object vector, concatenated with the first tail object's
    vector unless the tail is "Action" (in which case the correlate part
    is empty). ``state`` carries the object dump at index 1.
    """
    # "Block" uses a different pos_val_hash. TODO: make not hard coded
    head_hash = 3 if self.head == "Block" else 1
    hstate = get_individual_data(self.head, [state[1]], pos_val_hash=head_hash)[0]
    # TODO: automatically determine if correlate pos_val_hash is 1 or 2
    # TODO: multiple tail support
    # TODO: Separation of Interference and Contingent objects
    if self.tail[0] == "Action":
        # Action tail contributes nothing; report its length as 0.
        corr_state = []
        merged = hstate
    else:
        corr_state = get_individual_data(self.tail[0], [state[1]], pos_val_hash=1)[0]
        merged = np.concatenate([hstate, corr_state])
    return merged, [len(hstate), len(corr_state)]
# python get_reward.py --record-rollouts data/ataripaddle/ --changepoint-dir data/atarigraph/ --train-edge "Paddle->Ball" --transforms WProx --determiner prox --reward-form changepoint --num-stack 1 --focus-dumps-name focus_dumps.txt --dp-gmm atariball --period 5 # python get_reward.py --record-rollouts data/pusherrandom/ --changepoint-dir data/fullpusher/ --train-edge "Action->Gripper" --transforms SVel SCorAvg --determiner overlap --reward-form markov --segment --train --num-stack 2 --gpu 1 # python get_reward.py --record-rollouts data/extragripper/ --changepoint-dir data/pushergraphvec/ --train-edge "Gripper->Block" --transforms SProxVel --determiner merged --reward-form changepoint --segment --num-stack 2 --gpu 1 --cluster-model FDPGMM --period 9 --dp-gmm block --min-cluster 5 # python get_reward.py --record-rollouts data/pusherrandom/ --changepoint-dir data/fullpusher/ --train-edge "Action->Gripper" --transforms SVel SCorAvg --determiner overlap --reward-form markov --segment --train --num-stack 2 --gpu 1 > pusher/reward_training.txt dataset_path = args.record_rollouts changepoints_path = args.record_rollouts # these are the same for creating rewards head, tail = get_edge(args.train_edge) cp_dict = load_from_pickle( os.path.join(changepoints_path, "changepoints-" + head + ".pkl")) changepoints, models = get_cp_models_from_dict(cp_dict) obj_dumps = read_obj_dumps(dataset_path, i=-1, rng=args.num_iters, filename=args.focus_dumps_name) trajectory = get_individual_data(head, obj_dumps, pos_val_hash=1) # TODO: automatically determine if correlate pos_val_hash is 1 or 2 # TODO: multiple tail support if tail[0] == "Action": correlate_trajectory = get_individual_data(tail[0], obj_dumps, pos_val_hash=2) new_ct = np.zeros( (len(correlate_trajectory), int(np.max(correlate_trajectory)) + 1)) hot_idxes = np.array(list(range( len(correlate_trajectory)))), correlate_trajectory.astype(int) new_ct[np.array(list(range(len(correlate_trajectory)))), 
np.squeeze(correlate_trajectory.astype(int))] = 1 correlate_trajectory = new_ct else: correlate_trajectory = get_individual_data(tail[0],