Пример #1
0
    def __init__(self,
                 instance_id=0,
                 real_drone=False,
                 policy=None,
                 oracle=None,
                 no_reward=False):
        """
        Build the presenter, load the instruction data and create the pomdp interface.

        :param instance_id: stored on the instance and forwarded to PomdpInterface
        :param real_drone: forwarded to PomdpInterface as ``is_real``
        :param policy: stored as ``self.policy``
        :param oracle: stored as ``self.oracle``
        :param no_reward: stored as ``self.no_reward``
        """
        self.presenter = Presenter()
        self.instance_id = instance_id

        # Declare these before load_all_envs() fills them in
        self.word2token = self.all_instructions = None
        loaded = self.load_all_envs()
        (self.all_env_ids, self.all_instructions, self.corpus,
         self.token2term, self.word2token) = loaded

        self.env = PomdpInterface(is_real=real_drone,
                                  instance_id=self.instance_id)
        self.policy, self.oracle, self.no_reward = policy, oracle, no_reward
Пример #2
0
def take_pics():
    """
    Take a bird's-eye picture of each selected environment and save it to disk.

    Environment ids are dealt round-robin to NUM_WORKERS PomdpInterface instances.
    Each round has two passes over the workers: the first loads the environment
    on every worker, the second snaps the image, flips it along axis 0 and
    writes it to paths.get_env_image_path(env_id). Environments whose image
    file already exists are skipped in both passes.
    """
    P.initialize_experiment()
    train_i, dev_i, test_i, _ = get_all_instructions()
    all_instructions = {**train_i, **dev_i, **test_i}

    # The output directory is derived from the image path of env 0
    save_dir = paths.get_env_image_path(0)
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)

    keylist = list(all_instructions.keys())

    envs = [PomdpInterface(instance_id=worker) for worker in range(0, NUM_WORKERS)]
    env_id_splits = [[] for _ in range(NUM_WORKERS)]
    # NOTE(review): this overrides the full key list above with a single
    # hard-coded env id — looks like a debugging leftover; confirm.
    keylist = [6825]

    for position, key in enumerate(keylist):
        env_id_splits[position % NUM_WORKERS].append(key)

    def pending_job(worker, round_idx):
        # Returns (env_id, image path) if this worker still has a picture to
        # take in this round, otherwise None.
        jobs = env_id_splits[worker]
        if round_idx >= len(jobs):
            return None
        env_id = jobs[round_idx]
        fname = paths.get_env_image_path(env_id)
        if os.path.isfile(fname):
            print("Img exists: " + fname)
            return None
        return env_id, fname

    time.sleep(1.0)
    for round_idx in range(len(keylist)):

        any_env_set = False
        # For each worker, start the correct env
        for worker in range(NUM_WORKERS):
            job = pending_job(worker, round_idx)
            if job is None:
                continue
            env_id, _ = job
            # FIXME: This assumes that there is only 1 instruction set per env!
            any_env_set = True
            instruction_set = all_instructions[env_id][0]
            envs[worker].set_environment(env_id, instruction_set["instructions"], fast=True)
            print("setting env on worker " + str(worker) + " iter " + str(round_idx) + " env_id: " + str(env_id))

        # Then for each worker, take a picture and save it
        if any_env_set:
            time.sleep(0.1)
        for worker in range(NUM_WORKERS):
            job = pending_job(worker, round_idx)
            if job is None:
                continue
            env_id, fname = job
            # The first snapshot is discarded — presumably a warm-up frame; confirm.
            envs[worker].snap_birdseye(fast=True, small_env=SMALL_ENV)
            image = envs[worker].snap_birdseye(fast=True, small_env=SMALL_ENV)
            image = np.flip(image, 0)
            imsave(fname, image)
            print("saving pic on worker " + str(worker) + " iter " + str(round_idx) + " env_id: " + str(env_id))
Пример #3
0
        self.thread.daemon = True
        self.thread.start()

    def run(self):
        """Delegate to ``self.mon.run()``."""
        self.mon.run()

    def get_command(self):
        """Return the ``current_vel`` attribute of ``self.mon``."""
        return self.mon.current_vel


# Module-level setup: everything below runs at import/execution time.
initialize_experiment("nl_datacollect_cage")

teleoper = KeyTeleop()
# NOTE(review): the unit of Rate's argument (period vs. frequency) is not visible here — confirm.
rate = Rate(0.1)

env = PomdpInterface()

# The fourth return value of get_all_instructions() is discarded here.
train_instructions, dev_instructions, test_instructions, _ = get_all_instructions(
)

# Counters initialised to zero; their use is outside this excerpt.
count = 0
stuck_count = 0


def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb,
Пример #4
0
from pykeyboard import PyKeyboardEvent

from drones.airsim_interface.rate import Rate
from data_io.instructions import get_all_instructions
from pomdp.pomdp_interface import PomdpInterface
from visualization import Presenter

from parameters.parameter_server import initialize_experiment, get_current_parameters
from utils.keyboard import KeyTeleop

# Module-level setup: everything below runs at import/execution time.
initialize_experiment()

teleoper = KeyTeleop()
# NOTE(review): the unit of Rate's argument (period vs. frequency) is not visible here — confirm.
rate = Rate(0.1)

# Whether the real drone is used is read from the "Setup" section of the current parameters.
env = PomdpInterface(is_real=get_current_parameters()["Setup"]["real_drone"])

# The fourth return value of get_all_instructions() is discarded here.
train_instructions, dev_instructions, test_instructions, _ = get_all_instructions(
)

# Counters initialised to zero; their use is outside this excerpt.
count = 0
stuck_count = 0


def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb,
Пример #5
0
def interactive_demo():
    """
    Interactively roll out the configured model on the dev-set environments.

    For every segment of every dev instruction set whose env id lies strictly
    between Setup["env_range_start"] and Setup["env_range_end"], the reference
    instruction is published through InteractAPI and the instruction file is
    polled every 200 ms until the user does one of the following:

      * "CMD: Next"  -> stop working on this segment and advance,
      * "CMD: Reset" -> reset the environment to the start of the segment and
        keep polling,
      * any text containing more than one space-separated word -> treat it as
        the instruction, reset the segment and step the model until the
        environment reports ``done``.
    """
    P.initialize_experiment()
    InteractAPI.launch_ui()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
    )
    token2term, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    interact_instructions = dev_instructions

    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    # NOTE(review): both bounds are exclusive, so with the default start of 0
    # env id 0 is filtered out — confirm this is intended.
    interact_instructions = {
        k: v
        for k, v in interact_instructions.items()
        if env_range_start < k < env_range_end
    }

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    InteractAPI.write_empty_instruction()
    InteractAPI.write_real_instruction("None")
    instruction_str = InteractAPI.read_instruction_file()
    print("Initial instruction: ", instruction_str)

    for instruction_sets in interact_instructions.values():
        for set_idx, instruction_set in enumerate(instruction_sets):
            env_id = instruction_set['env']
            env.set_environment(env_id, instruction_set["instructions"])

            presenter = Presenter()
            # Accumulated over all segments and all executions in this instruction set
            cumulative_reward = 0
            for seg_idx in range(len(instruction_set["instructions"])):

                print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                real_instruction_str = instruction_set["instructions"][
                    seg_idx]["instruction"]
                InteractAPI.write_real_instruction(real_instruction_str)
                valid_segment = env.set_current_segment(seg_idx)
                if not valid_segment:
                    continue
                state = env.reset(seg_idx)

                keep_going = True
                while keep_going:
                    InteractAPI.write_real_instruction(real_instruction_str)

                    # Poll the instruction file until a command or an instruction arrives
                    while True:
                        cv2.waitKey(200)
                        instruction = InteractAPI.read_instruction_file()
                        if instruction == "CMD: Next":
                            print("Advancing")
                            keep_going = False
                            InteractAPI.write_empty_instruction()
                            break
                        elif instruction == "CMD: Reset":
                            print("Resetting")
                            env.reset(seg_idx)
                            InteractAPI.write_empty_instruction()
                        elif len(instruction.split(" ")) > 1:
                            instruction_str = instruction
                            break

                    if not keep_going:
                        continue

                    env.override_instruction(instruction_str)
                    tok_instruction = tokenize_instruction(
                        instruction_str, word2token)

                    state = env.reset(seg_idx)
                    # The f prefix used to sit inside the literal, so the
                    # placeholder was printed verbatim instead of the instruction.
                    print(f"Executing: {instruction_str}")
                    # NOTE(review): only ``done`` ends this loop; ``expired`` and
                    # ``oob`` are ignored — confirm env.step sets ``done`` in those cases.
                    while True:
                        rate.sleep()
                        action, internals = model.get_action(
                            state, tok_instruction)

                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        presenter.show_sample(state, action, reward,
                                              cumulative_reward,
                                              instruction_str)
                        if done:
                            break
                    InteractAPI.write_empty_instruction()
                    print("Segment finished!")
        print("Env finished!")
Пример #6
0
def train_top_down_pred():
    """
    Interactively evaluate the top-down prediction model together with a wrapper
    (execution) model.

    Loads Setup["top_down_model"] and Setup["wrapper_model"], then iterates the
    "supervised" eval dataset one batch (batch_size=1) at a time. For every
    segment in a batch the reference instruction is published, and the
    instruction file is polled every 200 ms until the user sends "CMD: Next"
    (advance), "CMD: Reset" (reset the segment) or a multi-word instruction.
    A typed instruction is tokenized and fed with the segment image to the
    top-down model; the softmaxed mask is warped with the inverse of the
    batch's ``affines_g_to_s`` transform, handed to the wrapper model via
    set_ground_truth_visitation_d, visualised, and finally rolled out with
    rollout_model.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    # Merge all three splits. The original merged train twice and left out
    # test, so looking up an env id from the test split raised KeyError.
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    # NOTE(review): list(...) loads every batch before the first one is
    # processed — confirm this eager loading is intentional.
    for b, batch in list(enumerate(dataloader)):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        # batch_size is 1, so index 0 selects the only example in the batch
        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            # NOTE(review): called without token2term here but with it further
            # down — confirm the one-argument form is valid.
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            # NOTE(review): hard-coded absolute, user-specific path.
            with open(
                    "/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                    + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)
            while keep_going:
                write_real_instruction(real_instruction_str)

                # Poll the instruction file until a command or an instruction arrives
                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str,
                                                       word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(
                    tmp, token2term)

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
                # Softmax over the flattened second dimension of a [2, -1] view, then restore the shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)

                # Conjugate the transform with the scale: scale up by S, apply, scale back down
                affine_pred_to_g = np.dot(
                    affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(
                    1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g,
                                              32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                # Back to channel-first with a leading batch axis
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0,
                                             1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Min-max normalise both channels for display (channel 0 is scaled to [0, 2])
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                pred_viz_np = presenter.overlaid_image(image.data,
                                                       mask_pred_np,
                                                       channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                labl_viz_np = presenter.overlaid_image(image.data,
                                                       label_mask.data,
                                                       channel=0)
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                # The side-by-side image above is immediately replaced, so only
                # the prediction overlay is shown.
                viz_img_np = pred_viz_np

                viz_img = presenter.overlay_text(viz_img_np,
                                                 instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
Пример #7
0
from env_config.definitions.landmarks import LANDMARK_RADII
from data_io.paths import get_landmark_images_dir
from pomdp.pomdp_interface import PomdpInterface
from visualization import Presenter
"""
This script is used to take pictures of various landmarks
"""
from parameters import parameter_server as P
# Module-level setup: everything below runs at import/execution time.
P.initialize_experiment("nl_datacollect")

# NOTE(review): the unit of Rate's argument (period vs. frequency) is not visible here — confirm.
rate = Rate(0.1)

# Image counts per landmark for the train and test sets, going by the names;
# the code that uses them is outside this excerpt.
IMAGES_PER_LANDMARK_TRAIN = 1000
IMAGES_PER_LANDMARK_TEST = 200

env = PomdpInterface()

count = 0

presenter = Presenter()


def save_landmark_img(state, landmark_name, i, eval):
    """
    Save ``state.image`` as ``<landmark_name>_<i>.jpg`` in the directory returned
    by get_landmark_images_dir(landmark_name, eval), creating it if necessary.

    :param state: object whose ``image`` attribute is written to disk
    :param landmark_name: used for both the directory lookup and the file name
    :param i: index appended to the file name
    :param eval: forwarded unchanged to get_landmark_images_dir
    """
    out_dir = get_landmark_images_dir(landmark_name, eval)
    os.makedirs(out_dir, exist_ok=True)
    file_name = "".join((landmark_name, "_", str(i), ".jpg"))
    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2, so this only
    # works with an older SciPy.
    scipy.misc.imsave(os.path.join(out_dir, file_name), state.image)


for landmark_name, landmark_radius in LANDMARK_RADII.items():
Пример #8
0
    def roll_out_policy(self, params):
        """
        Given the provided rollout parameters, spawn a simulator instance and execute the specified policy on all
        environments listed in params.envs for which instructions are available.

        A lot of the code is simply checking various error conditions, because the data has issues, and logging the
        outcome. The actual rollout happens in roll_out_on_env.

        Errors are reported with print_error and swallowed: a failure inside one instruction set skips that set,
        a failure anywhere else ends the rollout early. Whatever was collected so far is still returned.

        :param params: RollOutParams instance defining the parameters of the rollout
        :return: list with one entry per successfully rolled-out instruction set, each entry being the value
        returned by roll_out_on_env
        """

        def report_error(exc):
            # Must be called from inside an ``except`` block so that
            # traceback.format_exc() sees the active exception.
            import traceback
            from utils.colors import print_error
            print_error("Error encountered during policy rollout!")
            print_error(exc)
            print_error(traceback.format_exc())

        if params.isDebug():
            run_metadata.WRITE_DEBUG_DATA = True

        dataset = []
        try:
            # Load the neural network policy from file
            # We can't just pass a neural network into this function, because it can't be pickled
            params.loadPolicy()
            assert params.hasPolicy()

            self.env = PomdpInterface(instance_id=self.instance_id,
                                      is_real=params.real_drone)

            _, all_instructions, _, _, self.word2token = self.load_all_envs()
            env_ids = params.envs
            seg_indices = params.seg_list
            custom_instructions = params.custom_instructions

            # Filter out the envs that are not in all_instructions (we don't have instructions available for them).
            # Keep each env's position in params.envs: seg_list and custom_instructions are indexed by that
            # position, so re-enumerating the filtered list would pick another env's entry after any drop.
            valid_envs = [(idx, env_id) for idx, env_id in enumerate(env_ids)
                          if env_id in all_instructions]

            # Loop through environments
            for i, env_id in valid_envs:
                # Loop through all non-empty sets of instructions for each pomdp
                instruction_sets = [
                    s for s in all_instructions[env_id] if len(s) > 0
                ]

                if not instruction_sets:
                    print("No instruction sets for env: " + str(env_id))

                for j, instructions_set in enumerate(instruction_sets):
                    try:
                        seg_id = seg_indices[
                            i] if seg_indices is not None else None
                        custom_instr = custom_instructions[
                            i] if custom_instructions is not None else None
                        import rollout.run_metadata as md
                        md.CUSTOM_INSTR_NO = i
                        # TODO: Check if this works!
                        dataset.append(
                            self.roll_out_on_env(params, instructions_set, j,
                                                 seg_id, custom_instr))
                        DebugWriter().commit()

                        # On the real drone only the first instruction set of each env is executed
                        if params.isRealDrone():
                            break

                    except Exception as e:
                        report_error(e)
                        continue

        except Exception as e:
            report_error(e)

        # The simulator may never have been created if the setup above failed;
        # don't turn an already-reported error into an AttributeError here.
        env = getattr(self, "env", None)
        if env is not None:
            env.land()

        return dataset
Пример #9
0
class PolicyRoller:
    """
    Really only a wrapper around the roll_out_policy function, which does the policy rollout in the pomdp
    It collects actions both from the user-provided policy and from the oracle (as labels) and accumulates a dataset
    """
    def __init__(self, instance_id=0):
        """
        Create the presenter and initialise the remaining attributes to None.

        :param instance_id: stored as ``self.instance_id``
        """
        self.presenter = Presenter()
        self.instance_id = instance_id

        # Not set during construction; assigned elsewhere before use
        self.env = self.word2token = self.all_instructions = None

    def reset(self):
        """
        Restore the freshly-constructed state by re-running ``__init__``.

        The current ``instance_id`` is passed through; calling ``__init__()``
        without it would silently reset the id to the default of 0.
        """
        self.__init__(instance_id=self.instance_id)

    def load_all_envs(self):
        """
        Load the train/dev/test instructions, merge them and build the token maps.

        :return: tuple (env_ids, all_instructions, corpus, token2term, word2token), where env_ids is the list of
        keys of the merged instruction dictionary
        """
        train_set, dev_set, test_set, corpus = get_all_instructions()
        merged = merge_instruction_sets(train_set, dev_set, test_set)
        token2term, word2token = get_word_to_token_map(corpus)
        return list(merged.keys()), merged, corpus, token2term, word2token

    def tokenize_string(self, s):
        word_list = filter(None, s.split(" "))
        token_instruction = list(map(lambda w: self.word2token[w], word_list))
        return token_instruction

    def roll_out_on_segment(self, ):
        """Unimplemented placeholder: takes no arguments and does nothing."""
        pass

    def choose_action(self, params, step, switch_thres, reference_action,
                      policy_action):
        """
        Pick the action to execute according to ``params.rollout_strategy``:
          POLICY            -> always the policy action
          REFERENCE         -> always the reference (oracle) action
          POLICY_IN_REF_OUT -> policy action while step <= switch_thres, reference action afterwards
          MIXTURE           -> reference action with probability params.mixture_ref_prob, else policy action
        :param params: RolloutParams instance
        :param step: current control step number
        :param switch_thres: roll-in/roll-out control step number
        :param reference_action: action executed by oracle
        :param policy_action: action executed by policy
        :return: the selected action, or None if the strategy matches none of the cases above
        """
        strategy = params.rollout_strategy

        if strategy == RolloutStrategy.POLICY:
            return policy_action
        if strategy == RolloutStrategy.REFERENCE:
            return reference_action
        if strategy == RolloutStrategy.POLICY_IN_REF_OUT:
            return reference_action if step > switch_thres else policy_action
        if strategy == RolloutStrategy.MIXTURE:
            use_reference = random.uniform(0, 1) < params.mixture_ref_prob
            return reference_action if use_reference else policy_action
        return None

    def roll_out_on_env(self,
                        params,
                        instructions_set,
                        set_idx,
                        only_seg_idx=None,
                        custom_instr=None):
        """
        Roll out the policy on the segments of a single instruction set in one environment.

        For each segment spanning at least 2 path indices, the oracle (params.ref_policy) and the policy
        (params.policy) are both queried at every step, one of the two actions is executed as decided by
        choose_action, and a sample dictionary is recorded.

        :param params: rollout parameters object; provides the policies, horizon, switching and logging options
        :param instructions_set: dictionary with keys "env" (environment id) and "instructions" (list of segments,
        each with "seg_idx", "start_idx", "end_idx" and "instruction")
        :param set_idx: index of this instruction set; recorded in the samples and run metadata
        :param only_seg_idx: if not None, only the segment with this seg_idx is rolled out
        :param custom_instr: if not None, replaces the instruction string of every segment
        :return: if params.isSegmentLevel(), a list (over segments) of lists of samples,
        otherwise a flat list of samples over all segments
        """

        env_dataset = []
        failed = False

        env_id = instructions_set["env"]
        self.env.set_environment(
            env_id, instruction_set=instructions_set['instructions'])
        path = load_and_convert_path(env_id)
        params.initPolicyContext(env_id, path)

        import rollout.run_metadata as md
        segments = list(instructions_set['instructions'])

        # all segments with at least length 2
        valid_segments = [
            (segments[i], segments[i]["seg_idx"]) for i in range(len(segments))
            if segments[i]["end_idx"] - segments[i]["start_idx"] >= 2
        ]

        # Nothing to roll out in this instruction set; only a message is printed
        if len(valid_segments) == 0:
            print("Ding dong!")

        first_seg = True

        # For recurrent policy, we need to explicitly start a segment and reset the LSTM state
        # TODO: Make sure this still works for the older non-NL model
        params.policy.start_sequence()

        for segment, seg_idx in valid_segments:
            if only_seg_idx is not None and seg_idx != only_seg_idx:
                print("Skipping seg: " + str(seg_idx) + " as not requested")
                continue

            valid_segment = self.env.set_current_segment(seg_idx)
            if not valid_segment:
                print(
                    f"Skipping segment {seg_idx} as it is empty / invalid for env {env_id}"
                )
                continue

            # In segment-level mode the policy sequence is restarted for every segment
            if params.segment_level:
                params.policy.start_sequence()

            segment_dataset = []

            # Decide when to switch policies
            switch_threshold = params.horizon + 1  # Never switch policies by default
            do_switch = random.uniform(0, 1) < params.switch_prob
            if do_switch and params.threshold_strategy == SwitchThresholdStrategy.UNIFORM:
                switch_threshold = random.uniform(0, params.horizon)

            string_instruction, end_idx, start_idx = segment[
                "instruction"], segment["end_idx"], segment["start_idx"]

            # Manual instruction override to allow rolling out arbitrary instructions for debugging
            if custom_instr is not None:
                print("REPLACED: ", string_instruction)
                string_instruction = custom_instr
            print("INSTRUCTION:", string_instruction)

            # Set some global parameters that can be accessed by other parts of the system
            md.IS_ROLLOUT = True
            md.REAL_DRONE = params.real_drone
            md.RUN_NAME = params.run_name
            md.ENV_ID = env_id
            md.SET_IDX = set_idx
            md.SEG_IDX = seg_idx
            md.START_IDX = start_idx
            md.END_IDX = end_idx
            md.INSTRUCTION = string_instruction

            # Optional policy hook, called only if the policy defines it
            if hasattr(params.policy, "start_segment_rollout"):
                params.policy.start_segment_rollout(env_id, set_idx, seg_idx)

            token_instruction = self.tokenize_string(string_instruction)

            # At the end of segment N, should we reset drone position to the start of segment N+1 or continue
            # rolling out seamlessly?
            if first_seg or params.shouldResetAlways() or (
                    failed and params.shouldResetIfFailed()):
                state = self.env.reset(seg_idx)
                failed = False
                first_seg = False
                # sleep and sleepytime are defined outside this method
                sleep(sleepytime)

            # Tell the oracle which part of the path is currently being executed
            params.setCurrentSegment(start_idx, end_idx)

            step_num = 0
            total_reward = 0
            # If the path has been finished according to the oracle, allow rolling out STEPS_TO_KILL more steps
            # If we finish the segment, but don't stop, log the position at which we finish the segment
            oracle_finished_countdown = params.steps_to_kill

            # Finally the actual policy roll out on the path segment!
            while True:

                # Get oracle action (labels)
                ref_action, _ = params.ref_policy.get_action(
                    state, token_instruction)

                if ref_action is None or step_num == params.horizon:
                    failed = True  # Either veered off too far, or ran out of time. Either way, we consider it a fail
                    print("Failed segment")
                    break

                # Get the policy action (actions to be rolled out)
                action, _ = params.policy.get_action(
                    state, token_instruction)

                # NOTE(review): this exit does not set ``failed`` — confirm that is intended
                if action is None:
                    print("POLICY PRODUCED None ACTION")
                    break

                # Choose which action to execute (reference or policy) based on the selected procedure
                exec_action = self.choose_action(params, step_num,
                                                 switch_threshold, ref_action,
                                                 action)

                # action = [vel_x, vel_y, vel_yaw] vel_y is unused currently. Execute the action in the pomdp
                state, reward, done, exceeded, oob = self.env.step(exec_action)

                total_reward += reward

                # Collect the data into a dataset.
                # "state" is the state AFTER the step; "action" is the executed action,
                # "pol_action" the policy's proposal and "ref_action" the oracle's label.
                sample = {
                    "instruction": string_instruction,
                    "state": state,
                    "ref_action": ref_action,
                    "reward": reward,
                    "done": done,
                    "seg_path": path[start_idx:end_idx + 1],
                    "path": path,
                    "env_id": env_id,
                    "set_idx": set_idx,
                    "seg_idx": seg_idx,
                    "start_idx": start_idx,
                    "end_idx": end_idx,
                    "action": exec_action,
                    "pol_action": action,
                    "flag": params.getFlag()
                }

                segment_dataset.append(sample)
                if not params.isSegmentLevel():
                    env_dataset.append(sample)

                # Do visual feedback and logging
                if params.first_person:
                    self.presenter.show_sample(state, exec_action, reward,
                                               string_instruction)
                if params.plot:
                    self.presenter.plot_paths(segment_dataset,
                                              interactive=True)
                if params.save_samples:
                    file_path = params.getSaveSamplesPath(
                        env_id, set_idx, seg_idx, step_num)
                    self.presenter.save_sample(file_path, state, exec_action,
                                               reward, string_instruction)
                if params.show_action:
                    self.presenter.show_action(ref_action, "ref_action")
                    self.presenter.show_action(exec_action, "exec_action")

                # If the policy is finished, we stop. Otherwise the oracle should just keep outputting
                # examples that say that the policy should output finished at this point.
                # Index 3 of an action is compared against 0.5 as the stop signal.
                if exec_action[3] > 0.5 and not params.shouldIgnorePolicyStop(
                ):
                    print("Policy stop!")
                    break
                # If oracle says we're finished, allow a number of steps before terminating.
                if ref_action[3] > 0.5:
                    # Position at the first oracle stop; only read by the commented-out
                    # error tracker call further down
                    if oracle_finished_countdown == params.steps_to_kill:
                        drone_pos_force_stop = state.get_pos_2d()
                    oracle_finished_countdown -= 1
                    if oracle_finished_countdown == 0:
                        print("Oracle forced stop!")
                        break
                step_num += 1

            # Call the rollout end callback, so that the model can save any debugging information, such as feature maps
            if callable(getattr(params.policy, "on_rollout_end", None)):
                params.policy.on_rollout_end(env_id, set_idx, seg_idx)

            if params.isSegmentLevel():
                env_dataset.append(segment_dataset)

            # Plot the trajectories for error tracking
            # TODO: Plot entire envs not segment by segment
            if params.save_plots:
                if not params.isSegmentLevel():
                    self.presenter.plot_paths(
                        env_dataset,
                        segment_path=path[start_idx:end_idx + 1],
                        interactive=False,
                        bg=True,
                        world_size=4.7)
                self.presenter.save_plot(
                    params.getSavePlotPath(env_id, set_idx, seg_idx))

            # Calculate end of segment error.
            # The clamped end_idx and seg_len below are currently only used by the
            # commented-out error tracker call.
            if end_idx > len(path) - 1:
                end_idx = len(path) - 1

            # The reward is proportional to path length. Weigh it down, so that max reward is 1:
            seg_len = end_idx - start_idx
            #self.error_tracker.add_sample(not failed, drone_pos_force_stop, state.get_pos(), path[end_idx],
            #                              path[end_idx - 1], total_reward, seg_len)

            if params.first_segment_only:
                print("Only running the first segment")
                break

            #sleep(sleepytime)

        return env_dataset

    def roll_out_policy(self, params):
        """
        Roll out the policy described by ``params`` on every requested environment.

        The policy is loaded via ``params.loadPolicy()`` (it is loaded here rather than
        passed in because a neural network policy can't be pickled), a fresh
        ``PomdpInterface`` is created, and ``roll_out_on_env`` is called once for each
        non-empty instruction set of each environment in ``params.envs`` for which
        instructions are available. Errors are printed and swallowed: a failure in one
        instruction set skips only that set, a failure during setup aborts the whole
        rollout. In both cases whatever has been collected so far is returned, and
        ``self.env.land()`` is called before returning.

        :param params: RollOutParams instance defining the parameters of the rollout
        :return: List with one entry per (environment, instruction set) that was rolled
        out successfully. Each entry is the value returned by ``roll_out_on_env``:
        if params.isSegmentLevel() it is a list (over segments) of lists of samples,
        otherwise it is a flat list of samples.
        """

        def report_error(err):
            # Must be called from inside an ``except`` block so that format_exc()
            # sees the exception currently being handled.
            import traceback
            from utils.colors import print_error
            print_error("Error encountered during policy rollout!")
            print_error(err)
            print_error(traceback.format_exc())

        if params.isDebug():
            run_metadata.WRITE_DEBUG_DATA = True

        dataset = []
        try:
            # Load the neural network policy from file
            # We can't just pass a neural network into this function, because it can't be pickled
            params.loadPolicy()
            assert params.hasPolicy()

            self.env = PomdpInterface(instance_id=self.instance_id,
                                      is_real=params.real_drone)

            _, all_instructions, _, _, self.word2token = self.load_all_envs()
            env_ids = params.envs  # if params.envs is not None else all_env_ids
            seg_indices = params.seg_list
            custom_instructions = params.custom_instructions

            # ``i`` is the position in params.envs. Environments without instructions are
            # skipped *inside* the loop (instead of being filtered out beforehand) so that
            # seg_indices[i] and custom_instructions[i] stay aligned with their environment.
            # NOTE(review): assumes seg_list / custom_instructions are parallel to params.envs.
            for i, env_id in enumerate(env_ids):
                if env_id not in all_instructions:
                    continue

                # Only non-empty sets of instructions are rolled out
                instruction_sets = [
                    s for s in all_instructions[env_id] if len(s) > 0
                ]
                if not instruction_sets:
                    print("No instruction sets for env: " + str(env_id))

                for j, instructions_set in enumerate(instruction_sets):
                    try:
                        seg_id = None if seg_indices is None else seg_indices[i]
                        custom_instr = (None if custom_instructions is None
                                        else custom_instructions[i])
                        import rollout.run_metadata as md
                        md.CUSTOM_INSTR_NO = i
                        # TODO: Check if this works!
                        dataset.append(
                            self.roll_out_on_env(params, instructions_set, j,
                                                 seg_id, custom_instr))
                        DebugWriter().commit()

                        # On the real drone only the first instruction set is executed
                        if params.isRealDrone():
                            break

                    except Exception as e:
                        # Best effort: report and move on to the next instruction set
                        report_error(e)
                        continue

        except Exception as e:
            # Setup (or iteration) failed: report and return what we have
            report_error(e)

        self.env.land()

        return dataset
Пример #10
0
def automatic_demo():
    """
    Replay hand-picked dev-set segments with the configured model, forever.

    Only dev-set environments whose id lies strictly between ``Setup.env_range_start``
    and ``Setup.env_range_end`` are considered. For each of their instruction sets,
    the module-level ``examples`` list is searched for an entry whose element 0 equals
    the env id; element 2 of that entry selects the single segment that is executed.
    Per executed segment: the instruction is shown, the display is ticked once per
    second ``START_PAUSE`` times, the model is stepped until the environment reports
    ``done``, and the display is ticked once per second ``END_PAUSE`` times.

    The outer loop never terminates; the demo runs until the process is stopped.
    """

    P.initialize_experiment()
    instruction_display = InstructionDisplay()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    _train, dev_instructions, _test, corpus = get_all_instructions()
    _, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    # NOTE(review): both bounds are exclusive, so with the default start of 0
    # env id 0 is never selected - confirm that this is intended.
    interact_instructions = {
        k: v
        for k, v in dev_instructions.items()
        if env_range_start < k < env_range_end
    }

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    # Loop over the select few examples
    while True:

        for instruction_sets in interact_instructions.values():
            for instruction_set in instruction_sets:
                env_id = instruction_set['env']

                # If several examples share this env id, the last one wins
                found_example = None
                for example in examples:
                    if example[0] == env_id:
                        found_example = example
                if found_example is None:
                    continue

                segments = instruction_set["instructions"]
                env.set_environment(env_id, segments)

                # Only needed by the commented-out visualization below
                presenter = Presenter()
                cumulative_reward = 0
                for seg_idx, segment in enumerate(segments):
                    if seg_idx != found_example[2]:
                        continue

                    print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                    real_instruction_str = segment["instruction"]
                    instruction_display.show_instruction(real_instruction_str)
                    valid_segment = env.set_current_segment(seg_idx)
                    if not valid_segment:
                        continue
                    state = env.reset(seg_idx)

                    # Tokenize before the pause so the tokens exist even if START_PAUSE is 0
                    tok_instruction = tokenize_instruction(
                        real_instruction_str, word2token)

                    for _ in range(START_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)

                    # Reset once more after the pause, right before execution starts
                    state = env.reset(seg_idx)
                    print(f"Executing: {real_instruction_str}")
                    while True:
                        instruction_display.tick()
                        rate.sleep()
                        action, _internals = model.get_action(
                            state, tok_instruction)
                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        #presenter.show_sample(state, action, reward, cumulative_reward, real_instruction_str)
                        #show_depth(state.image)
                        if done:
                            break

                    print("Segment finished!")
                    for _ in range(END_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)
                    instruction_display.show_instruction("...")

            print("Env finished!")
Пример #11
0
class SimplePolicyRoller:
    """
    Thin wrapper around single-segment policy rollouts in the pomdp.

    It collects actions both from the user-provided policy and, if one is given, from
    the oracle (as labels) and accumulates the per-step samples into a dataset.
    """
    def __init__(self,
                 instance_id=0,
                 real_drone=False,
                 policy=None,
                 oracle=None,
                 no_reward=False):
        """
        :param instance_id: forwarded to PomdpInterface as ``instance_id``
        :param real_drone: forwarded to PomdpInterface as ``is_real``
        :param policy: object whose ``get_action`` drives the rollout (see set_policy)
        :param oracle: optional object providing reference actions
        :param no_reward: if True, intrinsic rewards are not computed or stored
        """
        self.presenter = Presenter()
        self.instance_id = instance_id

        self.word2token = None
        self.all_instructions = None
        (self.all_env_ids, self.all_instructions, self.corpus,
         self.token2term, self.word2token) = self.load_all_envs()

        self.env = PomdpInterface(instance_id=self.instance_id,
                                  is_real=real_drone)
        self.policy = policy
        self.oracle = oracle
        self.no_reward = no_reward

    def load_all_envs(self):
        """
        Load the train/dev/test instructions and merge them into one mapping.

        :return: tuple (env_ids, all_instructions, corpus, token2term, word2token)
        """
        train_i, dev_i, test_i, corpus = get_all_instructions()
        all_instructions = merge_instruction_sets(train_i, dev_i, test_i)
        token2term, word2token = get_word_to_token_map(corpus)
        env_ids = list(all_instructions.keys())
        return env_ids, all_instructions, corpus, token2term, word2token

    def tokenize_string(self, s):
        """
        Split ``s`` on single spaces, drop empty pieces and map each word to its token.

        :raises KeyError: if a word is missing from ``self.word2token``
        """
        return [self.word2token[word] for word in s.split(" ") if word]

    def set_policy(self, policy):
        """Replace the policy used for subsequent rollouts."""
        self.policy = policy

    def save_rollouts(self, rollouts, dataset_name):
        """
        Group the samples of all rollouts by environment and save one dataset per env.

        The env id of a rollout is taken from its first sample. Empty rollouts carry no
        env id and nothing to save, so they are skipped.

        :param rollouts: iterable of rollouts, each a list of sample dicts
        :param dataset_name: name passed on to ``save_dataset``
        """
        samples_by_env = {}
        for rollout in rollouts:
            if not rollout:
                continue
            samples_by_env.setdefault(rollout[0]["env_id"], []).extend(rollout)

        for env_id, env_samples in samples_by_env.items():
            # This saves just a single segment per environment, as opposed to all segments that the oracle saves. Problem?
            save_dataset(dataset_name, env_samples, env_id=env_id, lock=True)

    def choose_action(self, pol_action, ref_action, dagger_beta):
        """Return ``ref_action`` if a uniform draw from [0, 1] is below ``dagger_beta``, else ``pol_action``."""
        use_expert = random.uniform(0, 1) < dagger_beta
        return ref_action if use_expert else pol_action

    def sample_to_cpu(self, sample):
        """Replace, in place, every value that has a bound ``to`` method with ``value.to("cpu")``."""
        for key, value in sample.items():
            if hasattr(value, "to") and isinstance(value.to, types.MethodType):
                sample[key] = value.to("cpu")

    def single_segment_rollout(self,
                               env_id,
                               set_idx,
                               seg_idx,
                               do_sample,
                               dagger_beta=0,
                               rl_rollout=True):
        """
        Execute the policy on one instruction segment until the environment reports done.

        ``md.IS_ROLLOUT`` is True for the duration of the call and is reset to False on
        exit, including when the rollout raises.

        :param env_id: environment to load
        :param set_idx: index of the instruction set within the environment
        :param seg_idx: segment to execute
        :param do_sample: forwarded to ``policy.get_action`` as ``sample``
        :param dagger_beta: threshold passed to choose_action when an oracle is present
        :param rl_rollout: forwarded to ``policy.get_action``; if True each sample is
        also moved to cpu
        :return: list of per-step sample dicts
        """
        segments = self.all_instructions[env_id][set_idx]['instructions']

        # TODO: Get rid of this idiocy:
        md.IS_ROLLOUT = True
        try:
            segment = get_instruction_segment(
                env_id, set_idx, seg_idx, all_instr=self.all_instructions)

            self.env.set_environment(env_id,
                                     instruction_set=segments,
                                     fast=True)
            self.env.set_current_segment(seg_idx)

            self.policy.start_sequence()
            if hasattr(self.policy, "start_segment_rollout"):
                self.policy.start_segment_rollout(env_id, set_idx, seg_idx)
            if self.oracle:
                self.oracle.start_segment_rollout(env_id, set_idx, seg_idx)

            string_instruction = segment["instruction"]
            token_instruction = self.tokenize_string(string_instruction)

            # TODO: Support oracle (including setCurrentSegment, and setting the path)
            rollout_sample = []

            # Reset the drone to the segment starting position:
            state = self.env.reset(seg_idx)

            first = True
            while True:
                action, rl_stuff = self.policy.get_action(
                    state,
                    token_instruction,
                    sample=do_sample,
                    rl_rollout=rl_rollout)

                if self.oracle:
                    ref_action, _ = self.oracle.get_action(
                        state, token_instruction)
                    exec_action = self.choose_action(action, ref_action,
                                                     dagger_beta)
                else:
                    ref_action = action
                    exec_action = action

                next_state, extrinsic_reward, done, expired, oob = self.env.step(
                    exec_action)

                # Calculate intrinsic reward (I don't like that this delays the loop)
                if hasattr(self.policy,
                           "calc_intrinsic_rewards") and not self.no_reward:
                    intrinsic_rewards = self.policy.calc_intrinsic_rewards(
                        next_state, action, done, first)
                else:
                    intrinsic_rewards = {"x": 0}
                intrinsic_reward = sum(intrinsic_rewards.values())

                # NOTE(review): the out-of-bounds penalty is subtracted from
                # "intrinsic_reward" only; "full_reward" does not include it - confirm.
                sample = {
                    "instruction": string_instruction,
                    "ref_action": ref_action,
                    "pol_action": action,
                    "action": exec_action,
                    "state": state,
                    "extrinsic_reward": extrinsic_reward,
                    "intrinsic_reward":
                    intrinsic_reward - (1.0 if oob else 0.0),
                    "full_reward": extrinsic_reward + intrinsic_reward,
                    "done": done,
                    "expired": expired,
                    "env_id": env_id,
                    "set_idx": set_idx,
                    "seg_idx": seg_idx,
                }
                sample = dict_merge(sample, rl_stuff)
                if not self.no_reward:
                    sample = dict_merge(sample, intrinsic_rewards)
                rollout_sample.append(sample)

                # Multiprocessing has stopped playing nice with PyTorch cuda. Move sample to cpu first.
                # (sample_to_cpu mutates the dict that was just appended.)
                if rl_rollout:
                    self.sample_to_cpu(sample)

                state = next_state
                first = False
                if done:
                    break

            return rollout_sample
        finally:
            # Never leave the global flag set, even if the rollout failed
            md.IS_ROLLOUT = False

    def rollout_segments(self,
                         env_ids,
                         seg_ids,
                         policy_state,
                         sample,
                         dagger_beta=0,
                         save_dataset_name=None,
                         land_afterwards=False,
                         rl_rollout=True):
        """
        Roll out instruction set 0 of each (env_id, seg_idx) pair and collect the results.

        :param env_ids: environment ids, paired element-wise with ``seg_ids``
        :param seg_ids: segment indices
        :param policy_state: if not None, passed to ``policy.set_policy_state`` first
        :param sample: forwarded to single_segment_rollout as ``do_sample``
        :param dagger_beta: forwarded to single_segment_rollout
        :param save_dataset_name: if truthy, the rollouts are saved under this name
        :param land_afterwards: if True, ``self.env.land()`` is called at the end
        :param rl_rollout: forwarded to single_segment_rollout
        :return: list (over segments) of lists of samples
        """
        if policy_state is not None:
            self.policy.set_policy_state(policy_state)

        data = []
        for env_id, seg_idx in zip(env_ids, seg_ids):
            # A segment is retried for as long as it fails with an EnvException
            while True:
                try:
                    seg_data = self.single_segment_rollout(
                        env_id, 0, seg_idx, sample, dagger_beta, rl_rollout)
                except PomdpInterface.EnvException:
                    continue
                break
            data.append(seg_data)

        if save_dataset_name:
            self.save_rollouts(data, save_dataset_name)

        # Land the real drone if we have one.
        if land_afterwards:
            self.env.land()

        return data
Пример #12
0
        self.thread = threading.Thread(target=self.run, args=())
        self.thread.daemon = True
        self.thread.start()

    def run(self):
        """Entry point of the background thread created with ``target=self.run``: calls ``self.mon.run()``."""
        monitor = self.mon
        monitor.run()

    def get_command(self):
        """Return the ``current_vel`` attribute of ``self.mon``."""
        monitor = self.mon
        return monitor.current_vel

# Module-level setup: these statements run as soon as the module is executed.
initialize_experiment("nl_datacollect_cage")

# Keyboard teleoperation source (presumably the class defined above - confirm).
# NOTE(review): the unit of the 0.1 passed to Rate is not visible here - confirm.
teleoper = KeyTeleop()
rate = Rate(0.1)

# Environment interface, created with its default arguments.
env = PomdpInterface()

env_ids = get_available_env_ids()

# Counters start at zero; the code that updates them is outside this excerpt.
count = 0
stuck_count = 0


def show_depth(image):
    """
    Display a composite of ``image`` for checking depth alignment.

    The mean of channels 0-2 fills the first two output channels and channel 3 of
    ``image`` fills the third. The composite is shifted so that its minimum is 0,
    divided by its maximum (plus a small epsilon) and shown in the
    "depth_alignment" window.

    :param image: array indexed as [row, col, channel] with at least 4 channels
    """
    gray = np.mean(image[:, :, 0:3], axis=2)
    depth_channel = image[:, :, 3]
    overlay = np.stack([gray, gray, depth_channel], axis=2)

    # Normalize in place; the epsilon guards against division by zero
    overlay -= overlay.min()
    overlay /= (overlay.max() + 1e-9)

    Presenter().show_image(overlay,
                           "depth_alignment",
                           torch=False,
                           waitkey=1,
                           scale=4)