Пример #1
0
 def __init__(self, model):
     """Create the sampler and work queue, then launch the daemon worker thread.

     :param model: object handed directly to ``sampling.Sampler``.
     """
     self.sampler = sampling.Sampler(model)
     self.queue = queue.Queue()
     # The worker runs ``self.threadmain_h`` and is marked as a daemon thread.
     worker = Thread(target=self.threadmain_h, daemon=True)
     self.thread = worker
     # ``stopped`` is assigned before the thread is started.
     self.stopped = True
     worker.start()
Пример #2
0
def run() -> typing.List:
    """Estimate the reward weights from human demonstrations only.

    Prompts ``N_DEMOS`` times; a demonstration is collected only when the
    answer is exactly ``'y'``. The collected demonstrations are loaded into a
    sampler, ``N_SAMPLES_SUMM`` samples are drawn, and the normalized mean and
    the per-dimension variance of the samples are printed.

    :return: single-element list holding the normalized mean weight vector.
    """
    ### Creating sampler
    sampler = sampling.Sampler(n_query=2, dim_features=DOM.feature_size, update_func="approx",
                               beta_demo=BETA_DEMO, beta_pref=10)

    ### Collecting and loading demos
    demo_names = []
    for _ in range(N_DEMOS):
        inp = input("When you are ready, input 'y', and you can start providing your demonstration.")
        if inp == "y":
            demo_names.extend(DOM.collect_dems())
    demo_path = 'dempref_demonstrations/'
    demos = []
    for d in demo_names:
        # Context manager closes each file promptly instead of leaking the handle.
        # NOTE: pickle can execute arbitrary code; only load trusted demonstration files.
        with open(f'{demo_path}{d}', 'rb') as demo_file:
            demos.append(pickle.load(demo_file, encoding='latin1'))
    demos = [DOM.fetch_to_mujoco(x) for x in demos]
    phi_demos = [DOM.np_features(x) for x in demos]
    sampler.load_demo(np.array(phi_demos))

    ### Computing BIRL estimate of w
    samples = sampler.sample(N=N_SAMPLES_SUMM)
    birl_w = np.mean(samples, axis=0)
    birl_w = birl_w / np.linalg.norm(birl_w)
    var_w = np.var(samples, axis=0)
    print("birl_w: ", birl_w)
    print("var_w: ", var_w)

    return [birl_w]
Пример #3
0
def generate_demos(dom: domain.Domain, weight: typing.List, name: str):
    """Simulate one demonstration for ``weight`` and pickle it to disk.

    The simulated trajectory is loaded into a fresh sampler; the normalized
    mean of 50000 samples and the mean cosine similarity between the samples
    and ``weight`` are printed before the trajectory is written to
    ``fetch_demos/<name>.pickle``.

    :param dom: domain used to simulate the trajectory and compute its features.
    :param weight: weight vector passed to ``dom.simulate``.
    :param name: base name (without extension) of the output pickle file.
    """
    trajectory = dom.simulate(weight, query_length=QUERY_LENGTH, iter_count=10)

    sampler = sampling.Sampler(
        n_query=N_QUERY,
        dim_features=dom.feature_size,
        update_func=UPDATE_FUNC,
        beta_demo=BETA_DEMO,
        beta_pref=BETA_PREF,
    )
    sampler.load_demo(dom.np_features(trajectory))
    samples = sampler.sample(50000)

    # Normalized posterior mean.
    mean_w = np.mean(samples, axis=0)
    mean_w = mean_w / np.linalg.norm(mean_w)
    print("w = " + str(mean_w))

    # Cosine similarity of every sample with the generating weight, averaged.
    similarities = []
    for w in samples:
        similarities.append(
            np.dot(w, weight) / np.linalg.norm(w) / np.linalg.norm(weight))
    m = np.mean(similarities)
    print("m = " + str(m))

    with open(f"fetch_demos/{name}.pickle", 'wb') as out_file:
        pickle.dump(trajectory, out_file)
Пример #4
0
def main(argv):
    """Build a crop sampler over the images under ``FLAGS.data_dir`` and open the GUI.

    :param argv: unused.
    :return: 0 after printing a hint when ``FLAGS.data_dir`` is not set;
        otherwise ``None`` once the GUI object has been constructed.
    """
    data_dir = FLAGS.data_dir
    if data_dir is None:
        print('No data_dir specified. See --help')
        return 0

    # Gather every image path, then randomize the order in place.
    img_paths = list(image_walk(data_dir))
    random.shuffle(img_paths)

    samp = sampling.Sampler(
        img_paths,
        im_dims=(720, 1280),
        crop_size=FLAGS.crop_size,
        num_crop=FLAGS.crops_per_img,
    )

    # The GUI is fed through an asynchronous loader wrapping the sampler.
    loader = gui.AsyncImageLoader(samp)
    gui_handler = gui.GUI(loader)
Пример #5
0
def run() -> pd.DataFrame:
    """Run one demonstration-plus-preference session with a human at the terminal.

    Demonstrations are collected and loaded into the sampler, an initial
    estimate is recorded, and then up to ``N_PREF_ITERS`` preference queries
    are asked. After each update the normalized mean and the per-dimension
    variance of the samples are recorded. The session stops early as soon as
    the summed variance drops below ``VARIANCE_THRESH``.

    :return: data frame with one row per recorded value and the columns
        ``pref_iter`` (0 for the demonstration-only estimate, ``j + 1`` for
        preference iteration ``j``), ``type`` (``"mean"`` or ``"var"``) and
        ``value`` (the recorded array).
    """
    ### Rows of the result; the data frame is built once, on return.
    # DataFrame.append was removed in pandas 2.0, and growing a frame row by
    # row copies it on every iteration, so rows are accumulated in a list.
    columns = ["pref_iter", "type", "value"]
    rows = []

    ### Creating sampler
    sampler = sampling.Sampler(n_query=N_QUERY,
                               dim_features=DOM.feature_size,
                               update_func=UPDATE_FUNC,
                               beta_demo=BETA_DEMO,
                               beta_pref=BETA_PREF)

    def _record_estimates(pref_iter):
        """Sample the sampler, print and store the summary; return (samples, summed variance)."""
        samples = sampler.sample(N=N_SAMPLES_SUMM)
        mean_w = np.mean(samples, axis=0)
        mean_w = mean_w / np.linalg.norm(mean_w)
        var_w = np.var(samples, axis=0)
        total_var = sum(var_w)
        print('Mean: ' + str(mean_w))
        print('Var: ' + str(total_var))
        rows.append([pref_iter, "mean", mean_w])
        rows.append([pref_iter, "var", var_w])
        return samples, total_var

    ### Creating query generator
    qg = query_generation.ApproxQueryGenerator(
        dom=DOM,
        num_queries=N_QUERY,
        query_length=QUERY_LENGTH,
        num_expectation_samples=N_SAMPLES_EXP,
        include_previous_query=INC_PREV_QUERY,
        generate_scenario=GEN_SCENARIO,
        update_func=UPDATE_FUNC,
        beta_pref=BETA_PREF)

    ### Creating human object
    H = human.TerminalHuman(DOM, UPDATE_FUNC)

    ### Collecting and loading demonstrations
    demo_names = DOM.collect_dems()
    demo_path = 'dempref_demonstrations/'
    demos = []
    for demo_name in demo_names:
        # Context manager closes each file promptly instead of leaking the handle.
        # NOTE: pickle can execute arbitrary code; only load trusted demonstration files.
        with open(demo_path + f'{demo_name}', 'rb') as demo_file:
            demos.append(pickle.load(demo_file, encoding='latin1'))
    demos = [DOM.fetch_to_mujoco(x) for x in demos]
    if INC_PREV_QUERY:
        # The first demonstration seeds the "previous query" slot.
        last_query_picked = demos[0]
    phi_demos = [DOM.np_features(x) for x in demos]
    sampler.load_demo(np.array(phi_demos))

    ### Computing initial estimates
    samples, total_var = _record_estimates(0)
    if total_var < VARIANCE_THRESH:
        print("Variance is now below threshold; EXITING.")
        return pd.DataFrame(rows, columns=columns)

    ### Preferences loop
    for j in range(N_PREF_ITERS):
        print("\n\n*** Preferences # %d\n" % (j + 1))

        ## Generate queries
        if INC_PREV_QUERY:
            queries = qg.queries(samples, last_query_picked)
        else:
            queries = qg.queries(samples)
        # NOTE(review): the converted queries are not used below; the call is
        # kept in case the conversion has side effects -- confirm and remove.
        mujoco_queries = [DOM.mujoco_to_fetch(x) for x in queries]

        ## Querying human
        print('\a')
        best = H.input(queries, on_real_robot=False)
        if INC_PREV_QUERY:
            last_query_picked = queries[best]

        ## Creating dictionary mapping rankings to features of queries and loading into sampler
        phi = {k: DOM.np_features(x) for k, x in enumerate(queries)}
        sampler.load_prefs(phi, best)

        ## Recording data from this run
        samples, total_var = _record_estimates(j + 1)
        if total_var < VARIANCE_THRESH:
            print("Variance is now below threshold; EXITING.")
            return pd.DataFrame(rows, columns=columns)

    return pd.DataFrame(rows, columns=columns)
Пример #6
0
# UPDATE_FUNC = "rank"
# BETA_DEMO = 0.1
# BETA_PREF = 5

# Script: simulate 25 demonstrations from random weights, score each one by how
# well the samples it induces align with `true_weight`, and pick the
# lowest-scoring one.
# NOTE(review): `dom`, `true_weight`, `QUERY_LENGTH`, `UPDATE_FUNC`, `BETA_DEMO`
# and `BETA_PREF` are not defined in this excerpt -- presumably set earlier in
# the file; confirm.
N_QUERY = 2
DIM_FEATURES = dom.feature_size

# Maps alignment score -> simulated trajectory. Two runs with exactly the same
# score would overwrite each other.
ms = {}

for i in range(25):
    # Random 4-dimensional weight vector, uniform on [-1, 1).
    weight = np.random.uniform(-1, 1, 4)
    t = dom.simulate(weight, query_length=QUERY_LENGTH, iter_count=10)

    # A fresh sampler per demonstration, loaded with that demonstration only.
    sampler = sampling.Sampler(n_query=N_QUERY,
                               dim_features=DIM_FEATURES,
                               update_func=UPDATE_FUNC,
                               beta_demo=BETA_DEMO,
                               beta_pref=BETA_PREF)
    sampler.load_demo(dom.np_features(t))
    samples = sampler.sample(50000)
    # Mean cosine similarity between the samples and `true_weight` (note: not
    # the random `weight` used to simulate the trajectory).
    m = np.mean([
        np.dot(w, true_weight) / np.linalg.norm(w) /
        np.linalg.norm(true_weight) for w in samples
    ])

    ms[m] = t

# Scores in ascending order.
# NOTE(review): this rebinds the builtin `sorted` at module level; any later
# call to `sorted(...)` in this module will fail. Consider renaming.
sorted = list(ms)
sorted.sort()

# Lowest alignment score.
worst = sorted[0]
Пример #7
0
# Interactive line editing: tab completion and vi key bindings.
readline.parse_and_bind('tab: complete')
readline.parse_and_bind('set editing-mode vi')

# Command-line options.
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', default='models/test.json')
parser.add_argument('--maxlength', default=1024, type=int)
parser.add_argument('--temperature', default=1.0, type=float)
parser.add_argument('--savedir', default='')
args = parser.parse_args()

# Load the language model from the checkpoint and switch it to eval mode.
model = LanguageModel.LanguageModel()
model.load_json(args.checkpoint)
model.eval()

sampler = sampling.Sampler(model)

# State store plus the default put/get chains; the newline token is used both
# as the store's default token and as the end token of the get chain.
stor = M.DefaultStateStore(model, default_token=model.token_to_idx[b'\n'])
pc = sampling.default_put_chains(stor)
gc = sampling.default_get_chains(stor,
                                 endtoken=[model.token_to_idx[b'\n']],
                                 maxlength=args.maxlength,
                                 temperature=args.temperature)

# Bad-word blocker, initially with an empty list.
badword_mod = M.BlockBadWords(model, [])

# Restore a previously saved bad-word list when a save directory was given and
# the file exists.
path_bw = args.savedir + '/badwords'
if args.savedir and os.path.exists(path_bw):
    # Context manager closes the file instead of leaking the handle.
    # NOTE: pickle can execute arbitrary code; only use a trusted savedir.
    with open(path_bw, 'rb') as badwords_file:
        badword_mod.badwords = pickle.load(badwords_file)

gc.sample_post += [M.PrintSampledString(model), badword_mod]
Пример #8
0
    def run(self, n_iters: int = 1) -> Tuple[pd.DataFrame, List]:
        """
        Runs the algorithm n_iters times and returns a data frame with all the data from the experiment.

        :param n_iters: Number of times to run the algorithm.
        :return: (df, self.config); df is a data frame containing all the data from the run and config contains
            the parameters of the run. When update_func is not "rank" and the human's input is -1, returns
            immediately with the data recorded up to that point.
        :raises ValueError: if the domain is a domain.Car and self.update_func is not one of "pick_best",
            "approx" or "rank".
        """
        ### Rows of the data frame to store data in; the frame itself is built once, on return
        # (DataFrame.append was removed in pandas 2.0, and appending per iteration copies the frame each time)
        # run # corresponds to the iteration of the whole experiment
        # pref_iter corresponds to the iteration of the preference loop in the particular run
        # type is the type of data being stored; options are "mean", "var", "m"
        # value is the actual value being stored
        columns = ["run #", "pref_iter", "type", "value"]
        rows = []

        def _record_estimates(sampler, run_idx, pref_iter):
            """Sample the sampler, print and store the summary rows; return the samples."""
            samples = sampler.sample(N=self.n_samples_summ)
            mean_w = np.mean(samples, axis=0)
            mean_w = mean_w / np.linalg.norm(mean_w)
            var_w = np.var(samples, axis=0)
            rows.append([run_idx, pref_iter, "mean", mean_w])
            rows.append([run_idx, pref_iter, "var", var_w])
            print("Estimate of w: " +
                  str(mean_w))  # TODO: Add different levels of verbose mode
            print("Estimate of variance: " + str(sum(var_w)))
            # computing convergence measure if we are in simulation
            if self.human_type != "term":
                m = np.mean([
                    np.dot(w, self.true_weight) / np.linalg.norm(w) /
                    np.linalg.norm(self.true_weight) for w in samples
                ])
                rows.append([run_idx, pref_iter, "m", m])
                print("Estimate of m: " + str(m) + "\n\n")
            return samples

        ### Creating query generator
        if isinstance(self.domain,
                      domain.Car):  # using exact QG when dynamics is available
            if self.update_func == "pick_best":
                obj_fn = query_generation.pick_best
            elif self.update_func == "approx":
                obj_fn = query_generation.approx
            elif self.update_func == "rank":
                obj_fn = query_generation.rank
            else:
                # Previously this fell through and failed later with an unbound obj_fn.
                raise ValueError(
                    "update_func must be 'pick_best', 'approx' or 'rank', got %r"
                    % (self.update_func,))
            qg = query_generation.QueryGenerator(
                dom=self.domain,
                num_queries=self.n_query,
                query_length=self.query_length,
                num_expectation_samples=self.n_samples_exp,
                include_previous_query=self.inc_prev_query,
                generate_scenario=self.gen_scenario,
                objective_fn=obj_fn,
                beta_pref=self.beta_pref)
        else:  # using approx QG when dynamics is not available
            qg = query_generation.ApproxQueryGenerator(
                dom=self.domain,
                num_queries=self.n_query,
                query_length=self.query_length,
                num_expectation_samples=self.n_samples_exp,
                include_previous_query=self.inc_prev_query,
                generate_scenario=self.gen_scenario,
                update_func=self.update_func,
                beta_pref=self.beta_pref)

        ### Creating human
        # All three human models are constructed; only the configured one is used.
        humans = {
            "opt":
            human.OptimalHuman(self.domain, self.update_func,
                               self.true_weight),
            "btz":
            human.BoltzmannHuman(self.domain, self.update_func,
                                 self.true_weight, self.beta_human),
            "term":
            human.TerminalHuman(self.domain, self.update_func)
        }
        H = humans[self.human_type]

        ### Iterating to build confidence intervals
        for i in range(n_iters):
            ### Processing demonstrations
            sampler = sampling.Sampler(n_query=self.n_query,
                                       dim_features=self.domain.feature_size,
                                       update_func=self.update_func,
                                       beta_demo=self.beta_demo,
                                       beta_pref=self.beta_pref)
            if self.n_demos > 0:
                if self.gen_demos:
                    self.demos = [
                        self.domain.simulate(self.true_weight,
                                             iter_count=self.sim_iter_count)
                        for _ in range(self.n_demos)
                    ]
                phi_demos = [self.domain.np_features(x) for x in self.demos]
                sampler.load_demo(np.array(phi_demos))
                if self.inc_prev_query and isinstance(self.domain, domain.Car):
                    cleaned_demos = [
                        d.trim(self.query_length, self.trim_start)
                        for d in self.demos
                    ]
                else:
                    cleaned_demos = self.demos
                if self.inc_prev_query:
                    # Copy, so replacing entries below does not touch the demos.
                    last_query_picked = list(cleaned_demos)
            else:
                # No demonstrations: a single null trajectory fills the slot.
                last_query_picked = [
                    traj.Trajectory(states=None, controls=None, null=True)
                ]

            ## Computing initial estimates
            samples = _record_estimates(sampler, i + 1, 0)

            ### Preferences loop
            for j in range(self.n_pref_iters):
                print("\n\n*** Preferences Loop %d\n" % (j))

                ## Get last_query
                if self.inc_prev_query:
                    if len(self.demos) > 0:
                        random_scenario_index = np.random.randint(
                            len(self.demos))
                    else:
                        random_scenario_index = 0
                    last_query = last_query_picked[random_scenario_index]

                ## Generate queries while ensuring that features of queries are epsilon apart
                query_diff = 0
                print("Generating queries")
                while query_diff <= self.epsilon:
                    if self.inc_prev_query:
                        if last_query.null:
                            queries = qg.queries(samples, blank_traj=True)
                        else:
                            queries = qg.queries(samples, last_query)
                    else:
                        queries = qg.queries(samples)
                    # Largest pairwise feature distance among the generated queries.
                    query_diffs = []
                    for a in range(len(queries)):
                        for b in range(a):
                            query_diffs.append(
                                np.linalg.norm(
                                    self.domain.np_features(queries[a]) -
                                    self.domain.np_features(queries[b])))
                    query_diff = max(query_diffs)

                ## Querying human
                if self.human_type == "term":
                    print('\a')
                rank = H.input(queries)
                if self.update_func == "rank":
                    best = rank[0]
                else:
                    if rank == -1:
                        # Input of -1 ends the experiment with the data gathered so far.
                        return pd.DataFrame(rows, columns=columns), self.config
                    best = rank

                if self.inc_prev_query:
                    last_query_picked[random_scenario_index] = queries[best]

                ## Creating dictionary mapping rankings to features of queries and loading into sampler
                phi = {
                    k: self.domain.np_features(x)
                    for k, x in enumerate(queries)
                }
                sampler.load_prefs(phi, rank)

                ## Recording data from this run
                samples = _record_estimates(sampler, i + 1, j + 1)
            ## Resetting for next run
            sampler.clear_pref()
            if self.inc_prev_query and self.n_demos > 0:
                last_query_picked = list(cleaned_demos)

        return pd.DataFrame(rows, columns=columns), self.config