def __init__(self, config):
    """Load the hint file and index every task and subtask it describes.

    Args:
        config: Experiment configuration. The attributes read here are
            ``recipes``, ``trainer.hints`` (path of the YAML hint file),
            ``seed`` and ``model.use_args``.
    """
    # load configs
    self.config = config
    self.cookbook = Cookbook(config.recipes)
    self.subtask_index = util.Index()
    self.task_index = util.Index()
    # ``yaml.load`` without an explicit Loader is rejected by PyYAML >= 6
    # and can construct arbitrary Python objects on older releases.
    # NOTE(review): assumes the hint file is plain YAML (mappings, lists,
    # strings) -- confirm no custom tags are used.
    with open(config.trainer.hints) as hints_f:
        self.hints = yaml.safe_load(hints_f)

    # initialize randomness
    self.random = np.random.RandomState(config.seed)

    # organize task and subtask indices
    self.tasks_by_subtask = defaultdict(list)
    self.tasks = []
    for hint_key, hint in self.hints.items():
        parsed_goal = util.parse_fexp(hint_key)
        goal = (self.subtask_index.index(parsed_goal[0]),
                self.cookbook.index[parsed_goal[1]])

        if config.model.use_args:
            # Every step is itself parsed into a (subtask, argument) pair.
            parsed_steps = [util.parse_fexp(s) for s in hint]
            steps = tuple(
                (self.subtask_index.index(name), self.cookbook.index[arg])
                for name, arg in parsed_steps)
            subtasks = [subtask for subtask, _ in steps]
        else:
            # Steps are bare subtask names.
            steps = tuple(self.subtask_index.index(name) for name in hint)
            subtasks = steps

        task = Task(goal, steps)
        # A task is listed once per occurrence of a subtask in its steps.
        for subtask in subtasks:
            self.tasks_by_subtask[subtask].append(task)
        self.tasks.append(task)
        self.task_index.index(task)
def __init__(self):
    """Create the world and cookbook and index the tasks in ``hints.yaml``.

    The hint file is read from the directory that contains this module.
    """
    self.world = CraftWorld()
    self.cookbook = Cookbook()
    self.subtask_index = util.Index()
    self.task_index = util.Index()

    dir_path = os.path.dirname(os.path.realpath(__file__))
    # ``yaml.load`` without an explicit Loader is rejected by PyYAML >= 6
    # and can construct arbitrary Python objects on older releases.
    # NOTE(review): assumes hints.yaml is plain YAML -- confirm no custom
    # tags are used.
    with open(os.path.join(dir_path, "hints.yaml")) as hints_f:
        self.hints = yaml.safe_load(hints_f)

    # NOTE: no ``self.random`` is created here; the original left
    # ``np.random.RandomState(0)`` commented out ("Think about this").

    # organize task and subtask indices
    self.tasks_by_subtask = defaultdict(list)
    self.tasks = []
    for hint_key, hint in self.hints.items():
        parsed_goal = util.parse_fexp(hint_key)
        goal = (self.subtask_index.index(parsed_goal[0]),
                self.cookbook.index[parsed_goal[1]])
        steps = tuple(self.subtask_index.index(name) for name in hint)
        task = Task(goal, steps)
        # A task is listed once per occurrence of a subtask in its steps.
        for subtask in steps:
            self.tasks_by_subtask[subtask].append(task)
        self.tasks.append(task)
        self.task_index.index(task)
def __init__(self, recipes_path, hints_path, max_steps=100, seed=0,
             visualise=False, reuse_environments=False):
    """Create the world, load the hints and set up all tasks.

    Args:
        recipes_path: Passed to ``craft.CraftWorld``.
        hints_path: Path of the YAML file holding the tasks and their
            sub-steps (the "hints").
        max_steps: Stored as ``self._max_steps``.
        seed: Passed to ``craft.CraftWorld``.
        visualise: Stored as ``self._visualise``.
        reuse_environments: When true, an (initially empty) per-task
            environment cache is created.
    """
    self.subtask_index = util.Index()
    self.task_index = util.Index()
    self._max_steps = max_steps
    self._visualise = visualise
    self._reuse_environments = reuse_environments

    # Per task, the same environment (with the same layout) is reused.
    # This should generate much easier tasks on which agents can overfit.
    if self._reuse_environments:
        self._env_cache = {}

    # Create the world.
    self.world = craft.CraftWorld(recipes_path, seed)

    # Load the tasks with their sub-steps (== hints).
    # ``yaml.load`` without an explicit Loader is rejected by PyYAML >= 6
    # and can construct arbitrary Python objects on older releases.
    # NOTE(review): assumes the hint file is plain YAML -- confirm no
    # custom tags are used.
    with open(hints_path) as hints_f:
        self.hints = yaml.safe_load(hints_f)

    # Set up all possible tasks.
    self._init_tasks()
def __init__(self):
    """Index ingredients, recipes and hints, then derive sizes and tasks.

    Everything is built from the module-level tables ``INGREDIENTS``,
    ``RECIPES`` and ``HINTS``.
    """
    self.index = util.Index()
    self.vocab = util.Index()

    # Ingredients are indexed first, then recipe entries, then hint goals.
    self.ingredients = [self.index.index(name) for name in INGREDIENTS]
    self.recipes = {
        self.index.index(k): set(self.index.index(vv) for vv in v)
        for k, v in RECIPES.items()
    }

    # Hint words are registered in the vocabulary but stored unindexed.
    self.hints = []
    for goal_name, words in HINTS:
        self.hints.append((self.index.index(goal_name), words))
        for word in words:
            self.vocab.index(word)

    # Two-way mapping between ingredient ids and observation slots.
    self.kind_to_obs = {}
    self.obs_to_kind = {}
    for kind in self.ingredients:
        obs_id = len(self.kind_to_obs)
        self.kind_to_obs[kind] = obs_id
        self.obs_to_kind[obs_id] = kind

    n_kinds = len(self.kind_to_obs)
    self.n_obs = (
        2 * WINDOW_SIZE * WINDOW_SIZE * n_kinds + len(self.index) + 4)
    self.n_features = self.n_obs
    self.n_act = N_ACTIONS
    self.n_actions = self.n_act
    self.is_discrete = True
    self.max_hint_len = 3
    self.n_vocab = len(self.vocab)

    self.random = util.next_random()

    self.START = START
    self.STOP = STOP

    # One task per hint; the hint's steps are not handed to the task.
    self.tasks = [
        Minicraft2Task(task_id, goal, None)
        for task_id, (goal, _) in enumerate(self.hints)
    ]
    self.n_tasks = len(self.tasks)
    self.n_train = len(TRAIN_IDS)
    self.n_val = 0
    self.n_test = len(TEST_IDS)

    self.demos = {}
def __init__(self):
    """Read ``data/re2/corpus.json`` and index it into three folds.

    Each corpus example becomes a ``FullDatum`` of indexed hints and
    indexed (input, output) pairs. Which field supplies the hints is
    chosen by ``FLAGS.hint_type``.
    """
    assert FLAGS.hint_type in ("re", "nl", "none")

    corpus_path = os.path.join(sys.path[0], "data/re2/corpus.json")
    with open(corpus_path) as corpus_f:
        corpus = json.load(corpus_f)

    self.hint_vocab = util.Index()
    self.str_vocab = util.Index()
    self.str_vocab.index(SEP)

    self.SEP = SEP
    self.START = START
    self.STOP = STOP

    data = {}
    for fold in ["train", "val", "test"]:
        fold_data = []
        data[fold] = fold_data
        for example in corpus[fold]:
            hint_type = FLAGS.hint_type
            if hint_type == "re":
                # A single hint, indexed element by element.
                hints = [[self.hint_vocab.index(c) for c in example["re"]]]
            elif hint_type == "nl":
                # Several hints per example.
                hints = [
                    [self.hint_vocab.index(w) for w in hint]
                    for hint in example["hints_aug"]
                ]
            elif hint_type == "none":
                hints = [[]]

            # Inputs are indexed before outputs within every pair.
            pairs = [
                ([self.str_vocab.index(c) for c in inp],
                 [self.str_vocab.index(c) for c in out])
                for inp, out in example["examples"]
            ]
            fold_data.append(FullDatum(hints, pairs))

    self.train_data = data["train"]
    self.val_data = data["val"]
    self.test_data = data["test"]
def __init__(self):
    """Load image features and captions, then split the classes into folds.

    Reads ``CUB_feature_dict.pkl`` and ``captions.tsv`` from
    ``<birds_path>/hendricks_data``.

    Raises:
        ValueError: If no caption belongs to the ``cub_missing`` class, or
            a feature key does not have the form ``<class>/<instance>``.
        AssertionError: If the number of remaining classes is not 200.
    """
    self.hint_vocab = util.Index()
    self.START = START
    self.STOP = STOP

    data_dir = os.path.join(birds_path, "hendricks_data")

    # Pickle data is binary; opening it in text mode fails on Python 3.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load feature dumps from a trusted source.
    with open(os.path.join(data_dir, "CUB_feature_dict.pkl"), "rb") as feat_f:
        self.features = pickle.load(feat_f)

    # Captions are grouped by "<class>/<file>" taken from the image URL.
    self.captions = {}
    with open(os.path.join(data_dir, "captions.tsv")) as capt_f:
        reader = csv.DictReader(capt_f, delimiter="\t")
        for row in reader:
            # Split "." and "," off as separate tokens.
            caption = (row["Description"].lower()
                       .replace(".", " .").replace(",", " ,"))
            toks = [START] + caption.split() + [STOP]
            toks = [self.hint_vocab.index(w) for w in toks]
            url = row["Input.image_url"]
            inst = "/".join(url.split("/")[-2:])
            self.captions.setdefault(inst, []).append(toks)

    classes = sorted(set(k.split("/")[0] for k in self.captions))
    classes.remove("cub_missing")
    # Fixed seed so the class split is the same on every run.
    shuf_random = np.random.RandomState(999)
    shuf_random.shuffle(classes)
    assert len(classes) == 200

    # NOTE(review): the "val" classes (100:110) are also part of "test"
    # (100:200). Left unchanged because it defines the existing split --
    # confirm whether the overlap is intended.
    data_classes = {
        "train": classes[:100],
        "val": classes[100:110],
        "test": classes[100:200]
    }

    data_insts = {}
    for fold in ("train", "val", "test"):
        instances = {cls: [] for cls in data_classes[fold]}
        for key in self.features.keys():
            cls, _ = key.split("/")
            if cls in instances:
                instances[cls].append(key)
        data_insts[fold] = instances

    self.train_classes = data_classes["train"]
    self.val_classes = data_classes["val"]
    self.test_classes = data_classes["test"]

    self.train_insts = data_insts["train"]
    self.val_insts = data_insts["val"]
    self.test_insts = data_insts["test"]

    # ``dict.keys()`` is not indexable on Python 3; take the first key via
    # an iterator, which yields the same key on Python 2.
    first_key = next(iter(self.features))
    self.n_features = self.features[first_key].size
def __init__(self):
    """Load the raw splits, build train/test examples and report sizes."""
    train_local_raw, test_local_raw = loading.load(
        "local", "human", 2000, 500)
    # The "global" splits are loaded and registered below, but only the
    # "local" mode is formatted further down.
    train_global_raw, test_global_raw = loading.load(
        "global", "human", 1500, 500)

    self.vocab = util.Index()
    self.vocab.index("UNK")
    self.START = START
    self.STOP = STOP

    raw_splits = {
        ("train", "local"): train_local_raw,
        ("train", "global"): train_global_raw,
        ("test", "local"): test_local_raw,
        ("test", "global"): test_global_raw
    }

    # ``_format_data`` fills ``templates`` and threads ``task_id`` through
    # successive calls.
    templates = defaultdict(list)
    splits = {}
    task_id = 0
    for fold in ("train", "test"):
        for mode in ("local", ):
            data, task_id = self._format_data(
                fold, raw_splits[fold, mode], templates, task_id)
            splits[fold, mode] = data

    # Keep, per key, only the templates that were collected more than once.
    final_templates = defaultdict(list)
    for k, vv in templates.items():
        counts = defaultdict(lambda: 0)
        for v in vv:
            counts[v] += 1
        for v, c in counts.items():
            if c > 1:
                final_templates[k].append(v)
    final_templates = dict(final_templates)

    train_local_fmt = splits["train", "local"]
    test_local_fmt = splits["test", "local"]
    task_counter = 0
    self.train, n_train_tasks = self._build_examples(
        train_local_fmt, 5000, final_templates, task_counter,
        group_tasks=False)
    # Test task ids continue from where the training ids stopped.
    self.test, n_total_tasks = self._build_examples(
        test_local_fmt, 50, final_templates, n_train_tasks,
        group_tasks=True)
    self.test_ids = sorted(list(set(d.task_id for d in self.test)))

    # Single-argument print calls parse on Python 2 and 3 alike and emit
    # the same text as the former print statements.
    print("[n_train] %d" % len(self.train))
    print("[n_test] %d test" % len(self.test))

    samp = self.sample_train()
    self.n_features = samp.features.size
    self.n_tasks = n_train_tasks
    self.n_actions = ACTS

    print("[task_ids] %s -> %s" % (n_train_tasks, n_total_tasks))
    print("[n_vocab] %d" % len(self.vocab))
def __init__(self):
    """Load all five folds from ``sw_path`` and wrap them in datum objects.

    For every fold the image arrays, labels, hints and precomputed feature
    arrays are read. ``USE_IMAGES`` decides whether a datum carries the
    image arrays or the feature arrays.
    """
    self.hint_vocab = util.Index()
    self.feature_index = util.Index()
    self.START = START
    self.STOP = STOP

    def fold_file(fold, name):
        # Path of one file inside a fold directory.
        return os.path.join(sw_path, fold, name)

    data = {}
    for fold in ("train", "val", "test", "val_same", "test_same"):
        examples = np.load(fold_file(fold, "examples.npy"))
        inputs = np.load(fold_file(fold, "inputs.npy"))
        labels = np.load(fold_file(fold, "labels.npy"))
        with open(fold_file(fold, "hints.json")) as hint_f:
            raw_hints = json.load(hint_f)

        # Wrap every hint in START/STOP and map its words to vocab ids.
        hints = []
        for raw_hint in raw_hints:
            words = [START] + raw_hint.split() + [STOP]
            hints.append([self.hint_vocab.index(w) for w in words])

        ex_features = np.load(fold_file(fold, "examples.feats.npy"))
        inp_features = np.load(fold_file(fold, "inputs.feats.npy"))

        fold_data = []
        for i, hint in enumerate(hints):
            if USE_IMAGES:
                datum = Datum(
                    hint, examples[i, ...], inputs[i, ...], labels[i])
            else:
                datum = Datum(
                    hint, ex_features[i, ...], inp_features[i, ...],
                    labels[i])
                # NOTE(review): nesting reconstructed from a flattened
                # source -- confirm the vis wrapper belongs only to the
                # feature-vector path.
                if FLAGS.vis:
                    # TODO this is so dirty!
                    # Attach the images next to the feature-based datum.
                    datum = VisDatum(
                        datum.hint, datum.ex_inputs, datum.input,
                        datum.label, examples[i, ...], inputs[i, ...])
            fold_data.append(datum)
        data[fold] = fold_data

    self.train_data = data["train"]
    self.val_data = data["val"]
    self.test_data = data["test"]
    self.val_same_data = data["val_same"]
    self.test_same_data = data["test_same"]

    if USE_IMAGES:
        first_input = self.train_data[0].input
        self.width, self.height, self.channels = first_input.shape
    else:
        # Taken from the feature array of the last fold that was loaded.
        self.n_features = inp_features.shape[1]
def __init__(self, config, world, model):
    """Build the task lists, the policies, the environment models and the learner.

    Args:
        config: Experiment configuration. The attributes read here are
            ``recipes``, ``trainer.hints`` (path of the YAML hint file)
            and ``model.use_args``.
        world: Passed to ``model.prepare``; its ``n_actions`` is used as
            the policies' action space.
        model: Provides ``prepare``, ``n_features``, ``session`` and
            ``load``.

    Raises:
        KeyError: If one of the hard-coded hint keys is missing from the
            hint file.
    """
    # load configs
    self.config = config
    self.cookbook = Cookbook(config.recipes)
    self.subtask_index = util.Index()
    self.task_index = util.Index()
    # ``yaml.load`` without an explicit Loader is rejected by PyYAML >= 6
    # and can construct arbitrary Python objects on older releases.
    # NOTE(review): assumes the hint file is plain YAML -- confirm no
    # custom tags are used.
    with open(config.trainer.hints) as hints_f:
        self.hints = yaml.safe_load(hints_f)

    # initialize randomness
    self.random = np.random.RandomState(0)

    # organize task and subtask indices
    self.train_tasks = []
    self.test_tasks = []
    # The hint file is either split into "train"/"test" sections or flat;
    # a flat file counts entirely as training hints.
    if "train" in self.hints:
        train_hints = self.hints["train"]
        test_hints = self.hints["test"]
    else:
        train_hints = self.hints
        test_hints = {}
    all_hints = dict(train_hints)
    all_hints.update(test_hints)

    # Tasks are created for this fixed list, in this order, and not by
    # iterating over the hint file.
    hint_keys = [
        'make[cloth]', 'make[bed]', 'make[axe]', 'make[stick]',
        'make[bridge]', 'make[plank]', 'get[gold]', 'make[shears]',
        'get[gem]', 'make[rope]'
    ]
    for hint_key in hint_keys:
        hint = all_hints[hint_key]
        goal = util.parse_fexp(hint_key)
        goal = (self.subtask_index.index(goal[0]),
                self.cookbook.index[goal[1]])
        if config.model.use_args:
            # Every step is itself parsed into a (subtask, argument) pair.
            steps = [util.parse_fexp(s) for s in hint]
            steps = [(self.subtask_index.index(a), self.cookbook.index[b])
                     for a, b in steps]
        else:
            steps = [self.subtask_index.index(a) for a in hint]
        steps = tuple(steps)
        task = Task(goal, steps)
        if hint_key in train_hints:
            self.train_tasks.append(task)
        else:
            self.test_tasks.append(task)
        self.task_index.index(task)
        print(hint_key, hint, task)

    model.prepare(world, self)
    print("tasks: ", self.test_tasks)

    # Observations are as wide as the model features plus one entry per
    # test task.
    hid_size = 32
    num_hid_layers = 2
    self.ob = U.get_placeholder(
        name="ob",
        dtype=tf.float32,
        shape=[None, model.n_features + len(self.test_tasks)])
    self.new_ob = U.get_placeholder(
        name="new_ob",
        dtype=tf.float32,
        shape=[None, model.n_features + len(self.test_tasks)])
    self.acts = U.get_placeholder(name="acts", dtype=tf.int32,
                                  shape=(None, ))

    self.policy = Policy(name="policy",
                         ob=self.ob,
                         ac_space=world.n_actions,
                         hid_size=hid_size,
                         num_hid_layers=num_hid_layers,
                         num_subpolicies=len(self.subtask_index))
    self.old_policy = Policy(name="old_policy",
                             ob=self.ob,
                             ac_space=world.n_actions,
                             hid_size=hid_size,
                             num_hid_layers=num_hid_layers,
                             num_subpolicies=len(self.subtask_index))

    self.stochastic = True

    # Run only the initializers of the variables created under the two
    # policy scopes.
    policy_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='policy')
    old_policy_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='old_policy')
    not_init_initializers = [
        var.initializer for var in policy_vars + old_policy_vars
    ]
    model.session.run(not_init_initializers)

    self.policy.reset(model.session)
    self.old_policy.reset(model.session)

    # The environment models use a wider hidden layer than the policies.
    hid_size = 1024
    num_hid_layers = 2

    self.env_model = EnvModel(name="env_model",
                              ob=self.ob,
                              acts=self.acts,
                              hid_size=hid_size,
                              num_hid_layers=num_hid_layers,
                              num_subpolicies=len(self.subtask_index))
    self.old_env_model = EnvModel(name="old_env_model",
                                  ob=self.ob,
                                  acts=self.acts,
                                  hid_size=hid_size,
                                  num_hid_layers=num_hid_layers,
                                  num_subpolicies=len(self.subtask_index))

    with model.session.as_default():
        self.learner = Learner(self.policy,
                               self.old_policy,
                               self.env_model,
                               self.old_env_model,
                               len(self.subtask_index),
                               None,
                               clip_param=0.2,
                               entcoeff=0,
                               optim_epochs=10,
                               optim_stepsize=3e-5,
                               optim_batchsize=64)
        # NOTE(review): the flattened source does not show whether
        # ``model.load()`` ran inside this ``with`` block; it is kept
        # inside so a default session is available -- confirm.
        model.load()
        # self.learner.syncMasterPolicies()

    print("policy variables: ", self.policy.get_variables())
    print("policy trainable variables: ",
          self.policy.get_trainable_variables())