Exemplo n.º 1
0
    def __init__(self):
        """Build the lookup tables, size attributes and task list.

        All inputs are module-level data (``INGREDIENTS``, ``RECIPES``,
        ``HINTS``, ``TRAIN_IDS``, ``TEST_IDS`` and related constants).

        Ids are handed out by two ``util.Index`` objects: ``self.index`` for
        ingredient/goal names and ``self.vocab`` for hint words.
        NOTE(review): ids presumably depend on the order in which names are
        first indexed, so the indexing calls below keep a fixed order --
        confirm against ``util.Index`` before reordering anything.
        """
        self.tasks = []  # rebuilt once the hints are known (see below)
        self.index = util.Index()
        self.vocab = util.Index()

        # Index the ingredient names first.
        ingredient_ids = []
        for name in INGREDIENTS:
            ingredient_ids.append(self.index.index(name))
        self.ingredients = ingredient_ids

        # Same table as RECIPES, with every name replaced by its id and the
        # values collected into sets.
        self.recipes = {
            self.index.index(name): {self.index.index(p) for p in parts}
            for name, parts in RECIPES.items()
        }

        # Hints keep their word lists untouched; only the goal name is
        # replaced by its id. Every hint word is registered in the vocabulary.
        self.hints = []
        for goal_name, words in HINTS:
            goal_id = self.index.index(goal_name)
            self.hints.append((goal_id, words))
            for word in words:
                self.vocab.index(word)

        # Two-way mapping between ingredient ids and observation slots.
        self.kind_to_obs = {}
        self.obs_to_kind = {}
        for kind in self.ingredients:
            slot = len(self.kind_to_obs)
            self.kind_to_obs[kind] = slot
            self.obs_to_kind[slot] = kind

        # Observation size: a window term scaled by the number of ingredient
        # kinds, plus one entry per indexed name, plus 4 extra entries.
        # NOTE(review): the meaning of the factor 2 and the trailing 4 is not
        # visible from this method -- confirm where observations are built.
        window_term = 2 * WINDOW_SIZE * WINDOW_SIZE * len(self.kind_to_obs)
        self.n_obs = window_term + len(self.index) + 4

        # n_actions / n_features are aliases of n_act / n_obs.
        self.n_act = self.n_actions = N_ACTIONS
        self.n_features = self.n_obs
        self.is_discrete = True

        self.max_hint_len = 3
        self.n_vocab = len(self.vocab)
        self.random = util.next_random()

        self.START = START
        self.STOP = STOP

        # One task per hint, numbered by its position in the hint list.
        self.tasks = [
            Minicraft2Task(task_id, goal, None)
            for task_id, (goal, _words) in enumerate(self.hints)
        ]
        self.n_tasks = len(self.tasks)
        self.n_train = len(TRAIN_IDS)
        self.n_val = 0
        self.n_test = len(TEST_IDS)

        self.demos = {}
Exemplo n.º 2
0
    gflags.DEFINE_boolean("infer_hyp", False, "use hypotheses at test time")
    gflags.DEFINE_string("restore", None, "model to restore")
    gflags.DEFINE_float("concept_prior", None,
                        "place a normal prior on concept representations")
    gflags.DEFINE_integer(
        "adapt_reprs", 100,
        "number of representations to sample when doing adaptation")
    gflags.DEFINE_integer(
        "adapt_samples", 1000,
        "number of episodes to spend evaluating sampled representations")


# NOTE(review): presumably the embedding width and hidden-layer width used by
# the model code in this module -- confirm at the use sites.
N_EMBED = 64
N_HIDDEN = 64

# Module-level random source obtained from util.next_random().
# NOTE(review): within this module the name `random` refers to this object,
# not to the standard-library `random` module.
random = util.next_random()


class Policy(object):
    def __init__(self, task):
        self.task = task

        self.t_state = tf.placeholder(tf.float32, (None, task.n_features))
        self.t_action = tf.placeholder(tf.int32, (None, ))
        self.t_reward = tf.placeholder(tf.float32, (None, ))
        self.t_hint = tf.placeholder(tf.int32, (None, None))
        self.t_hint_len = tf.placeholder(tf.int32, (None, ))
        self.t_task = tf.placeholder(tf.int32, (None, ))

        self.t_last_hyp = tf.placeholder(tf.int32, (None, ), "last_hyp")
        self.t_last_hyp_hidden = tf.placeholder(tf.float32,
Exemplo n.º 3
0
                else:
                    new_ingredients.append(ing)
            ingredients = new_ingredients
        specialized = []
        for ingredient in ingredients:
            if "?" in ingredient:
                specialized.append(ingredient.replace("?", variants.pop(0)))
            else:
                specialized.append(ingredient)
        #print goal, specialized
        HINTS.append((goal, [START] + specialized + [STOP]))

# Every ingredient also gets a hint of its own: its name wrapped in the
# START and STOP markers.
for ingredient in INGREDIENTS:
    HINTS.append((ingredient, [START, ingredient, STOP]))

# Reorder the hints with a generator from util.next_random().
# NOTE(review): presumably shuffles HINTS in place, so entry positions depend
# on that generator's state -- confirm against util.next_random.
util.next_random().shuffle(HINTS)

#for x in RECIPES.items():
#    print x
#
#print

#for x in HINTS:
#    print x

#for i, (k, v) in enumerate(HINTS):
#    print i, k, v

#HINTS = [
#    ("wood", ["wood"]),
#    ("ore", ["ore"]),