def find_failure_case(num_bases, num_blocks, max_levels):
    while True:
        # print("trying..")
        env = BlocksWorldEnv(show=False)
        thing_below, goal_thing_below = random_problem_instance(
            env, num_blocks, max_levels, num_bases)
        am = make_abstract_machine(env, num_bases, max_levels)
        am_results = run_machine(am, goal_thing_below, {"jnt": "rest"})
        env.close()
        ticks, running_time, sym_reward, spa_reward = am_results
        if sym_reward <= -2: break
        # print(sym_reward)
    return thing_below, goal_thing_below, sym_reward
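
A minimal driver for find_failure_case might look like the sketch below (hypothetical, not from the original source; the parameter values mirror the failure case used in later examples):

# Hypothetical usage sketch:
if __name__ == "__main__":
    thing_below, goal_thing_below, sym_reward = find_failure_case(
        num_bases=5, num_blocks=5, max_levels=3)
    print("failure case with symbolic reward %f:" % sym_reward)
    print(thing_below)
    print(goal_thing_below)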
Example #2
def generate_data(num_blocks, base_name):

    thing_below = random_thing_below(num_blocks, max_levels=3)
    goal_thing_below = random_thing_below(num_blocks, max_levels=3)

    dump = DataDump(goal_thing_below, hook_period=1)
    env = BlocksWorldEnv(pb.POSITION_CONTROL,
                         show=False,
                         control_period=12,
                         step_hook=dump.step_hook)
    env.load_blocks(thing_below)

    restacker = Restacker(env, goal_thing_below, dump)
    restacker.run()

    reward = compute_symbolic_reward(env, goal_thing_below)
    final_thing_below = env.thing_below
    commands = [frame["command"] for frame in dump.data]
    data_file = "%s/meta.pkl" % base_name
    data = (thing_below, goal_thing_below, final_thing_below, reward, commands)
    with open(data_file, "wb") as f:
        pk.dump(data, f)

    env.close()

    for d, frame in enumerate(dump.data):
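        # unpack the command's operands and the per-tick records
        # logged by the step hook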
        _, (thing, block) = frame["command"]
        position, action, rgba, coords_of, _ = zip(*frame["records"])

        position = tr.tensor(np.stack(position)).float()
        action = tr.tensor(np.stack(action)).float()
        rgba = tr.tensor(np.stack(rgba))
        block_coords = tr.tensor(np.stack([co[block]
                                           for co in coords_of])).float()
        thing_coords = tr.tensor(np.stack([co[thing]
                                           for co in coords_of])).float()

        # preprocessing
        rgb, block_coords, thing_coords = preprocess(rgba, block_coords,
                                                     thing_coords)

        data_file = "%s/%03d.pt" % (base_name, d)
        tr.save((position, action, rgb, block_coords, thing_coords), data_file)

    print(" success=%s (start, end, goal)" % (reward == 0))
    print("  ", thing_below)
    print("  ", env.thing_below)
    print("  ", goal_thing_below)
    return reward
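
A driver for generate_data might look like this sketch (hypothetical; the "episodes/%03d" layout follows the meta.pkl path read back in a later example, and the episode count is an assumption):

# Hypothetical driver:
import os
if __name__ == "__main__":
    num_episodes = 100  # assumed
    for episode in range(num_episodes):
        base_name = "episodes/%03d" % episode
        os.makedirs(base_name, exist_ok=True)
        generate_data(num_blocks=7, base_name=base_name)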
Example #3
def run_trial(num_bases, num_blocks, max_levels):

    env = BlocksWorldEnv(show=False)

    # rejection sample non-trivial instance
    thing_below, goal_thing_below = random_problem_instance(
        env, num_blocks, max_levels, num_bases)

    am = make_abstract_machine(env, num_bases, max_levels)
    nvm = virtualize(am)

    am_results = run_machine(am, goal_thing_below, {"jnt": "rest"})

    env.reset()
    env.load_blocks(thing_below, num_bases)

    nvm_results = run_machine(nvm, goal_thing_below,
                              {"jnt": tr.tensor(am.ik["rest"]).float()})

    env.close()

    return am_results, nvm_results, nvm.size(), thing_below, goal_thing_below
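
A minimal driver for this run_trial variant might look like the sketch below (hypothetical, not from the original source):

# Hypothetical usage sketch:
if __name__ == "__main__":
    am_results, nvm_results, size, thing_below, goal_thing_below = run_trial(
        num_bases=5, num_blocks=5, max_levels=3)
    print("am  (ticks, time, sym, spa):", am_results)
    print("nvm (ticks, time, sym, spa):", nvm_results)
    print("nvm size:", size)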
Example #4
    num_dual_iters = 16
    primal_tol = 0.001
    dual_tol = 0.001

    max_levels = 3
    num_blocks = 5
    num_bases = 5
    
    # prob_freq = "batch"
    prob_freq = "once"
    
    if run_exp:

        domain = bp.BlockStackingDomain(num_blocks, num_bases, max_levels)
        mp_tracker = MovementPenaltyTracker(period=5)
        env = BlocksWorldEnv(show=False, step_hook=mp_tracker.step_hook)
    
        # set up rvm and virtualize
        rvm = make_abstract_machine(env, domain)
        rvm.reset({"jnt": "rest"})
        rvm.mount("main")
    
        nvm = virtualize(rvm, σ=nv.default_activator, detach_gates=detach_gates)
        nvm.mount("main")
        W_init = {
            name: {
                0: nvm.net.batchify_weights(conn.W)
            }
            for name, conn in nvm.connections.items()
        }
        v_init = {
            name: {
                0: nvm.net.batchify_activities(reg.content)
            }
            for name, reg in nvm.registers.items()
        }
        v_init["jnt"][0] = nvm.net.batchify_activities(
            tr.tensor(rvm.ik["rest"]).float())
    
        # set up trainable connections
        inputable = ("obj", "loc", "goal")
        # trainable = ["ik", "to", "tc", "po", "pc", "right", "above", "base"]
Example #5
import pickle as pk
import numpy as np
import sys

sys.path.append('../../envs')

import pybullet as pb
from blocks_world import BlocksWorldEnv, random_thing_below

thing_below = random_thing_below(num_blocks=7, max_levels=3)
env = BlocksWorldEnv(pb.POSITION_CONTROL, show=False, control_period=12)
env.load_blocks(thing_below)
rgba, view, proj, coords_of = env.get_camera_image()
env.close()

np.save("tmp.npy", rgba)
rgba = np.load("tmp.npy")

import matplotlib.pyplot as pt

pt.imshow(rgba)
pt.show()
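
The coords_of mapping returned with the image gives each block's pixel coordinates; a hypothetical overlay sketch (mirroring the zip(*coords_of...) pattern used in a later example):

# Hypothetical overlay, not in the original:
x, y = zip(*coords_of.values())
pt.imshow(rgba)
pt.scatter(x, y)
pt.show()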
Example #6
    σ1 = tr.tensor(1.).tanh()

    def σ(v):
        return tr.tanh(v) / σ1


    # def σ(v): return v

    if run_exp:

        lr_results = {lr: list() for lr in learning_rates}
        for rep in range(num_repetitions):
            for learning_rate in learning_rates:

                results = lr_results[learning_rate]
                start_rep = time.perf_counter()
                results.append([])

                env = BlocksWorldEnv(show=showenv,
                                     step_hook=penalty_tracker.step_hook)
                env.load_blocks({
                    "b%d" % n: "t%d" % n
                    for n in range(num_bases)
                })  # placeholder for rvm construction

                # set up rvm and virtualize
                rvm = make_abstract_machine(env,
                                            num_bases,
                                            max_levels,
                                            gen_regs=["r0", "r1"])
                rvm.reset({"jnt": "rest"})
                rvm.mount("main")

                nvm = virtualize(rvm, σ)
                init_regs, init_conns = nvm.get_state()
Example #7

    penalty_tracker = PenaltyTracker(period=5)

    if run_exp:

        lr_results = {lr: list() for lr in learning_rates}
        for rep in range(num_repetitions):
            for learning_rate in learning_rates:
                print("Starting lr=%f" % learning_rate)

                results = lr_results[learning_rate]
                start_rep = time.perf_counter()
                results.append([])

                env = BlocksWorldEnv(show=False,
                                     step_hook=penalty_tracker.step_hook)
                env.load_blocks(thing_below)

                # set up rvm and virtualize
                rvm = make_abstract_machine(env, num_bases, max_levels)
                rvm.reset({"jnt": "rest"})
                rvm.mount("main")

                nvm = virtualize(rvm,
                                 σ=nv.default_activator,
                                 detach_gates=detach_gates)
                nvm.mount("main")
                W_init = {
                    name: {
                        0: nvm.net.batchify_weights(conn.W)
                    }
                    for name, conn in nvm.connections.items()
                }
Example #8
    # goal_thing_below = random_thing_below(num_blocks, max_levels, num_bases)

    # one failure case:
    max_levels = 3
    num_blocks = 5
    num_bases = 5
    thing_below = {'b0': 't1', 'b2': 'b0', 'b4': 'b2', 'b1': 't4', 'b3': 't2'}
    goal_thing_below = {
        'b1': 't1',
        'b2': 't3',
        'b3': 'b2',
        'b0': 't0',
        'b4': 'b0'
    }

    env = BlocksWorldEnv(show=True)
    env.load_blocks(thing_below, num_bases)
    am = make_abstract_machine(env, num_bases, max_levels)

    goal_thing_above = env.invert(goal_thing_below)
    for key, val in goal_thing_above.items():
        if val == "none": goal_thing_above[key] = "nil"
    memorize_env(am, goal_thing_above)

    # restack test
    am.reset({
        "jnt": "rest",
    })
    num_ticks = am.run(dbg=True)

    input('...')
Example #9
import sys
sys.path.append('../../envs')
import pybullet as pb
from blocks_world import BlocksWorldEnv, random_thing_below

env = BlocksWorldEnv()
thing_below = random_thing_below(num_blocks=4, max_levels=3)
env.load_blocks(thing_below)

input('.')

env.reset()

input('.')

thing_below = random_thing_below(num_blocks=4, max_levels=3)
env.load_blocks(thing_below)

input('.')
Example #10
import pybullet as pb
import matplotlib.pyplot as pt
from blocks_world import BlocksWorldEnv

step_log = []


def step_hook(env, action):
    if action is None: return
    position = env.get_position()
    delta = action - position
    rgb, _, _, coords_of = env.get_camera_image()
    step_log.append((position, delta, rgb, coords_of))


env = BlocksWorldEnv(pb.POSITION_CONTROL, step_hook=step_hook)
env.load_blocks({"b0": "t0", "b1": "t1", "b2": "t2"})

action = [0.] * env.num_joints
env.goto_position([0.5] * env.num_joints, 20 / 240)
env.close()

position, delta, rgb, coords_of = zip(*step_log)
pt.ion()
for t in range(len(step_log)):
    print(t)
    print(position[t])
    print(delta[t])

    x, y = zip(*coords_of[t].values())
    pt.imshow(rgb[t])
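    # Hypothetical rendering step, not in the original: with pt.ion() enabled
    # above, overlay the block pixel coordinates and refresh the figure.
    pt.scatter(x, y)
    pt.pause(0.1)
    pt.cla()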
Example #11
    end_reward = calc_reward(sym_reward, spa_reward)
    rewards[-1] += end_reward

    return end_reward, log_prob, rewards, log_probs


if __name__ == "__main__":

    max_levels = 3
    num_blocks = 5
    num_bases = 5

    for rep in range(10):

        penalty_tracker = PenaltyTracker()
        env = BlocksWorldEnv(show=False, step_hook=penalty_tracker.step_hook)

        thing_below, goal_thing_below = random_problem_instance(
            env, num_blocks, max_levels, num_bases)
        goal_thing_above = invert(goal_thing_below, num_blocks, num_bases)
        for key, val in goal_thing_above.items():
            if val == "none": goal_thing_above[key] = "nil"

        σ1 = tr.tensor(1.).tanh()

        def σ(v):
            return tr.tanh(v) / σ1


        # set up rvm and virtualize
        rvm = make_abstract_machine(env, num_bases, max_levels)
Example #12
import pickle as pk
import numpy as np
import torch as tr  # assumed: tr is the torch alias used throughout these examples
import pybullet as pb
from blocks_world import BlocksWorldEnv, random_thing_below

if __name__ == "__main__":

    # thing_below = random_thing_below(num_blocks, max_levels=3)
    # goal_thing_below = random_thing_below(num_blocks, max_levels=3)

    # num_blocks = 7
    # thing_below = {("b%d" % n): ("t%d" % n) for n in range(num_blocks)}
    # block, thing = "b3", "b4"

    with open("episodes/000/meta.pkl", "rb") as f:
        thing_below, _, _, _, commands = pk.load(f)

    _, (block, thing) = commands[0]
    env = BlocksWorldEnv(pb.POSITION_CONTROL, show=True, control_period=12)
    env.load_blocks(thing_below)

    _, _, _, coords_of = env.get_camera_image()
    block_coords = tr.tensor(np.stack([coords_of[block]])).float()
    thing_coords = tr.tensor(np.stack([coords_of[thing]])).float()

    # move to
    net = VisuoMotorNetwork()
    # net.load_state_dict(tr.load("net.pt"))
    net.load_state_dict(tr.load("net500.pt"))

    force_coords = False

    for t in range(100):
        position = env.get_position()
Example #13
        am_results = run_machine(am, problem.goal_thing_below, {"jnt": "rest"})
        ticks, running_time, sym_reward, spa_reward = am_results
        if sym_reward <= sym_cutoff: break
        # print(sym_reward)
    env.reset()
    return problem, sym_reward


if __name__ == "__main__":

    num_bases, num_blocks, max_levels = 5, 5, 3
    domain = bp.BlockStackingDomain(num_bases, num_blocks, max_levels)

    find_new = True
    if find_new:
        env = BlocksWorldEnv(show=False)
        problem, _ = find_failure_case(env, domain)
        env.close()
        thing_below = problem.thing_below
        goal_thing_below = problem.goal_thing_below
        print(thing_below)
        print(goal_thing_below)
        # thing_below = {'b0': 't1', 'b1': 'b0', 'b2': 'b1', 'b3': 't2', 'b4': 'b3'}
        # goal_thing_below = {'b0': 't0', 'b1': 'b4', 'b2': 'b1', 'b3': 't1', 'b4': 't4'}

    else:
        # one failure case:
        thing_below = {
            'b0': 't1',
            'b2': 'b0',
            'b4': 'b2',
            'b1': 't4',
            'b3': 't2'
        }
Example #14
    penalty_tracker = MovementPenaltyTracker(period=tracker_period)

    if run_exp:

        lr_results = {lr: list() for lr in learning_rates}
        for rep in range(num_repetitions):
            for learning_rate in learning_rates:
                print("Starting lr=%f" % learning_rate)

                results = lr_results[learning_rate]
                start_rep = time.perf_counter()
                results.append([])

                if prob_freq != "once":
                    problem = domain.random_problem_instance()
                env = BlocksWorldEnv(show=False,
                                     step_hook=penalty_tracker.step_hook)
                env.load_blocks(problem.thing_below)

                # set up rvm and virtualize
                rvm = make_abstract_machine(env, domain)
                rvm.reset({"jnt": "rest"})
                rvm.mount("main")

                nvm = virtualize(rvm,
                                 σ=nv.default_activator,
                                 detach_gates=detach_gates)
                nvm.mount("main")
                W_init = {
                    name: {
                        0: nvm.net.batchify_weights(conn.W)
                    }
                    for name, conn in nvm.connections.items()
                }
Example #15
def proc(comp):
    comp.ret_if_nil()
    comp.put("b0", "r0")
    comp.ret()


def main(comp):
    comp.call("proc")


if __name__ == "__main__":

    max_levels = 3
    num_blocks = 5
    num_bases = 5
    domain = bp.BlockStackingDomain(num_blocks, num_bases, max_levels)
    env = BlocksWorldEnv(show=False)

    # # small example
    # am, compiler = setup_abstract_machine(env, domain, gen_regs=["r0"])
    # compiler.flash(proc)
    # compiler.flash(main)

    # restacking code
    am = make_abstract_machine(env, domain)

    code = am.machine_code()
    ipt, asm, mach, store, recall = zip(*code)
    store = [", ".join(conn) for conn in store]
    recall = [", ".join(conn) for conn in recall]

    width = [
Example #16
        class Tracker:
            def __init__(self, goal_thing_below):
                self.mp = []
                self.sym = []
                self.goal_thing_below = goal_thing_below

            def reset(self):
                self.mp = []
                self.sym = []

            def step_hook(self, env, action):
                self.mp.append(env.movement_penalty())
                self.sym.append(
                    compute_symbolic_reward(env, self.goal_thing_below))

        # load
        tracker = Tracker(goal_thing_below)
        env = BlocksWorldEnv(show=False, step_hook=tracker.step_hook)
        env.load_blocks(thing_below)
        # run rvm
        rvm = make_abstract_machine(env,
                                    num_bases,
                                    max_levels,
                                    gen_regs=["r0", "r1"])
        nvm = virtualize(rvm, nv.default_activator)
        # run
        goal_thing_above = env.invert(goal_thing_below)
        for key, val in goal_thing_above.items():
            if val == "none": goal_thing_above[key] = "nil"
        memorize_env(rvm, goal_thing_above)
        rvm.reset({"jnt": "rest"})
        rvm.mount("main")
        while True:
Example #17
def run_trial(domain):

    env = BlocksWorldEnv(show=False)

    # rejection sample non-trivial instance
    problem = domain.random_problem_instance()
    env.reset()
    env.load_blocks(problem.thing_below, num_bases=domain.num_bases)

    # set up rvm and virtualize
    rvm = make_abstract_machine(env, domain)
    memorize_problem(rvm, problem)
    rvm.reset({"jnt": "rest"})
    rvm.mount("main")
    nvm = virtualize(rvm, σ=nv.default_activator, detach_gates=True)
    nvm.mount("main")
    W_init = {
        name: {
            0: nvm.net.batchify_weights(conn.W)
        }
        for name, conn in nvm.connections.items()
    }
    v_init = {
        name: {
            0: nvm.net.batchify_activities(reg.content)
        }
        for name, reg in nvm.registers.items()
    }
    v_init["jnt"][0] = nvm.net.batchify_activities(
        tr.tensor(rvm.ik["rest"]).float())

    # rvm_results = run_machine(rvm, problem.goal_thing_below, {"jnt": "rest"})
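    # run the rvm tick loop by hand, moving the arm whenever "tar" changes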
    start = time.perf_counter()
    tar_changed = False
    while True:
        done = rvm.tick()
        if tar_changed:
            position = rvm.ik[rvm.registers["jnt"].content]
            env.goto_position(position, speed=1.5)
        if done: break
        tar_changed = (rvm.registers["tar"].content !=
                       rvm.registers["tar"].old_content)
    rvm_ticks = rvm.tick_counter
    rvm_runtime = time.perf_counter() - start
    rvm_sym = compute_symbolic_reward(env, problem.goal_thing_below)
    rvm_spa = compute_spatial_reward(env, problem.goal_thing_below)
    rvm_results = rvm_ticks, rvm_runtime, rvm_sym, rvm_spa

    # nvm_results = run_machine(nvm, problem.goal_thing_below, {"jnt": tr.tensor(rvm.ik["rest"]).float()})
    env.reset()
    env.load_blocks(problem.thing_below, num_bases=domain.num_bases)
    start = time.perf_counter()
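    # run the nvm tick loop until the instruction pointer stops advancing;
    # move the arm whenever the decoded "tar" register changes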
    while True:
        t = nvm.net.tick_counter
        if t > 0 and nvm.decode("ipt", t, 0) == nvm.decode("ipt", t - 1, 0):
            break
        nvm.net.tick(W_init, v_init)
        nvm.pullback(t)
        if t > 1 and nvm.decode("tar", t - 2, 0) != nvm.decode(
                "tar", t - 1, 0):
            position = nvm.net.activities["jnt"][t][0, :, 0].detach().numpy()
            env.goto_position(position, speed=1.5)
    nvm_ticks = nvm.net.tick_counter
    nvm_runtime = time.perf_counter() - start
    nvm_sym = compute_symbolic_reward(env, problem.goal_thing_below)
    nvm_spa = compute_spatial_reward(env, problem.goal_thing_below)
    nvm_results = nvm_ticks, nvm_runtime, nvm_sym, nvm_spa

    env.close()
    return rvm_results, nvm_results, nvm.size(), problem
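
A minimal driver for this domain-based run_trial (hypothetical sketch; the BlockStackingDomain argument order follows an earlier example's __main__ block):

# Hypothetical usage sketch:
if __name__ == "__main__":
    domain = bp.BlockStackingDomain(5, 5, 3)
    rvm_results, nvm_results, size, problem = run_trial(domain)
    print("rvm (ticks, time, sym, spa):", rvm_results)
    print("nvm (ticks, time, sym, spa):", nvm_results)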
Example #18
    thing_below = {'b0': 't1', 'b2': 'b0', 'b4': 'b2', 'b1': 't4', 'b3': 't2'}
    goal_thing_below = {'b1': 't1', 'b2': 't3', 'b3': 'b2', 'b0': 't0', 'b4': 'b0'}

    # num_blocks = 4
    # thing_below = {"b%d"%b: "t%d"%b for b in range(num_blocks)}
    # thing_below.update({"b1": "b0", "b2": "b3"})

    # goal_thing_below = {"b%d"%b: "t%d"%b for b in range(num_blocks)}
    # goal_thing_below.update({"b1": "b2", "b2": "b0"})

    # # thing_below = random_thing_below(num_blocks, max_levels=3)
    # # goal_thing_below = random_thing_below(num_blocks, max_levels=3)

    dump = DataDump(goal_thing_below, hook_period=1)
    # env = BlocksWorldEnv(pb.POSITION_CONTROL, show=True, control_period=12, step_hook=dump.step_hook)
    env = BlocksWorldEnv()
    env.load_blocks(thing_below)

    # from check/camera.py
    pb.resetDebugVisualizerCamera(
        1.2000000476837158, 56.799964904785156, -22.20000648498535,
        (-0.6051651835441589, 0.26229506731033325, -0.24448847770690918))
    
    restacker = Restacker(env, goal_thing_below, dump)
    restacker.run()
    
    reward = compute_symbolic_reward(env, goal_thing_below)
    print("symbolic reward = %f" % reward)

    reward = compute_spatial_reward(env, goal_thing_below)
    print("spatial reward = %f" % reward)
Example #19
    num_repetitions = 1
    num_episodes = 2
    num_epochs = 3

    run_exp = False
    showresults = True
    # tr.autograd.set_detect_anomaly(True)

    if run_exp:

        results = []
        for rep in range(num_repetitions):
            start_rep = time.perf_counter()
            results.append([])

            env = BlocksWorldEnv(show=False)
            # placeholder blocks for nvm init
            env.load_blocks({"b%d" % n: "t%d" % n for n in range(num_bases)})

            rvm = make_abstract_machine(env, num_bases, max_levels)
            nvm = virtualize(rvm)
            # print(nvm.size())
            # input('.')
            init_regs, init_conns = nvm.get_state()
            orig_ik_W = init_conns["ik"].clone()
            init_regs["jnt"] = tr.tensor(rvm.ik["rest"]).float()

            # set up trainable connections
            conn_params = {
                name: init_conns[name]
                # for name in ["ik", "to", "tc", "po", "pc"]