Пример #1
0
def main():
    """Compute Laplacian eigenvectors for four grid worlds and train policies.

    For each environment size 1..4 this:
      1. builds ``gridWorld1(size=num)`` and a ``Laplacian`` model for it,
      2. runs ``getLaplacian`` with the per-environment iteration budget,
      3. saves the eigen data under ``data/dat<num>/`` and loads it back
         from the same three files,
      4. maps ``trainLaplace`` over ``range(numMod[num - 1])`` in a process
         pool.

    ``globalNum``, ``env`` and ``laplaceModel`` are published as module
    globals before the pool is created; presumably ``trainLaplace`` reads
    them inside the worker processes (verify against its definition).
    """
    global globalNum, env, laplaceModel

    iters = [int(50e4), int(50e4), int(10e6), int(10e6)]
    numMod = [5, 5, 10, 10]
    NUM_CORES = 8

    # iterate over 4 envs
    for num in range(1, 5):
        print(num)
        globalNum = num
        env = gridWorld1(size=num)
        laplaceModel = Laplacian(env)

        # The same three files are written and then read back.
        data_dir = "data/dat" + str(num)
        eigen_files = (
            data_dir + "/eigenval" + str(num) + ".csv",
            data_dir + "/eigenvec" + str(num) + ".csv",
            data_dir + "/eigenMap" + str(num) + ".pkl",
        )

        laplaceModel.getLaplacian(iters=iters[num - 1])
        laplaceModel.saveEigenVec(*eigen_files)

        # Load laplacian model
        laplaceModel.loadEigenVecs(*eigen_files)

        #  Train policies for eigen-vectors (train in parallel)
        procPool = Pool(NUM_CORES)
        try:
            procPool.map(trainLaplace, range(numMod[num - 1]))
        finally:
            # Release the worker processes before the next environment;
            # otherwise every iteration leaves NUM_CORES workers behind.
            procPool.close()
            procPool.join()
Пример #2
0
def main():
    """Plot stored eigenvectors and successor representations per grid world.

    For each environment size 1..4, loads the saved Laplacian eigen data
    and plots eigenvectors 0..19, then loads the saved successor matrix
    and plots it for 20 randomly drawn states whose ``env.room`` cell is
    not 1.
    """
    for env_idx in range(4):
        num = env_idx + 1
        env = gridWorld1(size=num)

        laplaceModel = Laplacian(env)
        successorModel = Successor(env)

        data_prefix = "data/dat" + str(num)

        print("Laplacian framework")
        laplaceModel.loadEigenVecs(
            data_prefix + "/eigenval" + str(num) + ".csv",
            data_prefix + "/eigenvec" + str(num) + ".csv",
            data_prefix + "/eigenMap" + str(num) + ".pkl")

        eigen_dir = "images/eigenvec" + str(num)
        for vec_idx in range(20):
            laplaceModel.plotSingleEigen(
                vec_idx, eigen_dir + "/eigen" + str(vec_idx) + ".png")

        print("Successor framework")
        successorModel.loadSuccessor(
            data_prefix + "/successor" + str(num) + ".csv")

        state_count = len(successorModel.keys)
        sr_dir = "images/successor" + str(num)
        for plot_idx in range(20):
            # Redraw until the sampled state's room cell is not 1.
            while True:
                ind = np.random.randint(state_count)
                pos = successorModel.revMap[ind]
                if env.room[pos[0]][pos[1]] != 1:
                    break

            successorModel.plotSuccessorState(
                ind,
                sr_dir + "/sr2d" + str(plot_idx) + ".png",
                sr_dir + "/sr3d" + str(plot_idx) + ".png")
Пример #3
0
def main():
    """Cluster successor representations and build one policy per medoid.

    Command-line options:
      --cluster  int, default 2; passed as the third argument of
                 ``clusterRepresentation``.
      --norm     int, default 0; passed as the fourth argument of
                 ``clusterRepresentation``.

    For each environment size 1..4, loads the saved successor matrix,
    clusters it into ``numMod[num - 1]`` groups, saves the labels, and
    maps ``buildSRPolicy`` over the medoid indices in a process pool.

    ``srModel``, ``env`` and ``globNum`` are published as module globals
    before the pool is created; presumably ``buildSRPolicy`` reads them
    inside the worker processes (verify against its definition).
    """
    global srModel, env, globNum

    parser = argparse.ArgumentParser(description='Get SR policies')
    parser.add_argument('--cluster', type=int, default=2)
    parser.add_argument('--norm', type=int, default=0)
    args = parser.parse_args()

    #  Number of clusters requested per environment
    numMod = [4, 5, 10, 10]
    for num in range(1, 5):
        print("Env : " + str(num))
        env = gridWorld1(size=num)
        globNum = num
        srModel = Successor(env)
        srModel.loadSuccessor("data/dat" + str(num) + "/successor" + str(num) +
                              ".csv")

        srModel.clusterRepresentation(numMod[num - 1], "images/tmp/sr.png",
                                      args.cluster, args.norm)
        srModel.saveLabels('data/dat' + str(num) + "/successorLabels" +
                           str(num) + ".pkl")

        procPool = Pool(12)
        try:
            procPool.map(buildSRPolicy, range(len(srModel.medoids)))
        finally:
            # Release the worker processes before the next environment;
            # otherwise every iteration leaves 12 workers behind.
            procPool.close()
            procPool.join()
Пример #4
0
def main():
    """Compare SMDP Q-learning configurations on one grid world.

    Command-line options:
      --env    int in 1..4, default 3; selects the grid world and the
               matching iteration budget / option count.
      --ratio  float, default 50.0; passed as ``rat`` to the non-uniform
               training runs.
      --seed   int, default 1000; seeds both ``random`` and ``np.random``.

    Runs 200 trials. Each trial creates a fresh environment, resets its
    goal and start, and then builds and trains six learners in order:
    no options, eigen options, SR options, and the non-uniform variants
    of eigen, SR, and SR with ``adaptive=True``.

    An out-of-range ``--env`` is reported through ``parser.error``, which
    prints the usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser(description='Get SR policies')
    parser.add_argument('--env', type=int, default=3)
    parser.add_argument('--ratio', type=float, default=50.0)
    parser.add_argument('--seed', type=int, default=1000)
    args = parser.parse_args()

    # Validate explicitly: an assert disappears under `python -O`, after
    # which a bad value would surface as an IndexError below.
    if not 1 <= args.env <= 4:
        parser.error("--env must be between 1 and 4")

    random.seed(args.seed)
    np.random.seed(args.seed)

    ITERS = [int(5e4), int(5e4), int(5e5), int(5e5)][args.env - 1]
    OPTSIZE = [4, 5, 10, 10][args.env - 1]
    EVALNUM = 100

    pth = "data/dat" + str(args.env) + "/policies/"
    pklPath = "data/dat" + str(args.env) + \
        "/successorLabels" + str(args.env) + ".pkl"

    # Extra train() arguments shared by the non-uniform runs.
    nonuniform = {"uniform": False, "rat": args.ratio}

    # (label, extra SmdpQlearner kwargs, extra train kwargs), in run order.
    runs = [
        ("Option0",
         {"optSize": 0},
         {"policyPath": "images/tmp1"}),
        ("Option5-Eigen",
         {"optSize": OPTSIZE},
         {"policyPath": "images/tmp2"}),
        ("Option5-SR",
         {"optSize": OPTSIZE, "eigen": False},
         {"policyPath": "images/tmp3"}),
        ("Option5-Eigen-NU",
         {"optSize": OPTSIZE},
         dict(nonuniform, policyPath="images/tmp2")),
        ("Option5-SR-NU",
         {"optSize": OPTSIZE, "eigen": False},
         dict(nonuniform, policyPath="images/tmp3")),
        ("Option5-SR-AE",
         {"optSize": OPTSIZE, "eigen": False},
         dict(nonuniform, policyPath="images/tmp3",
              adaptive=True, adaPath=pklPath)),
    ]

    for _ in range(200):
        env = gridWorld1(size=args.env)
        env.resetGoal()
        env.resetStart()
        env.reset()

        for label, learner_kwargs, train_kwargs in runs:
            print(label)
            qlearner = SmdpQlearner(env, optionPath=pth, gamma=0.99,
                                    plot=False, **learner_kwargs)
            qlearner.train(iters=ITERS, evalNum=EVALNUM, **train_kwargs)
Пример #5
0
def main():
    """Incrementally build successor representations and exploration policies.

    Works on grid world 4 in incremental mode. Each of the nine rounds
    recomputes the successor representation (options are enabled from the
    second round on), clusters it, and trains, plots and saves one
    ``SimpleQLearner`` policy per cluster medoid. The ninth round requests
    the final rare-state successor and trains each policy for
    ``int(5e4)`` iterations instead of the per-environment budget.
    """
    train_iters = [int(10e5), int(5e3), int(5e3), int(5e3)]
    cluster_counts = [10, 4, 7, 5]

    env_num = 4
    assert 1 <= env_num <= 4

    print("Env : " + str(env_num))
    env = gridWorld1(size=env_num, incremental=True)
    print("goal ", env.goal)
    srModel = IncSuccessor(env)
    render = False

    #  options are unavailable in first round
    optionsAvailable = False

    policy_img_prefix = "images/policies" + str(env_num) + "/explorepolicy"
    policy_csv_prefix = "data/dat" + str(env_num) + "/policies/explorepolicy"
    sr_img_prefix = "images/sr" + str(env_num)

    #  Each incremental iteration
    for round_no in range(9):
        final_round = round_no >= 8

        #  Build successor representation with current set of options
        srModel.getSuccessor(optionsAvailable=optionsAvailable, render=render)
        srModel.getValidSuccessor()
        srModel.getReachedSuccessor()
        srModel.plotSuccessorMagnitudes("images/magnitudes/" + str(round_no) +
                                        ".png")

        #  Sample 20 cells whose room value is 0. The list is not read
        #  again in this function; the sampling is kept because it draws
        #  from the NumPy RNG and looks each cell up in validKeyInd.
        randomState = []
        for _ in range(20):
            while True:
                x = np.random.randint(env.height)
                y = np.random.randint(env.width)
                if env.room[x][y] == 0:
                    break

            randomState.append(srModel.validKeyInd[(x, y)])

        # If final iteration, run full successor representation
        if final_round:
            srModel.getRareStateSuccessor(final=True)
        # Otherwise, cluster SR of states that are not used frequently
        else:
            srModel.getRareStateSuccessor()
        srModel.clusterRepresentation(
            cluster_counts[env_num - 1],
            "images/inc-goals/sr" + str(round_no) + ".png",
            2, 0)

        print("size", srModel.successor.shape)
        curMedoids = []

        #  Iterate over every cluster center and learn policy
        for opt_idx, medoid in enumerate(srModel.medoids):
            print(opt_idx)

            #  The medoid's row of the valid successor matrix is handed
            #  to the learner as its second argument.
            reward_key = srModel.validKeyInd[srModel.reachedRevMap[medoid]]
            rew = srModel.validSuccessor[reward_key]

            learner = SimpleQLearner(env, rew, srModel.keyInd)
            learner.train(int(5e4) if final_round
                          else train_iters[env_num - 1])
            learner.plotPolicy(policy_img_prefix + str(opt_idx) + ".png")
            learner.savePolicy(policy_csv_prefix + str(opt_idx) + ".csv")

            ind = srModel.reachedRevMap[medoid]
            plot_ind = srModel.keyInd[ind]
            srModel.plotSuccessorState(
                plot_ind,
                sr_img_prefix + "/sr2d" + str(opt_idx) + ".png",
                sr_img_prefix + "/sr3d" + str(opt_idx) + ".png")

            curMedoids.append(medoid)

        optionsAvailable = True