Example #1
def GetOption(mdp,
              k=1,
              sample=False,
              matrix=None,
              intToS=None,
              method='eigen',
              option_type='subgoal'):
    if matrix is not None:
        A = matrix
    elif sample:
        A, intToS = GetIncidenceMatrix(mdp)
    else:
        A, intToS = GetAdjacencyMatrix(mdp)

    if method == 'eigen':
        B, options, vectors = Eigenoptions(A, k)
    elif method == 'fiedler':
        B, options, _, vectors = FiedlerOptions(A, k)
    elif method == 'bet':
        # TODO: B is empty.
        B, options, vectors = BetweennessOptions(A, k)
    else:
        raise ValueError('unknown method: ' + method)

    if option_type != 'subgoal':
        return B, options, intToS, vectors

    # NOTE: use a comprehension here; `[[]] * n` would repeat one shared list.
    egoal_list = [[] for _ in range(len(options) * 2)]
    for i, o in enumerate(options):
        if isinstance(o[0], list):
            for ss in o[0]:
                egoal_list[i * 2].append(intToS[ss])
            for ss in o[1]:
                egoal_list[i * 2 + 1].append(intToS[ss])
        else:
            egoal_list[i * 2] = [intToS[o[0]]]
            egoal_list[i * 2 + 1] = [intToS[o[1]]]

    # NOTE: likewise, `[dict()] * n` would make every entry the same dict.
    evector_list = [dict() for _ in range(len(options) * 2)]
    for i, o in enumerate(options):
        for j in intToS.keys():
            evector_list[i * 2][intToS[j]] = -vectors[i][j]
            evector_list[i * 2 + 1][intToS[j]] = vectors[i][j]

            # TODO: Why were we using hash here?
            # evector_list[i * 2][hash(intToS[j])] = -vectors[i][j]
            # evector_list[i * 2 + 1][hash(intToS[j])] = vectors[i][j]

    return B, egoal_list, intToS, evector_list
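
# A minimal standalone sketch (not part of the original example) of why the
# comprehension form above matters: `[[]] * n` repeats one list object, so an
# append through any index is visible at every index.
n = 4
aliased = [[]] * n
aliased[0].append('goal')
assert aliased[3] == ['goal']        # every slot shares the same list

independent = [[] for _ in range(n)]
independent[0].append('goal')
assert independent[3] == []          # slots are distinct lists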
Example #2
def test_utility(args, mdp):
    # How the number of options affects performance
    # TODO: Compare the utility of point options vs. subgoal options?
    now_ts = str(datetime.now().timestamp())
    origMatrix, intToS = GetAdjacencyMatrix(mdp)
    known_region = list(intToS.values())  # Known region is a set of MDPStates.

    n_ops_list = [2, 4, 8, 16, 32]

    agents = []
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    agents.append(ql_agent)

    method = 'fiedler'

    for n_ops in n_ops_list:
        _, foptions, _, fvectors = GetOption(mdp,
                                             n_ops,
                                             matrix=origMatrix,
                                             intToS=intToS,
                                             option_type=args.optiontype,
                                             method=method)
        print('#options=', n_ops)
        print(foptions)

        if args.optiontype == 'subgoal':
            eigenoption_agent = build_subgoal_option_agent(
                mdp,
                foptions,
                known_region,
                vectors=fvectors,
                name='-' + method + '-' + args.optiontype + '-' + str(n_ops))
        else:
            eigenoption_agent = build_point_option_agent(
                mdp,
                foptions,
                agent=QLearningAgent,
                policy='vi',
                name='-' + method + '-' + args.optiontype + '-' + str(n_ops))

        agents.append(eigenoption_agent)

    run_agents_on_mdp(agents,
                      mdp,
                      instances=args.ninstances,
                      episodes=args.nepisodes,
                      steps=args.nsteps,
                      open_plot=True,
                      track_disc_reward=True,
                      cumulative_plot=True,
                      dir_for_plot="results/")
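
# A hypothetical invocation sketch: `args` only needs the attributes read
# above (`optiontype`, `ninstances`, `nepisodes`, `nsteps`); the values are
# illustrative, and `mdp` would come from e.g. make_grid_world_from_file.
from argparse import Namespace

args = Namespace(optiontype='subgoal', ninstances=5, nepisodes=100, nsteps=50)
# test_utility(args, mdp)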
Example #3
def TestMatching():
    domain = '5x5grid'

    fname = '../tasks/' + domain + '.txt'
    mdp = make_grid_world_from_file(fname)

    G, intToS = GetAdjacencyMatrix(mdp)
    c = GetCost(G)

    matrix, F, LB = MinimumWeightMatching(G, c)

    print('F\'=', F)
    print('LB=', LB)

    Gnx = nx.from_edgelist(F)
    dic = dict()
    # intToS maps node index -> grid state; iterate items() so `s` is the
    # state object (enumerate(intToS) would yield the integer keys instead).
    for i, s in intToS.items():
        dic[i] = (s.x, s.y)

    nx.draw_networkx_nodes(Gnx, pos=dic, node_size=300, node_color='g')
    nx.draw_networkx_edges(Gnx, pos=dic)

    plt.savefig('Matching.pdf')
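
# MinimumWeightMatching and GetCost are project helpers not shown on this
# page. As a rough, hypothetical stand-in, networkx's built-in min-weight
# matching illustrates the same idea on a toy weighted graph:
import networkx as nx

Gtoy = nx.Graph()
Gtoy.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 2.0), (2, 3, 1.0), (0, 3, 2.5)])
matching = nx.min_weight_matching(Gtoy)  # set of matched edge pairs
print('matching=', matching)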
Example #4
if __name__ == "__main__":
    TestMatching()
    exit(0)  # NOTE: the Steiner-tree demo below never runs because of this early exit.
    # domain = '5x5grid'
    # goals = [(1, 5), (1, 1), (5, 5), (3, 3), (5, 1)]

    domain = '9x9grid'
    goals = [(1, 1), (1, 9), (9, 1), (9, 9), (5, 5)]

    # domain = 'fourroom'
    # goals = [(1, 1), (1, 11), (11, 1), (11, 11), (5, 5), (8, 7), (5, 7)]

    fname = '../../tasks/' + domain + '.txt'
    mdp = make_grid_world_from_file(fname)

    G, intToS = GetAdjacencyMatrix(mdp)

    c = np.ones_like(G, dtype=int)
    d = GetCost(G)
    # TODO
    K = StatesToArray(intToS, goals)
    # K = np.random.binomial(n=1, p=0.2, size=G.shape[0]) # np.ones(G.shape[0], dtype=int)

    print('K=', K)
    D = 15

    tree, options = DiameterConstrainedSteinerTree(G, c, d, K, D, 0.1)

    print('tree', tree)
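
# StatesToArray is a project helper that is not shown on this page. Judging
# from its use above, it builds an indicator vector K over graph nodes that
# marks the goal cells; this reconstruction is an assumption, not the
# project's actual implementation:
import numpy as np

def StatesToArrayLike(intToS, goals):
    K = np.zeros(len(intToS), dtype=int)
    for i, s in intToS.items():
        if (s.x, s.y) in goals:  # goals are 1-indexed (x, y) grid cells
            K[i] = 1
    return K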
Example #5
def test_offline_agent(args, mdp):
    '''Generate options offline with several graph-based methods (Fiedler,
    eigen, betweenness) and plot the leading Fiedler vector over the grid;
    the agent comparison below is skipped by an early exit.
    '''
    #########################
    # Parameters for the offline option generation
    # Incidence matrix sampling
    smp_n_traj = args.nsepisodes
    smp_steps = args.nssteps

    # Option policy learning
    op_n_episodes = args.noepisodes
    op_n_steps = args.nosteps

    # Final Evaluation step
    n_episodes = args.nepisodes
    n_steps = args.nsteps
    n_instances = args.ninstances

    n_options = args.noptions

    option_type = args.optiontype

    now = datetime.now()
    now_ts = str(now.timestamp())

    if args.incidence:
        origMatrix, intToS = GetIncidenceMatrix(mdp,
                                                n_traj=smp_n_traj,
                                                eps_len=smp_steps)
    else:
        origMatrix, intToS = GetAdjacencyMatrix(mdp)
    fiedlerMatrix, foptions, _, fvectors = GetOption(mdp,
                                                     n_options,
                                                     matrix=origMatrix,
                                                     intToS=intToS,
                                                     option_type=option_type,
                                                     method='fiedler')
    eigenMatrix, eoptions, _, evectors = GetOption(mdp,
                                                   n_options,
                                                   matrix=origMatrix,
                                                   intToS=intToS,
                                                   option_type=option_type,
                                                   method='eigen')
    _, boptions, _, bvectors = GetOption(mdp,
                                         n_options,
                                         matrix=origMatrix,
                                         intToS=intToS,
                                         option_type=option_type,
                                         method='bet')

    ######################################
    # Use the options for the learning

    ###
    # Make goal-based option agent.
    # TODO: Generate a set of MDPs with each goal set to the subgoal discovered by the algorithm

    vec = fvectors[0]  # leading Fiedler-vector values, keyed by state

    def ffunc(x, y):
        # Look up the vector value at grid cell (x, y); 0.0 for unknown cells.
        for key, item in vec.items():
            if key.x == x and key.y == y:
                return item
        return 0.0

    xr = mdp.width
    yr = mdp.height
    val = np.zeros((yr, xr))
    for x in range(xr):
        for y in range(yr):
            val[y][x] = ffunc(x + 1, y + 1)  # grid cells are 1-indexed

    gpos = mdp.goal_locs[0]
    gval = val[gpos[1] - 1][gpos[0] - 1]  # vector value at the goal cell
    for x in range(xr):
        for y in range(yr):
            val[y][x] = abs(gval - val[y][x])

    euclid = False  # set True to plot Euclidean distance to the goal instead
    if euclid:
        for x in range(xr):
            for y in range(yr):
                val[y][x] = ((x - gpos[0] + 1)**2 + (y - gpos[1] + 1)**2)**0.5

    print('val=', val)

    maxval = np.amax(val)
    minval = np.amin(val)

    cmap = matplotlib.cm.get_cmap('Blues')  # removed in matplotlib >= 3.9; use matplotlib.colormaps['Blues'] there
    norm = matplotlib.colors.Normalize(vmin=minval, vmax=maxval)

    rgba_ = cmap(norm(val))  # colormap output, used below only for its shape
    rgba = np.ones_like(rgba_)  # start from an all-white RGBA image

    for w in mdp.walls:
        rgba[w[1] - 1, w[0] - 1, :3] = 0, 0, 0  # paint walls black

    rgba[gpos[1] - 1, gpos[0] - 1, :3] = 1, 1, 1  # paint the goal cell white

    plt.imshow(rgba, interpolation='nearest')

    if euclid:
        plt.savefig('euclid.pdf', bbox_inches='tight', pad_inches=0)
    else:
        plt.savefig('eigenfunc.pdf', bbox_inches='tight', pad_inches=0)

    ###############################################################
    ###############################################################

    exit(0)  # NOTE: the agent comparison below never runs because of this early exit.

    #################################
    # Subgoal or point options
    #################################
    if option_type == 'subgoal':
        known_region = list(
            intToS.values())  # Known region is a set of MDPStates.
        # TODO: how is the state represented here in intToS?
        eigenoption_agent = build_subgoal_option_agent(mdp,
                                                       eoptions,
                                                       known_region,
                                                       vectors=evectors,
                                                       name='-eigen',
                                                       n_trajs=op_n_episodes,
                                                       n_steps=op_n_steps)
        fiedleroption_agent = build_subgoal_option_agent(mdp,
                                                         foptions,
                                                         known_region,
                                                         vectors=fvectors,
                                                         name='-fiedler',
                                                         n_trajs=op_n_episodes,
                                                         n_steps=op_n_steps)
        betoption_agent = build_subgoal_option_agent(mdp,
                                                     boptions,
                                                     known_region,
                                                     vectors=bvectors,  # was fvectors; bvectors matches the 'bet' options
                                                     name='-bet',
                                                     n_trajs=op_n_episodes,
                                                     n_steps=op_n_steps)
    else:
        eigenoption_agent = build_point_option_agent(mdp,
                                                     eoptions,
                                                     agent=QLearningAgent,
                                                     policy='vi',
                                                     name='-eigen')
        fiedleroption_agent = build_point_option_agent(mdp,
                                                       foptions,
                                                       agent=QLearningAgent,
                                                       policy='vi',
                                                       name='-fiedler')
        betoption_agent = build_point_option_agent(mdp,
                                                   boptions,
                                                   agent=QLearningAgent,
                                                   policy='vi',
                                                   name='-bet')

    ql_agent = QLearningAgent(actions=mdp.get_actions(), default_q=1.0)
    rand_agent = RandomAgent(mdp.get_actions())

    run_agents_on_mdp([
        fiedleroption_agent, eigenoption_agent, betoption_agent, ql_agent,
        rand_agent
    ],
                      mdp,
                      instances=n_instances,
                      episodes=n_episodes,
                      steps=n_steps,
                      open_plot=True,
                      cumulative_plot=True,
                      track_disc_reward=True,
                      dir_for_plot="results/",
                      reset_at_terminal=False)
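
# A hypothetical argument-parser sketch covering the flags this function
# reads from `args`; the flag names mirror the attribute accesses above,
# and the defaults are illustrative guesses, not the project's actual values:
import argparse

def make_offline_parser():
    p = argparse.ArgumentParser()
    p.add_argument('--incidence', action='store_true')     # sample an incidence matrix
    p.add_argument('--nsepisodes', type=int, default=100)  # sampling trajectories
    p.add_argument('--nssteps', type=int, default=50)      # steps per sampled trajectory
    p.add_argument('--noepisodes', type=int, default=100)  # option-policy episodes
    p.add_argument('--nosteps', type=int, default=50)      # option-policy steps
    p.add_argument('--nepisodes', type=int, default=100)   # evaluation episodes
    p.add_argument('--nsteps', type=int, default=50)       # evaluation steps
    p.add_argument('--ninstances', type=int, default=5)    # evaluation instances
    p.add_argument('--noptions', type=int, default=4)      # number of options
    p.add_argument('--optiontype', default='subgoal', choices=['subgoal', 'point'])
    return p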
Example #6
def GetGraphDrawingOptions(mdp, k=1):
    A, intToS = GetAdjacencyMatrix(mdp)
    B, options = GraphDrawingOptions(A, k)
    return B, options, intToS
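
# Usage sketch (hypothetical; assumes GraphDrawingOptions and
# GetAdjacencyMatrix are importable from this project):
# B, options, intToS = GetGraphDrawingOptions(mdp, k=2)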