Exemplo n.º 1
0
def main(values_path=None):
    """Serve TTT with a learned policy.

    When `values_path` is None the pickled server state is read back from
    "server_state.pickle.gz". Otherwise the state is rebuilt by calling
    `prepare_state(values_path)` and written to that same file. Either way
    the resulting triple is stored in the module-level globals before the
    server is started.
    """

    global global_policy, global_feature_map, global_value_function

    cache_path = "server_state.pickle.gz"

    if values_path is not None:
        # a values file was supplied: recompute the state and cache it
        loaded = prepare_state(values_path)

        with specmine.openz(cache_path, "wb") as cache_file:
            pickle.dump(loaded, cache_file)
    else:
        logger.info("loading precomputed server state")

        with specmine.openz(cache_path) as cache_file:
            loaded = pickle.load(cache_file)

        logger.info("done loading server state")

    # the state is a 3-tuple: (policy, feature map, value function)
    global_policy, global_feature_map, global_value_function = loaded

    server.run(host="0.0.0.0")
Exemplo n.º 2
0
def main(values_path = None):
    """Serve TTT with a learned policy.

    The server state (policy, feature map, value function) is either
    unpickled from "server_state.pickle.gz" (no `values_path` given) or
    produced by `prepare_state(values_path)` and then pickled to that file.
    The three components are assigned to module globals and the server is run.
    """

    global global_policy
    global global_feature_map
    global global_value_function

    snapshot_path = "server_state.pickle.gz"
    use_snapshot = values_path is None

    if use_snapshot:
        logger.info("loading precomputed server state")

        with specmine.openz(snapshot_path) as snapshot_file:
            snapshot = pickle.load(snapshot_file)

        logger.info("done loading server state")
    else:
        snapshot = prepare_state(values_path)

        # keep the freshly prepared state around for later runs
        with specmine.openz(snapshot_path, "wb") as snapshot_file:
            pickle.dump(snapshot, snapshot_file)

    (policy, feature_map, value_function) = snapshot

    global_policy = policy
    global_feature_map = feature_map
    global_value_function = value_function

    server.run(host="0.0.0.0")
Exemplo n.º 3
0
def get_template_map(m, n, B=numpy.inf, symmetric=True):
    """Return the m-by-n template feature map, reusing the on-disk copy if any.

    The pickle path depends on `m`, `n` and `symmetric`. If that file exists
    it is loaded; otherwise a `TemplateFeatureMap(m, n)` is constructed and
    pickled to the path. With `B` equal to `numpy.inf` the complete map is
    returned; for any other `B` the result of `gen_map(B)` is returned.
    """
    if symmetric:
        template = 'specmine/data/feature_maps/template_feature_map.{a}x{b}.symmetric.pickle.gz'
    else:
        template = 'specmine/data/feature_maps/template_feature_map.{a}x{b}.pickle.gz'

    cache_path = template.format(a=m, b=n)

    if not os.path.isfile(cache_path):
        # nothing cached yet: build the map and store it for next time
        logger.info("generating complete %i by %i feature map", m, n)

        complete_map = specmine.feature_maps.TemplateFeatureMap(m, n)

        logger.info('saving computed feature map: %s', cache_path)
        with specmine.util.openz(cache_path, "wb") as sink:
            pickle.dump(complete_map, sink)
    else:
        logger.info("using precomputed features at %s", cache_path)
        with specmine.openz(cache_path) as source:
            complete_map = pickle.load(source)

    if B == numpy.inf:
        return complete_map

    return complete_map.gen_map(B)
Exemplo n.º 4
0
def prepare_state(values_path):
    """Build the (policy, feature_map, value_function) triple from stored values.

    The state values are unpickled from `values_path`. Features come from a
    200-column Laplacian basis of the gameplay adjacency matrix, and a ridge
    regression from those features to the stored values supplies the weights
    of a linear value function. The returned policy wraps that value function
    for player 1, whose opponent is a `RandomPolicy`.
    """
    num_features = 200

    with specmine.openz(values_path) as values_file:
        values = pickle.load(values_file)

    logger.info("converting states to their vector representation")

    adjacency = specmine.tictac.load_adjacency_dict()
    gameplay_NN, gameplay_index = specmine.discovery.adjacency_dict_to_matrix(adjacency)
    feature_map = specmine.discovery.TabularFeatureMap(
        specmine.spectral.laplacian_basis(gameplay_NN, num_features),
        gameplay_index,
        )

    # player 1 faces a random opponent playing as -1
    random_opponent = specmine.rl.RandomPolicy(specmine.rl.TicTacToeDomain(player=-1))
    domain = specmine.rl.TicTacToeDomain(player=1, opponent=random_opponent)

    # regression inputs and targets, in one consistent state order
    known_states = list(values)
    features = numpy.array([feature_map[state] for state in known_states])
    targets = numpy.array([values[state] for state in known_states])

    logger.info("fitting value function predictor")

    regressor = sklearn.linear_model.Ridge(alpha=1.0)
    regressor.fit(features, targets)

    value_function = specmine.rl.LinearValueFunction(feature_map, regressor.coef_)
    policy = specmine.rl.StateValueFunctionPolicy(domain, value_function)

    return (policy, feature_map, value_function)
def main(out_path, values_path, neighbors = 8, workers = 0):
    """Run TTT state-clustering experiment(s).

    Loads the pickled values from `values_path`, builds raw feature vectors
    for every state in the adjacency dict, and derives two graphs: an
    affinity graph over those vectors (using `neighbors`) and the gameplay
    adjacency matrix. Jobs for random features and for both graphs are
    handed to `condor.do_or_distribute` with `workers`, and every result row
    is written to the CSV file at `out_path`.
    """

    with specmine.openz(values_path) as values_file:
        state_values = pickle.load(values_file)

    adjacency = specmine.tictac.load_adjacency_dict()
    ordered_states = list(adjacency)

    logger.info("converting states to their vector representation")

    # position of each state within ordered_states / vectors_ND
    affinity_index = dict((state, position) for (position, state) in enumerate(ordered_states))
    vectors_ND = numpy.array([raw_state_features(state) for state in ordered_states])

    affinity_NN = specmine.discovery.affinity_graph(vectors_ND, neighbors)
    gameplay_NN, gameplay_index = specmine.discovery.adjacency_dict_to_matrix(adjacency)

    graphs = [
        ("gameplay", gameplay_NN, gameplay_index),
        ("affinity", affinity_NN, affinity_index),
        ]

    def yield_jobs():
        # for each feature count: one random-feature job, then one per graph
        for B in numpy.r_[0:400:64j].astype(int):
            yield (run_random_features, [B, vectors_ND, affinity_index, state_values])

            for (graph_name, graph_NN, graph_index) in graphs:
                yield (run_graph_features, [graph_name, B, vectors_ND, graph_NN, graph_index, state_values])

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        def record(_, row):
            return writer.writerow(row)

        writer.writerow(["map_name", "features", "score_mean", "score_variance"])

        condor.do_or_distribute(yield_jobs(), workers, record)
def main(out_path, opponent_path=None):
    """Compute the TTT value function using value iteration.

    Player 1's domain is built against either a `RandomPolicy` (no
    `opponent_path`) or a policy unpickled from `opponent_path`. The table
    returned by `compute_state_values_table_nondet` is pickled to `out_path`.
    """

    rival_domain = specmine.rl.TicTacToeDomain(player=-1)

    if opponent_path is not None:
        logger.info("loading opponent policy from %s", opponent_path)

        with specmine.openz(opponent_path) as opponent_file:
            rival_policy = pickle.load(opponent_file)
    else:
        rival_policy = specmine.rl.RandomPolicy(rival_domain)

    domain = specmine.rl.TicTacToeDomain(player=1, opponent=rival_policy)

    # the nondeterministic variant is the one in use here
    # (specmine.rl.compute_state_values_table is the alternative)
    table = specmine.rl.compute_state_values_table_nondet(domain)

    with specmine.openz(out_path, "wb") as out_file:
        pickle.dump(table, out_file)
def main(out_path, opponent_path = None):
    """Compute the TTT value function using value iteration.

    The opponent (player -1) follows a policy unpickled from `opponent_path`
    when one is given, and a `RandomPolicy` otherwise. State values for
    player 1 are computed with `compute_state_values_table_nondet` and
    pickled to `out_path`.
    """

    def load_opponent_policy():
        # read a previously stored opponent policy from disk
        logger.info("loading opponent policy from %s", opponent_path)

        with specmine.openz(opponent_path) as opponent_file:
            return pickle.load(opponent_file)

    # construct domain
    opponent_domain = specmine.rl.TicTacToeDomain(player=-1)

    if opponent_path is None:
        opponent = specmine.rl.RandomPolicy(opponent_domain)
    else:
        opponent = load_opponent_policy()

    domain = specmine.rl.TicTacToeDomain(player=1, opponent=opponent)

    # compute the value function and store it
    # (compute_state_values_table is the alternative to the variant used here)
    computed = specmine.rl.compute_state_values_table_nondet(domain)

    with specmine.openz(out_path, "wb") as out_file:
        pickle.dump(computed, out_file)
def main(out_path, states_path=None, render_with=None, coloring_path=None):
    """Visualize a state space graph.

    The graph is unpickled from `states_path`, or obtained from
    `specmine.tictac.load_adjacency_dict()` when no path is given. An
    optional coloring is unpickled from `coloring_path` and must have as
    many entries as the graph. Everything is then passed to
    `specmine.graphviz.visualize_graph`, together with `render_with`.
    """

    if states_path is not None:
        with specmine.openz(states_path) as states_file:
            graph = pickle.load(states_file)
    else:
        graph = specmine.tictac.load_adjacency_dict()

    logger.info("writing %i-vertex graph to %s", len(graph), out_path)

    coloring = None

    if coloring_path is not None:
        with specmine.util.openz(coloring_path) as pickle_file:
            coloring = pickle.load(pickle_file)

        # a coloring is only usable if it has one entry per vertex
        assert len(coloring) == len(graph)

    specmine.graphviz.visualize_graph(out_path, graph, render_with, coloring)
Exemplo n.º 9
0
def main(out_path, states_path=None, render_with=None, coloring_path=None):
    """Visualize a state space graph.

    Without `states_path` the adjacency dict from `specmine.tictac` is used;
    with it, the graph is unpickled from that file. If `coloring_path` is
    given, a coloring is unpickled from it and checked to have the same
    length as the graph. The graph is written to `out_path` through
    `specmine.graphviz.visualize_graph`.
    """

    def unpickle(opener, path):
        # read one pickled object through the given opener
        with opener(path) as handle:
            return pickle.load(handle)

    if states_path is None:
        vertices = specmine.tictac.load_adjacency_dict()
    else:
        vertices = unpickle(specmine.openz, states_path)

    logger.info("writing %i-vertex graph to %s", len(vertices), out_path)

    if coloring_path is None:
        vertex_colors = None
    else:
        vertex_colors = unpickle(specmine.util.openz, coloring_path)

        assert len(vertex_colors) == len(vertices)

    specmine.graphviz.visualize_graph(out_path, vertices, render_with, vertex_colors)
def main(out_path, values_path, neighbors=8, workers=0):
    """Run TTT state-clustering experiment(s).

    Values are unpickled from `values_path`. Each state of the adjacency
    dict is converted with `raw_state_features`; an affinity graph with
    `neighbors` neighbors and the gameplay adjacency matrix are built. For a
    range of feature counts, random-feature and graph-feature jobs are run
    through `condor.do_or_distribute` (with `workers`), and each result is
    appended as a row of the CSV file at `out_path`.
    """

    # load the value function
    with specmine.openz(values_path) as values_file:
        values = pickle.load(values_file)

    # convert states to their vector representations
    states_adict = specmine.tictac.load_adjacency_dict()
    state_list = list(states_adict)

    logger.info("converting states to their vector representation")

    affinity_index = {}

    for (row_number, state) in enumerate(state_list):
        affinity_index[state] = row_number

    vectors_ND = numpy.array([raw_state_features(state) for state in state_list])

    # build both graphs
    affinity_NN = specmine.discovery.affinity_graph(vectors_ND, neighbors)
    matrix_and_index = specmine.discovery.adjacency_dict_to_matrix(states_adict)
    (gameplay_NN, gameplay_index) = matrix_and_index

    def yield_jobs():
        feature_counts = numpy.r_[0:400:64j].astype(int)

        for B in feature_counts:
            random_args = [B, vectors_ND, affinity_index, values]
            gameplay_args = ["gameplay", B, vectors_ND, gameplay_NN, gameplay_index, values]
            affinity_args = ["affinity", B, vectors_ND, affinity_NN, affinity_index, values]

            yield (run_random_features, random_args)
            yield (run_graph_features, gameplay_args)
            yield (run_graph_features, affinity_args)

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)
        header = ["map_name", "features", "score_mean", "score_variance"]

        writer.writerow(header)

        # every finished job contributes one CSV row
        condor.do_or_distribute(yield_jobs(), workers, lambda _, r: writer.writerow(r))
Exemplo n.º 11
0
def main(out_path, player=-1):
    """Compute the optimal TTT policy for the specified player.

    For every state of the domain in which `player` is to move, the move
    returned by `specmine.tictac.ab_optimal_move` is recorded. In all other
    states the single available action is recorded, or nothing when there
    are no actions. The resulting dict is pickled to `out_path`.
    """

    rival_domain = specmine.rl.TicTacToeDomain(player=-1 * player)
    rival_policy = specmine.rl.RandomPolicy(rival_domain)

    domain = specmine.rl.TicTacToeDomain(player=player, opponent=rival_policy)

    policy = {}

    for state in domain.states:
        board, mover = state

        if player != mover:
            available = list(domain.actions_in(state))

            if not available:
                # nothing to record; the label below is only used for logging
                move = "(Terminal)"
            else:
                # exactly one action is expected here; unpacking enforces it
                (move,) = available

                policy[state] = move
        else:
            (move_i, move_j, _) = specmine.tictac.ab_optimal_move(board, player)

            move = (move_i, move_j)
            policy[state] = move

        logger.info("optimal move in %s: %s", board._grid.astype(int).tolist(), move)

    with specmine.openz(out_path, "wb") as out_file:
        pickle.dump(policy, out_file)
Exemplo n.º 12
0
def main(out_path, number=9):
    """Analyze eigenvectors in the TTT domain.

    Computes `number` Laplacian basis vectors of the gameplay adjacency
    matrix. For each of the nine boards reached by one move of player 1 from
    the initial board, it writes every basis component of that state to a CSV
    file at `out_path`, with columns (number, i, j, value).
    """

    B = number
    adjacency = specmine.tictac.load_adjacency_dict()
    gameplay_NN, index = specmine.discovery.adjacency_dict_to_matrix(adjacency)
    basis_NB = specmine.spectral.laplacian_basis(gameplay_NN, B)
    empty_board = specmine.tictac.BoardState()
    rows = []

    for i in xrange(3):
        for j in xrange(3):
            # state after player 1 marks (i, j); player -1 is next to move
            after_move = empty_board.make_move(1, i, j)
            n = index[(after_move, -1)]

            rows.extend([b, i, j, basis_NB[n, b]] for b in xrange(B))

    with specmine.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["number", "i", "j", "value"])
        writer.writerows(rows)
Exemplo n.º 13
0
def main(out_path, number=9):
    """Analyze eigenvectors in the TTT domain.

    A Laplacian basis with `number` columns is computed from the gameplay
    adjacency matrix. For each cell (i, j) of the 3x3 board, the state
    reached by player 1 moving there from the initial board is looked up, and
    one CSV row (number, i, j, value) per basis column is written to
    `out_path`.
    """

    B = number
    (gameplay_NN, index) = specmine.discovery.adjacency_dict_to_matrix(
        specmine.tictac.load_adjacency_dict(),
        )
    basis_NB = specmine.spectral.laplacian_basis(gameplay_NN, B)
    start = specmine.tictac.BoardState()
    header = ["number", "i", "j", "value"]
    records = []

    for i in xrange(3):
        for j in xrange(3):
            # row of the basis matrix belonging to this one-move state
            state_row = index[(start.make_move(1, i, j), -1)]

            for b in xrange(B):
                component = basis_NB[state_row, b]

                records.append([b, i, j, component])

    with specmine.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(header)
        writer.writerows(records)
Exemplo n.º 14
0
def prepare_state(values_path):
    """Prepare the policy, feature map and value function for serving.

    Values are unpickled from `values_path`. A tabular feature map is built
    from 200 Laplacian basis vectors of the gameplay adjacency matrix. A
    ridge regression (alpha 1.0) is fit from the features of every state in
    the values to its stored value; its coefficients define a
    `LinearValueFunction`, which a `StateValueFunctionPolicy` for player 1
    then wraps. Returns `(policy, feature_map, value_function)`.
    """
    basis_size = 200

    with specmine.openz(values_path) as values_file:
        values = pickle.load(values_file)

    logger.info("converting states to their vector representation")

    states_adict = specmine.tictac.load_adjacency_dict()
    matrix_and_index = specmine.discovery.adjacency_dict_to_matrix(states_adict)
    (gameplay_NN, gameplay_index) = matrix_and_index
    basis_NB = specmine.spectral.laplacian_basis(gameplay_NN, basis_size)
    feature_map = specmine.discovery.TabularFeatureMap(basis_NB, gameplay_index)

    # the learner plays as 1 against a random player -1
    opponent_domain = specmine.rl.TicTacToeDomain(player=-1)
    opponent_policy = specmine.rl.RandomPolicy(opponent_domain)
    domain = specmine.rl.TicTacToeDomain(player=1, opponent=opponent_policy)

    # one feature row and one target per state with a stored value
    valued_states = list(values)
    inputs = numpy.array([feature_map[s] for s in valued_states])
    outputs = numpy.array([values[s] for s in valued_states])

    logger.info("fitting value function predictor")

    model = sklearn.linear_model.Ridge(alpha=1.0)
    model.fit(inputs, outputs)

    value_function = specmine.rl.LinearValueFunction(feature_map, model.coef_)
    policy = specmine.rl.StateValueFunctionPolicy(domain, value_function)

    return (policy, feature_map, value_function)
def main(out_path, player=-1):
    """Compute the optimal TTT policy for the specified player.

    Walks every state of a domain in which `player` faces a random opponent.
    States where `player` moves get the move chosen by
    `specmine.tictac.ab_optimal_move`; other states get their single
    available action, if there is one. The mapping from state to move is
    pickled to `out_path`.
    """

    # construct domain
    opponent_policy = specmine.rl.RandomPolicy(
        specmine.rl.TicTacToeDomain(player=-1 * player),
        )
    domain = specmine.rl.TicTacToeDomain(player=player, opponent=opponent_policy)

    # compute the optimal policy
    policy = {}

    for state in domain.states:
        (board, player_to_move) = state
        our_turn = player == player_to_move

        if our_turn:
            (row, column, _) = specmine.tictac.ab_optimal_move(board, player)
            chosen = (row, column)

            policy[state] = chosen
        else:
            options = list(domain.actions_in(state))

            if options:
                # unpacking requires that exactly one action is available
                (chosen,) = options

                policy[state] = chosen
            else:
                # no actions: log a placeholder, store nothing
                chosen = "(Terminal)"

        grid = board._grid.astype(int).tolist()

        logger.info("optimal move in %s: %s", grid, chosen)

    # and store it
    with specmine.openz(out_path, "wb") as out_file:
        pickle.dump(policy, out_file)
Exemplo n.º 16
0
def get_laplacian_map(boards=None, num_samples=10000, max_eigs=500, neighbors=8,
                      affinity_map=specmine.feature_maps.flat_affinity_map, eig_solver="arpack"):
    """Return a Laplacian-eigenvector feature map, reusing a cached one if possible.

    Cached maps live in 'specmine/data/feature_maps/' under a file name that
    encodes `num_samples`, `neighbors`, a tag for `affinity_map`, `eig_solver`
    and the number of eigenvectors stored. The first cached file with matching
    settings and at least `max_eigs` eigenvectors that loads successfully is
    returned as-is. Otherwise a new map is computed from `boards` and a save
    is attempted (a failed save is logged, not raised).

    Arguments:
        boards: iterable of boards passed to `affinity_map`; required only
            when no suitable cached map can be loaded.
        num_samples: used only as part of the cache file name.
        max_eigs: number of eigenvectors to compute, and the minimum a cached
            map must provide.
        neighbors: neighbor count passed to `build_affinity_graph`.
        affinity_map: `flat_affinity_map`, a `TemplateFeatureMap` instance
            (whose `__getitem__` is then used), or any other callable.
        eig_solver: passed as `method` to `laplacian_basis`.

    Raises:
        ValueError: if a map has to be computed but `boards` is None.
    """

    curr_dir = 'specmine/data/feature_maps/'

    # A missing cache directory just means nothing has been precomputed yet;
    # the default keeps next() from raising StopIteration in that case.
    (_, _, files) = next(os.walk('./specmine/data/feature_maps/'), (None, [], []))

    if affinity_map == specmine.feature_maps.flat_affinity_map:
        aff = 'flat'
    elif isinstance(affinity_map, specmine.feature_maps.TemplateFeatureMap):
        logger.info('getting laplacian map using template features as affinity')
        aff = '2x2_sym_template'
        affinity_map = affinity_map.__getitem__
    else:
        aff = '?'

    path_front = 'laplacian.ngs={s}.nan={k}.aff={a}.{ei}'.format(
        s=int(num_samples), k=neighbors, a=aff, ei=eig_solver)
    path_end = '.pickle.gz'

    # The file name is literal text, so escape it: unescaped, '.' matches any
    # character and the '?' tag acts as a quantifier, which kept maps saved
    # under that tag from ever being found again. The pattern is anchored so
    # that only complete file names match.
    cached_name = re.compile(
        re.escape(path_front) + r'\.nf=(\d+)' + re.escape(path_end) + '$')

    precomp = False

    for f in files:
        match = cached_name.match(f)

        if match is None:
            continue

        num_feats = int(match.group(1))

        if num_feats < max_eigs:
            continue

        path = curr_dir + f

        logger.info("using precomputed features at %s", path)
        logger.info('num features used: %i , max available: %i', num_feats, max_eigs)

        try:
            with specmine.openz(path) as featuremap_file:
                full_feature_map = pickle.load(featuremap_file)
        except Exception:
            # best effort: an unreadable cache file is skipped, not fatal
            logger.exception('error loading feature map')
        else:
            precomp = True
            break

    if precomp:
        return full_feature_map

    if boards is None:
        raise ValueError('boards is required when no precomputed laplacian map is available')

    # generate and save for next time
    path = curr_dir + path_front + '.nf={n}'.format(n=max_eigs) + path_end
    logger.info("generating laplacian eigenvector feature map with %i eigenvectors", max_eigs)

    avectors_ND = numpy.array([affinity_map(board) for board in boards])
    (affinity_NN, ball_tree) = specmine.feature_maps.build_affinity_graph(
        avectors_ND, neighbors, get_tree=True)

    basis_NB = specmine.spectral.laplacian_basis(affinity_NN, max_eigs, method=eig_solver)

    # NOTE(review): the interpolation map is always given flat_affinity_map,
    # even when the graph above was built with a different affinity_map --
    # confirm that this is intended.
    full_feature_map = specmine.feature_maps.InterpolationFeatureMap(
        basis_NB,
        specmine.feature_maps.flat_affinity_map,
        ball_tree)

    try:
        logger.info('saving computed laplacian feature map: %s', path)
        with specmine.util.openz(path, "wb") as out_file:
            pickle.dump(full_feature_map, out_file)
    except Exception:
        # best effort: the freshly computed map is still returned
        logger.exception('error trying to save laplacian map')

    return full_feature_map