Example #1
def draft_b_lp(columns: int):
    # Create environment
    environment = DeepSeaTreasureRightDownStochastic(columns=columns)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.000001)

    # Create instance of AgentB
    agent = AgentB(environment=environment, limited_precision=True)

    agent.simulate()
Example #2
def get_trained_agent() -> AgentBN:
    # Environment
    environment = ResourceGatheringEpisodic()

    # Agent
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    agent.train(graph_type=GraphType.SWEEP, limit=10)

    return agent
Example #3
def main():
    # Get trained agent
    print('Training agent...')
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0), False)

    # Initial vectors
    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Show information
    print('Vectors obtained after the m3_max algorithm:')
    print(vectors, end='\n\n')

    # Define a tolerance
    decimal_precision = 0.0000001

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    print('Evaluating the recovered policies...')

    # For each vector
    for vector in vectors:
        # Specify objective vector
        objective_vector = vector.copy()

        print('Recovering policy for objective vector: {}...'.format(
            objective_vector))

        # Get simulation from this agent
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        print('Evaluating the recovered policy...', end='\n\n')

        # Evaluate the policy until it converges within the `decimal_precision` tolerance.
        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=decimal_precision)

        # Save the policy and its evaluation.
        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
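The `simulation` dictionary built above maps each objective vector to a `(policy, policy_evaluated)` pair, so the final `print(simulation)` can be replaced by a more readable report. A minimal sketch that relies only on the structure created in this example:

def print_simulation(simulation: dict):
    # Each entry pairs an objective vector with its recovered policy and the
    # evaluation of that policy.
    for objective_vector, (policy, policy_evaluated) in simulation.items():
        print('Objective vector: {}'.format(objective_vector))
        print('\tRecovered policy: {}'.format(policy))
        print('\tPolicy evaluation: {}'.format(policy_evaluated), end='\n\n')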
Example #4
def train_from_zero():
    # Define variables
    limit = int(3e6)
    epsilon = 0.4
    max_steps = 1000
    alpha = 0.1
    gamma = 1
    graph_type = GraphType.EPISODES
    columns_list = range(1, 6)
    decimals = [0.01, 0.05]

    for decimal_precision in decimals:

        # Set vector decimal precision
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        for columns in columns_list:
            # Environment
            environment = DeepSeaTreasureRightDownStochastic(columns=columns)

            # Create agent
            agent = AgentMPQ(environment=environment,
                             hv_reference=environment.hv_reference,
                             epsilon=epsilon,
                             alpha=alpha,
                             gamma=gamma,
                             max_steps=max_steps)

            # Time train
            t0 = time.time()

            # Show the number of columns
            print('# of columns: {}'.format(columns))

            # Agent training
            agent.train(graph_type=graph_type, limit=limit)

            # Calc total time
            total_time = time.time() - t0

            prepare_for_dumps(agent, columns, decimal_precision, graph_type,
                              limit, total_time)
Example #5
def main():
    # Get trained agent
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0))

    # agent: AgentBN = AgentBN.load(
    #     filename='bn/models/rg_1584437328_0.005.bin'
    # )

    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=0.0000001)

    for vector in vectors:
        # Recreate the index objective vector.
        # objective_vector = IndexVector(
        #     index=vector, vector=trained_agent.v[initial_state][vector]
        # )

        objective_vector = vector.copy()

        # Get simulation from this agent
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=0.0000001)

        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
Example #6
def train_agent() -> Agent:
    # Environment
    # environment = DeepSeaTreasureRightDownStochastic(columns=3)
    # environment = DeepSeaTreasureRightDown(columns=3)
    # environment = PyramidMDPNoBounces(diagonals=3, n_transition=0.95)
    # environment = DeepSeaTreasure()
    environment = ResourceGathering()

    # Agent
    # agent = AgentMPQ(
    #     environment=environment, hv_reference=environment.hv_reference, alpha=0.1, epsilon=0.4, max_steps=1000
    # )
    # agent = AgentMPQ(environment=environment, hv_reference=environment.hv_reference, alpha=0.01)
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    # agent.train(graph_type=GraphType.SWEEP, tolerance=0.00001)
    agent.train(graph_type=GraphType.SWEEP, limit=13)

    return agent
Example #7
def train_from_file():
    # Models Path
    models_path = 'mpq/models/dstrds_1579869395_1.0_4.bin'

    agent: AgentMPQ = u_models.binary_load(
        path=dumps_path.joinpath(models_path))

    # Data Path
    data_path = dumps_path.joinpath(
        'mpq/train_data/dstrds_1579869395_1.0_4.yml')
    data_file = data_path.open(mode='r', encoding='UTF-8')

    # Load yaml from file
    data = yaml.load(data_file, Loader=yaml.FullLoader)

    # Extract relevant data for training
    before_training_execution = float(data['time'])
    decimal_precision = float(data['agent']['decimal_precision'])
    graph_type = GraphType.from_string(data['training']['graph_type'])
    limit = int(data['training']['limit'])
    columns = int(data['environment']['columns'])

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    # Time train
    t0 = time.time()

    # Agent training
    agent.train(graph_type=graph_type, limit=limit)

    # Calc total time
    total_time = (time.time() - t0) + before_training_execution

    prepare_for_dumps(agent, columns, decimal_precision, graph_type, limit,
                      total_time)
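train_from_file reads the training metadata back from a YAML file. A hypothetical sketch of the structure that yaml.load would have to return for the reads above to succeed (the field names are taken from the code; the concrete values and the graph_type string are placeholders):

# Hypothetical result of yaml.load for the file read above.
data = {
    'time': 123.4,
    'agent': {'decimal_precision': 1.0},
    'training': {'graph_type': 'episodes', 'limit': 3000000},
    'environment': {'columns': 4},
}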
Example #8
def draft_w():
    tolerance = 0.00001

    for decimal_precision in [0.05, 0.005, 0.001]:
        # Create environment
        # environment = ResourceGatheringEpisodicSimplified()
        # environment = ResourceGatheringSimplified()
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_w = AgentW(environment=environment,
                         convergence_graph=True,
                         gamma=.9)

        # Time train
        t0 = time.time()

        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        agent_w.train(graph_type=GraphType.SWEEP, limit=1)

        # Calc total time
        total_time = time.time() - t0

        # Convert to vectors
        vectors = {
            key: [vector.tolist() for vector in vectors]
            for key, vectors in agent_w.v.items()
        }

        # Prepare full_data to dumps
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_w.v[environment.initial_state]),
                'full': sum(len(vectors) for vectors in agent_w.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_w.gamma,
            'initial_q_value': agent_w.initial_q_value,
            'initial_seed': agent_w.initial_seed,
            'interval_to_get_data': agent_w.interval_to_get_data,
            'max_steps': agent_w.max_iterations,
            'total_sweeps': agent_w.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dumps partial execution
        dumps(data=data, environment=environment)
Example #9
def draft_w():
    tolerance = 0.00001
    gamma = 0.95

    # for decimal_precision in [1, 0.5, 0.1, 0.05, 0.005]:
    for decimal_precision in [0.1, 0.05, 0.005]:

        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # for i in range(1, 5):
        for i in [10]:
            # Create environment
            # environment = PyramidMDPNoBounces(diagonals=i, n_transition=0.95)
            # environment = DeepSeaTreasure(columns=i)
            # environment = ResourceGatheringEpisodic()
            environment = ResourceGathering()

            # Create agent
            agent_w = AgentW(environment=environment, convergence_graph=False, gamma=gamma)

            # Time train
            t0 = time.time()

            # # Calc number of sweeps limit
            # limit = (i + 1) * 3

            print('{}-diagonals'.format(i))
            # print('{}-sweeps'.format(limit))

            agent_w.train(graph_type=GraphType.SWEEP, limit=40)
            # agent_w.train(graph_type=GraphType.SWEEP, tolerance=tolerance)

            # x = list(range(1, agent_w.total_sweeps))
            # y = agent_w.convergence_graph_data.copy()
            #
            # plt.title('{} Diagonals'.format(i))
            # plt.ylabel('Hypervolume max difference')
            # plt.xlabel('Sweeps')
            # plt.plot(x, y)
            # plt.show()

            # Calc total time
            total_time = time.time() - t0

            # Convert to vectors
            vectors = {key: [vector.tolist() for vector in vectors] for key, vectors in agent_w.v.items()}

            # Prepare full_data to dumps
            data = {
                'time': '{}s'.format(total_time),
                'memory': {
                    'v_s_0': len(agent_w.v[environment.initial_state]),
                    'full': sum(len(vectors) for vectors in agent_w.v.values())
                },
                'vectors': vectors
            }

            # Configuration of environment
            environment_info = vars(environment).copy()
            environment_info.pop('_action_space', None)
            environment_info.pop('np_random', None)

            # Configuration of agent
            agent_info = {
                'gamma': agent_w.gamma,
                'initial_q_value': agent_w.initial_q_value,
                'initial_seed': agent_w.initial_seed,
                'interval_to_get_data': agent_w.interval_to_get_data,
                'max_steps': agent_w.max_iterations,
                'total_sweeps': agent_w.total_sweeps,
                'tolerance': tolerance
            }

            # Extra data
            data.update({'environment': environment_info})
            data.update({'agent': agent_info})

            # Dumps partial execution
            dumps(data=data, columns=i, environment=environment)
Example #10
def draft_w():
    gamma = .9

    # for decimal_precision in [0.01, 0.005, 1, 0.5, 0.05, 0.1]:
    for decimal_precision in [0.005]:

        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)
        tolerance = decimal_precision

        for i in ['full']:

            # Create environment
            for environment in [ResourceGathering()]:

                # Create agent
                agent_bn = AgentBN(environment=environment,
                                   convergence_graph=False,
                                   gamma=gamma)

                # Time train
                t0 = time.time()

                # # Calc number of sweeps limit
                print('{} cols \ntolerance: {}'.format(i, tolerance))

                agent_bn.train(graph_type=GraphType.SWEEP,
                               tolerance=tolerance,
                               sweeps_dump=30)

                # Calc total time
                total_time = time.time() - t0

                # Convert to vectors
                vectors = {
                    key: [vector.tolist() for vector in vectors]
                    for key, vectors in agent_bn.v.items()
                }

                # Prepare full_data to dumps
                data = {
                    'time': '{}s'.format(total_time),
                    'memory': {
                        'v_s_0':
                        len(agent_bn.v[environment.initial_state]),
                        'full':
                        sum(len(vectors) for vectors in agent_bn.v.values())
                    },
                    'vectors': vectors
                }

                # Configuration of environment
                environment_info = vars(environment).copy()
                environment_info.pop('_action_space', None)
                environment_info.pop('np_random', None)

                # Configuration of agent
                agent_info = {
                    'gamma': agent_bn.gamma,
                    'initial_q_value': agent_bn.initial_q_value,
                    'initial_seed': agent_bn.initial_seed,
                    'interval_to_get_data': agent_bn.interval_to_get_data,
                    'total_sweeps': agent_bn.total_sweeps,
                    'tolerance': tolerance
                }

                # Extra data
                data.update({'environment': environment_info})
                data.update({'agent': agent_info})

                # Dumps partial execution
                # dumps(data=data, columns=i, environment=environment)
                dumps(data=data, environment=environment)

                # Dump agent
                agent_bn.save()
Example #11
def test_agents(environment: Environment,
                hv_reference: Vector,
                variable: str,
                agents_configuration: dict,
                graph_configuration: dict,
                epsilon: float = None,
                alpha: float = None,
                max_steps: int = None,
                states_to_observe: list = None,
                number_of_agents: int = 30,
                gamma: float = 1.,
                solution: list = None,
                initial_q_value: Vector = None,
                evaluation_mechanism: EvaluationMechanism = None):
    """
    If DATA_PER_STATE is chosen in `graph_configuration`, the agent trains for `limit` steps and only gathers
    train_data during the last steps (`interval` is ignored).

    If MEMORY is chosen in `graph_configuration`, the agent trains for `limit` steps and takes train_data every
    `interval` steps.

    :param initial_q_value:
    :param graph_configuration:
    :param solution:
    :param environment:
    :param hv_reference:
    :param variable:
    :param agents_configuration:
    :param epsilon:
    :param alpha:
    :param max_steps:
    :param states_to_observe:
    :param number_of_agents:
    :param gamma:
    :param evaluation_mechanism:
    :return:
    """

    # Extract graph_types
    graph_types = set(graph_configuration.keys())

    if len(graph_types) > 2:
        print("Isn't recommended more than 2 graphs")

    # Parameters
    if states_to_observe is None:
        states_to_observe = {environment.initial_state}

    complex_states = isinstance(environment.observation_space[0],
                                gym.spaces.Tuple)

    if not complex_states and GraphType.DATA_PER_STATE in graph_types:
        print(
            "This environment does not have complex states, so the DATA_PER_STATE graph is disabled."
        )
        graph_configuration.pop(GraphType.DATA_PER_STATE)

    # Build environment
    env_name = environment.__class__.__name__
    env_name_snake = str_to_snake_case(env_name)

    # File timestamp
    timestamp = int(time.time())

    # Write all information in configuration path
    write_config_file(timestamp=timestamp,
                      number_of_agents=number_of_agents,
                      env_name_snake=env_name_snake,
                      seed=','.join(map(str, range(number_of_agents))),
                      epsilon=epsilon,
                      alpha=alpha,
                      gamma=gamma,
                      max_steps=max_steps,
                      variable=variable,
                      agents_configuration=agents_configuration,
                      graph_configuration=graph_configuration,
                      evaluation_mechanism=evaluation_mechanism)

    # Create graphs structure
    graphs, graphs_info = initialize_graph_data(
        graph_types=graph_types, agents_configuration=agents_configuration)

    # Show information
    print('Environment: {}'.format(env_name))

    for graph_type in graph_types:

        # Extract interval and limit
        interval = graph_configuration[graph_type].get('interval', 1)
        limit = graph_configuration[graph_type]['limit']

        # Show information
        print(('\t' * 1) +
              "Graph type: {} - [{}/{}]".format(graph_type, limit, interval))

        # Set interval to get train_data
        Agent.interval_to_get_data = interval

        # Execute an iteration with a different initial_seed for each agent
        for seed in range(number_of_agents):

            # Show information
            print(('\t' * 2) + "Execution: {}".format(seed + 1))

            # For each configuration
            for agent_type in agents_configuration:

                # Show information
                print(('\t' * 3) + 'Agent: {}'.format(agent_type.value))

                # Extract configuration for that agent
                for configuration in agents_configuration[agent_type].keys():

                    # Show information
                    print(
                        ('\t' * 4) + '{}: {}'.format(variable, configuration),
                        end=' ')

                    # Mark of time
                    t0 = time.time()

                    # Reset environment
                    environment.reset()
                    environment.seed(seed=seed)

                    # Variable parameters
                    parameters = {
                        'epsilon': epsilon,
                        'alpha': alpha,
                        'gamma': gamma,
                        'max_steps': max_steps,
                        'evaluation_mechanism': evaluation_mechanism,
                        'initial_value': initial_q_value
                    }

                    if variable == 'decimal_precision':
                        Vector.set_decimal_precision(
                            decimal_precision=configuration)
                    else:
                        # Modify current configuration
                        parameters.update({variable: configuration})

                    agent, v_s_0 = train_agent_and_get_v_s_0(
                        agent_type=agent_type,
                        environment=environment,
                        graph_type=graph_type,
                        graph_types=graph_types,
                        hv_reference=hv_reference,
                        limit=limit,
                        seed=seed,
                        parameters=parameters,
                        states_to_observe=states_to_observe)

                    print('-> {:.2f}s'.format(time.time() - t0))

                    train_data = dict()

                    if agent_type is AgentType.PQL and graph_type is GraphType.DATA_PER_STATE:
                        train_data.update({
                            'vectors': {
                                state: {
                                    action: agent.q_set(state=state,
                                                        action=action)
                                    for action in agent.nd[state].keys()
                                }
                                for state in agent.nd.keys()
                            }
                        })

                    # Order vectors by proximity to the origin Vec(0)
                    train_data.update({
                        'v_s_0':
                        Vector.order_vectors_by_origin_nearest(vectors=v_s_0),
                        # 'q': agent.q,
                        # 'v': agent.v
                    })

                    # Write vectors found into path
                    dumps_train_data(
                        timestamp=timestamp,
                        seed=seed,
                        env_name_snake=env_name_snake,
                        train_data=train_data,
                        variable=variable,
                        agent_type=agent_type,
                        configuration=configuration,
                        evaluation_mechanism=evaluation_mechanism,
                        columns=environment.observation_space[0].n)

                    # Update graphs
                    update_graphs(graphs=graphs,
                                  agent=agent,
                                  graph_type=graph_type,
                                  configuration=str(configuration),
                                  agent_type=agent_type,
                                  states_to_observe=states_to_observe,
                                  graphs_info=graphs_info,
                                  solution=solution)

    prepare_data_and_show_graph(timestamp=timestamp,
                                env_name=env_name,
                                env_name_snake=env_name_snake,
                                graphs=graphs,
                                number_of_agents=number_of_agents,
                                agents_configuration=agents_configuration,
                                alpha=alpha,
                                epsilon=epsilon,
                                gamma=gamma,
                                graph_configuration=graph_configuration,
                                max_steps=max_steps,
                                initial_state=environment.initial_state,
                                variable=variable,
                                graphs_info=graphs_info,
                                evaluation_mechanism=evaluation_mechanism,
                                solution=solution)
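As the docstring notes, the behavior of test_agents depends on graph_configuration: with GraphType.DATA_PER_STATE the agent trains for `limit` steps and train_data is only gathered at the end, while with MEMORY it is gathered every `interval` steps. A minimal sketch of the two configuration dictionaries, inferred from how they are read above (the concrete numbers are hypothetical, and the inner values of agents_configuration are placeholders, since only their keys are used in this function):

# graph_configuration: one entry per GraphType, each with a mandatory 'limit'
# and an optional 'interval' (defaults to 1 when omitted).
graph_configuration = {
    GraphType.EPISODES: {'limit': 1000, 'interval': 10},
    GraphType.MEMORY: {'limit': 1000, 'interval': 100},
}

# agents_configuration: for each AgentType, the keys are the values tried for
# the `variable` under study (e.g. different alphas); only the keys are read here.
agents_configuration = {
    AgentType.PQL: {0.1: None, 0.3: None},
}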
Example #12
def main():
    # Define gamma
    gamma = .9
    # Dump a partial result every 30 sweeps
    sweeps_dumps = 30

    for decimal_precision in [0.01, 0.005]:
        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # Use the same tolerance as the decimal precision (this can be changed)
        tolerance = decimal_precision

        # Create environment
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_bn = AgentBN(environment=environment, convergence_graph=False, gamma=gamma)

        # Time train
        t0 = time.time()

        print('Training with tolerance: {}...'.format(tolerance))

        agent_bn.train(graph_type=GraphType.SWEEP, tolerance=tolerance, sweeps_dump=sweeps_dumps)

        # Calc total time
        total_time = time.time() - t0

        # Convert to vectors
        vectors = {key: [vector.tolist() for vector in vectors] for key, vectors in agent_bn.v.items()}

        # Prepare full_data to dumps
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_bn.v[environment.initial_state]),
                'full': sum(len(vectors) for vectors in agent_bn.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_bn.gamma,
            'initial_q_value': agent_bn.initial_q_value,
            'initial_seed': agent_bn.initial_seed,
            'interval_to_get_data': agent_bn.interval_to_get_data,
            'total_sweeps': agent_bn.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dumps partial execution
        dumps(data=data, environment=environment)

        # Dump binary information
        agent_bn.save()