Example #1
def scenario_one():
    """
    This scenario consists of a single agent with a constant c and state variables x, y, and z.
    The dynamics of this system are defined as:
    # x = y + z + c
    # y = 2z
    # z = sin(x - y)
    """
    env = Environment()
    agent = Agent(['c'], ['x', 'y', 'z'], 'agent1')
    env.register_agents(agent)
    env.compile()

    constants = pd.DataFrame({'c': np.random.rand(1000)})
    states = pd.DataFrame({var: np.random.random(1000) for var in ['x', 'y', 'z']})
    data_input = {agent: {
        'constants': constants,
        'states': states
    }}

    agent_output = pd.DataFrame({
        'x': states['y'] + states['z'] + constants['c'],
        'y': 2 * states['z'],
        'z': np.sin(states['x'] - states['y'])
    })
    data_output = {agent: agent_output}

    env.solo_train(data_input, data_output)

    env.solo_test(data_input, data_output)

    # print(env.derivativeMatrix(agent, 'x', 100))

    print(env.correlation_matrix(agent))
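
For completeness: this snippet relies on module-level imports that are not repeated here; the ones shown in Example #8 (numpy, pandas, and the env module) are sufficient:

import numpy as np
import pandas as pd
from env import Environment, Agent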
Example #2
def train_iteratively(args):
    # iteration 1
    team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
    team_red = [
        Agent(args.n_friends + idx, "red") for idx in range(args.n_enemies)
    ]

    training_agents = team_blue

    agents = team_blue + team_red
    env = Environment(agents)

    args.n_actions = 6 + args.n_enemies
    args.n_inputs = 4 + 3 * (args.n_friends - 1) + 3 * args.n_enemies

    model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)

    for agent in training_agents:
        agent.set_model(model)

    training_agents = train_agents(env, training_agents, args)
    trained_model = copy.deepcopy(training_agents[0].model)

    for iteration in range(args.n_iterations):
        args.n_steps = 10000 * (iteration + 2)
        team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
        team_red = [
            PGAgent(args.n_friends + idx, "red")
            for idx in range(args.n_enemies)
        ]

        training_agents = team_blue

        agents = team_blue + team_red
        env = Environment(agents, args)

        model = ForwardModel(input_shape=args.n_inputs,
                             n_actions=args.n_actions)
        model.load_state_dict(trained_model.state_dict())
        model.eval()
        for agent in team_red:
            agent.set_model(model)

        model = ForwardModel(input_shape=args.n_inputs,
                             n_actions=args.n_actions)
        for agent in team_blue:
            agent.set_model(model)

        training_agents = train_agents(env, training_agents, args)
        trained_model = copy.deepcopy(training_agents[0].model)
    torch.save(trained_model.state_dict(),
               args.path + f'RUN_{get_run_id()}.torch')
示例#3
0
def prepare_environment(config_address, data_address, models_address=None):
    """
    Loads the config and models, creates the agents, and builds the environment.
    """
    def read_header_from_csv(agent_name):
        with open(data_address + agent_name + "_x.csv", "r") as f:
            header = list(csv.reader(f))[0]
            return header

    with open(config_address, 'r') as sim_file:
        jstring = sim_file.read()
    config = json.loads(jstring)
    input_names = {
        agent_name: read_header_from_csv(agent_name)
        for agent_name in config
    }
    constants = {
        agent_name: list(
            filter(lambda name: name.startswith("const_"),
                   input_names[agent_name]))
        for agent_name in input_names
    }
    states = {
        agent_name: list(
            filter(lambda name: name.startswith("var_"),
                   input_names[agent_name]))
        for agent_name in input_names
    }
    agents_dict = {
        name: Agent(constants[name], states[name], name,
                    config[name]["hyper_parameters"])
        for name in input_names
    }

    env = Environment()
    env.register_agents(*(agents_dict.values()))
    for name in agents_dict:
        to_agents = [
            agents_dict[to_name] for to_name in config[name]['to_agents']
        ]
        env.register_connections(agents_dict[name], *to_agents)

    env.compile()
    if models_address is not None:
        env.load_models(models_address)
    return env, agents_dict
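
A minimal usage sketch; the file paths below are hypothetical placeholders, and the config is assumed to follow the structure read above (one entry per agent with "hyper_parameters" and "to_agents" keys, plus a <agent_name>_x.csv file per agent whose columns use the const_/var_ prefixes):

env, agents_dict = prepare_environment("sim_config.json", "data/", models_address=None)
print(list(agents_dict.keys()))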
示例#4
File: tools.py  Project: koenboeckx/VKHO
def test_replay(model_file, agent_type='qmix', period=None):
    import yaml
    from utilities import get_args
    args = get_args(yaml.safe_load(open('default_config.yaml', 'r')))
    path = '/home/koen' + args.path

    args.gamma = 0.8
    args.max_episode_length = 30
    args.step_penalty = 0.05
    args.a_terrain = True

    if agent_type == 'qmix':
        model = QMixModel(input_shape=args.n_inputs,
                          n_actions=args.n_actions,
                          args=args)
        target = QMixModel(input_shape=args.n_inputs,
                           n_actions=args.n_actions,
                           args=args)
        model.load_state_dict(torch.load(path + model_file))
        target.load_state_dict(torch.load(path + model_file))
        models = {"model": model, "target": target}
        team_blue = [
            QMIXAgent(idx, "blue", args) for idx in range(args.n_friends)
        ]
    elif agent_type == 'reinforce':
        models = RNNModel(input_shape=args.n_inputs,
                          n_actions=args.n_actions,
                          args=args)
        models.load_state_dict(torch.load(path + model_file))
        team_blue = [
            PGAgent(idx, "blue", args) for idx in range(args.n_friends)
        ]

    for agent in team_blue:
        agent.set_model(models)
    team_red = [
        Agent(args.n_friends + idx, "red") for idx in range(args.n_enemies)
    ]
    agents = team_blue + team_red
    env = RestrictedEnvironment(agents, args)
    while True:
        episode = generate_episode(env, args)
        print(len(episode))
        if len(episode) < 6:
            visualize(env, episode, period=period)
            break
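
A hedged usage example; the checkpoint filename is hypothetical and must name a model file saved under args.path:

test_replay('RUN_1234_MODEL.torch', agent_type='qmix')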
示例#5
def main():
    parser = argparse.ArgumentParser(description='RL agents for atari')
    subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")

    train_parser = subparsers.add_parser("train", help="train an RL agent for atari games")
    train_parser.add_argument("--task-id", type=int, required=True, help="0 = BeamRider, 1 = Breakout, 2 = Enduro, 3 = Pong, 4 = Qbert, 5 = Seaquest, 6 = Spaceinvaders")
    train_parser.add_argument("--gpu", type=int, default=None, help="ID of GPU to be used")
    train_parser.add_argument("--double-dqn", type=int, default=0, help="double dqn - 0 = No, 1 = Yes")
    train_parser.add_argument("--dueling-dqn", type=int, default=0, help="dueling dqn - 0 = No, 1 = Yes")

    args = parser.parse_args()

    # select the requested GPU, if any
    if args.gpu is not None:
        if torch.cuda.is_available():
            torch.cuda.set_device(args.gpu)
            print("CUDA Device: %d" % torch.cuda.current_device())



    # Run training
    seed = 0 # Use a seed of zero (you may want to randomize the seed!)
    double_dqn = (args.double_dqn == 1)
    dueling_dqn = (args.dueling_dqn == 1)
    #env = get_env(task, seed, task.env_id, double_dqn, dueling_dqn)
    env_id = "MyEnv1_varyenv"
    print("Training on %s, double_dqn %d, dueling_dqn %d" %(env_id, double_dqn, dueling_dqn))
    gridmap = np.zeros([100,100])
    gridmap[40:60, 40:60] = 2
    gridmap[10:13,20:23] = 1
    gridmap[80:83,30:33] = 1
    gridmap[75:78,90:93] = 1
    env = Grid(100,100, gridmap)#, grid_state_fn=lambda x:x.get_grid_img())  #the 3rd argument tells what the state this learner needs from grid
    #agentlist = [DQNSeeker((0,0,0), (255,255,255)) for k in range(50)]
    #env.init_agents(agentlist)
    agentlist = [Agent((0,0,0), (255,255,255)) for k in range(25)]
    env.init_agents(agentlist)
    agent_learn(env, env_id, num_timesteps=1000000, double_dqn=double_dqn, dueling_dqn=dueling_dqn, varyenv=True)
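
A hypothetical invocation of this entry point (the script name is assumed; the flags are the ones registered on the "train" subparser above):

python main.py train --task-id 1 --gpu 0 --double-dqn 1 --dueling-dqn 1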
示例#6
def train_iteratively(args, agent_type):
    """Train agent iteratively by exchanging agent networks
    agent_type = 'reinforce' | 'qmix'
    """
    # setup the agents & environment
    args.n_actions = 6 + args.n_enemies
    args.n_inputs = 4 + 3 * (args.n_friends -
                             1) + 3 * args.n_enemies + args.n_enemies
    # setup model
    if agent_type == 'reinforce':
        model = RNNModel(input_shape=args.n_inputs,
                         n_actions=args.n_actions,
                         args=args)
        team_blue = [
            PGAgent(idx, "blue", args) for idx in range(args.n_friends)
        ]
    elif agent_type == 'qmix':
        model_ = QMixModel(input_shape=args.n_inputs,
                           n_actions=args.n_actions,
                           args=args)
        target = QMixModel(input_shape=args.n_inputs,
                           n_actions=args.n_actions,
                           args=args)
        model = {"model": model_, "target": target}
        team_blue = [
            QMIXAgent(idx, "blue", args) for idx in range(args.n_friends)
        ]
    team_red = [
        Agent(args.n_friends + idx, "red") for idx in range(args.n_enemies)
    ]

    training_agents = team_blue

    agents = team_blue + team_red
    env = RestrictedEnvironment(agents, args)

    for agent in training_agents:
        agent.set_model(model)

    # first model
    if agent_type == 'reinforce':
        training_agents = train_agents_reinforce(env, training_agents, args)
    elif agent_type == 'qmix':
        training_agents = train_agents_qmix(env, training_agents, model, args)
    trained_model = copy.deepcopy(training_agents[0].model)

    for iteration in range(args.n_iterations):
        print(f'Iteration {iteration + 1}')
        #args.n_steps = 10000 * (iteration + 2) # adapt step size TODO: find optimal criterion, e.g. stop at certain win rate
        # upgrade team red
        if agent_type == 'reinforce':
            team_red = [
                PGAgent(args.n_friends + idx, "red", args)
                for idx in range(args.n_enemies)
            ]
        elif agent_type == 'qmix':
            team_red = [
                QMIXAgent(args.n_friends + idx, "red", args)
                for idx in range(args.n_enemies)
            ]

        training_agents = team_blue

        agents = team_blue + team_red
        env = RestrictedEnvironment(agents, args)

        if agent_type == 'reinforce':
            opponent_model = RNNModel(input_shape=args.n_inputs,
                                      n_actions=args.n_actions,
                                      args=args)
            opponent_model.load_state_dict(trained_model.state_dict())
            opponent_model.eval()
        elif agent_type == 'qmix':
            model_ = QMixModel(input_shape=args.n_inputs,
                               n_actions=args.n_actions,
                               args=args)
            model_.load_state_dict(trained_model.state_dict())
            target = QMixModel(input_shape=args.n_inputs,
                               n_actions=args.n_actions,
                               args=args)
            opponent_model = {"model": model_, "target": target}

        for agent in team_red:
            agent.set_model(opponent_model)

        if args.reset_model:
            if agent_type == 'reinforce':
                model = RNNModel(input_shape=args.n_inputs,
                                 n_actions=args.n_actions,
                                 args=args)
            elif agent_type == 'qmix':
                model_ = QMixModel(input_shape=args.n_inputs,
                                   n_actions=args.n_actions,
                                   args=args)
                target = QMixModel(input_shape=args.n_inputs,
                                   n_actions=args.n_actions,
                                   args=args)
                model = {"model": model_, "target": target}

            for agent in team_blue:
                agent.set_model(model)

        if agent_type == 'reinforce':
            training_agents = train_agents_reinforce(env, training_agents,
                                                     args)
        elif agent_type == 'qmix':
            training_agents = train_agents_qmix(env, training_agents, model,
                                                args)
        trained_model = copy.deepcopy(training_agents[0].model)

    from os.path import expanduser
    home = expanduser("~")
    torch.save(trained_model.state_dict(),
               home + args.path + f'RUN_{get_run_id()}_MODEL.torch')
示例#7
def train(args):
    team_blue = [IQLAgent(idx, "blue") for idx in range(args.n_friends)]
    team_red = [
        Agent(idx + args.n_friends, "red") for idx in range(args.n_enemies)
    ]

    training_agents = team_blue

    agents = team_blue + team_red
    if args.env_type == 'normal':
        env = Environment(agents, args)
    if args.env_type == 'restricted':
        env = Environment2(agents, args)

    args.n_actions = 6 + args.n_enemies  # 6 fixed actions + 1 aim action per enemy
    args.n_inputs = 4 + 3 * (
        args.n_friends -
        1) + 3 * args.n_enemies  # see process function in models.py
    models = generate_models(args.n_inputs, args.n_actions)
    for agent in training_agents:
        agent.set_model(models)

    buffer = ReplayBuffer(size=args.buffer_size)
    epi_len, nwins = 0, 0

    ex.log_scalar(f'win', 0.0, step=0)  # force the run to start at 0 wins
    for step_idx in range(args.n_steps):
        episode = generate_episode(env)
        buffer.insert_list(episode)
        if not buffer.can_sample(args.batch_size):
            continue

        epi_len += len(episode)
        reward = episode[-1].rewards["blue"]
        if episode[-1].rewards["blue"] == 1:
            nwins += 1
        batch = buffer.sample(args.batch_size)
        for agent in training_agents:
            loss = agent.update(batch)
            if step_idx > 0 and step_idx % args.sync_interval == 0:
                agent.sync_models()  # TODO: the same models get synced for every agent => fix this

            ex.log_scalar(f'loss{agent.id}', loss, step=step_idx)
            ex.log_scalar(f'epsilon', agent.scheduler(), step=step_idx)

        if step_idx > 0 and step_idx % PRINT_INTERVAL == 0:
            s = f"Step {step_idx}: loss: {loss:8.4f} - "
            s += f"Average length: {epi_len/PRINT_INTERVAL:5.2f} - "
            s += f"win ratio: {nwins/PRINT_INTERVAL:4.3f} - "
            s += f"epsilon: {agent.scheduler():4.3f} - "
            print(s)
            epi_len, nwins = 0, 0
            #_ = generate_episode(env, render=True)

        ex.log_scalar(f'length', len(episode), step=step_idx + 1)
        ex.log_scalar(f'win',
                      int(episode[-1].rewards["blue"] == 1),
                      step=step_idx + 1)
        ex.log_scalar(f'reward', reward, step=step_idx + 1)

    from os.path import expanduser
    home = expanduser("~")
    #for agent in training_agents:
    #    agent.save(home+args.path+f'RUN_{get_run_id()}_AGENT{agent.id}.p')
    torch.save(models["model"].state_dict(),
               home + args.path + f'RUN_{get_run_id()}.torch')
示例#8
import numpy as np
import pandas as pd
from aggregator import DummyAggregator
from env import Environment, Agent

if __name__ == '__main__':
    env = Environment()
    agent1 = Agent(["c1", "c2"], ['p1', 'p2'], 'agent1')
    agent2 = Agent(["c1", "c2"], ['p1', 'p2', 'p3'], 'agent2')
    data = {
        agent1: {
            "states":
            pd.DataFrame({
                'p1': np.random.rand(100),
                'p2': np.random.rand(100)
            }),
            "constants":
            pd.DataFrame({
                "c1": np.random.rand(100),
                "c2": np.random.rand(100)
            })
        },
        agent2: {
            "states":
            pd.DataFrame({
                'p1': np.random.rand(100),
                'p2': np.random.rand(100),
                'p3': np.random.rand(100)
            }),
            "constants":
            pd.DataFrame({
示例#9
def scenario_two():
    """
    In this scenario we have three agents: a1, a2, and a3.
    The relationship diagram is as follows (self-loops are assumed for all agents and are not shown here):
    a1 => {a2, a3}
    a2 => {a1}
    a3 => {a2}

    C(a1) = {l1, l2}    S(a1) = {x1, x2, x3}
    C(a2) = {m1}        S(a2) = {y1, y2, y3, y4}
    C(a3) = {}          S(a3) = {z1, z2}

    Dynamics of a1:
    x1 = (y1y2)/l1 + e^(-3x1)
    x2 = l2 * ln(|2y4|)
    x3 = x1 - x2

    Dynamics of a2:
    y1 = y2 / 4 + cos(z2)
    y2 = m1y1 + y2
    y3 = cos(z1y2 + z2y1)
    y4 = 1 / (m1 + x3) - floor(x1)

    Dynamics of a3:
    z1 = sqrt(3|z2|)
    z2 = e^(-(z1 - 3)^2)
    """
    env = Environment()
    a1 = Agent(['l1', 'l2'], ['x1', 'x2', 'x3'], 'agent1')
    a2 = Agent(['m1'], ['y1', 'y2', 'y3', 'y4'], 'agent2')
    a3 = Agent([], ['z1', 'z2'], 'agent3')
    env.register_agents(a1, a2, a3)
    env.register_connections(a1, a2, a3)
    env.register_connections(a2, a1)
    env.register_connections(a3, a2)
    env.compile()

    a1_constants = {
        'l1': np.random.rand(1000),
        'l2': np.random.rand(1000)
    }

    a1_states = {
        'x1': np.random.rand(1000),
        'x2': np.random.rand(1000),
        'x3': np.random.rand(1000)
    }

    a2_constants = {
        'm1': np.random.rand(1000)
    }

    a2_states = {
        'y1': np.random.rand(1000),
        'y2': np.random.rand(1000),
        'y3': np.random.rand(1000),
        'y4': np.random.rand(1000)
    }

    a3_states = {
        'z1': np.random.rand(1000),
        'z2': np.random.rand(1000)
    }

    data_input = {
        a1: {
            'constants': pd.DataFrame(a1_constants),
            'states': pd.DataFrame(a1_states)
        },
        a2: {
            'constants': pd.DataFrame(a2_constants),
            'states': pd.DataFrame(a2_states)
        },
        a3: {
            'constants': pd.DataFrame(),
            'states': pd.DataFrame(a3_states)
        }
    }

    a1_outputs = {
        'x1': a2_states['y1'] * a2_states['y2'] / a1_constants['l1'] + np.exp(-3 * a1_states['x1']),
        'x2': a1_constants['l2'] * np.log(np.abs(2 * a2_states['y4'])),
        'x3': a1_states['x1'] - a1_states['x2']
    }

    a2_outputs = {
        'y1': a2_states['y2'] / 4 + np.cos(a3_states['z2']),
        'y2': a2_constants['m1'] * a2_states['y1'] + a2_states['y2'],
        'y3': np.cos(a3_states['z1'] * a2_states['y2'] + a3_states['z2'] * a2_states['y1']),
        'y4': 1 / (a2_constants['m1'] + a1_states['x3']) - np.floor(a1_states['x1'])
    }

    a3_outputs = {
        'z1': np.sqrt(3 * np.abs(a3_states['z2'])),
        'z2': np.exp(- np.square(a3_states['z1'] - 3))
    }

    data_output = {
        a1: pd.DataFrame(a1_outputs),
        a2: pd.DataFrame(a2_outputs),
        a3: pd.DataFrame(a3_outputs)
    }

    env.solo_train(data_input, data_output, training_hyper_params={
        a1: {'epochs': 100}, a2: {'epochs': 100}, a3: {'epochs': 50}})

    env.solo_test(data_input, data_output)

    print(env.correlation_matrix(a1))
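
As a further check, the derivative-matrix call that appears commented out in Example #1 could be appended at the end of scenario_two as well; a hedged sketch (the third argument is assumed to be a sample count, since its meaning is not documented in these snippets):

    # Assumption: derivativeMatrix(agent, state_variable, n) inspects the trained model of the
    # given agent over n sampled points; only the call pattern is taken from Example #1.
    print(env.derivativeMatrix(a1, 'x1', 100))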
示例#10
            ex.log_scalar(f'win_red', int(episode[-1].rewards["red"] == 1))

            if episode[-1].rewards["blue"] == 1:
                nwins += 1

        for agent in training_agents:
            loss = agent.update(batch)
            ex.log_scalar(f'loss{agent.id}', loss)

        s = f"Step {step_idx}: "
        s += f"Average length: {epi_len/args.n_episodes_per_step:5.2f} - "
        s += f"win ratio: {nwins/args.n_episodes_per_step:4.3f} - "
        print(s)
        epi_len, nwins = 0, 0

    return training_agents


if __name__ == '__main__':
    from env import Environment, Agent
    from settings import args
    team_blue = [Agent(idx, "blue") for idx in range(args.n_friends)]
    team_red = [
        Agent(idx + args.n_friends, "red") for idx in range(args.n_enemies)
    ]

    training_agents = team_blue

    agents = team_blue + team_red
    env = Environment(agents)
    episode = generate_episode(env, render=True)
示例#11
File: qmix.py  Project: koenboeckx/VKHO
def train(args):
    team_blue = [QMIXAgent(idx, "blue", args) for idx in range(args.n_friends)]
    team_red = [
        Agent(idx + args.n_friends, "red") for idx in range(args.n_enemies)
    ]

    training_agents = team_blue

    agents = team_blue + team_red
    if args.env_type == 'normal':
        env = Environment(agents, args)
    if args.env_type == 'restricted':
        env = RestrictedEnvironment(agents, args)

    args.n_actions = 6 + args.n_enemies  # 6 fixed actions + 1 aim action per enemy
    args.n_inputs = 4 + 3 * (
        args.n_friends - 1
    ) + 3 * args.n_enemies + args.n_enemies  # see process function in models.py
    models = generate_models(args.n_inputs, args.n_actions, args)
    for agent in training_agents:
        agent.set_model(models)

    buffer = ReplayBuffer(size=args.buffer_size)
    mac = MultiAgentController(env, training_agents, models, args)
    for step_idx in range(args.n_steps):
        episode = generate_episode(env, args)
        buffer.insert_list(episode)
        if len(buffer) < args.batch_size:
            continue
        batch = buffer.sample(args.batch_size)

        loss = mac.update(batch)

        if step_idx % args.sync_interval == 0:
            mac.sync_networks()

        ## logging
        ex.log_scalar('loss', loss)

        if step_idx % args.log_interval == 0:
            episode = generate_episode(env, args, test_mode=True)
            if step_idx == 0:
                episode[-1].rewards["blue"] = 0
                episode[-1].rewards["red"] = 1
            ex.log_scalar('length', len(episode), step=step_idx)
            ex.log_scalar('reward', episode[-1].rewards["blue"], step=step_idx)
            ex.log_scalar(f'win_blue',
                          int(episode[-1].rewards["blue"] == 1),
                          step=step_idx)
            ex.log_scalar(f'win_red',
                          int(episode[-1].rewards["red"] == 1),
                          step=step_idx)
            ex.log_scalar('epsilon',
                          training_agents[0].scheduler(),
                          step=step_idx)

        if PRINT and step_idx > 0 and step_idx % PRINT_INTERVAL == 0:
            print(
                f"Step {step_idx}: loss = {loss}, reward = {episode[-1].rewards['blue']}"
            )
            #episode = generate_episode(env, render=True)

        if args.save_model and step_idx > 0 and step_idx % args.save_model_interval == 0:
            from os.path import expanduser
            home = expanduser("~")
            torch.save(models["model"].state_dict(),
                       home + args.path + f'RUN_{get_run_id()}_MODEL.torch')
            if args.use_mixer:
                torch.save(
                    mac.mixer.state_dict(),
                    home + args.path + f'RUN_{get_run_id()}_MIXER.torch')