Example #1
 def set_network(self, ph, args, scope):
     fsize = args['fsize']
     conv_depth = args['conv_depth']
     n_layers = args['n_layers']
     kernel_init = args['kernel_init']
     
     network_output = Network(ph, self.out_size, scope, fsize, conv_depth, n_layers, n_strides=2, kernel_init=kernel_init)
     return network_output
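Not part of the original snippet: a hypothetical args dict of the shape that set_network above expects. Only the keys are taken from the code; the values are illustrative placeholders.

args = {
    'fsize': 64,          # placeholder value
    'conv_depth': 3,      # placeholder value
    'n_layers': 2,        # placeholder value
    'kernel_init': None,  # placeholder value
}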
Example #2
 def make_encoder(self, state, z_size, scope):
     """ Encodes the given state to z_size => create guass. distribution for q(z | s)
     """
     # conv operations
     z_mean = Network(state,
                      z_size,
                      scope,
                      self.hid_size,
                      conv_depth=self.n_hidden)
     z_logstd = tf.get_variable("logstd", shape=(z_size, ))
     return tfp.distributions.MultivariateNormalDiag(
         loc=z_mean, scale_diag=tf.exp(z_logstd))
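A minimal sketch, not from the original code, of how the diagonal-Gaussian posterior returned by make_encoder can be sampled and scored. It assumes TensorFlow 2 with TensorFlow Probability in eager mode; shapes are illustrative placeholders.

import tensorflow as tf
import tensorflow_probability as tfp

z_mean = tf.zeros((8, 16))     # stand-in for the Network output (batch 8, z_size 16)
z_logstd = tf.zeros((16,))     # stand-in for the learned log-std variable
q_z = tfp.distributions.MultivariateNormalDiag(loc=z_mean, scale_diag=tf.exp(z_logstd))
z = q_z.sample()               # one latent per batch element, shape (8, 16)
log_q = q_z.log_prob(z)        # log q(z | s), shape (8,)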
Example #3
 def dynamics_func(self, state, action, reuse):
     # add state, action normalization?
     sa = tf.concat([state, action], axis=1)
     delta_pred = Network(sa,
                          self.enc_dim,
                          'dynamics',
                          self.hid_size,
                          conv_depth=0,
                          n_hidden_dense=self.n_hidden,
                          reuse=reuse)
     n_state_pred = state + delta_pred
     return n_state_pred
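A minimal sketch, not from the original code, of how a delta-predicting dynamics model like dynamics_func can be rolled out over a sequence of actions. dynamics_fn is a hypothetical stand-in for the trained network call.

import numpy as np

def rollout(dynamics_fn, state, actions):
    # Residual dynamics: the model predicts the change in state,
    # so the next state is the current state plus the predicted delta.
    states = [state]
    for action in actions:
        delta = dynamics_fn(states[-1], action)
        states.append(states[-1] + delta)
    return np.stack(states)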
Example #4
    def __init__(self, pg_screen, screen_size):
        Screen.__init__(self, pg_screen, screen_size)
        self.pg_screen = pg_screen
        self.screen_size = screen_size
        self.network = Network()
        self.font = Font(None, 30)
        self.sound_on = True
        self.music_on = True

        # load the assets
        self.logo = load("assets/logo.png").convert_alpha()
        self.main_menu = load("assets/mainmenu.png").convert_alpha()
        self.register_login = load("assets/register_login.png").convert_alpha()
        self.buttons = load("assets/buttons.png").convert_alpha()
Example #5
    def __init__(self, pg_screen, screen_size):
        Screen.__init__(self, pg_screen, screen_size)
        self.pg_screen = pg_screen
        self.screen_size = screen_size
        self.network = Network()
        self.font = Font(None, 30)
        self.arrow_keys = {
            pygame.K_UP: 0,
            pygame.K_w: 0,
            pygame.K_LEFT: 1,
            pygame.K_a: 1,
            pygame.K_DOWN: 2,
            pygame.K_s: 2,
            pygame.K_RIGHT: 3,
            pygame.K_d: 3,
        }

        # ingame parameters
        self.init = True
        self.pause = False
        self.score = 0
        self.game_over = False
        self.game_time = 0
        self.game_tick = 0.5
        self.game_tick_decrement = 0.05
        self.direction = 0  # [0,1,2,3] == [up,left,down,right]
        self.snake = [(5, 5), (5, 6), (5, 7)]  # 10 * 13 squares
        self.stain_pos = self._generate_stain_pos()

        # load the assets
        self.ready = load("assets/ready.png").convert_alpha()
        self.gameover = load("assets/gameover.png").convert_alpha()
        self.buttons = load("assets/buttons.png").convert_alpha()
        self.pausemenu = load("assets/pausemenu.png").convert_alpha()
        self.numbers = load("assets/numbers.png").convert_alpha()
        self.tail = load("assets/tail.png").convert_alpha()
        # load stains
        self.stains = [
            load("assets/stain1.png").convert_alpha(),
            load("assets/stain2.png").convert_alpha(),
            load("assets/stain3.png").convert_alpha(),
        ]
        # load heads
        self.heads = [
            load("assets/headup.png").convert_alpha(),
            load("assets/headleft.png").convert_alpha(),
            load("assets/headdown.png").convert_alpha(),
            load("assets/headright.png").convert_alpha(),
        ]
        self.stain = random.choice(self.stains)
Example #6
    def __init__(self, pg_screen, screen_size):
        Screen.__init__(self, pg_screen, screen_size)
        self.pg_screen = pg_screen
        self.screen_size = screen_size
        self.network = Network()
        self.font = Font(None, 30)
        self.logging_in = False
        self.logging_in_status = None
        self.username = None

        # load the assets
        self.buttons = load("assets/buttons.png").convert_alpha()

        # create the input boxes
        self.username_box = InputBox((10, 150), (300, 40))
        self.password_box = InputBox((10, 230), (300, 40), type="password")
Example #7
 def make_discriminator(self,
                        z,
                        output_size,
                        scope,
                        n_layers,
                        hid_size,
                        reuse=False):
     """ Predict D(z = [z1, z2]) => p(y | z)
     """
     logit = Network(z,
                     output_size,
                     scope,
                     hid_size,
                     conv_depth=0,
                     n_hidden_dense=n_layers,
                     reuse=reuse)
      return tfp.distributions.Bernoulli(logits=logit)
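A hedged sketch, not from the original class, of how the Bernoulli distribution returned above can drive a discriminator loss: labels are 1 for matched pairs and 0 for shuffled pairs, and the loss is the negative log-likelihood under the predicted distribution. Written for TensorFlow 2 eager mode with placeholder tensors.

import tensorflow as tf
import tensorflow_probability as tfp

logits = tf.zeros((4, 1))                      # stand-in for the Network output
dist = tfp.distributions.Bernoulli(logits=logits)
labels = tf.ones((4, 1), dtype=tf.int32)       # 1 = matched pair, 0 = shuffled pair
loss = -tf.reduce_mean(dist.log_prob(labels))  # negative log-likelihood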
Example #8
    def __init__(self, pg_screen, screen_size):
        Screen.__init__(self, pg_screen, screen_size)
        self.pg_screen = pg_screen
        self.screen_size = screen_size
        self.network = Network()
        self.font = Font(None, 30)

        self.fetch_global_highscores = True
        self.fetch_personal_highscores = True
        self.global_highscores_page = 0
        self.personal_highscores_page = 0
        self.highscore_page_idx = 0

        # load the assets
        self.mainmenu = load("assets/mainmenu.png").convert_alpha()
        self.numbers = load("assets/numbers.png").convert_alpha()
        self.buttons = load("assets/buttons.png").convert_alpha()
Example #9
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

train_sampler: SubsetRandomSampler = SubsetRandomSampler(train_indices)
test_sampler: SubsetRandomSampler = SubsetRandomSampler(val_indices)

my_set = ImageSet(images, images_orig, boundaries)
train_loader: DataLoader = DataLoader(my_set,
                                      batch_size=batch_size,
                                      sampler=train_sampler)
val_loader: DataLoader = DataLoader(my_set,
                                    batch_size=batch_size,
                                    sampler=test_sampler)

model: Network = Network().double().to(device)
optimizer = Adam(model.parameters(), lr)

min_loss = float('inf')
best_model = copy.deepcopy(model)
for e in range(epoch):
    train_loss = 0
    val_loss = 0
    model.train(True)
    for x, orig, bound in tqdm(train_loader,
                               desc='Training: ',
                               position=0,
                               leave=True):
        x = x.to(device)
        orig = orig.to(device)
        optimizer.zero_grad()
Example #10
    def __init__(
            self,
            capacity_per_level=500000,
            warmup_steps=100000,
            n_frames=4,
            n_atoms=51,
            v_min=-1,
            v_max=0,
            gamma=.99,
            device='cuda',
            batch_size=48,
            lr=0.0000625 * 2,
            lr_decay=0.99,
            update_target_net_every=25000,
            train_every=6,
            frame_skip=4,
            disable_noisy_after=2000000,
            super_hexagon_path='C:\\Program Files (x86)\\Steam\\steamapps\\common\\Super Hexagon\\superhexagon.exe',
            run_afap=True):

        # training objects
        self.memory_buffer = MemoryBuffer(
            capacity_per_level,
            SuperHexagonInterface.n_levels,
            n_frames,
            SuperHexagonInterface.frame_size,
            SuperHexagonInterface.frame_size_cropped,
            gamma,
            device=device)
        self.net = Network(n_frames, SuperHexagonInterface.n_actions,
                           n_atoms).to(device)
        self.target_net = Network(n_frames, SuperHexagonInterface.n_actions,
                                  n_atoms).to(device)
        self.target_net.load_state_dict(self.net.state_dict())
        self.optimizer = torch.optim.Adam(self.net.parameters(),
                                          lr=lr,
                                          eps=1.5e-4)
        self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, ExpLrDecay(lr_decay, min_factor=.1))

        # parameters
        self.batch_size = batch_size
        self.update_target_net_every = update_target_net_every
        self.train_every = train_every
        self.frame_skip = frame_skip
        self.disable_noisy_after = disable_noisy_after
        self.warmup_steps = warmup_steps
        self.gamma = gamma
        self.device = device

        # parameters for distributional
        self.n_atoms = n_atoms
        self.v_min = v_min
        self.v_max = v_max
        self.delta_z = (v_max - v_min) / (n_atoms - 1)
        self.support = torch.linspace(v_min,
                                      v_max,
                                      n_atoms,
                                      dtype=torch.float,
                                      device=device)
        self.offset = torch.arange(0,
                                   batch_size * n_atoms,
                                   n_atoms,
                                   device=device).view(-1, 1)
        self.m = torch.empty((batch_size, n_atoms), device=device)

        # debug and logging stuff
        self.list_steps_alive = [
            [] for _ in range(SuperHexagonInterface.n_levels)
        ]
        self.longest_run = [(0, 0)] * SuperHexagonInterface.n_levels
        self.total_simulated_steps = [0] * SuperHexagonInterface.n_levels
        self.losses = []
        self.kls = []
        self.times = []
        self.iteration = 0

        self.super_hexagon_path = super_hexagon_path
        self.run_afap = run_afap
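An illustrative check, not part of the original class, of the categorical value support defined above, using the same v_min=-1, v_max=0, n_atoms=51 defaults: each atom is an evenly spaced return value and delta_z is the spacing used when projecting Bellman targets onto the support.

import torch

v_min, v_max, n_atoms = -1.0, 0.0, 51
delta_z = (v_max - v_min) / (n_atoms - 1)        # 0.02
support = torch.linspace(v_min, v_max, n_atoms)  # 51 evenly spaced atoms in [-1, 0]
assert abs(float(support[1] - support[0]) - delta_z) < 1e-6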
Example #11
    net_path = 'super_hexagon_net'

    n_frames = 4
    frame_skip = 4
    log_every = 1000
    n_atoms = 51

    # setup
    # stacked frame buffers (full-size and cropped), stored as boolean arrays
    fp = np.zeros((1, n_frames, *SuperHexagonInterface.frame_size), dtype=bool)
    fcp = np.zeros((1, n_frames, *SuperHexagonInterface.frame_size_cropped),
                   dtype=bool)
    support = np.linspace(-1, 0, n_atoms)

    net = Network(n_frames, SuperHexagonInterface.n_actions,
                  n_atoms).to(device)
    net.load_state_dict(torch.load(net_path, map_location=device))
    net.eval()

    game = SuperHexagonInterface(frame_skip=frame_skip, run_afap=False)
    game.select_level(level)

    list_times_alive = []
    f, fc = game.reset()

    # helper function
    def to_torch_tensor(x):
        return torch.from_numpy(x).to(device).float()

    # global no_grad
    torch.set_grad_enabled(False)
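A hedged sketch, not in the original snippet, of how the categorical output of net is typically reduced to Q-values at evaluation time: the expected return per action is the probability-weighted sum over the support defined above. probs is a hypothetical stand-in for the network's per-action softmax output.

import numpy as np

n_actions, n_atoms = 3, 51
support = np.linspace(-1, 0, n_atoms)
probs = np.full((n_actions, n_atoms), 1.0 / n_atoms)  # stand-in for the net's output distributions
q_values = (probs * support).sum(axis=-1)             # expected return per action
action = int(q_values.argmax())                       # greedy action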
Example #12
        self.batch_rewards = None


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--environment", help="Gym environment to train on", default='CartPole-v0')
    parser.add_argument("-hi", "--hidden_size", help="Hidden layer sizes, separated by whitespace", default='16')
    parser.add_argument("-t", "--training_episodes", help="Maximum number of episodes to train", default=100000)
    parser.add_argument("-a", "--alpha", help="Step size", type=float, default=.003)
    parser.add_argument("-g", "--goal_return", help="Goal return", type=float, default=195)
    parser.add_argument("-b", "--batch_size", help="Episodes per gradient update", type=int, default=100)
    args = parser.parse_args()

    args.hidden_size = [int(val) for val in args.hidden_size.split()]

    env = gym.make(args.environment)
    use_cuda = torch.cuda.is_available()

    state_shape  = list(env.reset().shape)
    state_shape  = [1] + state_shape[:-1]
    action_count = np.prod(env.action_space.shape)
    network      = Network(state_shape, args.hidden_size, action_count)

    agent = PGAgent(env, network, batch_size=1, alpha=args.alpha, use_cuda=use_cuda)
    _time = time()
    converged = agent.train(args.training_episodes, goal_return=args.goal_return, smoothing_eps=100)
    if converged:
        print("Solved in %.1f minutes" % ((time() - _time) / 60.0))
    else:
        print("Failed to converge")
Example #13
def main():
    """
    The main part of this program.
    All necessary threads are started and monitored.
    :return:
    """
    # remember the modification timestamp of the config; if the config gets changed, we can reload it!
    config_timestamp = 0 if not args.config else os.stat(
        importlib.util.find_spec("config").origin).st_mtime

    led = LedStatusMonitor()
    led.start()

    gopro = GoPro(args.background or args.quiet, args.ignore, args.max_time,
                  args.log_invisible)
    if args.config:
        gopro.setUserSettings({
            'FRAME_RATE': config.fps if 'fps' in vars(config) else None,
            'FOV': config.fov if 'fov' in vars(config) else None,
            'RESOLUTION': config.resolution if 'resolution' in vars(config) else None,
        })
    gopro.start()

    teams = config.teams if args.config and 'teams' in vars(config) else None
    #gameLogger = GameLoggerSql(os.path.join(os.path.dirname(__file__), 'logs/game.db'), teams)
    gameLogger = GameLoggerLog(
        os.path.join(os.path.dirname(__file__), 'logs/'), teams,
        args.log_invisible)
    gameLogger.start()

    gameController = GameController(args.gc_source)
    gameController.start()

    network = Network(args.device, args.ssid, args.passwd, args.retries,
                      args.mac)
    network.start()

    # monitor threads and config
    threads = [led, gopro, gameLogger, gameController, network]
    try:
        while True:
            #print(blackboard)
            # if config was loaded from file and file was modified since last checked
            if args.config and config_timestamp != os.stat(
                    importlib.util.find_spec("config").origin).st_mtime:
                config_timestamp = os.stat(
                    importlib.util.find_spec("config").origin).st_mtime
                try:
                    # reload config from file
                    importlib.reload(config)
                    Logger.info("Reloaded modified config")
                    network.setConfig(None, config.ssid, config.passwd,
                                      config.retries, config.mac)
                    gameController.setSource(config.gc_source)
                    gopro.setUserSettings({
                        'FRAME_RATE': config.fps if 'fps' in vars(config) else None,
                        'FOV': config.fov if 'fov' in vars(config) else None,
                        'RESOLUTION': config.resolution if 'resolution' in vars(config) else None,
                    })
                except Exception as e:
                    Logger.error("Invalid config! " + str(e))
            else:
                # nothing to reload; sleep briefly and verify that all threads are still alive
                time.sleep(1)
                for t in threads:
                    if not t.is_alive():
                        Logger.error("Thread %s is not running (anymore)!",
                                     str(t.__class__.__name__))
    except (KeyboardInterrupt, SystemExit):
        print("Shutting down ...")

    # cancel threads
    led.cancel()
    gopro.cancel()
    gameLogger.cancel()
    gameController.cancel()
    network.cancel()
    # wait for finished threads
    led.join()
    gopro.join()
    gameLogger.join()
    gameController.join()
    network.join()

    print("Bye")
Example #14
 def setupControl(self):
     ''' Initializes the control blocks: '''
     self.network = Network(self.alpha)
     self.plant = Plant(self.alpha)
Example #15
            value_loss.backward()
            self.value_opt.step()


if __name__ == '__main__':
    ENVIRONMENT   = 'Pong-ram-v0'
    CONV_LAYERS   = None  # [(1, 8, 3, True), (8, 8, 3, True), (8, 8, 3, True), (8, 8, 3, True)]
    HIDDEN_LAYERS = [256]
    TRAIN_EPS     = 1000 * 1000 * 1000

    env      = gym.make(ENVIRONMENT)
    use_cuda = torch.cuda.is_available()

    state_shape    = list(env.reset().shape)
    state_shape    = [1] + state_shape[:-1]
    action_count   = np.prod(env.action_space.shape)
    policy_network = Network(state_shape, HIDDEN_LAYERS, action_count, conv=CONV_LAYERS)
    value_network  = Network(state_shape, HIDDEN_LAYERS, 1, conv=CONV_LAYERS, softmax=False)

    agent = ACAgent(env,
                    policy_network,
                    value_network,
                    alpha=.003,
                    gamma=.99,
                    memory_size=10000,
                    batch_size=1024,
                    use_cuda=use_cuda)
    _time = time()
    agent.train(TRAIN_EPS, goal_return=15, smoothing_eps=1)
    print("Solved in %.1f minutes" % ((time() - _time) / 60.0))
Example #16
    def __init__(self, graph_args, adv_args, in_shape):
        # arg unpacking
        self.act_dim = graph_args['act_dim']
        ## conv operations params
        n_hidden = graph_args['n_hidden']
        hid_size = graph_args['hid_size']
        conv_depth = graph_args['conv_depth']

        ## training params
        self.learning_rate = graph_args['learning_rate']
        self.num_target_updates = graph_args['num_target_updates']
        self.num_grad_steps_per_target_update = graph_args[
            'num_grad_steps_per_target_update']
        self.gamma = adv_args['gamma']

        # class similar actions => easier to predict
        self.setup_action_classes()

        self.act, self.adv = self.define_placeholders()
        self.obs = tf.placeholder(shape=in_shape, dtype=tf.float32)
        self.n_obs = tf.placeholder(shape=in_shape, dtype=tf.float32)

        # policy / actor evaluation with encoded state
        self.half_policy_distrib = Network(self.obs, None, 'policy_start', \
            hid_size, conv_depth)
        self.half_policy_distrib_2 = Network(self.n_obs, None, 'policy_start', \
            hid_size, conv_depth, reuse=True)
        self.policy_distrib = Network(self.half_policy_distrib, self.act_dim,  \
            'policy_out', hid_size, n_hidden_dense=n_hidden)

        self.greedy_action = tf.argmax(self.policy_distrib, axis=1)

        self.n_act_sample = 1
        # tf.random.categorical expects logits, so the raw policy outputs are passed directly
        self.sample_action = tf.random.categorical(self.policy_distrib,
                                                   self.n_act_sample)

        # policy update
        action_enc = tf.one_hot(self.act, depth=self.act_dim)
        self.logprob = -1 * tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.policy_distrib, labels=action_enc)
        self.actor_loss = -tf.reduce_mean(self.logprob * self.adv -
                                          1e-3 * self.logprob)
        actor_optim = tf.train.AdamOptimizer(self.learning_rate)
        self.actor_update_op = actor_optim.minimize(self.actor_loss)

        # record gradients
        self.grads = actor_optim.compute_gradients(self.actor_loss)
        for grad, var in self.grads:
            tf.summary.histogram("{}-grad".format(var.name), grad)
        self.merged = tf.summary.merge_all()

        # critic definition with encoded state
        self.v_target = tf.placeholder(shape=(None, ),
                                       name='v_target',
                                       dtype=tf.float32)
        self.v_pred = tf.squeeze(
            Network(self.obs,
                    1,
                    'critic',
                    hid_size,
                    conv_depth=conv_depth,
                    n_hidden_dense=n_hidden))
        self.critic_loss = tf.losses.mean_squared_error(
            self.v_target, self.v_pred)
        self.critic_update_op = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.critic_loss)

        # action neural network def
        actnn_layers = graph_args['actnn_layers']
        actnn_units = graph_args['actnn_units']
        self.actnn_learning_rate = graph_args['actnn_learning_rate']
        self.nclasses = graph_args['actnn_nclasses']

        # placeholders act_i, obs_i, obs_i+1
        self.prev_act_ph = tf.placeholder(shape=(None, ), dtype=tf.int32)
        self.actnn_prev_obs_ph = self.half_policy_distrib
        self.actnn_obs_ph = self.half_policy_distrib_2

        # concat & network pass
        multi_obs_enc = tf.concat([self.actnn_prev_obs_ph, self.actnn_obs_ph],
                                  axis=-1)
        self.actnn_pred = dense_pass(multi_obs_enc, self.nclasses,
                                     actnn_layers, actnn_units)
        action_enc = tf.one_hot(self.prev_act_ph, depth=self.nclasses)

        # update operations
        self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.actnn_pred, labels=action_enc)
        self.train_step = tf.train.AdamOptimizer(
            self.actnn_learning_rate).minimize(self.loss)
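An illustrative check, separate from the class above, of the identity the actor loss relies on: the negative softmax cross-entropy against a one-hot action equals the log-probability of that action under the softmax policy, so self.logprob is indeed log pi(a | s).

import numpy as np

logits = np.array([1.0, 2.0, 0.5])
probs = np.exp(logits) / np.exp(logits).sum()     # softmax policy
action = 1
one_hot = np.eye(3)[action]
cross_entropy = -(one_hot * np.log(probs)).sum()  # softmax cross-entropy with one-hot labels
assert np.isclose(-cross_entropy, np.log(probs[action]))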