Example No. 1
    def __init__(self,
                 max_pending=None,
                 default_func_caller=None,
                 cpu_only=False):
        num_gpus_available = len(
            tf.config.experimental.list_physical_devices('GPU'))

        if max_pending is None:
            max_pending = 4 if cpu_only else max(1, num_gpus_available)
        super().__init__(max_pending)

        worker_resources = {
            "num_gpus":
            0 if debug_mode() or cpu_only or num_gpus_available == 0 else 1,
        }

        self.worker_cls = ray.remote(**worker_resources)(Worker)

        if not ray.is_initialized():
            ray.init(local_mode=debug_mode(), ignore_reinit_error=True)

        self.max_pending = max_pending
        self.default_func_caller = default_func_caller
        if self.default_func_caller:
            self.caller_handle = ray.put(default_func_caller)
        self.scheduler = Scheduler(self.worker_cls.remote()
                                   for _ in range(max_pending))
        self.last_receive_time = 0
Example No. 2
def train():
    tf.keras.backend.clear_session()
    with strategy.scope() if config['use_tpu'] else empty_context_mgr():

        model = GAN_PG(**config)

        # Define optimizers
        optimizer_g = tf.train.AdamOptimizer(
            learning_rate=config['learning_rate'], beta1=0.0)
        optimizer_d = tf.train.AdamOptimizer(
            learning_rate=config['learning_rate'], beta1=0.0)

        # Compile the model
        model.compile_model(optimizer_g=optimizer_g,
                            optimizer_d=optimizer_d,
                            loss=config['gan_mode'],
                            tpu_strategy=strategy,
                            resolver=resolver,
                            config=config['sess_config'])

        if config['restore']:
            model.load_weights('{}/weights'.format(config['folder']))

        # Prepare inputs
        inputs = (X_train, y_train) if config['conditional'] else X_train

        # Train
        for stage in config['train_stages']:
            # Get training stage num
            stage_num = config['train_stages'].index(stage)

            print(
                '\nProcessing stage: {}  with image size {} =========================================='
                .format(stage_num, stage['size']))

            # Define schedulers
            alpha_scheduler = Scheduler(stage['train_epochs'], [0, 0.5],
                                        [0, 1.0])
            learning_rate_scheduler = Scheduler(
                stage['train_epochs'], [0, 0.5],
                [stage['lr'] * 0.1, stage['lr']])

            model.fit_stage(inputs,
                            config['batch_size'],
                            stage_num=stage_num,
                            alpha_scheduler=alpha_scheduler,
                            learning_rate_scheduler=learning_rate_scheduler,
                            folder=config['folder'],
                            save_epoch=config['save_epoch'],
                            seed_noise=seed_noise,
                            seed_labels=seed_labels)

    make_gif(
        glob.iglob('{}/progress/*.png'.format(config['folder'])),
        '{}/progress/{}_{}.gif'.format(config['folder'], config['gan_mode'],
                                       'progress'))
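
The alpha and learning-rate schedulers above are built as Scheduler(stage epochs, breakpoint fractions, target values), which suggests a value interpolated over the stage's progress. A minimal sketch of such an interpolating scheduler follows; the class name, the get(epoch) accessor, and the linear interpolation are assumptions inferred from the call sites, not the repository's implementation.

import numpy as np

class InterpScheduler:
    """Maps an epoch within a training stage to a value by linear interpolation."""

    def __init__(self, n_epochs, fractions, values):
        self.n_epochs = n_epochs    # length of the training stage in epochs
        self.fractions = fractions  # breakpoints as fractions of the stage, e.g. [0, 0.5]
        self.values = values        # values reached at each breakpoint, e.g. [0, 1.0]

    def get(self, epoch):  # hypothetical accessor name
        progress = epoch / max(1, self.n_epochs - 1)
        return float(np.interp(progress, self.fractions, self.values))

# alpha ramps from 0 to 1 over the first half of a 10-epoch stage, then stays at 1
alpha_scheduler = InterpScheduler(10, [0, 0.5], [0, 1.0])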
Example No. 3
    def __init__(self, input_length: int, output_length: int, n_steps: int, window_size: int, seed=3, gamma=0.8, vf_coef=0.25, ent_coef=0,
                 max_grad_norm=0.5, learning_rate=1e-3, alpha=0.99, epsilon=1e-5, lr_schedule='linear', verbose=0, _init_setup_model=True):

        self.policy = MetaLstmActorCriticPolicy
        self.verbose = verbose
        self.input_length = input_length
        self.output_length = output_length
        self.num_train_steps = 0
        self.n_steps = n_steps
        self.window_size = window_size
        self.total_timesteps = config.max_timesteps/10_000

        self.gamma = gamma
        self.vf_coef = vf_coef
        self.ent_coef = ent_coef
        self.max_grad_norm = max_grad_norm
        self.alpha = alpha
        self.epsilon = epsilon
        self.lr_schedule = lr_schedule
        self.learning_rate = learning_rate

        self.graph = None
        self.sess = None
        self.learning_rate_ph = None
        self.actions_ph = None
        self.advs_ph = None
        self.rewards_ph = None
        self.pg_loss = None
        self.vf_loss = None
        self.entropy = None
        self.apply_backprop = None
        self.policy_model = None
        self.step = None
        self.value = None
        self.learning_rate_schedule = None
        self.trainable_variables = None

        self.layers = config.meta_layers
        self.lstm_units = config.meta_lstm_units

        if seed is not None:
            set_global_seeds(seed)

        self.learning_rate_schedule = Scheduler(initial_value=self.learning_rate, n_values=self.total_timesteps,
                                                schedule=self.lr_schedule, init_step=self.num_train_steps)

        if _init_setup_model:
            self.setup_model()
Example No. 4
def main(multithreading: bool = True):
    node_num = 300

    logger.info('Generating the wireless sensor network...')
    wsn = generate_rand_nodes(wsn=Wsn(),
                              wsn_width_x=100,
                              wsn_width_y=100,
                              node_num=node_num,
                              node_r_mu=10,
                              node_r_sigma=5,
                              node_power=100000000000,
                              node_pc_per_send=1)
    logger.info('Wireless sensor network generated')

    bystander = Bystander(wsn)
    logger.info('Bystander created')

    # Inject a "soul" into node 1: give it teammates and something to send
    wsn.node_manager.nodes[0].teammate_num = node_num * 0.95
    wsn.node_manager.nodes[0].send_queue.append('Hello World!')

    if multithreading:
        Scheduler.schedule(bystander, EnumScheduleMode.MULTI_THREAD, [
            TerminationCondition.UserDriven(),
            TerminationCondition.NodeDriven(),
            TerminationCondition.RunningTime(300),
            TerminationCondition.SurvivalRate(0.6),
        ])
    else:
        Scheduler.schedule(bystander, EnumScheduleMode.SINGLE_THREAD, [
            TerminationCondition.UserDriven(),
            TerminationCondition.NodeDriven(),
            TerminationCondition.NumOfCycles(300),
            TerminationCondition.SurvivalRate(0.6),
        ])

    logger.info('Computing power-usage statistics...')
    power_usage = 0
    for node in wsn.node_manager.nodes:
        power_usage += (node.total_power - node.power)
    logger.warning(f'Total power consumed in this run: {power_usage} units')
    logger.info('Main thread finished...')
Example No. 5
    def search(self, load_from: str = None, save_every: int = None):
        if load_from:
            self.load_state(load_from)

        ray.init(local_mode=debug_mode())

        trainer = ray.put(self.trainer)
        ss = ray.put(self.config.search_space)
        scheduler = Scheduler([
            GPUTrainer.remote(ss, trainer)
            for _ in range(self.max_parallel_evaluations)
        ])
        self.log.info(
            f"Searching with {self.max_parallel_evaluations} workers.")

        def should_submit_more(cap):
            return (len(self.history) + scheduler.pending_tasks() < cap) \
               and scheduler.has_a_free_worker()

        def point_number():
            return len(self.history) + scheduler.pending_tasks() + 1

        while len(self.history) < self.initial_population_size:
            if should_submit_more(cap=self.initial_population_size):
                self.log.info(f"Populating #{point_number()}...")
                scheduler.submit(self.random_sample())
            else:
                info = scheduler.await_any()
                self.population.append(info)
                self.history.append(info)
                self.maybe_save_state(save_every)

        while len(self.history) < self.rounds:
            if should_submit_more(cap=self.rounds):
                self.log.info(f"Searching #{point_number()}...")
                sample = np.random.choice(self.population,
                                          size=self.sample_size)
                parent = max(sample, key=self.get_mo_fitness_fn())

                scheduler.submit(self.evolve(parent.point))
            else:
                info = scheduler.await_any()
                self.population.append(info)
                while len(self.population) > self.population_size:
                    self.population.pop(0)
                self.history.append(info)
                self.maybe_save_state(save_every)
                self.bounds_log()
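
Examples 1 and 5 drive a pool of Ray actors through a Scheduler that exposes submit(), await_any(), pending_tasks() and has_a_free_worker(). A minimal sketch of that pattern, built on ray.wait, is given below; the worker method name evaluate() is an assumption, and this illustrates the interface rather than the repository's implementation.

import ray

class RayScheduler:
    """Distributes jobs over a fixed pool of Ray actors (sketch of the interface above)."""

    def __init__(self, workers):
        self.free = list(workers)  # idle worker actors
        self.in_flight = {}        # object ref -> the worker computing it

    def has_a_free_worker(self):
        return len(self.free) > 0

    def pending_tasks(self):
        return len(self.in_flight)

    def submit(self, job):
        worker = self.free.pop()
        ref = worker.evaluate.remote(job)  # assumes workers expose an evaluate() method
        self.in_flight[ref] = worker

    def await_any(self):
        # Block until any in-flight task finishes, release its worker, return the result.
        (done_ref,), _ = ray.wait(list(self.in_flight), num_returns=1)
        self.free.append(self.in_flight.pop(done_ref))
        return ray.get(done_ref)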
Example No. 6
    def __init__(self, bot):
        self.bot = bot
        self.guilds_db = bot.db['guilds']

        self.scheduler = Scheduler(self.__class__.__name__)
        self.event_schedules = {}
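
This cog's Scheduler(name) is used in Example 15 through schedule_at(datetime, task_id, coroutine), cancel(task_id) and cancel_all(). A minimal asyncio-based sketch with that interface follows; it mirrors the call sites only and must run inside an existing event loop (as a discord.py cog does), so it is not the bot's actual utility class.

import asyncio
from datetime import datetime

class NamedScheduler:
    """Runs coroutines at a given (naive, local-time) datetime, keyed by task id."""

    def __init__(self, name):
        self.name = name
        self._tasks = {}

    def schedule_at(self, when, task_id, coroutine):
        # Must be called from inside a running event loop.
        delay = max(0.0, (when - datetime.now()).total_seconds())
        self._tasks[task_id] = asyncio.create_task(self._run(delay, task_id, coroutine))

    async def _run(self, delay, task_id, coroutine):
        try:
            await asyncio.sleep(delay)
            await coroutine
        except asyncio.CancelledError:
            coroutine.close()  # drop the never-started coroutine without a warning
        finally:
            self._tasks.pop(task_id, None)

    def cancel(self, task_id):
        task = self._tasks.get(task_id)
        if task is not None:
            task.cancel()

    def cancel_all(self):
        for task in list(self._tasks.values()):
            task.cancel()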
Example No. 7
def train_episode(cnets, dTNet, device, args, lr, epoch, n_epochs, train_loader, test_loader, episode_idx, layers,
                  stats=None, eps_init=0, balanced_loss=False, net_weights=[1]):
    if not isinstance(cnets, list):
        cnets = [cnets]

    for cnet in cnets:
        cnet.train()

    net = cnets[0].net
    relaxed_net = cnets[0].relaxed_net

    relu_ids = relaxed_net.get_relu_ids()
    eps = eps_init

    if "COLT" in args.train_mode:
        relu_stable = args.relu_stable
    elif "adv" in args.train_mode:
        relu_stable = None
        args.mix = False
    elif "natural" in args.train_mode:
        relu_stable = None
        args.nat_factor = 1
        args.mix = False
    elif "diffAI" in args.train_mode:
        relu_stable = None
    else:
        raise RuntimeError(f"Unknown train mode {args.train_mode:}")

    print('Saving model to:', args.model_dir)
    print('Training layers: ', layers)

    for j in range(len(layers) - 1):
        opt, lr_scheduler = get_opt(net, args.opt, lr, args.lr_step, args.lr_factor, args.n_epochs, train_loader,
                                    args.lr_sched)
        curr_layer_idx = layers[j + 1]

        eps_old = eps
        eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j)
        if eps_old is None: eps_old = eps
        kappa_sched = Scheduler(0 if args.mix else 1, 1, args.train_batch * len(train_loader) * args.mix_epochs,
                                0 if not args.anneal else args.train_batch * len(train_loader)*args.anneal_warmup)
        beta_sched = Scheduler(args.beta_start if args.mix else args.beta_end, args.beta_end,
                                args.train_batch * len(train_loader) * args.mix_epochs, 0)
        eps_sched = Scheduler(eps_old if args.anneal else eps, eps, args.train_batch * len(train_loader) * args.anneal_epochs,
                              args.train_batch * len(train_loader)*args.anneal_warmup, power=args.anneal_pow)

        layer_dir = '{}/{}/{}'.format(args.model_dir, episode_idx, curr_layer_idx)
        if not os.path.exists(layer_dir):
            os.makedirs(layer_dir)

        print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.format(eps, lr, curr_layer_idx))

        if balanced_loss:
            assert cnets[0].lossFn_test is None, "Unexpected lossFn"
            data_balance = np.array(train_loader.dataset.targets).astype(float).mean()
            balance_factor = (1 - data_balance) / (data_balance + 1e-3)
            cnets[0].update_loss_fn(balance_factor, device)

        for curr_epoch in range(n_epochs):
            if balanced_loss and args.sliding_loss_balance is not None and j == 0:
                # if sliding loss balance is active, anneal the loss balance from fully balanced to partially balanced
                assert 0 <= args.sliding_loss_balance <= 1
                balance_factor_initial = (1-data_balance)/(data_balance+1e-3)
                scaling_factor_balance = 1-max(min((curr_epoch-0.1*n_epochs)/(n_epochs*0.7), args.sliding_loss_balance), 0)
                balance_factor = scaling_factor_balance * (balance_factor_initial-1) + 1
                cnets[0].update_loss_fn(balance_factor, device)

            train(device, epoch, args, j + 1, layers, cnets, eps_sched, kappa_sched, opt, train_loader,
                  lr_scheduler, relu_ids, stats, relu_stable,
                  relu_stable_protected=args.relu_stable_protected, net_weights=net_weights, beta_sched=beta_sched)

            if isinstance(lr_scheduler, optim.lr_scheduler.StepLR) and curr_epoch >= args.mix_epochs:
                lr_scheduler.step()

            if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % n_epochs == 0:
                torch.save(dTNet.state_dict(), os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                torch.save(opt.state_dict(), os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1)))
                test(device, args, cnets[0], test_loader if args.test_set == "test" else train_loader,
                     [curr_layer_idx], stats=stats)

            stats.update_tb(epoch)
            epoch += 1
        relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec
        lr = lr * args.lr_layer_dec
    net.freeze(len(net.blocks)-1)
    return epoch
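
The kappa, beta and eps schedulers above are constructed as Scheduler(start, end, n_steps, warmup, power=...) and annealed over batches. A minimal sketch of such a warmup-then-anneal scheduler is given below; the get(step) accessor is a hypothetical name chosen for illustration, and only the constructor arguments are taken from the call sites.

class AnnealingScheduler:
    """Holds `start` during warmup, then anneals towards `end` over n_steps batches."""

    def __init__(self, start, end, n_steps, warmup, power=1.0):
        self.start, self.end = start, end
        self.n_steps, self.warmup, self.power = n_steps, warmup, power

    def get(self, step):  # hypothetical accessor name
        if step < self.warmup:
            return self.start
        if self.n_steps <= 0:
            return self.end
        frac = min(1.0, (step - self.warmup) / self.n_steps) ** self.power
        return self.start + frac * (self.end - self.start)

# kappa ramps from 0 to 1 over mix_epochs * batches_per_epoch steps, no warmup
kappa_sched = AnnealingScheduler(0, 1, n_steps=100 * 50, warmup=0)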
Example No. 8
from episodic_curiosity import oracle

from ppo_agent import save_rews_list

import pickle

import numpy as np
from utils import explained_variance, Scheduler

sil_loss_weight = Scheduler(v=1., nvalues=200, schedule='linear')


class RewardForwardFilter(object):
    def __init__(self, gamma):
        self.rewems = None
        self.gamma = gamma

    def update(self, rews):
        if self.rewems is None:
            self.rewems = rews
        else:
            self.rewems = self.rewems * self.gamma + rews
        return self.rewems


a = [[2.0141983, 12], [2.0334325, 12], [2.0381384, 12], [1.9757961, 12],
     [1.9958011, 12], [2.0272028, 12], [2.073726, 12], [2.0448227, 11],
     [1.9994664, 11], [2.0019026, 11], [1.9926736, 11], [1.9864389, 11],
     [1.9718688, 11], [2.0127394, 11], [1.9853333, 11], [2.0017598, 11],
     [1.9949136, 11], [2.0054865, 11], [2.011146, 11], [2.0157328, 11],
     [2.0109742, 11], [2.008641, 11], [2.015577, 11], [2.0362294, 11],
Example No. 9
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 nenvs,
                 nsteps,
                 ent_coef=0.01,
                 vf_coef=0.5,
                 max_grad_norm=0.5,
                 lr=7e-4,
                 alpha=0.99,
                 epsilon=1e-5,
                 total_timesteps=int(80e6),
                 lrschedule='linear',
                 summary_dir=None):

        sess = tf_util.make_session()
        nbatch = nenvs * nsteps

        A = tf.placeholder(tf.int32, [nbatch])
        ADV = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])

        step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False)
        train_model = policy(sess,
                             ob_space,
                             ac_space,
                             nenvs * nsteps,
                             nsteps,
                             reuse=True)

        neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_model.pi, labels=A)
        pg_loss = tf.reduce_mean(ADV * neglogpac)
        vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
        entropy = tf.reduce_mean(cat_entropy(train_model.pi))
        loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

        params = find_trainable_variables("model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                            decay=alpha,
                                            epsilon=epsilon)
        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        # storing summaries
        episode_reward = tf.placeholder("float")
        tf.summary.scalar("policy_loss", pg_loss)
        tf.summary.scalar("entropy", entropy)
        tf.summary.scalar("value_loss", vf_loss)
        tf.summary.scalar("episode_reward", episode_reward)
        summary_op = tf.summary.merge_all()

        def train(obs, states, mean_reward, rewards, masks, actions, values):
            advs = rewards - values
            for step in range(len(obs)):
                cur_lr = lr.value()
            td_map = {
                train_model.X: obs,
                A: actions,
                ADV: advs,
                R: rewards,
                LR: cur_lr,
                episode_reward: mean_reward
            }
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            policy_loss, value_loss, policy_entropy, summary, _ = sess.run(
                [pg_loss, vf_loss, entropy, summary_op, _train], td_map)
            return policy_loss, value_loss, policy_entropy, summary

        def save(save_path):
            ps = sess.run(params)
            make_path(osp.dirname(save_path))
            joblib.dump(ps, save_path)

        def load(load_path):
            loaded_params = joblib.load(load_path)
            restores = []
            for p, loaded_p in zip(params, loaded_params):
                restores.append(p.assign(loaded_p))
            sess.run(restores)

        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess)
        self.train_writer = tf.summary.FileWriter(summary_dir, sess.graph)
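
Examples 8 through 11 use the OpenAI-baselines-style Scheduler: value() returns the current learning rate and advances an internal counter, which is why the training functions call it once per environment step before building the feed dict. A minimal sketch covering the 'linear' and 'constant' schedules, written as an illustration of that pattern rather than the library's exact code:

SCHEDULES = {
    'linear': lambda p: max(0.0, 1.0 - p),  # decays to zero by nvalues steps
    'constant': lambda p: 1.0,
}

class StepwiseScheduler:
    """Returns v scaled by the schedule of progress; each value() call advances one step."""

    def __init__(self, v, nvalues, schedule):
        self.n = 0
        self.v = v
        self.nvalues = nvalues
        self.schedule = SCHEDULES[schedule]

    def value(self):
        current = self.v * self.schedule(self.n / self.nvalues)
        self.n += 1
        return current

lr = StepwiseScheduler(v=7e-4, nvalues=int(80e6), schedule='linear')
cur_lr = lr.value()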
Example No. 10
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 nenvs,
                 nsteps,
                 ent_coef=0.01,
                 vf_coef=0.5,
                 mf_coef=0.5,
                 max_grad_norm=0.5,
                 lr=7e-4,
                 alpha=0.99,
                 epsilon=1e-5,
                 total_timesteps=int(80e6),
                 lrschedule='linear'):

        sess = tf_util.make_session()
        nact = ac_space.n
        nbatch = nenvs * nsteps

        A = tf.placeholder(tf.int32, [nbatch])
        ADV = tf.placeholder(tf.float32, [nbatch])
        ADV_MOMENT = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        R2 = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])
        ENT_COEF = tf.placeholder(tf.float32, [])

        step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False)
        train_model = policy(sess,
                             ob_space,
                             ac_space,
                             nenvs * nsteps,
                             nsteps,
                             reuse=True)

        neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_model.pi, labels=A)
        pg_loss = tf.reduce_mean((ADV) * neglogpac)
        vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
        mf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.mf), R2))
        entropy = tf.reduce_mean(cat_entropy(train_model.pi))
        ent_coef = Scheduler(v=ent_coef,
                             nvalues=total_timesteps / 10,
                             schedule='step')
        mf_coef = 0.01
        loss = pg_loss - entropy * ENT_COEF + vf_loss * vf_coef + mf_loss * mf_coef
        # loss = pg_loss + vf_loss * vf_coef + mf_loss * mf_coef
        # loss = pg_loss - entropy*ent_coef + vf_loss * vf_coef

        params = find_trainable_variables("model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                            decay=alpha,
                                            epsilon=epsilon)
        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        def train(obs, states, rewards, rewards_square, masks, actions, values,
                  moments):
            values_random = np.random.normal(
                loc=values, scale=np.sqrt(np.maximum(moments - values**2, 0)))
            # values_random = values - np.sqrt(np.maximum(moments - values ** 2,0))
            advs = rewards - values_random
            # advs = (1 - 2 * rewards) * rewards - values  + 2 * values * values
            advs_moment = rewards_square - moments
            # advs = (1 + 2 * rewards) * (rewards)
            # advs_moment = rewards_square
            for step in range(len(obs)):
                cur_lr = lr.value()
                cur_ent_coef = ent_coef.value()
            td_map = {
                train_model.X: obs,
                A: actions,
                ADV: advs,
                ADV_MOMENT: advs_moment,
                R: rewards,
                R2: rewards_square,
                LR: cur_lr,
                ENT_COEF: cur_ent_coef
            }
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            policy_loss, value_loss, moment_loss, policy_entropy, _ = sess.run(
                [pg_loss, vf_loss, mf_loss, entropy, _train], td_map)
            return policy_loss, value_loss, moment_loss, policy_entropy

        def save(save_path):
            ps = sess.run(params)
            make_path(osp.dirname(save_path))
            joblib.dump(ps, save_path)

        def load(load_path):
            loaded_params = joblib.load(load_path)
            restores = []
            for p, loaded_p in zip(params, loaded_params):
                restores.append(p.assign(loaded_p))
            ps = sess.run(restores)

        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess)
Example No. 11
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 nenvs,
                 nsteps,
                 nstack,
                 num_procs,
                 ent_coef=0.01,
                 vf_coef=0.5,
                 max_grad_norm=0.5,
                 lr=7e-4,
                 alpha=0.99,
                 epsilon=1e-5,
                 total_timesteps=int(80e6),
                 lrschedule='linear',
                 optimizer='adam'):
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=num_procs,
                                inter_op_parallelism_threads=num_procs)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        nbatch = nenvs * nsteps

        A = tf.placeholder(tf.int32, [nbatch])
        ADV = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])

        train_model = policy(sess,
                             ob_space,
                             ac_space,
                             nenvs,
                             nsteps,
                             nstack,
                             reuse=True)
        step_model = train_model

        neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_model.pi, labels=A)
        pg_loss = tf.reduce_mean(ADV * neglogpac)
        vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
        entropy = tf.reduce_mean(cat_entropy(train_model.pi))
        loss = pg_loss + vf_loss * vf_coef - entropy * ent_coef

        params = find_trainable_variables("model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        if optimizer == 'adam':
            trainer = tf.train.AdamOptimizer()
        else:
            trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                                decay=alpha,
                                                epsilon=epsilon)

        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        def train(obs, states, rewards, masks, actions, values):
            advs = rewards - values
            for step in range(len(obs)):
                cur_lr = lr.value()
            td_map = {
                train_model.X: obs,
                A: actions,
                ADV: advs,
                R: rewards,
                LR: cur_lr
            }
            if states != []:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            total_loss, policy_loss, value_loss, policy_entropy, _ = sess.run(
                [loss, pg_loss, vf_loss, entropy, _train], td_map)
            return total_loss, policy_loss, value_loss, policy_entropy

        def save(save_path):
            ps = sess.run(params)
            make_path(save_path)
            joblib.dump(ps, save_path)

        def load(load_path):
            loaded_params = joblib.load(load_path)
            restores = []
            for p, loaded_p in zip(params, loaded_params):
                restores.append(p.assign(loaded_p))
            ps = sess.run(restores)

        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess)
Example No. 12
class MetaA2CModel:
    """
    The Meta A2C (Advantage Actor Critic) model class

    :param gamma: (float) Discount factor
    :param vf_coef: (float) Value function coefficient for the loss calculation
    :param ent_coef: (float) Entropy coefficient for the loss calculation
    :param max_grad_norm: (float) The maximum value for the gradient clipping
    :param learning_rate: (float) The learning rate
    :param alpha: (float)  RMSProp decay parameter (default: 0.99)
    :param epsilon: (float) RMSProp epsilon (stabilizes square root computation in denominator of RMSProp update)
        (default: 1e-5)
    :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
                              'double_linear_con', 'middle_drop' or 'double_middle_drop')
    :param verbose: (int) the verbosity level: 0 none, 1 training information, 2 tensorflow debug
    :param _init_setup_model: (bool) Whether or not to build the network at the creation of the instance
                              (used only for loading)
        WARNING: this logging can take a lot of space quickly
    """

    def __init__(self, input_length: int, output_length: int, n_steps: int, window_size: int, seed=3, gamma=0.8, vf_coef=0.25, ent_coef=0,
                 max_grad_norm=0.5, learning_rate=1e-3, alpha=0.99, epsilon=1e-5, lr_schedule='linear', verbose=0, _init_setup_model=True):

        self.policy = MetaLstmActorCriticPolicy
        self.verbose = verbose
        self.input_length = input_length
        self.output_length = output_length
        self.num_train_steps = 0
        self.n_steps = n_steps
        self.window_size = window_size
        self.total_timesteps = config.max_timesteps/10_000

        self.gamma = gamma
        self.vf_coef = vf_coef
        self.ent_coef = ent_coef
        self.max_grad_norm = max_grad_norm
        self.alpha = alpha
        self.epsilon = epsilon
        self.lr_schedule = lr_schedule
        self.learning_rate = learning_rate

        self.graph = None
        self.sess = None
        self.learning_rate_ph = None
        self.actions_ph = None
        self.advs_ph = None
        self.rewards_ph = None
        self.pg_loss = None
        self.vf_loss = None
        self.entropy = None
        self.apply_backprop = None
        self.policy_model = None
        self.step = None
        self.value = None
        self.learning_rate_schedule = None
        self.trainable_variables = None

        self.layers = config.meta_layers
        self.lstm_units = config.meta_lstm_units

        if seed is not None:
            set_global_seeds(seed)

        self.learning_rate_schedule = Scheduler(initial_value=self.learning_rate, n_values=self.total_timesteps,
                                                schedule=self.lr_schedule, init_step=self.num_train_steps)

        if _init_setup_model:
            self.setup_model()

    def setup_model(self):
        """
        Create all the functions and tensorflow graphs necessary to train the model
        """

        assert issubclass(self.policy, MetaLstmActorCriticPolicy), "Error: the input policy for the A2C model must be an " \
                                                                         "instance of MetaLstmActorCriticPolicy."

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.sess = tf_utils.make_session(graph=self.graph)

            # there is no separate step model because the step uses the same batch (n_batch), so it would be redundant
            policy_model = self.policy(sess=self.sess, input_length=self.input_length, output_length=self.output_length, n_steps=self.n_steps,
                                       window_size=self.window_size, layers=self.layers, lstm_units=self.lstm_units)

            with tf.variable_scope("loss", reuse=False):
                self.actions_ph = policy_model.pdtype.sample_placeholder([self.n_steps], name="action_ph")
                self.advs_ph = tf.placeholder(tf.float32, [self.n_steps], name="advs_ph")
                self.rewards_ph = tf.placeholder(tf.float32, [self.n_steps], name="rewards_ph")
                self.learning_rate_ph = tf.placeholder(tf.float32, [], name="learning_rate_ph")

                neglogpac = policy_model.proba_distribution.neglogp(self.actions_ph)
                self.entropy = tf.reduce_mean(policy_model.proba_distribution.entropy())
                self.pg_loss = tf.reduce_mean(self.advs_ph * neglogpac)
                self.vf_loss = mse(tf.squeeze(policy_model.value_fn), self.rewards_ph)
                loss = self.pg_loss - self.entropy * self.ent_coef + self.vf_loss * self.vf_coef

                self.trainable_variables = tf_utils.find_trainable_variables("model")
                grads = tf.gradients(loss, self.trainable_variables)
                if self.max_grad_norm is not None:
                    grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm)
                grads = list(zip(grads, self.trainable_variables))

            trainer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_ph, decay=self.alpha,
                                                epsilon=self.epsilon)
            self.apply_backprop = trainer.apply_gradients(grads)
            self.step = policy_model.step
            self.policy_model = policy_model
            self.value = self.policy_model.value
            tf.global_variables_initializer().run(session=self.sess)

    def train_step(self, inputs: np.ndarray, discounted_rewards, actions, values):
        """
        applies a training step to the model
        """
        advs = discounted_rewards - values

        cur_lr = None
        for _ in range(self.n_steps):
            cur_lr = self.learning_rate_schedule.value()

        td_map = {self.policy_model.input_ph: inputs, self.actions_ph: actions, self.advs_ph: advs,
                  self.rewards_ph: discounted_rewards, self.learning_rate_ph: cur_lr}

        policy_loss, value_loss, policy_entropy, _ = self.sess.run(
            [self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop], td_map)

        return policy_loss, value_loss, policy_entropy

    def save(self, save_path: str, id: str):
        """
        Save the current parameters to file

        :param save_path: (str or file-like object) the save location
        """

        params = {
            "gamma": self.gamma,
            "vf_coef": self.vf_coef,
            "ent_coef": self.ent_coef,
            "max_grad_norm": self.max_grad_norm,
            "learning_rate": self.learning_rate,
            "alpha": self.alpha,
            "epsilon": self.epsilon,
            "lr_schedule": self.lr_schedule,
            "verbose": self.verbose,
            "policy": self.policy,
            "num_train_steps": self.num_train_steps,
            "input_length": self.input_length,
            "output_length": self.output_length,
            "n_steps": self.n_steps,
            "window_size": self.window_size,
            "total_timesteps": self.total_timesteps,
            "layers": self.layers,
            "lstm_units": self.lstm_units,
        }

        json_params = {
            "input_length": self.input_length,
            "output_length": self.output_length,
            "n_steps": self.n_steps,
            "window_size": self.window_size,
            "total_timesteps": self.total_timesteps,
            "gamma": self.gamma,
            "vf_coef": self.vf_coef,
            "ent_coef": self.ent_coef,
            "max_grad_norm": self.max_grad_norm,
            "learning_rate": self.learning_rate,
            "alpha": self.alpha,
            "epsilon": self.epsilon,
            "lr_schedule": self.lr_schedule,
            "layers": self.layers,
            "lstm_units": self.lstm_units,
        }

        weights = self.sess.run(self.trainable_variables)

        utils._save_model_to_file(save_path, id, 'meta', json_params=json_params, weights=weights, params=params)

    @classmethod
    def load(cls, model_id: str,  input_len: int, output_len: int):
        """
        Load the model from file

        """
        load_path = os.path.join(config.model_path, model_id)
        weights, params = utils._load_model_from_file(load_path, 'meta')

        if params['input_length'] != input_len or params['output_length'] != output_len:
            raise ValueError("The input and the output length must be the same as the model's that trying to load.")

        model = cls(input_length=params["input_length"], output_length=params["output_length"],
                    n_steps=params["n_steps"], window_size=params["window_size"], _init_setup_model=False)
        model.__dict__.update(params)
        model.setup_model()

        restores = []
        for param, loaded_weight in zip(model.trainable_variables, weights):
            restores.append(param.assign(loaded_weight))
        model.sess.run(restores)

        return model
Example No. 13
def train():

    # start evaluation process
    popen_args = dict(shell=True, universal_newlines=True,
                      encoding='utf-8')  # , stdout=PIPE, stderr=STDOUT, )
    command_valid = 'python main.py -mode=eval ' + ' '.join(
        ['-log_root=' + args.log_root] + sys.argv[1:])
    valid = subprocess.Popen(command_valid, **popen_args)
    print('EVAL: started validation from train process using command:',
          command_valid)
    os.environ[
        'CUDA_VISIBLE_DEVICES'] = args.gpu  # eval may or may not be on gpu

    # build graph, dataloader
    cleanloader, dirtyloader, _ = get_loader(join(home, 'datasets'),
                                             batchsize=args.batch_size,
                                             poison=args.poison,
                                             svhn=args.svhn,
                                             fracdirty=args.fracdirty,
                                             cifar100=args.cifar100,
                                             noaugment=args.noaugment,
                                             nogan=args.nogan,
                                             cinic=args.cinic,
                                             tanti=args.tanti)
    dirtyloader = utils.itercycle(dirtyloader)
    # print('Validation check: returncode is '+str(valid.returncode))
    model = resnet_model.ResNet(args, args.mode)
    # print('Validation check: returncode is '+str(valid.returncode))

    # initialize session
    print('===================> TRAIN: STARTING SESSION at ' + timenow())
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            gpu_options=tf.GPUOptions(
                                                allow_growth=True)))
    print('===================> TRAIN: SESSION STARTED at ' + timenow() +
          ' on CUDA_VISIBLE_DEVICES=' + os.environ['CUDA_VISIBLE_DEVICES'])

    # load checkpoint
    utils.download_pretrained(
        log_dir, pretrain_dir=args.pretrain_dir)  # download pretrained model
    ckpt_file = join(log_dir, 'model.ckpt')
    ckpt_state = tf.train.get_checkpoint_state(log_dir)
    var_list = list(
        set(tf.global_variables()) - set(tf.global_variables('accum')) -
        set(tf.global_variables('projvec')))
    saver = tf.train.Saver(var_list=var_list, max_to_keep=1)
    sess.run(tf.global_variables_initializer())
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        print('TRAIN: No pretrained model. Initialized from random')
    else:
        print('TRAIN: Loading checkpoint %s' % ckpt_state.model_checkpoint_path)

    print('TRAIN: Start')
    scheduler = Scheduler(args)
    for epoch in range(args.epoch_end):  # loop over epochs
        accumulator = Accumulator()

        if args.poison:

            # loop over batches
            for batchid, (cleanimages, cleantarget) in enumerate(cleanloader):

                # pull anti-training samples
                dirtyimages, dirtytarget = dirtyloader.__next__()

                # convert from torch format to numpy onehot, batch them, and apply softmax hack
                cleanimages, cleantarget, dirtyimages, dirtytarget, batchimages, batchtarget, dirtyOne, dirtyNeg = \
                  utils.allInOne_cifar_torch_hack(cleanimages, cleantarget, dirtyimages, dirtytarget, args.nodirty, args.num_classes, args.nogan)

                # from matplotlib.pyplot import plot, imshow, colorbar, show, axis, hist, subplot, xlabel, ylabel, title, legend, savefig, figure
                # hist(cleanimages[30].ravel(), 25); show()
                # hist(dirtyimages[30].ravel(), 25); show()
                # imshow(utils.imagesc(cleanimages[30])); show()
                # imshow(utils.imagesc(dirtyimages[30])); show()

                # run the graph
                _, global_step, loss, predictions, acc, xent, xentPerExample, weight_norm = sess.run(
                    [
                        model.train_op, model.global_step, model.loss,
                        model.predictions, model.precision, model.xent,
                        model.xentPerExample, model.weight_norm
                    ],
                    feed_dict={
                        model.lrn_rate: scheduler._lrn_rate,
                        model._images: batchimages,
                        model.labels: batchtarget,
                        model.dirtyOne: dirtyOne,
                        model.dirtyNeg: dirtyNeg
                    })

                metrics = {}
                metrics['clean/xent'], metrics['dirty/xent'], metrics['clean/acc'], metrics['dirty/acc'] = \
                  accumulator.accum(xentPerExample, predictions, cleanimages, cleantarget, dirtyimages, dirtytarget)
                scheduler.after_run(global_step, len(cleanloader))

                if np.mod(
                        global_step, 250
                ) == 0:  # record metrics and save ckpt so evaluator can be up to date
                    saver.save(sess, ckpt_file)
                    metrics['lr'], metrics['train/loss'], metrics['train/acc'], metrics['train/xent'] = \
                      scheduler._lrn_rate, loss, acc, xent
                    metrics['clean_minus_dirty'] = metrics[
                        'clean/acc'] - metrics['dirty/acc']
                    if 'timeold' in locals():
                        metrics['time_per_step'] = (time() - timeold) / 250
                    timeold = time()
                    experiment.log_metrics(metrics, step=global_step)
                    print(
                        'TRAIN: loss: %.3f, acc: %.3f, global_step: %d, epoch: %d, time: %s'
                        % (loss, acc, global_step, epoch, timenow()))

            # log clean and dirty accuracy over entire batch
            metrics = {}
            metrics['clean/acc_full'], metrics['dirty/acc_full'], metrics['clean_minus_dirty_full'], metrics['clean/xent_full'], metrics['dirty/xent_full'] = \
              accumulator.flush()
            experiment.log_metrics(metrics, step=global_step)
            experiment.log_metric('weight_norm', weight_norm)
            print('TRAIN: epoch', epoch, 'finished. cleanacc',
                  metrics['clean/acc_full'], 'dirtyacc',
                  metrics['dirty/acc_full'])

        else:  # use hessian

            # loop over batches
            for batchid, (cleanimages, cleantarget) in enumerate(cleanloader):

                # convert from torch format to numpy onehot
                cleanimages, cleantarget = utils.cifar_torch_to_numpy(
                    cleanimages, cleantarget, args.num_classes)

                # run the graph
                gradsSpecCorr, valtotEager, bzEager, valEager, _, _, global_step, loss, predictions, acc, xent, grad_norm, valEager, projvec_corr, weight_norm = \
                  sess.run([model.gradsSpecCorr, model.valtotEager, model.bzEager, model.valEager, model.train_op, model.projvec_op, model.global_step,
                    model.loss, model.predictions, model.precision, model.xent, model.grad_norm, model.valEager, model.projvec_corr, model.weight_norm],
                    feed_dict={model.lrn_rate: scheduler._lrn_rate,
                               model._images: cleanimages,
                               model.labels: cleantarget,
                               model.speccoef: scheduler.speccoef,
                               model.projvec_beta: args.projvec_beta})

                # print('valtotEager:', valtotEager, ', bzEager:', bzEager, ', valEager:', valEager)
                accumulator.accum(predictions, cleanimages, cleantarget)
                scheduler.after_run(global_step, len(cleanloader))

                if np.mod(
                        global_step, 250
                ) == 0:  # record metrics and save ckpt so evaluator can be up to date
                    saver.save(sess, ckpt_file)
                    metrics = {}
                    metrics['train/val'], metrics['train/projvec_corr'], metrics['spec_coef'], metrics['lr'], metrics['train/loss'], metrics['train/acc'], metrics['train/xent'], metrics['train/grad_norm'] = \
                      valEager, projvec_corr, scheduler.speccoef, scheduler._lrn_rate, loss, acc, xent, grad_norm
                    if gradsSpecCorr:
                        metrics['gradsSpecCorrMean'] = sum(
                            gradsSpecCorr) / float(len(gradsSpecCorr))
                    if 'timeold' in locals():
                        metrics['time_per_step'] = (time() - timeold) / 250
                    timeold = time()
                    experiment.log_metrics(metrics, step=global_step)
                    experiment.log_metric('weight_norm', weight_norm)

                    # plot example train image
                    # plt.imshow(cleanimages[0])
                    # plt.title(cleantarget[0])
                    # experiment.log_figure()

                    # log progress
                    print(
                        'TRAIN: loss: %.3f\tacc: %.3f\tval: %.3f\tcorr: %.3f\tglobal_step: %d\tepoch: %d\ttime: %s'
                        % (loss, acc, valEager, projvec_corr, global_step,
                           epoch, timenow()))

            # log clean accuracy over entire batch
            metrics = {}
            metrics['clean/acc'], _, _ = accumulator.flush()
            experiment.log_metrics(metrics, step=global_step)
            print('TRAIN: epoch', epoch, 'finished. clean/acc',
                  metrics['clean/acc'])

        # log ckpt to comet
        if not epoch % 20:
            if args.upload:
                experiment.log_asset_folder(log_dir)

        # restart evaluation process if it somehow died
        # if valid.returncode != None:
        #   valid.kill(); sleep(1)
        #   valid = subprocess.Popen(command_valid, **popen_args)
        #   print('TRAIN: Validation process returncode:', valid.returncode)
        #   print('===> Restarted validation process, new PID', valid.pid)

    # uploader to dropbox
    if args.upload:
        comet.log_asset_folder(log_dir)
        os.system('dbx pload ' + log_dir + ' ' +
                  join('ckpt/poisoncifar', projname) + '/')
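
The Scheduler(args) in this training loop is stateful: it exposes the current learning rate as _lrn_rate (and a speccoef for the Hessian branch) and is advanced with after_run(global_step, steps_per_epoch) once per batch. A minimal sketch of that shape is below; the attribute names come from the call sites, while args.lrn_rate and the decay boundaries are placeholders, not the repository's actual schedule.

class StepDecayScheduler:
    """Tracks the current learning rate and updates it from the global step after each batch."""

    def __init__(self, args):
        self.base_lr = args.lrn_rate          # assumed attribute name
        self._lrn_rate = self.base_lr
        self.speccoef = getattr(args, 'speccoef', 0.0)

    def after_run(self, global_step, steps_per_epoch):
        epoch = global_step / steps_per_epoch
        # Piecewise-constant decay; the boundaries below are illustrative only.
        if epoch < 100:
            self._lrn_rate = self.base_lr
        elif epoch < 150:
            self._lrn_rate = self.base_lr * 0.1
        else:
            self._lrn_rate = self.base_lr * 0.01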
Example No. 14
def run(args):
    device = 'cuda' if torch.cuda.is_available() and (
        not args.no_cuda) else 'cpu'

    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(
        args)
    net = get_network(device, args, input_size, input_channel, n_class)
    print(net)
    n_params = 0
    for param_name, param_value in net.named_parameters():
        if 'deepz_lambda' not in param_name:
            n_params += param_value.numel()
            param_value.requires_grad_(True)
        else:
            param_value.data = torch.ones(param_value.size()).to(device)
            param_value.requires_grad_(False)
    print('Number of parameters: ', n_params)

    n_epochs = args.n_epochs
    if args.train_mode == 'train':
        timestamp = int(time.time())
        model_dir = args.root_dir + 'models_new/%s/%s/%d/%s_%.5f/%d' % (
            args.dataset, args.exp_name, args.exp_id, args.net, args.train_eps,
            timestamp)
        print('Saving model to:', model_dir)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        writer = None

        epoch = 0
        relu_stable = args.relu_stable
        lr = args.lr
        for j in range(len(args.layers) - 1):
            if args.opt == 'adam':
                opt = optim.Adam(net.parameters(), lr=lr, weight_decay=0)
            else:
                opt = optim.SGD(net.parameters(),
                                lr=lr,
                                momentum=0.9,
                                weight_decay=0)

            if args.lr_sched == 'step_lr':
                lr_scheduler = optim.lr_scheduler.StepLR(
                    opt, step_size=args.lr_step, gamma=args.lr_factor)
            else:
                lr_scheduler = optim.lr_scheduler.OneCycleLR(
                    opt,
                    div_factor=10000,
                    max_lr=lr,
                    pct_start=args.pct_start,
                    steps_per_epoch=len(train_loader),
                    epochs=n_epochs)

            eps = args.eps_factor**(len(args.layers) - 2 - j) * (
                args.start_eps_factor * args.train_eps)
            kappa_sched = Scheduler(0.0, 1.0, num_train * args.mix_epochs, 0)
            eps_sched = Scheduler(0 if args.anneal else eps, eps,
                                  num_train * args.mix_epochs, 0)
            prev_layer_idx, curr_layer_idx = args.layers[j], args.layers[j + 1]
            next_layer_idx = args.layers[j + 2] if j + 2 < len(
                args.layers) else None
            print(
                'new train phase: eps={}, lr={}, prev_layer={}, curr_layer={}, next_layer={}'
                .format(eps, lr, prev_layer_idx, curr_layer_idx,
                        next_layer_idx))
            layer_dir = '{}/{}'.format(model_dir, curr_layer_idx)
            if not os.path.exists(layer_dir):
                os.makedirs(layer_dir)
            for curr_epoch in range(n_epochs):
                train(device, writer, epoch, args, prev_layer_idx,
                      curr_layer_idx, next_layer_idx, net, eps_sched,
                      kappa_sched, opt, train_loader, lr_scheduler,
                      relu_stable)
                if curr_epoch >= args.mix_epochs and isinstance(
                        lr_scheduler, optim.lr_scheduler.StepLR):
                    lr_scheduler.step()
                if (epoch + 1) % args.test_freq == 0:
                    torch.save(
                        net.state_dict(),
                        os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                    with torch.no_grad():
                        valid_nat_loss, valid_nat_acc, valid_robust_loss, valid_robust_acc = test(
                            device, epoch, args, net, test_loader,
                            [curr_layer_idx])
                epoch += 1
            relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_factor
            n_epochs -= args.n_epochs_reduce
            lr = lr * args.lr_layer_dec
    elif args.train_mode == 'print':
        print('printing network to:', args.out_net_file)
        dummy_input = torch.randn(1,
                                  input_channel,
                                  input_size,
                                  input_size,
                                  device='cuda')
        net.skip_norm = True
        torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test(device, 0, args, net, test_loader, args.layers)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)
    return valid_nat_loss, valid_nat_acc, valid_robust_loss, valid_robust_acc
Example No. 15
class SkyblockEvents(commands.Cog, name='Skyblock'):
    """
    General commands for skyblock
    """

    emoji = '🏝️'

    def __init__(self, bot):
        self.bot = bot
        self.guilds_db = bot.db['guilds']

        self.scheduler = Scheduler(self.__class__.__name__)
        self.event_schedules = {}

    def cog_unload(self):
        """
        Cancel scheduled event tasks.
        """
        self.scheduler.cancel_all()

    @commands.group(invoke_without_command=True)
    async def events(self, ctx):
        """
        Commands for skyblock events.
        """
        await ctx.send_help(ctx.command)

    @events.command()
    @checks.is_guild_admin()
    @commands.max_concurrency(1, per=commands.BucketType.guild, wait=False)
    async def setup(self, ctx):
        """
        Config skyblock events alert for your server.
        """
        guild_config = await get_guild_config(self.guilds_db, ctx=ctx)

        page = EventPages(ctx, guild_config, self.guilds_db)
        await page.paginate()

    @events.group(invoke_without_command=True)
    @checks.is_sbs_admin()
    async def schedule(self, ctx):
        """
        Commands for scheduling skyblock events.
        """
        await ctx.send_help(ctx.command)

    @schedule.command()
    @checks.is_sbs_admin()
    async def start(self, ctx, *, event_name):
        """
        Enter <event name> to start that event schedule or `all` to start all.
        """
        _text = ''
        _found = False
        for event in SKYBLOCK_EVENTS.keys():
            if event_name.lower() in (SKYBLOCK_EVENTS[event]['name'].lower(),
                                      'all', event.lower()):
                _found = True
                _text += await self.schedule_event(event)

        if not _found:
            await ctx.send(
                f'{ctx.author.mention}, Failed to start {event_name}.')
        else:
            await ctx.send(f'{ctx.author.mention}\n{_text}')

    @schedule.command()
    @checks.is_sbs_admin()
    async def stop(self, ctx, *, event_name):
        """ 
        Enter <event name> to stop that event schedule or `all` to stop all.
        """
        _text = ''
        _found = False
        for event in SKYBLOCK_EVENTS.keys():
            if event_name.lower() in (SKYBLOCK_EVENTS[event]['name'].lower(),
                                      'all', event.lower()):
                _found = True
                _text += await self.delete_event_schedule(event)

        if not _found:
            await ctx.send(
                f'{ctx.author.mention}, Failed to stop {event_name}.')
        else:
            await ctx.send(f'{ctx.author.mention}\n{_text}')

    @schedule.command()
    @checks.is_sbs_admin()
    async def status(self, ctx):
        """
        Check all the skyblock event schedules status.
        """
        embed = Embed(
            ctx=ctx,
            title='Current event schedules status',
            description=
            f'There are currently {len(self.event_schedules)} event schedules. (EST time)'
        )

        for event in self.event_schedules.values():
            estimate = datetime.fromtimestamp(
                event["estimate"] / 1000.0).astimezone(EST).strftime(
                    datetime_fmt).lstrip('0').replace(' 0', ' ')
            embed.add_field(
                name=f'{SKYBLOCK_EVENTS[event["name"]]["name"]} Status',
                value=f'Current estimate > {estimate}\n'
                f'Next schedule > {event["time"].astimezone(EST).strftime(datetime_fmt).lstrip("0").replace(" 0", " ")}\n'
                f'Next schedule type > {event["type"]}',
                inline=False)

        await embed.send()

    def _schedule_event_task(self, event_data):
        """
        Schedule an event.
        """
        event_task_id = event_data['task_id']
        event_datetime = event_data['time']

        self.event_schedules[event_task_id] = event_data

        coroutine = self.send_event_alert(event_data) if event_data[
            'type'] == 'alert' else self.get_event_estimate(event_data)
        self.scheduler.schedule_at(event_datetime, event_task_id, coroutine)

    async def send_event_alert(self, event_data):
        """
        Send the event alert 5 mins before starting and schedule tasks to get new estimate time.
        """
        del self.event_schedules[event_data['task_id']]

        event = event_data['name']
        _howlong = round(
            ((event_data['estimate'] - current_milli_time()) / 1000.0) / 60.0)
        _when = datetime.fromtimestamp(
            event_data["estimate"] /
            1000.0).astimezone(EST).strftime(time_fmt).lstrip('0').replace(
                ' 0', ' ')
        embed = Embed(
            title=f'{SKYBLOCK_EVENTS[event]["name"]} Alert',
            description=
            f'The event is starting in {_howlong} minutes at {_when} EST.')

        async for guild in self.guilds_db.find({
                'events.default_enabled': True,
                f'events.{event}.enabled': True,
                'global_blacklisted': False,
                'global_blacklisted_commands': {
                    '$ne': 'events'
                }
        }):
            self.bot.loop.create_task(self._event_alert(guild, event, embed))

        # Calculate time when to get new estimate time. (20 min after event happened)
        time = datetime.fromtimestamp((event_data['estimate'] / 1000.0) + 1200)

        # Schedule new task to get new estimate
        event_data['task_id'] = id(time)
        event_data['type'] = 'get_estimate'
        event_data['time'] = time

        self._schedule_event_task(event_data)

    async def get_event_estimate(self, event_data):
        """
        Get new event estimate time and schedule tasks to alert the event.
        """
        del self.event_schedules[event_data['task_id']]

        event = event_data['name']
        estimate = await get_event_estimate_time(
            SKYBLOCK_EVENTS[event]['endpoint'], session=self.bot.http_session)
        if estimate is None or estimate < (current_milli_time() +
                                           (300 * 1000)):
            time = (current_milli_time() / 1000.0) + 600
            time = datetime.fromtimestamp(time)

            # Reschedule in 10 mins to get new estimate
            event_data['task_id'] = id(time)
            event_data['time'] = time

            self._schedule_event_task(event_data)
        else:
            time = datetime.fromtimestamp((estimate / 1000.0) - 300.0)

            # Schedule new event alert
            event_data['task_id'] = id(time)
            event_data['type'] = 'alert'
            event_data['estimate'] = estimate
            event_data['time'] = time

            self._schedule_event_task(event_data)

    async def delete_event_schedule(self, event):
        """
        Delete an event schedule given its name and cancel the running task.
        """
        # Iterate over a copy so the deletion below does not mutate the dict mid-iteration
        for event_schedule in list(self.event_schedules.values()):
            if event_schedule['name'] == event:
                del self.event_schedules[event_schedule['task_id']]
                self.scheduler.cancel(event_schedule['task_id'])

                return f'{SKYBLOCK_EVENTS[event]["name"]} has been successfully stopped.\n'
        return f'{SKYBLOCK_EVENTS[event]["name"]} is already stopped.\n'

    async def schedule_event(self, event):
        """
        Schedule an event given its name.
        """
        # Check if event is already started
        if any(event == schedule['name']
               for schedule in self.event_schedules.values()):
            return f'{SKYBLOCK_EVENTS[event]["name"]} is already running.\n'

        estimate = await get_event_estimate_time(
            SKYBLOCK_EVENTS[event]['endpoint'], session=self.bot.http_session)
        if estimate is None or estimate < (current_milli_time() +
                                           (300 * 1000)):
            time = (current_milli_time() / 1000.0) + 1200
            time = datetime.fromtimestamp(time)

            # Schedule in 20 mins to get new estimate
            event_data = {
                'name': event,
                'task_id': id(time),
                'type': 'get_estimate',
                'estimate': estimate,
                'time': time
            }

            self._schedule_event_task(event_data)
        else:
            # Calculate when to alert event (5 mins before event starts)
            time = datetime.fromtimestamp((estimate / 1000.0) - 300.0)

            event_data = {
                'name': event,
                'task_id': id(time),
                'type': 'alert',
                'estimate': estimate,
                'time': time
            }
            self._schedule_event_task(event_data)

        return f'{SKYBLOCK_EVENTS[event]["name"]} has been successfully started.\n'

    async def _event_alert(self, guild_config, event, embed):
        if guild_config['events'][event]['webhook_data'] is not None:
            splitted_webhook_data = guild_config['events'][event][
                'webhook_data'].split('/')
        else:
            splitted_webhook_data = guild_config['events'][
                'default_webhook_data'].split('/')

        # Get webhook to alert to
        webhook = Webhook.partial(splitted_webhook_data[0],
                                  splitted_webhook_data[1],
                                  adapter=AsyncWebhookAdapter(
                                      self.bot.http_session))

        embed.timestamp = datetime.now()
        embed.set_footer(text='Skyblock Simplified',
                         icon_url='https://i.imgur.com/V7ENVHr.png')

        mention_id = guild_config['events'][event]['mention_id']
        if mention_id is None:
            mention_id = guild_config['events']['default_mention_id']

        # Send webhook embed
        try:
            await webhook.send(
                content=f'<@&{mention_id}>' if mention_id else '',
                embed=embed,
                username='Skyblock Event Alert',
                avatar_url='https://i.imgur.com/Fhx03E7.png')
        except Exception:
            await self._handle_failed_webhook_send(guild_config, event,
                                                   int(guild_config['_id']))

    async def _handle_failed_webhook_send(self, guild_config, event, guild_id):
        # Try to send message to owner that it's failed
        try:
            guild = self.bot.get_guild(guild_id)
            owner = guild.owner

            await owner.send(
                f'{owner.mention}\nFailed to send the {SKYBLOCK_EVENTS[event]["name"]} alert to the configured channel.\n'
                'This may be due to missing permissions or because the webhook was deleted.\n'
                'Please configure it again with `sbs events setup`.')
        except Exception:
            pass

        # Set enabled to false, webhook/channel to none
        await self.guilds_db.update_one({'_id': guild_config['_id']}, {
            '$set': {
                f'events.{event}.enabled': False,
                f'events.{event}.webhook_data': None
            }
        })
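
The cog above only assumes a Scheduler exposing schedule_at(when, task_id, coroutine) and cancel(task_id). For reference, here is a minimal asyncio-based sketch of that interface; the class and its internals are assumptions for illustration, not the project's actual Scheduler.

import asyncio
from datetime import datetime


class SimpleAsyncScheduler:
    """Minimal sketch: run a coroutine at a given datetime, keyed by a task id."""

    def __init__(self):
        self._tasks = {}

    def schedule_at(self, when: datetime, task_id, coroutine):
        # Delay until `when` (naive local time, matching datetime.fromtimestamp above).
        delay = max(0.0, (when - datetime.now()).total_seconds())

        async def _runner():
            await asyncio.sleep(delay)
            self._tasks.pop(task_id, None)
            await coroutine

        # Assumes it is called from inside a running event loop (as in a discord.py bot).
        self._tasks[task_id] = asyncio.get_event_loop().create_task(_runner())

    def cancel(self, task_id):
        task = self._tasks.pop(task_id, None)
        if task is not None:
            task.cancel()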
Exemplo n.º 16
0
Arquivo: main.py Projeto: eth-sri/ACE
def run(args=None):
    device = 'cuda' if torch.cuda.is_available() and (
        not args.no_cuda) else 'cpu'
    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(
        args)

    lossFn = nn.CrossEntropyLoss(reduction='none')
    evalFn = lambda x: torch.max(x, dim=1)[1]

    net = get_net(device,
                  args.dataset,
                  args.net,
                  input_size,
                  input_channel,
                  n_class,
                  load_model=args.load_model,
                  net_dim=args.cert_net_dim
                  )  #, feature_extract=args.core_feature_extract)

    timestamp = int(time.time())
    model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name,
                                               args.exp_id, args.net,
                                               args.train_eps, timestamp)
    model_dir = args.root_dir + 'models_new/%s' % (model_signature)
    args.model_dir = model_dir
    count_vars(args, net)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    if isinstance(net, UpscaleNet):
        relaxed_net = None
        relu_ids = None
    else:
        relaxed_net = RelaxedNetwork(net.blocks, args.n_rand_proj).to(device)
        relu_ids = relaxed_net.get_relu_ids()

    if "nat" in args.train_mode:
        cnet = CombinedNetwork(net,
                               relaxed_net,
                               lossFn=lossFn,
                               evalFn=evalFn,
                               device=device,
                               no_r_net=True).to(device)
    else:
        dummy_input = torch.rand((1, ) + net.dims[0],
                                 device=device,
                                 dtype=torch.float32)
        cnet = CombinedNetwork(net,
                               relaxed_net,
                               lossFn=lossFn,
                               evalFn=evalFn,
                               device=device,
                               dummy_input=dummy_input).to(device)

    n_epochs, test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = args.n_epochs, None, None, None, None

    if 'train' in args.train_mode:
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))

        eps = 0
        epoch = 0
        lr = args.lr
        n_epochs = args.n_epochs

        if "COLT" in args.train_mode:
            relu_stable = args.relu_stable
            # if args.layers is None:
            #     args.layers = [-2, -1] + relu_ids
            layers = get_layers(args.train_mode,
                                cnet,
                                n_attack_layers=args.n_attack_layers,
                                protected_layers=args.protected_layers)
        elif "adv" in args.train_mode:
            relu_stable = None
            layers = [-1, -1]
            args.mix = False
        elif "natural" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
            args.nat_factor = 1
            args.mix = False
        elif "diffAI" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
        else:
            assert False, "Unknown train mode %s" % args.train_mode

        print('Saving model to:', model_dir)
        print('Training layers: ', layers)

        for j in range(len(layers) - 1):
            opt, lr_scheduler = get_opt(cnet.net,
                                        args.opt,
                                        lr,
                                        args.lr_step,
                                        args.lr_factor,
                                        args.n_epochs,
                                        train_loader,
                                        args.lr_sched,
                                        fixup="fixup" in args.net)

            curr_layer_idx = layers[j + 1]
            eps_old = eps
            eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j)

            kappa_sched = Scheduler(0.0 if args.mix else 1.0, 1.0,
                                    num_train * args.mix_epochs, 0)
            beta_sched = Scheduler(
                args.beta_start if args.mix else args.beta_end, args.beta_end,
                args.train_batch * len(train_loader) * args.mix_epochs, 0)
            eps_sched = Scheduler(eps_old if args.anneal else eps, eps,
                                  num_train * args.anneal_epochs, 0)

            layer_dir = '{}/{}'.format(model_dir, curr_layer_idx)
            if not os.path.exists(layer_dir):
                os.makedirs(layer_dir)

            print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.
                  format(eps, lr, curr_layer_idx))

            for curr_epoch in range(n_epochs):
                train(device,
                      epoch,
                      args,
                      j + 1,
                      layers,
                      cnet,
                      eps_sched,
                      kappa_sched,
                      opt,
                      train_loader,
                      lr_scheduler,
                      relu_ids,
                      stats,
                      relu_stable,
                      relu_stable_protected=args.relu_stable_protected,
                      beta_sched=beta_sched)

                if isinstance(lr_scheduler, optim.lr_scheduler.StepLR
                              ) and curr_epoch >= args.mix_epochs:
                    lr_scheduler.step()

                if (epoch + 1) % args.test_freq == 0:
                    with torch.no_grad():
                        test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = test(
                            device,
                            args,
                            cnet,
                            test_loader if args.test_set == "test" else
                            train_loader, [curr_layer_idx],
                            stats=stats,
                            log_ind=(epoch + 1) % n_epochs == 0)

                if (epoch + 1) % args.test_freq == 0 or (epoch +
                                                         1) % n_epochs == 0:
                    torch.save(
                        net.state_dict(),
                        os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                    torch.save(
                        opt.state_dict(),
                        os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1)))

                stats.update_tb(epoch)
                epoch += 1
            relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec
            lr = lr * args.lr_layer_dec
        if args.cert:
            with torch.no_grad():
                diffAI_cert(
                    device,
                    args,
                    cnet,
                    test_loader if args.test_set == "test" else train_loader,
                    stats=stats,
                    log_ind=True,
                    epoch=epoch,
                    domains=args.cert_domain)
    elif args.train_mode == 'print':
        print('printing network to:', args.out_net_file)
        dummy_input = torch.randn(1,
                                  input_channel,
                                  input_size,
                                  input_size,
                                  device='cuda')
        net.skip_norm = True
        torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test(device,
                 args,
                 cnet,
                 test_loader if args.test_set == "test" else train_loader,
                 [-1],
                 log_ind=True)
    elif args.train_mode == "cert":
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))
        print('Saving results to:', model_dir)
        with torch.no_grad():
            diffAI_cert(
                device,
                args,
                cnet,
                test_loader if args.test_set == "test" else train_loader,
                stats=stats,
                log_ind=True,
                domains=args.cert_domain)
        exit(0)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)

    return test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc
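
In this training loop the Scheduler is built as Scheduler(start, end, num_steps, 0) and ramps kappa, beta and eps between two values over a number of samples. The sketch below is one plausible linear-ramp reading of that signature; the method names (advance, get) and the fourth "warmup" argument are assumptions, not necessarily what the eth-sri/ACE class exposes.

class LinearRampScheduler:
    """Sketch of a (start, end, num_steps, warmup) linear ramp; names are assumed."""

    def __init__(self, start, end, num_steps, warmup=0):
        self.start, self.end = start, end
        self.num_steps, self.warmup = num_steps, warmup
        self.step = 0

    def advance(self, k=1):
        # Advance by k samples/batches, depending on how the caller counts.
        self.step += k

    def get(self):
        if self.step <= self.warmup or self.num_steps <= 0:
            return self.start
        frac = min(1.0, (self.step - self.warmup) / self.num_steps)
        return self.start + frac * (self.end - self.start)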
Exemplo n.º 17
0
def main():
    # Create test env
    print("Creating test environment")
    test_env = gym.make(env_name)

    # Training parameters
    lr_scheduler = Scheduler(initial_value=3e-4, interval=1000,
                             decay_factor=1)  #0.75)
    std_scheduler = Scheduler(initial_value=2.0,
                              interval=1000,
                              decay_factor=0.75)
    discount_factor = 0.99
    gae_lambda = 0.95
    ppo_epsilon = 0.2
    t_max = 10  #180
    num_epochs = 10
    batch_size = 40  #64
    save_interval = 500
    eval_interval = 100
    training = True

    # Environment constants
    frame_stack_size = 4
    input_shape = (84, 84, frame_stack_size)
    num_actions = 1  #envs.action_space.shape[0]
    action_min = np.array([-1.0])  #np.array([-1.0, 0.0, 0.0])
    action_max = np.array([1.0])  #np.array([ 1.0, 1.0, 1.0])

    # Create model
    print("Creating model")
    model_checkpoint = None  #"./models/CarRacing-v0/run2/episode0_step455000.ckpt"
    model = PPO(num_actions,
                input_shape,
                action_min,
                action_max,
                ppo_epsilon,
                value_scale=0.5,
                entropy_scale=0.0001,
                model_checkpoint=model_checkpoint,
                model_name="CarRacing-v0")

    if training:
        print("Creating environments")
        num_envs = 4
        envs = SubprocVecEnv([make_env for _ in range(num_envs)])

        envs.reset()
        initial_frames = envs.get_images()  # use the rendered frames as the initial observations
        frame_stacks = [
            FrameStack(initial_frames[i], preprocess_fn=preprocess_frame)
            for i in range(num_envs)
        ]

        print("Main loop")
        step = 0
        while training:
            # While there are running environments
            print("Training...")
            states, taken_actions, values, rewards, dones = [], [], [], [], []
            learning_rate = np.maximum(lr_scheduler.get_value(), 1e-6)
            std = np.maximum(std_scheduler.get_value(), 0.2)

            # Simulate game for some number of steps
            for _ in range(t_max):
                # Predict and value action given state
                # π(a_t | s_t; θ_old)
                states_t = [
                    frame_stacks[i].get_state() for i in range(num_envs)
                ]
                actions_t, values_t = model.predict(states_t,
                                                    use_old_policy=True,
                                                    std=std)
                for i in range(num_envs):
                    actions_t[i] = 0 if actions_t[i] < 0 else 1
                actions_t = np.squeeze(actions_t.astype(np.int32), axis=-1)

                # Step the environments with the sampled (then thresholded) actions
                envs.step_async(actions_t)
                frames, rewards_t, dones_t, infos = envs.step_wait()
                frames = envs.get_images()  # render

                # Store state, action and reward
                states.append(states_t)  # [T, N, 84, 84, 1]
                taken_actions.append(actions_t)  # [T, N, 3]
                values.append(np.squeeze(values_t, axis=-1))  # [T, N]
                rewards.append(rewards_t)  # [T, N]
                dones.append(dones_t)  # [T, N]

                # Get new state
                for i in range(num_envs):
                    frame_stacks[i].add_frame(frames[i])

            # Calculate last values (bootstrap values)
            states_last = [
                frame_stacks[i].get_state() for i in range(num_envs)
            ]
            last_values = np.squeeze(model.predict(states_last)[-1],
                                     axis=-1)  # [N]

            # Compute returns
            returns = compute_returns(rewards, last_values, dones,
                                      discount_factor)

            # Compute advantages
            advantages = compute_gae(rewards, values, last_values, dones,
                                     discount_factor, gae_lambda)

            # Normalize advantages
            advantages = (advantages -
                          np.mean(advantages)) / np.std(advantages)

            # Flatten arrays
            states = np.array(states).reshape(
                (-1, *input_shape))  # [T x N, 84, 84, 1]
            taken_actions = np.array(taken_actions).reshape(
                (-1, num_actions))  # [T x N, 3]
            returns = returns.flatten()  # [T x N]
            advantages = advantages.flatten()  # [T X N]

            # Train for some number of epochs
            model.update_old_policy()  # θ_old <- θ
            for _ in range(num_epochs):
                # Sample mini-batch randomly and train
                mb_idx = np.random.choice(len(states),
                                          batch_size,
                                          replace=False)

                # Optimize network
                model.train(states[mb_idx],
                            taken_actions[mb_idx],
                            returns[mb_idx],
                            advantages[mb_idx],
                            learning_rate=learning_rate,
                            std=std)

            # Reset environment's frame stack if done
            for i, done in enumerate(dones_t):
                if done:
                    frame_stacks[i].add_frame(frames[i])

            # Save model
            step += 1
            if step % save_interval == 0:
                model.save()
            if step % eval_interval == 0:
                avg_reward = evaluate(model, test_env, 10)
                model.write_to_summary("eval_avg_reward", avg_reward)

    # Training complete, evaluate model
    avg_reward = evaluate(model, test_env, 10)
    print("Model achieved a final reward of:", avg_reward)
Exemplo n.º 18
0
Arquivo: test.py Projeto: hvcl/ColorRL
def test_func(args,
              shared_model,
              env_conf,
              datasets=None,
              tests=None,
              shared_dict=None):
    ptitle('Valid agent')

    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu

    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        log = {}

        logger = Logger(args.log_dir)

        create_dir(args.log_dir + "models/")
        create_dir(args.log_dir + "tifs/")
        create_dir(args.log_dir + "tifs_test/")

        os.system("cp *.py " + args.log_dir)
        os.system("cp *.sh " + args.log_dir)
        os.system("cp models/*.py " + args.log_dir + "models/")

        setup_logger('{}_log'.format(args.env),
                     r'{0}{1}_log'.format(args.log_dir, args.env))
        log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
            args.env))
        d_args = vars(args)
        env_conf_log = env_conf

    if tests is not None:
        if args.testlbl:
            test_env = EM_env(tests[0],
                              env_conf,
                              type="test",
                              gt_lbl_list=tests[1])
        else:
            test_env = EM_env(tests[0], env_conf, type="test")

    if not args.deploy:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))
        for k in env_conf_log.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, env_conf_log[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw_list, gt_lbl_list = datasets
    env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             atrous_rates=args.atr_rate,
                             num_actions=2,
                             split=args.data_channel,
                             gpu_id=gpu_id,
                             multi=args.multi)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    recent_FgBgDice = ScalaTracker(100)
    recent_bestDice = ScalaTracker(100)
    recent_diffFG = ScalaTracker(100)

    recent_MUCov = ScalaTracker(100)
    recent_MWCov = ScalaTracker(100)
    recent_AvgFP = ScalaTracker(100)
    recent_AvgFN = ScalaTracker(100)

    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    # ----------------------------------------- Deploy / Inference -----------------------------------------
    if args.deploy:
        with torch.cuda.device(gpu_id):
            player.model.load_state_dict(shared_model.state_dict())

        # inference (args, None, player.model, tests [0], test_env, gpu_id, player.env.rng, len (tests [0]))
        if len(tests) == 4:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]), tests[3])
        else:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]))

        return
    # ----------------------------------------- End Deploy / Inference -----------------------------------------

    merge_ratios = []
    split_ratios = []

    if args.wctrl == "s2m":
        schedule = args.wctrl_schedule

        delta = (shared_dict['spl_w'] - shared_dict['mer_w']) / (2 *
                                                                 len(schedule))

        mer_w_delta = delta
        mer_w_var = shared_dict['mer_w']
        mer_w_scheduler = Scheduler(mer_w_var, schedule, mer_w_delta)

        split_delta = -delta / len(args.out_radius)
        split_var = shared_dict['spl_w'] / len(args.out_radius)
        spl_w_scheduler = Scheduler(split_var, schedule, split_delta)

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                if tests is not None and not args.DEBUG:
                    inference(args, logger, player.model, tests[0], test_env,
                              gpu_id, player.env.rng, num_tests)

                if (np.max(env.lbl) != 0 and np.max(env.gt_lbl) != 0):
                    bestDice, FgBgDice, diffFG, MWCov, MUCov, AvgFP, AvgFN, rand_i = evaluate(
                        args, player.env)

                    recent_FgBgDice.push(FgBgDice)
                    recent_diffFG.push(abs(diffFG))
                    recent_bestDice.push(bestDice)

                    recent_MWCov.push(MWCov)
                    recent_MUCov.push(MUCov)
                    recent_AvgFP.push(AvgFP)
                    recent_AvgFN.push(AvgFN)

                    recent_rand_i.push(rand_i)

                    log_info = {
                        "bestDice": recent_bestDice.mean(),
                        "FgBgDice": recent_FgBgDice.mean(),
                        "diffFG": recent_diffFG.mean(),
                        "MWCov": recent_MWCov.mean(),
                        "MUCov": recent_MUCov.mean(),
                        "AvgFP": recent_AvgFP.mean(),
                        "AvgFN": recent_AvgFN.mean(),
                        "rand_i": recent_rand_i.mean()
                    }

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)
                else:
                    bestDice, FgBgDice, diffFG = 0, 0, 0
                    MWCov, MUCov, AvgFP, AvgFN = 0, 0, 0, 0
                    rand_i = 0

                print(
                    "----------------------VALID SET--------------------------"
                )
                print(args.env)
                print("bestDice:", bestDice, "FgBgDice:", FgBgDice, "diffFG:",
                      diffFG, "MWCov:", MWCov, "MUCov:", MUCov, "AvgFP:",
                      AvgFP, "AvgFN:", AvgFN, "rand_i:", rand_i)
                # print ("mean bestDice")
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                print("#gt_values:", len(np.unique(player.env.gt_lbl)))
                print("values:")
                values = player.env.unique()
                print(np.concatenate([values[0][None], values[1][None]], 0))
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist[::-1], 0)

                if not "3D" in args.data:
                    for i in range(3):
                        player.probs.insert(0, np.zeros_like(player.probs[0]))
                    while (len(player.probs) - 3 < args.max_episode_length):
                        player.probs.append(np.zeros_like(player.probs[0]))

                    probslist = [
                        np.repeat(np.expand_dims(prob, -1), 3, -1)
                        for prob in player.probs
                    ]
                    probslist = np.concatenate(probslist, 1)
                    probslist = (probslist * 256).astype(np.uint8, copy=False)
                    # log_img = renderlist [-1]
                    print(probslist.shape, log_img.shape)
                    log_img = np.concatenate([probslist, log_img], 0)

                log_info = {"valid_sample": log_img}

                print(log_img.shape)
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_sample.tif",
                    log_img.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_pred.tif",
                    player.env.lbl.astype(np.uint8))
                io.imsave(args.log_dir + "tifs/" + str(num_tests) + "_gt.tif",
                          player.env.gt_lbl.astype(np.int32))

                if args.seg_scale:
                    log_info["scaler"] = player.env.scaler

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward':
                        reward_mean,
                        '100_mean_reward':
                        recent_episode_scores.mean(),
                        'split_ratio':
                        player.env.split_ratio_sum.sum() /
                        np.count_nonzero(player.env.gt_lbl),
                        'merge_ratio':
                        player.env.merge_ratio_sum.sum() /
                        np.count_nonzero(player.env.gt_lbl),
                    }

                    if args.wctrl == 's2m':
                        log_info.update({
                            'mer_w':
                            mer_w_scheduler.value(),
                            'spl_w':
                            spl_w_scheduler.value() * len(args.out_radius),
                        })

                    merge_ratios.append(player.env.merge_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))
                    split_ratios.append(player.env.split_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))

                    print("split ratio: ", np.max(player.env.split_ratio_sum),
                          np.min(player.env.split_ratio_sum))
                    print("merge ratio: ", np.max(player.env.merge_ratio_sum),
                          np.min(player.env.merge_ratio_sum))

                    print("merge ratio: ", merge_ratios)
                    print("split ratio: ", split_ratios)

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            if args.wctrl == "s2m":
                shared_dict["spl_w"] = spl_w_scheduler.next()
                shared_dict["mer_w"] = mer_w_scheduler.next()
                player.env.config["spl_w"] = shared_dict["spl_w"]
                player.env.config["mer_w"] = shared_dict["mer_w"]

            player.clear_actions()
            state = player.env.reset(player.model, gpu_id)
            renderlist.append(player.env.render())

            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
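
The s2m weight control above expects a Scheduler built from (current value, milestone schedule, delta) that exposes value() and next(). One plausible, purely hypothetical reading of that interface (not the ColorRL implementation):

class MilestoneScheduler:
    """Sketch: bump the value by `delta` each time a milestone in `schedule` is crossed."""

    def __init__(self, value, schedule, delta):
        self._value = value
        self._milestones = sorted(schedule)
        self._delta = delta
        self._step = 0

    def value(self):
        return self._value

    def next(self):
        self._step += 1
        while self._milestones and self._step >= self._milestones[0]:
            self._milestones.pop(0)
            self._value += self._delta
        return self._value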
Exemplo n.º 19
0
class RayWorkerManager(AbstractWorkerManager):
    """
    Adapts Dragonfly's "workers" that execute a function at a point to use Ray's actors
    """
    def __init__(self,
                 max_pending=None,
                 default_func_caller=None,
                 cpu_only=False):
        num_gpus_available = len(
            tf.config.experimental.list_physical_devices('GPU'))

        if max_pending is None:
            max_pending = 4 if cpu_only else max(1, num_gpus_available)
        super().__init__(max_pending)

        worker_resources = {
            "num_gpus":
            0 if debug_mode() or cpu_only or num_gpus_available == 0 else 1,
        }

        self.worker_cls = ray.remote(**worker_resources)(Worker)

        if not ray.is_initialized():
            ray.init(local_mode=debug_mode(), ignore_reinit_error=True)

        self.max_pending = max_pending
        self.default_func_caller = default_func_caller
        if self.default_func_caller:
            self.caller_handle = ray.put(default_func_caller)
        self.scheduler = Scheduler(self.worker_cls.remote()
                                   for _ in range(max_pending))
        self.last_receive_time = 0

    def _child_reset(self):
        pass

    def close_all_queries(self):
        pass

    def a_worker_is_free(self, force_await=False):
        if not self.scheduler.has_a_free_worker() or force_await:
            qinfo = self.scheduler.await_any()
            if not hasattr(qinfo, 'true_val'):
                qinfo.true_val = qinfo.val

            if hasattr(
                    qinfo,
                    'caller_eval_cost') and qinfo.caller_eval_cost is not None:
                qinfo.eval_time = qinfo.caller_eval_cost
            else:
                qinfo.eval_time = 1.0
            qinfo.receive_time = qinfo.send_time + qinfo.eval_time
            qinfo.worker_id = 0
            self.last_receive_time = qinfo.receive_time

            self.latest_results.append(qinfo)

        return self.last_receive_time

    def all_workers_are_free(self):
        num_pending_tasks = self.scheduler.pending_tasks()
        for _ in range(num_pending_tasks):
            self.a_worker_is_free(force_await=True)
        return self.last_receive_time

    def _dispatch_experiment(self, func_caller, qinfo, **kwargs):
        if func_caller is self.default_func_caller:
            func_caller = self.caller_handle
        self.scheduler.submit(func_caller, qinfo, **kwargs)

    def dispatch_single_experiment(self, func_caller, qinfo, **kwargs):
        self._dispatch_experiment(func_caller, qinfo, **kwargs)

    def dispatch_batch_of_experiments(self, func_caller, qinfos, **kwargs):
        for qinfo in qinfos:
            self.dispatch_single_experiment(func_caller, qinfo, **kwargs)

    def get_time_distro_info(self):
        return 'caller_eval_cost'

    def get_poll_time_real(self):
        return 5.0
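
RayWorkerManager delegates the actual bookkeeping to a Scheduler built over a pool of Ray actor handles. The sketch below shows the interface it relies on (submit, await_any, has_a_free_worker, pending_tasks); the `evaluate` method name on the worker actor is an assumption for illustration, not Dragonfly's or Ray's API.

import ray


class RayPoolScheduler:
    """Sketch of the scheduler interface RayWorkerManager uses above."""

    def __init__(self, workers):
        self.workers = list(workers)   # Ray actor handles
        self.pending = []              # outstanding ObjectRefs

    def has_a_free_worker(self):
        return len(self.pending) < len(self.workers)

    def pending_tasks(self):
        return len(self.pending)

    def submit(self, func_caller, qinfo, **kwargs):
        # Round-robin the query onto an actor; `evaluate` is a hypothetical method name.
        worker = self.workers[len(self.pending) % len(self.workers)]
        self.pending.append(worker.evaluate.remote(func_caller, qinfo, **kwargs))

    def await_any(self):
        # Block until any pending query finishes and return its result.
        done, self.pending = ray.wait(self.pending, num_returns=1)
        return ray.get(done[0])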