Example #1
def main(config):
    with SummaryWriter(
            comment='_{}_{}'.format(config.arch, config.dataset)) as writer:
        dataset_config = datasets.cifar10(
        ) if config.dataset == 'cifar10' else datasets.cifar100()
        num_classes = dataset_config.pop('num_classes')
        train_loader, eval_loader = create_data_loaders(**dataset_config,
                                                        config=config)

        dummy_input = (torch.randn(10, 3, 32, 32), )
        net = arch[config.arch](num_classes)
        writer.add_graph(net, dummy_input)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        criterion = create_loss_fn(config)
        if config.is_parallel:
            net = torch.nn.DataParallel(net).to(device)
        else:
            device = 'cuda:{}'.format(
                config.gpu) if torch.cuda.is_available() else 'cpu'
            net = net.to(device)
        optimizer = create_optim(net.parameters(), config)
        scheduler = create_lr_scheduler(optimizer, config)

        trainer = Trainer.PseudoLabel(net, optimizer, criterion, device,
                                      config, writer)
        trainer.loop(config.epochs,
                     train_loader,
                     eval_loader,
                     scheduler=scheduler,
                     print_freq=config.print_freq)
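
Trainer.PseudoLabel is project-specific and not shown here. As a rough sketch, the core pseudo-labeling update it presumably performs looks like the following (the function name, argument names, and the ramp-up weight alpha are assumptions, not this project's API):

# Minimal sketch of one pseudo-label training step (hypothetical names).
# Labeled data uses ordinary cross-entropy; unlabeled data is trained
# against the model's own argmax predictions, weighted by alpha.
def pseudo_label_step(net, criterion, x_labeled, y_labeled, x_unlabeled, alpha):
    loss = criterion(net(x_labeled), y_labeled)
    out_u = net(x_unlabeled)
    pseudo_targets = out_u.argmax(dim=1).detach()  # model's own guesses
    return loss + alpha * criterion(out_u, pseudo_targets)
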
Example #2
def train_mdn_with_proposal(save=True):
    """Use the prior proposal learnt by bootstrapping to train a brand new mdn."""

    # load prior proposal and observations
    _, obs_stats = helper.load(datadir + 'observed_data.pkl')
    net, _, prior_proposal, _ = helper.load(netsdir + 'mdn_svi_proposal_prior_{0}.pkl'.format(n_bootstrap_iter-1))

    n_inputs = n_percentiles
    n_outputs = 3
    n_samples = 5000

    # generate data
    ps = np.empty([n_samples, n_outputs])
    stats = np.empty([n_samples, n_inputs])

    for i in xrange(n_samples):
        prior = 0.0
        while prior < 0.5:
            ps[i] = prior_proposal.gen()[0]
            prior = eval_prior(*ps[i])
        _, _, _, idts, _ = sim_likelihood(*ps[i])
        stats[i] = calc_summary_stats(idts)

    # train an mdn to give the posterior
    minibatch = 100
    maxiter = int(10000 * n_samples / minibatch)
    monitor_every = 1000
    net = mdn.replicate_gaussian_mdn(net, 8)
    regularizer = lf.regularizerSvi(net.mps, net.sps, 0.1)
    trainer = Trainer.Trainer(
        model=net,
        trn_data=[stats, ps],
        trn_loss=net.mlprob + regularizer / n_samples,
        trn_target=net.y
    )
    trainer.train(
        maxiter=maxiter,
        minibatch=minibatch,
        show_progress=True,
        monitor_every=monitor_every
    )

    # calculate the approximate posterior
    mdn_mog = net.get_mog(obs_stats)
    mdn_mog.prune_negligible_components(1.0e-6)
    approx_posterior = mdn_mog / prior_proposal

    # save the net
    if save:
        filename = netsdir + 'mdn_svi_proposal_hiddens_50_tanh_comps_8_sims_5k.pkl'
        helper.save((net, approx_posterior), filename)
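
The division mdn_mog / prior_proposal reweights the learned mixture by the proposal it was trained under, which is the proposal-correction step of this method. For Gaussian factors the division has a closed form in natural parameters; a minimal numpy sketch (assuming the precision difference stays positive definite):

import numpy as np

def divide_gaussians(m1, S1, m2, S2):
    # N(m1, S1) / N(m2, S2) is, up to normalization, Gaussian with
    # precision P = P1 - P2 and mean S (P1 m1 - P2 m2), where S = P^{-1}.
    P1, P2 = np.linalg.inv(S1), np.linalg.inv(S2)
    S = np.linalg.inv(P1 - P2)
    m = S @ (P1 @ m1 - P2 @ m2)
    return m, S
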
Example #3
    def run(self):
        best_err1 = 100.
        best_epoch = 0

        logger.info('==> creating model "{}"'.format(args.model_name))
        model = Util.getModel(**vars(args))

        model = model.to(DEVICE)
        # In most cases, setting this flag lets cuDNN's built-in auto-tuner
        # find the most efficient algorithms for the current configuration,
        # improving runtime performance.
        cudnn.benchmark = True
        # define loss function (criterion) and optimizer
        # criterion = nn.CrossEntropyLoss().to(DEVICE)
        # label smoothing
        criterion = LabelSmoothingLoss(classes=self.args.num_classes, smoothing=0.2)

        # define optimizer
        optimizer = Util.getOptimizer(model=model, args=self.args)

        trainer = Trainer(dataset=self.dataset, criterion=criterion, optimizer=optimizer, args=self.args, logger=logger)
        logger.info('train: {} test: {}'.format(self.dataset.get_train_length(), self.dataset.get_validation_length()))
        for epoch in range(0, self.args.EPOCHS):
            # train for one epoch
            model = trainer.train(model=model, epoch=epoch)

            # evaluate on validation set
            model, val_loss, val_err1 = trainer.test(model=model, epoch=epoch)

            # remember best err@1 and save checkpoint
            is_best = val_err1 < best_err1
            if is_best:
                best_err1 = val_err1
                best_epoch = epoch
                logger.info('Best val_err1 {}'.format(best_err1))
            Util.save_checkpoint(model.state_dict(), is_best, args.output_models_dir)
            if not is_best and epoch - best_epoch >= args.patience > 0:
                break

        logger.info('Best val_err1: {:.4f} at epoch {}'.format(best_err1, best_epoch))
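
LabelSmoothingLoss is defined elsewhere in this project; a common implementation consistent with the constructor call above (classes, smoothing) is roughly:

import torch
import torch.nn.functional as F

class LabelSmoothingLoss(torch.nn.Module):
    # Sketch: cross-entropy against a smoothed target distribution, where the
    # true class gets 1 - smoothing and the rest share smoothing uniformly.
    def __init__(self, classes, smoothing=0.0):
        super().__init__()
        self.classes = classes
        self.smoothing = smoothing

    def forward(self, pred, target):
        log_probs = F.log_softmax(pred, dim=-1)
        true_dist = torch.full_like(log_probs, self.smoothing / (self.classes - 1))
        true_dist.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        return torch.mean(torch.sum(-true_dist * log_probs, dim=-1))
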
Example #4
def train_mdn_with_proposal(save=True):
    """Use the prior proposal learnt by bootstrapping to train an mdn."""

    # load prior proposal and observations
    _, x, obs_data = helper.load(datadir + 'observed_data.pkl')
    net, _, prior_proposal, _ = helper.load(
        netsdir +
        'mdn_svi_proposal_prior_{0}.pkl'.format(n_bootstrap_iter - 1))

    n_inputs = n_data
    n_outputs = n_dim
    n_samples = 2000

    # generate data
    ws = np.empty([n_samples, n_outputs])
    data = np.empty([n_samples, n_inputs])

    for i in xrange(n_samples):
        ws[i] = prior_proposal.gen()[0]
        data[i] = gen_y_data(ws[i], x)

    # train an mdn to give the posterior
    minibatch = 100
    maxiter = int(5000 * n_samples / minibatch)
    monitor_every = 1000
    regularizer = lf.regularizerSvi(net.mps, net.sps, 0.01)
    trainer = Trainer.Trainer(model=net,
                              trn_data=[data, ws],
                              trn_loss=net.mlprob + regularizer / n_samples,
                              trn_target=net.y)
    trainer.train(maxiter=maxiter,
                  minibatch=minibatch,
                  show_progress=True,
                  monitor_every=monitor_every)

    # calculate the approximate posterior
    mdn_mog = net.get_mog(obs_data)
    approx_posterior = (mdn_mog * get_prior()) / prior_proposal

    # save the net
    if save:
        filename = netsdir + 'mdn_svi_proposal_hiddens_50_tanh.pkl'
        helper.save((net, approx_posterior), filename)
Example #5
def train_mdn_on_sims_from_prior(save=True):
    """
    Loads simulations done on parameters sampled from the prior, and trains an mdn on them.
    """

    # read data
    params, stats, _ = load_sims_from_prior()
    n_data = 10 ** 5
    params, stats = params[:n_data], stats[:n_data]

    # split data into train and validation sets
    trn_perc = 0.95
    n_trn_data = int(trn_perc * n_data)
    params_trn, stats_trn = params[:n_trn_data], stats[:n_trn_data]
    params_val, stats_val = params[n_trn_data:], stats[n_trn_data:]

    # train an mdn to give the posterior
    n_components = 1
    minibatch = 100
    maxiter = int(1000 * n_data / minibatch)
    monitor_every = 1000
    net = mdn.MDN(n_inputs=9, n_hiddens=[50, 50], act_fun='tanh', n_outputs=4, n_components=n_components)
    trainer = Trainer.Trainer(
        model=net,
        trn_data=[stats_trn, np.log(params_trn)],
        trn_loss=net.mlprob,
        trn_target=net.y,
        val_data=[stats_val, np.log(params_val)],
        val_loss=net.mlprob,
        val_target=net.y
    )
    trainer.train(
        maxiter=maxiter,
        minibatch=minibatch,
        show_progress=True,
        monitor_every=monitor_every
    )

    # save the net
    if save:
        filename = netsdir + 'mdn_prior_hiddens_50_50_tanh_comps_1_sims_100k.pkl'
        helper.save(net, filename)
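
Note that the net is trained on np.log(params), so it models the posterior over log-parameters; anything sampled from it must be exponentiated to return to the original scale. A hedged one-liner, assuming the returned mog exposes the same gen() used on proposals elsewhere in these examples:

# hypothetical: map a posterior sample back from log-space
params_sample = np.exp(net.get_mog(obs_stats).gen()[0])
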
Example #6
def train_mdn_on_sims_from_prior(save=True):
    """
    Loads simulations done on parameters sampled from the prior, and trains an mdn on them.
    """

    # read data
    ws, data, _ = load_sims_from_prior(n_files=1)
    n_sims = 10**5
    ws, data = ws[:n_sims], data[:n_sims]

    # split data into train and validation sets
    trn_perc = 0.95
    n_trn_data = int(trn_perc * n_sims)
    ws_trn, data_trn = ws[:n_trn_data], data[:n_trn_data]
    ws_val, data_val = ws[n_trn_data:], data[n_trn_data:]

    # train an mdn to give the posterior
    minibatch = 100
    maxiter = int(1000 * n_trn_data / minibatch)
    monitor_every = 1000
    net = mdn.MDN(n_inputs=data.shape[1],
                  n_hiddens=[50],
                  act_fun='tanh',
                  n_outputs=n_dim,
                  n_components=1)
    trainer = Trainer.Trainer(model=net,
                              trn_data=[data_trn, ws_trn],
                              trn_loss=net.mlprob,
                              trn_target=net.y,
                              val_data=[data_val, ws_val],
                              val_loss=net.mlprob,
                              val_target=net.y)
    trainer.train(maxiter=maxiter,
                  minibatch=minibatch,
                  show_progress=True,
                  monitor_every=monitor_every)

    # save the net
    if save:
        filename = netsdir + 'mdn_prior_hiddens_50_tanh_sims_100k.pkl'
        helper.save(net, filename)
Example #7
model = DDPG(
    'MlpPolicy',
    env,
    action_noise=action_noise,
    verbose=1,
    tensorboard_log="./h={}/".format(horizons[rank]),
    gamma=0.99,
    learning_rate=0.0003,
)
# model = DDPG.load("Model_DDPG_FS_30.zip")
# model.learning_rate = 0.0003
# model.gamma = 0.99
# action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.05*np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.075 * np.ones(n_actions))
# model.action_noise = action_noise
trainer = Trainer(env)
trainer.retrain_rl(model,
                   episodes=20000,
                   path="./h={}/".format(horizons[rank]))

# ## Training on horizon observations
# env = HorizonObservationWrapper(gym.make("reference_environment:reference-environment-v0"),
#                               horizon_length=horizons[rank],
#                               transform_name="Standard")
# trainer = Trainer(env)
# trainer.train_rl(models_to_train=1, episodes_per_model=20000, path='./h={}/'.format(horizons[rank]))

# ## Testing random action wrapper
# env = JoesActionWrapper(gym.make("reference_environment:reference-environment-v0"))
# trainer = Trainer(env)
# trainer.train_rl(models_to_train=1, episodes_per_model=20000)
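
horizons and rank are never defined in this snippet; they presumably come from a parallel sweep over horizon lengths, with one process per horizon. One assumed setup, sketched with mpi4py (the horizon values are placeholders):

# assumed setup: one MPI process per horizon length
from mpi4py import MPI

horizons = [4, 8, 16, 32]
rank = MPI.COMM_WORLD.Get_rank()
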
Example #8
def train_mdn_proposal_prior(save=True):
    """Trains an svi mdn to return the proposal prior with boostrapping."""

    n_iterations = n_bootstrap_iter
    n_samples = 200

    true_w, x, y = helper.load(datadir + 'observed_data.pkl')
    obs_data = y

    # create an mdn
    n_inputs = obs_data.size
    net = mdn.MDN_SVI(n_inputs=n_inputs,
                      n_hiddens=[50],
                      act_fun='tanh',
                      n_outputs=n_dim,
                      n_components=1)
    regularizer = lf.regularizerSvi(net.mps, net.sps, 0.01)
    prior = get_prior()
    prior_proposal = prior

    for iter in xrange(n_iterations):

        # generate new data
        ws = np.empty([n_samples, n_dim])
        data = np.empty([n_samples, n_inputs])
        dist = np.empty(n_samples)

        for i in xrange(n_samples):

            w = prior_proposal.gen()[0]
            y = gen_y_data(w, x)
            this_data = y

            ws[i] = w
            data[i] = this_data
            dist[i] = calc_dist(this_data, obs_data)

            print 'simulation {0}, distance = {1}'.format(i, dist[i])

        # plot distance histogram
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(dist, bins=int(np.sqrt(n_samples)))
        ax.set_title('iteration = {0}'.format(iter + 1))
        ax.set_xlim([0.0, 20.0])
        plt.show(block=False)

        # train an mdn to give the posterior
        minibatch = 50
        maxiter = int(1000 * n_samples / minibatch)
        monitor_every = 10
        trainer = Trainer.Trainer(model=net,
                                  trn_data=[data, ws],
                                  trn_loss=net.mlprob +
                                  regularizer / n_samples,
                                  trn_target=net.y)
        trainer.train(maxiter=maxiter,
                      minibatch=minibatch,
                      show_progress=True,
                      monitor_every=monitor_every)

        # calculate the approximate posterior
        mdn_mog = net.get_mog(obs_data, n_samples=None)
        approx_posterior = (mdn_mog * prior) / prior_proposal
        prior_proposal = approx_posterior.project_to_gaussian()

        # save the net and the approximate posterior
        if save:
            helper.save(
                (net, approx_posterior, prior_proposal, dist),
                netsdir + 'mdn_svi_proposal_prior_{0}.pkl'.format(iter))
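
project_to_gaussian collapses the mixture posterior into a single Gaussian so it can serve as the next round's proposal. The standard way to do this is moment matching; a numpy sketch, where the weights a, means ms, and covariances Ss are assumed attributes of the mixture:

import numpy as np

def project_to_gaussian(a, ms, Ss):
    # Moment-match a mixture of Gaussians to a single Gaussian:
    # m = sum_i a_i m_i,  S = sum_i a_i (S_i + m_i m_i^T) - m m^T
    m = sum(ai * mi for ai, mi in zip(a, ms))
    S = sum(ai * (Si + np.outer(mi, mi)) for ai, mi, Si in zip(a, ms, Ss))
    return m, S - np.outer(m, m)
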
Example #9
from models import ConvModel
from torch.nn import CrossEntropyLoss


dataset = KPIDataset(
    '../data/train_preprocessed.csv',
    seq_length=1001,
    step_width=1
)

model = ConvModel(1001)

args = {
    "lr": 0.5e-4,
    "betas": (0.9, 0.999),
    "eps": 1e-8,
    "weight_decay": 0.0
}

trainer = Trainer(
    model,
    dataset,
    batch_size=512,
    epochs=100,
    log_nth=800,
    validation_size=0.2,
    optim_args=args,
    loss_func=CrossEntropyLoss()
)

trainer.train()
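
The args dict matches torch.optim.Adam's keyword arguments exactly, so the Trainer presumably forwards it when building its optimizer, along these lines (an assumption about Trainer's internals):

# hypothetical: how Trainer likely consumes optim_args internally
import torch
optimizer = torch.optim.Adam(model.parameters(), **optim_args)
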
Example #10
import torch
from util import Trainer
from torchvision import models
from torch.utils.data import random_split, DataLoader

device = "cuda:1"

dataset = CervixDataset(root='./',
                        csv_path="table_label_v2.csv",
                        transform=preprocess)
# print(len(dataset))
trainset, valset = random_split(dataset, [75, 23])
trainloader = DataLoader(trainset, shuffle=True)
valloader = DataLoader(valset, shuffle=True)

model = models.resnet50(pretrained=False)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
data_loader_dict = {'train': trainloader, 'val': valloader}
model.fc = torch.nn.Linear(in_features=2048, out_features=2, bias=True)
model = model.to(device)

trainer = Trainer(model=model,
                  dataloaders=data_loader_dict,
                  criterion=criterion,
                  optimizer=optimizer,
                  device=device,
                  filename="RESNET50_ADAM_Cervix.pth",
                  num_epochs=100)
best_model, val_acc_history = trainer.train_model()
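
A typical follow-up once train_model returns is to run the best model over the validation loader; a minimal sketch:

# hypothetical evaluation pass with the returned best model
best_model.eval()
correct = total = 0
with torch.no_grad():
    for inputs, labels in valloader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = best_model(inputs).argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
print('val accuracy: {:.3f}'.format(correct / total))
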
Example #11
def train_prior_proposal_with_bootstrapping(save=True):
    """Trains an svi mdn to return the posterior with boostrapping."""

    n_samples = 400

    true_ps, obs_stats = helper.load(datadir + 'observed_data.pkl')

    # create an mdn
    n_inputs = len(obs_stats)
    n_outputs = len(true_ps)
    net = mdn.MDN_SVI(n_inputs=n_inputs, n_hiddens=[50], act_fun='tanh', n_outputs=n_outputs, n_components=1)
    regularizer = lf.regularizerSvi(net.mps, net.sps, 0.01)
    prior_proposal = None

    for iter in xrange(n_bootstrap_iter):

        # generate new data
        ps = np.empty([n_samples, n_outputs])
        stats = np.empty([n_samples, n_inputs])
        dist = np.empty(n_samples)

        for i in xrange(n_samples):

            prior = 0.0
            while prior < 0.5:
                ps[i] = sim_prior() if iter == 0 else prior_proposal.gen()[0]
                prior = eval_prior(*ps[i])
            _, _, _, idts, _ = sim_likelihood(*ps[i])
            stats[i] = calc_summary_stats(idts)
            dist[i] = calc_dist(stats[i], obs_stats)

            print 'simulation {0}, distance = {1}'.format(i, dist[i])

        # plot distance histogram
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(dist, bins=int(np.sqrt(n_samples)))
        ax.set_title('iteration = {0}'.format(iter + 1))
        ax.set_xlim([0.0, 1.0])
        plt.show(block=False)

        # train an mdn to give the posterior
        minibatch = 50
        maxiter = int(1500 * n_samples / minibatch)
        monitor_every = 10
        trainer = Trainer.Trainer(
            model=net,
            trn_data=[stats, ps],
            trn_loss=net.mlprob + regularizer / n_samples,
            trn_target=net.y
        )
        trainer.train(
            maxiter=maxiter,
            minibatch=minibatch,
            show_progress=True,
            monitor_every=monitor_every
        )

        # calculate the approximate posterior
        mdn_mog = net.get_mog(obs_stats, n_samples=None)
        approx_posterior = mdn_mog if iter == 0 else mdn_mog / prior_proposal
        prior_proposal = approx_posterior.project_to_gaussian()

        # save the net and the approximate posterior
        if save:
            helper.save((net, approx_posterior, prior_proposal, dist), netsdir + 'mdn_svi_proposal_prior_{0}.pkl'.format(iter))
Example #12
def train_mdn_proposal_prior(save=True):
    """
    Train a proposal prior using bootstrapping.
    """

    n_iterations = n_bootstrap_iter
    n_data = 500

    # read data
    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')
    obs_stats = helper.load(datadir + 'obs_stats.pkl')
    obs_stats -= pilot_means
    obs_stats /= pilot_stds

    # create an mdn
    net = mdn.MDN_SVI(n_inputs=9, n_hiddens=[50], act_fun='tanh', n_outputs=4, n_components=1)
    regularizer = lf.regularizerSvi(net.mps, net.sps, 0.01)
    prior_proposal = None

    for iter in xrange(n_iterations):

        # generate new data
        params = []
        stats = []
        dist = []
        i = 0

        while i < n_data:

            prop_params = sim_prior_params() if iter == 0 else np.exp(prior_proposal.gen())[0]
            if np.any(np.log(prop_params) < log_prior_min) or np.any(np.log(prop_params) > log_prior_max):
                continue
            try:
                lv = mjp.LotkaVolterra(init, prop_params)
                states = lv.sim_time(dt, duration, max_n_steps=max_n_steps)
            except mjp.SimTooLongException:
                continue

            sum_stats = calc_summary_stats(states)
            sum_stats -= pilot_means
            sum_stats /= pilot_stds

            params.append(prop_params)
            stats.append(sum_stats)
            dist.append(calc_dist(sum_stats, obs_stats))
            i += 1

            print 'simulation {0}, distance = {1}'.format(i, dist[-1])

        params = np.array(params)
        stats = np.array(stats)
        dist = np.array(dist)

        # plot distance histogram
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(dist, bins=int(np.sqrt(n_data)))
        ax.set_title('iteration = {0}'.format(iter + 1))
        ax.set_xlim([0.0, 12.0])
        plt.show(block=False)

        # train an mdn to give the posterior
        minibatch = 100
        maxiter = int(2000 * n_data / minibatch)
        monitor_every = 100
        trainer = Trainer.Trainer(
            model=net,
            trn_data=[stats, np.log(params)],
            trn_loss=net.mlprob + regularizer / n_data,
            trn_target=net.y
        )
        trainer.train(
            maxiter=maxiter,
            minibatch=minibatch,
            show_progress=True,
            monitor_every=monitor_every
        )

        # calculate the approximate posterior
        mdn_mog = net.get_mog(obs_stats)
        approx_posterior = mdn_mog if iter == 0 else mdn_mog / prior_proposal
        prior_proposal = approx_posterior.project_to_gaussian()

        # save the net and the approximate posterior
        if save:
            helper.save((net, approx_posterior, prior_proposal, dist), netsdir + 'mdn_svi_proposal_prior_{0}.pkl'.format(iter))
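
The pilot means and stds standardize each summary statistic to roughly unit scale before training. A pilot run producing them would, in sketch form, simulate from the prior and record the statistics (the sample count is a placeholder and exception handling is omitted):

# hypothetical pilot run matching the names used above
pilot_stats = []
for _ in xrange(1000):
    lv = mjp.LotkaVolterra(init, sim_prior_params())
    states = lv.sim_time(dt, duration, max_n_steps=max_n_steps)
    pilot_stats.append(calc_summary_stats(states))
pilot_stats = np.array(pilot_stats)
pilot_means, pilot_stds = pilot_stats.mean(axis=0), pilot_stats.std(axis=0)
helper.save((pilot_means, pilot_stds), datadir + 'pilot_run_results.pkl')
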
Example #13
def train_mdn_with_proposal(save=True):
    """Use the prior proposal learnt by bootstrapping to train an mdn."""

    # load prior proposal and observations
    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')
    obs_stats = helper.load(datadir + 'obs_stats.pkl')
    obs_stats -= pilot_means
    obs_stats /= pilot_stds
    net, _, prior_proposal, _ = helper.load(netsdir + 'mdn_svi_proposal_prior_{0}.pkl'.format(n_bootstrap_iter-1))

    n_samples = 2000

    # generate data
    params = []
    stats = []
    i = 0

    while i < n_samples:

        prop_params = np.exp(prior_proposal.gen())[0]
        if np.any(np.log(prop_params) < log_prior_min) or np.any(np.log(prop_params) > log_prior_max):
            continue
        try:
            lv = mjp.LotkaVolterra(init, prop_params)
            states = lv.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            continue

        sum_stats = calc_summary_stats(states)
        sum_stats -= pilot_means
        sum_stats /= pilot_stds

        params.append(prop_params)
        stats.append(sum_stats)
        i += 1

    params = np.array(params)
    stats = np.array(stats)

    # train an mdn to give the posterior
    minibatch = 100
    maxiter = int(5000 * n_samples / minibatch)
    monitor_every = 1000
    regularizer = lf.regularizerSvi(net.mps, net.sps, 0.01)
    trainer = Trainer.Trainer(
        model=net,
        trn_data=[stats, np.log(params)],
        trn_loss=net.mlprob + regularizer / n_samples,
        trn_target=net.y
    )
    trainer.train(
        maxiter=maxiter,
        minibatch=minibatch,
        show_progress=True,
        monitor_every=monitor_every
    )

    # calculate the approximate posterior
    mdn_mog = net.get_mog(obs_stats)
    mdn_mog.prune_negligible_components(1.0e-3)
    approx_posterior = mdn_mog / prior_proposal

    # save the net
    if save:
        filename = netsdir + 'mdn_svi_proposal_hiddens_50_tanh_comps_1_sims_2k.pkl'
        helper.save((net, approx_posterior), filename)
Example #14
def main(config):
    # SummaryWriter is used for plotting
    with SummaryWriter(
            comment='_{}_{}'.format(config.arch, config.dataset)) as writer:
        # select the dataset (FPN here, otherwise cifar10)
        dataset_config = datasets.FPN(
            config) if config.dataset == 'FPN' else datasets.cifar10()
        # dataset_config = datasets.cifar10() if config.dataset == 'cifar10' else datasets.cifar100()
        # num_classes is the number of classes
        # num_classes = dataset_config.pop('num_classes')
        train_loader, eval_loader = data_loaders(**dataset_config,
                                                 config=config)
        # torch.set_default_dtype(torch.float64)
        dummy_input = torch.randn(1, 1, 200, 200)  # add a graph of the model
        #net = dw(FPN_ResNet18())
        net = FPN_ResNet18()
        # net = Fpn_n()
        writer.add_graph(net, dummy_input)
        ###
        # checkpoint = torch.load(config.PATH)
        # net.load_state_dict(checkpoint['weight'])
        ###
        device1 = 'cuda' if torch.cuda.is_available() else 'cpu'
        criterion_l = create_loss_fn(config)
        if config.is_parallel:
            net = torch.nn.DataParallel(net).to(device1)
        else:
            device1 = 'cuda:{}'.format(
                config.gpu) if torch.cuda.is_available() else 'cpu'
            net = net.to(device1)
        optimizer = create_optim(net.parameters(), config)
        if config.train:
            trainer = Trainer.PseudoLabel(net,
                                          optimizer,
                                          criterion_l,
                                          device1,
                                          config,
                                          writer,
                                          save_dir='./model')
            scheduler = create_lr_scheduler(optimizer, config)
            trainer.loop(config.epochs,
                         train_loader,
                         eval_loader,
                         scheduler=scheduler,
                         print_freq=config.print_freq)
        else:
            checkpoint = torch.load(config.PATH)
            net.load_state_dict(checkpoint['weight'])
            trainer = Trainer.PseudoLabel(net,
                                          optimizer,
                                          criterion_l,
                                          device1,
                                          config,
                                          writer,
                                          save_dir='./model')
            trainer.testonce(eval_loader, print_freq=config.print_freq)
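
The load path above expects a dict with a 'weight' key, so checkpoints were presumably written with something like:

# hypothetical: matching save format for the checkpoint loaded above
torch.save({'weight': net.state_dict()}, config.PATH)
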


# class dw(torch.nn.Module):
#     '''
#     '''
#
#     def __init__(self, model):
#         '''
#         '''
#
#         # initialize the module using super() constructor
#         super(dw, self).__init__()
#         # assign the architectures
#         self.model = model
#         # assign the weights for each task
#         self.weights = torch.nn.Parameter(torch.ones(2).float())
#
#     def forward(self, x):
#         out = self.model(x)
#         return out
Example #15
from stable_baselines3.common.callbacks import EvalCallback
env_action = RelativeActionWrapper(gym.make("reference_environment:reference-environment-v0"))
env_horizon = HorizonObservationWrapper(env_action,
                              horizon_length=4,
                              transform_name="Deltas")
env = PhaseRewardWrapper(env_horizon, phase="Full")          # Set Phase to Full
eval_callback = EvalCallback(env, best_model_save_path='./logs/',
                             log_path='./logs/', eval_freq=500,
                             deterministic=True, render=False)


### DDPG Noise
### Try increasing the noise when retraining.
### Try less noise based on the policy plot.
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=1 * np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1, tensorboard_log="./logs",
            gamma=0.99,
            learning_rate=0.0003,
            )
# model = DDPG.load("Model_DDPG_FS_30.zip")
# model.learning_rate = 0.0003
# model.gamma = 0.99
# action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.05*np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.075 * np.ones(n_actions))
# model.action_noise = action_noise
trainer = Trainer(env)
trainer.retrain_rl(model, episodes=20000)
Example #16
# Import resnet model
import torch
from torchvision import models

resnet18 = models.resnet18()
resnet18.fc = torch.nn.Linear(512, 10)
resnet18.eval()

# To see the structure of network, uncomment this line
# print(resnet18)

import time
import copy

device = "cuda:1"

from custom_model.alexresnet import alexresnet

model = alexresnet()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
data_loader_dict = {'train': trainloader, 'val': valloader}

from util import Trainer

trainer = Trainer(model=model,
                  dataloaders=data_loader_dict,
                  criterion=criterion,
                  optimizer=optimizer)
best_model, val_acc_history = trainer.train_model()
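
trainloader and valloader are reused from earlier context rather than defined here (compare Example #10); an assumed minimal setup, with trainset and valset split beforehand, would be:

# assumed: loaders feeding the 10-way classifier above
from torch.utils.data import DataLoader

trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
valloader = DataLoader(valset, batch_size=64, shuffle=False)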