def keras2tf(file_path):
    print("[INFO] Freezing model...")

    K.set_learning_phase(0)

    model_file_basename, file_extension = os.path.splitext(os.path.basename(file_path))

    # This needs to be altered depending on the model
    custom_objects = {'tf': tf}

    model = keras.models.load_model(file_path, custom_objects=custom_objects)

    model_input = model.input.name.replace(':0', '')
    model_output = model.output.name.replace(':0', '')

    sess = K.get_session()

    width, height, channels = int(model.input.shape[2]), int(model.input.shape[1]), int(model.input.shape[3])

    freeze(sess, model_file_basename, model_input, width, height, channels, model_output)

    # Remove unneeded intermediate files
    removables = []
    removables.extend(glob.glob('*.ckpt*'))
    removables.extend(glob.glob('*.binary.pb'))
    removables.extend(glob.glob('checkpoint'))
    removables.extend(glob.glob('*.sh'))
    for f in removables:
        os.remove(f)
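The graph-freezing helper `freeze(...)` called above is not shown. A minimal sketch of what such a TF1-style helper might look like, matching the call signature used here (an assumption, not the original implementation):

import tensorflow as tf

def freeze(sess, basename, input_name, width, height, channels, output_name):
    # Fold all variables into constants so the graph can be saved as a single .pb.
    # input_name/width/height/channels are accepted for parity with the call site
    # above but are not needed for the conversion itself.
    graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), [output_name])
    tf.io.write_graph(graph_def, '.', basename + '.pb', as_text=False)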
Example #2
    def __init__(self,
                 dimS,
                 nA,
                 action_map: Callable[..., List[int]],
                 gamma=0.99,
                 pi_lr=3e-4,
                 q_lr=3e-4,
                 alpha_lr=3e-4,
                 polyak=1e-3,
                 alpha=0.2,
                 adjust_entropy=False,
                 target_entropy=-6.,
                 hidden1=400,
                 hidden2=300,
                 buffer_size=1000000,
                 batch_size=128,
                 device='cpu',
                 render=False):

        self.dimS = dimS
        self.nA = nA

        self.gamma = gamma
        self.pi_lr = pi_lr
        self.q_lr = q_lr
        self.polyak = polyak

        # attributes for automating entropy adjustment
        self.adjust_entropy = adjust_entropy
        if adjust_entropy:
            self.target_entropy = target_entropy
            self.log_alpha = torch.tensor([0.],
                                          requires_grad=True,
                                          device=device)
            self.alpha_optimizer = Adam([self.log_alpha], lr=alpha_lr)
            self.alpha = torch.exp(self.log_alpha)
        else:
            # if the temperature parameter is not adjusted automatically, we set it to a fixed value
            self.alpha = alpha

        self.batch_size = batch_size
        # network definitions
        # pi : actor network, Q : twin critic networks
        self.pi = SACActor(dimS, nA, hidden1, hidden2).to(device)
        self.Q = DoubleCritic(dimS, nA, hidden1, hidden2).to(device)

        # target networks
        self.target_Q = copy.deepcopy(self.Q).to(device)

        freeze(self.target_Q)
        self.action_map = action_map
        self.buffer = SemiMDPReplayBuffer(dimS, limit=buffer_size)

        self.Q_optimizer = Adam(self.Q.parameters(), lr=self.q_lr)
        self.pi_optimizer = Adam(self.pi.parameters(), lr=self.pi_lr)

        self.device = device
        self.render = render

        return
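The `freeze`/`unfreeze` helpers used on the target and critic networks are not shown in these snippets. A minimal sketch consistent with calls like `freeze(self.target_Q)` and `unfreeze(self.Q)` (an assumption about their implementation):

import torch.nn as nn

def freeze(module: nn.Module) -> None:
    # Stop gradients from being computed for the module's parameters.
    for p in module.parameters():
        p.requires_grad = False

def unfreeze(module: nn.Module) -> None:
    for p in module.parameters():
        p.requires_grad = True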
Example #3
    def __init__(self, gen, disc, g_optim, d_optim, aux_clf, ac_optim, writer,
                 logger, evaluator, cv_loaders, cfg):
        super().__init__(gen, disc, g_optim, d_optim, aux_clf, ac_optim,
                         writer, logger, evaluator, cv_loaders, cfg)

        self.frozen_emb_style = copy.deepcopy(self.gen.emb_style)
        self.frozen_emb_comp = copy.deepcopy(self.gen.emb_comp)
        utils.freeze(self.frozen_emb_style)
        utils.freeze(self.frozen_emb_comp)
Example #4
def load_model(category_shard):
    model = nn.DataParallel(SeNet("seresnext50", 68))
    model.load_state_dict(
        torch.load("/storage/models/quickdraw/cat_{}/model.pth".format(
            category_shard),
                   map_location=torch.device(
                       "cuda:0" if torch.cuda.is_available() else "cpu")))
    freeze(model)
    return model
Example #5
    def infer(self, x, w):
        _frx = freeze(x), freeze(w)
        if _frx not in self._infers:
            _argmax = pymzn.minizinc(self.mzn_infer,
                                     data={
                                         **x, 'w': w
                                     },
                                     solver=pymzn.opturion)[0]
            self._infers[_frx] = _argmax
        return self._infers[_frx]
Example #6
    def __build_model(self):
        model_func = getattr(models, self.backbone)
        backbone = model_func(pretrained=True)
        _layers = list(backbone.children())[:-1]
        self.feature_extractor = torch.nn.Sequential(*_layers)
        freeze(module=self.feature_extractor, train_bn=self.train_bn)

        _fc_layers = [torch.nn.Linear(self.config.get('FC1'), self.config.get('FC2')),
                      torch.nn.Linear(self.config.get('FC2'), self.config.get('FC3')),
                      torch.nn.Linear(self.config.get('FC3'), self.n_classes)]

        self.fc = torch.nn.Sequential(*_fc_layers)
        self.loss_func = F.cross_entropy 
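In this example `freeze` takes `module` and `train_bn` keyword arguments, i.e. it freezes the feature extractor while optionally keeping BatchNorm layers trainable. A minimal sketch of such a variant (an assumption, not this project's own helper):

import torch.nn as nn

BN_TYPES = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)

def freeze(module: nn.Module, train_bn: bool = True) -> None:
    for child in module.children():
        if train_bn and isinstance(child, BN_TYPES):
            continue  # leave BatchNorm layers trainable
        if list(child.children()):
            freeze(child, train_bn)  # recurse into containers
        else:
            for p in child.parameters():
                p.requires_grad = False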
Example #7
    def train(self):

        device = self.device

        batch = self.buffer.sample_batch(batch_size=self.batch_size)

        # unroll batch
        obs_batch = torch.tensor(batch.obs, dtype=torch.float).to(device)
        act_batch = torch.tensor(batch.act, dtype=torch.float).to(device)
        next_obs_batch = torch.tensor(batch.next_obs,
                                      dtype=torch.float).to(device)
        rew_batch = torch.tensor(batch.rew, dtype=torch.float).to(device)
        done_batch = torch.tensor(batch.done, dtype=torch.float).to(device)

        masks = 1. - done_batch  # 1 for non-terminal transitions

        with torch.no_grad():
            next_actions, log_probs = self.pi(next_obs_batch,
                                              with_log_prob=True)
            target_q1, target_q2 = self.target_Q(next_obs_batch, next_actions)
            target_q = torch.min(target_q1, target_q2)
            target = rew_batch + self.gamma * masks * (target_q -
                                                       self.alpha * log_probs)

        out1, out2 = self.Q(obs_batch, act_batch)

        Q_loss1 = torch.mean((out1 - target)**2)
        Q_loss2 = torch.mean((out2 - target)**2)
        Q_loss = Q_loss1 + Q_loss2

        self.Q_optimizer.zero_grad()
        Q_loss.backward()
        self.Q_optimizer.step()

        actions, log_probs = self.pi(obs_batch, with_log_prob=True)

        freeze(self.Q)
        q1, q2 = self.Q(obs_batch, actions)
        q = torch.min(q1, q2)

        pi_loss = torch.mean(self.alpha * log_probs - q)

        self.pi_optimizer.zero_grad()
        pi_loss.backward()
        self.pi_optimizer.step()

        unfreeze(self.Q)

        self.target_update()

        return
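The `self.target_update()` call performs the soft (Polyak) update of the target critic. A minimal sketch consistent with the `polyak` constant defined in `__init__` (an assumption about the method body):

    def target_update(self):
        # Soft update: target <- polyak * online + (1 - polyak) * target
        with torch.no_grad():
            for p, p_target in zip(self.Q.parameters(), self.target_Q.parameters()):
                p_target.mul_(1.0 - self.polyak)
                p_target.add_(self.polyak * p)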
Example #8
    def phi(self, x, y):
        _frx = freeze(x), freeze(y)
        if _frx not in self._phis:
            ykeys = ['x', 'y', 'dx', 'dy']
            _phi = pymzn.minizinc(
                self.mzn_phi,
                output_vars=['phi'],
                data={
                    **self.inputize(subdict(y, ykeys), ykeys),
                    **x
                },
                solver=pymzn.opturion)[0]['phi']
            self._phis[_frx] = np.array(_phi, dtype=np.float64)
        return self._phis[_frx]
Example #9
    def improve(self, x, phi, changed):
        """Returns an object with the given phi.

        This is used to get a new object after the user changes some feature.
        """
        _frx = freeze(x), freeze(phi)
        if _frx not in self._improves:
            _impr = pymzn.minizinc(self.mzn_improve,
                                   data={
                                       **x, 'input_phi': phi,
                                       'changed': changed + 1
                                   },
                                   solver=pymzn.opturion)[0]
            self._improves[_frx] = _impr
        return self._improves[_frx]
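In the `infer`/`phi`/`improve` methods above, `freeze` is used to turn dict-like inputs into hashable cache keys rather than to disable gradients. A minimal sketch of such a helper (an assumption):

def freeze(obj):
    # Recursively convert dicts and sequences into hashable equivalents.
    if isinstance(obj, dict):
        return frozenset((k, freeze(v)) for k, v in obj.items())
    if isinstance(obj, (list, tuple, set)):
        return tuple(freeze(v) for v in obj)
    return obj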
Example #10
    def __build_model(self):
        # Sneaky
        model_func = getattr(models, self.backbone)
        backbone = model_func(pretrained=True)
        _layers = list(backbone.children())[:-1]
        self.feature_extractor = torch.nn.Sequential(*_layers)
        freeze(module=self.feature_extractor, train_bn=self.train_bn)

        _fc_layers = [
            torch.nn.Linear(2048, 256),
            torch.nn.Linear(256, 32),
            torch.nn.Linear(32, self.n_classes)
        ]

        self.fc = torch.nn.Sequential(*_fc_layers)
        self.loss_func = nn.CrossEntropyLoss()
Example #11
    def __init__(self,
                 dimS,
                 dimA,
                 ctrl_range,
                 gamma=0.99,
                 pi_lr=1e-4,
                 q_lr=1e-3,
                 polyak=1e-3,
                 alpha=0.2,
                 hidden1=400,
                 hidden2=300,
                 buffer_size=1000000,
                 batch_size=128,
                 device='cpu',
                 render=False):

        self.dimS = dimS
        self.dimA = dimA
        self.ctrl_range = ctrl_range

        self.gamma = gamma
        self.pi_lr = pi_lr
        self.q_lr = q_lr
        self.polyak = polyak
        self.alpha = alpha

        self.batch_size = batch_size
        # network definitions
        # pi : actor network, Q : twin critic networks
        self.pi = SACActor(dimS, dimA, hidden1, hidden2, ctrl_range).to(device)
        self.Q = DoubleCritic(dimS, dimA, hidden1, hidden2).to(device)

        # target networks
        self.target_Q = copy.deepcopy(self.Q).to(device)

        freeze(self.target_Q)

        self.buffer = ReplayBuffer(dimS, dimA, limit=buffer_size)

        self.Q_optimizer = Adam(self.Q.parameters(), lr=self.q_lr)
        self.pi_optimizer = Adam(self.pi.parameters(), lr=self.pi_lr)

        self.device = device
        self.render = render

        return
Example #12
    def __init__(self,
                 dimS,
                 nA,
                 action_map: Callable[..., List[int]],
                 gamma,
                 hidden1,
                 hidden2,
                 lr,
                 tau,
                 buffer_size,
                 batch_size,
                 device='cpu',
                 render=False):

        arg_dict = locals()
        print('agent spec')
        print('-' * 80)
        print(arg_dict)
        print('-' * 80)

        self.dimS = dimS
        self.nA = nA

        # set networks
        self.Q = Critic(dimS, nA, hidden_size1=hidden1,
                        hidden_size2=hidden2).to(device)
        self.target_Q = copy.deepcopy(self.Q).to(device)
        freeze(self.target_Q)

        self.optimizer = Adam(self.Q.parameters(), lr=lr)
        # discount factor & polyak constant
        self.gamma = gamma
        self.tau = tau

        # replay buffer for experience replay in semi-MDP
        self.buffer = SemiMDPReplayBuffer(dimS, buffer_size)
        self.batch_size = batch_size
        self.counter = 0
        # function which returns the set of executable actions at a given state
        # expected return type : numpy array when 2nd arg = True / list when False
        self.action_map = action_map
        self.render = render
        self.device = device
        return
Example #13
    def ac_backward(self, retain_graph):
        if self.aux_clf is None:
            return

        org_grads = utils.freeze(self.gen.memory.persistent_memory)

        if 'ac' in self.ac_losses:
            self.ac_losses['ac'].backward(retain_graph=retain_graph)

        if 'ac_gen' in self.ac_losses:
            with utils.temporary_freeze(self.aux_clf):
                self.ac_losses['ac_gen'].backward(retain_graph=retain_graph)

        utils.unfreeze(self.gen.memory.persistent_memory, org_grads)
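Here `utils.freeze` returns the original `requires_grad` flags so that `utils.unfreeze` can restore them, and `utils.temporary_freeze` wraps the pair as a context manager. A minimal sketch of helpers matching that usage (an assumption about the project's `utils` module):

from contextlib import contextmanager

def freeze(module):
    # Record the current flags, then disable gradients.
    org_grads = [p.requires_grad for p in module.parameters()]
    for p in module.parameters():
        p.requires_grad = False
    return org_grads

def unfreeze(module, org_grads=None):
    for i, p in enumerate(module.parameters()):
        p.requires_grad = True if org_grads is None else org_grads[i]

@contextmanager
def temporary_freeze(module):
    org_grads = freeze(module)
    try:
        yield
    finally:
        unfreeze(module, org_grads)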
Example #14
    def __init__(self, gen, disc, g_optim, d_optim, aux_clf, ac_optim, writer,
                 logger, evaluator, cv_loaders, cfg):
        self.gen = gen
        self.gen_ema = copy.deepcopy(self.gen)
        self.g_optim = g_optim

        self.is_bn_gen = has_bn(self.gen)
        self.disc = disc
        self.d_optim = d_optim

        self.aux_clf = aux_clf
        self.ac_optim = ac_optim

        self.cfg = cfg

        [self.gen, self.gen_ema, self.disc, self.aux_clf], [
            self.g_optim, self.d_optim, self.ac_optim
        ] = self.set_model([self.gen, self.gen_ema, self.disc, self.aux_clf],
                           [self.g_optim, self.d_optim, self.ac_optim],
                           num_losses=4,
                           level=cfg.half_type)

        self.writer = writer
        self.logger = logger
        self.evaluator = evaluator
        self.cv_loaders = cv_loaders

        self.step = 1

        self.g_losses = {}
        self.d_losses = {}
        self.ac_losses = {}

        self.frozen_enc = copy.deepcopy(self.gen.component_encoder)
        utils.freeze(self.frozen_enc)
        self.ac_gen_decoder_only_grad = self.cfg.get(
            'ac_gen_decoder_only_grad', False)
Example #15
                               shuffle=False,
                               num_workers=dataconfig["fetchworker_num"])

        from frameworks.Speech_Models import GRU_CTC_Model as Model
        from solvers import CTC_Solver as Solver

        model = Model.create_model(modelconfig["signal"],
                                   modelconfig["encoder"],
                                   modelconfig["decoder"]["vocab_size"])

        if trainingconfig['load_splayer']:
            logging.info("Load pretrained splayer from {}.".format(
                trainingconfig["load_splayer"]))
            pkg = torch.load(trainingconfig["load_splayer"])
            model.load_splayer(pkg["model"])
            utils.freeze(model.splayer)

    logging.info("\nModel info:\n{}".format(model))

    if args.continue_training:
        logging.info("Load package from {}.".format(
            os.path.join(trainingconfig["exp_dir"], "last.pt")))
        pkg = torch.load(os.path.join(trainingconfig["exp_dir"], "last.pt"))
        model.restore(pkg["model"])

    if "multi_gpu" in trainingconfig and trainingconfig["multi_gpu"] == True:
        logging.info("Let's use {} GPUs!".format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model)

    if torch.cuda.is_available():
        model = model.cuda()
Example #16
    def train(self):

        device = self.device
        batch = self.buffer.sample_batch(batch_size=self.batch_size)
        epsilon = 1e-4
        states = batch['state']
        # m = np.vstack([mask_add(self.action_map(states[i])) for i in range(self.batch_size)])
        # m = torch.tensor(m, dtype=torch.float).to(self.device)
        m2 = np.vstack([
            mask_mul(self.action_map(states[i]))
            for i in range(self.batch_size)
        ])
        m2 = torch.tensor(m2, dtype=torch.float).to(self.device)

        # unroll batch
        states_tensor = torch.tensor(states, dtype=torch.float).to(device)
        actions = torch.tensor(batch['action'], dtype=torch.long).to(device)
        rewards = torch.tensor(batch['reward'], dtype=torch.float).to(device)
        next_states = torch.tensor(batch['next_state'],
                                   dtype=torch.float).to(device)
        d = torch.tensor(batch['done'], dtype=torch.float).to(device)
        dt = torch.tensor(batch['dt'], dtype=torch.float).to(device)

        with torch.no_grad():
            # TODO : invalid action filtering
            probs = self.pi(next_states)

            probs_new = m2 * (probs + 1e-4)
            probs_new = probs_new / torch.sum(probs_new, dim=1, keepdim=True)
            # clipped double-Q target
            target_q1, target_q2 = self.target_Q(
                next_states)  # Q_1(s^\prime, \cdot), Q_2(s^\prime, \cdot)
            # a_next = torch.unsqueeze(torch.max(target_q1 + m, 1)[1], 1)

            target_q = torch.min(target_q1, target_q2)
            v_next = torch.sum(probs_new * target_q, dim=1, keepdim=True)
            # \mathbb{E}_{a \sim \pi(\cdot \vert s)}Q^\pi (s^\prime, a^\prime)
            # v1_next = torch.sum(probs_new * target_q1, dim=1, keepdim=True)
            # v2_next = torch.sum(probs_new * target_q2, dim=1, keepdim=True)
            # v_next = torch.min(v1_next, v2_next)

            # entropy of policy
            log_probs = torch.log(probs_new + epsilon)
            H = -torch.sum(probs_new * log_probs, dim=1, keepdim=True)
            """
            target_q1, target_q2 = self.target_Q(next_states)  # Q_1(s^\prime, \cdot), Q_2(s^\prime, \cdot)
            # a_next = torch.unsqueeze(torch.max(target_q1 + m, 1)[1], 1)

            # \mathbb{E}_{a \sim \pi(\cdot \vert s)}Q^\pi (s^\prime, a^\prime)
            target_q = torch.min(target_q1, target_q2)
            v_next = torch.sum(probs * target_q, dim=1, keepdim=True)
            # v1_next = torch.sum(probs * target_q1, dim=1, keepdim=True)
            # v2_next = torch.sum(probs * target_q2, dim=1, keepdim=True)
            # v_next = torch.min(v1_next, v2_next)

            log_probs = torch.log(probs + epsilon)
            H = torch.sum(probs * log_probs, dim=1, keepdim=True)
            """
            # semi-MDP target construction
            target = rewards + (self.gamma**
                                dt) * (1. - d) * (v_next + self.alpha * H)

        # out1, out2 = self.Q(states).gather(1, actions)
        q1, q2 = self.Q(states_tensor)
        out1 = q1.gather(1, actions)
        out2 = q2.gather(1, actions)

        Q_loss1 = torch.mean((out1 - target)**2)
        Q_loss2 = torch.mean((out2 - target)**2)
        Q_loss = Q_loss1 + Q_loss2

        self.Q_optimizer.zero_grad()
        Q_loss.backward()
        self.Q_optimizer.step()

        # actor loss
        # here we use normalized probability which considers a set of admissible actions at each state

        probs = self.pi(states_tensor)

        probs_new = m2 * (probs + 1e-4)
        probs_new = probs_new / torch.sum(probs_new, dim=1, keepdim=True)
        log_probs = torch.log(probs_new + 1e-7)
        freeze(self.Q)
        q1, q2 = self.Q(states_tensor)
        q = torch.min(q1, q2)

        # pi_loss = torch.mean(self.alpha * log_probs - q)
        pi_loss = torch.mean(probs_new * (self.alpha * log_probs - q))
        # print(pi_loss.item())
        self.pi_optimizer.zero_grad()
        pi_loss.backward()
        self.pi_optimizer.step()

        if self.adjust_entropy:
            alpha_loss = -torch.mean(
                self.log_alpha * (log_probs + self.target_entropy).detach())
            self.alpha_optimizer.zero_grad()
            alpha_loss.backward()
            self.alpha_optimizer.step()
            self.alpha = torch.exp(self.log_alpha)

        unfreeze(self.Q)
        self.target_update()

        return
Example #17
net_Z.train()
net_G.train()

for i in range(EPOCH):
    cumul_loss_Z = 0
    cumul_loss_H_G = 0
    nb_batch = 0
    loss_Z = 0
    for X, _ in train_dataloader:
        X = select_white_line_images(X, proba_white_line)

        # put a mask on images
        input_masked = X.to(device) * mask

        # Freeze H network
        freeze(net_H)
        H, skip_connect_layers = net_H(input_masked)

        # freeze G network
        freeze(net_G)

        # generate init z
        z_t = sample_z(X.shape[0], z_size=10)
        z_t.requires_grad = True

        for _ in range(STEPS):
            ###########################
            # ##### updating z  ##### #
            ###########################
            z = z_t.clone().detach()
Example #18
def _create_output_stubs(producer_tag, producer):
    return freeze(
        **{
            name: Stub.create(name, schema, producer_tag)
            for (name, schema) in producer.output_schema.items()
        })
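In this example `freeze(**kwargs)` builds an immutable mapping of output stubs. A minimal sketch of such a helper using a read-only mapping proxy (an assumption, not the library's actual implementation):

from types import MappingProxyType

def freeze(**kwargs):
    # Return a read-only view over the keyword arguments.
    return MappingProxyType(dict(kwargs))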
Example #19
    def __init__(self,
                 dimS,
                 nA,
                 action_map: Callable[..., List[int]],
                 gamma,
                 hidden1,
                 hidden2,
                 lr,
                 tau,
                 buffer_size,
                 batch_size,
                 priority_exponent,
                 normalize_weights,
                 uniform_sample_prob,
                 anneal_schedule: Callable,
                 clipped=False,
                 device='cpu',
                 render=False):

        arg_dict = locals()
        print('agent spec')
        print('-' * 80)
        print(arg_dict)
        print('-' * 80)

        self.dimS = dimS
        self.nA = nA
        self.clipped = clipped
        # set networks
        if clipped:
            self.Q = DoubleCritic(dimS,
                                  nA,
                                  hidden_size1=hidden1,
                                  hidden_size2=hidden2).to(device)
        else:
            self.Q = Critic(dimS,
                            nA,
                            hidden_size1=hidden1,
                            hidden_size2=hidden2).to(device)
        self.target_Q = copy.deepcopy(self.Q).to(device)
        freeze(self.target_Q)

        self.optimizer = Adam(self.Q.parameters(), lr=lr)
        # discount factor & polyak constant
        self.gamma = gamma
        self.tau = tau
        self.batch_size = batch_size

        replay_structure = Transition(s_tm1=None,
                                      a_tm1=None,
                                      r_t=None,
                                      s_t=None,
                                      dt=None,
                                      d=None)

        # replay buffer for experience replay in semi-MDP
        # prioritized experience replay for semi-DQN
        self.replay = PrioritizedTransitionReplay(
            capacity=buffer_size,
            structure=replay_structure,
            priority_exponent=priority_exponent,
            importance_sampling_exponent=anneal_schedule,
            uniform_sample_probability=uniform_sample_prob,
            normalize_weights=normalize_weights,
            random_state=np.random.RandomState(1),
            encoder=None,
            decoder=None)
        self.max_seen_priority = 1.
        self.schedule = anneal_schedule

        # function which returns the set of executable actions at a given state
        # expected return type : numpy array when 2nd arg = True / list when False
        self.action_map = action_map
        self.render = render
        self.device = device
        return
Example #20
    for images, labels in loader:
        digit = labels[0].item()
        if digit not in train_on_digits:
            continue
        batch = images.expand(n_classes, -1, -1, -1)
        transformed = apply_transformations(batch, transformations)
        latent = digit_processors[digit](transformed.view(n_classes, dim))
        y_pred = classifier(latent)
        loss = loss_func(y_pred, torch.LongTensor(transformations))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    progress_bar.set_description("step 1/3 loss=%.3f" % loss.item())

freeze(classifier)

# prepare the testing data
n_letters = 26
test_images = []
for letter in tqdm(range(n_letters), total=n_letters, desc="step 2/3"):
    loader = torch.utils.data.DataLoader(test_set,
                                         batch_size=4096,
                                         shuffle=True)
    for images, labels in loader:
        filtering = [i for i, l in enumerate(labels) if l.item() == letter + 1]
        images = images[filtering].clone()
        test_images.append(images)
        break

# continual learning loop
Example #21
def train_wdcgan(G, D, train_loader, BATCH_SIZE):
    optimizer_d = t.optim.RMSprop(D.parameters(), lr=2e-4)
    optimizer_g = t.optim.RMSprop(G.parameters(), lr=2e-4)

    clip = None

    losses_g = []
    losses_d = []
    losses_ds = []
    losses_dt = []

    for epoch in range(250):
        for i, batch in enumerate(train_loader):
            img, label = batch
            z = variable(np.random.normal(size=(BATCH_SIZE, G.latent_dim)),
                         cuda=True)
            img = variable(img, cuda=True)

            if i % 6 == 5:  # train gen
                # init
                D.eval()
                G.train()
                freeze(D, True)  # do not compute gradients of D
                freeze(G, False)
                optimizer_g.zero_grad()

                # forward pass
                synthetic = G(z)
                loss_s = D(synthetic)
                loss = -loss_s

                # backprop
                loss.backward()
                optimizer_g.step()

                # monitor
                losses_g.append(loss.data.cpu().numpy()[0])

                # ready to train
                D.train()

            else:  # train disc
                # init
                G.eval()
                D.train()
                freeze(D, False)
                freeze(G, True)  # do not compute gradients of G
                optimizer_d.zero_grad()

                # forward pass
                synthetic = G(z).detach()
                loss_s = D(synthetic)
                loss_t = D(img)
                loss = loss_s - loss_t + D.gradient_penalty(img, synthetic)

                # backprop
                loss.backward()
                optimizer_d.step()
                D.clip(clip)

                # monitor
                losses_d.append(loss.data.cpu().numpy()[0])
                losses_ds.append(loss_s.data.cpu().numpy()[0])
                losses_dt.append(loss_t.data.cpu().numpy()[0])

                # ready to train
                G.train()

        if epoch % 5 == 0:
            monitoring(G, D, epoch, losses_g, losses_d, losses_ds, losses_dt)
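In this WGAN training loop `freeze(net, flag)` toggles whether gradients are computed for a network (`True` means "do not compute gradients", per the comments above). A minimal sketch of that variant (an assumption):

def freeze(net, flag):
    # flag=True disables gradient computation, flag=False re-enables it.
    for p in net.parameters():
        p.requires_grad = not flag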
Example #22
    print(f"Currect Memory Allocated: {torch.cuda.memory_allocated()}")

    # Create Model
    net = models.resnet34(pretrained=True)

    print(f"Currect Memory Allocated: {torch.cuda.memory_allocated()}")

    # Transfer
    filter = [
        # 'resnet18.layer1',
        # 'resnet18.layer2',
        # 'resnet18.layer3',
        'resnet34.layer4',
        'resnet34.fc'
    ]
    freeze(net, filter, prefix='resnet34', verbose=True)
    net.fc = nn.Linear(in_features=512, out_features=10, bias=True)

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(net.parameters(),
                          LR,
                          momentum=0.9,
                          weight_decay=WEIGHT_DECAY)

    print(f"Currect Memory Allocated: {torch.cuda.memory_allocated()}")

    transform = transforms.Compose([
        transforms.RandomResizedCrop((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])
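The `freeze(net, filter, prefix=..., verbose=...)` call above freezes every parameter except those whose qualified name starts with an entry in `filter` (here `resnet34.layer4` and `resnet34.fc`). A minimal sketch of such a name-filtered variant (an assumption):

def freeze(net, filter, prefix='', verbose=False):
    for name, p in net.named_parameters():
        full_name = f"{prefix}.{name}" if prefix else name
        # Only parameters matching the filter remain trainable.
        p.requires_grad = any(full_name.startswith(f) for f in filter)
        if verbose:
            print(full_name, 'trainable' if p.requires_grad else 'frozen')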
Example #23
    def fit(self, img: np.ndarray, steps_per_scale: int = 2000) -> None:
        # initialize task tracking parameters
        self.total_steps = (self.N + 1) * steps_per_scale
        self.update_celery_state()

        # precompute all the sizes of the different scales
        target_size = img.shape[:-1]
        self.train_img = img

        # compute scales sizes
        scale_sizes = self.compute_scale_sizes(target_size)

        # print scales to validate choice of N
        print(scale_sizes)

        # preprocess input image and pack it in a batch
        img = torch.from_numpy(img.transpose(2, 0, 1))
        img = self.transform_input(img)
        img = img.expand(1, 3, target_size[0], target_size[1])

        # fix initial noise map for reconstruction loss computation
        self.z_init = self.generate_random_noise(scale_sizes[:1])[0]

        # training progression
        self.logger.set_scale(self.N + 1)
        for p in range(self.N + 1):
            self.logger.new_scale()

            # double number of initial channels every 4 scales
            n_channels = self.hypers['base_n_channels'] * (2**(p // 4))

            # instantiate new models for the next scale
            new_generator = SingleScaleGenerator(
                n_channels=n_channels,
                min_channels=self.hypers['min_n_channels'],
                n_blocks=self.hypers['n_blocks']).to(self.device)

            new_discriminator = Discriminator(
                n_channels=n_channels,
                min_channels=self.hypers['min_n_channels'],
                n_blocks=self.hypers['n_blocks']).to(self.device)

            # initialize weights via copy if possible
            if (p - 1) // 4 == p // 4:
                new_generator.load_state_dict(self.g_pyramid[0].state_dict())
                new_discriminator.load_state_dict(
                    self.d_pyramid[0].state_dict())

            # reset the optimizers
            self.g_optimizer = torch.optim.Adam(new_generator.parameters(),
                                                lr=self.hypers['g_lr'],
                                                betas=[0.5, 0.999])
            self.d_optimizer = torch.optim.Adam(new_discriminator.parameters(),
                                                lr=self.hypers['d_lr'],
                                                betas=[0.5, 0.999])

            # insert new generator and discriminator at the bottom of the pyramids
            self.g_pyramid.insert(0, new_generator)
            self.d_pyramid.insert(0, new_discriminator)

            # fit the currently finest scale
            self.fit_single_scale(img=img,
                                  target_size=scale_sizes[p],
                                  steps=steps_per_scale)

            # freeze the weights after training
            freeze(self.g_pyramid[0])
            freeze(self.d_pyramid[0])

            # switch them to evaluation mode
            self.g_pyramid[0].eval()
            self.d_pyramid[0].eval()