def setup_and_train(self, finetune=False):
        if not finetune:
            model_name = "yolov3-spp3"
            tmp_cfg = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "cfg",
                model_name + ".cfg")
            # copy the template cfg into the working directory; shutil.copy is
            # portable and safe with spaces in paths, unlike shelling out to
            # `cp` through os.system
            import shutil
            shutil.copy(tmp_cfg, os.path.join(os.getcwd(),
                                              model_name + ".cfg"))
            self.system_dict["params"]["cfg"] = model_name + ".cfg"

            #update(self.system_dict["params"]["cfg"], self.system_dict["local"]["num_classes"]);
            #attempt_download('darknet53.conv.74')

        os.makedirs("weights", exist_ok=True)

        cfg = self.system_dict["params"]["cfg"]
        img_size = self.system_dict["params"]["img_size"]
        epochs = self.system_dict["params"]["epochs"]
        batch_size = self.system_dict["params"]["batch_size"]
        accumulate = self.system_dict["params"]["accumulate"]
        weights = self.system_dict["params"]["weights"]
        sparsity = self.system_dict["params"]["sparsity"]
        arc = self.system_dict["params"]["arc"]
        hyp = self.system_dict["fixed_params"]["hyp"]
        rect = self.system_dict["params"]["rect"]
        cache_images = self.system_dict["params"]["cache_images"]
        train_img_dir = self.system_dict["dataset"]["train"]["img_dir"]
        train_label_dir = self.system_dict["dataset"]["train"]["label_dir"]
        val_img_dir = self.system_dict["dataset"]["val"]["img_dir"]
        val_label_dir = self.system_dict["dataset"]["val"]["label_dir"]

        # Initialize
        init_seeds()
        wdir = self.system_dict["fixed_params"]["wdir"]
        last = self.system_dict["fixed_params"]["last"]
        best = self.system_dict["fixed_params"]["best"]
        mixed_precision = self.system_dict["params"]["mixed_precision"]
        device = torch_utils.select_device(apex=mixed_precision)
        multi_scale = self.system_dict["params"]["multi_scale"]

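        # Multi-scale training: image sizes are drawn from roughly 67% to 150%
        # of the base size, snapped to the network's 32-pixel stride; training
        # starts at the maximum size so GPU memory is allocated for the worst
        # case up front.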
        if multi_scale:
            img_sz_min = round(img_size / 32 / 1.5) + 1
            img_sz_max = round(img_size / 32 * 1.5) - 1
            img_size = img_sz_max * 32  # initiate with maximum multi_scale size
            print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

        with open(self.system_dict["dataset"]["train"]["class_list"], 'r') as f:
            lines = f.readlines()
        # strip trailing newlines so class names don't carry a '\n'
        self.system_dict["local"]["classes"] = [
            line.strip() for line in lines if line.strip() != ""
        ]
        self.system_dict["local"]["num_classes"] = len(
            self.system_dict["local"]["classes"])

        if not finetune:
            update(self.system_dict["params"]["cfg"],
                   self.system_dict["local"]["num_classes"])
            attempt_download('darknet53.conv.74')
            weights = "darknet53.conv.74"

        classes = self.system_dict["local"]["classes"]
        nc = self.system_dict["local"]["num_classes"]

        # Initialize model
        model = Darknet(cfg, arc=arc).to(device)

        if self.system_dict["params"]["adam"]:
            optimizer = optim.Adam(model.parameters(),
                                   lr=hyp['lr0'],
                                   weight_decay=hyp['weight_decay'])
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=hyp['lr0'],
                                  momentum=hyp['momentum'],
                                  weight_decay=hyp['weight_decay'],
                                  nesterov=True)

        cutoff = -1  # backbone reaches to cutoff layer
        start_epoch = 0
        best_fitness = 0.
        if weights.endswith('.pt'):  # pytorch format
            # possible weights are 'last.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc
            chkpt = torch.load(weights, map_location=device)

            # load model
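            # keep only tensors whose element count matches the freshly built
            # model, so a checkpoint trained with a different class count can
            # still seed the backbone; strict=False ignores the dropped keys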
            if not finetune:
                chkpt['model'] = {
                    k: v
                    for k, v in chkpt['model'].items()
                    if model.state_dict()[k].numel() == v.numel()
                }
                model.load_state_dict(chkpt['model'], strict=False)
            else:
                model.load_state_dict(chkpt['model'])

            # load optimizer
            if chkpt['optimizer'] is not None:
                optimizer.load_state_dict(chkpt['optimizer'])
                best_fitness = chkpt['best_fitness']

            del chkpt
        elif len(weights) > 0:  # darknet format
            # possible weights are 'yolov3.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
            cutoff = load_darknet_weights(model, weights)

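        # Step schedule: drop the learning rate 10x at 80% and 90% of total
        # epochs, as in the reference YOLOv3 training schedule.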
        scheduler = lr_scheduler.MultiStepLR(
            optimizer,
            milestones=[round(epochs * x) for x in [0.8, 0.9]],
            gamma=0.1)
        scheduler.last_epoch = start_epoch - 1

        if mixed_precision:
            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level='O1',
                                              verbosity=0)

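        # Single-node multi-GPU setup: a one-member NCCL process group on a
        # fixed local port; world_size=1 and rank=0 because only this one
        # node/process is assumed.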
        if torch.cuda.device_count() > 1:
            dist.init_process_group(
                backend='nccl',  # 'distributed backend'
                init_method=
                'tcp://127.0.0.1:9999',  # distributed training init method
                world_size=1,  # number of nodes for distributed training
                rank=0)  # distributed training node rank
            model = torch.nn.parallel.DistributedDataParallel(model)
            model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

        dataset = LoadImagesAndLabels(train_img_dir,
                                      train_label_dir,
                                      img_size,
                                      batch_size,
                                      augment=True,
                                      hyp=hyp,
                                      rect=rect,
                                      cache_images=cache_images)

        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=min(os.cpu_count(), batch_size),
            shuffle=not rect,  # Shuffle=True unless rectangular training is used
            pin_memory=True,
            collate_fn=dataset.collate_fn)

        # Remove previous results
        for f in glob.glob('*_batch*.jpg') + glob.glob('results.txt'):
            os.remove(f)

        tb_writer = SummaryWriter()

        # Start training
        model.nc = nc  # attach number of classes to model
        model.arc = arc  # attach yolo architecture
        model.hyp = hyp  # attach hyperparameters to model
        model.class_weights = labels_to_class_weights(dataset.labels, nc).to(
            device)  # attach class weights
        model_info(model, report='summary')  # 'full' or 'summary'
        nb = len(dataloader)
        maps = np.zeros(nc)  # mAP per class
        results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
        t0 = time.time()
        for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
            model.train()
            print(
                ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj',
                                       'cls', 'total', 'targets', 'img_size'))

            # Update scheduler
            if epoch > 0:
                scheduler.step()

            # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional)
            freeze_backbone = False
            if freeze_backbone and epoch < 2:
                for name, p in model.named_parameters():
                    if int(name.split('.')[1]) < cutoff:  # if layer < 75
                        p.requires_grad = False if epoch == 0 else True

            # Update image weights (optional)
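            # resample the epoch's image indices, weighting each class by
            # class_weight * (1 - mAP)^2 so classes with poor mAP are drawn
            # more often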
            if dataset.image_weights:
                w = model.class_weights.cpu().numpy() * (
                    1 - maps)**2  # class weights
                image_weights = labels_to_image_weights(dataset.labels,
                                                        nc=nc,
                                                        class_weights=w)
                dataset.indices = random.choices(
                    range(dataset.n), weights=image_weights,
                    k=dataset.n)  # rand weighted idx

            mloss = torch.zeros(4).to(device)  # mean losses
            pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
            for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
                ni = i + nb * epoch  # number integrated batches (since train start)
                imgs = imgs.to(device)
                targets = targets.to(device)

                # Multi-Scale training
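                # pick a fresh 32-multiple size every 10 accumulated steps and
                # bilinearly rescale the already-loaded batch to match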
                if multi_scale:
                    if ni / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                        img_size = random.randrange(img_sz_min,
                                                    img_sz_max + 1) * 32
                    sf = img_size / max(imgs.shape[2:])  # scale factor
                    if sf != 1:
                        ns = [
                            math.ceil(x * sf / 32.) * 32
                            for x in imgs.shape[2:]
                        ]  # new shape (stretched to 32-multiple)
                        imgs = F.interpolate(imgs,
                                             size=ns,
                                             mode='bilinear',
                                             align_corners=False)

                # Plot images with bounding boxes
                if ni == 0:
                    fname = 'train_batch%g.jpg' % i
                    plot_images(imgs=imgs,
                                targets=targets,
                                paths=paths,
                                fname=fname)
                    if tb_writer:
                        tb_writer.add_image(fname,
                                            cv2.imread(fname)[:, :, ::-1],
                                            dataformats='HWC')

                # Hyperparameter burn-in
                # n_burn = nb - 1  # min(nb // 5 + 1, 1000)  # number of burn-in batches
                # if ni <= n_burn:
                #     for m in model.named_modules():
                #         if m[0].endswith('BatchNorm2d'):
                #             m[1].momentum = 1 - i / n_burn * 0.99  # BatchNorm2d momentum falls from 1 - 0.01
                #     g = (i / n_burn) ** 4  # gain rises from 0 - 1
                #     for x in optimizer.param_groups:
                #         x['lr'] = hyp['lr0'] * g
                #         x['weight_decay'] = hyp['weight_decay'] * g

                # Run model
                pred = model(imgs)

                # Compute loss
                loss, loss_items = compute_loss(pred, targets, model)
                if torch.isnan(loss):
                    print('WARNING: nan loss detected, ending training')
                    return results

                # Divide by accumulation count
                if accumulate > 1:
                    loss /= accumulate

                # Compute gradient
                if mixed_precision:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                # Accumulate gradient for x batches before optimizing
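                # effective batch size is batch_size * accumulate: the loss was
                # divided by `accumulate` above, so summing gradients over that
                # many mini-batches averages them before one optimizer step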
                if ni % accumulate == 0:
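                    # updateBN (a helper on this class) is expected to push an
                    # L1 sparsity penalty onto BatchNorm scale factors, in the
                    # style of network-slimming channel pruning; the exact
                    # behaviour depends on that helper's implementation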
                    if sparsity != 0:
                        self.updateBN(sparsity, model)
                    optimizer.step()
                    optimizer.zero_grad()

                # Print batch results
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB); memory_reserved() in newer PyTorch
                s = ('%10s' * 2 + '%10.3g' * 6) % (
                    '%g/%g' % (epoch, epochs - 1), '%.3gG' % mem, *mloss,
                    len(targets), img_size)
                pbar.set_description(s)
                # end batch -----------------------------------------------------------------------

            final_epoch = epoch + 1 == epochs

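            # validate with a loose conf_thres of 0.1 during training for
            # speed; only the final epoch uses the full 0.001 threshold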
            with torch.no_grad():
                results, maps = validate(
                    cfg,
                    val_img_dir,
                    val_label_dir,
                    classes,
                    batch_size=batch_size,
                    img_size=img_size,
                    model=model,
                    conf_thres=0.001
                    if final_epoch and epoch > 0 else 0.1,  # 0.1 for speed
                    save_json=False)

            # Write epoch results
            with open('results.txt', 'a') as file:
                file.write(s + '%10.3g' * 7 % results +
                           '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)

            # Write Tensorboard results
            if tb_writer:
                x = list(mloss) + list(results)
                titles = [
                    'GIoU', 'Objectness', 'Classification', 'Train loss',
                    'Precision', 'Recall', 'mAP', 'F1', 'val GIoU',
                    'val Objectness', 'val Classification'
                ]
                for xi, title in zip(x, titles):
                    tb_writer.add_scalar(title, xi, epoch)

            # Update best mAP
            fitness = results[2]  # mAP
            if fitness > best_fitness:
                best_fitness = fitness

            # Save training results
            save = True
            if save:
                with open('results.txt', 'r') as file:
                    # Create checkpoint
                    chkpt = {
                        'epoch': epoch,
                        'best_fitness': best_fitness,
                        'training_results': file.read(),
                        'model': model.module.state_dict() if isinstance(
                            model, nn.parallel.DistributedDataParallel)
                        else model.state_dict(),
                        'optimizer': None if final_epoch else optimizer.state_dict()
                    }

                # Save last checkpoint
                torch.save(chkpt, last)

                # Save best checkpoint
                if best_fitness == fitness:
                    torch.save(chkpt, best)

                # Delete checkpoint
                del chkpt  # end epoch -------------------------------------------------------------------------------------

        # Report time
        plot_results()  # save as results.png
        print('%g epochs completed in %.3f hours.\n' %
              (epoch - start_epoch + 1, (time.time() - t0) / 3600))
        if torch.cuda.device_count() > 1:
            dist.destroy_process_group()
        torch.cuda.empty_cache()
        return results

    def setup(self):
        #folder
        os.makedirs("weights", exist_ok=True)

        #Device Setup
        self.system_dict["params"]["weights"] = last if self.system_dict[
            "params"]["resume"] else self.system_dict["params"]["weights"]
        self.system_dict["local"]["device"] = torch_utils.select_device(
            self.system_dict["params"]["device"],
            apex=self.system_dict["params"]["mixed_precision"],
            batch_size=self.system_dict["params"]["batch_size"])
        if self.system_dict["local"]["device"].type == 'cpu':
            self.system_dict["params"]["mixed_precision"] = False

        self.system_dict["local"]["tb_writer"] = None
        self.system_dict["local"]["tb_writer"] = SummaryWriter()

        #Data Setup
        # img_size may hold one value (train) or two (train, test);
        # duplicate the single value when no test size is given
        img_size_list = self.system_dict["params"]["img_size"]
        img_size, img_size_test = img_size_list if len(
            img_size_list) == 2 else img_size_list * 2

        init_seeds()
        if self.system_dict["params"]["multi_scale"]:
            img_sz_min = round(img_size / 32 / 1.5)
            img_sz_max = round(img_size / 32 * 1.5)
            img_size = img_sz_max * 32  # initiate with maximum multi_scale size
            print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

            self.system_dict["params"]["img_sz_min"] = img_sz_min
            self.system_dict["params"]["img_sz_max"] = img_sz_max

        self.system_dict["params"]["img_size"] = img_size
        self.system_dict["params"]["img_size_test"] = img_size_test

        with open(self.system_dict["dataset"]["train"]["class_list"], 'r') as f:
            lines = f.readlines()

        # strip trailing newlines so class names don't carry a '\n'
        self.system_dict["local"]["classes"] = [
            line.strip() for line in lines if line.strip() != ""
        ]
        self.system_dict["local"]["num_classes"] = len(
            self.system_dict["local"]["classes"])
        self.system_dict["params"]["single_cls"] = (
            self.system_dict["local"]["num_classes"] == 1)

        self.system_dict["local"]["nc"] = 1 if self.system_dict["params"][
            "single_cls"] else self.system_dict["local"]["num_classes"]

        # Remove previous results
        for f in glob.glob('*_batch*.png') + glob.glob(
                self.system_dict["fixed_params"]["results_file"]):
            os.remove(f)

        if 'pw' not in self.system_dict["params"][
                "arc"]:  # remove BCELoss positive weights
            self.system_dict["fixed_params"]["hyp"]['cls_pw'] = 1.
            self.system_dict["fixed_params"]["hyp"]['obj_pw'] = 1.

        #Update Config file
        update(self.system_dict["params"]["cfg"],
               self.system_dict["local"]["num_classes"])

        #Model
        self.system_dict["local"]["model"] = Darknet(
            self.system_dict["params"]["cfg"],
            arc=self.system_dict["params"]["arc"]).to(
                self.system_dict["local"]["device"])

        attempt_download(self.system_dict["params"]["weights"])

        # Optimizer
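        # split parameters into three groups so weight decay hits only the
        # convolution weights (pg1), not biases (pg2) or BatchNorm and other
        # parameters (pg0)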
        pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
        for k, v in dict(
                self.system_dict["local"]["model"].named_parameters()).items():
            if '.bias' in k:
                pg2 += [v]  # biases
            elif 'Conv2d.weight' in k:
                pg1 += [v]  # apply weight_decay
            else:
                pg0 += [v]  # all else

        if self.system_dict["params"]["adam"]:
            # hyp['lr0'] *= 0.1  # reduce lr (i.e. SGD=5E-3, Adam=5E-4)
            self.system_dict["local"]["optimizer"] = optim.Adam(
                pg0, lr=self.system_dict["fixed_params"]["hyp"]['lr0'])
            # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
        else:
            self.system_dict["local"]["optimizer"] = optim.SGD(
                pg0,
                lr=self.system_dict["fixed_params"]["hyp"]['lr0'],
                momentum=self.system_dict["fixed_params"]["hyp"]['momentum'],
                nesterov=True)

        self.system_dict["local"]["optimizer"].add_param_group({
            'params':
            pg1,
            'weight_decay':
            self.system_dict["fixed_params"]["hyp"]['weight_decay']
        })  # add pg1 with weight_decay
        self.system_dict["local"]["optimizer"].add_param_group(
            {'params': pg2})  # add pg2 (biases)
        del pg0, pg1, pg2

        self.system_dict["local"]["start_epoch"] = 0
        self.system_dict["local"]["best_fitness"] = float('inf')

        if self.system_dict["params"]["weights"].endswith('.pt'):
            chkpt = torch.load(
                self.system_dict["params"]["weights"],
                map_location=self.system_dict["local"]["device"])

            # load model
            try:
                chkpt['model'] = {
                    k: v
                    for k, v in chkpt['model'].items()
                    if self.system_dict["local"]["model"].state_dict()
                    [k].numel() == v.numel()
                }
                self.system_dict["local"]["model"].load_state_dict(
                    chkpt['model'], strict=False)
            except KeyError as e:
                s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                    "See https://github.com/ultralytics/yolov3/issues/657" % (self.system_dict["params"]["weights"], self.system_dict["params"]["cfg"],
                                                                              self.system_dict["params"]["weights"])
                raise KeyError(s) from e

            # load optimizer
            if chkpt['optimizer'] is not None:
                self.system_dict["local"]["optimizer"].load_state_dict(
                    chkpt['optimizer'])
                self.system_dict["local"]["best_fitness"] = chkpt[
                    'best_fitness']

            # load results
            if chkpt.get('training_results') is not None:
                with open(self.system_dict["fixed_params"]["results_file"],
                          'w') as file:
                    file.write(chkpt['training_results'])

            self.system_dict["local"]["start_epoch"] = chkpt['epoch'] + 1
            del chkpt

        elif len(self.system_dict["params"]["weights"]) > 0:  # darknet format
            # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
            load_darknet_weights(self.system_dict["local"]["model"],
                                 self.system_dict["params"]["weights"])

        #Scheduler
        self.system_dict["local"]["scheduler"] = lr_scheduler.MultiStepLR(
            self.system_dict["local"]["optimizer"],
            milestones=[
                round(self.system_dict["params"]["epochs"] * x)
                for x in [0.8, 0.9]
            ],
            gamma=0.1)
        self.system_dict["local"]["scheduler"].last_epoch = self.system_dict[
            "local"]["start_epoch"] - 1

        if self.system_dict["params"]["mixed_precision"]:
            self.system_dict["local"]["model"], self.system_dict["local"][
                "optimizer"] = amp.initialize(
                    self.system_dict["local"]["model"],
                    self.system_dict["local"]["optimizer"],
                    opt_level='O1',
                    verbosity=0)

        # Initialize distributed training
        if self.system_dict["local"][
                "device"].type != 'cpu' and torch.cuda.device_count() > 1:
            dist.init_process_group(backend='nccl',
                                    init_method='tcp://127.0.0.1:9999',
                                    world_size=1,
                                    rank=0)
            self.system_dict["local"][
                "model"] = torch.nn.parallel.DistributedDataParallel(
                    self.system_dict["local"]["model"],
                    find_unused_parameters=True)
            self.system_dict["local"]["model"].yolo_layers = self.system_dict[
                "local"]["model"].module.yolo_layers

        # Dataset
        self.system_dict["local"]["dataset"] = LoadImagesAndLabels(
            self.system_dict["dataset"]["train"]["img_dir"],
            self.system_dict["dataset"]["train"]["label_dir"],
            self.system_dict["params"]["img_size"],
            self.system_dict["params"]["batch_size"],
            augment=True,
            hyp=self.system_dict["fixed_params"]["hyp"],
            rect=self.system_dict["params"]["rect"],
            cache_labels=True,
            cache_images=self.system_dict["params"]["cache_images"],
            single_cls=self.system_dict["params"]["single_cls"])

        # Dataloader
        self.system_dict["params"]["batch_size"] = min(
            self.system_dict["params"]["batch_size"],
            len(self.system_dict["local"]["dataset"]))
        self.system_dict["local"]["nw"] = min([
            os.cpu_count(), self.system_dict["params"]["batch_size"]
            if self.system_dict["params"]["batch_size"] > 1 else 0, 8
        ])

        self.system_dict["local"]["dataloader"] = torch.utils.data.DataLoader(
            self.system_dict["local"]["dataset"],
            batch_size=self.system_dict["params"]["batch_size"],
            num_workers=self.system_dict["local"]["nw"],
            shuffle=not self.system_dict["params"]["rect"],
            pin_memory=True,
            collate_fn=self.system_dict["local"]["dataset"].collate_fn)

        # Testloader
        if (self.system_dict["dataset"]["val"]["status"]):
            self.system_dict["local"][
                "testloader"] = torch.utils.data.DataLoader(
                    LoadImagesAndLabels(
                        self.system_dict["dataset"]["val"]["img_dir"],
                        self.system_dict["dataset"]["val"]["label_dir"],
                        self.system_dict["params"]["img_size"],
                        self.system_dict["params"]["batch_size"] * 2,
                        hyp=self.system_dict["fixed_params"]["hyp"],
                        rect=False,
                        cache_labels=True,
                        cache_images=self.system_dict["params"]
                        ["cache_images"],
                        single_cls=self.system_dict["params"]["single_cls"]),
                    batch_size=self.system_dict["params"]["batch_size"] * 2,
                    num_workers=self.system_dict["local"]["nw"],
                    pin_memory=True,
                    collate_fn=self.system_dict["local"]["dataset"].collate_fn)

    def setup(self, finetune=False):
        if not finetune:
            model_name = "yolov3-spp3"
            tmp_cfg = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "cfg",
                model_name + ".cfg")
            # portable copy of the template cfg (avoids shelling out to `cp`)
            import shutil
            shutil.copy(tmp_cfg, os.path.join(os.getcwd(),
                                              model_name + ".cfg"))
            self.system_dict["params"]["cfg"] = model_name + ".cfg"

        os.makedirs("weights", exist_ok=True)

        self.system_dict["params"]["weights"] = last if self.system_dict[
            "params"]["resume"] else self.system_dict["params"]["weights"]
        self.system_dict["local"]["device"] = torch_utils.select_device(
            apex=self.system_dict["params"]["mixed_precision"])
        if self.system_dict["local"]["device"].type == 'cpu':
            self.system_dict["params"]["mixed_precision"] = False

        self.system_dict["local"]["tb_writer"] = None
        self.system_dict["local"]["tb_writer"] = SummaryWriter()

        # img_size may hold one value (train) or two (train, test);
        # duplicate the single value when no test size is given
        img_size_list = self.system_dict["params"]["img_size"]
        img_size, img_size_test = img_size_list if len(
            img_size_list) == 2 else img_size_list * 2

        if self.system_dict["params"]["multi_scale"]:
            img_sz_min = round(img_size / 32 / 1.5)
            img_sz_max = round(img_size / 32 * 1.5)
            img_size = img_sz_max * 32  # initiate with maximum multi_scale size
            print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

            self.system_dict["params"]["img_sz_min"] = img_sz_min
            self.system_dict["params"]["img_sz_max"] = img_sz_max

        self.system_dict["params"]["img_size"] = img_size
        self.system_dict["params"]["img_size_test"] = img_size_test

        with open(self.system_dict["dataset"]["train"]["class_list"], 'r') as f:
            lines = f.readlines()

        # strip trailing newlines so class names don't carry a '\n'
        self.system_dict["local"]["classes"] = [
            line.strip() for line in lines if line.strip() != ""
        ]
        self.system_dict["local"]["num_classes"] = len(
            self.system_dict["local"]["classes"])
        self.system_dict["params"]["single_cls"] = (
            self.system_dict["local"]["num_classes"] == 1)

        self.system_dict["local"]["nc"] = 1 if self.system_dict["params"][
            "single_cls"] else self.system_dict["local"]["num_classes"]

        if 'pw' not in self.system_dict["params"][
                "arc"]:  # remove BCELoss positive weights
            self.system_dict["fixed_params"]["hyp"]['cls_pw'] = 1.
            self.system_dict["fixed_params"]["hyp"]['obj_pw'] = 1.

        #Update Config file
        if not finetune:
            update(self.system_dict["params"]["cfg"],
                   self.system_dict["local"]["num_classes"])

        #Model
        self.system_dict["local"]["model"] = Darknet(
            self.system_dict["params"]["cfg"],
            arc=self.system_dict["params"]["arc"]).to(
                self.system_dict["local"]["device"])

        if not finetune:
            attempt_download('darknet53.conv.74')

        if self.system_dict["params"]["adam"]:
            self.system_dict["local"]["optimizer"] = optim.Adam(
                self.system_dict["local"]["model"].parameters(),
                lr=self.system_dict["fixed_params"]["hyp"]['lr0'])
        else:
            self.system_dict["local"]["optimizer"] = optim.SGD(
                self.system_dict["local"]["model"].parameters(),
                lr=self.system_dict["fixed_params"]["hyp"]['lr0'],
                momentum=self.system_dict["fixed_params"]["hyp"]['momentum'],
                nesterov=True)

        self.system_dict["local"][
            "cutoff"] = -1  # backbone reaches to cutoff layer
        self.system_dict["local"]["start_epoch"] = 0
        self.system_dict["local"]["best_fitness"] = 0.

        if self.system_dict["params"]["weights"].endswith('.pt'):
            chkpt = torch.load(
                self.system_dict["params"]["weights"],
                map_location=self.system_dict["local"]["device"])

            # load model
            try:
                chkpt['model'] = {
                    k: v
                    for k, v in chkpt['model'].items()
                    if self.system_dict["local"]["model"].state_dict()
                    [k].numel() == v.numel()
                }
                self.system_dict["local"]["model"].load_state_dict(
                    chkpt['model'], strict=False)
            except KeyError as e:
                s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                    "See https://github.com/ultralytics/yolov3/issues/657" % (self.system_dict["params"]["weights"], self.system_dict["params"]["cfg"],
                                                                              self.system_dict["params"]["weights"])
                raise KeyError(s) from e

            # load optimizer
            if chkpt['optimizer'] is not None:
                self.system_dict["local"]["optimizer"].load_state_dict(
                    chkpt['optimizer'])
                self.system_dict["local"]["best_fitness"] = chkpt[
                    'best_fitness']
            # load results (disabled)
            # if chkpt.get('training_results') is not None:
            #     with open(self.system_dict["fixed_params"]["results_file"], 'w') as file:
            #         file.write(chkpt['training_results'])
            #
            # self.system_dict["local"]["start_epoch"] = chkpt['epoch'] + 1
            del chkpt

        elif len(self.system_dict["params"]["weights"]) > 0:  # darknet format
            # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
            load_darknet_weights(self.system_dict["local"]["model"],
                                 self.system_dict["params"]["weights"])

        #Scheduler
        self.system_dict["local"]["scheduler"] = lr_scheduler.MultiStepLR(
            self.system_dict["local"]["optimizer"],
            milestones=[
                round(self.system_dict["params"]["epochs"] * x)
                for x in [0.8, 0.9]
            ],
            gamma=0.1)
        self.system_dict["local"]["scheduler"].last_epoch = self.system_dict[
            "local"]["start_epoch"] - 1

        if self.system_dict["params"]["mixed_precision"]:
            self.system_dict["local"]["model"], self.system_dict["local"][
                "optimizer"] = amp.initialize(
                    self.system_dict["local"]["model"],
                    self.system_dict["local"]["optimizer"],
                    opt_level='O1',
                    verbosity=0)

        # Initialize distributed training
        if self.system_dict["local"][
                "device"].type != 'cpu' and torch.cuda.device_count() > 1:
            dist.init_process_group(backend='nccl',
                                    init_method='tcp://127.0.0.1:9999',
                                    world_size=1,
                                    rank=0)
            self.system_dict["local"][
                "model"] = torch.nn.parallel.DistributedDataParallel(
                    self.system_dict["local"]["model"],
                    find_unused_parameters=True)
            self.system_dict["local"]["model"].yolo_layers = self.system_dict[
                "local"]["model"].module.yolo_layers

        # Dataset
        self.system_dict["local"]["dataset"] = LoadImagesAndLabels(
            self.system_dict["dataset"]["train"]["img_dir"],
            self.system_dict["dataset"]["train"]["label_dir"],
            self.system_dict["params"]["img_size"],
            self.system_dict["params"]["batch_size"],
            augment=True,
            hyp=self.system_dict["fixed_params"]["hyp"],
            rect=self.system_dict["params"]["rect"],
            cache_images=self.system_dict["params"]["cache_images"])

        # Dataloader
        self.system_dict["params"]["batch_size"] = min(
            self.system_dict["params"]["batch_size"],
            len(self.system_dict["local"]["dataset"]))
        self.system_dict["local"]["nw"] = min([
            os.cpu_count(), self.system_dict["params"]["batch_size"]
            if self.system_dict["params"]["batch_size"] > 1 else 0, 8
        ])

        self.system_dict["local"]["dataloader"] = torch.utils.data.DataLoader(
            self.system_dict["local"]["dataset"],
            batch_size=self.system_dict["params"]["batch_size"],
            num_workers=self.system_dict["local"]["nw"],
            shuffle=not self.system_dict["params"]["rect"],
            pin_memory=True,
            collate_fn=self.system_dict["local"]["dataset"].collate_fn)

        for f in glob.glob('*_batch*.png') + glob.glob(
                self.system_dict["fixed_params"]["results_file"]):
            os.remove(f)

        if (self.system_dict["dataset"]["val"]["status"]):
            self.system_dict["local"][
                "testloader"] = torch.utils.data.DataLoader(
                    LoadImagesAndLabels(
                        self.system_dict["dataset"]["val"]["img_dir"],
                        self.system_dict["dataset"]["val"]["label_dir"],
                        self.system_dict["params"]["img_size"],
                        self.system_dict["params"]["batch_size"],
                        hyp=self.system_dict["fixed_params"]["hyp"],
                        rect=False,
                        cache_images=self.system_dict["params"]
                        ["cache_images"]),
                    batch_size=self.system_dict["params"]["batch_size"],
                    num_workers=self.system_dict["local"]["nw"],
                    pin_memory=True,
                    collate_fn=self.system_dict["local"]["dataset"].collate_fn)