Example No. 1
    def __init__(
        self,
        pointVAE,
        im_encoder,
        encoder_dim,
        grid_dims,
        Generate1_dims,
        Generate2_dims,
        Generate3_dims,
        args,
    ):
        super(GeneratorVAE, self).__init__()
        self.args = args

        #self.im_encoder = SegNet(input_channels=encoder_dim[0], output_channels=encoder_dim[1])
        self.im_encoder = im_encoder
        init_weights(self.im_encoder, init_type="kaiming")
        self.N = grid_dims[0] * grid_dims[1]
        # self.G1 = PointGeneration(Generate1_dims)
        self.G1 = FoldingNet(indim=Generate1_dims)
        init_weights(self.G1, init_type="xavier")
        self.G2 = FoldingNet(indim=Generate2_dims)
        init_weights(self.G2, init_type="xavier")
        self.G3 = FoldingNet(indim=Generate3_dims)
        init_weights(self.G3, init_type="xavier")
        #self.pointVAE = PointVAE(args=args)
        self.pointVAE = pointVAE
        init_weights(self.pointVAE, init_type="xavier")

        self.P1 = PointProjection()
        self.P2 = PointProjection()
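Every example in this listing delegates to an `init_weights` helper whose definition is not shown. The implementations differ between the source repositories, but a minimal sketch of the usual pattern (an assumption, matching the `init_type` and `scale` arguments used in these snippets) looks like this:

import torch.nn as nn

def init_weights(net, init_type='kaiming', scale=1.0):
    # Sketch only: real helpers differ per repository in gains,
    # covered layer types, and BatchNorm handling.
    def init_fn(m):
        if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            if init_type == 'kaiming':
                nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
            elif init_type == 'xavier':
                nn.init.xavier_normal_(m.weight)
            elif init_type == 'normal':
                nn.init.normal_(m.weight, 0.0, 0.02)
            else:
                raise NotImplementedError(init_type)
            m.weight.data *= scale  # 'scale' as in the ESRGAN-style snippets below
            if m.bias is not None:
                nn.init.constant_(m.bias, 0.0)
        elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
            nn.init.constant_(m.weight, 1.0)
            nn.init.constant_(m.bias, 0.0)
    net.apply(init_fn)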
Example No. 2
    def __init__(self, opt):
        self.device = torch.device('cuda')
        self.opt = opt
        self.G = Generator(self.opt['network_G']).to(self.device)
        util.init_weights(self.G, init_type='kaiming', scale=0.1)
        if self.opt['path']['pretrain_G']:
            self.G.load_state_dict(torch.load(self.opt['path']['pretrain_G']),
                                   strict=True)
        self.D = Discriminator(self.opt['network_D']).to(self.device)
        util.init_weights(self.D, init_type='kaiming', scale=1)
        self.FE = VGGFeatureExtractor().to(self.device)
        self.G.train()
        self.D.train()
        self.FE.eval()

        self.log_dict = OrderedDict()

        self.optim_params = [
            v for k, v in self.G.named_parameters() if v.requires_grad
        ]
        self.opt_G = torch.optim.Adam(self.optim_params,
                                      lr=self.opt['train']['lr_G'],
                                      betas=(self.opt['train']['b1_G'],
                                             self.opt['train']['b2_G']))
        self.opt_D = torch.optim.Adam(self.D.parameters(),
                                      lr=self.opt['train']['lr_D'],
                                      betas=(self.opt['train']['b1_D'],
                                             self.opt['train']['b2_D']))

        self.optimizers = [self.opt_G, self.opt_D]
        self.schedulers = [
            lr_scheduler.MultiStepLR(optimizer, self.opt['train']['lr_steps'],
                                     self.opt['train']['lr_gamma'])
            for optimizer in self.optimizers
        ]
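The `self.schedulers` list is typically advanced once per training step by a small helper; a plausible sketch (the method name is an assumption, not taken from the source):

    def update_learning_rate(self):
        # Each MultiStepLR multiplies its optimizer's lr by lr_gamma whenever
        # the step counter crosses one of the lr_steps milestones.
        for scheduler in self.schedulers:
            scheduler.step()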
Example No. 3
    def __init__(self, args):
        super().__init__()

        self.args = args
        self.in_drop1d = nn.Dropout(args.in_dropout1d)
        self.in_drop2d = nn.Dropout2d(args.in_dropout2d)
        self.drop = nn.Dropout(args.hid_dropout)

        self.embed = nn.Embedding(num_embeddings=21,
                                  embedding_dim=args.n_features,
                                  padding_idx=20)

        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=args.n_features,
                      out_channels=args.n_filters,
                      kernel_size=i,
                      padding=i // 2) for i in args.conv_kernels
        ])
        self.cnn_final = nn.Conv1d(in_channels=len(self.convs) *
                                   args.n_filters,
                                   out_channels=128,
                                   kernel_size=3,
                                   padding=3 // 2)
        self.relu = nn.ReLU()

        self.lstm = nn.LSTM(128,
                            args.n_hid,
                            bidirectional=True,
                            batch_first=True)

        init_weights(self)
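The constructor implies a forward pass roughly as follows (a sketch, assuming `torch` is imported and that `args.conv_kernels` holds odd values so that `padding=i // 2` preserves the sequence length):

    def forward(self, inp):                    # inp: (batch, seq_len) token indices
        x = self.in_drop1d(self.embed(inp))    # (batch, seq_len, n_features)
        x = self.in_drop2d(x).transpose(1, 2)  # (batch, n_features, seq_len) for Conv1d
        x = torch.cat([self.relu(conv(x)) for conv in self.convs], dim=1)
        x = self.relu(self.cnn_final(x))       # (batch, 128, seq_len)
        out, _ = self.lstm(self.drop(x).transpose(1, 2))
        return out                             # (batch, seq_len, 2 * n_hid)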
Example No. 4
    def _init_module(self):
        if self.base_model == 'Conv64F':
            self.features = CNNEncoder(**self.base_model_info)
        else:
            raise RuntimeError

        self.metric_layer = CrossRelationModule(**self.kwargs)

        init_weights(self, init_type='normal')
Example No. 5
    def __init__(self, args, in_size):
        super().__init__()
        self.args = args

        self.attn = Attention(in_size=in_size, att_size=args.att_size)
        self.drop = nn.Dropout(args.hid_dropout)
        self.label = nn.Linear(in_size, args.num_classes)
        self.mem = nn.Linear(in_size, 1)

        init_weights(self)
Example No. 6
    def __init__(self, args, in_size):
        super().__init__()
        self.args = args

        self.linear = nn.Linear(in_size, 32)
        self.drop = nn.Dropout(0.25)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(32)
        self.label = nn.Linear(32, args.num_classes)
        self.mem = nn.Linear(32, 1)

        init_weights(self)
Example No. 7
def _init_weights(model, weights_path):
    logger.info("Initializing weights")
    if weights_path is None:
        init_weights(model)
    elif weights_path.endswith(".pth"):
        logger.info(f"Loading weights {os.path.basename(weights_path)}")
        try:
            model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            logger.info(
                f"Ignoring {e} probably caused by loading weights of a model with a "
                "different number of classes. The part of the weights not affected by "
                "this should already be loaded.")
    else:
        raise ValueError(f"{weights_path} is not a valid weights path.")
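Call sites for this loader pass either `None`, which falls back to random initialization, or a `.pth` checkpoint path; `build_model` below is a hypothetical factory used only for illustration:

model = build_model()                     # hypothetical model factory
_init_weights(model, None)                # random init via init_weights
_init_weights(model, "weights/best.pth")  # load a checkpoint; strict=False
                                          # tolerates a class-count mismatch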
Example No. 8
    def __init__(self, args):
        super().__init__()
        self.args = args
        self.densel1 = nn.Linear(self.args.n_features2, self.args.n_hid2)
        self.densel2 = nn.Linear(self.args.n_hid2, self.args.n_hid2)
        self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 +
                              self.args.n_features2,
                              hidden_size=self.args.n_hid2,
                              num_layers=3,
                              bidirectional=True,
                              batch_first=True)
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

        init_weights(self)
        self.init_weights()
Example No. 9
    def __init__(self, args, awd_layer, architecture):
        super().__init__(args)
        self.awd_layer = awd_layer
        self.architecture = architecture

        if awd_layer in ["first", "second"]:
            self.project = nn.Linear(1280, 300, bias=False)
        elif awd_layer in ["last"]:
            self.project = nn.Linear(320, 300, bias=False)

        if self.architecture in ["before", "both"]:
            self.lstm = nn.LSTM(128 + 300, args.n_hid, bidirectional=True, batch_first=True)

        init_weights(self)

        # The pretrained AWD embedding is created after init_weights so that
        # its weights are not clobbered by the random initialization.
        self.awd = AWDEmbedding(ntoken=21, ninp=320, nhid=1280, nlayers=3, tie_weights=True)
        self.awd.load_pretrained()
Example No. 10
    def __init__(self, args, in_size):
        super().__init__()
        self.args = args

        self.drop = nn.Dropout(p=0.25)
        self.relu = nn.ReLU()

        self.cnn_1 = nn.Conv1d(in_channels=in_size,
                               out_channels=32,
                               kernel_size=7,
                               padding=7 // 2)
        self.cnn_2 = nn.Conv1d(in_channels=32,
                               out_channels=8,
                               kernel_size=7,
                               padding=7 // 2)

        init_weights(self)
Example No. 11
    def __init__(self, in_size, out_size, is_deconv, n_concat=2):
        super(unetUp, self).__init__()
        self.conv = unetConv2(in_size + (n_concat - 2) * out_size, out_size,
                              False)
        if is_deconv:
            self.up = nn.ConvTranspose2d(in_size,
                                         out_size,
                                         kernel_size=2,
                                         stride=2,
                                         padding=0)
        else:
            self.up = nn.Sequential(nn.UpsamplingBilinear2d(scale_factor=2),
                                    nn.Conv2d(in_size, out_size, 1))

        # initialise the blocks
        for m in self.children():
            if m.__class__.__name__.find('unetConv2') != -1: continue
            init_weights(m, init_type='kaiming')
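The forward pass of `unetUp` is not shown, but the channel arithmetic above (`in_size + (n_concat - 2) * out_size` input channels, with `in_size == 2 * out_size` in typical usage) is consistent with upsampling the deep feature and concatenating the skip features; a sketch:

    def forward(self, high_feature, *low_features):
        # Upsample the deeper feature map, then concatenate the skip
        # connections along the channel dimension before the fused conv.
        outputs = self.up(high_feature)
        for feature in low_features:
            outputs = torch.cat([outputs, feature], dim=1)
        return self.conv(outputs)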
Example No. 12
    def model(self):
        layer_input = 1
        lb = self.x
        image_min_wid_hei = min(
            self.x.get_shape().as_list()[1:3])  # the smaller of the image's width and height
        self.p_keep_conv = tf.placeholder("float", name="p_keep_conv")
        self.p_keep_hidden = tf.placeholder("float", name="p_keep_hidden")
        for i in range(self.params.n_layer):
            layer_output = self.params.neurons[i]
            w = init_weights([
                self.params.patch_size[0], self.params.patch_size[1],
                layer_input, layer_output
            ])
            la = tf.nn.relu(
                tf.nn.conv2d(lb, w, strides=[1, 1, 1, 1],
                             padding='SAME'))  # convolution layer

            if round(image_min_wid_hei / (2**(i + 1))) >= 4:
                l1 = tf.nn.max_pool(la,
                                    ksize=[1, 2, 2, 1],
                                    strides=[1, 2, 2, 1],
                                    padding='SAME')
            else:
                l1 = la
            if i == self.params.n_layer - 1:
                wide = round(image_min_wid_hei / (2**self.params.n_layer))
                if wide < 4:
                    wide = 4  # after pooling (downsampling), the minimum image width is 4
                print("wide ", wide)
                w2 = init_weights(
                    [layer_output * wide * wide, self.params.output_dimension])
                l2 = tf.reshape(l1, [-1, w2.get_shape().as_list()[0]])
                print(l2)
            else:
                l2 = tf.nn.dropout(l1, self.p_keep_conv)

            layer_input = layer_output
            lb = l2
        w_o = init_weights(
            [self.params.output_dimension, self.config.label_size])
        lo = tf.nn.relu(tf.matmul(lb, w2))
        lo = tf.nn.dropout(lo, self.p_keep_hidden)
        pyx = tf.matmul(lo, w_o, name="py_x")
        return pyx
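Unlike the PyTorch examples, this TensorFlow 1.x snippet expects `init_weights` to return a fresh trainable variable of a given shape. A conventional definition (an assumption; the original is not shown):

import tensorflow as tf  # TensorFlow 1.x, matching the placeholders above

def init_weights(shape, stddev=0.01):
    # Return a trainable weight variable drawn from a normal distribution.
    return tf.Variable(tf.random_normal(shape, stddev=stddev))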
Example No. 13
    def __init__(self, args, bi_awd_layer, architecture):
        super().__init__()
        self.args = args
        self.bi_awd_layer = bi_awd_layer
        self.architecture = architecture
        self.densel1 = nn.Linear(self.args.n_features2, self.args.n_hid2)
        self.densel2 = nn.Linear(self.args.n_hid2, self.args.n_hid2)
        self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 +
                              self.args.n_features2,
                              hidden_size=self.args.n_hid2,
                              num_layers=3,
                              bidirectional=True,
                              batch_first=True)
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

        if bi_awd_layer in ["second"]:
            self.project = nn.Linear(2560, 300, bias=False)
        elif bi_awd_layer in ["last"]:
            self.project = nn.Linear(320 * 2, 300, bias=False)

        if self.architecture in ["before", "both"]:
            self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 +
                                  self.args.n_features2 + 300,
                                  hidden_size=self.args.n_hid2,
                                  num_layers=3,
                                  bidirectional=True,
                                  batch_first=True)

        init_weights(self)
        self.init_weights()

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()
Example No. 14
    def __init__(self, opt):
        self.device = torch.device('cuda')
        self.opt = opt
        self.G = Generator(self.opt['network_G']).to(self.device)
        util.init_weights(self.G, init_type='kaiming', scale=0.1)
        self.G.train()

        self.log_dict = OrderedDict()

        self.optim_params = [
            v for k, v in self.G.named_parameters() if v.requires_grad
        ]
        self.opt_G = torch.optim.Adam(self.optim_params,
                                      lr=self.opt['train']['lr_G'],
                                      betas=(self.opt['train']['b1_G'],
                                             self.opt['train']['b2_G']))

        self.optimizers = [self.opt_G]
        self.schedulers = [
            lr_scheduler.MultiStepLR(optimizer, self.opt['train']['lr_steps'],
                                     self.opt['train']['lr_gamma'])
            for optimizer in self.optimizers
        ]
Example No. 15
    def __init__(self,
                 in_size,
                 out_size,
                 is_batchnorm,
                 n=2,
                 ks=3,
                 stride=1,
                 padding=1):
        super(unetConv2, self).__init__()
        self.n = n
        self.ks = ks
        self.stride = stride
        self.padding = padding
        s = stride
        p = padding
        if is_batchnorm:
            for i in range(1, n + 1):
                conv = nn.Sequential(
                    nn.Conv2d(in_size, out_size, ks, s, p),
                    nn.BatchNorm2d(out_size),
                    nn.ReLU(inplace=True),
                )
                setattr(self, 'conv%d' % i, conv)
                in_size = out_size

        else:
            for i in range(1, n + 1):
                conv = nn.Sequential(
                    nn.Conv2d(in_size, out_size, ks, s, p),
                    nn.ReLU(inplace=True),
                )
                setattr(self, 'conv%d' % i, conv)
                in_size = out_size

        # initialise the blocks
        for m in self.children():
            init_weights(m, init_type='kaiming')
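Because the conv blocks are registered dynamically via `setattr`, the (unshown) forward pass retrieves them the same way; a sketch:

    def forward(self, inputs):
        x = inputs
        for i in range(1, self.n + 1):          # apply conv1 .. convn in order
            x = getattr(self, 'conv%d' % i)(x)
        return x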
Example No. 16
    def __init__(self,
                 in_channels=1,
                 n_classes=2,
                 feature_scale=4,
                 is_deconv=True,
                 is_batchnorm=True):
        super(UNet, self).__init__()
        self.in_channels = in_channels
        self.feature_scale = feature_scale
        self.is_deconv = is_deconv
        self.is_batchnorm = is_batchnorm
        self.n_classes = n_classes

        filters = [64, 128, 256, 512, 1024]
        filters = [int(x / self.feature_scale) for x in filters]

        # downsampling
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.conv1 = unetConv2(self.in_channels, filters[0], self.is_batchnorm)
        self.conv2 = unetConv2(filters[0], filters[1], self.is_batchnorm)
        self.conv3 = unetConv2(filters[1], filters[2], self.is_batchnorm)
        self.conv4 = unetConv2(filters[2], filters[3], self.is_batchnorm)
        self.center = unetConv2(filters[3], filters[4], self.is_batchnorm)
        # upsampling
        self.up_concat4 = unetUp(filters[4], filters[3], self.is_deconv)
        self.up_concat3 = unetUp(filters[3], filters[2], self.is_deconv)
        self.up_concat2 = unetUp(filters[2], filters[1], self.is_deconv)
        self.up_concat1 = unetUp(filters[1], filters[0], self.is_deconv)
        # final conv (without any concat)
        self.final = nn.Conv2d(filters[0], n_classes, 1)

        # initialise weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init_weights(m, init_type='kaiming')
            elif isinstance(m, nn.BatchNorm2d):
                init_weights(m, init_type='kaiming')
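The constructor implies the usual UNet wiring; a sketch, using the `unetUp` call convention sketched under Example No. 11:

    def forward(self, inputs):
        conv1 = self.conv1(inputs)                 # filters[0]
        conv2 = self.conv2(self.maxpool(conv1))    # filters[1]
        conv3 = self.conv3(self.maxpool(conv2))    # filters[2]
        conv4 = self.conv4(self.maxpool(conv3))    # filters[3]
        center = self.center(self.maxpool(conv4))  # filters[4]
        up4 = self.up_concat4(center, conv4)       # deep feature first, then skip
        up3 = self.up_concat3(up4, conv3)
        up2 = self.up_concat2(up3, conv2)
        up1 = self.up_concat1(up2, conv1)
        return self.final(up1)                     # (batch, n_classes, H, W)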
Example No. 17
    def __init__(self, args, bi_awd_layer, project_size=None):
        super().__init__()
        self.args = args
        self.bi_awd_layer = bi_awd_layer
        self.project_size = project_size
        self.drop = nn.Dropout(args.hid_dropout)

        if project_size is not None and bi_awd_layer in ["first", "second"]:
            self.project = nn.Linear(2 * 1280, project_size, bias=False)
        elif project_size is not None and bi_awd_layer in ["last"]:
            self.project = nn.Linear(2 * 320, project_size, bias=False)

        if project_size is not None:
            self.lstm = nn.LSTM(project_size,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)
        elif bi_awd_layer in ["first", "second"]:
            self.lstm = nn.LSTM(2 * 1280,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)
        elif bi_awd_layer in ["last"]:
            self.lstm = nn.LSTM(2 * 320,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)

        init_weights(self)

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()
Example No. 18
    def __init__(self, encoder_dim, grid_dims, Generate1_dims, Generate2_dims):
        super(GeneratorVanilla, self).__init__()

        self.encoder = SegNet(input_channels=encoder_dim[0],
                              output_channels=encoder_dim[1])
        init_weights(self.encoder, init_type="kaiming")
        self.N = grid_dims[0] * grid_dims[1]
        self.G1 = PointGeneration(Generate1_dims)

        init_weights(self.G1, init_type="xavier")
        self.G2 = PointGeneration(Generate2_dims)
        init_weights(self.G2, init_type="xavier")
        # self.reconstruct = nn.Tanh()

        self.P0 = PointProjection()
        self.P1 = PointProjection()
Example No. 19
    def __init__(self,
                 grid_dims,
                 resgen_width,
                 resgen_depth,
                 resgen_codelength,
                 class_num,
                 MLP_doLastRelu=False,
                 read_view=False,
                 folding_twice=False):

        super(GeneratorVanilla, self).__init__()
        N = grid_dims[0] * grid_dims[1]
        u = (torch.arange(0., grid_dims[0]) / grid_dims[0] - 0.5).repeat(
            grid_dims[1])
        v = (torch.arange(0., grid_dims[1]) / grid_dims[1] - 0.5).expand(
            grid_dims[0], -1).t().reshape(-1)
        t = torch.empty(grid_dims[0] * grid_dims[1], dtype=torch.float)
        t.fill_(0.)

        self.read_view_branch = nn.Linear(2, resgen_codelength)
        self.read_view = read_view
        self.folding_twice = folding_twice

        self.encoder = resnet18(pretrained=False)
        init_weights(self.encoder, init_type="kaiming")
        self.grid = torch.stack((u, v), 1)

        self.N = grid_dims[0] * grid_dims[1]
        self.G1 = GeneratorRes(resgen_width, resgen_codelength)
        init_weights(self.G1, init_type="xavier")
        if self.folding_twice:
            self.G2 = GeneratorRes(resgen_width,
                                   resgen_codelength,
                                   input_dim=3)
            init_weights(self.G2, init_type="xavier")

        self.classifier = nn.Linear(512, class_num)
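The u/v construction above tiles a 2-D grid over [-0.5, 0.5); running it with a toy grid size (hypothetical values, for illustration only) makes the layout concrete:

import torch

grid_dims = (2, 2)  # toy size for illustration
u = (torch.arange(0., grid_dims[0]) / grid_dims[0] - 0.5).repeat(grid_dims[1])
v = (torch.arange(0., grid_dims[1]) / grid_dims[1] - 0.5).expand(
    grid_dims[0], -1).t().reshape(-1)
print(torch.stack((u, v), 1))
# tensor([[-0.5000, -0.5000],
#         [ 0.0000, -0.5000],
#         [-0.5000,  0.0000],
#         [ 0.0000,  0.0000]])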
Example No. 20
def train():
    if config.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    config.saved_path = config.saved_path + '/{0}/'.format(config.dataset_name)
    config.log_path = config.log_path + '/{0}/'.format(config.dataset_name)
    os.makedirs(config.log_path, exist_ok=True)
    os.makedirs(config.saved_path, exist_ok=True)

    training_params = {
        'batch_size': config.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': config.num_workers
    }

    val_params = {
        'batch_size': config.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': config.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    if ("coco" in config.dataset_name):
        DS = CocoDataset
    else:
        DS = PascalVocDataset
    training_set = DS(root_dir=os.path.join(config.data_path,
                                            config.dataset_name),
                      set=config.train_set,
                      img_size=input_sizes[config.compound_coef],
                      anchor_free_mode=config.anchor_free_mode,
                      transform=transforms.Compose([
                          Normalizer(mean=config.mean, std=config.std),
                          Augmenter(),
                          Resizer(input_sizes[config.compound_coef])
                      ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = DS(root_dir=os.path.join(config.data_path, config.dataset_name),
                 set=config.val_set,
                 img_size=input_sizes[config.compound_coef],
                 anchor_free_mode=config.anchor_free_mode,
                 transform=transforms.Compose([
                     Normalizer(mean=config.mean, std=config.std),
                     Resizer(input_sizes[config.compound_coef])
                 ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_classes=len(config.obj_list),
                                 compound_coef=config.compound_coef,
                                 load_weights=False,
                                 anchor_free_mode=config.anchor_free_mode,
                                 ratios=eval(config.anchors_ratios),
                                 scales=eval(config.anchors_scales))

    init_weights(model)
    last_step = 0
    # load last weights
    if config.load_weights:
        # First initialize the network parameters with init_weights, then
        # restore, so that any parameters not covered by the checkpoint are
        # still properly initialized.

        if config.pret_weight_path.endswith('.pth'):
            weights_path = config.pret_weight_path
        else:
            # fall back to the latest checkpoint, mirroring the other training
            # scripts; otherwise weights_path would be unbound here
            weights_path = get_last_weights(config.saved_path)
        try:
            model_dict = torch.load(weights_path)
            new_dict = {}
            for k, v in model_dict.items():
                if 'header' not in k:
                    new_dict[k] = v
            ret = model.load_state_dict(new_dict, strict=False)
        except RuntimeError as e:
            print('[Warning] Ignoring {0}'.format(e))
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print('[Info] loaded pretrained weights: {0},'.format(weights_path))

    if config.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when training on multiple GPUs with a small per-GPU batch
    # size -- useful when GPU memory is limited, because batch norm over such
    # small batches makes training unstable or slow to converge. sync_bn
    # normalizes over the mini-batches of all GPUs as if they were one batch,
    # at the cost of slightly slower training.
    if config.num_gpus > 1 and config.batch_size // config.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        config.log_path +
        '/{0}/'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

    # wrap the model with the loss function, to reduce memory usage on gpu0 and speed up
    model = ModelWithLoss(model, debug=config.debug)

    if config.num_gpus > 0:
        model = model.cuda()
        if config.num_gpus > 1:
            model = CustomDataParallel(model, config.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if config.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), config.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    config.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=config.patience,
        verbose=True,
        factor=config.factor,
        min_lr=config.min_lr)

    epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(config.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            for iter, data in enumerate(training_generator):
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if config.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=config.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    print(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, config.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % config.val_interval == 0 and epoch > config.start_interval:

                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if config.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=config.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, config.num_epochs, cls_loss, reg_loss,
                            loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification_loss',
                                   {'val': cls_loss}, step)

                save_checkpoint(
                    model, 'efficientdet-d{0}_{1}_{2}.pth'.format(
                        config.compound_coef, epoch, step))

                model.train()

    except KeyboardInterrupt:
        save_checkpoint(
            model,
            'efficientdet-d{0}_{1}_{2}.pth'.format(config.compound_coef, epoch,
                                                   step))
        writer.close()
    writer.close()
Example No. 21
def main(args, config):

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    video_name = os.path.basename(args.video_path)[:-4]

    if args.output_path is not None:
        # frame_width / frame_height are not defined in this snippet; they are
        # presumably module-level constants in the original script
        outvid = cv2.VideoWriter(args.output_path,
                                 cv2.VideoWriter_fourcc(*'mp4v'), 10,
                                 (frame_width, frame_height))
    else:
        outvid = None

    if not os.path.exists(args.saved_path):
        os.mkdir(args.saved_path)
    if not os.path.exists(args.saved_path + '/{}'.format(video_name)):
        os.mkdir(args.saved_path + '/{}'.format(video_name))

    val_transforms = get_augmentation(config, types='val')
    retransforms = Compose([
        Denormalize(box_transform=False),
        ToPILImage(),
        Resize(size=(frame_width, frame_height))
    ])
    idx_classes = {idx: i for idx, i in enumerate(config.obj_list)}
    NUM_CLASSES = len(config.obj_list)
    net = EfficientDetBackbone(num_classes=NUM_CLASSES,
                               compound_coef=args.c,
                               ratios=eval(config.anchors_ratios),
                               scales=eval(config.anchors_scales))

    model = Detector(n_classes=NUM_CLASSES,
                     model=net,
                     criterion=FocalLoss(),
                     optimizer=torch.optim.Adam,
                     optim_params={'lr': 0.1},
                     device=device)
    model.eval()

    if args.weight is not None:
        load_checkpoint(model, args.weight)
    else:
        print('[Info] initialize weights')
        init_weights(model.model)

    # Start detecting
    vidcap = cv2.VideoCapture(args.video_path)
    obj_track = {}

    frame_idx = 0
    with tqdm(total=args.frame_end) as pbar:
        while (vidcap.isOpened()):
            # advance to the requested start frame; without the increment this
            # loop would never terminate
            while frame_idx < args.frame_start:
                success, frame = vidcap.read()
                frame_idx += 1

            ims = []
            im_shows = []
            for b in range(args.batch_size):
                success, frame_ = vidcap.read()
                if not success:
                    return

                frame = cv2.cvtColor(frame_, cv2.COLOR_BGR2RGB)
                frame = Image.fromarray(frame)
                ims.append(val_transforms(frame))
                im_shows.append(frame_)

            with torch.no_grad():
                batch = {
                    'imgs': torch.stack([i['img'] for i in ims]).to(device)
                }
                outs = model.inference_step(batch, args.min_conf, args.min_iou)
                try:
                    outs = postprocessing(outs, batch['imgs'].cpu()[0],
                                          retransforms)
                except Exception:
                    pass
                for idx, out in enumerate(outs):
                    bbox_xyxy, cls_conf, cls_ids = out['bboxes'], out[
                        'scores'], out['classes']

                    bbox_xyxy = bbox_xyxy.astype(int)  # np.int was removed from NumPy
                    out_dict = {
                        'bboxes': bbox_xyxy.tolist(),
                        'classes': cls_ids.tolist(),
                        'scores': cls_conf.tolist()
                    }
                    with open(
                            args.saved_path +
                            '/{}/{}'.format(video_name,
                                            str(frame_idx).zfill(5) + '.json'),
                            'w') as f:
                        json.dump(out_dict, f)
                    frame_idx += 1
                display_img(outs,
                            im_shows,
                            imshow=False,
                            outvid=outvid,
                            obj_list=idx_classes)
            pbar.update(args.batch_size)
Example No. 22
def train_cls(opt, cfg):
    training_params = {
        'batch_size': cfg.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': cfg.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [224, 240, 260, 300, 380, 456, 528, 600]

    # training_set = CocoDataset(
    #     # root_dir=os.path.join(opt.data_path, params.project_name),
    #     root_dir=opt.data_path,
    #     set=params.train_set,
    #     transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
    #                                   # AdvProp(),
    #                                   Augmenter(),
    #                                   Resizer(input_sizes[cfg.compound_coef])]))

    training_set = DataGenerator(data_path=os.path.join(
        opt.data_path, 'Train', 'OriginImage'),
                                 class_ids=cfg.dictionary_class_name.keys(),
                                 transform=transforms.Compose([
                                     Augmenter(),
                                     Normalizer(mean=cfg.mean, std=cfg.std),
                                     Resizer(input_sizes[cfg.compound_coef])
                                 ]))
    training_generator = DataLoader(training_set, **training_params)

    # val_set = CocoDataset(
    #     # root_dir=os.path.join(opt.data_path, params.project_name),
    #     root_dir=opt.data_path,
    #     set=params.val_set,
    #     transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
    #                                   Resizer(input_sizes[cfg.compound_coef])]))

    val_set = DataGenerator(
        # root_dir=os.path.join(opt.data_path, params.project_name),
        data_path=os.path.join(opt.data_path, 'Validation'),
        class_ids=cfg.dictionary_class_name.keys(),
        transform=transforms.Compose([
            Normalizer(mean=cfg.mean, std=cfg.std),
            Resizer(input_sizes[cfg.compound_coef])
        ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EffNet.from_name(
        f'efficientnet-b{cfg.compound_coef}',
        override_params={'num_classes': len(cfg.dictionary_class_name.keys())})

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (ValueError, IndexError):
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
            print(ret)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, '
                'this might be because you load a pretrained weights with different number of classes. '
                'The rest of the weights should be loaded already.')

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if cfg.training_layer.lower() == 'heads':

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when training on multiple GPUs with a small per-GPU batch
    # size -- useful when GPU memory is limited, because batch norm over such
    # small batches makes training unstable or slow to converge. sync_bn
    # normalizes over the mini-batches of all GPUs as if they were one batch,
    # at the cost of slightly slower training.
    if cfg.num_gpus > 1 and cfg.batch_size // cfg.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    # wrap the model with the loss function, to reduce memory usage on gpu0 and speed up
    model = EfficientNetWrapper(model)

    if cfg.num_gpus > 0:
        model = model.cuda()
        if cfg.num_gpus > 1:
            model = CustomDataParallel(model, cfg.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if cfg.optimizer.lower() == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), cfg.learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    cfg.learning_rate,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    # Setup complete, then start training
    now = datetime.datetime.now()
    opt.saved_path = opt.saved_path + f'/trainlogs_{now.strftime("%Y%m%d_%H%M%S")}'
    if opt.log_path is None:
        opt.log_path = opt.saved_path
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    # Write history
    if 'backlog' not in opt.config:
        with open(
                os.path.join(opt.saved_path,
                             f'{now.strftime("%Y%m%d%H%M%S")}.backlog.json'),
                'w') as f:
            backlog = dict(cfg.to_pascal_case())
            backlog['__metadata__'] = 'Backlog at ' + now.strftime(
                "%Y/%m/%d %H:%M:%S")
            json.dump(backlog, f)
    else:
        with open(
                os.path.join(opt.saved_path,
                             f'{now.strftime("%Y%m%d%H%M%S")}.history.json'),
                'w') as f:
            history = dict(cfg.to_pascal_case())
            history['__metadata__'] = now.strftime("%Y/%m/%d %H:%M:%S")
            json.dump(history, f)

    writer = SummaryWriter(opt.log_path + f'/tensorboard')

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(cfg.no_epochs):
            # metrics
            correct_preds = 0.

            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.set_description(
                        f'Skip {iter} < {step} - {last_epoch} * {num_iter_per_epoch}'
                    )
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    # if params.num_gpus == 1:
                    #     # if only one gpu, just send it to cuda:0
                    #     # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                    imgs = imgs.cuda()
                    annot = annot.cuda()

                    optimizer.zero_grad()
                    logits, loss = model(imgs, annot)
                    loss = loss.mean()

                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()
                    epoch_loss.append(float(loss))

                    _, preds = torch.max(logits, dim=1)
                    correct_preds += torch.sum(preds == annot)
                    acc = correct_preds / (
                        (step % num_iter_per_epoch + 1) * cfg.batch_size)

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. '
                        'Loss: {:.5f}. Accuracy: {:.5f}.'.format(
                            step, epoch, cfg.no_epochs, iter + 1,
                            num_iter_per_epoch, float(loss), float(acc)))
                    writer.add_scalars('Loss', {'train': float(loss)}, step)
                    writer.add_scalars('Accuracy', {'train': float(acc)}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                correct_preds = 0.
                fusion_matrix = torch.zeros(
                    len(cfg.dictionary_class_name),
                    len(cfg.dictionary_class_name)).cuda()
                model.eval()
                val_losses = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        # if params.num_gpus == 1:
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                        logits, loss = model(imgs, annot)
                        loss = loss.mean()

                        _, preds = torch.max(logits, dim=1)
                        correct_preds += torch.sum(preds == annot)

                        # Update matrix
                        for i, j in zip(preds, annot):
                            fusion_matrix[i, j] += 1

                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        val_losses.append(loss.item())

                val_loss = np.mean(val_losses)
                val_acc = float(correct_preds) / (len(val_generator) *
                                                  cfg.batch_size)

                progress_bar.set_description(
                    'Val. Epoch: {}/{}. Loss: {:1.5f}. Accuracy: {:1.5f}. '.
                    format(epoch, cfg.no_epochs, val_loss.item(), val_acc))

                # Calculate predictions and recalls
                preds_total = torch.sum(fusion_matrix, dim=1)
                recall_total = torch.sum(fusion_matrix, dim=0)
                predictions = {
                    l:
                    float(fusion_matrix[i, i]) / max(1, preds_total[i].item())
                    for l, i in val_set.classes.items()
                }
                recalls = {
                    l:
                    float(fusion_matrix[i, i]) / max(1, recall_total[i].item())
                    for l, i in val_set.classes.items()
                }

                writer.add_scalars('Loss', {'val': val_loss}, step)
                writer.add_scalars('Accuracy', {'val': val_acc}, step)
                writer.add_scalars('Predictions', predictions, step)
                writer.add_scalars('Recalls', recalls, step)

                print(fusion_matrix)

                if val_loss + opt.es_min_delta < best_loss:
                    best_loss = val_loss
                    best_epoch = epoch

                save_checkpoint(
                    model,
                    f"{opt.saved_path}/cls_b{cfg.compound_coef}_{epoch}_{step}.pth"
                )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
        print(
            f'[Info] Finished training. Best loss achieved {best_loss} at epoch {best_epoch}.'
        )
    except KeyboardInterrupt:
        save_checkpoint(
            model,
            f"{opt.saved_path}/cls_b{cfg.compound_coef}_{epoch}_{step}.pth")
        writer.close()
    writer.close()
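The `fusion_matrix` bookkeeping in the validation loop above is a confusion matrix indexed as [prediction, ground truth], so row sums give prediction counts (for precision) and column sums give ground-truth counts (for recall). A standalone sketch with toy numbers:

import torch

# fusion_matrix[pred, target] += 1, mirroring the validation loop
fusion_matrix = torch.tensor([[5., 1., 0.],
                              [2., 7., 1.],
                              [0., 0., 4.]])
preds_total = fusion_matrix.sum(dim=1)    # predictions per class (rows)
recall_total = fusion_matrix.sum(dim=0)   # ground truth per class (columns)
precision = fusion_matrix.diag() / preds_total.clamp(min=1)
recall = fusion_matrix.diag() / recall_total.clamp(min=1)
print(precision)  # tensor([0.8333, 0.7000, 1.0000])
print(recall)     # tensor([0.7143, 0.8750, 0.8000])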
Example No. 23
print(config)
patch_size = config.patch_size.split(',')
config.patch_size = (int(patch_size[0]), int(patch_size[1]))

cuda = config.cuda
if cuda and not torch.cuda.is_available():
    raise Exception("No GPU found, please run without --cuda")

if cuda:
    torch.cuda.manual_seed(KWAI_SEED)

cudnn.benchmark = True
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batchSize = config.batchSize
cls_net = import_module('models.resnet').get_model()
init_weights(cls_net, 'normal')
print(cls_net)

dataIter = ClsData(config.data_root + "train/trainPair2.txt",
                   data_name=['data_y', 'data_uv'],
                   label_name=['label_y', 'label_uv', 'label_cls'],
                   patch_size=config.patch_size,
                   frames=config.N,
                   scale=config.scale,
                   isRotate=config.rotate)

testIter = ClsData(config.data_root + "train/testPair2.txt",
                   data_name=['data_y', 'data_uv'],
                   label_name=['label_y', 'label_uv', 'label_cls'],
                   patch_size=config.patch_size,
                   frames=config.N,
Example No. 24
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    if opt.project == "vcoco":
        num_obj_class = 90
        num_union_action = 25
        num_inst_action = 51
    else:
        assert opt.project == "hico-det"
        num_obj_class = 90
        num_union_action = 117
        num_inst_action = 234

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
        'pin_memory': False
    }

    val_params = {
        'batch_size': opt.batch_size * 2,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
        'pin_memory': False
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    train_transform = transforms.Compose([
        Normalizer(mean=params.mean, std=params.std),
        Augmenter(),
        Resizer(input_sizes[opt.compound_coef])
    ])
    val_transform = transforms.Compose([
        Normalizer(mean=params.mean, std=params.std),
        Resizer(input_sizes[opt.compound_coef])
    ])

    if opt.project == "vcoco":
        training_set = VCOCO_Dataset(root_dir="./datasets/vcoco",
                                     set=params.train_set,
                                     color_prob=1,
                                     transform=train_transform)
        val_set = VCOCO_Dataset(root_dir="./datasets/vcoco",
                                set=params.val_set,
                                transform=val_transform)
    else:
        training_set = HICO_DET_Dataset(root_dir="datasets/hico_20160224_det",
                                        set="train",
                                        color_prob=1,
                                        transform=train_transform)
        val_set = HICO_DET_Dataset(root_dir="datasets/hico_20160224_det",
                                   set="test",
                                   transform=val_transform)

    training_generator = DataLoader(training_set, **training_params)

    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_classes=num_obj_class,
                                 num_union_classes=num_union_action,
                                 num_inst_classes=num_inst_action,
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    model.train()
    print("num_classes:", num_obj_class)
    print("num_union_classes:", num_union_action)
    print("instance_action_list", num_inst_action)
    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
            # last_epoch = int(os.path.basename(weights_path).split('_')[-2].split('.')[0]) + 1
            # last_step = last_epoch * len(training_generator)
        except (ValueError, IndexError):
            last_step = 0

        try:
            init_weights(model)
            print(weights_path)
            model_dict = model.state_dict()
            pretrained_dict = torch.load(weights_path,
                                         map_location=torch.device('cpu'))
            new_pretrained_dict = {}
            for k, v in pretrained_dict.items():
                if k in model_dict:
                    new_pretrained_dict[k] = v
                elif ("instance_branch.object_" + k) in model_dict:
                    new_pretrained_dict["instance_branch.object_" + k] = v
                    # print("instance_branch.object_"+k)
            ret = model.load_state_dict(new_pretrained_dict, strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:
        model.apply(freeze_backbone)
        freeze_bn_backbone(model)
        print('[Info] froze backbone')

    if opt.freeze_object_detection:
        freeze_object_detection(model)
        freeze_bn_object_detection(model)
        # model.apply(freeze_object_detection)
        print('[Info] froze object detection branch')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when training on multiple GPUs with a small per-GPU batch
    # size -- useful when GPU memory is limited, because batch norm over such
    # small batches makes training unstable or slow to converge. sync_bn
    # normalizes over the mini-batches of all GPUs as if they were one batch,
    # at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 8:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # wrap the model with the loss function, to reduce memory usage on gpu0 and speed up
    model = ModelWithLoss(model, dataset=opt.project, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)
                if opt.head_only:
                    print('[Info] froze SyncBN backbone')
                    freeze_bn_backbone(model.module.model)
                if opt.freeze_object_detection:
                    print('[Info] froze SyncBN object detection')
                    freeze_bn_object_detection(model.module.model)

    if opt.optim == 'adamw':
        # optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
        optimizer = torch.optim.AdamW(
            filter(lambda p: p.requires_grad, model.parameters()), opt.lr)
    elif opt.optim == "adam":
        # optimizer = torch.optim.Adam(model.parameters(), opt.lr)
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), opt.lr)
    else:
        # optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9, nesterov=True)
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=2,
                                                           verbose=True,
                                                           min_lr=1e-7)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)

    num_iter_per_epoch = (len(training_generator) + opt.accumulate_batch -
                          1) // opt.accumulate_batch

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch + 1
            if epoch < last_epoch:
                continue

            if epoch in [120, 130]:
                optimizer.param_groups[0][
                    'lr'] = optimizer.param_groups[0]['lr'] / 10

            epoch_loss = []
            for iter, data in enumerate(training_generator):
                try:
                    imgs = data['img']
                    annot = data['annot']
                    # torch.cuda.empty_cache()
                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        for key in annot:
                            annot[key] = annot[key].cuda()

                    union_act_cls_loss, union_sub_reg_loss, union_obj_reg_loss, union_diff_reg_loss, \
                    inst_act_cls_loss, inst_obj_cls_loss, inst_obj_reg_loss = model(imgs, annot["instance"], annot["interaction"])

                    union_act_cls_loss = union_act_cls_loss.mean()
                    union_sub_reg_loss = union_sub_reg_loss.mean()
                    union_obj_reg_loss = union_obj_reg_loss.mean()
                    union_diff_reg_loss = union_diff_reg_loss.mean()

                    inst_act_cls_loss = inst_act_cls_loss.mean()
                    inst_obj_cls_loss = inst_obj_cls_loss.mean()
                    inst_obj_reg_loss = inst_obj_reg_loss.mean()

                    union_loss = union_act_cls_loss + union_sub_reg_loss + union_obj_reg_loss + union_diff_reg_loss
                    instance_loss = inst_act_cls_loss + inst_obj_cls_loss + inst_obj_reg_loss

                    loss = union_loss + inst_act_cls_loss

                    if loss == 0 or not torch.isfinite(loss):
                        continue

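                    # Scale by accumulate_batch so the accumulated gradients average correctly.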
                    batch_loss = loss / opt.accumulate_batch
                    batch_loss.backward()
                    if (iter + 1) % opt.accumulate_batch == 0 or iter == len(
                            training_generator) - 1:
                        optimizer.step()
                        optimizer.zero_grad()
                        step += 1

                    loss = loss.item()
                    union_loss = union_loss.item()
                    instance_loss = instance_loss.item()

                    epoch_loss.append(float(loss))
                    current_lr = optimizer.param_groups[0]['lr']

                    if step % opt.log_interval == 0:
                        writer.add_scalars('Union Action Classification Loss',
                                           {'train': union_act_cls_loss}, step)
                        writer.add_scalars('Union Subject Regression Loss',
                                           {'train': union_sub_reg_loss}, step)
                        writer.add_scalars('Union Object Regression Loss',
                                           {'train': union_obj_reg_loss}, step)
                        writer.add_scalars('Union Diff Regression Loss',
                                           {'train': union_diff_reg_loss},
                                           step)

                        writer.add_scalars(
                            'Instance Action Classification Loss',
                            {'train': inst_act_cls_loss}, step)
                        writer.add_scalars(
                            'Instance Object Classification Loss',
                            {'train': inst_obj_cls_loss}, step)
                        writer.add_scalars('Instance Regression Loss',
                                           {'train': inst_obj_reg_loss}, step)

                        writer.add_scalars('Total Loss', {'train': loss}, step)
                        writer.add_scalars('Union Loss', {'train': union_loss},
                                           step)
                        writer.add_scalars('Instance Loss',
                                           {'train': instance_loss}, step)

                        # log learning_rate
                        writer.add_scalar('learning_rate', current_lr, step)

                    if iter % 20 == 0:
                        print(
                            'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Union loss: {:.5f}. Instance loss: {:.5f}. '
                            'Total loss: {:.5f}. Learning rate: {:.5f}'.
                            format(step, epoch, opt.num_epochs,
                                   (iter + 1) // opt.accumulate_batch,
                                   num_iter_per_epoch, union_loss,
                                   instance_loss, loss, current_lr))

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            # scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                # model.eval()

                union_loss_ls = []
                instance_loss_ls = []

                union_act_cls_loss_ls = []
                union_obj_cls_loss_ls = []
                union_act_reg_loss_ls = []

                union_sub_reg_loss_ls = []
                union_obj_reg_loss_ls = []
                union_diff_reg_loss_ls = []

                inst_act_cls_loss_ls = []
                inst_obj_cls_loss_ls = []
                inst_obj_reg_loss_ls = []

                val_loss = []
                for iter, data in enumerate(val_generator):
                    if (iter + 1) % 50 == 0:
                        print("%d/%d" % (iter + 1, len(val_generator)))
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']
                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            for key in annot:
                                annot[key] = annot[key].cuda()

                        union_act_cls_loss, union_sub_reg_loss, union_obj_reg_loss, union_diff_reg_loss, \
                        inst_act_cls_loss, inst_obj_cls_loss, inst_obj_reg_loss = model(imgs, annot["instance"], annot["interaction"])

                        union_act_cls_loss = union_act_cls_loss.mean()
                        union_sub_reg_loss = union_sub_reg_loss.mean()
                        union_obj_reg_loss = union_obj_reg_loss.mean()
                        union_diff_reg_loss = union_diff_reg_loss.mean()

                        inst_act_cls_loss = inst_act_cls_loss.mean()
                        inst_obj_cls_loss = inst_obj_cls_loss.mean()
                        inst_obj_reg_loss = inst_obj_reg_loss.mean()

                        union_loss = union_act_cls_loss + union_sub_reg_loss + union_obj_reg_loss + union_diff_reg_loss
                        instance_loss = inst_act_cls_loss + inst_obj_cls_loss + inst_obj_reg_loss

                        loss = union_loss + inst_act_cls_loss

                        if loss == 0 or not torch.isfinite(loss):
                            continue
                        val_loss.append(loss.item())

                        union_act_cls_loss_ls.append(union_act_cls_loss.item())
                        union_sub_reg_loss_ls.append(union_sub_reg_loss.item())
                        union_obj_reg_loss_ls.append(union_obj_reg_loss.item())
                        union_diff_reg_loss_ls.append(
                            union_diff_reg_loss.item())
                        # union_obj_cls_loss_ls.append(union_obj_cls_loss.item())
                        # union_act_reg_loss_ls.append(union_act_reg_loss.item())

                        inst_act_cls_loss_ls.append(inst_act_cls_loss.item())
                        inst_obj_cls_loss_ls.append(inst_obj_cls_loss.item())
                        inst_obj_reg_loss_ls.append(inst_obj_reg_loss.item())

                        union_loss_ls.append(union_loss.item())
                        instance_loss_ls.append(instance_loss.item())

                union_loss = np.mean(union_loss_ls)
                instance_loss = np.mean(instance_loss_ls)

                union_act_cls_loss = np.mean(union_act_cls_loss_ls)
                union_sub_reg_loss = np.mean(union_sub_reg_loss_ls)
                union_obj_reg_loss = np.mean(union_obj_reg_loss_ls)
                union_diff_reg_loss = np.mean(union_diff_reg_loss_ls)

                inst_act_cls_loss = np.mean(inst_act_cls_loss_ls)
                inst_obj_cls_loss = np.mean(inst_obj_cls_loss_ls)
                inst_obj_reg_loss = np.mean(inst_obj_reg_loss_ls)

                loss = union_loss + inst_act_cls_loss

                print(
                    'Val. Epoch: {}/{}. Union loss: {:1.5f}. Instance loss: {:1.5f}. '
                    'Total loss: {:1.5f}'.format(epoch, opt.num_epochs,
                                                 union_loss, instance_loss,
                                                 loss))

                writer.add_scalars('Union Action Classification Loss',
                                   {'val': union_act_cls_loss}, step)
                writer.add_scalars('Union Subject Regression Loss',
                                   {'val': union_sub_reg_loss}, step)
                writer.add_scalars('Union Object Regression Loss',
                                   {'val': union_obj_reg_loss}, step)
                writer.add_scalars('Union Diff Regression Loss',
                                   {'val': union_diff_reg_loss}, step)

                writer.add_scalars('Instance Action Classification Loss',
                                   {'val': inst_act_cls_loss}, step)
                writer.add_scalars('Instance Object Classification Loss',
                                   {'val': inst_obj_cls_loss}, step)
                writer.add_scalars('Instance Regression Loss',
                                   {'val': inst_obj_reg_loss}, step)

                writer.add_scalars('Total Loss', {'val': loss}, step)
                writer.add_scalars('Union Loss', {'val': union_loss}, step)
                writer.add_scalars('Instance Loss', {'val': instance_loss},
                                   step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                # model.train()

            # scheduler.step()

                scheduler.step(np.mean(val_loss))
                if optimizer.param_groups[0]['lr'] < opt.lr / 100:
                    break
                # Early stopping
                # if epoch - best_epoch > opt.es_patience > 0:
                #     print('[Info] Stop training at epoch {}. The lowest loss achieved is {}'.format(epoch, loss))
                #     break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
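The loop above steps the optimizer only once per opt.accumulate_batch micro-batches, dividing each loss by that factor so the accumulated gradient is an average. A minimal, self-contained sketch of the same pattern; the model, loader, and accumulate_batch names here are illustrative placeholders, not this script's API:

import torch
import torch.nn as nn

def train_with_accumulation(model, loader, optimizer, accumulate_batch=4):
    """Emulate a batch size of (loader batch size) * accumulate_batch on limited memory."""
    model.train()
    optimizer.zero_grad()
    for i, (x, y) in enumerate(loader):
        loss = nn.functional.mse_loss(model(x), y)
        # Scale so gradients summed over micro-batches match the average-loss gradient.
        (loss / accumulate_batch).backward()
        if (i + 1) % accumulate_batch == 0 or i == len(loader) - 1:
            optimizer.step()
            optimizer.zero_grad()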
Example No. 25
def iae(x_train, y_train, class_idx, restore, args):
    """ l2loss
    :param x_train:
    :param y_train:
    :param class_idx:
    :param restore:
    :param args:
    :return:
    """
    device = torch.device("cuda:" +
                          args.gpu_id if torch.cuda.is_available() else "cpu")
    transform_train = transforms.Compose([
        transforms.ToTensor(),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])
    if args.dataset == 'mnist' or args.dataset == 'fashion-mnist':
        print("Not using data augmentation")
    elif args.augmentation == 1:
        print("Using data augmentation")
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])

    n_channels = x_train.shape[get_channels_axis()]

    class_name = get_class_name_from_index(class_idx, args.dataset)

    model = CAE(in_channels=n_channels)

    model = model.to(device)
    init_weights(model, init_type='xavier', init_gain=0.02)

    trainset = trainset_pytorch(train_data=x_train,
                                train_labels=y_train,
                                transform=transform_train)
    testset = trainset_pytorch(train_data=x_train,
                               train_labels=y_train,
                               transform=transform_test)

    trainloader = data.DataLoader(trainset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  drop_last=True)
    testloader = data.DataLoader(testset,
                                 batch_size=args.batch_size,
                                 shuffle=False)

    # training
    if not restore:
        train_iae(trainloader, model, class_name, testloader, y_train, device,
                  args)
        if args.save_model == 1:
            model_file_name = '{}_iae-{}_{}.model.npz'.format(
                args.dataset, args.ratio, class_name)
            model_path = os.path.join(RESULTS_DIR, args.dataset)
            save_model(model, model_path, model_file_name)
    else:
        print("restore model from: {}".format(restore))
        model.load_state_dict(torch.load(restore))

    # testing
    reps, losses = test(testloader, model, class_name, args, device, epoch=-1)

    # AUROC based on reconstruction losses
    losses = losses - losses.min()
    losses = losses / (1e-8 + losses.max())
    scores = 1 - losses  # normal: label=1, score near 1, loss near 0

    res_file_name = '{}_iae_rec-{}_{}_{}.npz'.format(
        args.dataset, args.ratio, class_name,
        datetime.now().strftime('%Y-%m-%d-%H%M'))
    res_file_path = os.path.join(RESULTS_DIR, args.dataset, res_file_name)
    os.makedirs(os.path.join(RESULTS_DIR, args.dataset), exist_ok=True)
    auc_roc_rec = roc_auc_score(y_train, scores)
    print('testing result: auc_rec: {:.4f}'.format(auc_roc_rec))
    save_roc_pr_curve_data(scores, y_train, res_file_path)

    # DEC based on reconstruction losses
    centroid = torch.mean(reps, dim=0, keepdim=True)
    _, p = dec_loss_fun(reps, centroid)
    score_p = p[:, 0]

    res_file_name = '{}_iae_dec-{}_{}_{}.npz'.format(
        args.dataset, args.ratio, class_name,
        datetime.now().strftime('%Y-%m-%d-%H%M'))
    res_file_path = os.path.join(RESULTS_DIR, args.dataset, res_file_name)
    os.makedirs(os.path.join(RESULTS_DIR, args.dataset), exist_ok=True)
    auc_roc_dec = roc_auc_score(y_train, score_p)
    print('testing result: auc_dec: {:.4f}'.format(auc_roc_dec))
    save_roc_pr_curve_data(score_p, y_train, res_file_path)
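The scoring step above min-max normalizes the per-sample reconstruction losses and flips them so that normal samples (label 1) score near 1. A small sketch of that conversion on toy data, assuming scikit-learn is installed:

import numpy as np
from sklearn.metrics import roc_auc_score

def rec_loss_to_scores(losses):
    # Min-max normalize into [0, 1]; the 1e-8 guards against a zero denominator.
    losses = losses - losses.min()
    losses = losses / (1e-8 + losses.max())
    return 1.0 - losses  # low reconstruction loss -> high "normal" score

y_true = np.array([1, 1, 0, 0])          # 1 = normal, 0 = anomaly
losses = np.array([0.1, 0.2, 0.9, 0.8])  # normals reconstruct with low loss
print(roc_auc_score(y_true, rec_loss_to_scores(losses)))  # 1.0 on this toy data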
Example No. 26
def train(opt):
    params = Params(f'projects/{opt.project}_crop.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    save_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    opt.saved_path = opt.saved_path + f'/{params.project_name}/crop/weights/{save_time}'
    opt.log_path = opt.log_path + f'/{params.project_name}/crop/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)
    print('save_path :', opt.saved_path)
    print('log_path :', opt.log_path)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    training_set = Project42Dataset(root_dir=os.path.join(
        opt.data_path, params.project_name, 'crop'),
                                    set=params.train_set,
                                    params=params,
                                    transform=transforms.Compose([
                                        Normalizer(mean=params.mean,
                                                   std=params.std),
                                        Augmenter(),
                                        Resizer(input_sizes[opt.compound_coef])
                                    ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = Project42Dataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name,
                                                     'crop'),
                               set=params.val_set,
                               params=params,
                               transform=transforms.Compose([
                                   Normalizer(mean=params.mean,
                                              std=params.std),
                                   Resizer(input_sizes[opt.compound_coef])
                               ]))
    val_generator = DataLoader(val_set, **val_params)

    # labels
    labels = training_set.labels
    print('label:', labels)

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this; it is probably because you loaded pretrained weights with a different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpus and the batch size per gpu is lower than 4;
    # this is useful when gpu memory is limited, because with such small per-gpu batches
    # plain bn makes training very unstable or slow to converge.
    # sync_bn solves this by packing the mini-batches from all gpus into one batch,
    # normalizing it, and sending the result back to each gpu,
    # at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(opt.log_path + f'/{save_time}/')

    # wrap the model with the loss function to reduce memory usage on gpu0 and speed up training
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    ## train image show
                    # for idx in range(len(imgs)):
                    #     showshow = imgs[idx].numpy()
                    #     print(showshow.shape)
                    #     showshow = showshow.transpose(1, 2, 0)
                    #     a = annot[idx].numpy().reshape(5, )
                    #     img_show = cv2.rectangle(showshow, (a[0],a[1]), (a[2],a[3]), (0, 0, 0), 3)
                    #     cv2.imshow(f'{idx}_{params.obj_list[int(a[4])]}', img_show)
                    #     cv2.waitKey(1000)
                    #     cv2.destroyAllWindows()

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss, regression, classification, anchors = model(
                        imgs, annot, obj_list=params.obj_list)

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    # loss
                    epoch_loss.append(float(loss))

                    # mAP
                    threshold = 0.2
                    iou_threshold = 0.2

                    regressBoxes = BBoxTransform()
                    clipBoxes = ClipBoxes()

                    out = postprocess(imgs, anchors, regression,
                                      classification, regressBoxes, clipBoxes,
                                      threshold, iou_threshold)

                    mAP = mAP_score(annot, out, labels)
                    mAP = mAP.results['mAP']

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}. mAP: {:.2f}'
                        .format(step, epoch + 1, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item(), mAP))

                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)
                    writer.add_scalars('mAP', {'train': mAP}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []

                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss, regression, classification, anchors = model(
                            imgs, annot, obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                # mAP
                threshold = 0.2
                iou_threshold = 0.2

                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()

                out = postprocess(imgs, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)

                mAP = mAP_score(annot, out, labels)
                mAP = mAP.results['mAP']

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}. mAP: {:.2f}'
                    .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                            loss, mAP))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification_loss', {'val': cls_loss},
                                   step)
                writer.add_scalars('mAP', {'val': mAP}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
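This variant couples ReduceLROnPlateau (driven by the mean epoch loss) with manual early stopping on the validation loss. A condensed, runnable sketch of that bookkeeping; the loss sequence is a made-up placeholder:

import torch

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)

best_loss, best_epoch = 1e5, 0
es_patience, es_min_delta = 5, 0.0
for epoch, val_loss in enumerate([1.0, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9]):
    scheduler.step(val_loss)  # lowers lr once `patience` epochs pass without improvement
    if val_loss + es_min_delta < best_loss:
        best_loss, best_epoch = val_loss, epoch
    # Chained comparison: stop only if es_patience > 0 and it has been exceeded.
    if epoch - best_epoch > es_patience > 0:
        print('early stop at epoch {}; best loss {}'.format(epoch, best_loss))
        break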
Example No. 27
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    training_set = CocoDataset(root_dir=opt.data_path + params.project_name,
                               set=params.train_set,
                               transform=transforms.Compose([
                                   Normalizer(mean=params.mean,
                                              std=params.std),
                                   Augmenter(),
                                   Resizer(input_sizes[opt.compound_coef])
                               ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=opt.data_path + params.project_name,
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[opt.compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_anchors=9,
                                 num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef)

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0
        model.load_state_dict(torch.load(weights_path))
        print(
            f'loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpus and the batch size per gpu is lower than 4;
    # this is useful when gpu memory is limited, because with such small per-gpu batches
    # plain bn makes training very unstable or slow to converge.
    # sync_bn solves this by packing the mini-batches from all gpus into one batch,
    # normalizing it, and sending the result back to each gpu,
    # at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    if params.num_gpus > 0:
        model = model.cuda()
        model = CustomDataParallel(model, params.num_gpus)

    optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    criterion = FocalLoss()

    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        try:
            model.train()
            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus > 0:
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    _, regression, classification, anchors = model(imgs)

                    cls_loss, reg_loss = criterion(
                        classification,
                        regression,
                        anchors,
                        annot,
                        # imgs=imgs, obj_list=params.obj_list  # uncomment this to debug
                    )

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch + 1, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                except Exception as e:
                    print(traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if step % opt.save_interval == 0 and step > 0:
                save_checkpoint(
                    model,
                    f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus > 0:
                            annot = annot.cuda()
                        _, regression, classification, anchors = model(imgs)
                        cls_loss, reg_loss = criterion(classification,
                                                       regression, anchors,
                                                       annot)

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                            loss))
                writer.add_scalars('Total_loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification_loss', {'val': cls_loss},
                                   step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                    # onnx export is not tested.
                    # dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                    # if torch.cuda.is_available():
                    #     dummy_input = dummy_input.cuda()
                    # if isinstance(model, nn.DataParallel):
                    #     model.module.backbone_net.model.set_swish(memory_efficient=False)
                    #
                    #     torch.onnx.export(model.module, dummy_input,
                    #                       os.path.join(opt.saved_path, 'signatrix_efficientdet_coco.onnx'),
                    #                       verbose=False)
                    #     model.module.backbone_net.model.set_swish(memory_efficient=True)
                    # else:
                    #     model.backbone_net.model.set_swish(memory_efficient=False)
                    #
                    #     torch.onnx.export(model, dummy_input,
                    #                       os.path.join(opt.saved_path, 'signatrix_efficientdet_coco.onnx'),
                    #                       verbose=False)
                    #     model.backbone_net.model.set_swish(memory_efficient=True)

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        'Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, loss))
                    break
        except KeyboardInterrupt:
            save_checkpoint(
                model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
            break
    writer.close()
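A SummaryWriter only flushes reliably when it is closed exactly once, after training ends. A try/finally sketch of that lifecycle; the log directory name is a made-up placeholder:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('logs/demo_run')  # hypothetical log directory
try:
    for step in range(100):
        writer.add_scalar('loss', 1.0 / (step + 1), step)
except KeyboardInterrupt:
    pass  # a real script would save a checkpoint here
finally:
    writer.close()  # flushes any pending events exactly once

Example No. 28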
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    # Neptune staff
    all_params = opt.__dict__
    all_params.update(params.params)

    data_path = os.path.join(opt.data_path, params.project_name)

    tags = [
        'EfficientDet', f'D{opt.compound_coef}', f'bs{opt.batch_size}',
        opt.optim
    ]
    if opt.head_only:
        tags.append('head_only')

    if len(params.obj_list) == 1:
        tags.append('one_class')

    if opt.no_aug:
        tags.append('no_aug')

    neptune.create_experiment(name='EfficientDet',
                              tags=tags,
                              params=all_params,
                              upload_source_files=['train.py', 'coco_eval.py'])
    log_data_version(data_path)

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = os.path.join(opt.saved_path, params.project_name)
    opt.log_path = os.path.join(opt.log_path, params.project_name,
                                'tensorboard/')
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    if opt.no_aug:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_sizes[opt.compound_coef])
        ]
    else:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Resizer(input_sizes[opt.compound_coef])
        ]

    training_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name),
                               set=params.train_set,
                               transform=transforms.Compose(transform_list))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                params.project_name),
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[opt.compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this; it is probably because you loaded pretrained weights with a different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpus and the batch size per gpu is lower than 4;
    # this is useful when gpu memory is limited, because with such small per-gpu batches
    # plain bn makes training very unstable or slow to converge.
    # sync_bn solves this by packing the mini-batches from all gpus into one batch,
    # normalizing it, and sending the result back to each gpu,
    # at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # wrap the model with the loss function to reduce memory usage on gpu0 and speed up training
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=opt.momentum,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_step = 0
    best_checkpoint = None
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            epoch_cls_loss = []
            epoch_reg_loss = []

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression Loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification Loss', {'val': cls_loss},
                                   step)

                neptune.log_metric('Val Loss', step, loss)
                neptune.log_metric('Val Regression Loss', step, reg_loss)
                neptune.log_metric('Val Classification Loss', step, cls_loss)

                with torch.no_grad():
                    stats = evaluate(model.model,
                                     params.params,
                                     threshold=opt.val_threshold,
                                     step=step)

                neptune.log_metric('AP at IoU=.50:.05:.95', step, stats[0])
                neptune.log_metric('AP at IoU=.50', step, stats[1])
                neptune.log_metric('AP at IoU=.75', step, stats[2])
                neptune.log_metric('AR given 1 detection per image', step,
                                   stats[6])
                neptune.log_metric('AR given 10 detection per image', step,
                                   stats[7])
                neptune.log_metric('AR given 100 detection per image', step,
                                   stats[8])

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    best_step = step
                    checkpoint_name = f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    checkpoint_path = save_checkpoint(model, opt.saved_path,
                                                      checkpoint_name)
                    best_checkpoint = checkpoint_path

                model.train()

            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=params.obj_list,
                                               step=step)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))
                    epoch_cls_loss.append(float(cls_loss))
                    epoch_reg_loss.append(float(reg_loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)

                    neptune.log_metric('Train Loss', step, loss)
                    neptune.log_metric('Train Regression Loss', step, reg_loss)
                    neptune.log_metric('Train Classification Loss', step,
                                       cls_loss)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)
                    neptune.log_metric('Learning Rate', step, current_lr)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model, opt.saved_path,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            scheduler.step(np.mean(epoch_loss))
            neptune.log_metric('Epoch Loss', step, np.mean(epoch_loss))
            neptune.log_metric('Epoch Classification Loss', step,
                               np.mean(epoch_cls_loss))
            neptune.log_metric('Epoch Regression Loss', step,
                               np.mean(epoch_reg_loss))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                    .format(epoch, best_loss))
                break

    except KeyboardInterrupt:
        save_checkpoint(
            model, opt.saved_path,
            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        send_best_checkpoint(best_checkpoint, best_step)
        writer.close()
    writer.close()
    send_best_checkpoint(best_checkpoint, best_step)
    neptune.stop()
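All of these scripts resume the global step by parsing it from checkpoint filenames of the form efficientdet-d{coef}_{epoch}_{step}.pth. A defensive sketch of that parsing, where the filename layout is the only assumption:

import os

def step_from_checkpoint(path):
    # 'efficientdet-d0_12_3400.pth' -> 3400; fall back to 0 when unparsable.
    name = os.path.basename(path)
    try:
        return int(name.split('_')[-1].split('.')[0])
    except ValueError:
        return 0

assert step_from_checkpoint('weights/efficientdet-d0_12_3400.pth') == 3400
assert step_from_checkpoint('weights/last.pth') == 0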
Example No. 29
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    training_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name),
                               set=params.train_set,
                               phase='train',
                               transforms=get_train_transforms())

    val_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                params.project_name),
                          set=params.val_set,
                          phase='val',
                          transforms=get_valid_transforms())
    training_generator = torch.utils.data.DataLoader(
        training_set,
        batch_size=opt.batch_size,
        sampler=RandomSampler(training_set),
        pin_memory=False,
        drop_last=True,
        num_workers=opt.num_workers,
        collate_fn=collate_fn,
    )
    val_generator = torch.utils.data.DataLoader(
        val_set,
        batch_size=opt.batch_size,
        num_workers=opt.num_workers,
        shuffle=False,
        sampler=SequentialSampler(val_set),
        pin_memory=False,
        collate_fn=collate_fn,
    )

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this; it is probably because you loaded pretrained weights with a different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpus and the batch size per gpu is lower than 4;
    # this is useful when gpu memory is limited, because with such small per-gpu batches
    # plain bn makes training very unstable or slow to converge.
    # sync_bn solves this by packing the mini-batches from all gpus into one batch,
    # normalizing it, and sending the result back to each gpu,
    # at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # wrap the model with the loss function to reduce memory usage on gpu0 and speed up training
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
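    # Target effective batch size for gradient accumulation: the optimizer
    # steps once every accumulation_steps // opt.batch_size iterations in the
    # loop below, summing gradients in between (this assumes opt.batch_size
    # divides 32 evenly).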
    accumulation_steps = 32
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, (imgs, annots) in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = torch.stack(imgs)
                    annot = pad_annots(annots)
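                    # pad_annots is assumed to pad each image's annotations
                    # with -1 rows up to the batch maximum so they stack into
                    # a single (B, max_n, 5) tensor; a minimal sketch:
                    #   max_n = max(a.shape[0] for a in annots)
                    #   annot = torch.full((len(annots), max_n, 5), -1.)
                    #   for i, a in enumerate(annots): annot[i, :len(a)] = a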

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()
                    # print(annot)

                    # optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    if (iter + 1) % (accumulation_steps //
                                     opt.batch_size) == 0:
                        # print('step')
                        optimizer.step()
                        optimizer.zero_grad()
                    # optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, (imgs, annots) in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = torch.stack(imgs)
                        annot = pad_annots(annots)

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification_loss', {'val': cls_loss},
                                   step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping: the chained comparison requires both that
                # es_patience is positive (feature enabled) and that more than
                # es_patience epochs have passed since the last improvement.
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
    finally:
        writer.close()
    def start_training(self):
        if self.system_dict["params"]["num_gpus"] == 0:
            os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)
        else:
            torch.manual_seed(42)

        self.system_dict["params"]["saved_path"] = self.system_dict["params"][
            "saved_path"] + "/" + self.system_dict["params"][
                "project_name"] + "/"
        self.system_dict["params"]["log_path"] = self.system_dict["params"][
            "log_path"] + "/" + self.system_dict["params"][
                "project_name"] + "/tensorboard/"
        os.makedirs(self.system_dict["params"]["saved_path"], exist_ok=True)
        os.makedirs(self.system_dict["params"]["log_path"], exist_ok=True)

        training_params = {
            'batch_size': self.system_dict["params"]["batch_size"],
            'shuffle': True,
            'drop_last': True,
            'collate_fn': collater,
            'num_workers': self.system_dict["params"]["num_workers"]
        }

        val_params = {
            'batch_size': self.system_dict["params"]["batch_size"],
            'shuffle': False,
            'drop_last': True,
            'collate_fn': collater,
            'num_workers': self.system_dict["params"]["num_workers"]
        }

        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
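        # Input resolution indexed by compound_coef (d0..d7); in this table
        # d5 and d6 both use 1280 while d7 uses 1536.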
        training_set = CocoDataset(
            self.system_dict["dataset"]["train"]["root_dir"],
            self.system_dict["dataset"]["train"]["coco_dir"],
            self.system_dict["dataset"]["train"]["img_dir"],
            set_dir=self.system_dict["dataset"]["train"]["set_dir"],
            transform=transforms.Compose([
                Normalizer(mean=self.system_dict["params"]["mean"],
                           std=self.system_dict["params"]["std"]),
                Augmenter(),
                Resizer(
                    input_sizes[self.system_dict["params"]["compound_coef"]])
            ]))
        training_generator = DataLoader(training_set, **training_params)

        if (self.system_dict["dataset"]["val"]["status"]):
            val_set = CocoDataset(
                self.system_dict["dataset"]["val"]["root_dir"],
                self.system_dict["dataset"]["val"]["coco_dir"],
                self.system_dict["dataset"]["val"]["img_dir"],
                set_dir=self.system_dict["dataset"]["val"]["set_dir"],
                transform=transforms.Compose([
                    Normalizer(self.system_dict["params"]["mean"],
                               self.system_dict["params"]["std"]),
                    Resizer(input_sizes[self.system_dict["params"]
                                        ["compound_coef"]])
                ]))
            val_generator = DataLoader(val_set, **val_params)

        print("")
        print("")
        model = EfficientDetBackbone(
            num_classes=len(self.system_dict["params"]["obj_list"]),
            compound_coef=self.system_dict["params"]["compound_coef"],
            ratios=eval(self.system_dict["params"]["anchors_ratios"]),
            scales=eval(self.system_dict["params"]["anchors_scales"]))

        os.makedirs("pretrained_weights", exist_ok=True)

        # Download the matching pretrained checkpoint; d0 through d7 share
        # the same release URL pattern, so one parametrized branch suffices.
        coef = self.system_dict["params"]["compound_coef"]
        if 0 <= coef <= 7 and not os.path.isfile(
                self.system_dict["params"]["load_weights"]):
            print("Downloading weights")
            cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/" \
                  + "efficientdet-d{}.pth -O ".format(coef) \
                  + self.system_dict["params"]["load_weights"]
            os.system(cmd)
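        # os.system passes the command through a shell, so a weights path
        # containing spaces or shell metacharacters would break it;
        # subprocess.run(["wget", url, "-O", path], check=True), with url and
        # path built from the same strings, would avoid that.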

        # load last weights
        if self.system_dict["params"]["load_weights"] is not None:
            if self.system_dict["params"]["load_weights"].endswith('.pth'):
                weights_path = self.system_dict["params"]["load_weights"]
            else:
                weights_path = get_last_weights(
                    self.system_dict["params"]["saved_path"])
            try:
                last_step = int(
                    os.path.basename(weights_path).split('_')[-1].split('.')
                    [0])
            except (ValueError, IndexError):
                last_step = 0

            try:
                ret = model.load_state_dict(torch.load(weights_path),
                                            strict=False)
            except RuntimeError as e:
                print(f'[Warning] Ignoring {e}')
                print(
                    '[Warning] Don\'t panic if you see this; it usually means you loaded pretrained weights with a different number of classes. The rest of the weights should have loaded already.'
                )

            print(
                f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
            )
        else:
            last_step = 0
            print('[Info] initializing weights...')
            init_weights(model)

        print("")
        print("")

        # freeze the backbone when training the head only
        if self.system_dict["params"]["head_only"]:

            def freeze_backbone(m):
                classname = m.__class__.__name__
                for ntl in ['EfficientNet', 'BiFPN']:
                    if ntl in classname:
                        for param in m.parameters():
                            param.requires_grad = False

            model.apply(freeze_backbone)
            print('[Info] froze backbone')

        print("")
        print("")

        if self.system_dict["params"]["num_gpus"] > 1 and self.system_dict[
                "params"]["batch_size"] // self.system_dict["params"][
                    "num_gpus"] < 4:
            model.apply(replace_w_sync_bn)
            use_sync_bn = True
        else:
            use_sync_bn = False

        writer = SummaryWriter(
            self.system_dict["params"]["log_path"] +
            f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

        model = ModelWithLoss(model, debug=self.system_dict["params"]["debug"])

        if self.system_dict["params"]["num_gpus"] > 0:
            model = model.cuda()
            if self.system_dict["params"]["num_gpus"] > 1:
                model = CustomDataParallel(
                    model, self.system_dict["params"]["num_gpus"])
                if use_sync_bn:
                    patch_replication_callback(model)

        if self.system_dict["params"]["optim"] == 'adamw':
            optimizer = torch.optim.AdamW(model.parameters(),
                                          self.system_dict["params"]["lr"])
        else:
            optimizer = torch.optim.SGD(model.parameters(),
                                        self.system_dict["params"]["lr"],
                                        momentum=0.9,
                                        nesterov=True)

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               patience=3,
                                                               verbose=True)

        epoch = 0
        best_loss = 1e5
        best_epoch = 0
        step = max(0, last_step)
        model.train()

        num_iter_per_epoch = len(training_generator)

        try:
            for epoch in range(self.system_dict["params"]["num_epochs"]):
                last_epoch = step // num_iter_per_epoch
                if epoch < last_epoch:
                    continue

                epoch_loss = []
                progress_bar = tqdm(training_generator)
                for iter, data in enumerate(progress_bar):
                    if iter < step - last_epoch * num_iter_per_epoch:
                        progress_bar.update()
                        continue
                    try:
                        imgs = data['img']
                        annot = data['annot']

                        if self.system_dict["params"]["num_gpus"] == 1:
                            # if only one gpu, just send it to cuda:0
                            # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        optimizer.zero_grad()
                        cls_loss, reg_loss = model(
                            imgs,
                            annot,
                            obj_list=self.system_dict["params"]["obj_list"])
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss.backward()
                        # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                        optimizer.step()
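                        # Note: this loop zeroes gradients and steps the
                        # optimizer on every iteration; there is no gradient
                        # accumulation here.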

                        epoch_loss.append(float(loss))

                        progress_bar.set_description(
                            'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                            .format(step, epoch,
                                    self.system_dict["params"]["num_epochs"],
                                    iter + 1, num_iter_per_epoch,
                                    cls_loss.item(), reg_loss.item(),
                                    loss.item()))
                        writer.add_scalars('Loss', {'train': loss}, step)
                        writer.add_scalars('Regression_loss',
                                           {'train': reg_loss}, step)
                        writer.add_scalars('Classification_loss',
                                           {'train': cls_loss}, step)

                        # log learning_rate
                        current_lr = optimizer.param_groups[0]['lr']
                        writer.add_scalar('learning_rate', current_lr, step)

                        step += 1

                        if step % self.system_dict["params"][
                                "save_interval"] == 0 and step > 0:
                            self.save_checkpoint(
                                model,
                                f'efficientdet-d{self.system_dict["params"]["compound_coef"]}_trained.pth'
                            )
                            #print('checkpoint...')

                    except Exception as e:
                        print('[Error]', traceback.format_exc())
                        print(e)
                        continue
                scheduler.step(np.mean(epoch_loss))

                if epoch % self.system_dict["params"][
                        "val_interval"] == 0 and self.system_dict["dataset"][
                            "val"]["status"]:
                    print("Running validation")
                    model.eval()
                    loss_regression_ls = []
                    loss_classification_ls = []
                    for iter, data in enumerate(val_generator):
                        with torch.no_grad():
                            imgs = data['img']
                            annot = data['annot']

                            if self.system_dict["params"]["num_gpus"] == 1:
                                imgs = imgs.cuda()
                                annot = annot.cuda()

                            cls_loss, reg_loss = model(
                                imgs,
                                annot,
                                obj_list=self.system_dict["params"]
                                ["obj_list"])
                            cls_loss = cls_loss.mean()
                            reg_loss = reg_loss.mean()

                            loss = cls_loss + reg_loss
                            if loss == 0 or not torch.isfinite(loss):
                                continue

                            loss_classification_ls.append(cls_loss.item())
                            loss_regression_ls.append(reg_loss.item())

                    cls_loss = np.mean(loss_classification_ls)
                    reg_loss = np.mean(loss_regression_ls)
                    loss = cls_loss + reg_loss

                    print(
                        'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                        .format(epoch,
                                self.system_dict["params"]["num_epochs"],
                                cls_loss, reg_loss, loss))
                    writer.add_scalars('Loss', {'val': loss}, step)
                    writer.add_scalars('Regression_loss', {'val': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss', {'val': cls_loss},
                                       step)

                    if loss + self.system_dict["params"][
                            "es_min_delta"] < best_loss:
                        best_loss = loss
                        best_epoch = epoch

                        self.save_checkpoint(
                            model,
                            f'efficientdet-d{self.system_dict["params"]["compound_coef"]}_trained.pth'
                        )

                    model.train()

                    # Early stopping: active only when es_patience > 0
                    if epoch - best_epoch > self.system_dict["params"][
                            "es_patience"] > 0:
                        print(
                            '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                            .format(epoch, best_loss))
                        break
        except KeyboardInterrupt:
            self.save_checkpoint(
                model,
                f'efficientdet-d{self.system_dict["params"]["compound_coef"]}_trained.pth'
            )
        finally:
            writer.close()

        print("")
        print("")
        print("Training complete")