Example #1
0
def detect(cfgfile, weightfile, imgfile):
    """Run single-image detection with a Darknet model.

    Builds the network from `cfgfile`, loads `weightfile` as a PyTorch
    state dict, detects objects in `imgfile`, and writes the annotated
    image to 'predictions.jpg'.
    """
    m = Darknet(cfgfile)
    m.load_state_dict(torch.load(weightfile))
    print('Loading weights from %s... Done!' % (weightfile))

    # Choose the class-name list matching the class count
    # (hard-coded to the 20-class VOC setup here).
    num_classes = 20
    if num_classes == 20:
        namesfile = 'data/voc.names'
    elif num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/names'

    use_cuda = 1
    if use_cuda:
        m.cuda()

    # The OpenCV image feeds the detector; the PIL image is used for plotting.
    input_img = cv2.imread(imgfile)
    orig_img = Image.open(imgfile)

    start = time.time()
    boxes, scale = do_detect(m, input_img, 0.5, 0.4, use_cuda)
    elapsed = time.time() - start
    print('%s: Predicted in %f seconds.' % (imgfile, elapsed))

    class_names = load_class_names(namesfile)
    plot_boxes(orig_img, boxes, 'predictions.jpg', class_names, scale=scale)
Example #2
0
def load_model(model_config_file, weight_file, frame_size):
    """Build a Darknet inference model and load checkpoint weights on GPU.

    `frame_size` is accepted for interface compatibility; it is not used
    inside this function.
    """
    net = Darknet(model_config_file, inference=True)
    state = torch.load(weight_file, map_location=torch.device('cuda'))
    net.load_state_dict(state['state_dict'])

    net.eval()
    net.cuda()
    return net
Example #3
0
def detect_cv2_camera(cfgfile, weightfile):
    """Run YOLO detection on an RTSP camera stream and display the results.

    Reads frames in a loop, runs detection, draws the boxes, and shows
    them in an OpenCV window until 'q' is pressed or the stream ends.
    """
    import cv2
    m = Darknet(cfgfile)
    # mot_tracker = Sort()

    m.print_network()
    # Load the weights exactly once: either a PyTorch state dict or the
    # original Darknet binary format. (Previously load_weights() was called
    # unconditionally here and then the else branch loaded it a second time.)
    if args.torch:
        m.load_state_dict(torch.load(weightfile))
    else:
        m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    if use_cuda:
        m.cuda()

    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture('rtsp://192.168.1.75:8554/mjpeg/1')
    # cap = cv2.VideoCapture("./test.mp4")
    cap.set(3, 1280)  # CAP_PROP_FRAME_WIDTH
    cap.set(4, 720)   # CAP_PROP_FRAME_HEIGHT
    print("Starting the YOLO loop...")

    num_classes = m.num_classes
    if num_classes == 20:
        namesfile = 'data/voc.names'
    elif num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/x.names'
    class_names = load_class_names(namesfile)

    while True:
        ret, img = cap.read()
        if not ret:
            # Stream ended or camera disconnected: stop cleanly instead
            # of crashing inside cv2.resize on a None frame.
            break
        sized = cv2.resize(img, (m.width, m.height))
        sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)

        start = time.time()
        boxes = do_detect(m, sized, 0.4, 0.6, use_cuda)
        # Fall back to the raw frame so result_img is always bound
        # (previously it was undefined when boxes was None on the
        # first iteration, raising NameError at cv2.imshow).
        result_img = img
        if boxes is not None:
            # tracked_object = mot_tracker.update(tensorQ)
            finish = time.time()
            print('Predicted in %f seconds.' % (finish - start))
            result_img = plot_boxes_cv2(img,
                                        boxes[0],
                                        savename=None,
                                        class_names=class_names)

        cv2.imshow('Yolo demo', result_img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #4
0
def detect_cv2_camera(cfgfile, weightfile):
    """Run YOLO detection on the default webcam and display annotated frames.

    Loops until 'q' is pressed or the camera stops delivering frames,
    then releases the capture and closes the display window.
    """
    import cv2
    m = Darknet(cfgfile)

    m.print_network()
    if args.torch:
        m.load_state_dict(torch.load(weightfile))
    else:
        m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    if use_cuda:
        m.cuda()

    cap = cv2.VideoCapture(0)
    # cap = cv2.VideoCapture("./test.mp4")
    cap.set(3, 1280)  # CAP_PROP_FRAME_WIDTH
    cap.set(4, 720)   # CAP_PROP_FRAME_HEIGHT
    print("Starting the YOLO loop...")

    num_classes = m.num_classes
    if num_classes == 20:
        namesfile = 'data/voc.names'
    elif num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/x.names'
    class_names = load_class_names(namesfile)

    while True:
        ret, img = cap.read()
        if not ret:
            # Camera failure / end of stream: stop instead of crashing
            # inside cv2.resize on a None frame.
            break
        sized = cv2.resize(img, (m.width, m.height))
        sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)

        start = time.time()
        boxes = do_detect(m, sized, 0.4, 0.6, use_cuda)
        finish = time.time()
        print('Predicted in %f seconds.' % (finish - start))

        result_img = plot_boxes_cv2(img,
                                    boxes[0],
                                    savename=None,
                                    class_names=class_names)

        cv2.imshow('Yolo demo', result_img)
        # Previously this loop never terminated, which made cap.release()
        # unreachable; allow quitting with the 'q' key.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #5
0
def detect_cv2(cfgfile, weightfile, imgfile):
    """Interactively run detection on numbered images and report timings.

    Repeatedly prompts for an image number, loads
    '../fotos_geladeira_4/opencv_frame_<n>.png', runs detection twice
    (the first pass warms up the model; only the second is timed),
    saves the annotated result, and prints per-class counts.
    """
    import cv2
    m = Darknet(cfgfile)

    m.print_network()
    # Load the weights exactly once; the unconditional load_weights() call
    # that used to precede this if/else was redundant (and wrong for
    # PyTorch-format checkpoints selected via --torch).
    if args.torch:
        m.load_state_dict(torch.load(weightfile))
    else:
        m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    if use_cuda:
        m.cuda()

    num_classes = m.num_classes
    if num_classes == 20:
        namesfile = 'data/voc.names'
    elif num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/x.names'
    class_names = load_class_names(namesfile)

    while True:
        val = input("\n numero da imagem: ")
        pred_init_time = time.time()
        named_file = "../fotos_geladeira_4/opencv_frame_" + val + ".png"
        print(named_file)
        img = cv2.imread(named_file)
        # img = cv2.imread(imgfile)
        if img is None:
            # cv2.imread returns None for missing/unreadable files;
            # prompt again instead of crashing in cv2.resize.
            continue
        sized = cv2.resize(img, (m.width, m.height))
        sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
        for i in range(2):
            start = time.time()
            boxes = do_detect(m, sized, 0.4, 0.6, use_cuda)
            finish = time.time()
            if i == 1:
                print('%s: Predicted in %f seconds.' % (imgfile,
                                                        (finish - start)))

        plot_boxes_cv2(img,
                       boxes[0],
                       savename='predictions.jpg',
                       class_names=class_names)
        count_total_in_image(boxes[0], class_names)
        print("\n Total inference time {0} seconds".format(time.time() -
                                                           pred_init_time))
Example #6
0
def load_model(opts, frame_size):
    """Build a Darknet inference model for `frame_size` and load its weights.

    Reads 'yolov4_<frame_size>.cfg' from opts.model_config_dir and
    'yolov4_<frame_size>.pth' from opts.weights_dir. The model is put in
    eval mode; it is moved to GPU `opts.gpu_id` unless opts.no_cuda is set.
    """
    cfg_file_path = opts.model_config_dir + \
        "/yolov4_" + str(frame_size) + ".cfg"
    model = Darknet(cfg_file_path, inference=True)
    weight_file = os.path.join(
        opts.weights_dir, "yolov4_{}.pth".format(frame_size))
    # Map the checkpoint to the device actually in use. Previously this was
    # always 'cuda:<id>', which made --no-cuda runs fail on CPU-only hosts.
    map_location = 'cpu' if opts.no_cuda else 'cuda:{}'.format(opts.gpu_id)
    checkpoint = torch.load(weight_file, map_location=map_location)
    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    if not opts.no_cuda:
        model.cuda(opts.gpu_id)

    # Inference only: drop any gradient buffers to save memory.
    for param in model.parameters():
        param.grad = None
    return model
Example #7
0
def detect(cfgfile, weightfile, imgfile):
    """Detect objects in `imgfile` with a Darknet model and save the plot.

    The checkpoint may not match the model exactly, so weights are copied
    key-by-key in order wherever tensor sizes line up; layers without a
    size-matching counterpart keep their initial weights.
    """
    m = Darknet(cfgfile)

    checkpoint = torch.load(weightfile)
    model_dict = m.state_dict()
    pretrained_dict = checkpoint
    keys = list(pretrained_dict)
    i = 0
    for k, v in model_dict.items():
        # Bounds check: previously keys[i] raised IndexError once every
        # checkpoint key had been consumed but model keys remained.
        if i < len(keys) and v.size() == pretrained_dict[keys[i]].size():
            model_dict[k] = pretrained_dict[keys[i]]
            i = i + 1
    m.load_state_dict(model_dict)

    print('Loading weights from %s... Done!' % (weightfile))

    namesfile = 'data/mydata.names'

    use_cuda = 1
    if use_cuda:
        m.cuda()

    input_img = cv2.imread(imgfile)

    start = time.time()
    boxes, scale = do_detect(m, input_img, 0.5, 0.4, use_cuda)
    finish = time.time()
    print('%s: Predicted in %f seconds.' % (imgfile, (finish - start)))

    class_names = load_class_names(namesfile)

    plot_boxes_cv2(input_img,
                   boxes,
                   'predictions1.jpg',
                   class_names=class_names,
                   scale=scale)
Example #8
0
def train(model,
          device,
          config,
          epochs=5,
          batch_size=1,
          save_cp=True,
          log_step=20,
          img_scale=0.5):
    """Train `model` for `epochs` epochs, evaluating after every epoch.

    Gradients are accumulated over `config.subdivisions` forward passes
    before each optimizer step. Per-step losses and per-epoch COCO metrics
    are logged to TensorBoard; checkpoints go to `config.checkpoints`,
    keeping at most `config.keep_checkpoint_max` of them.

    `batch_size` and `img_scale` are kept for interface compatibility;
    the effective batch size is `config.batch // config.subdivisions`.
    """
    train_dataset = Yolo_dataset(config.train_label, config, train=True)
    val_dataset = Yolo_dataset(config.val_label, config, train=False)

    n_train = len(train_dataset)
    n_val = len(val_dataset)

    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch // config.subdivisions,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True,
                              drop_last=True,
                              collate_fn=collate)

    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch // config.subdivisions,
                            shuffle=True,
                            num_workers=8,
                            pin_memory=True,
                            drop_last=True,
                            collate_fn=val_collate)

    writer = SummaryWriter(
        log_dir=config.TRAIN_TENSORBOARD_DIR,
        filename_suffix=
        f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}',
        comment=
        f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}'
    )
    global_step = 0
    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {config.batch}
        Subdivisions:    {config.subdivisions}
        Learning rate:   {config.learning_rate}
        Training size:   {n_train}
        Validation size: {n_val}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images size:     {config.width}
        Optimizer:       {config.TRAIN_OPTIMIZER}
        Dataset classes: {config.classes}
        Train label path:{config.train_label}
        Pretrained:
    ''')

    # Learning-rate schedule: quartic warm-up for the first `burn_in`
    # iterations, then step decay at config.steps[0] and config.steps[1].
    def burnin_schedule(i):
        if i < config.burn_in:
            factor = pow(i / config.burn_in, 4)
        elif i < config.steps[0]:
            factor = 1.0
        elif i < config.steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor

    if config.TRAIN_OPTIMIZER.lower() == 'adam':
        optimizer = optim.Adam(
            model.parameters(),
            lr=config.learning_rate / config.batch,
            betas=(0.9, 0.999),
            eps=1e-08,
        )
    elif config.TRAIN_OPTIMIZER.lower() == 'sgd':
        optimizer = optim.SGD(
            params=model.parameters(),
            lr=config.learning_rate / config.batch,
            momentum=config.momentum,
            weight_decay=config.decay,
        )
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    criterion = Yolo_loss(device=device,
                          batch=config.batch // config.subdivisions,
                          n_classes=config.classes)

    save_prefix = 'Yolov4_epoch'
    saved_models = deque()
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_step = 0

        with tqdm(total=n_train,
                  desc=f'Epoch {epoch + 1}/{epochs}',
                  unit='img',
                  ncols=50) as pbar:
            for i, batch in enumerate(train_loader):
                global_step += 1
                epoch_step += 1
                images = batch[0]
                bboxes = batch[1]

                images = images.to(device=device, dtype=torch.float32)
                bboxes = bboxes.to(device=device)

                bboxes_pred = model(images)
                loss, loss_xy, loss_wh, loss_obj, loss_cls, loss_l2 = criterion(
                    bboxes_pred, bboxes)
                loss.backward()

                epoch_loss += loss.item()

                # Accumulate gradients over `subdivisions` iterations,
                # then take one optimizer/scheduler step.
                if global_step % config.subdivisions == 0:
                    optimizer.step()
                    scheduler.step()
                    model.zero_grad()

                if global_step % (log_step * config.subdivisions) == 0:
                    writer.add_scalar('train/Loss', loss.item(), global_step)
                    writer.add_scalar('train/loss_xy', loss_xy.item(),
                                      global_step)
                    writer.add_scalar('train/loss_wh', loss_wh.item(),
                                      global_step)
                    writer.add_scalar('train/loss_obj', loss_obj.item(),
                                      global_step)
                    writer.add_scalar('train/loss_cls', loss_cls.item(),
                                      global_step)
                    writer.add_scalar('train/loss_l2', loss_l2.item(),
                                      global_step)
                    # get_last_lr() replaces the deprecated get_lr(), which
                    # warns (and can return wrong values) when called outside
                    # scheduler.step() in modern PyTorch.
                    writer.add_scalar('lr',
                                      scheduler.get_last_lr()[0] * config.batch,
                                      global_step)
                    pbar.set_postfix(
                        **{
                            'loss (batch)': loss.item(),
                            'loss_xy': loss_xy.item(),
                            'loss_wh': loss_wh.item(),
                            'loss_obj': loss_obj.item(),
                            'loss_cls': loss_cls.item(),
                            'loss_l2': loss_l2.item(),
                            'lr': scheduler.get_last_lr()[0] * config.batch
                        })
                    logging.debug(
                        'Train step_{}: loss : {},loss xy : {},loss wh : {},'
                        'loss obj : {},loss cls : {},loss l2 : {},lr : {}'.
                        format(global_step, loss.item(), loss_xy.item(),
                               loss_wh.item(), loss_obj.item(),
                               loss_cls.item(), loss_l2.item(),
                               scheduler.get_last_lr()[0] * config.batch))

                pbar.update(images.shape[0])

            # Evaluate on a fresh inference-mode copy of the weights so the
            # training model's mode/state is untouched.
            if cfg.use_darknet_cfg:
                eval_model = Darknet(cfg.cfgfile, inference=True)
            else:
                eval_model = Yolov4(cfg.pretrained,
                                    n_classes=cfg.classes,
                                    inference=True)
            if torch.cuda.device_count() > 1:
                eval_model.load_state_dict(model.module.state_dict())
            else:
                eval_model.load_state_dict(model.state_dict())
            eval_model.to(device)
            evaluator = evaluate(eval_model, val_loader, config, device)
            del eval_model

            # COCO summary metrics in the standard 12-entry order.
            stats = evaluator.coco_eval['bbox'].stats
            writer.add_scalar('train/AP', stats[0], global_step)
            writer.add_scalar('train/AP50', stats[1], global_step)
            writer.add_scalar('train/AP75', stats[2], global_step)
            writer.add_scalar('train/AP_small', stats[3], global_step)
            writer.add_scalar('train/AP_medium', stats[4], global_step)
            writer.add_scalar('train/AP_large', stats[5], global_step)
            writer.add_scalar('train/AR1', stats[6], global_step)
            writer.add_scalar('train/AR10', stats[7], global_step)
            writer.add_scalar('train/AR100', stats[8], global_step)
            writer.add_scalar('train/AR_small', stats[9], global_step)
            writer.add_scalar('train/AR_medium', stats[10], global_step)
            writer.add_scalar('train/AR_large', stats[11], global_step)

            if save_cp:
                try:
                    os.makedirs(config.checkpoints, exist_ok=True)
                    logging.info('Created checkpoint directory')
                except OSError:
                    pass
                save_path = os.path.join(config.checkpoints,
                                         f'{save_prefix}{epoch + 1}.pth')
                torch.save(model.state_dict(), save_path)
                logging.info(f'Checkpoint {epoch + 1} saved !')
                # Keep only the newest keep_checkpoint_max checkpoints.
                saved_models.append(save_path)
                if len(saved_models) > config.keep_checkpoint_max > 0:
                    model_to_remove = saved_models.popleft()
                    try:
                        os.remove(model_to_remove)
                    except OSError:
                        # Best-effort cleanup: a stale path must not abort training.
                        logging.info(f'failed to remove {model_to_remove}')

    writer.close()
Example #9
0
    # Parse CLI/config arguments and select the compute device.
    cfg = get_args(**Cfg)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f'Using device {device}')

    # Build the model either from a Darknet .cfg (optionally loading
    # darknet-format weights) or as a native Yolov4 module (optionally
    # loading a PyTorch checkpoint via cfg.load).
    if cfg.use_darknet_cfg:
        model = Darknet(cfg.cfgfile)
        if cfg.pretrained:
            model.load_weights(cfg.pretrained)

    else:
        model = Yolov4(cfg.pretrained, n_classes=cfg.classes)
        if cfg.load:
            # NOTE(review): map_location is hard-coded to 'cuda' — this
            # fails on CPU-only hosts even though `device` may be 'cpu';
            # confirm intended.
            pretrained_dict = torch.load(cfg.load,
                                         map_location=torch.device('cuda'))
            model.load_state_dict(pretrained_dict)

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    model.to(device=device)

    try:
        train(
            model=model,
            config=cfg,
            epochs=cfg.TRAIN_EPOCHS,
            device=device,
        )
    except KeyboardInterrupt:
        # Save progress on Ctrl-C so the run can be resumed manually.
        torch.save(model.state_dict(), 'INTERRUPTED.pth')
        logging.info('Saved interrupt')
Example #10
0
def train(model, device, config, epochs=5, batch_size=1, save_cp=True, log_step=20, img_scale=0.5):
    """Train `model`, checkpoint every epoch, and optionally evaluate.

    Gradients accumulate over `config.subdivisions` iterations per optimizer
    step. When `config.only_evaluate` is set, runs a single COCO evaluation
    and returns without training.
    """
    # TODO: add a resume feature. What state does resume need?
    # Everything in config, everything in yolov4-custom.cfg, the weights,
    # the epoch index, and where the learning-rate schedule left off.
    
    
    # Build the datasets.
    # config.train_label is the path to the label text file (e.g. data/coins.txt).
    train_dataset = Yolo_dataset(config.train_label, config, train=True)
    val_dataset = Yolo_dataset(config.val_label, config, train=False)

    # Dataset lengths.
    n_train = len(train_dataset)
    n_val = len(val_dataset)

    # Build the dataloaders.
    # With pin_memory=False, num_workers=0 (main process only): works fine.
    # With pin_memory=True,  num_workers=8: hangs.
    # With pin_memory=False, num_workers=8: hangs.
    # With pin_memory=True,  num_workers=0: works fine.
    # Conclusion: the hang is caused by num_workers > 0 enabling workers.
    # On inspection, the dataset loads images with OpenCV, and some OpenCV
    # functions spawn their own threads by default; threads nested inside
    # worker processes can deadlock (whether it hangs may depend on the OS).
    # Fixes: (1) call cv2.setNumThreads(0) right after importing cv2 in the
    #            dataset module to disable OpenCV threading (recommended);
    #        (2) load/preprocess images with PIL instead (not recommended,
    #            PIL is slower than OpenCV).
    train_loader = DataLoader(train_dataset, batch_size=config.batch // config.subdivisions, shuffle=True,
                              num_workers=8, pin_memory=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch // config.subdivisions, shuffle=False,
                              num_workers=8, pin_memory=True, drop_last=False, collate_fn=val_collate)
                            
    if config.only_evaluate or config.evaluate_when_train:
        tgtFile = makeTgtJson(val_loader, config.categories)

    writer = SummaryWriter(log_dir=config.TRAIN_TENSORBOARD_DIR,
                           filename_suffix=f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}',
                           comment=f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}')
    
    # Maximum number of iterations overall.
    max_itr = config.TRAIN_EPOCHS * n_train
    
    # Global iteration counter.
    global_step = 0

    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {config.batch}
        Subdivisions:    {config.subdivisions}
        Learning rate:   {config.learning_rate}
        Training size:   {n_train}
        Validation size: {n_val}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images size:     {config.width}
        Optimizer:       {config.TRAIN_OPTIMIZER}
        Dataset classes: {config.classes}
        Train label path:{config.train_label}
        Pretrained:      {config.pretrainedWeight is not None or config.Pretrained is not None}
    ''')
    if config.only_evaluate:
        if config.use_darknet_cfg:
            eval_model = Darknet(config.cfgfile)
        else:
            raise NotImplementedError
        if torch.cuda.device_count() > 1:
            eval_model.load_state_dict(model.module.state_dict())
        else:
            eval_model.load_state_dict(model.state_dict())
        eval_model.to(device)
        eval_model.eval()
        resFile = evaluate(eval_model, config.val_label, config.dataset_dir, device==torch.device("cuda"))
        if resFile is None:
            debugPrint("detect 0 boxes in the val set")
            return
        cocoEvaluate(tgtFile, resFile)
        return

    # learning rate setup
    # Custom schedule: ramps up first, then decays in steps.
    def burnin_schedule(i):
        # i is the iteration index, not the epoch.
        if i < config.burn_in:  # quartic warm-up phase
            # factor is the multiplier applied to the learning rate
            factor = pow(i / config.burn_in, 4)
        elif i < config.steps[0]:  # stage one
            factor = 1.0
        elif i < config.steps[1]:  # stage two
            factor = 0.1
        else:  # stage three
            factor = 0.01
        return factor

    if config.TRAIN_OPTIMIZER.lower() == 'adam':  # adam is the default
        optimizer = optim.Adam(
            model.parameters(),
            lr=config.learning_rate / config.batch,  # effective LR is the configured value divided by batch size
            betas=(0.9, 0.999),  # Adam hyper-parameter; the default is usually fine
            eps=1e-08,  # Adam hyper-parameter; the default is usually fine
        )
    elif config.TRAIN_OPTIMIZER.lower() == 'sgd':
        optimizer = optim.SGD(
            params=model.parameters(),
            lr=config.learning_rate / config.batch,
            momentum=config.momentum,
            weight_decay=config.decay,
        )

    # PyTorch's dedicated interface for adjusting the learning rate.
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # Loss module: sits after the YOLO network (the network itself only maps
    # images to three output tensors) and computes the loss; it holds no
    # trainable parameters.
    criterion = Yolo_loss(device=device, batch=config.batch // config.subdivisions, n_classes=config.classes)

    save_prefix = 'Yolov4_epoch'
    saved_models = deque()
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_step = 0
        model.train()
        logging.info("===Train===")
        for i, batch in enumerate(train_loader):
            global_step += 1
            epoch_step += 1
            images = batch[0]
            bboxes = batch[1]

            images = images.to(device=device, dtype=torch.float32)
            bboxes = bboxes.to(device=device)

            bboxes_pred = model(images)
            loss, loss_xy, loss_wh, loss_obj, loss_cls, loss_l2 = criterion(bboxes_pred, bboxes)
            loss.backward()

            epoch_loss += loss.item()

            # Gradient accumulation: step once every `subdivisions` iterations.
            if global_step % config.subdivisions == 0:
                optimizer.step()
                scheduler.step()
                model.zero_grad()
            
            logging.info("Epoch:[{:3}/{}],step:[{:3}/{}],total loss:{:.2f}|lr:{:.5f}".format(epoch + 1, epochs, i + 1, len(train_loader), loss.item(), scheduler.get_last_lr()[0]))

            if global_step % (log_step * config.subdivisions) == 0:  # log_step defaults to 20; counted in iterations
                
                writer.add_scalar('train/Loss', loss.item(), global_step)
                writer.add_scalar('train/loss_xy', loss_xy.item(), global_step)
                writer.add_scalar('train/loss_wh', loss_wh.item(), global_step)
                writer.add_scalar('train/loss_obj', loss_obj.item(), global_step)
                writer.add_scalar('train/loss_cls', loss_cls.item(), global_step)
                writer.add_scalar('train/loss_l2', loss_l2.item(), global_step)
                writer.add_scalar('lr', scheduler.get_last_lr()[0] * config.batch, global_step)
                
                logging.debug('Train step_{}: loss : {},loss xy : {},loss wh : {},'
                            'loss obj : {},loss cls : {},loss l2 : {},lr : {}'
                            .format(global_step, loss.item(), loss_xy.item(),
                                    loss_wh.item(), loss_obj.item(),
                                    loss_cls.item(), loss_l2.item(),
                                    scheduler.get_last_lr()[0] * config.batch))
        if save_cp:  # True
            # Create the checkpoints directory.
            if not os.path.exists(config.checkpoints):
                os.makedirs(config.checkpoints, exist_ok=True)  # exist_ok=True tolerates a pre-existing directory; with exist_ok=False an existing directory raises
                logging.info('Created checkpoint directory')
            save_path = os.path.join(config.checkpoints, f'{save_prefix}{epoch + 1}.weights')                
            # Handle the torch.nn.DataParallel wrapper case.
            if torch.cuda.device_count() > 1:
                model.module.save_weights(save_path)
            else:
                model.save_weights(save_path)                
            logging.info(f'Checkpoint {epoch + 1} saved !')
            # Keep only the newest keep_checkpoint_max checkpoints; older
            # ones are deleted automatically.
            saved_models.append(save_path)
            if len(saved_models) > config.keep_checkpoint_max > 0:
                model_to_remove = saved_models.popleft()
                try:
                    os.remove(model_to_remove)
                except:
                    logging.info(f'failed to remove {model_to_remove}')

        if config.evaluate_when_train:
            try:
                model.eval()
                resFile = evaluate(model, config.val_label, config.dataset_dir, device==torch.device("cuda"), config.width, config.height)
                if resFile is None:
                    continue
                stats = cocoEvaluate(tgtFile, resFile)

                logging.info("===Val===")
                logging.info("Epoch:[{:3}/{}],AP:{:.3f}|AP50:{:.3f}|AP75:{:.3f}|APs:{:.3f}|APm:{:.3f}|APl:{:.3f}".format(
                    epoch + 1, epochs, stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]))
                logging.info("Epoch:[{:3}/{}],AR1:{:.3f}|AR10:{:.3f}|AR100:{:.3f}|ARs:{:.3f}|ARm:{:.3f}|ARl:{:.3f}".format(
                    epoch + 1, epochs, stats[6], stats[7], stats[8], stats[9], stats[10], stats[11]))


                writer.add_scalar('train/AP', stats[0], global_step)
                writer.add_scalar('train/AP50', stats[1], global_step)
                writer.add_scalar('train/AP75', stats[2], global_step)
                writer.add_scalar('train/AP_small', stats[3], global_step)
                writer.add_scalar('train/AP_medium', stats[4], global_step)
                writer.add_scalar('train/AP_large', stats[5], global_step)
                writer.add_scalar('train/AR1', stats[6], global_step)
                writer.add_scalar('train/AR10', stats[7], global_step)
                writer.add_scalar('train/AR100', stats[8], global_step)
                writer.add_scalar('train/AR_small', stats[9], global_step)
                writer.add_scalar('train/AR_medium', stats[10], global_step)
                writer.add_scalar('train/AR_large', stats[11], global_step)
            except Exception as e:
                debugPrint("evaluate meets an exception, here is the exception info:")
                traceback.print_exc()
                debugPrint("ignore error in evaluate and continue training")

    writer.close()
Example #11
0
    logging = init_logger(log_dir='log')
    cfg = get_args(**Cfg)
    assert cfg.batch_size >= cfg.subdivisions, 'Batch size should be >= subdivisions'
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f'Using device {device}')

    # Initialise model
    model = Darknet(cfg.cfgfile)
    start_epoch = cfg.start_epoch
    _, extension = os.path.splitext(cfg.pretrained)
    if extension == '.weights':
        model.load_weights(cfg.pretrained)
    elif extension == '.pth':
        ckpt = torch.load(cfg.pretrained)
        model.load_state_dict(ckpt['state_dict'])
        if 'epoch' in ckpt:
            start_epoch = ckpt['epoch']

    # if torch.cuda.device_count() > 1:
    #     model = torch.nn.DataParallel(model)
    model.to(device=device)

    try:
        train(model=model,
              config=cfg,
              epochs=cfg.num_epochs,
              device=device,
              start_epoch=start_epoch,
              batch_size=cfg.batch_size)
    except KeyboardInterrupt:
Example #12
0
    return logging


if __name__ == "__main__":
    # Set up logging, parse arguments, and pick the compute device.
    logging = init_logger(log_dir='log')
    cfg = get_args(**Cfg)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f'Using device {device}')

    # Build the inference network and load a PyTorch checkpoint.
    model = Darknet(cfg.model_config, inference=True)
    model.print_network()

    # NOTE(review): map_location is hard-coded to 'cuda' even though
    # `device` may be 'cpu' — confirm this is never run without a GPU.
    checkpoint = torch.load(
        cfg.weights_file, map_location=torch.device('cuda'))
    model.load_state_dict(checkpoint['state_dict'])
    # model.load_weights(cfg.weights_file)

    model.eval()  # set model away from training

    # if torch.cuda.device_count() > 1:
    #    model = torch.nn.DataParallel(model)
    model.to(device=device)

    # Load the ground-truth annotations (expected to be a JSON file).
    annotations_file_path = cfg.gt_annotations_path
    with open(annotations_file_path) as annotations_file:
        try:
            annotations = json.load(annotations_file)
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
        # json.JSONDecodeError would be the targeted choice.
        except:
            print("annotations file not a json")
            exit()
Example #13
0
    # Seven-argument form: num_classes weightfile imgfile height width namesfile.
    elif len(sys.argv) == 7:
        n_classes = int(sys.argv[1])
        weightfile = sys.argv[2]
        imgfile = sys.argv[3]
        # NOTE(review): height is kept as a string while width is cast to
        # int — confirm whether int(sys.argv[4]) was intended.
        height = sys.argv[4]
        width = int(sys.argv[5])
        # NOTE(review): int() applied to a names-file path looks like a bug;
        # this raises ValueError for any non-numeric filename — likely should
        # be plain sys.argv[6].
        namesfile = int(sys.argv[6])
    else:
        print('Usage: ')
        print('  python models.py num_classes weightfile imgfile namefile')

    # model = Yolov4(yolov4conv137weight=None, n_classes=n_classes, inference=True)
    model = Darknet('../cfg/yolov4.cfg', inference=True)

    # Load a PyTorch checkpoint (its 'state_dict' entry) onto the GPU.
    pretrained_dict = torch.load(weightfile, map_location=torch.device('cuda'))
    model.load_state_dict(pretrained_dict['state_dict'])
    # model.load_weights(weightfile)

    use_cuda = True
    if use_cuda:
        model.cuda()
    # Accept either a directory of images or a single image path.
    if os.path.isdir(imgfile):
        names = os.listdir(imgfile)
        abs_names = list(map(lambda x: os.path.join(imgfile, x), names))
    else:
        abs_names = [imgfile]

    for i, abs_name in tqdm(enumerate(abs_names)):
        img = cv2.imread(abs_name)

        # Inference input size is 416*416 does not mean training size is the same
Example #14
0
def train(
    model,
    device,
    config,
    epochs=5,
    save_cp=True,
    log_step=20,
):
    """Train a flat-BEV YOLO model, evaluating every second epoch and
    checkpointing whenever the evaluation loss improves.

    Args:
        model: Darknet model already moved to ``device`` (may be DataParallel).
        device: torch.device used for input batches and the eval model copy.
        config: training configuration namespace (batch, subdivisions,
            learning_rate, burn_in, steps, cfgfile, checkpoints dir, ...).
        epochs: number of training epochs to run.
        save_cp: if True, save a checkpoint when eval loss reaches a new best.
        log_step: tensorboard/debug logging period, measured in optimizer steps.
    """
    # Datasets / loaders. The effective optimizer batch (config.batch) is split
    # into config.subdivisions gradient-accumulation micro-batches.
    train_dataset = Yolo_BEV_dataset(config, split="train")
    val_dataset = Yolo_BEV_dataset(config, split="val")

    train_loader = DataLoader(
        train_dataset,
        batch_size=config.batch // config.subdivisions,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        drop_last=True,
        collate_fn=collate,
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=config.batch // config.subdivisions,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        drop_last=True,
        collate_fn=collate,
    )

    # Tensorboard summary writer, tagged with the main hyper-parameters.
    run_tag = (
        f"OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}"
        f"_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}"
    )
    writer = SummaryWriter(
        log_dir=config.TRAIN_TENSORBOARD_DIR,
        filename_suffix=run_tag,
        comment=run_tag,
    )

    # Log the run setup once at startup.
    n_train = len(train_dataset)
    n_val = len(val_dataset)
    global_step = 0
    logging.info(f"""Starting training:
        Epochs:          {config.epochs}
        Batch size:      {config.batch}
        Subdivisions:    {config.subdivisions}
        Learning rate:   {config.learning_rate}
        Training size:   {n_train}
        Validation size: {n_val}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Input height:    {config.height}
        Input width:     {config.width}
        Optimizer:       {config.TRAIN_OPTIMIZER}
        Dataset classes: {config.classes}
    """)

    # Learning-rate schedule: quartic warm-up for config.burn_in steps, then
    # step decay (x0.1, x0.01) at the two milestones in config.steps.
    def burnin_schedule(i):
        if i < config.burn_in:
            factor = pow(i / config.burn_in, 4)
        elif i < config.steps[0]:
            factor = 1.0
        elif i < config.steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor

    # Optimizer selection. The lr is divided by config.batch because the loss
    # is accumulated (not averaged) over the micro-batches of one batch.
    opt_name = config.TRAIN_OPTIMIZER.lower()
    if opt_name == "adam":
        optimizer = optim.Adam(
            model.parameters(),
            lr=config.learning_rate / config.batch,
            betas=(0.9, 0.999),
            eps=1e-08,
        )
    elif opt_name == "sgd":
        optimizer = optim.SGD(
            params=model.parameters(),
            lr=config.learning_rate / config.batch,
            momentum=config.momentum,
            weight_decay=config.decay,
        )
    else:
        # BUGFIX: an unknown optimizer name previously fell through and caused
        # a confusing NameError later; fail fast instead.
        raise ValueError(f"Unsupported TRAIN_OPTIMIZER: {config.TRAIN_OPTIMIZER}")

    # Scheduler multiplies the base lr by the factor computed per step.
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # Loss function.
    criterion = Yolo_loss(
        cfg=config,
        device=device,
    )

    # Training loop.
    save_prefix = "Yolov4_BEV_flat_epoch"
    saved_models = deque()
    # BUGFIX: the best-so-far eval loss must persist across epochs (it was
    # reset to +inf inside the loop, so every evaluation "improved").
    min_eval_loss = math.inf
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_step = 0

        with tqdm(total=n_train,
                  desc=f"Epoch {epoch + 1}/{epochs}",
                  unit="img",
                  ncols=75) as pbar:
            for i, batch in enumerate(train_loader):
                # One micro-batch of gradient accumulation.
                global_step += 1
                epoch_step += 1
                images = batch[0].float().to(device=device)
                labels = batch[1]

                # Forward + loss; gradients accumulate until a full batch.
                preds = model(images)[0]
                loss, loss_xy, loss_wl, loss_rot, loss_obj, loss_noobj = criterion(
                    preds, labels)
                loss.backward()

                epoch_loss += loss.item()

                # Step the optimizer once per full batch of subdivisions.
                if global_step % config.subdivisions == 0:
                    optimizer.step()
                    scheduler.step()
                    model.zero_grad()

                # Periodic tensorboard / debug logging.
                if global_step % (log_step * config.subdivisions) == 0:
                    lr_batch = scheduler.get_lr()[0] * config.batch
                    writer.add_scalar("train/Loss", loss.item(), global_step)
                    writer.add_scalar("train/loss_xy", loss_xy.item(),
                                      global_step)
                    writer.add_scalar("train/loss_wl", loss_wl.item(),
                                      global_step)
                    writer.add_scalar("train/loss_rot", loss_rot.item(),
                                      global_step)
                    writer.add_scalar("train/loss_obj", loss_obj.item(),
                                      global_step)
                    writer.add_scalar("train/loss_noobj", loss_noobj.item(),
                                      global_step)
                    writer.add_scalar("lr", lr_batch, global_step)
                    pbar.set_postfix({
                        "loss (batch)": loss.item(),
                        "loss_xy": loss_xy.item(),
                        "loss_wl": loss_wl.item(),
                        "loss_rot": loss_rot.item(),
                        "loss_obj": loss_obj.item(),
                        "loss_noobj": loss_noobj.item(),
                        "lr": lr_batch,
                    })
                    logging.debug(
                        "Train step_{}: loss : {},loss xy : {},loss wl : {},"
                        "loss rot : {},loss obj : {},loss noobj : {},lr : {}".
                        format(
                            global_step,
                            loss.item(),
                            loss_xy.item(),
                            loss_wl.item(),
                            loss_rot.item(),
                            loss_obj.item(),
                            loss_noobj.item(),
                            lr_batch,
                        ))

                pbar.update(images.shape[0])

            # Evaluate on a fresh inference copy every second epoch.
            eval_loss = None
            if epoch % 2 == 0:
                # BUGFIX: read the cfg path from the `config` parameter instead
                # of the module-level `cfg` global.
                eval_model = Darknet(config.cfgfile,
                                     inference=True,
                                     model_type="BEV_flat")
                if torch.cuda.device_count() > 1:
                    eval_model.load_state_dict(model.module.state_dict())
                else:
                    eval_model.load_state_dict(model.state_dict())
                eval_model.to(device)
                eval_model.eval()

                eval_loss = 0.0
                eval_loss_xy = 0.0
                eval_loss_wl = 0.0
                eval_loss_rot = 0.0
                eval_loss_obj = 0.0
                eval_loss_noobj = 0.0
                # BUGFIX: evaluation no longer advances global_step/epoch_step
                # (that shifted the `% subdivisions` phase of optimizer steps),
                # and runs under no_grad to avoid building autograd graphs.
                with torch.no_grad(), tqdm(total=n_val,
                                           desc=f"Eval {(epoch + 1) // 2}",
                                           unit="img",
                                           ncols=75) as epbar:
                    for val_batch in val_loader:
                        images = val_batch[0].float().to(device=device)
                        labels = val_batch[1]

                        # BUGFIX: run the frozen eval copy (was `model`).
                        labels_pred = eval_model(images)[0]
                        loss, loss_xy, loss_wl, loss_rot, loss_obj, loss_noobj = criterion(
                            labels_pred, labels)
                        eval_loss += loss.item()
                        eval_loss_xy += loss_xy.item()
                        eval_loss_wl += loss_wl.item()
                        eval_loss_rot += loss_rot.item()
                        # BUGFIX: was mistakenly accumulated into eval_loss_rot.
                        eval_loss_obj += loss_obj.item()
                        eval_loss_noobj += loss_noobj.item()

                        epbar.update(images.shape[0])

                # BUGFIX: the accumulators are Python floats, so the previous
                # `.item()` calls here raised AttributeError.
                logging.debug(
                    "Val step_{}: loss : {},loss xy : {},loss wl : {},"
                    "loss rot : {},loss obj : {},loss noobj : {},lr : {}".
                    format(
                        global_step,
                        eval_loss,
                        eval_loss_xy,
                        eval_loss_wl,
                        eval_loss_rot,
                        eval_loss_obj,
                        eval_loss_noobj,
                        scheduler.get_lr()[0] * config.batch,
                    ))

                del eval_model

            # Save a checkpoint only when this epoch produced a new best eval
            # loss (BUGFIX: previously compared a possibly-stale eval_loss
            # against a freshly reset min on every epoch).
            if save_cp and eval_loss is not None and eval_loss < min_eval_loss:
                min_eval_loss = eval_loss
                try:
                    os.makedirs(config.checkpoints, exist_ok=True)
                    logging.info("Created checkpoint directory")
                except OSError:
                    pass
                save_path = os.path.join(config.checkpoints,
                                         f"{save_prefix}{epoch + 1}.pth")
                torch.save(model.state_dict(), save_path)
                logging.info(f"Checkpoint {epoch + 1} saved !")
                saved_models.append(save_path)
                # Keep at most config.keep_checkpoint_max checkpoints on disk.
                if len(saved_models) > config.keep_checkpoint_max > 0:
                    model_to_remove = saved_models.popleft()
                    try:
                        os.remove(model_to_remove)
                    except OSError:
                        logging.info(f"failed to remove {model_to_remove}")

    writer.close()
Пример #15
0
    # --- training entry fragment (enclosing definition starts above this view) ---
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logging.info(f"Using device {device}")
    cfg.device = device

    # load model and push to device
    model = Darknet(cfg.cfgfile, model_type="BEV_flat")
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    model.to(device=device)

    # load weights
    if cfg.load is None:
        # No checkpoint given: initialize only the backbone from darknet
        # weights, stopping at layer 53.
        model.load_weights(cfg.backbone, cut_off=53)
    else:
        # NOTE(review): cfg.load appears to be a checkpoint *path*, but
        # load_state_dict expects an already-loaded state dict — this likely
        # should be model.load_state_dict(torch.load(cfg.load)); confirm.
        model.load_state_dict(cfg.load)

    # freeze backbone
    model.freeze_layers([i for i in range(54)])

    try:
        train(
            model=model,
            config=cfg,
            epochs=cfg.epochs,
            device=device,
        )
    except KeyboardInterrupt:
        # Persist progress before exiting on Ctrl-C.
        torch.save(model.state_dict(), "checkpoints/INTERRUPTED.pth")
        logging.info("Saved interrupt to checkpoints/INTERRUPTED.pth")
        # (fragment continues past this view)
        try: