def dump_csv(predictor, true_output):
    results = []
    n_predictors = len(predictor.index2combine_name)
    path_json_dumps = [
        '%s/%s/result_%s_%s.json' %
        (args.root_path, predictor.index2combine_name[i],
         predictor.index2policy[i], args.flag) for i in range(n_predictors)
    ]
    print('Start eval predictor...')
    predictions = predictor.fusion_prediction(top=1, return_with_prob=True)
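    # `predictions` appears to hold one list per fused sub-model, each entry being the
    # top-1 (class_index, probability) pair for a sample (inferred from how it is read below)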

    # `results` is always empty at this point, so just take the fused predictions as-is
    results = list(predictions)
    assert len(results) == len(path_json_dumps), \
        'The number of results does not match the number of dump paths.'

    for result, save_path in zip(results, path_json_dumps):
        save_dir = os.path.dirname(save_path)
        os.makedirs(save_dir, exist_ok=True)
        if args.flag == 'valid':
            class_2_index = {0: 'normal', 1: 'phone', 2: 'smoke'}
            dets_info = {}
            pred_idx = []
            with open("./txts/v-info-new.json", 'r', encoding="utf-8") as f:
                shape_dict = json.load(f)
            for i, res in enumerate(result):
                name = dataset.filenames()[i].split('/')[-1].split('.')[0]
                dets_info[name] = [
                    class_2_index[res[0][0]],
                    float(res[0][1]), shape_dict[name][1], shape_dict[name][2]
                ]
                pred_idx.append(res[0][0])
            with open(save_path, "w", encoding="utf-8") as f:
                json.dump(dets_info, f)

            acc = accuracy_score(y_true=true_output, y_pred=pred_idx)
            test_map = eval_map(detFolder=save_path,
                                gtFolder="txts/v-info-new.json")
            print("acc score: %.4f, map score: %.4f" % (acc, test_map))
        else:
            class_2_index = {0: 'normal', 1: 'calling', 2: 'smoking'}
            result_list = []
            with open(save_path, 'w', encoding="utf-8") as out_file:
                filenames = dataset.filenames()
                for i, res in enumerate(result):
                    filename = filenames[i].split('/')[-1].strip()
                    name = class_2_index[res[0][0]]
                    result_data = {
                        "image_name": str(filename),
                        "category": name,
                        "score": float(res[0][1])
                    }
                    result_list.append(result_data)
                json.dump(result_list, out_file)
        print('Dump %s finished.' % save_path)
def classifier_pred(classifier, shape_path, feature, id, model_name, data_type="valid"):

    class_2_index = {0: 'normal', 1: 'phone', 2: 'smoke'}
    dets_info = {}

    features = pickle.load(open(feature, 'rb'))
    ids = pickle.load(open(id, 'rb'))
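    # `features`/`ids` are assumed to be pre-extracted CNN features and their integer
    # labels, pickled by an earlier feature-dumping step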
    predict = classifier.predict(features)

    # predicted_test_scores = classifier.decision_function(features)
    # probs = softmax(predicted_test_scores)
    # prob_list = [prob[int(predict[i])] for i, prob in enumerate(probs)]

    probs = classifier.predict_proba(features)
    prob_list = [round(prob[int(predict[i])], 4) for i, prob in enumerate(probs)]

    prediction = predict.tolist()
    total_pred_idx = [int(pred) for pred in prediction]
    total_true_idx = [int(label) for label in ids]

    with open("./txts/%s.json" % shape_path, 'r', encoding="utf-8") as f:
        shape_dict = json.load(f)

    dataset = Dataset(os.path.join(BASE, data_type))
    filenames = dataset.filenames()

    for i, filename in enumerate(filenames):
        name = filename.split('/')[-1].split('.')[0]
        dets_info[name] = [class_2_index[int(prediction[i])], prob_list[i], shape_dict[name][1], shape_dict[name][2]]

    with open("%s/%s.json" % (feature_path, shape_path.split('-')[0]), "w", encoding="utf-8") as f:
        json.dump(dets_info, f)
    accuracy = round(accuracy_score(total_true_idx, total_pred_idx), 4)

    test_map, ap_list = eval_map(detFolder="%s/v.json" % feature_path, gtFolder="txts/v-info-new.json", return_each_ap=True)
    print("Accuracy: %s, map: %.4f" % (accuracy, test_map))

    with open("weights/%s-valid.json" % model_name, 'w', encoding="utf-8") as f:
        prob_dict = {}
        prob_dict["prob"] = probs
        prob_dict["model_weight"] = test_map
        prob_dict["label_weight"] = ap_list

        json.dump(prob_dict, f, cls=MyEncoder)

    return accuracy, round(test_map, 4)
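# Hypothetical usage (the classifier file and pickle paths below are assumptions,
# not taken from the source):
# clf = joblib.load("features/svm-0.9123.m")
# acc, v_map = classifier_pred(clf, "v-info-new",
#                              "features/valid_feature.pkl", "features/valid_label.pkl",
#                              "efficientnet_b4", data_type="valid")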
Example #3
def validate(args):
    # might as well try to validate something
    args.prefetcher = not args.no_prefetcher

    # create model
    model = create_model(
        args.model,
        num_classes=args.num_classes,
        in_chans=3,
        global_pool=args.gp)

    if args.checkpoint:
        load_checkpoint(model, args.checkpoint)

    param_count = sum([m.numel() for m in model.parameters()])
    _logger.info('Model %s created, param count: %d' % (args.model, param_count))

    data_config = resolve_data_config(vars(args), model=model)
    # model, test_time_pool = apply_test_time_pool(model, data_config, args)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    dataset = Dataset(args.data)

    crop_pct = data_config['crop_pct']
    loader = create_loader(
        dataset,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=crop_pct)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    f1_m = AverageMeter()

    end = time.time()
    total_pred_idx = []
    total_truth_idx = []
    mistake_image = []
    mistake_image_dict = {'calling': [], 'normal': [], 'smoking': [], 'smoking_calling': []}
    # class_2_index = {0: 'normal', 1: 'phone', 2: 'smoke'}
    class_2_index = {0: 'calling', 1: 'normal', 2: 'smoking', 3: 'smoking_calling'}
    with open("./txts/%s.json" % json_name, 'r', encoding="utf-8") as f:
        shape_dict = json.load(f)
    dets_info = {}

    model.eval()
    with torch.no_grad():
        # warmup, reduce variability of first batch time, especially for comparing torchscript vs non
        input = torch.randn((args.batch_size,) + data_config['input_size'])
        if torch.cuda.is_available():
            input = input.cuda()

        model(input)
        end = time.time()
        for batch_idx, (input, target) in enumerate(loader):
            if args.no_prefetcher and torch.cuda.is_available():
                target = target.cuda()
                input = input.cuda()

            # compute output
            # t0 = time.time()
            output = model(input)
            # print("time0: %.8f s" % ((time.time() - t0)))
            # t1 = time.time()
            # out = output.detach().cpu()
            # print("time1: %.8f s" % ((time.time() - t1) / 64))
            # print("time2: %.8f s" % ((time.time() - t0) / 64))
            # t2 = time.time()
            # out = out.cuda().cpu()
            # print("time3: %.8f s" % ((time.time() - t2) / 64))
            # get prediction index and ground truth index
            prob = torch.max(F.softmax(output, -1), -1)[0]
            idx = torch.max(F.softmax(output, -1), -1)[1]

            target_idx = target.cpu().numpy()
            predict_idx = idx.cpu().numpy()

            for j in range(len(target_idx)):
                total_truth_idx.append(target_idx[j])
                total_pred_idx.append(predict_idx[j])

                class_dict = loader.dataset.class_to_idx

                target_class = list(class_dict.keys())[list(class_dict.values()).index(int(target_idx[j]))]
                pred_class = list(class_dict.keys())[list(class_dict.values()).index(int(predict_idx[j]))]

                filename = loader.dataset.filenames()[batch_idx * args.batch_size + j]
                name = filename.split('/')[-1].split('.')[0]

                dets_info[name] = [pred_class, float(prob[j]), shape_dict[name][1], shape_dict[name][2]]

                if target_idx[j] != predict_idx[j]:
                    mistake_image.append(
                        [loader.dataset.filenames()[batch_idx * args.batch_size + j], target_class, pred_class,
                         np.round(prob[j].cpu().numpy(), 4)])

                    mistake_image_dict[class_2_index[predict_idx[j]]].append(
                        loader.dataset.filenames()[batch_idx * args.batch_size + j])

            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 3))  # the "top5" meter actually tracks top-3 here (only 3-4 classes)

            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_freq == 0:
                _logger.info(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                    'Acc@1: {top1.val:>7.2f} ({top1.avg:>7.2f})  '
                    'Acc@5: {top5.val:>7.3f} ({top5.avg:>7.3f})'.format(
                        batch_idx, len(loader), batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                        loss=losses, top1=top1, top5=top5))

    with open("%s/%s.json" % (output_path, json_name.split('-')[0]), "w", encoding="utf-8") as f:
        json.dump(dets_info, f)

    top1a, top5a = top1.avg, top5.avg
    results = OrderedDict(
        top1=round(top1a, 4), top1_err=round(100 - top1a, 4),
        top5=round(top5a, 4), top5_err=round(100 - top5a, 4),
        param_count=round(param_count / 1e6, 2),
        img_size=data_config['input_size'][-1],
        crop_pct=crop_pct,
        interpolation=data_config['interpolation'],
        mistake_image_dict=mistake_image_dict,
        pred_idx=total_pred_idx, truth_idx=total_truth_idx)

    _logger.info(' * Acc@1 {:.2f} ({:.2f}) Acc@5 {:.2f} ({:.2f})'.format(
       results['top1'], results['top1_err'], results['top5'], results['top5_err']))

    valid_map, each_ap = eval_map(detFolder="%s/%s.json" % (output_path, json_name.split('-')[0]),
                                  gtFolder="txts/%s.json" % json_name, return_each_ap=True)
    _logger.info('Valid mAP: {}, each ap: {}'.format(round(valid_map, 4), each_ap))

    return results
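# Rough usage sketch; the argument set below is an assumption inferred from how
# validate() reads `args`, not an exhaustive or verified list:
# args = argparse.Namespace(model='tf_efficientnet_b4', num_classes=3, gp='avg',
#                           checkpoint='output/model_best.pth.tar', data='data/valid',
#                           batch_size=64, workers=4, no_prefetcher=False, log_freq=10)
# metrics = validate(args)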
Example #4
def main():
    setup_default_logging()
    args, args_text = _parse_args()

    args.prefetcher = not args.no_prefetcher
    torch.manual_seed(args.seed)

    model = create_model(args.model,
                         pretrained=True,
                         num_classes=args.num_classes,
                         drop_rate=args.drop,
                         drop_path_rate=args.drop_path,
                         drop_block_rate=args.drop_block,
                         checkpoint_path=args.initial_checkpoint)

    if args.local_rank == 0:
        _logger.info('Model %s created, param count: %d' %
                     (args.model, sum([m.numel()
                                       for m in model.parameters()])))

    data_config = resolve_data_config(vars(args),
                                      model=model,
                                      verbose=args.local_rank == 0)

    if args.num_gpu > 1:
        model = nn.DataParallel(model,
                                device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    optimizer = create_optimizer(args, model)

    loss_scaler = None
    # optionally resume from a checkpoint
    resume_epoch = None
    if args.resume:
        resume_epoch = resume_checkpoint(
            model,
            args.resume,
            optimizer=None if args.no_resume_opt else optimizer,
            loss_scaler=None if args.no_resume_opt else loss_scaler,
            log_info=args.local_rank == 0)

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        _logger.info('Scheduled epochs: {}'.format(num_epochs))

    train_dir = os.path.join(args.data, 'train')
    if not os.path.exists(train_dir):
        _logger.error(
            'Training folder does not exist at: {}'.format(train_dir))
        exit(1)
    dataset_train = Dataset(train_dir)

    collate_fn = None
    mixup_fn = None
    mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None
    if mixup_active:
        mixup_args = dict(mixup_alpha=args.mixup,
                          cutmix_alpha=args.cutmix,
                          cutmix_minmax=args.cutmix_minmax,
                          prob=args.mixup_prob,
                          switch_prob=args.mixup_switch_prob,
                          elementwise=args.mixup_elem,
                          label_smoothing=args.smoothing,
                          num_classes=args.num_classes)
        if args.prefetcher:
            collate_fn = FastCollateMixup(**mixup_args)
        else:
            mixup_fn = Mixup(**mixup_args)

    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        color_jitter=args.color_jitter,
        auto_augment=args.aa,
        num_workers=args.workers,
        collate_fn=collate_fn,
    )

    eval_dir = os.path.join(args.data, 'valid')
    if not os.path.isdir(eval_dir):
        eval_dir = os.path.join(args.data, 'validation')
        if not os.path.isdir(eval_dir):
            _logger.error(
                'Validation folder does not exist at: {}'.format(eval_dir))
            exit(1)
    dataset_eval = Dataset(eval_dir)

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        num_workers=args.workers,
        crop_pct=data_config['crop_pct'],
    )

    # train_loss_ce is always passed to train_epoch below, so define it in every case
    # to avoid a NameError when mixup is active or label smoothing is disabled
    train_loss_ce = nn.CrossEntropyLoss().cuda()
    if mixup_active:
        # smoothing is handled with mixup target transform
        train_loss_fn = SoftTargetCrossEntropy().cuda()
    elif args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(
            smoothing=args.smoothing).cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
    validate_loss_fn = nn.CrossEntropyLoss().cuda()

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    plateau_num = 0
    if args.local_rank == 0:
        output_base = args.output if args.output else './output'
        exp_name = '-'.join([
            datetime.now().strftime("%Y%m%d-%H%M%S"), args.model,
            str(data_config['input_size'][-1])
        ])
        output_dir = get_outdir(output_base, exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(model=model,
                                optimizer=optimizer,
                                args=args,
                                amp_scaler=loss_scaler,
                                checkpoint_dir=output_dir,
                                recovery_dir=output_dir,
                                decreasing=decreasing,
                                max_history=2)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

        with open("./txts/%s.json" % json_name, 'r', encoding="utf-8") as f:
            shape_dict = json.load(f)
    try:
        for epoch in range(start_epoch, num_epochs):

            train_metrics = train_epoch(epoch,
                                        model,
                                        loader_train,
                                        optimizer,
                                        [train_loss_fn, train_loss_ce],
                                        args,
                                        lr_scheduler=lr_scheduler,
                                        output_dir=output_dir,
                                        mixup_fn=mixup_fn)

            eval_metrics, dets_info = validate(model,
                                               loader_eval,
                                               validate_loss_fn,
                                               args,
                                               shape_dict=shape_dict)

            with open("%s/v.json" % output_dir, "w", encoding="utf-8") as f:
                json.dump(dets_info, f)
            valid_map = round(
                eval_map(detFolder="%s/v.json" % output_dir,
                         gtFolder="txts/%s.json" % json_name), 4)
            eval_metrics["map"] = valid_map
            _logger.info('Valid mAP: {}'.format(valid_map))

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            update_summary(epoch,
                           train_metrics,
                           eval_metrics,
                           os.path.join(output_dir, 'summary.csv'),
                           write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                saver.save_prefix = "%.2f-%s" % (eval_metrics["top1"], valid_map)
                best_metric, best_epoch = saver.save_checkpoint(
                    epoch, metric=save_metric)

            if best_metric is None or eval_metrics[eval_metric] >= best_metric:
                plateau_num = 0
            else:
                plateau_num += 1

            # stop if the eval metric has not improved for 30 consecutive epochs
            if plateau_num == 30:
                break

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        _logger.info('*** Best metric: {0} (epoch {1})'.format(
            best_metric, best_epoch))
    # assumption: this training branch runs when stage == "train"; the original
    # if-condition is missing from the extracted source (only the elif branches remain)
    if stage == "train":
        best_map = 0
        for i in range(150):
            save_path = feature_path + '%s.m' % classifier
            fier = classifier_training(train_features, classifier, train_label_path)

            acc, test_map = classifier_pred(fier, "v-info-new", valid_features, valid_label_path, model_name)
            # test_map = eval_map(detFolder="%s/v.json" % feature_path, gtFolder="txts/v-info-new.json")
            # print(classifier, test_map)
            test_map = round(test_map, 4)
            if test_map > best_map:
                if i != 0:
                    last_path = save_path.replace(classifier, classifier + "-%s" % best_map)
                    os.remove(last_path)
                best_map = test_map
                save_path = save_path.replace(classifier, classifier + "-%s" % test_map)
                joblib.dump(fier, save_path)
    elif stage == "valid":
        save_path = glob.glob('%s%s*.m' % (feature_path, classifier))[0]
        fier = joblib.load(save_path)

        classifier_pred(fier, "v-info-new", valid_feature_path, valid_label_path, save_name)

    elif stage == "eval":
        valid_model(model_name, checkpoints)
        test_map = eval_map(detFolder="%s/v.json" % feature_path, gtFolder="txts/v-info-new.json")
        print(test_map)
        # save_feature_batch(model_name, checkpoints, train_feature_path, train_label_path, "valid")
    else:
        save_path = glob.glob('%s%s*.m' % (feature_path, classifier))[0]
        classifier_test(save_path, test_feature_path, test_imgs)
def perform_predict(predictor,
                    loader,
                    model_weight,
                    label_weight,
                    weights,
                    save_weights=True):
    temp_weight = {}
    total_true_output = []
    total_pred_output = []
    total_pred_idx = []
    total_true_idx = []
    right_count = 0
    n_labels = np.zeros((3, )) + 1e-5
    n_right_labels = np.zeros((3, ))
    with torch.no_grad():
        for i, (input, target) in enumerate(tqdm(loader)):
            test_pred_tta = []
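            # 8 test-time-augmentation passes; their logits are averaged before the softmax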
            for j in range(8):
                output = predictor(input.cuda())
                output = output.data.cpu().numpy()
                test_pred_tta.append(output)
            output_data = softmax(np.mean(test_pred_tta, axis=0))

            # output = predictor(input.cuda())
            # output_data = test_pred_tta.cpu().numpy()
            if save_weights:
                # take the argmax over the TTA-averaged probabilities, not the last single pass
                predict_idx = np.argmax(output_data, axis=-1)
                target_idx = target.cpu().numpy()

                total_pred_idx.extend(predict_idx)
                total_true_idx.extend(target_idx)
                for j in range(len(target_idx)):
                    # count samples per predicted class (precision-style denominator)
                    n_labels[predict_idx[j]] += 1
                    # alternatively, count per ground-truth class for a recall-style weight
                    # n_labels[target_idx[j]] += 1

                    if predict_idx[j] == target_idx[j]:
                        right_count += 1
                        n_right_labels[predict_idx[j]] += 1
                    total_true_output.append(target_idx[j])
                    total_pred_output.append(output_data[j])
            else:
                total_pred_output.extend(output_data)

    model_name = predictor.default_cfg['model_name'].split('-')[1]
    if save_weights:
        # model_weight[predictor.default_cfg['model_name']] = np.array([float(right_count) / len(total_true_output)])
        # label_weight[predictor.default_cfg['model_name']] = n_right_labels / n_labels
        #
        # temp_weight['model_weight'] = float(right_count) / len(total_true_output)
        # temp_weight['label_weight'] = list(n_right_labels / n_labels)
        # weights[predictor.default_cfg['model_name']] = temp_weight
        #
        # with open('./weights/fusion_weights_tta.json', 'w') as json_file:
        #     json.dump(weights, json_file, indent=4)

        with open("./txts/v-info-new.json", 'r', encoding="utf-8") as f:
            shape_dict = json.load(f)

        filenames = dataset.filenames()
        dets_info = {}
        class_2_index = {0: 'normal', 1: 'phone', 2: 'smoke'}
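        # note: entries of total_pred_output are already softmax probabilities
        # (see output_data above), so the softmax below re-normalizes them a second time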

        probs = np.max(softmax(np.array(total_pred_output)), axis=-1)
        for i, filename in enumerate(filenames):
            name = filename.split('/')[-1].split('.')[0]
            dets_info[name] = [
                class_2_index[int(total_pred_idx[i])], probs[i],
                shape_dict[name][1], shape_dict[name][2]
            ]

        with open("fusions/fv.json", "w", encoding="utf-8") as f:
            json.dump(dets_info, f, cls=MyEncoder)
        accuracy = round(accuracy_score(total_true_idx, total_pred_idx), 4)

        test_map, ap_list = eval_map(detFolder="fusions/fv.json",
                                     gtFolder="txts/v-info-new.json",
                                     return_each_ap=True)
        print("Accuracy: %s, map: %.4f" % (accuracy, test_map))

        with open("weights/fusion_weights_map.json", 'w',
                  encoding="utf-8") as f:
            weights[predictor.default_cfg['model_name']] = {}
            weights[
                predictor.default_cfg['model_name']]["model_weight"] = test_map
            weights[
                predictor.default_cfg['model_name']]["label_weight"] = ap_list

            json.dump(weights, f, cls=MyEncoder, indent=2)

        model_weight[predictor.default_cfg['model_name']] = test_map
        label_weight[predictor.default_cfg['model_name']] = ap_list
    else:
        with open('./weights/fusion_weights_tta.json', 'r') as json_file:
            json_data = json.load(json_file)
        model_weight[predictor.default_cfg['model_name']] = np.array(
            [json_data[predictor.default_cfg['model_name']]['model_weight']])
        label_weight[predictor.default_cfg['model_name']] = np.array(
            [json_data[predictor.default_cfg['model_name']]['label_weight']])

    return total_pred_output, total_true_output
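# Rough usage sketch (the loader, model and weight dicts are assumed to be set up
# by the calling script; `dataset` must also be in scope as a global):
# model_weight, label_weight, weights = {}, {}, {}
# preds, truths = perform_predict(model.cuda().eval(), valid_loader,
#                                 model_weight, label_weight, weights,
#                                 save_weights=True)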