Exemplo n.º 1
0
    def fuse(self, result_dir):
        """Ensemble validation logit files by weighted averaging.

        Loads every ``*.val.*.pkl`` file in *result_dir*, normalizes each
        sample's logits to a distribution, accumulates them with per-file
        weights, then reports the top-3 error rate before and after
        probability calibration. Submission DataFrames are built but the
        ``to_csv`` write is left disabled.

        Args:
            result_dir: directory containing the per-model ``.pkl`` result
                files; output csv paths are formed inside it.
        """
        train_json = self.load_json_file('./data/train_fusesize.json')
        # Training-set class labels; consumed by calibrate_probs below.
        train_df = pd.DataFrame({'label_id': [x['class'] for x in train_json]})

        results_file = sorted(glob(os.path.join(result_dir, '*.val.*.pkl')))
        print('Total {} ensemble files.'.format(len(results_file)))

        # Uniform ensemble weight per result file.
        # NOTE(review): hard-coded for exactly 5 files — an IndexError is
        # raised if more are found; confirm len(results_file) == 5.
        weight = [0.2] * 5

        new_dict = {}
        res_dict = {}  # holds the last loaded file; labels are read from it later
        for i, res in tqdm(enumerate(results_file)):
            res_dict = load_dict(res)
            for k, v in res_dict.items():
                v = v['logit']
                # Normalize each model's logits to a distribution before
                # weighting so differently-scaled models contribute comparably.
                if k not in new_dict:
                    new_dict[k] = v / v.sum() * weight[i]
                else:
                    new_dict[k] += v / v.sum() * weight[i]

        all_id_list = []
        all_predicted = []
        correct_count = 0
        prob_list = []
        label_list = []
        for k, v in tqdm(new_dict.items(), desc='Fusing result'):
            v /= v.sum()
            prob_list.append(v)
            top3_result = np.argsort(-v, axis=-1)[:3]
            all_predicted.append('{:d} {:d} {:d}'.format(
                top3_result[0], top3_result[1], top3_result[2]))
            all_id_list.append(k)
            # Labels are taken from the last loaded result file; assumes all
            # files carry identical labels per sample id — TODO confirm.
            label = res_dict[k]['label']
            label_list.append([label])
            correct_count += np.sum(top3_result == label)

        datafame = pd.DataFrame({'id': all_id_list, 'predicted': all_predicted})
        predict_csv = os.path.join(result_dir, 'ensemble.csv')
        # datafame.to_csv(predict_csv, index=False, sep=',')
        print('Top3 error rate: {:.6f}'.format(
            1 - correct_count / float(len(all_id_list))))
        probs = np.stack(prob_list)
        label = np.array(label_list)

        # Probability calibration against the training-set class prior.
        calibrated_prob = self.calibrate_probs(probs, train_df)
        topk_predict_calib = np.argsort(-calibrated_prob, axis=-1)[:, 0:3]
        err_calib = 1. - np.sum((topk_predict_calib == label), axis=-1).mean()
        print('Top3 error rate (calibration): {:.6f}'.format(err_calib))

        all_predicted_calib = []
        for item in topk_predict_calib:
            all_predicted_calib.append('{:d} {:d} {:d}'.format(item[0], item[1], item[2]))
        datafame_calib = pd.DataFrame({'id': all_id_list, 'predicted': all_predicted_calib})
        predict_csv_calib = os.path.join(result_dir, 'calib_ensemble_4.csv')
Exemplo n.º 2
0
def cover_dict():
    """Re-wrap raw test logits into the ensemble dict format.

    Reads the source pickle of ``{id: logit}``, wraps each entry as
    ``{'logit': ..., 'label': -1}`` (labels are unknown for the test split)
    and saves the result to the destination path.
    """
    src = './log/ensemble/ep11.logit.test.pkl'
    dst = './log/ensemble/ep11.test.ens100.logit.pkl'
    cur_dict = load_dict(src)
    wrapped = {
        key: {'logit': logit, 'label': -1}
        for key, logit in tqdm(cur_dict.items())
    }
    save_dict(wrapped, dst)
Exemplo n.º 3
0
 def load_feat_single(self, filename):
     """Load every feature dict whose path matches *filename* (a glob pattern).

     Returns:
         List of loaded feature dicts, ordered by sorted matching file name.
     """
     matched = sorted(glob(filename))
     feats = []
     for path in matched:
         print('Loading feature from {:s}'.format(path))
         feats.append(load_dict(path))
     return feats
Exemplo n.º 4
0
    def knn(self, result_dir):
        """k-nearest-neighbour baseline over ensembled logits.

        Builds a KD-tree on the normalized training logits, queries the 500
        nearest training samples for each validation sample, votes by label
        frequency and prints the resulting top-3 error rate.

        Args:
            result_dir: directory holding ``train.logit.pkl`` and
                ``val.logit.pkl``, each a dict of ``{id: {'logit', 'label'}}``.
        """
        def _load_split(pkl_name):
            # Return (row-stacked normalized logits, label vector) for one split.
            split_dict = load_dict(os.path.join(result_dir, pkl_name))
            data = []
            labels = []
            for v in split_dict.values():
                data.append(v['logit'] / v['logit'].sum())
                labels.append(v['label'])
            return np.stack(data, axis=0), np.array(labels)

        print('Loading training file...')
        train_data, train_label = _load_split('train.logit.pkl')

        print('Loading validation file...')
        val_data, val_label = _load_split('val.logit.pkl')

        print('Building KD tree...')
        kdt = KDTree(train_data, leaf_size=30, metric='euclidean')
        print('Predicting...')
        idx = kdt.query(val_data, k=500, return_distance=False)
        # Majority vote over neighbour labels; 2019 is presumably the number
        # of classes — TODO confirm against the dataset definition.
        pred_tmp = train_label[idx]
        count = np.stack([np.bincount(x, minlength=2019) for x in pred_tmp],
                         axis=0)
        pred = np.argsort(-count, axis=-1)[:, 0:3]
        err = 1. - np.sum(val_label[:, np.newaxis] == pred, axis=-1).mean()
        print('Error: {:f}'.format(err))
Exemplo n.º 5
0
 def calibrate_superlabel(self, train_json, probs):
     """Smooth class probabilities using the super-label hierarchy.

     Each fine-grained class maps to a super-label membership vector
     (loaded from ``fglabel2splabel.pkl``). For each super-label group the
     mean probability over the group is added back onto its member classes.

     Args:
         train_json: unused here; kept for interface compatibility.
         probs: ``(num_samples, num_class)`` probability array.

     Returns:
         Array shaped like *probs* with the group-mean bonus applied.
     """
     splabel = load_dict('./data/fglabel2splabel.pkl')
     # Assign each distinct super-label bit-string a dense index, and map
     # every fine-grained class id to that group index.
     sp_lab2idx_dict = {}
     dict_fg2sp = {}
     for fg_class, membership in splabel.items():
         key = ''.join(str(x) for x in membership)
         if key not in sp_lab2idx_dict:
             sp_lab2idx_dict[key] = len(sp_lab2idx_dict)
         dict_fg2sp[fg_class] = sp_lab2idx_dict[key]

     fgidx = np.array([dict_fg2sp[x] for x in range(self.num_class)])
     new_probs = np.zeros_like(probs)
     # NOTE(review): range(fgidx[-1]) runs up to the super-label index of the
     # *last* class, not the number of groups — groups with a higher index
     # never get processed. Verify whether fgidx.max() + 1 was intended.
     for i in tqdm(range(fgidx[-1])):
         mask = fgidx == i
         mask_float = mask / np.sum(mask)
         mask_float = mask_float[np.newaxis, :]
         # Group-mean probability, broadcast back onto the group's members.
         tmpp = np.matmul(probs, mask_float.transpose())
         new_probs += (probs + tmpp) * mask
     return new_probs
Exemplo n.º 6
0
    def __init__(self, phase, is_transform=True, DEBUG=False):
        """Image dataset for the iMaterialist challenge.

        Args:
            phase: 'train' selects the training transform (when
                *is_transform* is True); any other value selects the test
                transform.
            is_transform: when False during training, the test pipeline is
                used instead of the training one.
            DEBUG: when True, truncates the image list to its first 301
                entries for quick runs.
        """
        self.img_size = 'fusesize'
        self.img_root = '/data1/zhaoj/data/challenge/iMaterialist/image_' + self.img_size
        self.img_list_root = './data'
        self.phase = phase
        # Fine-grained label -> super-label membership mapping.
        self.fglabel2splabel_dict = load_dict('./data/fglabel2splabel.pkl')

        reimgsize = 256
        # Per-backbone preprocessing pipelines, keyed by architecture family.
        # NOTE(review): the 'test' pipelines also use RandomResizedCrop and
        # RandomHorizontalFlip (random augmentation at eval time); the
        # commented-out CenterCrop variant below suggests a deterministic
        # eval pipeline may have been intended — confirm before relying on
        # reproducible validation numbers.
        self.transform_dict = {
            'resnet': {
                'train':
                transforms.Compose([
                    transforms.Resize(reimgsize),
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
                ]),
                'test':
                transforms.Compose([
                    transforms.Resize(reimgsize),
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
                ])
            },
            # 'test': transforms.Compose([transforms.Resize(reimgsize), transforms.CenterCrop(224), transforms.ToTensor(),
            #                             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])},
            'senet': {
                'train':
                transforms.Compose([
                    transforms.Resize(256),
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
                ]),
                'test':
                transforms.Compose([
                    transforms.Resize(256),
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
                ])
            },
            # Inception-family networks use 299x299 inputs and 0.5/0.5
            # normalization rather than the ImageNet statistics above.
            'inceptionresnet': {
                'train':
                transforms.Compose([
                    transforms.Resize(342),
                    transforms.RandomResizedCrop(299),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
                ]),
                'test':
                transforms.Compose([
                    transforms.Resize(342),
                    transforms.RandomResizedCrop(299),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
                ])
            },
            'nasnet': {
                'train':
                transforms.Compose([
                    transforms.Resize(378),
                    transforms.RandomResizedCrop(331),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
                ]),
                'test':
                transforms.Compose([
                    transforms.Resize(378),
                    transforms.RandomResizedCrop(331),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
                ])
            }
        }

        # Only the 'inceptionresnet' pipelines are actually selected here;
        # the other entries in transform_dict are currently unused.
        if phase == 'train' and is_transform:
            self.transform = self.transform_dict['inceptionresnet']['train']
            # self.transform = transforms.Compose([transforms.Resize(512), transforms.RandomResizedCrop(448), transforms.RandomHorizontalFlip(), transforms.ToTensor(),
            #                                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        else:
            self.transform = self.transform_dict['inceptionresnet']['test']
            # self.transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(),
            #                                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
            # self.transform = transforms.Compose([transforms.Resize(512), transforms.RandomCrop(448), transforms.ToTensor(),
            #                                      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

        self.img_list = self.load_json_file()
        if DEBUG:
            self.img_list = self.img_list[:301]