def predict(model, root: Path, df: pd.DataFrame, out_path: Path, batch_size: int, tta_code: list, workers: int, use_cuda: bool):
    """Run TTA inference over *df* and save per-class sigmoid probabilities.

    Parameters
    ----------
    model : torch.nn.Module
        Trained classifier, assumed to already live on the target device.
    root : Path
        Image root directory handed to ``TTADataset``.
    df : pd.DataFrame
        Samples to score (rows consumed by ``TTADataset``).
    out_path : Path
        HDF5 destination; probabilities are written under key ``'prob'``.
    batch_size, workers : int
        DataLoader batching / worker settings.
    tta_code : list
        TTA variant codes understood by ``TTADataset``.
    use_cuda : bool
        If True, move each batch to the GPU before the forward pass.
    """
    loader = DataLoader(
        dataset=TTADataset(root, df, tta_code=tta_code),
        shuffle=False,
        batch_size=batch_size,
        num_workers=workers,
    )
    model.eval()
    all_outputs, all_ids = [], []
    with torch.no_grad():
        for inputs, ids in tqdm.tqdm(loader, desc='Predict'):
            if use_cuda:
                inputs = inputs.cuda()
            outputs = torch.sigmoid(model(inputs))
            # .detach() replaces the deprecated .data attribute; we are
            # already inside no_grad so no graph is attached anyway.
            all_outputs.append(outputs.detach().cpu().numpy())
            all_ids.extend(ids)
    df = pd.DataFrame(
        data=np.concatenate(all_outputs),
        index=all_ids,
        columns=list(map(str, range(N_CLASSES))))
    df = mean_df(df)
    # NOTE(review): DataFrame.to_hdf has no `index_label` parameter (that
    # belongs to to_csv); the index is stored with the frame automatically,
    # so the stray kwarg was dropped here.
    df.to_hdf(out_path, key='prob')
    print(f'Saved predictions to {out_path}')
def main():
    """Ensemble TTA prediction HDFs into ``tta_result.csv``.

    Reads one prediction file per configured run directory, averages them
    (weighted), and writes an ``id,pred`` CSV.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--threshold', type=float, default=0.1)
    args = parser.parse_args()
    dirs = [
        r'/home/ubuntu/pytorch_code/2class_clf_pytorch/result/se50_talking_0.8/12tta',
    ]
    weight = [
        1
    ]
    predicts = get_predicts(dirs, weight)
    dfs = []
    sum_w = 0
    for prediction, w in predicts:
        print('pred', prediction)
        # pandas.read_hdf has no `index_col` parameter (the stored index is
        # restored automatically); the invalid kwarg was removed.
        df = pd.read_hdf(prediction)
        # Bug fix: the per-run weight was unpacked but never applied to the
        # predictions (the sibling ensemble script multiplies by w).
        dfs.append(df * w)
        sum_w += w
    df = pd.concat(dfs)
    df_mean = mean_df(df)
    del df
    # Normalise so the result is a true weighted mean even when the weights
    # do not average to 1 (matches the sibling ensemble script).
    df_mean = df_mean / (sum_w / len(predicts))
    # Reuse the index of the last prediction file for the output rows.
    df_mean.index = pd.read_hdf(prediction).index
    df_mean = df_mean.reset_index()
    df_mean.columns = ['id', 'pred']
    df_mean.to_csv('tta_result.csv', index=False)
def main(*args):
    """Blend prediction HDFs into a diagnosis submission CSV.

    Averages the per-class probabilities from ``--predictions`` and takes the
    argmax over the class columns as the ``diagnosis`` label.  When run inside
    a notebook, CLI arguments are taken from ``args[0]`` instead of sys.argv.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--predictions', nargs='+')
    arg('--output')
    # Kept for CLI compatibility; the argmax path below does not use it.
    arg('--threshold', type=float, default=0.2)
    if is_env_notebook():
        args = parser.parse_args(args=args[0])
    else:
        args = parser.parse_args()
    sample_submission = pd.read_csv(
        DATA_ROOT / 'sample_submission.csv', index_col='id_code')
    dfs = []
    for prediction in args.predictions:
        # pandas.read_hdf has no `index_col` parameter; the stored index is
        # restored as-is, so the invalid kwarg was removed.
        df = pd.read_hdf(prediction)
        # Align every run to the submission's row order before averaging.
        df = df.reindex(sample_submission.index)
        dfs.append(df)
    df = pd.concat(dfs)
    df = mean_df(df)
    # Hard argmax over the class-probability columns.
    df["diagnosis"] = df.values.argmax(axis=1)
    df.loc[:, ["diagnosis"]].to_csv(args.output, header=True)
def main():
    """Blend prediction HDFs, binarise, and write an attribute_ids submission.

    Positional arguments: one or more prediction HDF paths, then the output
    CSV path.  ``--threshold`` / ``--max-labels`` control the binarisation.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('predictions', nargs='+')
    arg('output')
    arg('--threshold', type=float, default=0.2)
    arg('--max-labels', type=int, default=10)
    args = parser.parse_args()
    sample_submission = pd.read_csv(
        DATA_ROOT / 'sample_submission.csv', index_col='id')
    dfs = []
    for prediction in args.predictions:
        # pandas.read_hdf has no `index_col` parameter; the stored index is
        # restored automatically, so the invalid kwarg was removed.
        df = pd.read_hdf(prediction)
        # Align every run to the submission's row order before averaging.
        df = df.reindex(sample_submission.index)
        dfs.append(df)
    df = pd.concat(dfs)
    df = mean_df(df)
    # In-place assignment keeps index/columns while replacing probabilities
    # with the thresholded 0/1 mask.
    df[:] = binarize_prediction(
        df.values, threshold=args.threshold, max_labels=args.max_labels)
    df = df.apply(get_classes, axis=1)
    df.name = 'attribute_ids'
    df.to_csv(args.output, header=True)
def main():
    """Weighted ensemble of 30 fold predictions into ``./all_tmp.csv``.

    SE-ResNeXt101 folds get weight 0.5, SE-ResNeXt50 and the 'venn' SKF folds
    get 0.25.  The weighted mean is binarised at ``--threshold`` and written
    as an attribute_ids submission.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--threshold', type=float, default=0.1)
    args = parser.parse_args()
    args.output = './all_tmp.csv'
    # NOTE(review): DATA_ROOT is concatenated with '+' here, so it appears to
    # be a plain string in this script (elsewhere it is used as a Path) —
    # confirm before unifying.
    sample_submission = pd.read_csv(
        DATA_ROOT + '/' + 'sample_submission.csv', index_col='id')
    dirs = [
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold0/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold1/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold2/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold3/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold4/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold5/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold6/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold7/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold8/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se101_384_ratio_0.6_0.99_re_fold9/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold0/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold1/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold2/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold3/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold4/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold5/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold6/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold7/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold8/112tta',
        r'/data1/shentao/competitions_py3/imet/result/se50_384_ratio_0.6_0.99_re_fold9/112tta',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold0',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold1',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold2',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold3',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold4',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold5',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold6',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold7',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold8',
        r'/data1/shentao/competitions_py3/imet/result/venn/112tta_se101_skf_fold9',
    ]
    weight = [
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
        0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
        0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
    ]
    predicts = get_predicts(dirs, weight)
    print(len(predicts))
    dfs = []
    sum_w = 0
    for prediction, w in predicts:
        print('pred', prediction)
        # pandas.read_hdf has no `index_col` parameter; the stored index is
        # restored automatically, so the invalid kwarg was removed.
        df = pd.read_hdf(prediction)
        # Align to the submission order, then scale by this run's weight.
        df = df.reindex(sample_submission.index) * w
        sum_w += w
        dfs.append(df)
    # mean_df averages over the stacked rows; dividing by sum_w / n turns
    # that plain mean into the intended weighted mean.
    ratio = sum_w / len(predicts)
    print(ratio)
    df = pd.concat(dfs)
    df = mean_df(df) / ratio
    df[:] = binarize_prediction(df.values, threshold=args.threshold)
    df = df.apply(get_classes, axis=1)
    df.name = 'attribute_ids'
    df.to_csv(args.output, header=True)