def _predict():
    """Load the tuned threshold, binarize test predictions, and write the submission."""
    logger = tk.log.get(__name__)
    X_test = _data.load_test_data()
    # Threshold was persisted by the tuning step; read it back from disk.
    threshold = float((MODELS_DIR / 'threshold.txt').read_text())
    logger.info(f'threshold = {threshold:.3f}')
    binarized = predict_all('test', X_test) > threshold
    _data.save_submission(MODELS_DIR / 'submission.csv', binarized)
def predict():
    """Blend the source models' cached predictions and save the weighted mean."""
    logger.info(f"source_models = {source_models}")
    test_set = _data.load_test_data()
    # Load each source model's prediction, then combine with the given weights.
    per_model = [_load_pred(name) for name in model_names]
    blended = np.average(per_model, weights=model_weights, axis=0)
    _data.save_prediction(models_dir, test_set, blended)
def _predict():
    """Predict the test set and emit submissions at several cut-off thresholds."""
    X_test = _data.load_test_data()
    pred = predict_all('test', X_test)
    # Default 0.50 cut-off goes to the canonical file name.
    _data.save_submission(MODELS_DIR / 'submission.csv', pred > 0.50)
    # Extra thresholds for manual leaderboard probing.
    for th in (0.40, 0.45, 0.55, 0.60):
        _data.save_submission(MODELS_DIR / f'submission_{th:.2f}.csv', pred > th)
def _train(args, fine=False):
    """Train one CV fold.

    Args:
        args: parsed CLI arguments; only ``args.cv_index`` is read here.
        fine: when True, run a fine-tuning pass that mixes pseudo-labeled
            test samples into the training data and starts from the
            previously saved fold weights.
    """
    logger = tk.log.get(__name__)
    logger.info(f'args: {args}')
    # Derive a deterministic split seed from the model name so every fold
    # (and later prediction code) reproduces the same CV split.
    split_seed = int(MODEL_NAME.encode('utf-8').hex(), 16) % 10000000
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    # Persist the seed so predict_all('val', ...) can rebuild the split.
    (MODELS_DIR / 'split_seed.txt').write_text(str(split_seed))
    X, y = _data.load_train_data()
    ti, vi = tk.ml.cv_indices(X, y, cv_count=CV_COUNT, cv_index=args.cv_index, split_seed=split_seed, stratify=False)
    (X_train, y_train), (X_val, y_val) = (X[ti], y[ti]), (X[vi], y[vi])
    logger.info(f'cv_index={args.cv_index}: train={len(y_train)} val={len(y_val)}')
    network, lr_multipliers = _create_network()
    gen = tk.generator.Generator()
    if fine:
        # Fine-tuning: append placeholder slots (half the training-set size)
        # that RandomPickData fills with pseudo-labeled test samples.
        pseudo_size = len(y_train) // 2
        X_train = np.array(list(X_train) + [None] * pseudo_size)
        y_train = np.array(list(y_train) + [None] * pseudo_size)
        X_test = _data.load_test_data()
        _, pi = tk.ml.cv_indices(X_test, np.zeros((len(X_test),)), cv_count=CV_COUNT, cv_index=args.cv_index, split_seed=split_seed, stratify=False)
        #pred_test = predict_all('test', None, use_cache=True)[(args.cv_index + 1) % CV_COUNT]  # cross-pseudo-labeling
        import stack_res
        # Pseudo-labels come from the *next* fold's cached predictions
        # (cross-pseudo-labeling) so a fold never trains on its own outputs.
        pred_test = stack_res.predict_all('test', None, use_cache=True)[(args.cv_index + 1) % CV_COUNT]  # cross-pseudo-labeling
        gen.add(tk.generator.RandomPickData(X_test[pi], pred_test[pi]))
    # Data augmentation pipeline (applied to inputs and, where flagged, outputs).
    gen.add(tk.image.RandomFlipLR(probability=0.5, with_output=True))
    gen.add(tk.image.Padding(probability=1, with_output=True))
    gen.add(tk.image.RandomRotate(probability=0.25, with_output=True))
    gen.add(tk.image.RandomCrop(probability=1, with_output=True))
    gen.add(tk.image.RandomAugmentors([
        tk.image.RandomBlur(probability=0.125),
        tk.image.RandomUnsharpMask(probability=0.125),
        tk.image.RandomBrightness(probability=0.25),
        tk.image.RandomContrast(probability=0.25),
    ], probability=0.125))
    gen.add(tk.image.Resize((101, 101), with_output=True))
    model = tk.dl.models.Model(network, gen, batch_size=BATCH_SIZE)
    if fine:
        # Fine-tuning resumes from this fold's previously trained weights.
        model.load_weights(MODELS_DIR / f'model.fold{args.cv_index}.h5')
    # Lower LR for fine-tuning; both values are scaled per-128-sample batch.
    model.compile(sgd_lr=0.001 / 128 if fine else 0.1 / 128, loss=tk.dl.losses.lovasz_hinge_elup1, metrics=[tk.dl.metrics.binary_accuracy], lr_multipliers=lr_multipliers, clipnorm=10.0)
    model.fit(
        X_train, y_train, validation_data=(X_val, y_val),
        # Fine-tuning runs a third of the normal epoch budget.
        epochs=EPOCHS // 3 if fine else EPOCHS,
        cosine_annealing=True, mixup=False)
    model.save(MODELS_DIR / f'model.fold{args.cv_index}.h5', include_optimizer=False)
    # Only the Horovod master process logs evaluation to avoid duplicates.
    if tk.dl.hvd.is_master():
        _evaluation.log_evaluation(y_val, model.predict(X_val))
def predict_all(data_name, X, use_cache=False):
    """Predict with every CV fold's model, with optional on-disk caching.

    Args:
        data_name: ``'val'`` for out-of-fold validation predictions;
            anything else is treated as the test set.
        X: input data. Used for the 'val' split; for other values the test
            set is reloaded inside this function (see NOTE below).
        use_cache: if True and a cached pickle exists, return it as-is.

    Returns:
        For 'val': one array where each sample was predicted by the fold
        that held it out. Otherwise: a list of per-fold prediction arrays.
    """
    cache_path = CACHE_DIR / data_name / f'{MODEL_NAME}.pkl'
    if use_cache and cache_path.is_file():
        return joblib.load(cache_path)
    if data_name == 'val':
        # Rebuild the training-time CV split from the persisted seed so each
        # fold's model only predicts the samples it never trained on.
        X_list, vi_list = [], []
        split_seed = int((MODELS_DIR / 'split_seed.txt').read_text())
        for cv_index in range(CV_COUNT):
            _, vi = tk.ml.cv_indices(X, None, cv_count=CV_COUNT, cv_index=cv_index, split_seed=split_seed, stratify=False)
            X_list.append(X[vi])
            vi_list.append(vi)
    else:
        # NOTE(review): the passed-in X is discarded here and the test set is
        # reloaded — presumably intentional, but confirm against callers.
        X = _data.load_test_data()
        X_list = [X] * CV_COUNT
    gen = tk.generator.SimpleGenerator()
    # Load the fold-0 model once; later folds just swap weights in-place.
    model = tk.dl.models.Model.load(MODELS_DIR / f'model.fold0.h5', gen, batch_size=BATCH_SIZE, multi_gpu=True)
    pred_list = []
    for cv_index in tk.tqdm(range(CV_COUNT), desc='predict'):
        if cv_index != 0:
            model.load_weights(MODELS_DIR / f'model.fold{cv_index}.h5')
        X_t = X_list[cv_index]
        pred = _evaluation.predict_tta(model, X_t, mode='bin')
        pred_list.append(pred)
    if data_name == 'val':
        # Scatter each fold's out-of-fold predictions back to original order.
        pred = np.empty((len(X), 1), dtype=np.float32)
        for vi, p in zip(vi_list, pred_list):
            pred[vi] = p
    else:
        pred = pred_list
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(pred, cache_path, compress=3)
    return pred
def load_test_data():
    """Return the test dataset (thin delegation to ``_data``)."""
    return _data.load_test_data()
def _predict():
    """Average per-fold predictions, binarize at 0.5, and write the submission."""
    test_inputs = _data.load_test_data()
    fold_preds = predict_all('test', test_inputs)
    submission = np.mean(fold_preds, axis=0) > 0.5
    _data.save_submission(MODELS_DIR / 'submission.csv', submission)
def _predict():
    """Run test-set prediction for its side effects (caching by ``predict_all``)."""
    predict_all('test', _data.load_test_data())
def predict():
    """Load the trained model, predict the test set, and save the mean prediction."""
    test_set = _data.load_test_data()
    trained = create_model().load(models_dir)
    per_fold = trained.predict_all(test_set)
    averaged = np.mean(per_fold, axis=0)
    _data.save_prediction(models_dir, test_set, averaged)