def predict_on_test(model_name, fold, data_model_name=None, data_fold=None):
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    if data_model_name is None:
        data_model_name = model_name

    if data_fold is None:
        data_fold = fold

    with utils.timeit_context('load data'):
        X = load_test_data('../output/prediction_test_frames/',
                           data_model_name, data_fold)
        print(X.shape)

    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.load_weights(f"../output/nn1_{model_name}_{fold}_full.pkl")

    with utils.timeit_context('predict'):
        prediction = model.predict(X)

    for col, cls in enumerate(classes):
        ds[cls] = np.clip(prediction[:, col], 0.001, 0.999)
    os.makedirs('../submissions', exist_ok=True)
    ds.to_csv(
        f'../submissions/submission_one_model_nn_{model_name}_{data_fold}.csv',
        index=False,
        float_format='%.7f')
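# utils.timeit_context wraps every timed block in these examples. A minimal
# sketch of a compatible helper, assuming it only reports wall-clock time
# (the project's real implementation may differ):
import time
from contextlib import contextmanager

@contextmanager
def timeit_context(name='block'):
    start = time.time()
    yield
    print(f'[{name}] done in {time.time() - start:.3f}s')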
def predict_on_test(model_name, fold, use_cache=False):
    model = pickle.load(
        open(f"../output/xgb_{model_name}_{fold}_full.pkl", "rb"))
    print(model)
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    print(classes)

    data_dir = '../output/prediction_test_frames'
    with utils.timeit_context('load data'):
        cache_fn = f'../output/prediction_test_frames/{model_name}_{fold}_cache.npy'
        if use_cache:
            X = np.load(cache_fn)
        else:
            X = load_test_data(data_dir, model_name, fold)
            np.save(cache_fn, X)
        print(X.shape)
    with utils.timeit_context('predict'):
        prediction = model.predict_proba(X)

    if prediction.shape[1] == 23:
        prediction = np.insert(prediction, obj=12, values=0.0, axis=1)

    for col, cls in enumerate(classes):
        ds[cls] = np.clip(prediction[:, col], 0.001, 0.999)
    os.makedirs('../submissions', exist_ok=True)
    ds.to_csv(f'../submissions/submission_one_model_{model_name}_{fold}.csv',
              index=False,
              float_format='%.7f')
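# When predict_proba returns only 23 columns (presumably because one class was
# absent from that fold's training data), a zero column is re-inserted at
# index 12 to restore the 24-class submission layout. Standalone sketch:
import numpy as np

pred = np.random.rand(5, 23)
pred = np.insert(pred, obj=12, values=0.0, axis=1)
assert pred.shape == (5, 24) and np.all(pred[:, 12] == 0.0)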
def train_model_lgb_combined_folds(combined_model_name, model_with_folds):
    X_combined = []
    y_combined = []

    for model_name, fold in model_with_folds:
        with utils.timeit_context('load data'):
            X, y, video_ids = load_train_data(model_name, fold)
            X_combined.append(X)
            y_combined.append(y)

    X = np.row_stack(X_combined)
    y = np.row_stack(y_combined)

    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    with utils.timeit_context('fit 200 est'):
        param = {
            'num_leaves': 50,
            'objective': 'multiclass',
            'max_depth': 5,
            'learning_rate': .05,
            'max_bin': 200,
            'num_class': NB_CAT,
            'metric': ['multi_logloss']
        }
        model = lgb.train(param,
                          lgb.Dataset(X, label=y_cat),
                          num_boost_round=200)

    pickle.dump(
        model,
        open(f"../output/lgb_combined_folds_{combined_model_name}.pkl", "wb"))
def predict_unused_clips(data_model_name, data_fold, combined_model_name):
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    data_dir = f'../output/prediction_unused_frames/'
    video_ids = [fn[:-4] for fn in os.listdir(data_dir) if fn.endswith('.csv')]

    with utils.timeit_context('load data'):
        X = load_test_data_uncached(data_dir, data_model_name, data_fold,
                                    video_ids)

    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.load_weights(f"../output/nn1_{combined_model_name}_0_full.pkl")

    with utils.timeit_context('predict'):
        prediction = model.predict(X)

    ds = pd.DataFrame(data={'filename': video_ids})

    for col, cls in enumerate(classes):
        ds[cls] = prediction[:, col]  # np.clip(prediction[:, col], 0.001, 0.999)
    # os.makedirs('../submissions', exist_ok=True)
    ds.to_csv(
        f'../output/prediction_unused_frames/{data_model_name}_{data_fold}.csv',
        index=False,
        float_format='%.7f')
def train_model_xgboost_combined_folds(combined_model_name, model_with_folds):
    X_combined = []
    y_combined = []

    for model_name, fold in model_with_folds:
        with utils.timeit_context('load data'):
            X, y, video_ids = load_train_data(model_name, fold)
            X_combined.append(X)
            y_combined.append(y)

    X = np.row_stack(X_combined)
    y = np.row_stack(y_combined)

    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    model = XGBClassifier(n_estimators=500,
                          objective='multi:softprob',
                          learning_rate=0.1,
                          silent=True)
    with utils.timeit_context('fit 500 est'):
        model.fit(X, y_cat)
    pickle.dump(
        model,
        open(f"../output/xgb_combined_folds_{combined_model_name}.pkl", "wb"))
def uncompress_zar(fn_src, fn_dst):
    print(fn_src)
    print(fn_dst)
    print(zarr.storage.default_compressor)
    zarr.storage.default_compressor = None
    ds = ChunkedDataset(fn_src).open(cached=False)

    dst_dataset = ChunkedDataset(fn_dst)
    dst_dataset.initialize()

    with utils.timeit_context("copy scenes"):
        dst_dataset.scenes.append(ds.scenes[:])
    with utils.timeit_context("copy frames"):
        dst_dataset.frames.append(ds.frames[:])
    with utils.timeit_context("copy agents"):
        for i in tqdm(range(0, len(ds.agents), 1024 * 1024)):
            dst_dataset.agents.append(ds.agents[i:i + 1024 * 1024])
    with utils.timeit_context("copy tl_faces"):
        dst_dataset.tl_faces.append(ds.tl_faces[:])
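# Scenes, frames and tl_faces are copied in one shot, but the much larger
# agents table is appended in 1M-row slices to bound peak memory. The same
# pattern as a generic helper (sketch; works for any sliceable source and
# any destination with an append() method):
def copy_chunked(src, dst, chunk=1024 * 1024):
    """Append src to dst in fixed-size slices instead of one huge read."""
    for i in range(0, len(src), chunk):
        dst.append(src[i:i + chunk])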
def train_all_models_lgb_combined(combined_model_name, models_with_folds):
    X_all_combined = []
    y_all_combined = []

    requests = []
    results = []
    for model_with_folds in models_with_folds:
        for model_name, fold in model_with_folds:
            requests.append((model_name, fold))
            # results.append(load_one_model(requests[-1]))

    pool = Pool(40)
    with utils.timeit_context('load all data'):
        results = pool.starmap(load_train_data, requests)

    for model_with_folds in models_with_folds:
        X_combined = []
        y_combined = []
        for model_name, fold in model_with_folds:
            X, y, video_ids = results[requests.index((model_name, fold))]
            print(model_name, fold, X.shape)
            X_combined.append(X)
            y_combined.append(y)

        X_all_combined.append(np.row_stack(X_combined))
        y_all_combined.append(np.row_stack(y_combined))

    X = np.column_stack(X_all_combined)
    y = y_all_combined[0]

    print(X.shape, y.shape)

    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    with utils.timeit_context('fit'):
        param = {
            'num_leaves': 50,
            'objective': 'multiclass',
            'max_depth': 5,
            'learning_rate': .05,
            'max_bin': 300,
            'num_class': NB_CAT,
            'metric': ['multi_logloss']
        }
        model = lgb.train(param,
                          lgb.Dataset(X, label=y_cat),
                          num_boost_round=260)

    pickle.dump(
        model, open(f"../output/lgb_combined_{combined_model_name}.pkl", "wb"))
def predict_on_test_combined(combined_model_name, models_with_folds):
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    folds = [1, 2, 3, 4]

    X_combined = {fold: [] for fold in folds}
    try:
        X_combined = pickle.load(
            open(f"../output/X_combined_xgb_{combined_model_name}.pkl", 'rb'))
    except FileNotFoundError:
        requests = []

        for model_with_folds in models_with_folds:
            for data_model_name, data_fold in model_with_folds:
                data_dir = '../output/prediction_test_frames'
                with utils.timeit_context('load data'):
                    requests.append((data_dir, data_model_name, data_fold))
                    # X_combined[data_fold].append(load_test_data(data_dir, ds.filename))
                    # print(X_combined[-1].shape)
        pool = Pool(40)
        results = pool.map(load_test_data_one_model, requests)
        for data_fold, X in results:
            X_combined[data_fold].append(X)
        pickle.dump(
            X_combined,
            open(f"../output/X_combined_xgb_{combined_model_name}.pkl", "wb"))

    model = pickle.load(
        open(f"../output/xgb_combined_{combined_model_name}.pkl", "rb"))
    print(model)

    predictions = []
    with utils.timeit_context('predict'):
        for fold in [1, 2, 3, 4]:
            X = np.column_stack(X_combined[fold])
            predictions.append(model.predict_proba(X))
            print('prediction', predictions[-1].shape)

    prediction = np.mean(np.array(predictions).astype(np.float64), axis=0)
    os.makedirs('../submissions', exist_ok=True)
    print('predictions', prediction.shape)

    for clip10 in [5, 4, 3, 2]:
        clip = 10**(-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(prediction[:, col] * (1 - clip * 2) + clip, clip,
                              1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_combined_models_xgboost_{combined_model_name}_clip_{clip10}.csv',
            index=False,
            float_format='%.8f')
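# The submission clip is affine, not a hard cutoff: probabilities are first
# rescaled into [clip, 1-clip] so rows keep (approximately) summing to 1,
# then clipped as a safety net. Standalone check:
import numpy as np

p = np.array([0.0, 0.5, 1.0])
clip = 10 ** -3
print(np.clip(p * (1 - clip * 2) + clip, clip, 1.0 - clip))  # [0.001 0.5 0.999]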
def predict_all_single_fold_models():
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))

    requests = []
    for model_with_folds in config.ALL_MODELS:
        for model_name, fold in model_with_folds:
            requests.append((model_name, fold))
    pool = Pool(8)
    with utils.timeit_context('load all data'):
        results = pool.starmap(load_test_data_from_std_path, requests)

    for models in config.ALL_MODELS:
        for model_name, fold in models:
            model = pickle.load(
                open(f"../output/xgb_{model_name}_{fold}_full.pkl", "rb"))
            print(model_name, fold, model)

            with utils.timeit_context('load data'):
                X = results[requests.index((model_name, fold))]
                # X = load_test_data_from_std_path(model_name, fold)
                print(X.shape)

            with utils.timeit_context('predict'):
                prediction = model.predict_proba(X)
                if prediction.shape[1] == 23:
                    prediction = np.insert(prediction,
                                           obj=12,
                                           values=0.0,
                                           axis=1)
                weight = config.MODEL_WEIGHTS[model_name]
                result += prediction * weight
                total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight

    for clip10 in [5, 4, 3, 2]:
        clip = 10**(-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip,
                              1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_single_folds_models_xgboost_clip_{clip10}.csv',
            index=False,
            float_format='%.8f')
def check_unet(weights):
    dataset = Dataset()
    model = model_unet(INPUT_SHAPE)
    model.load_weights(weights)
    batch_size = 16

    for batch_x, batch_y in dataset.generate_validation(batch_size=batch_size):
        print(batch_x.shape, batch_y.shape)
        with utils.timeit_context('predict 16 images'):
            prediction = model.predict_on_batch(batch_x)

        for i in range(batch_size):
            # plt.imshow(unprocess_input(batch_x[i]))
            # plt.imshow(prediction[i, :, :, 0], alpha=0.75)
            img = batch_x[i].astype(np.float32)
            mask = prediction[i, :, :, 0]

            utils.print_stats('img', img)
            utils.print_stats('mask', mask)

            img[:, :, 0] *= mask
            img[:, :, 1] *= mask
            img[:, :, 2] *= mask
            img = unprocess_input(img)
            plt.imshow(img)
            plt.show()
def check_performance(dataset, name="", num_samples=64 * 20, random_order=False):
    with utils.timeit_context(f"iterate {name} dataset"):
        sample = dataset[63]
        # print("image shape", sample["image"]["image_sem"].shape, sample["image"]["image_sem"].dtype)
        print("Keys:", sample.keys())

        target_positions = sample["target_positions"]
        target_positions_world = transform_points(target_positions, sample["world_from_agent"])
        # output_mask = sample["output_mask"]

        img = dataset.rasterizer.to_rgb(sample["image"].transpose(1, 2, 0))
        plt.imshow(img)

        agents_history = sample["agents_history"]
        cur_frame_positions = agents_history[-1, :, :2] * 100.0
        cur_frame_velocity = agents_history[-1, :, 2:4] * 10.0
        cur_frame_positions_img = transform_points(cur_frame_positions, sample["raster_from_agent"])
        plt.scatter(cur_frame_positions_img[:, 0], cur_frame_positions_img[:, 1])

        plt.scatter(cur_frame_positions_img[:, 0] + cur_frame_velocity[:, 0] * 1.0,
                    cur_frame_positions_img[:, 1] + cur_frame_velocity[:, 1] * 1.0,
                    c='red')

        plt.show()

        nb_samples = len(dataset)
        for i in tqdm(range(num_samples)):
            if random_order:
                sample = dataset[np.random.randint(0, nb_samples)]
            else:
                sample = dataset[i]
            target_positions = sample["target_positions"]
def main():
    video = VideoCapture(video_sources.video_2)

    frame = video.read()
    backSubtractor = BackgroundSubtractorAVG(0.2, denoise(frame))

    for frame in video.frames():
        with utils.timeit_context():
            frame = denoise(frame)
            foreGround = backSubtractor.getForeground(frame)
            # Apply thresholding on the background and display the resulting mask
            ret, mask = cv2.threshold(foreGround, 15, 255, cv2.THRESH_BINARY)

        cv2.imshow('input', frame)
        cv2.imshow('foreground', foreGround)
        # Note: the mask is displayed as an RGB image; convert 'foreGround'
        # to grayscale before thresholding to display a grayscale mask.
        cv2.imshow('mask', mask)

        if cv2.waitKey(10) & 0xFF == 27:
            break

    video.release()
    cv2.destroyAllWindows()
def check_dataset():
    with utils.timeit_context('load ds'):
        ds = NihDataset(fold=0, is_training=True, img_size=512, verbose=True)

    # print(ds.annotations(ds.patient_ids[0]))

    # patient_id = 10056  #ds.patient_ids[0]
    # plt.imshow(ds.images[patient_id])
    #
    # annotation_list = ds.training_samples.loc[[patient_id]]
    #
    # for _, row in annotation_list.iterrows():
    #     plt.plot(
    #         [row[f'p{i}_x'] for i in [1, 2, 3, 4, 1]],
    #         [row[f'p{i}_y'] for i in [1, 2, 3, 4, 1]],
    #         c='y'
    #     )
    # plt.show()

    ds.is_training = False
    plt.imshow(ds[0]['img'])

    plt.figure()
    ds.is_training = True

    for sample in ds:
        print(sample['categories'])
        print(np.array(ds.categories)[sample['categories'] > 0.5])
        plt.cla()
        plt.imshow(sample['img'])
        plt.show()
def try_train_model_nn(model_name, fold):
    with utils.timeit_context('load data'):
        X, y, video_ids = load_train_data(model_name, fold)

    print(X.shape, y.shape)
    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-3),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=42)
    batch_size = 64

    model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=128,
        verbose=1,
        validation_data=(X_test, y_test),
        callbacks=[ReduceLROnPlateau(factor=0.2, verbose=True, min_lr=1e-6)])

    prediction = model.predict(X_test)

    print(y_test.shape, prediction.shape)
    print(metrics.pri_matrix_loss(y_test, prediction))
    print(metrics.pri_matrix_loss(y_test, np.clip(prediction, 0.001, 0.999)))
    delta = prediction - y_test
    print(np.min(delta), np.max(delta), np.mean(np.abs(delta)),
          np.sum(np.abs(delta) > 0.5))
def main():
    from skimage.feature import peak_local_max
    from skimage.morphology import watershed
    import scipy.ndimage as ndi

    img = realImage()
    # img = testImage()
    img = fillHoles(img)

    thresh = img.copy()

    with utils.timeit_context():
        dst = ndi.distance_transform_edt(img)
        localMax = peak_local_max(dst, indices=False, min_distance=1, labels=thresh)
        markers = ndi.label(localMax)[0]
        labels = watershed(-dst, markers, mask=thresh)

    segmImg = (labels * (255 / labels.max())).astype(np.uint8)

    wnd = CvNamedWindow(flags=cv2.WINDOW_NORMAL)
    segmWnd = CvNamedWindow('segm', flags=cv2.WINDOW_NORMAL)

    wnd.imshow(img)
    segmWnd.imshow(segmImg)

    cvWaitKeys()
def train_model_nn(model_name, fold, load_cache=True):
    with utils.timeit_context('load data'):
        X, y, video_ids = load_train_data(model_name, fold)

    print(X.shape, y.shape)
    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    batch_size = 64

    def scheduler(epoch):
        if epoch < 32:
            return 1e-3
        if epoch < 48:
            return 4e-4
        if epoch < 80:
            return 1e-4
        return 1e-5
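    # i.e. epochs 0-31 use lr 1e-3, 32-47 use 4e-4, 48-79 use 1e-4, 80+ use 1e-5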

    model.fit(X,
              y,
              batch_size=batch_size,
              epochs=128,
              verbose=1,
              callbacks=[LearningRateScheduler(schedule=scheduler)])

    model.save_weights(f"../output/nn1_{model_name}_{fold}_full.pkl")
def try_train_all_models_nn_combined(models_with_folds):
    X_all_combined = []
    y_all_combined = []

    for model_with_folds in models_with_folds:
        X_combined = []
        y_combined = []
        for model_name, fold in model_with_folds:
            with utils.timeit_context('load data'):
                X, y, video_ids = load_train_data(model_name, fold)
                X_combined.append(X)
                y_combined.append(y)

        X_all_combined.append(np.row_stack(X_combined))
        y_all_combined.append(np.row_stack(y_combined))

    X = np.column_stack(X_all_combined)
    y = y_all_combined[0]

    print(X.shape, y.shape)
    model = model_nn_combined(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    batch_size = 64

    def scheduler(epoch):
        if epoch < 32:
            return 1e-3
        if epoch < 48:
            return 4e-4
        if epoch < 80:
            return 1e-4
        return 1e-5

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=42)

    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=128,
              verbose=1,
              validation_data=(X_test, y_test),
              callbacks=[LearningRateScheduler(schedule=scheduler)])

    prediction = model.predict(X_test)

    print(y_test.shape, prediction.shape)
    print(metrics.pri_matrix_loss(y_test, prediction))
    print(metrics.pri_matrix_loss(y_test, np.clip(prediction, 0.001, 0.999)))
    print(metrics.pri_matrix_loss(y_test, np.clip(prediction, 0.0001, 0.9999)))
    delta = prediction - y_test
    print(np.min(delta), np.max(delta), np.mean(np.abs(delta)),
          np.sum(np.abs(delta) > 0.5))
def train_all_models_xgboost_combined(combined_model_name, models_with_folds):
    X_all_combined = []
    y_all_combined = []

    requests = []
    results = []
    for model_with_folds in models_with_folds:
        for model_name, fold in model_with_folds:
            requests.append((model_name, fold))
            # results.append(load_one_model(requests[-1]))

    pool = Pool(40)
    with utils.timeit_context('load all data'):
        results = pool.starmap(load_train_data, requests)

    for model_with_folds in models_with_folds:
        X_combined = []
        y_combined = []
        for model_name, fold in model_with_folds:
            X, y, video_ids = results[requests.index((model_name, fold))]
            print(model_name, fold, X.shape)
            X_combined.append(X)
            y_combined.append(y)

        X_all_combined.append(np.row_stack(X_combined))
        y_all_combined.append(np.row_stack(y_combined))

    X = np.column_stack(X_all_combined)
    y = y_all_combined[0]

    print(X.shape, y.shape)

    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    model = XGBClassifier(n_estimators=1600,
                          objective='multi:softprob',
                          learning_rate=0.03,
                          silent=False)
    with utils.timeit_context('fit 1600 est'):
        model.fit(
            X, y_cat
        )  # , eval_set=[(X_test, y_test)], early_stopping_rounds=20, verbose=True)
    pickle.dump(
        model, open(f"../output/xgb_combined_{combined_model_name}.pkl", "wb"))
def predict_combined_folds_models():
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))

    data_dir = '../output/prediction_test_frames/'
    pool = ThreadPool(8)

    for models in config.ALL_MODELS:
        combined_model_name = models[0][0] + '_combined'

        def load_data(request):
            model_name, fold = request
            return load_test_data(data_dir, model_name, fold)

        with utils.timeit_context('load 4 folds data'):
            X_for_folds = pool.map(load_data, models)

        model = model_nn(input_size=X_for_folds[0].shape[1])
        model.compile(optimizer=Adam(lr=1e-4),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.load_weights(
            f"../output/nn_combined_folds_{combined_model_name}.pkl")

        for (model_name, fold), X in zip(models, X_for_folds):
            with utils.timeit_context('predict'):
                prediction = model.predict(X)
                weight = config.MODEL_WEIGHTS[model_name]
                result += prediction * weight
                total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight

    for clip10 in [5, 4, 3, 2]:
        clip = 10**(-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip,
                              1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_combined_folds_models_nn_clip_{clip10}.csv',
            index=False,
            float_format='%.8f')
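# Ensembling pattern shared by the predict_* drivers: accumulate weighted
# probabilities, then normalise by the total weight. Closed-form sketch for
# two models (weights here are illustrative; the real ones come from
# config.MODEL_WEIGHTS):
import numpy as np

p1, p2 = np.array([0.2, 0.8]), np.array([0.4, 0.6])
w1, w2 = 2.0, 1.0
blended = (p1 * w1 + p2 * w2) / (w1 + w2)
print(blended)  # [0.2667 0.7333]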
def check_color_dataset_aug():
    with utils.timeit_context('load ds'):
        ds = ClassificationDataset(fold=0, is_training=True)

    while True:
        sample = ds[1]
        plt.imshow(sample['img'])
        print(ds.samples[sample['idx']], sample['scale'])
        plt.show()
def check_performance():
    import pytorch_retinanet.dataloader
    import torch
    with utils.timeit_context('load ds'):
        ds = DetectionDataset(fold=0, is_training=True, img_size=512)

    dataloader_train = torch.utils.data.DataLoader(
        ds,
        num_workers=16,
        batch_size=12,
        shuffle=True,
        collate_fn=pytorch_retinanet.dataloader.collater2d)
    data_iter = tqdm(enumerate(dataloader_train), total=len(dataloader_train))

    with utils.timeit_context('1000 batches:'):
        for iter_num, data in data_iter:
            if iter_num > 1000:
                break
def predict_masks(fold):
    # weights = '../output/checkpoints/mask_unet/model_unet1/checkpoint-best-019-0.0089.hdf5'
    weights = '../output/ruler_masks_unet.h5'
    model = model_unet(INPUT_SHAPE)
    model.load_weights(weights)
    batch_size = 16

    input_samples = []
    processed_samples = 0

    dest_dir = '../output/ruler_masks'

    for dir_name in os.listdir(IMAGES_DIR):
        clip_dir = os.path.join(IMAGES_DIR, dir_name)
        os.makedirs(os.path.join(dest_dir, dir_name), exist_ok=True)

        for frame_name in os.listdir(clip_dir):
            if not frame_name.endswith('.jpg'):
                continue
            input_samples.append((dir_name, frame_name))

    if fold == 1:
        input_samples = input_samples[:len(input_samples) // 2]
    elif fold == 2:
        input_samples = input_samples[len(input_samples) // 2:]

    pool = ThreadPool(processes=8)
    save_batch_size = 64
    for batch_input_samples in utils.chunks(input_samples,
                                            batch_size * save_batch_size):

        def process_sample(sample):
            img_data = scipy.misc.imread(
                os.path.join(IMAGES_DIR, sample[0], sample[1]))
            img_data = scipy.misc.imresize(img_data, 0.5, interp='cubic')
            return preprocess_input(img_data)

        def generate_x():
            while True:
                for samples in utils.chunks(batch_input_samples, batch_size):
                    yield np.array(pool.map(process_sample, samples))

        with utils.timeit_context('predict {} images, {}/{}, {:.1f}%'.format(
                batch_size * save_batch_size, processed_samples,
                len(input_samples),
                100.0 * processed_samples / len(input_samples))):
            predictions = model.predict_generator(generate_x(),
                                                  steps=save_batch_size,
                                                  verbose=1)

        for i in range(predictions.shape[0]):
            dir_name, fn = input_samples[processed_samples]
            processed_samples += 1
            fn = fn.replace('jpg', 'png')
            scipy.misc.imsave(os.path.join(dest_dir, dir_name, fn),
                              (predictions[i, :, :, 0] * 255.0).astype(
                                  np.uint8))
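# utils.chunks is assumed to yield consecutive fixed-size slices of a
# sequence; a minimal compatible sketch (the project's real helper may differ):
def chunks(items, size):
    for i in range(0, len(items), size):
        yield items[i:i + size]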
def check_dataset_aug():
    with utils.timeit_context('load ds'):
        ds = ClassificationDataset(fold=0, is_training=True, img_aug_level=20, geometry_aug_level=10)

    while True:
        sample = ds[1]
        utils.print_stats('img', sample['img'])
        plt.imshow(np.moveaxis(sample['img'], 0, 2)[:, :, :3])
        plt.show()
def check_dataset():
    with utils.timeit_context('load ds'):
        ds = ClassificationDataset(fold=0, is_training=False)

    for sample in ds:
        plt.cla()
        plt.imshow(sample['img'])
        print(ds.samples[sample['idx']], sample['scale'])
        plt.show()
def train_all_single_fold_models():
    for models in config.ALL_MODELS:
        for model_name, fold in models:
            weights_fn = f"../output/xgb_{model_name}_{fold}_full.pkl"
            print(model_name, fold, weights_fn)
            if os.path.exists(weights_fn):
                print('skip existing file')
            else:
                with utils.timeit_context('train'):
                    model_xgboost(model_name, fold)
def predict_on_test_combined(combined_model_name, models_with_folds):
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    folds = [1, 2, 3, 4]

    X_combined = {fold: [] for fold in folds}
    for model_with_folds in models_with_folds:
        for data_model_name, data_fold in model_with_folds:
            data_dir = f'../output/prediction_test_frames/'
            with utils.timeit_context('load data'):
                X_combined[data_fold].append(
                    load_test_data(data_dir, data_model_name, data_fold))
                # print(X_combined[-1].shape)
    pickle.dump(X_combined,
                open(f"../output/X_combined_{combined_model_name}.pkl", "wb"))

    # X_combined = pickle.load(open(f"../output/X_combined_{combined_model_name}.pkl", 'rb'))

    model = model_nn_combined(
        input_size=np.column_stack(X_combined[1]).shape[1])
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.load_weights(f"../output/nn_{combined_model_name}_full.pkl")

    predictions = []
    with utils.timeit_context('predict'):
        for fold in [1, 2, 3, 4]:
            X = np.column_stack(X_combined[fold])
            predictions.append(model.predict(X))

    prediction = np.mean(np.array(predictions).astype(np.float64), axis=0)
    os.makedirs('../submissions', exist_ok=True)

    for clip10 in [5, 4, 3, 2]:
        clip = 10**(-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(prediction[:, col] * (1 - clip * 2) + clip, clip,
                              1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_combined_models_nn_{combined_model_name}_clip_{clip10}.csv',
            index=False,
            float_format='%.8f')
def generate_train_benchmark(use_watershed):
    data = dataset.UVectorNetDataset(fold=1,
                                     batch_size=8,
                                     output_watershed=use_watershed)
    count = 0
    with utils.timeit_context('generate 10 batches'):
        for X, y in data.generate_train():
            count += 1
            if count >= 10:
                break
def predict_combined_folds_models():
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))

    pool = Pool(16)

    for models in config.ALL_MODELS:
        combined_model_name = models[0][0] + '_combined'

        # def load_data(request):
        #     model_name, fold = request
        #     return load_test_data(data_dir, model_name, fold)

        with utils.timeit_context('load 4 folds data'):
            X_for_folds = pool.starmap(load_test_data_from_std_path, models)

        model = pickle.load(
            open(f"../output/xgb_combined_folds_{combined_model_name}.pkl",
                 "rb"))

        for (model_name, fold), X in zip(models, X_for_folds):
            with utils.timeit_context('predict'):
                prediction = model.predict_proba(X)
                weight = config.MODEL_WEIGHTS[model_name]
                result += prediction * weight
                total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight

    for clip10 in [5, 4, 3, 2]:
        clip = 10**(-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip,
                              1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_combined_folds_models_xgboost_clip_{clip10}.csv',
            index=False,
            float_format='%.8f')
def predict_all_single_fold_models():
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))

    data_dir = '../output/prediction_test_frames/'

    for models in config.ALL_MODELS:
        for model_name, fold in models:
            weights_fn = f"../output/nn1_{model_name}_{fold}_full.pkl"
            print(model_name, fold, weights_fn)

            with utils.timeit_context('load data'):
                X = load_test_data(data_dir, model_name, fold)
                print(X.shape)

            model = model_nn(input_size=X.shape[1])
            model.compile(optimizer=Adam(lr=1e-4),
                          loss='binary_crossentropy',
                          metrics=['accuracy'])
            model.load_weights(weights_fn)

            with utils.timeit_context('predict'):
                prediction = model.predict(X)
                weight = config.MODEL_WEIGHTS[model_name]
                result += prediction * weight
                total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight

    for clip10 in [5, 4, 3, 2]:
        clip = 10**(-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip,
                              1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_single_folds_models_nn_clip_{clip10}.csv',
            index=False,
            float_format='%.8f')
def packer(map_name='wmap', save_to=None):
    import texture
    import packer
    utils.pg_init()
    textures = texture.TextureGroup.get_group(map_name)
    with utils.timeit_context('Packing'):
        pack = packer.ImagePack(img.image for _, img in textures.iter_all())
    if save_to:
        pg.image.save(pack.image, save_to)
    else:
        utils.show_surface(pack.image)
def pack_all():
    import texture
    import packer
    utils.pg_init()
    allNames = [
        'smap', 'wmap', 'mmap'
    ] + ['fight{:03d}'.format(i) for i in range(110)]
    rates = []
    for name in allNames:
        with utils.timeit_context('Load and pack ' + name):
            try:
                textures = texture.TextureGroup.get_group(name)
                pack = packer.ImagePack(img.image for _, img in textures.iter_all())
                rates.append(pack.rate)
            except FileNotFoundError:
                pack = None
        # if pack:
        #     utils.show_surface(pack.image)
    pg.quit()
    import matplotlib.pyplot as plt
    plt.hist(rates)
    plt.show()
def load_image(name='smap.png'):
    utils.pg_init()
    with utils.timeit_context('Load image: ' + name):
        pg.image.load(name)