# Fragment: encode one column, build the per-trip test frame, wrap it in a
# DataLoader and start the prediction loop. The fragment is cut at the end:
# the final `for prediction in tqdm(runner.predict_loader(` statement continues
# beyond what is visible here.
#
# NOTE(review): `c` is not bound in this fragment. These first two statements
# presumably sit inside a loop over the columns being encoded, with the rest
# of the fragment running after that loop - the original indentation was lost,
# confirm against the full script.

# Replace column `c` with integer codes fitted on its stringified values.
# NOTE(review): `astype(str)` runs before `fillna('unk')`. On pandas versions
# where `astype(str)` renders NaN as the string 'nan', the `fillna` never
# fires and missing values are encoded as the category 'nan'. Do not simply
# swap the order: that would change the encoded ids, which a model trained
# with this encoding presumably depends on.
le = preprocessing.LabelEncoder()
train_test[c] = le.fit_transform(
    train_test[c].astype(str).fillna('unk').values)

# Keep the rows that have a `row_num`; these are treated as the test split
# (presumably only the test file carries `row_num` - confirm).
test = train_test[~train_test['row_num'].isnull()]

# Per trip, the sequence of city ids as an array:
#   1. drop rows whose city_id equals the previous row's city_id,
#   2. drop rows with city_id == 0 (presumably the placeholder for the city
#      to be predicted - confirm),
#   3. group by `utrip_id` and collect the remaining city ids.
# NOTE(review): `shift(1)` is applied to the whole frame, not per trip, so the
# first row of a trip is compared against the last row of whatever trip
# precedes it in `test`.
test_trips = test[test['city_id'] != test['city_id'].shift(1)].query(
    'city_id!=0').groupby('utrip_id')['city_id'].apply(
        lambda x: x.values).reset_index()

# Same row filter as above (recomputed, not reused); for every trip take the
# last value of each column in `categorical_cols`.
X_test = test[test['city_id'] != test['city_id'].shift(1)].query(
    'city_id!=0').groupby(
        'utrip_id')[categorical_cols].last().reset_index()

# Overwrite `city_id` with the per-trip sequence. The assignment aligns on the
# index; both frames come from `groupby('utrip_id')` over the same filtered
# rows followed by `reset_index()`, which is what makes the rows line up.
X_test['city_id'] = test_trips['city_id']
# The index was already reset by the `reset_index()` above, so this only
# re-asserts a fresh 0..n-1 index.
X_test = X_test.reset_index(drop=True)

# Inference loader: no shuffling and a batch size of one, so batches follow
# the row order of X_test (presumably one trip per batch - depends on
# BookingDataset, which is defined outside this fragment).
test_dataset = BookingDataset(X_test, is_train=False)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1)

# Drop the references to the large intermediate frames and force a GC pass.
del train_test, test, test_trips
gc.collect()

# Locations to iterate over for prediction (presumably one log/checkpoint
# directory per trained model; how `mp` is used is not visible here).
model_paths = [
    '../input/booking-bi-lstm-ep1/logdir_nn000',
]
for mp in model_paths:
    # Only fold 0 is used.
    for fold_id in (0, ):
        runner = CustomRunner(device=device)
        # The model is sized by the number of classes known to `target_le`
        # (presumably the output dimension - confirm in BookingNN).
        model = BookingNN(len(target_le.classes_))
        # Collects predictions for this model/fold.
        pred = []
        # Statement is truncated here; its arguments and loop body lie beyond
        # the end of this fragment.
        for prediction in tqdm(
                runner.predict_loader(
# Fragment: finish building per-trip sequence frames for train and test, set
# up the test DataLoader and the cross-validation splitter, and enter the fold
# loop. The fragment starts in the middle of a call: the expression directly
# below is the argument of a call opened before the visible text (presumably
# `X_train.append(`, mirroring the `X_test.append(` that follows - confirm).
#
# NOTE(review): `c` is not bound in this fragment. The indentation of
# `X_test.append(` below assumes both appends sit inside a single enclosing
# `for c in ...` loop; the original indentation was lost, so verify against
# the full script.
#
# For column `c`: drop rows whose city_id equals the previous row's city_id
# (the shift is frame-wide, not per trip), then collect the values of `c` per
# `utrip_id` as a list.
train[train['city_id'] != train['city_id'].shift(1)].groupby(
    "utrip_id")[c].apply(list))
    # Same per-trip list for the test frame.
    X_test.append(
        test[test['city_id'] != test['city_id'].shift(1)].groupby(
            "utrip_id")[c].apply(list))

# Join the per-column Series side by side: one row per `utrip_id`, one
# list-valued column per collected column.
X_train = pd.concat(X_train, axis=1)
X_test = pd.concat(X_test, axis=1)

# Length of each trip's `city_id` list, i.e. the number of rows left in the
# trip after the consecutive-duplicate filter (despite the name `n_trips`).
X_train['n_trips'] = X_train['city_id'].map(lambda x: len(x))
# Train only on trips with at least three entries, ordered from shortest to
# longest. `reset_index(drop=True)` discards the `utrip_id` index and leaves a
# 0..n-1 index, which the `.loc[tr_idx, :]` lookup further down relies on.
X_train = X_train.query('n_trips > 2').sort_values('n_trips').reset_index(
    drop=True)
# The test frame is neither filtered nor sorted; only its `utrip_id` index is
# dropped.
X_test = X_test.reset_index(drop=True)

# Five folds, split without shuffling.
cv = StratifiedKFold(n_splits=5, shuffle=False)

# Inference loader: one item per batch, original row order, custom collation
# in non-training mode.
test_dataset = BookingDataset(X_test, is_train=False)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
    # NOTE(review): os.cpu_count() is documented to return None when the count
    # cannot be determined - confirm DataLoader is never handed None here, or
    # add a fallback.
    num_workers=os.cpu_count(),
    pin_memory=True,
    collate_fn=MyCollator(is_train=False),
    shuffle=False,
)

# Drop the references to the source frames and force a GC pass. `X_test` is
# deleted after `test_dataset` was built from it, so the dataset must hold on
# to whatever it needs itself (BookingDataset is defined outside this
# fragment - confirm). `train_test` is not created in the visible text.
del train_test, train, test, X_test
gc.collect()

# Stratify the folds on trip length: `pd.cut(..., 5, labels=False)` turns
# `n_trips` into five equal-width bins identified by integer codes.
for fold_id, (tr_idx, va_idx) in enumerate(
        cv.split(X_train, pd.cut(X_train['n_trips'], 5, labels=False))):
    # Lists every fold of the 5-fold split, so the condition is always true as
    # written; presumably kept as a switch for running a subset of folds.
    if fold_id in (0, 1, 2, 3, 4):
        # `tr_idx` holds positional indices from the splitter; label-based
        # `.loc` matches them because the index was reset to 0..n-1 above.
        # The fragment ends here; the rest of the fold body is not visible.
        X_tr = X_train.loc[tr_idx, :]