    # tail of a constructor/factory call whose head is outside this view:
    # one input "row" per song, playlist length from the data pipeline,
    # feature dimensionality from the model definition
    batch_size=len(idx2song),
    pl_length=dp_train.out_size - 1,
    feat_dim=model.FEAT_DIM,
)

# initialize network
my_net = Network(net)

# load previously fit parameters from the run's output directory
params_file = os.path.join(out_dir, '{}_params.pkl'.format(model.name))
my_net.load(file_path=params_file)

# shape withheld continuations for evaluation
_, Y_cont = shape_data(playlists_idx, songs_idx, idx2song, features,
                       subset=cont_idx)

# songs in the "query" playlists need to be masked to make sure that
# they are not recommended as continuations
_, Y_query = shape_data(playlists_idx, songs_idx, idx2song, features,
                        subset=query_idx)

# get num of song occurrences when model was fit for cold-start analysis
# NOTE(review): this sums Y_query (the query split), not a fit split —
# the commented-out line below suggests the two were meant to coincide
# here; confirm this matches the fitting data. # Y_fit = Y_query
train_occ = np.asarray(Y_query.sum(axis=1)).flatten()
# provide data information show_data_splits(playlists_idx, songs_idx, idx2song, song2artist, train_idx, valid_idx, fit_idx, query_idx, cont_idx) # provide model information print('\nNetwork:') show_design(model) if args.train: # # train the hybrid model while validating on withheld playlists # # prepare input song features and playlist targets at training X_train, Y_train = shape_data( playlists_idx, songs_idx, idx2song, features, mode='train', subset=train_idx ) # prepare input song features and playlist targets at validation X_valid, Y_valid = shape_data( playlists_idx, songs_idx, idx2song, features, mode='test', subset=valid_idx ) # preprocess input features if required # use the training song features to standardize the validation data if model.standardize: scaler = prep.RobustScaler() X_train = scaler.fit_transform(X_train) X_valid = scaler.transform(X_valid)
# split the disjoint test playlists into "query" (seen at fit time in the
# continuation split) and "continuation" (fully withheld) indices
query_idx = np.intersect1d(test_idx_dsj, train_idx_cnt)
cont_idx = np.intersect1d(test_idx_dsj, test_idx_cnt)

# provide data information
show_data_splits(playlists_idx, songs_idx, idx2song, song2artist,
                 train_idx, valid_idx, fit_idx, query_idx, cont_idx)

#
# extend the playlists in the query split and evaluate the
# continuations by comparing them to actual withheld continuations
#

# prepare song-playlist matrix in test continuations
# (idx2song=None, features=None: only the binary song-playlist matrix is
# needed here, no audio/content features)
_, Y_cont = shape_data(playlists_idx, songs_idx, idx2song=None,
                       features=None, subset=cont_idx)

# prepare song-playlist matrix in test queries
# used to mask songs from queries
_, Y_query = shape_data(playlists_idx, songs_idx, idx2song=None,
                        features=None, subset=query_idx)

# prepare song-playlist matrix in fit playlists
# (call continues past the end of this view)
_, Y_fit = shape_data(playlists_idx, songs_idx, idx2song=None,
# provide model information
print('\nMatrix Factorization:')
show_design(model)

if args.train:

    #
    # train the factorization while validating on withheld playlists
    # for tuning purposes, one fold is enough
    #

    # NOTE(review): the `break` implies this section sits inside an
    # enclosing `for fold ...` cross-validation loop outside this view —
    # confirm against the full file
    if fold > 0:
        print('\nI won\'t train anymore. Just did it for one fold.')
        break

    # prepare song-playlist matrix in training
    # (idx2song=None, features=None: pure collaborative filtering, the
    # factorization needs only the interaction matrix)
    _, Y_train = shape_data(
        playlists_idx, songs_idx, idx2song=None, features=None,
        subset=train_idx
    )

    # prepare song-playlist matrix in validation
    _, Y_valid = shape_data(
        playlists_idx, songs_idx, idx2song=None, features=None,
        subset=valid_idx
    )

    # train the model
    # (call continues past the end of this view)
    train(
        model=model,
        train_target=Y_train,
        valid_target=Y_valid,
        out_dir=out_dir,
        use_gpu=args.use_gpu