from parameterTunning.AbstractClassSearch import DictionaryKeys
from loader.loader import save_dataframe, train_data, target_data, test_data, tracks_data
from utils.auxUtils import Evaluator, buildURMMatrix, filter_seen
import pandas as pd
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import matplotlib.pyplot as plt
from parameterTunning.GridSearch import GridSearch
from sklearn.model_selection import GridSearchCV

URM_train = buildURMMatrix(train_data)
URM_test = buildURMMatrix(test_data)

rs = SLIM_BPR_Cython(train_data)

grid_param = {
    'lambda_i': [1e-1, 1e-2, 1e-3, 1e-4],
    'lambda_j': [1e-1, 1e-2, 1e-3, 1e-4],
    'topK': [300, 400, 500]
}

evaluator = Evaluator()


# GridSearchCV expects `scoring` to be a callable of the form
# scorer(estimator, X, y), not an already-computed score: the original passed
# evaluator.evaluate(...) directly, which runs the evaluation once, eagerly,
# and hands GridSearchCV a plain float.
def map_scorer(estimator, X, y=None):
    predictions = estimator.recommend(target_data['playlist_id'])
    return evaluator.evaluate(predictions, test_data)


gd_sr = GridSearchCV(estimator=rs,
                     param_grid=grid_param,
                     scoring=map_scorer,
                     n_jobs=2)

gd_sr.fit(URM_train)
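# Note: GridSearchCV also clones its estimator through get_params()/set_params(),
# which SLIM_BPR_Cython does not implement. A minimal sketch of an
# sklearn-compatible wrapper (the class name and the way fit() forwards the
# hyperparameters are assumptions, not part of the original code):
from sklearn.base import BaseEstimator


class SlimBprSklearnWrapper(BaseEstimator):
    def __init__(self, lambda_i=1e-3, lambda_j=1e-3, topK=300):
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.topK = topK

    def fit(self, X, y=None):
        # Rebuild the underlying recommender with the current hyperparameters;
        # BaseEstimator supplies get_params()/set_params() from __init__.
        self.model_ = SLIM_BPR_Cython(train_data, recompile_cython=False)
        self.model_.fit(lambda_i=self.lambda_i, lambda_j=self.lambda_j,
                        topK=self.topK)
        return self

    def recommend(self, playlist_ids):
        return self.model_.recommend(playlist_ids)

# Usage would then be:
#   gd_sr = GridSearchCV(estimator=SlimBprSklearnWrapper(),
#                        param_grid=grid_param, scoring=map_scorer, n_jobs=2)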
from cbfRS.cbfRS import CbfRS
from loader.loader import save_dataframe, train_data, target_data, full_data, test_data, tracks_data
from utils.auxUtils import Evaluator
import pandas as pd
import matplotlib.pyplot as plt

evaluator = Evaluator()
df = pd.DataFrame([[0, 0, 0]], columns=['knn', 'map', 'shr'])
top_50 = pd.DataFrame([[0, 0, 0]], columns=['knn', 'map', 'shr'])
shrinkage = 0
plot_graph = False

while shrinkage < 50:
    map_list = []
    knn_list = []
    k = 10
    while k < 100:
        rs = CbfRS(tracks_data, 10, k, shrinkage, tf_idf=False, bm25=True)
        rs.fit(train_data)
        print('knn: ', k, ' shrinkage: ', shrinkage)
        predictions = rs.recommend(target_data['playlist_id'])
        map_ = evaluator.evaluate(predictions, test_data)
        map_list.append(map_)
        # DataFrame.append() was removed in pandas 2.0; pd.concat is the
        # equivalent idiom.
        df = pd.concat(
            [df, pd.DataFrame([[k, map_, shrinkage]],
                              columns=['knn', 'map', 'shr'])],
            ignore_index=True)
        top_50 = df.sort_values(by=['map']).tail(50)
        knn_list.append(k)
        k += 10
    # The original file breaks off here; a step of 10 for the shrinkage sweep
    # is an assumption, added so the outer loop terminates.
    shrinkage += 10
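# matplotlib is imported and a plot_graph flag is defined but never used in
# what survives of this file; a minimal sketch of the plot it presumably gated
# (plotting the last completed kNN sweep; the labels are assumptions):
if plot_graph:
    plt.plot(knn_list, map_list, marker='o')
    plt.xlabel('kNN neighbours')
    plt.ylabel('MAP@10')
    plt.title('CBF (BM25), shrinkage sweep')
    plt.show()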
from loader.loader import train_data, test_data, tracks_data, target_data, full_data, save_dataframe
from utils.auxUtils import Evaluator
from graphBased.rp3betaRS import RP3betaRecommender

r = RP3betaRecommender(train_data)
r.fit()
pred = r.recommend(target_data['playlist_id'])

e = Evaluator()
e.evaluate(pred, test_data)
from loader.loader import train_data, test_data, tracks_data, full_data, target_data, save_dataframe
from utils.auxUtils import buildURMMatrix, buildICMMatrix, Evaluator
import numpy as np
from svdRS.pureSVD import PureSVDRecommender
from collaborative_filtering_RS.col_user_userRS import ColBfUURS
from collaborative_filtering_RS.col_item_itemRS import ColBfIIRS
from MatrixFactorization.mf_skl import MfNnz
from cbfRS.cbfRS import CbfRS
import matplotlib.pyplot as pyplot
from slimRS.slimElasticNet import SLIMElasticNetRecommender
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from FW_boosting.CFW_D_Similarity import CFW_D_Similarity_Linalg

URM_train = buildURMMatrix(train_data)
evaluator = Evaluator()

# Profile length = number of interactions per playlist; used to split users
# into 5% blocks, from shortest to longest profile.
profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)

rs_i_i_cf = ColBfIIRS(10, 750, 50, tf_idf=True)
rs_i_i_cf.fit(train_data)
# predictions_item_item = rs_i_i_cf.recommend(target_data['playlist_id'])
map_item_item = []

'''
rs_u_u_cf = ColBfUURS(10, 200, 50, tf_idf=True)
rs_u_u_cf.fit(train_data)
predictions_user_user = rs_u_u_cf.recommend(target_data['playlist_id'])
map_user_user = []
'''

rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
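# The profile_length / block_size / sorted_users triple prepared above is the
# usual setup for measuring MAP per user group (5% blocks, shortest to longest
# profile). The file breaks off before that loop, so this is a minimal sketch
# of it; restricting test_data by 'playlist_id' assumes its dataframe layout:
for group_id in range(20):
    start = group_id * block_size
    end = min((group_id + 1) * block_size, len(profile_length))
    users_in_group = sorted_users[start:end]

    predictions = rs_i_i_cf.recommend(users_in_group)
    test_in_group = test_data[test_data['playlist_id'].isin(users_in_group)]
    map_item_item.append(evaluator.evaluate(predictions, test_in_group))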
from hybrid_similarities.new_hybrid import HybridRS
from loader.loader import train_data, test_data, tracks_data, target_data, full_data, save_dataframe
from utils.auxUtils import Evaluator, submit_dataframe_to_kaggle
import pandas as pd
from mail_notification.notify import NotifyMail

# so far best hybrid with pureSVD = alpha=0.3 beta=10 gamma=1 eta=10
r = HybridRS(tracks_data)
e = Evaluator()
r.fit(train_data)

# content based filter
gammas = [0, 0.8, 1]
# collaborative user-user
alphas = [0, 0.2, 0.3]
# collaborative item-item
betas = [0, 10]
# pureSVD
etas = [0, 10]
# graph based
thetas = [0, 20, 30]
# slim BPR
deltas = [0, 0.8, 1]
# slim ElasticNet
omegas = [0, 10, 30]

list_res = []
# 0.2 10 1.0 10 1 40.0 30
sigmas = [0, 20]

for gamma in gammas:
    for alpha in alphas:
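        # The original file breaks off here; the rest of the sweep below is a
        # reconstruction (an assumption, including recommend()'s weight
        # arguments), following the pattern of the other grid-search scripts:
        # iterate the remaining weights, evaluate, and collect every result.
        for beta in betas:
            for eta in etas:
                for theta in thetas:
                    for delta in deltas:
                        for omega in omegas:
                            for sigma in sigmas:
                                pred = r.recommend(
                                    target_data['playlist_id'],
                                    alpha, beta, gamma, eta,
                                    theta, delta, omega, sigma)
                                map_ = e.evaluate(pred, test_data)
                                list_res.append(
                                    [alpha, beta, gamma, eta,
                                     theta, delta, omega, sigma, map_])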
from hybrid_col_cbf_RS.hybridRS import HybridRS
from loader.loader import save_dataframe, train_data, target_data, full_data, test_data, tracks_data
from utils.auxUtils import Evaluator
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame([[0, 0, 0, 0]], columns=['alpha', 'beta', 'gamma', 'map'])
top_50 = pd.DataFrame([[0, 0, 0, 0]], columns=['alpha', 'beta', 'gamma', 'map'])
top_50_p = pd.DataFrame([[0, 0, 0, 0]], columns=['alpha', 'beta', 'gamma', 'map'])

# Hybrid (cbf - colf)
rs = HybridRS(tracks_data, 10, tf_idf=True)
evaluator = Evaluator()
rs.fit(train_data)

alpha = 1
while alpha <= 10:
    beta = 1
    while beta <= 10:
        gamma = 1
        while gamma <= 19:
            hybrid = rs.recommend(target_data['playlist_id'], alpha, beta, gamma)
            print("Alpha: ", alpha, " Beta: ", beta, " Gamma: ", gamma)
            temp_map = evaluator.evaluate(hybrid, test_data)
            # DataFrame.append() was removed in pandas 2.0; pd.concat is the
            # equivalent idiom. The original file breaks off inside this call:
            # the row follows the columns declared above, and the unit-step
            # increments closing the three loops are assumptions added so the
            # sweep terminates.
            df = pd.concat(
                [df, pd.DataFrame([[alpha, beta, gamma, temp_map]],
                                  columns=['alpha', 'beta', 'gamma', 'map'])],
                ignore_index=True)
            gamma += 1
        beta += 1
    alpha += 1
from loader.loader import train_data, test_data, tracks_data, full_data, target_data
from utils.auxUtils import buildURMMatrix, Evaluator
import numpy as np
from svdRS.pureSVD import PureSVDRecommender
from collaborative_filtering_RS.col_user_userRS import ColBfUURS
from collaborative_filtering_RS.col_item_itemRS import ColBfIIRS
from MatrixFactorization.mf_skl import MfNnz
from cbfRS.cbfRS import CbfRS
import matplotlib.pyplot as pyplot
from slimRS.slimElasticNet import SLIMElasticNetRecommender
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

# URM built from the full dataset here; it is only used for the
# profile-length statistics below, while the recommenders fit on train_data.
URM_train = buildURMMatrix(full_data)
evaluator = Evaluator()

profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)

rs_i_i_cf = ColBfIIRS(10, 750, 50, tf_idf=True)
rs_i_i_cf.fit(train_data)
predictions_item_item = rs_i_i_cf.recommend(target_data['playlist_id'])
map_item_item = []

rs_u_u_cf = ColBfUURS(10, 200, 50, tf_idf=True)
rs_u_u_cf.fit(train_data)
predictions_user_user = rs_u_u_cf.recommend(target_data['playlist_id'])
map_user_user = []

rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
rs_content.fit(train_data)
predictions_content = rs_content.recommend(target_data['playlist_id'])
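# Each recommender above was fit on train_data, so the three prediction sets
# can be scored against the held-out split. A minimal sketch of that scoring,
# following the Evaluator usage in the other scripts (the print labels are
# additions):
print('item-item CF MAP:', evaluator.evaluate(predictions_item_item, test_data))
print('user-user CF MAP:', evaluator.evaluate(predictions_user_user, test_data))
print('content-based MAP:', evaluator.evaluate(predictions_content, test_data))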
import os
import sys
import time
import platform
import subprocess
import multiprocessing

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

# check_matrix, filter_seen and similarityMatrixTopK are assumed to live in
# utils.auxUtils alongside the other helpers used by this repo's scripts.
from utils.auxUtils import (Evaluator, buildURMMatrix, check_matrix,
                            filter_seen, similarityMatrixTopK)


class SLIM_BPR_Cython():

    def __init__(self, train_data, URM_validation=None, recompile_cython=True,
                 final_model_sparse_weights=True,
                 train_with_sparse_weights=False, symmetric=True):

        # super(SLIM_BPR_Cython, self).__init__()
        self.URM_train = check_matrix(buildURMMatrix(train_data), 'csr')
        self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values

        self.n_users = self.URM_train.shape[0]
        self.n_items = self.URM_train.shape[1]
        self.normalize = False

        self.train_with_sparse_weights = train_with_sparse_weights
        self.sparse_weights = final_model_sparse_weights

        if URM_validation is not None:
            self.URM_validation = URM_validation.copy()
        else:
            self.URM_validation = None

        if self.train_with_sparse_weights:
            self.sparse_weights = True

        self.URM_mask = self.URM_train.copy()
        self.URM_mask.eliminate_zeros()

        self.symmetric = symmetric

        if not self.train_with_sparse_weights:
            n_items = self.URM_train.shape[1]
            requiredMB = 8 * n_items**2 / 1e+06
            if symmetric:
                requiredMB /= 2

            print("SLIM_BPR_Cython: Estimated memory required for similarity matrix of {} items is {:.2f} MB"
                  .format(n_items, requiredMB))

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self, epochs=160, logFile=None, playlist_ids=None,
            filterTopPop=False, batch_size=1000, lambda_i=0.001,
            lambda_j=0.001, learning_rate=0.001, topK=200, sgd_mode='sgd',
            gamma=0.995, beta_1=0.9, beta_2=0.999, stop_on_validation=False,
            lower_validations_allowed=10, validation_metric="map",
            validation_function=None, validation_every_n=10):
        '''
        :param epochs:
        :param filterTopPop:
        :param batch_size:
        :param lambda_i: regularization weight for positive items; the paper proposes 0.0025
        :param lambda_j: regularization weight for negative items; the paper proposes 0.00025
        :param learning_rate: step size applied at each epoch
        :param topK: kNN similarity cutoff
        :param sgd_mode: adagrad, rmsprop, adam, sgd
        :param gamma: rmsprop decay
        :param beta_1: adam parameter; the paper proposes 0.9
        :param beta_2: adam parameter; the paper proposes 0.999
        '''
        print('Fitting..')

        # Import the compiled module
        from slimRS.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch
        print('Cython module imported')

        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()
        URM_train_positive.eliminate_zeros()

        self.sgd_mode = sgd_mode
        self.epochs = epochs

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,
            sgd_mode=sgd_mode,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        if topK is not False and topK < 1:
            raise ValueError("TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'".format(topK))
        self.topK = topK

        self.logFile = logFile

        if validation_every_n is not None:
            self.validation_every_n = validation_every_n
        else:
            self.validation_every_n = np.inf

        if validation_function is None:
            validation_function = self.default_validation_function
        print('After validation')

        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate

        start_time = time.time()
        print('Time has started')

        best_validation_metric = None
        lower_validations_count = 0
        convergence = False

        self.S_incremental = self.cythonEpoch.get_S()
        self.S_best = self.S_incremental.copy()
        self.epochs_best = 0

        currentEpoch = 0

        while currentEpoch < self.epochs and not convergence:

            if self.batch_size > 0:
                self.cythonEpoch.epochIteration_Cython()
            else:
                print("Batch mode not available")

            # Determine whether a validation step is required
            if self.URM_validation is not None and (currentEpoch + 1) % self.validation_every_n == 0:

                print("SLIM_BPR_Cython: Validation begins...")

                self.get_S_incremental_and_set_W()
                results_run = validation_function(playlist_ids)
                print("SLIM_BPR_Cython: {}".format(results_run))

                # If validation is used for early stopping, keep the best
                # similarity matrix found so far
                if stop_on_validation:
                    current_metric_value = results_run  # results_run[validation_metric]

                    if best_validation_metric is None or best_validation_metric < current_metric_value:
                        best_validation_metric = current_metric_value
                        self.S_best = self.S_incremental.copy()
                        self.epochs_best = currentEpoch + 1
                        lower_validations_count = 0
                    else:
                        lower_validations_count += 1

                    if lower_validations_count >= lower_validations_allowed:
                        convergence = True
                        print("SLIM_BPR_Cython: Convergence reached! Terminating at epoch {}. "
                              "Best value for '{}' at epoch {} is {:.4f}. Elapsed time {:.2f} min"
                              .format(currentEpoch + 1, validation_metric,
                                      self.epochs_best, best_validation_metric,
                                      (time.time() - start_time) / 60))

            # If no validation is required, always keep the latest model
            if not stop_on_validation:
                self.S_best = self.S_incremental.copy()

            print("SLIM_BPR_Cython: Epoch {} of {}. Elapsed time {:.2f} min"
                  .format(currentEpoch + 1, self.epochs,
                          (time.time() - start_time) / 60))

            currentEpoch += 1

        self.get_S_incremental_and_set_W()
        print('Finishing...')
        sys.stdout.flush()

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):

        current_config = {'lambda_i': self.lambda_i,
                          'lambda_j': self.lambda_j,
                          'batch_size': self.batch_size,
                          'learn_rate': self.learning_rate,
                          'topK_similarity': self.topK,
                          'epoch': currentEpoch}

        print("Test case: {}\nResults {}\n".format(current_config, results_run))
        # print("Weights: {}\n".format(str(list(self.weights))))

        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(current_config, results_run))
            # logFile.write("Weights: {}\n".format(str(list(self.weights))))
            logFile.flush()

    def runCompilationScript(self):

        # Run the compile script setting the working directory, to ensure the
        # compiled files are contained in the appropriate subfolder and not
        # the project root
        compiledModuleSubfolder = "/slimRS/Cython"
        # fileToCompile_list = ['Sparse_Matrix_CSR.pyx', 'SLIM_BPR_Cython_Epoch.pyx']
        fileToCompile_list = ['SLIM_BPR_Cython_Epoch.pyx']

        for fileToCompile in fileToCompile_list:

            cmd = 'python' if platform.system() == 'Windows' else 'python3'

            command = [cmd, 'compileCython.py', fileToCompile,
                       'build_ext', '--inplace']

            output = subprocess.check_output(' '.join(command), shell=True,
                                             cwd=os.getcwd() + compiledModuleSubfolder)

            try:
                command = ['cython', fileToCompile, '-a']
                output = subprocess.check_output(' '.join(command), shell=True,
                                                 cwd=os.getcwd() + compiledModuleSubfolder)
            except Exception:
                pass

        print("Compiled module saved in subfolder: {}".format(compiledModuleSubfolder))

        # Command to run compilation script
        # python compileCython.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace

        # Command to generate html report
        # cython -a SLIM_BPR_Cython_Epoch.pyx

    def get_S_incremental_and_set_W(self):

        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:
            self.W_sparse = self.S_incremental
        else:
            if self.sparse_weights:
                self.W_sparse = similarityMatrixTopK(self.S_incremental, k=self.topK)
            else:
                self.W = self.S_incremental

    def get_weight_matrix(self):

        if self.train_with_sparse_weights:
            matrix_w = self.W_sparse
        else:
            if self.sparse_weights:
                matrix_w = self.W_sparse
            else:
                matrix_w = self.W

        return csr_matrix(matrix_w, shape=(self.n_items, self.n_items))

    def get_estimated_ratings(self):
        matrix_W = self.get_weight_matrix()
        return check_matrix(self.URM_train.dot(matrix_W), 'csr')

    def get_sym_matrix(self, weight):
        return check_matrix(self.get_weight_matrix() * weight, 'csr')

    def recommend(self, playlist_ids):
        print("Recommending...")
        final_prediction = {}

        if self.train_with_sparse_weights:
            matrix_W = self.W_sparse
        else:
            if self.sparse_weights:
                matrix_W = self.W_sparse
            else:
                matrix_W = self.W

        # W is an item-item similarity matrix, hence (n_items, n_items)
        self.W = csr_matrix(matrix_W, shape=(self.n_items, self.n_items))
        estimated_ratings = check_matrix(self.URM_train.dot(self.W), 'csr')

        counter = 0

        for k in playlist_ids:
            row = estimated_ratings[k]

            # aux contains the indices (track_id) of the highest-scoring songs
            indx = row.data.argsort()[::-1]
            aux = row.indices[indx]
            user_playlist = self.URM_train[k]
            # Pad with top-pop songs so there are always enough candidates
            # left after filtering out already-seen tracks
            aux = np.concatenate((aux, self.top_pop_songs), axis=None)
            top_songs = filter_seen(aux, user_playlist)[:10]

            string = ' '.join(str(e) for e in top_songs)
            final_prediction.update({k: string})

            if (counter % 1000) == 0:
                print("Playlist num", counter, "/10000")
            counter += 1

        df = pd.DataFrame(list(final_prediction.items()),
                          columns=['playlist_id', 'track_ids'])
        return df

    def default_validation_function(self, playlist_ids):
        e = Evaluator()
        return e.evaluate_tuning(self.recommend(playlist_ids), self.URM_validation)

    def get_URM_train(self):
        return self.URM_train

    def evaluateRecommendations(self, URM_test, at=10, minRatingsPerUser=1,
                                exclude_seen=True, mode='parallel',
                                filterTopPop=False,
                                filterCustomItems=np.array([], dtype=int),
                                filterCustomUsers=np.array([], dtype=int)):
        """
        Speed info:
        - Sparse weights: batch mode is 2x faster than sequential
        - Dense weights: batch and sequential speed are equivalent

        :param URM_test:            URM to be used for testing
        :param at: 10               Length of the recommended items
        :param minRatingsPerUser: 1 Users with fewer interactions than this are not evaluated
        :param exclude_seen: True   Whether to remove already-seen items from the recommended items
        :param mode: 'sequential', 'parallel', 'batch'
        :param filterTopPop: False or decimal number  Percentage of items to be removed from recommended list and testing interactions
        :param filterCustomItems: Array, default empty  Item IDs NOT to take into account when recommending
        :param filterCustomUsers: Array, default empty  User IDs NOT to take into account when recommending
        :return:
        """

        if len(filterCustomItems) == 0:
            self.filterCustomItems = False
        else:
            self.filterCustomItems = True
            self.filterCustomItems_ItemsID = np.array(filterCustomItems)

        '''
        if filterTopPop != False:

            self.filterTopPop = True

            _, _, self.filterTopPop_ItemsID = removeTopPop(self.URM_train, URM_2=URM_test_new, percentageToRemove=filterTopPop)

            print("Filtering {}% TopPop items, count is: {}".format(filterTopPop * 100, len(self.filterTopPop_ItemsID)))

            # Zero-out the items in order to be considered irrelevant
            URM_test_new = check_matrix(URM_test_new, format='lil')
            URM_test_new[:, self.filterTopPop_ItemsID] = 0
            URM_test_new = check_matrix(URM_test_new, format='csr')
        '''

        # During testing CSR is faster
        self.URM_test = check_matrix(URM_test, format='csr')
        self.evaluator = Evaluator()
        self.URM_train = check_matrix(self.URM_train, format='csr')
        self.at = at
        self.minRatingsPerUser = minRatingsPerUser
        self.exclude_seen = exclude_seen

        nusers = self.URM_test.shape[0]

        # Prune users with an insufficient number of ratings
        rows = self.URM_test.indptr
        numRatings = np.ediff1d(rows)
        mask = numRatings >= minRatingsPerUser
        usersToEvaluate = np.arange(nusers)[mask]

        if len(filterCustomUsers) != 0:
            print("Filtering {} Users".format(len(filterCustomUsers)))
            usersToEvaluate = set(usersToEvaluate) - set(filterCustomUsers)

        usersToEvaluate = list(usersToEvaluate)

        if mode == 'sequential':
            return self.evaluateRecommendationsSequential(usersToEvaluate)
        elif mode == 'parallel':
            return self.evaluateRecommendationsParallel(usersToEvaluate)
        elif mode == 'batch':
            return self.evaluateRecommendationsBatch(usersToEvaluate)
        elif mode == 'cython':
            return self.evaluateRecommendationsCython(usersToEvaluate)
        # elif mode == 'random-equivalent':
        #     return self.evaluateRecommendationsRandomEquivalent(usersToEvaluate)
        else:
            raise ValueError("Mode '{}' not available".format(mode))

    def evaluateOneUser(self, test_user):

        # Being the URM CSR, the indices are the non-zero column indexes
        # relevant_items = self.URM_test_relevantItems[test_user]
        relevant_items = self.URM_test[test_user].indices

        # recommend() expects an iterable of playlist ids and returns a
        # dataframe whose 'track_ids' column is a space-separated string,
        # so parse the single row back into an integer array
        recommended_df = self.recommend([test_user])
        recommended_items = np.array(recommended_df['track_ids'].iloc[0].split(), dtype=int)

        is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)

        # evaluate the recommendation list with ranking metrics ONLY
        return self.evaluator.map(is_relevant, relevant_items)

    def evaluateRecommendationsParallel(self, usersToEvaluate):

        print("Evaluation of {} users begins".format(len(usersToEvaluate)))

        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        resultList = pool.map(self.evaluateOneUser, usersToEvaluate)

        # for i, _ in enumerate(pool.imap_unordered(self.evaluateOneUser, usersToEvaluate), 1):
        #     if (i % 1000) == 0:
        #         sys.stderr.write('\rEvaluated {} users'.format(i))

        # Close the pool to avoid memory leaks
        pool.close()

        n_eval = len(usersToEvaluate)
        map_ = 0.0

        # Looping is slightly faster than the numpy vectorized approach: less
        # data transformation. evaluateOneUser returns a scalar, so accumulate
        # it directly (the original indexed result[0], which fails on a float).
        for result in resultList:
            map_ += result

        if n_eval > 0:
            map_ /= n_eval
        else:
            print("WARNING: No users had a sufficient number of relevant items")

        results_run = {}
        results_run["map"] = map_

        return results_run
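# A minimal usage sketch of the class above, mirroring how the tuning scripts
# in this repo drive it (the epoch/regularization values are illustrative
# assumptions, not tuned results):
if __name__ == '__main__':
    from loader.loader import train_data, test_data, target_data

    rs = SLIM_BPR_Cython(train_data, recompile_cython=False)
    rs.fit(epochs=60, lambda_i=1e-3, lambda_j=1e-3, topK=300)
    predictions = rs.recommend(target_data['playlist_id'])
    print(Evaluator().evaluate(predictions, test_data))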
from lightfm import LightFM
from lightfm.data import Dataset
from loader.loader import test_data, train_data, target_data, tracks_data
from utils.auxUtils import buildFMMatrix, buildURMMatrix, Evaluator
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd
from tqdm import tqdm

e = Evaluator()

# Instantiate and train the model
alpha = 1e-3
model = LightFM(no_components=30, loss='warp', learning_rate=0.01)

# todo add latent factor weights
# todo force the dimension of the data matrix
urm = coo_matrix(buildURMMatrix(train_data))

print('Fitting...')
# todo: item features -- the original passed item_features=item_feature, but
# item_feature is never defined in this file, so fit without it until the
# feature matrix is actually built (see the sketch below)
model.fit(urm, epochs=30, num_threads=4)

final_prediction = {}
tracks = np.array(tracks_data['track_id'], dtype='int32')

for k in tqdm(target_data['playlist_id']):
    # user_index = np.full(len(tracks), k, dtype='int32')
    predictions = model.predict(k, tracks)
    # argsort returns positions into `predictions`; map them back through
    # `tracks` to get track ids (the two coincide only if ids are 0..n-1)
    ranking = tracks[np.argsort(predictions)[::-1]][:10]
    # use `t`, not `e`, to avoid shadowing the Evaluator instance above
    string = ' '.join(str(t) for t in ranking)
    final_prediction.update({k: string})

df = pd.DataFrame(list(final_prediction.items()),
                  columns=['playlist_id', 'track_ids'])
print(df)
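# The todo above asks for item features. A minimal sketch of building a
# feature matrix with lightfm.data.Dataset, assuming tracks_data carries
# 'album_id' and 'artist_id' columns (the column names are an assumption
# about the dataset):
dataset = Dataset()
dataset.fit(
    users=train_data['playlist_id'].unique(),
    items=tracks_data['track_id'],
    item_features=(['album:{}'.format(a) for a in tracks_data['album_id'].unique()] +
                   ['artist:{}'.format(a) for a in tracks_data['artist_id'].unique()]))

item_features = dataset.build_item_features(
    (row.track_id, ['album:{}'.format(row.album_id),
                    'artist:{}'.format(row.artist_id)])
    for row in tracks_data.itertuples())

# Note: the rows of item_features must line up with the columns of `urm`;
# feeding tracks_data in track_id order relies on ids being 0..n-1, the same
# assumption the rest of this script makes. With the matrix built, it would
# be passed to both fit and predict:
#   model.fit(urm, item_features=item_features, epochs=30, num_threads=4)
#   predictions = model.predict(k, tracks, item_features=item_features)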