def makeCV(kfolds, X, Labels, User, Meta, clf, opts):
    """Fit and predict over user-wise folds, averaging predictions over bags.

    NOTE(review): this definition stops after the bagging loop -- the averaged
    predictions are neither scored nor returned here. Confirm whether the
    remainder of the function was lost.

    Parameters
    ----------
    kfolds : sequence
        ``kfolds[0]`` is printed when a bag fails; ``kfolds[1]`` is iterated
        as ``(train_users, test_users)`` pairs of indices into
        ``np.unique(User)``.
    X : array
        Indexed as ``X[mask, :, :]`` / ``X[mask, ...]`` with boolean masks
        that have one entry per element of ``User``.
    Labels, User, Meta : arrays
        Indexed with the same per-row boolean masks as ``X``.
    clf : object
        Must provide ``fit(X, y)`` and ``predict(X)``; it is also handed to
        ``updateMeta`` (defined elsewhere) before every fit and predict.
    opts : dict
        If it contains the key ``'bagging'``, bags come from
        ``baggingIterator(opts, training_users)``; otherwise a single bag
        ``[-1]`` is used.
    """
    users = np.unique(User)
    use_bagging = 'bagging' in opts  # dict.has_key() no longer exists on Python 3
    toPredData = []
    Gauc = []

    for train_users, test_users in kfolds[1]:
        allProb = 0

        # Build the membership set once per fold instead of once per row.
        test_set = set(users[test_users])
        test_index = np.array([u in test_set for u in User])
        fold_test_users = np.unique(users[test_users])

        if use_bagging:
            bagging = baggingIterator(opts, [users[i] for i in train_users])
        else:
            bagging = [[-1]]

        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User])
            # Rows that are not in the test fold XOR rows of the bag's users.
            # np.logical_not replaces np.negative, which raises TypeError on
            # boolean arrays in NumPy >= 1.13.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)
            try:
                # train
                updateMeta(clf, Meta[train_index])
                clf.fit(X[train_index, :, :], Labels[train_index])

                # predict, one test user at a time
                prob = []
                for ut in fold_test_users:
                    updateMeta(clf, Meta[User == ut, ...])
                    prob.extend(clf.predict(X[User == ut, ...]))
                prob = np.array(prob)
                allProb += prob / len(bagging)
            except Exception as exc:
                # Best effort: a failing bag is reported and skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)
                print('makeCV: bag skipped after error: %r' % (exc,))
                continue
# Training meta-features: the 'leak' option selects the alternate file.
if 'leak' in opts:
    Meta = np.load('./preproc/meta_leak.npy')
else:
    Meta = np.load('./preproc/meta.npy')

# Test-set epochs plus the (feedback id, user) pair stored with them.
X_test = np.load('./preproc/test_epochs.npy')
feedbackid, User_test = np.load('./preproc/test_infos.npy')
User_test = np.array(list(map(int, User_test)))  # user ids as integers
users_test = np.unique(User_test)

# Test meta-features, chosen by the same 'leak' switch as above.
if 'leak' in opts:
    Meta_test = np.load('./preproc/test_meta_leak.npy')
else:
    Meta_test = np.load('./preproc/test_meta.npy')

### training
np.random.seed(5)  # seed NumPy's global RNG before the bags are built
allProb = 0
# Without the 'bagging' option a single placeholder bag [-1] is used.
bagging = baggingIterator(opts, users) if 'bagging' in opts else [[-1]]

t = time()
# Bind every data array now so the pool only has to supply the bag.
pBaggingFunc = partial(
    BaggingFunc,
    Labels=Labels,
    X=X,
    Meta=Meta,
    User=User,
    X_test=X_test,
    Meta_test=Meta_test,
    User_test=User_test,
)
pool = Pool(processes=cores)
# One task per bag (chunksize=1); the per-bag results are stacked row-wise.
allProb = np.vstack(pool.map(pBaggingFunc, bagging, chunksize=1))
def makeCV(kfolds, X, Labels, User, Meta, clf, opts):
    """Run one repetition of user-wise cross-validation and report AUC scores.

    For every fold the classifier is fitted once per bag on the training rows
    and the per-bag predictions on the test users are averaged. A global AUC
    is computed per fold, and a per-user AUC is computed over the predictions
    of all folds together.

    Parameters
    ----------
    kfolds : sequence
        ``kfolds[0]`` is the repetition number used in the printed report;
        ``kfolds[1]`` is iterated as ``(train_users, test_users)`` pairs of
        indices into ``np.unique(User)``.
    X : array
        Indexed as ``X[mask, :, :]`` / ``X[mask, ...]`` with boolean masks
        that have one entry per element of ``User``.
    Labels, User, Meta : arrays
        Indexed with the same per-row boolean masks as ``X``. The last column
        of ``Meta`` is read when the ``'leak'`` option is set.
    clf : object
        Must provide ``fit(X, y)`` and ``predict(X)``; it is also handed to
        ``updateMeta`` (defined elsewhere) before every fit and predict.
    opts : dict
        Optional key ``'bagging'``: bags come from
        ``baggingIterator(opts, training_users)`` instead of the single
        placeholder bag ``[-1]``. Optional key ``'leak'``:
        ``opts['leak']['coeff'] * (1 - Meta[test_rows, -1])`` is added to the
        averaged predictions.

    Returns
    -------
    list
        ``[Gauc, Sauc]`` -- the list of per-fold AUCs and the list of
        per-user AUCs.
    """
    users = np.unique(User)
    # dict.has_key() no longer exists on Python 3; `in` works on 2 and 3.
    use_bagging = 'bagging' in opts
    use_leak = 'leak' in opts
    toPredData = []
    Gauc = []

    for train_users, test_users in kfolds[1]:
        allProb = 0

        # Build the membership set once per fold instead of once per row.
        test_set = set(users[test_users])
        test_index = np.array([u in test_set for u in User])
        fold_test_users = np.unique(users[test_users])

        if use_bagging:
            bagging = baggingIterator(opts, [users[i] for i in train_users])
        else:
            bagging = [[-1]]

        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User])
            # Rows that are not in the test fold XOR rows of the bag's users.
            # np.logical_not replaces np.negative, which raises TypeError on
            # boolean arrays in NumPy >= 1.13.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)
            try:
                # train
                updateMeta(clf, Meta[train_index])
                clf.fit(X[train_index, :, :], Labels[train_index])

                # predict, one test user at a time
                # NOTE(review): predictions are concatenated in sorted-user
                # order while User[test_index] below keeps row order; this
                # assumes rows are grouped by user in ascending order --
                # confirm against the data layout.
                prob = []
                for ut in fold_test_users:
                    updateMeta(clf, Meta[User == ut, ...])
                    prob.extend(clf.predict(X[User == ut, ...]))
                prob = np.array(prob)
                allProb += prob / len(bagging)
            except Exception as exc:
                # Best effort: a failing bag is reported and skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)
                print('makeCV: bag skipped after error: %r' % (exc,))
                continue

        # save & return
        predictions = OrderedDict()
        predictions['user'] = User[test_index]
        predictions['label'] = Labels[test_index]
        predictions['prediction'] = allProb
        if use_leak:
            predictions['prediction'] += opts['leak']['coeff'] * (
                1 - Meta[test_index, -1])
        predictions = pd.DataFrame(predictions)

        Gauc.append(roc_auc_score(predictions.label, predictions.prediction))
        toPredData.append(predictions)

    predData = pd.concat(toPredData)

    # Per-user AUC; each user's rows are selected once rather than twice.
    Sauc = []
    for i in np.unique(predData.user):
        user_rows = predData.loc[predData.user == i]
        Sauc.append(roc_auc_score(user_rows.label, user_rows.prediction))

    print('Rep %d: gAUC (mean of folds) %0.5f, sAUC %0.5f (%0.5f)' % (
        kfolds[0], np.mean(Gauc), np.mean(Sauc), np.std(Sauc)))
    return [Gauc, Sauc]
# Training labels and the user id of every training row.
Labels, User = np.load('./preproc/infos.npy')
users = np.unique(User)

# Meta-features: the 'leak' option selects the alternate files.
# (`in` replaces dict.has_key(), which no longer exists on Python 3.)
Meta = (np.load('./preproc/meta_leak.npy') if 'leak' in opts
        else np.load('./preproc/meta.npy'))

X_test = np.load('./preproc/test_epochs.npy')
feedbackid, User_test = np.load('./preproc/test_infos.npy')
# list(...) is required on Python 3, where map() returns a lazy iterator
# that np.array would wrap as a 0-d object array.
User_test = np.array(list(map(int, User_test)))
users_test = np.unique(User_test)
Meta_test = (np.load('./preproc/test_meta_leak.npy') if 'leak' in opts
             else np.load('./preproc/test_meta.npy'))

### training
np.random.seed(5)  # seed NumPy's global RNG before the bags are built
allProb = 0

# Without the 'bagging' option a single placeholder bag [-1] is used.
if 'bagging' in opts:
    bagging = baggingIterator(opts, users)
else:
    bagging = [[-1]]

t = time()
# Bind every data array now so the pool only has to supply the bag.
pBaggingFunc = partial(BaggingFunc, Labels=Labels, X=X, Meta=Meta, User=User,
                       X_test=X_test, Meta_test=Meta_test,
                       User_test=User_test)
# NOTE(review): the pool is not closed or joined in this part of the script;
# confirm it is released later, or add pool.close() / pool.join() after map().
pool = Pool(processes=cores)
allProb = pool.map(pBaggingFunc, bagging, chunksize=1)

# Stack the per-bag results row-wise and average over the bags.
allProb = np.vstack(allProb)
allProb = np.mean(allProb, axis=0)

if 'leak' in opts:
    # Add coeff * (1 - last column of the test meta-features).
    allProb += opts['leak']['coeff'] * (1 - Meta_test[:, -1])

print("Done in " + str(time() - t) + " second")