def test_pfa(self):
    """PFA encoding must reproduce the reference feature matrix on disk."""
    dense = df_to_sparse(self.data, self.q_mat, ["skills", "wins", "fails"]).toarray()
    # Order rows by the 5th column, then keep only the sparse feature columns.
    dense = dense[dense[:, 4].argsort(), 5:]
    expected = np.array(pd.read_csv("data/dummy/pfa.csv", sep=';'))
    self.assertSequenceEqual(dense.tolist(), expected.tolist(), "Inconsistent PFA features")
def test_ui(self):
    """IRT/MIRT (user-item) encoding must reproduce the reference features on disk."""
    dense = df_to_sparse(self.data, self.q_mat, ["users", "items"]).toarray()
    # Order rows by the 5th column, then keep only the sparse feature columns.
    dense = dense[dense[:, 4].argsort(), 5:]
    expected = np.array(pd.read_csv("data/dummy/irt.csv", sep=';'))
    self.assertSequenceEqual(dense.tolist(), expected.tolist(), "Inconsistent IRT features")
def test_afm(self):
    """AFM encoding must reproduce the reference feature matrix on disk."""
    dense = df_to_sparse(self.data, self.q_mat, ["skills", "attempts"]).toarray()
    # Order rows by the 5th column, then keep only the sparse feature columns.
    dense = dense[dense[:, 4].argsort(), 5:]
    expected = np.array(pd.read_csv("data/dummy/afm.csv", sep=';'))
    self.assertSequenceEqual(dense.tolist(), expected.tolist(), "Inconsistent AFM features")
def test_dash(self):
    """DASH encoding (time-windowed) must reproduce the reference features on disk."""
    dense = df_to_sparse(
        self.data,
        self.q_mat,
        ["users", "items", "wins", "attempts"],
        tw="tw_items",
    ).toarray()
    # Order rows by the 5th column, then keep only the sparse feature columns.
    dense = dense[dense[:, 4].argsort(), 5:]
    # Undo the log1p on the last 10 (time-window) columns so the comparison is
    # against simple integer counters instead of floats needing assertAlmostEqual.
    dense[:, -10:] = np.exp(dense[:, -10:]) - 1
    expected = np.array(pd.read_csv("data/dummy/dash.csv", sep=';'))
    self.assertSequenceEqual(dense.tolist(), expected.tolist(), "Inconsistent DASH features")
from encode import df_to_sparse from train_lr import compute_metrics data_path = 'data/new_sqai/' original_df = pd.read_csv(os.path.join(data_path, "ElemMATHdata.csv")) stats = [] for i in range(5, 18): print('using user: '******'preprocessed_data.csv'), sep="\t") df = df[["user_id", "item_id", "timestamp", "correct", "skill_id"]] Q_mat = sparse.load_npz(os.path.join(data_path, 'q_mat.npz')).toarray() active_features = ['i', 's', 'ic', 'sc', 'tc', 'w', 'a'] X = df_to_sparse(df, Q_mat, active_features) # sparse.save_npz(os.path.join(data_path, 'X-features'), X) # # parser = argparse.ArgumentParser(description='Train logistic regression on sparse feature matrix.') # parser.add_argument('--X_file', type=str) # parser.add_argument('--dataset', type=str) # parser.add_argument('--iter', type=int, default=1000) # args = parser.parse_args() # # features_suffix = (args.X_file.split("-")[-1]).split(".")[0] # # # Load sparse dataset # X = csr_matrix(load_npz(args.X_file)) # print('encoded') train_df = pd.read_csv(os.path.join(data_path,