import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import crossvalidate, accuracy, rmse, auc, check_data, data_helper, ktidem_skills
import copy
np.seterr(divide='ignore', invalid='ignore')
# Compare a standard BKT model against KT-IDEM (item-specific guess/slip)
# on the ten skills returned by ktidem_skills.find_skills().
num_fit_initializations = 20
seed, folds = 2020, 5  # can customize to anything, keep same seed and # folds over all trials
results = {}  # skill -> [students, rows, templates, pct correct, simple AUC, KT-IDEM AUC]

df, skill_list, student_count, data_count, template_count = ktidem_skills.find_skills()
for idx in range(10):
    skill = skill_list[idx]
    row = [student_count[idx], data_count[idx], template_count[idx]]
    results[skill] = row

    plain_data = data_helper.convert_data(df, skill)
    check_data.check_data(plain_data)
    n_obs = len(plain_data["data"][0])
    # fraction correct: (sum - n)/n — assumes responses are coded 1/2, TODO confirm
    row.append((np.sum(plain_data["data"][0]) - n_obs) / n_obs)
    print("creating simple model")
    # index [2] of the crossvalidate result is reported as AUC below
    row.append(crossvalidate.crossvalidate(plain_data, folds=folds, seed=seed)[2])

    idem_data = data_helper.convert_data(df, skill, multiguess=True)
    check_data.check_data(idem_data)
    print("creating kt_idem model")
    row.append(crossvalidate.crossvalidate(idem_data, folds=folds, seed=seed)[2])

print("Model\tNum Students\tNum Data\tNum Templates\tCorrect Percent\tSimple AUC\tKT_IDEM AUC")
for skill, stats in results.items():
    print("%s\t%d\t%d\t%d\t%.5f\t%.5f\t%.5f" % (skill, stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]))
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
np.seterr(divide='ignore', invalid='ignore')

# Fit a "multilearn" BKT model for one skill: one learn rate per resource
# and one guess/slip pair per class, keeping the best of several random
# EM restarts by final log-likelihood.
skill_name = "Table"

data = data_helper.convert_data("as.csv", skill_name, multilearn=True)
check_data.check_data(data)
num_gs = len(data["gs_names"])
num_learns = len(data["resource_names"])

# Fix: the original assigned num_fit_initializations = 20 and then
# immediately overwrote it with 5 — only the final value was ever used,
# so keep the single effective assignment.
num_fit_initializations = 5
best_likelihood = float("-inf")

for i in range(num_fit_initializations):
    # Randomize starting parameters so each EM restart can reach a
    # different local optimum.
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)
    (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
    print(log_likelihoods[-1])
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel

# compare the fit model to the true model
print('')
print('Trained model for %s skill given %d learning rates, %d guess/slip rate' % (skill_name, num_learns, num_gs))
print('\t\tlearned')
# ---- Example #3 ----
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import crossvalidate, accuracy, rmse, auc, check_data, data_helper
import copy
np.seterr(divide='ignore', invalid='ignore')
# Compare a cross-validated simple BKT model against a trivial
# majority-class predictor for one skill.
num_fit_initializations = 20
skill_name = "Box and Whisker"
seed, folds = 2020, 5  # can customize to anything, keep same seed and # folds over all trials
results = {}  # model label -> (accuracy, rmse, auc)

# data!
print("starting simple model data collection")
data, df = data_helper.convert_data("as.csv", skill_name, return_df=True)  # save dataframe for further trials
check_data.check_data(data)
print("creating simple model")
results["Simple Model"] = crossvalidate.crossvalidate(data, folds=folds, seed=seed)

print("starting majority class calculation")
responses = data["data"][0]
# (sum - n) counts the corrects — assumes responses are coded 1/2, TODO confirm
num_correct = np.sum(responses) - len(responses)
num_incorrect = len(responses) - num_correct
majority = 1 if num_correct > num_incorrect else 0
# Predict the majority label for every observation.
pred_values = np.zeros((len(responses),))
pred_values.fill(majority)
true_values = responses.tolist()
pred_values = pred_values.tolist()
results["Majority Class"] = (accuracy.compute_acc(true_values, pred_values), rmse.compute_rmse(true_values, pred_values), auc.compute_auc(true_values, pred_values))

# ---- Example #4 ----
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
np.seterr(divide='ignore', invalid='ignore')

# Fit a "multiprior" BKT model: the explicit prior is pinned to zero and
# the model's extra resource presumably absorbs first-opportunity
# learning instead — verify against pyBKT's multiprior docs.
skill_name = "Box and Whisker"

# data!
data = data_helper.convert_data("as.csv", skill_name, multiprior=True)
check_data.check_data(data)
num_learns = len(data["resource_names"])
num_gs = len(data["gs_names"])

num_fit_initializations = 5
best_likelihood = float("-inf")

for restart in range(num_fit_initializations):
    # Fresh random starting parameters for every EM restart.
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)
    # set prior to 0
    fitmodel["pi_0"] = np.array([[1], [0]])
    fitmodel["prior"] = 0
    fitmodel, log_likelihoods = EM_fit.EM_fit(fitmodel, data)
    final_ll = log_likelihoods[-1]
    if final_ll > best_likelihood:
        best_likelihood = final_ll
        best_model = fitmodel
# ---- Example #5 ----
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
from copy import deepcopy
np.seterr(divide='ignore', invalid='ignore')

# Fit a plain BKT model for one skill with several random EM restarts
# and report the dimensions of the best fit.

# data!
data = data_helper.convert_data("as.csv", "Box and Whisker")
check_data.check_data(data)
num_learns = len(data["resource_names"])
num_gs = len(data["gs_names"])

# Fix: the original assigned num_fit_initializations = 20 and then
# immediately overwrote it with 5 — only the final value was ever used,
# so keep the single effective assignment.
num_fit_initializations = 5
best_likelihood = float("-inf")

for i in range(num_fit_initializations):
    # Randomize starting parameters so each restart explores a
    # different local optimum.
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)
    (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel

print('')
print('Trained model given %d learning rates, %d guess/slip rate' %
      (num_learns, num_gs))
# NOTE(review): this fragment references names not defined in this chunk —
# `ktidem_skills_ct`, `results`, `folds`, `seed` (and the
# `crossvalidate`/`check_data`/`data_helper`/`np` imports). The setup lines
# were presumably lost when these examples were concatenated; confirm
# against the original example before running.
df, skill_list, student_count, data_count, template_count = ktidem_skills_ct.find_skills()

# Column-name mapping so convert_data can read the Cognitive Tutor CSV;
# 'multiguess' keys guess/slip classes on the problem name (KT-IDEM).
ct_default={'order_id': 'Row',
            'skill_name': 'KC(SubSkills)',
            'correct': 'Correct First Attempt',
            'user_id': 'Anon Student Id',
            'multiguess': 'Problem Name',
                     }
                     

# Compare simple BKT vs KT-IDEM AUC on the first 12 skills returned.
for i in range(12):
    skill_name = skill_list[i]
    results[skill_name]=[student_count[i], data_count[i], template_count[i]]
    
    data = data_helper.convert_data(df, skill_name, defaults=ct_default)
    check_data.check_data(data)
    # fraction correct: (sum - n)/n — assumes responses are coded 1/2, TODO confirm
    results[skill_name].append((np.sum(data["data"][0]) - len(data["data"][0]))/len(data["data"][0]))
    print("creating simple model")
    # index [2] of the crossvalidate result is reported as AUC below
    results[skill_name].append(crossvalidate.crossvalidate(data, folds=folds, seed=seed)[2])

    data_multiguess = data_helper.convert_data(df, skill_name, defaults=ct_default, multiguess=True)
    check_data.check_data(data_multiguess)
    print("creating kt_idem model")
    results[skill_name].append(crossvalidate.crossvalidate(data_multiguess, folds=folds, seed=seed)[2])
    #print(results)

print("Model\tNum Students\tNum Data\tNum Problems\tCorrect Percent\tSimple AUC\tKT_IDEM AUC")
for k, v in results.items():
    print("%s\t%d\t%d\t%d\t%.5f\t%.5f\t%.5f" % (k, v[0], v[1], v[2], v[3], v[4], v[5]))
# ---- Example #7 ----
#may take a while to run since model has to account for all existing pairs (on the order of (# unique questions)^2)
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
np.seterr(divide='ignore', invalid='ignore')
# Fit a "multipair" BKT model for one skill. Parameters are conditioned
# on pairs of questions, so cost grows on the order of
# (# unique questions)^2 — this can take a while.
skill_name = "Finding the intersection, Mixed"

# data!
data = data_helper.convert_data("ct.csv", skill_name, multipair=True)
check_data.check_data(data)
num_learns = len(data["resource_names"])
num_gs = len(data["gs_names"])

num_fit_initializations = 5
best_likelihood = float("-inf")

for restart in range(num_fit_initializations):
    # Fresh random starting parameters for every EM restart.
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)
    fitmodel, log_likelihoods = EM_fit.EM_fit(fitmodel, data)
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel

print('')
print('Trained model for %s skill given %d learning rates, %d guess/slip rate' % (skill_name, num_learns, num_gs))
print('\t\tlearned')
print('prior\t\t%.4f' % (best_model["pi_0"][1][0]))
for key, value in data["resource_names"].items():
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import crossvalidate, data_helper, check_data
from copy import deepcopy
np.seterr(divide='ignore', invalid='ignore')

# Cross-validate a plain BKT model for one skill; crossvalidate itself
# prints the per-fold details when verbose is set.
num_fit_initializations = 20
skill_name = "Range"

# data!
data = data_helper.convert_data("as.csv", skill_name)
check_data.check_data(data)

# specifying verbose allows data from all iterations of crossvalidation to be printed out
crossvalidate.crossvalidate(data, verbose=True)