num_resources = 2
num_fit_initializations = 10
# 500 sequences of 100 observations each. The builtin ``int`` is used because
# the ``np.int`` alias no longer exists in current NumPy releases.
observation_sequence_lengths = np.full(500, 100, dtype=int)

# Generate a synthetic model and data; the model is deliberately simple.
truemodel = {}

# One 2x2 transition matrix per resource, stored resource-first so that the
# ``As[i, :, :]`` writes and ``As[:, row, col]`` reads below stay valid for any
# ``num_resources`` (the previous (2, 2, num_resources) shape only worked
# because num_resources happened to be 2).
truemodel["As"] = np.zeros((num_resources, 2, 2), dtype=np.float64)
for i in range(num_resources):
    truemodel["As"][i, :, :] = np.transpose([[0.7, 0.3], [0.01, 0.99]])
# After the transpose, entry [1, 0] is 0.3 and entry [0, 1] is 0.01.
truemodel["learns"] = truemodel["As"][:, 1, 0]
truemodel["forgets"] = truemodel["As"][:, 0, 1]

# Initial state distribution as a column vector; "prior" is its second entry.
truemodel["pi_0"] = np.array([[0.9], [0.1]])
truemodel["prior"] = truemodel["pi_0"][1][0]

# NOTE(review): num_subparts is not defined in this part of the script;
# presumably it is set earlier in the file -- confirm.
truemodel["guesses"] = np.full(num_subparts, 0.1, dtype=np.float64)
truemodel["slips"] = np.full(num_subparts, 0.03, dtype=np.float64)

# Sample observation sequences from the true model.
print("generating data...")
data = synthetic_data.synthetic_data(truemodel, observation_sequence_lengths)

# Predict with the generating model itself and show both outputs.
(correct_predictions, state_predictions) = predict_onestep.run(truemodel, data)

print(correct_predictions)
print(state_predictions)

print("finishing...")
Пример #2
0
    check_data.check_data(pps_data)

    # first, generate the basic model and run accuracy tests using MAE as evaluator
    num_fit_initializations = 20
    best_likelihood = float("-inf")
    for i in range(num_fit_initializations):
        fitmodel = random_model_uni.random_model_uni(1, 1)
        (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
        if (log_likelihoods[-1] > best_likelihood):
            best_likelihood = log_likelihoods[-1]
            best_model = fitmodel

    data["lengths"] = data["lengths_full"]

    (correct_predictions,
     state_predictions) = predict_onestep.run(best_model, data)

    kt_mae = 0
    for i in data["starts"]:
        true = data["data"][0][i + 2] - 1
        predicted = correct_predictions[i + 2]
        kt_values[str(data["data"][0][i - 1]) + str(data["data"][0][i]) +
                  str(data["data"][0][i + 1]) + str(round(predicted) + 1)] += 1
        kt_mae += abs(true - predicted)
        if (true == 1 and predicted > 0.5) or (true == 0 and predicted < 0.5):
            kt_correct[str(data["data"][0][i - 1]) + str(data["data"][0][i]) +
                       str(data["data"][0][i + 1]) +
                       str(data["data"][0][i + 2])] += 1
        elif (true == 0 and predicted > 0.5) or (true == 1
                                                 and predicted < 0.5):
            kt_incorrect[str(data["data"][0][i - 1]) +
Пример #3
0
        #print(" ")
        #print('\tlearned')
        #print('prior\t%.4f' % (best_model["pi_0"][1][0]))
        #for r in range(1):
        #    print('learn%d\t%.4f' % (r+1, best_model['As'][r, 1, 0].squeeze()))
        #for r in range(1):
        #    print('forget%d\t%.4f' % (r+1, best_model['As'][r, 0, 1].squeeze()))

        #for s in range(1):
        #    print('guess%d\t%.4f' % (s+1, best_model['guesses'][s]))
        #for s in range(1):
        #    print('slip%d\t%.4f' % (s+1, best_model['slips'][s]))

        if len(test_data[skill]["resources"]) > 0:
            (correct_predictions,
             state_predictions) = predict_onestep.run(best_model,
                                                      test_data[skill])
            if len(
                    np.unique(test_data[skill]["data"])
            ) > 1:  #auc for single skill only calculated when there are 2+ classifiers
                curr_auc = auc.compute_auc(test_data[skill]["data"][0],
                                           correct_predictions)
            else:
                curr_auc = 0

            all_true.extend(test_data[skill]["data"][0])
            all_pred.extend(correct_predictions)
            print("Skill %s of %s calculation completed with AUC of %.4f" %
                  (skill, skill_count, curr_auc))
        else:
            print("No test data for skill %s" % skill)
total_auc = auc.compute_auc(all_true, all_pred)
Пример #4
0
 def _predict(self, model, data):
     """Delegate prediction to ``predict_onestep.run``.

     ``model`` and ``data`` are forwarded untouched and the result of
     ``predict_onestep.run`` is returned as is; no instance state is read.
     """
     predictions = predict_onestep.run(model, data)
     return predictions
Пример #5
0
def crossvalidate(data, folds=5, verbose=False, seed=0, return_arrays=False):
    """Cross-validate over students and report metrics on pooled predictions.

    Every entry of ``data["starts"]`` is treated as one student. The students
    are shuffled with a ``seed``-controlled permutation and cut into ``folds``
    test groups of ``len(data["starts"]) // folds`` students each. Students
    left over by the integer division are never tested but are part of every
    training set. For each fold the model is fitted on the remaining students
    from several random initializations, the fit with the highest final
    log-likelihood is kept, and that model predicts the held-out students.

    Args:
        data: Dataset dict. This function itself reads ``data["starts"]`` and
            the optional ``"resource_names"`` / ``"gs_names"`` keys; the rest
            is consumed by ``fix_data``, ``EM_fit`` and ``predict_onestep``.
        folds: Number of folds.
        verbose: When true, print the fitted parameters of every fold and the
            final metrics.
        seed: Seed of the permutation that assigns students to folds.
        return_arrays: When true, skip the metrics and return the pooled
            true values and predictions instead.

    Returns:
        ``(all_true, all_pred)`` lists if ``return_arrays`` is set, otherwise
        the tuple ``(accuracy, rmse, auc)`` computed over the predictions of
        all folds together.

    Raises:
        RuntimeError: If, for some fold, no initialization ends with a final
            log-likelihood greater than ``-inf`` (for example all NaN).
    """
    num_learns = len(data["resource_names"]) if "resource_names" in data else 1
    num_gs = len(data["gs_names"]) if "gs_names" in data else 1

    # Random restarts of the fit per fold.
    num_fit_initializations = 5
    num_students = len(data["starts"])
    split_size = num_students // folds
    # Random permutation whose slices act as the fold membership.
    shuffle = np.random.RandomState(seed=seed).permutation(num_students)
    all_true, all_pred = [], []

    # Cross-validation on students, which are identified by the starts array.
    for iteration in range(folds):
        fold_start = iteration * split_size
        fold_end = fold_start + split_size
        train = np.concatenate((shuffle[:fold_start], shuffle[fold_end:]))
        test = shuffle[fold_start:fold_end]
        training_data = fix_data(data, train)

        # Reset per fold so a fold that fails to fit can never silently reuse
        # the model of the previous fold (which was trained on students that
        # are now in the test set).
        best_model = None
        best_likelihood = float("-inf")
        for _ in range(num_fit_initializations):
            # Start every restart from randomly drawn parameter values.
            fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)
            (fitmodel,
             log_likelihoods) = EM_fit.EM_fit(fitmodel, training_data)
            if log_likelihoods[-1] > best_likelihood:
                best_likelihood = log_likelihoods[-1]
                best_model = fitmodel

        if best_model is None:
            raise RuntimeError(
                "no usable fit in fold %d: every final log-likelihood was "
                "NaN or -inf" % iteration)

        if verbose:
            print(" ")
            print('Iteration %d' % (iteration))
            print('\tlearned')
            print('prior\t%.4f' % (best_model["pi_0"][1][0]))
            for r in range(num_learns):
                print('learn%d\t%.4f' %
                      (r + 1, best_model['As'][r, 1, 0].squeeze()))
            for r in range(num_learns):
                print('forget%d\t%.4f' %
                      (r + 1, best_model['As'][r, 0, 1].squeeze()))

            for s in range(num_gs):
                print('guess%d\t%.4f' % (s + 1, best_model['guesses'][s]))
            for s in range(num_gs):
                print('slip%d\t%.4f' % (s + 1, best_model['slips'][s]))

        test_data = fix_data(data, test)

        # Run the model fitted on the training students on the test students.
        correct_predictions, _ = predict_onestep.run(best_model, test_data)

        # Collapse the rows of test_data["data"] into one vector: for every
        # column take the non-zero entry, with later rows overriding earlier
        # ones (same order as an element-by-element scan).
        num_observations = len(test_data["data"][0])
        flat_true_values = np.zeros((num_observations, ), dtype=np.intc)
        for row in test_data["data"]:
            row = np.asarray(row)[:num_observations]
            observed = row != 0
            flat_true_values[observed] = row[observed]

        all_true.extend(flat_true_values.tolist())
        all_pred.extend(correct_predictions)

    if return_arrays:
        return (all_true, all_pred)

    # NOTE(review): printed regardless of ``verbose``; looks like leftover
    # debug output but is kept so existing stdout stays unchanged.
    print(len(all_pred))
    total = rmse.compute_rmse(all_true, all_pred)
    acc = accuracy.compute_acc(all_true, all_pred)
    area_under_curve = auc.compute_auc(all_true, all_pred)
    if verbose:
        print("Average RMSE: ", total)
        print("Average Accuracy: ", acc)
        print("Average AUC: ", area_under_curve)
    return (acc, total, area_under_curve)