def difficulty_vs_time(data, the_skill, concepts=False):
    """Scatter-plot mean log response time against estimated difficulty.

    The plot is drawn through ``plt`` (presumably ``matplotlib.pyplot`` --
    confirm against the file's imports); nothing is returned.

    ``data.filter_data(0, 100)``, ``data.trim_times()`` and
    ``data.add_log_response_times()`` are called for their side effects, so
    the passed ``data`` object is presumably modified by this call.

    Args:
        data: Project data object providing ``filter_data``,
            ``get_skill_id``, ``trim_times``, ``add_log_response_times``,
            ``get_items_df``, ``get_skills_df`` and ``get_dataframe_all``.
        the_skill: Skill identifier handed to ``data.get_skill_id``; also
            used as the plot title.
        concepts: When true, plot one point per ``skill_lvl_3`` group using
            the mean of its items' values.  Otherwise plot one point per
            item, colored by the item's ``visualization`` value.
    """
    data.filter_data(0, 100)
    pk, level = data.get_skill_id(the_skill)
    data.trim_times()
    data.add_log_response_times()
    m = EloPriorCurrentModel(KC=2, KI=0.5)

    items = data.get_items_df()
    items = items[items["visualization"] != "pairing"]
    items = items.join(get_difficulty(data, m))
    mean_log_times = data.get_dataframe_all().groupby(["item"])["log_response_time"].mean()
    items = items.join(pd.Series(mean_log_times, name="log_response_time_mean"))
    # Keep only the items that belong to the requested skill at its level.
    items = items[items["skill_lvl_" + str(level)] == pk]

    if concepts:
        skills = data.get_skills_df()
        by_concept = items.groupby("skill_lvl_3")
        skills = skills.join(by_concept["difficulty"].mean())
        skills = skills.join(by_concept["log_response_time_mean"].mean())
        skills = skills[skills.index.isin(items["skill_lvl_3"].unique())]
        for _, skill in skills.iterrows():
            plt.plot(skill["difficulty"], skill["log_response_time_mean"], "ok")
            plt.text(skill["difficulty"], skill["log_response_time_mean"], skill["name"])
    else:
        colors = "rgbyk"
        visualizations = list(items["visualization"].unique())
        # Cycle through the palette so that more visualization types than
        # colors reuse a color instead of raising IndexError.
        color_of = {
            vis: colors[i % len(colors)]
            for i, vis in enumerate(visualizations)
        }
        for _, item in items.iterrows():
            plt.plot(item["difficulty"], item["log_response_time_mean"], "o", color=color_of[item["visualization"]])
            plt.text(item["difficulty"], item["log_response_time_mean"], item["name"])
        # Empty-data proxy artists: they only feed the legend and add no
        # stray marker to the axes.
        for vis in visualizations:
            plt.plot([], [], "o", color=color_of[vis], label=vis)
        # Only this branch creates labelled artists, so only it needs a legend.
        if visualizations:
            plt.legend(loc=0)

    plt.xlabel("difficulty according to " + str(m))
    plt.ylabel("mean of log time")
    plt.title(the_skill)
Пример #2
0
################################################################################
################################################################################
################################################################################

################################################################################
## MAIN ########################################################################
################################################################################

if __name__ == '__main__':

    ## RANDOM TESTING ##############################################################

    # Read the data set from CSV, then convert both parts with ``arr``.
    data, classes = load_data_from_csv('../data/classifier-data.csv', 4, float)
    data = arr(data)
    classes = arr(classes)

    # The predicate accepts a pair when its second argument equals 2
    # (presumably the class label -- confirm against filter_data).
    bd1, bc1 = filter_data(data, classes, lambda x, y: y == 2)
    bd1 = np.array(bd1)
    bc1 = np.array(bc1)

    def test(trd, trc, ted, tec):
        """Build a TreeClassify from (trd, trc) and report its error.

        Prints a 'tc' banner, the classifier itself and the value of
        ``err(ted, tec)``, then returns that value.
        """
        print('tc', '\n')
        classifier = TreeClassify(trd, trc)
        print(classifier, '\n')
        # Diagnostics previously printed here and left disabled:
        # predict, predict_soft, confusion, auc, roc.
        result = classifier.err(ted, tec)
        print(result, '\n')
        return result
################################################################################
	
		
################################################################################
## MAIN ########################################################################
################################################################################


if __name__ == '__main__':

## RANDOM TESTING ##############################################################

	# Read the data set from CSV, then convert both parts with ``arr``.
	data,classes = load_data_from_csv('../data/classifier-data.csv', 4, float)
	data = arr(data)
	classes = arr(classes)

	# The predicate accepts a pair when its second argument equals 2
	# (presumably the class label -- confirm against filter_data).
	bd1,bc1 = filter_data(data, classes, lambda x,y: y == 2)
	bd1 = np.array(bd1)
	bc1 = np.array(bc1)

	def test(trd, trc, ted, tec):
		"""Build a GaussBayesClassify from (trd, trc) and report its error.

		Prints a 'gbc' banner, the classifier itself and the value of
		``err(ted, tec)``, then returns that value.
		"""
		print('gbc', '\n')
		classifier = GaussBayesClassify(trd, trc)
		print(classifier, '\n')
		# Diagnostics previously printed here and left disabled:
		# predict, predict_soft, confusion, auc, roc.
		result = classifier.err(ted, tec)
		print(result, '\n')
		return result