# Build a concept-by-concept table that counts the students who answered at
# least TRASHOLD items in both concepts.
from utils.data import Data
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt

# Minimum number of answers a student needs within one concept to be counted.
# (The name is a misspelling of "threshold"; it is kept so that existing
# references to it keep working.)
TRASHOLD = 25

d = Data("../data/matmat/2015-11-20/answers.pd")
answers = d.get_dataframe_all()
items = d.get_items_df()
skills = d.get_skills_df()
# Attach the skill row referenced by each item's "skill_lvl_1"; the "name"
# column of the joined frame is used as the concept label below.
items = items.join(skills, on="skill_lvl_1")
concepts = items["name"].unique()

# sts maps concept -> Series indexed by student whose values are the number of
# answers that student gave to items of the concept (only students with at
# least TRASHOLD answers are kept).
sts = {}
for concept in concepts:
    print(concept)
    its = list(items[items["name"] == concept].index)
    students = answers[answers["item"].isin(its)].groupby("student").size()
    students = students[students >= TRASHOLD]
    print(len(students))
    sts[concept] = students

# Iterating a Series yields its *values* (here: answer counts), so the student
# ids must be taken from the index. The sets are built once up front instead of
# being rebuilt for every pair of concepts.
student_ids = {name: set(counts.index) for name, counts in sts.items()}

data = pd.DataFrame(index=concepts, columns=concepts, dtype=float)
for concept1 in concepts:
    for concept2 in concepts:
        count = len(student_ids[concept1] & student_ids[concept2])
        print(concept1, concept2, count)
        # One .loc write (row concept2, column concept1) addresses the same
        # cell as the former data[concept1][concept2], without relying on
        # chained-indexing assignment, which may write to a temporary copy.
        data.loc[concept2, concept1] = count
# NOTE(review): everything down to the closing ")" is the tail of a call that
# opens above this excerpt; it is reproduced unchanged. Only the line structure
# was restored.
        lambda l: Data(filename, response_modification=TimeLimitResponseModificator([(7, 0.5)])),
        lambda l: Data(filename, response_modification=ExpDrop(5, 0.9)),
        lambda l: Data(filename, response_modification=LinearDrop(14)),
    ],
    [basic_model, basic_model, basic_model, basic_model],
    ["Basic model + noTime", "Basic model + thresholdTme", "Basic model + expTime", "Basic model + linearTime"],
    10,
    runs=5,
    data_ratio=ratio,
    # eval_data=data_test
)

# Experiment toggle: fit the same model once on the unmodified data and once on
# data whose responses went through a time-based modification, then plot the
# two sets of item difficulties against each other.
if 1:
    ratio = 1
    model1 = basic_model(None)
    model2 = basic_model(None)
    data1 = Data(filename, train_size=ratio)
    # The median response time of the unmodified data parametrises the
    # modification applied to data2.
    median = data1.get_dataframe_all()['response_time'].median()
    print('time median', median)
    data2 = Data(filename, response_modification=LinearDrop(median * 2), train_size=ratio)
    # Alternative response modifications, currently disabled:
    # data2 = Data(filename, response_modification=TimeLimitResponseModificator([(median, 0.5)]), train_size=ratio)
    # data2 = Data(filename, response_modification=ExpDrop(median / 2, 0.9), train_size=ratio)
    Runner(data1, model1).run(force=True, only_train=True)
    Runner(data2, model2).run(force=True, only_train=True)

    # NOTE(review): the first assignment is overwritten immediately by the
    # next line, so only the 'division' items are compared.
    items_ids = data1.get_items()
    # NOTE(review): `data` is not defined in this excerpt; presumably a
    # factory called like basic_model(None). Confirm against the code above.
    items_ids = list(items_in_concept(data(None), 'division'))
    v1 = model1.get_difficulties(items_ids)
    v2 = model2.get_difficulties(items_ids)
    # One point per item: x is the value from model1, y the value from model2.
    # `item` itself is not used inside the loop.
    for item, x, y in zip(items_ids, v1, v2):
        plt.plot(x, y, ".")