results = defaultdict(lambda: defaultdict(lambda : {})) for response_modification in [ None, # TimeLimitResponseModificator([(time_median, 0.5)]), LinearDrop(time_median * 2), ]: data = basic_data(response_modification) model = basic_model(None) data.trim_times() answers = data.get_dataframe_all() Runner(data, model).run(force=True) for concept in ['numbers', 'addition', 'subtraction', 'multiplication', 'division']: items = list(set(data.get_items()) & set(items_in_concept(data, concept))) concept_answers = answers[answers['item'].isin(items)] students = concept_answers.groupby('student').apply(len) students = students[students >= min_answers] results[concept][str(response_modification)] = ( items, model.get_skills(students.index, concept), model.get_difficulties(items), concept_answers.groupby('student')['response_time'].median().loc[students.index], concept_answers.groupby('student')['correct'].mean().loc[students.index], concept_answers.groupby('item')['response_time'].median().loc[items], concept_answers.groupby('item')['correct'].mean().loc[items], ) for concept, r in results.items():
data = d.Data("../data/matmat/2016-11-28/answers.pd") data.trim_times() answers_all = data.get_dataframe_all() difficulties = get_difficulties(answers_all) time_intensity = get_difficulties(answers_all, model=TimePriorCurrentModel(alpha=0.4, beta=0.04, KC=0.3, KI=0.3, first_level=False), data=data, name="time_intensity") # answers_all = answers_all[~answers_all['answer'].isnull()] # filter missing answers output = defaultdict(lambda: '') for concept in ['numbers', 'addition', 'subtraction', 'multiplication', 'division']: answers = answers_all[answers_all['item'].isin(items_in_concept(data, concept))] time_median = answers['response_time'].median() for transformation, answers_transform in { "C + lin T": LinearDrop(time_median * 2).modify(answers.copy()), "C + binary T": TimeLimitResponseModificator([(time_median, 0.5)]).modify(answers.copy()), }.items(): metrics = {'C': answers['correct'], transformation: answers_transform['correct']} orders = get_order(answers, metrics) # sns.jointplot('C', transformation, data=orders, kind='kde') output[transformation] += '{:<15} -- spearman: {:.3f}, kendall: {:.3f}, sessions: {:>4}, time median: {:.1f}\n'.format( concept, orders.corr('spearman')['C'][transformation], orders.corr('kendall')['C'][transformation],
10, runs=5, data_ratio=ratio, # eval_data=data_test ) if 1: ratio = 1 model1 = basic_model(None) model2 = basic_model(None) data1 = Data(filename, train_size=ratio) median = data1.get_dataframe_all()['response_time'].median() print('time median', median) data2 = Data(filename, response_modification=LinearDrop(median * 2), train_size=ratio) # data2 = Data(filename, response_modification=TimeLimitResponseModificator([(median, 0.5)]), train_size=ratio) # data2 = Data(filename, response_modification=ExpDrop(median / 2, 0.9), train_size=ratio) Runner(data1, model1).run(force=True, only_train=True) Runner(data2, model2).run(force=True, only_train=True) items_ids = data1.get_items() items_ids = list(items_in_concept(data(None), 'division')) v1 = model1.get_difficulties(items_ids) v2 = model2.get_difficulties(items_ids) for item, x, y in zip(items_ids, v1, v2): plt.plot(x, y, ".") plt.text(x, y, items.loc[item]) plt.xlabel(str(data1)) plt.ylabel(str(data2)) plt.show()