Пример #1
0
from utils.data import Data
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt

# Minimum number of answers a student must have on the items of one concept
# to be counted for that concept.  (Spelling of the name kept as-is so any
# other code referring to it keeps working; it means "threshold".)
TRASHOLD = 25

d = Data("../data/matmat/2015-11-20/answers.pd")
answers = d.get_dataframe_all()
items = d.get_items_df()
skills = d.get_skills_df()
# Join the skills frame onto the items via each item's "skill_lvl_1" value.
# NOTE(review): the "name" column used below as the concept label presumably
# comes from the skills frame -- confirm against the Data schema.
items = items.join(skills, on="skill_lvl_1")

concepts = items["name"].unique()
# concept -> Series indexed by student id, value = number of answers that
# student gave on items of the concept (only students with >= TRASHOLD kept).
sts = {}

for concept in concepts:
    print(concept)
    its = list(items[items["name"] == concept].index)
    students = answers[answers["item"].isin(its)].groupby("student").size()
    students = students[students >= TRASHOLD]
    print(len(students))
    sts[concept] = students

# Student ids per concept, built once.  The ids live in the Series *index*;
# iterating a Series directly yields its values (the answer counts), which is
# what the previous `set(sts[concept])` intersected by mistake.
student_ids = {concept: set(sts[concept].index) for concept in concepts}

# Square concept x concept matrix: number of students who passed the
# threshold in both concepts.
data = pd.DataFrame(index=concepts, columns=concepts, dtype=float)

for concept1 in concepts:
    for concept2 in concepts:
        count = len(student_ids[concept1] & student_ids[concept2])
        print(concept1, concept2, count)
        # Single .loc assignment (row=concept2, column=concept1, same cell as
        # before) instead of chained indexing, which may write to a temporary
        # copy and is a no-op under pandas Copy-on-Write.
        data.loc[concept2, concept1] = count
Пример #2
0
            lambda l: Data(filename, response_modification=TimeLimitResponseModificator([(7, 0.5)])),
            lambda l: Data(filename, response_modification=ExpDrop(5, 0.9)),
            lambda l: Data(filename, response_modification=LinearDrop(14)),
        ],
        [basic_model, basic_model, basic_model, basic_model],
        ["Basic model + noTime", "Basic model + thresholdTme", "Basic model + expTime", "Basic model + linearTime"],
        10, runs=5, data_ratio=ratio,
        # eval_data=data_test
    )

# Ad-hoc experiment, enabled by the constant-true guard: build one data set
# without and one with a response modification, run a separate basic_model
# instance on each, and plot the two models' per-item difficulties against
# each other.
if 1:
    ratio = 1
    model1 = basic_model(None)
    model2 = basic_model(None)
    data1 = Data(filename, train_size=ratio)
    # Median of the 'response_time' column over the full answers frame.
    median = data1.get_dataframe_all()['response_time'].median()
    print('time median', median)
    # Same file, but responses go through LinearDrop parameterised with
    # twice the median response time.
    data2 = Data(filename, response_modification=LinearDrop(median * 2), train_size=ratio)
    # Alternative response modifications, left here for manual switching:
    # data2 = Data(filename, response_modification=TimeLimitResponseModificator([(median, 0.5)]), train_size=ratio)
    # data2 = Data(filename, response_modification=ExpDrop(median / 2, 0.9), train_size=ratio)

    # NOTE(review): presumably force=True re-runs even if cached results exist
    # and only_train=True skips evaluation -- confirm against Runner.run.
    Runner(data1, model1).run(force=True, only_train=True)
    Runner(data2, model2).run(force=True, only_train=True)

    # NOTE(review): the first assignment is overwritten on the very next line,
    # so only the 'division' items are used below. Also, `data` is not defined
    # in the visible code -- confirm it is a factory defined earlier (or
    # whether `data1` was intended).
    items_ids = data1.get_items()
    items_ids = list(items_in_concept(data(None), 'division'))

    v1 = model1.get_difficulties(items_ids)
    v2 = model2.get_difficulties(items_ids)
    # One point per item: x is model1's difficulty, y is model2's.
    # `item` itself is not used inside the loop.
    for item, x, y in zip(items_ids, v1, v2):
        plt.plot(x, y, ".")