示例#1
0
def get_difficulties(answers, data=None, model=None, force=False, name="difficulty"):
    """Return item difficulties, computing and caching them on a cache miss.

    When both ``data`` and ``model`` are supplied, a ``Runner`` is built from
    them and the cache file name is derived from ``runner._hash``.  In every
    other case (including when only one of the two is supplied) the arguments
    are discarded and a default data set / ``EloPriorCurrentModel(KC=2,
    KI=0.5)`` pair is used together with a fixed cache file.

    Args:
        answers: Table with an ``"item"`` column.  Its unique values are the
            items whose difficulties are requested.  Only read on a cache
            miss (or when ``force`` is true).
        data: Optional data object passed to ``Runner``.
        model: Optional model passed to ``Runner``; must provide
            ``get_difficulties(items)``.
        force: When true, ignore an existing cache file and recompute.
        name: Name given to a freshly computed Series.  A cached result is
            returned exactly as it was pickled, so ``name`` is not applied
            on a cache hit.

    Returns:
        On a cache miss, a ``pandas.Series`` of difficulties indexed by item;
        on a cache hit, whatever object was previously pickled to the cache
        file.
    """
    # Explicit ``None`` checks: relying on truthiness would treat an empty
    # (falsy) data/model object as "not supplied", and objects whose
    # ``bool()`` is ambiguous (pandas-like containers) would raise here.
    if data is not None and model is not None:
        runner = Runner(data, model)
        file_name = "../cache/difficulties_{}.pd".format(runner._hash)
    else:
        data = d.Data("../data/matmat/2016-11-28/answers.pd")
        model = EloPriorCurrentModel(KC=2, KI=0.5)
        runner = Runner(data, model)
        file_name = "../cache/difficulties_matmat.pd"

    # NOTE: the cache is a pickle file; only load it from a trusted location.
    if not force and os.path.exists(file_name):
        return pd.read_pickle(file_name)

    items = answers["item"].unique()
    # The runner is always re-run here, regardless of any state it caches itself.
    runner.run(force=True)
    difficulties = pd.Series(data=model.get_difficulties(items), index=items, name=name)
    difficulties.to_pickle(file_name)
    return difficulties

# Load the item table and keep only the rows that satisfy all three filters:
#   * skill_lvl_2 equals 210,
#   * skill_lvl_3 is present (not null),
#   * visualization is "free_answer".
# Only the question / answer / visualization columns are retained.
items = data.get_items_df()
items = items[
    (items["skill_lvl_2"] == 210)
    & items["skill_lvl_3"].notnull()
    & (items["visualization"] == "free_answer")
].loc[:, ("question", "answer", "visualization")]

# Persist the answers that belong to the selected items, then load that same
# file back through the project's Data wrapper.
multiplication_answers_path = "../../data/matmat/2016-01-04/answers-multiplication.pd"
selected_answers = answers[answers["item"].isin(items.index)]
selected_answers.to_pickle(multiplication_answers_path)
data_multiplication = Data(multiplication_answers_path)

# Model instance used for the evaluation and difficulty lookup further below.
model = EloPriorCurrentModel(alpha=1.4, beta=0.1, KC=3, KI=0.5)

# Attach three per-item statistics computed over *all* answers (grouped by
# the "item" column); the joins align them with the index of ``items``:
#   answer_count  - number of answer rows for the item
#   success_rate  - sum of "correct" divided by the number of rows
#   response_time - median of "response_time"
answers_by_item = answers.groupby("item")
items = (
    items
    .join(pd.Series(answers_by_item.size(), name="answer_count"))
    .join(pd.Series(
        answers_by_item.apply(lambda group: group["correct"].sum() / len(group)),
        name="success_rate",
    ))
    .join(pd.Series(answers_by_item["response_time"].median(), name="response_time"))
)

# Build the evaluation report with a forced run.
# NOTE(review): presumably this is what fits ``model`` so that the difficulties
# read below are populated - confirm against Evaluator.
Evaluator(
    data_multiplication,
    model,
).get_report(force_run=True)

# Store the model's difficulty for every selected item, then centre the
# column so that its mean is zero.
items["model_difficulty"] = model.get_difficulties(items.index)
items["model_difficulty"] = items["model_difficulty"] - items["model_difficulty"].mean()

# Collapse items that share the same question into one row: answer counts
# are summed, the remaining statistics are averaged across the items.
skill_aggregations = {
    "answer_count": "sum",
    "success_rate": "mean",
    "response_time": "mean",
    "model_difficulty": "mean",
}
skills = items.groupby("question").agg(skill_aggregations)

# Four independent, initially all-NaN float grids with rows labelled 10..1
# (descending) and columns labelled 1..10.
# NOTE(review): judging by the names these hold success rate, difficulty,
# answer count and response time per (a, b) pair, presumably filled by the
# loop over ``skills`` that follows - confirm.
dfSR, dfD, dfAC, dfRT = (
    pd.DataFrame(index=range(10, 0, -1), columns=range(1, 11), dtype=float)
    for _ in range(4)
)
for q, skill in skills.iterrows():
    a,b = map(int, q.split("x"))