def fnopt(par):
    """Return the log loss of the rescaled training predictions.

    ``par[0]`` multiplies ``trainpred`` and ``par[1]`` is added to the
    product; the result is passed through ``logitinv`` and scored against
    ``trainreal`` with ``log_loss``.  ``trainpred``, ``trainreal``,
    ``logitinv`` and ``log_loss`` are looked up in the enclosing scope.
    """
    scale, offset = par[0], par[1]
    calibrated = logitinv(scale * trainpred + offset)
    return log_loss(trainreal, calibrated)
def fnopt(par):
    """Objective function: log loss after a two-parameter rescaling.

    Computes ``par[0] * trainpred + par[1]``, maps it through ``logitinv``
    and returns ``log_loss`` of that against ``trainreal``.  All four of
    those names are resolved from the surrounding scope, not passed in.
    """
    linear_term = par[0] * trainpred + par[1]
    return log_loss(trainreal, logitinv(linear_term))
# NOTE(review): the line below is a whitespace-collapsed script fragment. It
# starts with '#', so Python reads the entire line as a comment, and it is cut
# off mid-expression after `item_id +` (the to_csv call is never closed).
# What the statements on it do, read in order:
#   * for each (grade, col) in enumerate(scores): take the TRAINING rows of
#     preds[col] as trainpred and the 0/1 indicator `realscores == grade` on
#     the same rows as trainreal, then call fmin on fnopt starting from
#     [0.0, 0.0] (presumably scipy.optimize.fmin -- confirm against imports);
#   * fnopt returns log_loss(trainreal, logitinv(par[0]*trainpred + par[1]));
#   * the fitted parameter pairs are summed into optpar and divided by
#     len(scores), then applied to every column listed in scores;
#   * on VALIDATION rows it asserts that the argmax over the score columns
#     equals final_score, and formats one ITEM_XML entry per row into items;
#   * it then begins exporting student_id/test_id/final_score plus the score
#     columns to CSV, where the fragment ends.
# optimize probability optpar = np.array([0.0, 0.0]) for grade, col in enumerate(scores): trainpred = np.array(preds.ix[preds.essay_type == "TRAINING", col]) trainreal = np.array( (realscores[preds.essay_type == "TRAINING"] == grade).map(int)) def fnopt(par): loss = log_loss(trainreal, logitinv(par[0] * trainpred + par[1])) return loss opt = fmin(fnopt, np.array([0.0, 0.0])) optpar += opt optpar /= len(scores) preds.ix[:, scores] = logitinv(optpar[0] * preds.ix[:, scores] + optpar[1]) preds_validation = preds.ix[preds.essay_type == "VALIDATION", :] assert np.all( np.argmax(preds_validation.ix[:, scores].as_matrix(), axis=1) == np.array(preds_validation.final_score)) items = [] for _, record in preds_validation.iterrows(): record = dict(record) record["item_id"] = item_id item = ITEM_XML % dict(record) items.append(item) preds_validation.ix[:, ["student_id", "test_id", "final_score"] + scores].to_csv("final_scores/" + item_id +
# Calibrate the per-score probability columns of `preds` and export the
# VALIDATION rows as CSV and XML.
#
# Names used here but defined elsewhere in the file: essays, preds, item_id,
# ITEM_XML, ALL_XML, np, fmin, log_loss and logitinv.
realscores = essays.meta_data()["score3"]
# NOTE: `find(...) > 0` only selects columns where "prob" occurs after the
# first character; a column name that *starts* with "prob" is not selected.
scores = [col for col in preds.columns if col.find("prob") > 0]

# optimize probability
# The TRAINING mask does not change inside the loop, so build it once.
training_rows = preds.essay_type == "TRAINING"
optpar = np.array([0.0, 0.0])
for grade, col in enumerate(scores):
    # `.loc` replaces `.ix` (removed in pandas 1.0); with a boolean mask and
    # a column label the two select the same cells.
    trainpred = np.array(preds.loc[training_rows, col])
    # 0/1 indicator: does the real score equal this column's position?
    trainreal = np.array((realscores[training_rows] == grade).map(int))

    def fnopt(par):
        """Log loss of trainreal vs. logitinv(par[0] * trainpred + par[1])."""
        loss = log_loss(trainreal, logitinv(par[0] * trainpred + par[1]))
        return loss

    # fnopt reads trainpred/trainreal from the enclosing scope; it is used
    # immediately, before the next iteration rebinds them.
    opt = fmin(fnopt, np.array([0.0, 0.0]))
    optpar += opt

# Use one shared parameter pair: the mean of the per-column fits.
optpar /= len(scores)
preds.loc[:, scores] = logitinv(optpar[0] * preds.loc[:, scores] + optpar[1])

preds_validation = preds.loc[preds.essay_type == "VALIDATION", :]
# Sanity check: the highest-valued score column must agree with final_score.
# `.values` replaces `.as_matrix()` (removed in pandas 1.0).
assert np.all(
    np.argmax(preds_validation.loc[:, scores].values, axis=1)
    == np.array(preds_validation.final_score)
)

items = []
for _, record in preds_validation.iterrows():
    record = dict(record)
    record["item_id"] = item_id
    items.append(ITEM_XML % record)

preds_validation.loc[:, ["student_id", "test_id", "final_score"] + scores].to_csv(
    "final_scores/" + item_id + "_AI-PJ_scores_probs.csv", index=False
)

# Context manager closes the file even if formatting or writing raises.
with open("final_scores/" + item_id + "_AI-PJ_scores.xml", "w") as out:
    out.write(ALL_XML % ("".join(items)))