示例#1
0
 def fnopt(par):
     """Return the log loss of a linearly rescaled ``trainpred``.

     ``par`` is indexed at positions 0 and 1: ``par[0]`` multiplies
     ``trainpred`` and ``par[1]`` is added to that product.  The sum is
     passed through ``logitinv`` and scored against ``trainreal`` with
     ``log_loss``.  ``trainpred`` and ``trainreal`` are not parameters;
     they are looked up in an outer scope when the function is called.
     """
     linear = par[0] * trainpred + par[1]
     calibrated = logitinv(linear)
     return log_loss(trainreal, calibrated)
 def fnopt(par):
     """Score the parameters ``par`` by log loss.

     Computes ``par[0] * trainpred + par[1]``, applies ``logitinv`` to it
     and returns ``log_loss(trainreal, ...)`` of the result.  Both
     ``trainpred`` and ``trainreal`` come from an outer scope, not from
     the argument list.
     """
     rescaled = logitinv(par[0] * trainpred + par[1])
     return log_loss(trainreal, rescaled)
示例#3
0
    # optimize probability
    optpar = np.array([0.0, 0.0])
    for grade, col in enumerate(scores):
        trainpred = np.array(preds.ix[preds.essay_type == "TRAINING", col])
        trainreal = np.array(
            (realscores[preds.essay_type == "TRAINING"] == grade).map(int))

        def fnopt(par):
            """Objective handed to ``fmin`` for the current ``col``.

            ``par[0]`` scales and ``par[1]`` offsets the ``trainpred``
            array built in this loop iteration; the result goes through
            ``logitinv`` and is compared with ``trainreal`` via
            ``log_loss``.  Returns that loss value.
            """
            linear = par[0] * trainpred + par[1]
            return log_loss(trainreal, logitinv(linear))

        opt = fmin(fnopt, np.array([0.0, 0.0]))
        optpar += opt

    optpar /= len(scores)
    preds.ix[:, scores] = logitinv(optpar[0] * preds.ix[:, scores] + optpar[1])
    preds_validation = preds.ix[preds.essay_type == "VALIDATION", :]

    assert np.all(
        np.argmax(preds_validation.ix[:, scores].as_matrix(), axis=1) ==
        np.array(preds_validation.final_score))

    items = []
    for _, record in preds_validation.iterrows():
        record = dict(record)
        record["item_id"] = item_id
        item = ITEM_XML % dict(record)
        items.append(item)

    preds_validation.ix[:, ["student_id", "test_id", "final_score"] +
                        scores].to_csv("final_scores/" + item_id +
    realscores = essays.meta_data()["score3"]
    scores = [col for col in preds.columns if col.find("prob") > 0]

    # optimize probability
    optpar = np.array([0.0,0.0])
    for grade,col in enumerate(scores): 
        trainpred = np.array(preds.ix[preds.essay_type=="TRAINING",col])
        trainreal = np.array((realscores[preds.essay_type=="TRAINING"]==grade).map(int))
        def fnopt(par):
            """Objective handed to ``fmin`` for the current ``col``.

            Rescales this iteration's ``trainpred`` as
            ``par[0] * trainpred + par[1]``, maps it through ``logitinv``
            and returns its ``log_loss`` against ``trainreal``.
            """
            calibrated = logitinv(par[0] * trainpred + par[1])
            return log_loss(trainreal, calibrated)
        opt = fmin(fnopt, np.array([0.0,0.0]))
        optpar += opt
    
    optpar /= len(scores)
    preds.ix[:,scores] = logitinv(optpar[0]*preds.ix[:,scores]+optpar[1])
    preds_validation = preds.ix[preds.essay_type=="VALIDATION",:]
    
    assert np.all(np.argmax(preds_validation.ix[:,scores].as_matrix(),axis=1)==np.array(preds_validation.final_score))    
    
    items = []
    for _, record in preds_validation.iterrows():
        record = dict(record)
        record["item_id"] = item_id
        item = ITEM_XML % dict(record)
        items.append(item)
            
    preds_validation.ix[:,["student_id","test_id","final_score"] + scores].to_csv("final_scores/" + item_id + "_AI-PJ_scores_probs.csv",index=False)
    # Write every rendered item, wrapped in the ALL_XML template, to the
    # per-item XML file.  The ``with`` block closes the handle even if the
    # template formatting or the write itself raises; the previous manual
    # open()/close() pair leaked the handle in that case.
    with open("final_scores/" + item_id + "_AI-PJ_scores.xml", "w") as out:
        out.write(ALL_XML % ("".join(items)))