def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    For every candidate smoothing value, weights are estimated from the
    training data with ``estimate_nb`` and the resulting predictions on the
    dev instances are scored with ``evaluation.acc``.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values to try
    :returns: best smoothing value (``None`` if ``smoothers`` is empty),
        scores of all smoothing values
    :rtype: float, dict
    """
    # This variant uses a fixed label inventory instead of deriving it
    # from y_tr.
    labels = set(
        [u'worldnews', u'science', u'askreddit', u'iama', u'todayilearned'])

    best_smoother = None
    best_acc = None
    scores = {}
    for smoothing in smoothers:
        theta_nb = estimate_nb(x_tr, y_tr, smoothing)
        y_hat = clf_base.predict_all(x_dv, theta_nb, labels)
        accuracy = evaluation.acc(y_hat, y_dv)
        # A single pre-formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("accuracy:  %s" % (accuracy,))
        scores[smoothing] = accuracy
        # The first candidate always becomes the incumbent so that a
        # smoother is returned even when every accuracy is 0; ties keep
        # the earliest candidate.
        if best_acc is None or accuracy > best_acc:
            best_acc = accuracy
            best_smoother = smoothing
    # Return the winning smoothing value itself, not its accuracy.
    return best_smoother, scores
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: best smoothing value (``None`` if ``smoothers`` is empty),
        dict mapping each smoothing value to its dev accuracy
    :rtype: float, dict
    """
    labels = list(set(y_tr))
    best_acc = None
    best_smoother = None
    scores = {}
    for smoother in smoothers:
        theta_i = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, theta_i, labels)
        acc = evaluation.acc(y_hat, y_dv)
        scores[smoother] = acc
        # Seeding with ``None`` (rather than 0) lets the first candidate
        # win unconditionally, so a run in which every accuracy is 0 still
        # returns a smoother. Ties keep the earliest candidate.
        if best_acc is None or acc > best_acc:
            best_acc = acc
            best_smoother = smoother
    return best_smoother, scores
def test_clf_base_d2_3():
    """Check the hand-set weights on the dev data and the saved predictions.

    The dev accuracy obtained with ``hand_weights.theta_hand`` must be at
    least 0.41, and ``hand-test.preds`` must hold one prediction per test
    label.
    """
    global x_dv, y_dv, y_te, labels

    dev_predictions = clf_base.predict_all(
        x_dv, hand_weights.theta_hand, labels)
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_greater_equal(dev_accuracy, .41)

    # Reading the predictions is mainly a check that the file exists.
    test_predictions = evaluation.read_predictions('hand-test.preds')
    eq_(len(test_predictions), len(y_te))
def test_d2_2_predict():
    """Check ``clf_base.predict`` and ``predict_all`` with the hand weights.

    The first pruned training instance must be labelled '2000s' with the
    expected per-label scores, and the dev accuracy over the pruned dev
    instances must match the reference value to five places.
    """
    global x_tr_pruned, x_dv_pruned, y_dv

    # Single-instance prediction: predicted label plus the score per label.
    predicted, label_scores = clf_base.predict(
        x_tr_pruned[0], hand_weights.theta_hand, labels)
    eq_(label_scores['pre-1980'], 0.1)
    assert_almost_equals(label_scores['2000s'], 1.3, places=5)
    eq_(predicted, '2000s')
    eq_(label_scores['1980s'], 0.0)

    # Whole dev set: only the aggregate accuracy is checked.
    dev_predictions = clf_base.predict_all(
        x_dv_pruned, hand_weights.theta_hand, labels)
    assert_almost_equals(
        evaluation.acc(dev_predictions, y_dv), .3422222, places=5)
def test_d2_2_predict():
    """Verify single-instance and dev-set prediction with the hand weights.

    ``clf_base.predict`` on the first pruned training instance has to return
    the label '2000s' together with the expected scores; ``predict_all`` on
    the pruned dev instances has to reach the reference accuracy (compared
    to five places).
    """
    global x_tr_pruned, x_dv_pruned, y_dv

    best_label, scores_by_label = clf_base.predict(
        x_tr_pruned[0],
        hand_weights.theta_hand,
        labels,
    )
    eq_(scores_by_label['pre-1980'], 0.1)
    assert_almost_equals(scores_by_label['2000s'], 1.3, places=5)
    eq_(best_label, '2000s')
    eq_(scores_by_label['1980s'], 0.0)

    all_dev_labels = clf_base.predict_all(
        x_dv_pruned,
        hand_weights.theta_hand,
        labels,
    )
    assert_almost_equals(
        evaluation.acc(all_dev_labels, y_dv),
        .3422222,
        places=5,
    )
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values to try
    :returns: best smoothing value, dict mapping each smoothing value to
        its dev accuracy
    :rtype: float, dict
    :raises ValueError: if ``smoothers`` is empty
    """
    scores = {}
    labels = set(y_tr)
    for s in smoothers:
        theta_nb = estimate_nb(x_tr, y_tr, s)
        y_hat = clf_base.predict_all(x_dv, theta_nb, labels)
        scores[s] = evaluation.acc(y_hat, y_dv)

    # Look each candidate up by key rather than indexing ``smoothers`` with
    # a position in ``scores.values()``: the order of dict values is not
    # tied to the order of the list (and duplicate smoothers collapse into
    # one entry), so a positional index can select the wrong value.
    # ``max`` returns the earliest candidate among ties.
    best = max(smoothers, key=scores.__getitem__)
    return best, scores
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: best smoothing value, dict mapping each smoothing value to
        its dev accuracy
    :rtype: float, dict
    '''
    # The label set does not depend on the smoother, so build it once
    # instead of once per candidate.
    labels = set(y_tr)

    score = {}
    for smoother in smoothers:
        theta_nb = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, theta_nb, labels)
        score[smoother] = evaluation.acc(y_hat, y_dv)

    # clf_base.argmax is presumably "key with the largest value" -- its
    # definition is not visible from here.
    return clf_base.argmax(score), score
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values to try
    :returns: best smoothing value, scores of all smoothing values
    :rtype: float, dict
    :raises ValueError: if ``smoothers`` is empty
    """
    # The weights are estimated from (x_tr, y_tr) only, so the candidate
    # labels come from the training labels; the dev labels are used solely
    # for scoring.
    labels = set(y_tr)

    smoother_acc = {}
    for smoother in smoothers:
        theta = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, theta, labels)
        smoother_acc[smoother] = evaluation.acc(y_hat, y_dv)

    # Key with the highest accuracy; iterating the dict directly avoids the
    # Python-2-only ``iteritems`` and keeps the same tie-breaking (first key
    # in the dict's iteration order).
    best_smoother = max(smoother_acc, key=smoother_acc.get)
    return best_smoother, smoother_acc
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: best smoothing value, dict mapping each smoothing value to
        its dev accuracy
    :rtype: float, dict
    '''
    # Candidate labels come from the training labels, because the weights
    # are estimated from (x_tr, y_tr) only; the dev labels are used solely
    # for scoring.
    genres = set(y_tr)

    accuracy = {}
    for smoother in smoothers:
        theta = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, theta, genres)
        accuracy[smoother] = evaluation.acc(y_hat, y_dv)

    # clf_base.argmax is presumably "key with the largest value" -- its
    # definition is not visible from here.
    best_smoother = clf_base.argmax(accuracy)
    return best_smoother, accuracy
def find_best_smoother(x_tr_pruned, y_tr, x_dv_pruned, y_dv, smoothers):
    '''
    Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr_pruned: training instances
    :param y_tr: training labels
    :param x_dv_pruned: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: 1) best smoothing value, 2) a dictionary of smoothing values
        and dev set accuracy.
    :rtype: 1) float, 2) dictionary
    :raises ValueError: if ``smoothers`` is empty
    '''
    smther_dict = {}
    labels = set(y_tr)
    for smoother in smoothers:
        theta_nb = estimate_nb(x_tr_pruned, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv_pruned, theta_nb, labels)
        smther_dict[smoother] = evaluation.acc(y_hat, y_dv)

    # The best smoother is the one with the HIGHEST dev accuracy, and the
    # first return value is that smoothing value itself (the dict key),
    # not the accuracy stored under it.
    best_smoother = max(smther_dict, key=smther_dict.get)
    return best_smoother, smther_dict
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    The score recorded for each smoothing value is the number of dev
    instances whose predicted label equals the dev label.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values to try
    :returns: best smoothing value, scores of all smoothing values
    :rtype: float, dict
    """
    labels = set(y_tr)
    smoother_scores = {}
    for smoother in smoothers:
        # Estimate on the training data and evaluate on the dev data;
        # doing it the other way round would tune the smoother against
        # the training set.
        nb = estimate_nb(x_tr, y_tr, smoother)
        predictions = clf_base.predict_all(x_dv, nb, list(labels))
        # Built-in ``zip`` needs no itertools import and works on both
        # Python 2 and Python 3.
        smoother_scores[smoother] = sum(
            1 for prediction, target in zip(predictions, y_dv)
            if prediction == target)
    # clf_base.argmax is presumably "key with the largest value" -- its
    # definition is not visible from here.
    return clf_base.argmax(smoother_scores), smoother_scores