Пример #1
0
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values to try
    :returns: best smoothing value (``None`` if ``smoothers`` is empty),
        scores of all smoothing values
    :rtype: float, dict

    """
    # The label set is fixed to these five values, not derived from y_tr.
    labels = set(
        [u'worldnews', u'science', u'askreddit', u'iama', u'todayilearned'])
    best_smoothing = None
    best_acc = None
    scores = {}
    for smoothing in smoothers:
        theta_nb = estimate_nb(x_tr, y_tr, smoothing)
        y_hat = clf_base.predict_all(x_dv, theta_nb, labels)
        accuracy = evaluation.acc(y_hat, y_dv)
        # Single-argument form prints the same text under Python 2 and 3.
        print("accuracy:  %s" % accuracy)
        # Strict comparison keeps the earliest smoothing value on ties; the
        # None check lets the first candidate win even with zero accuracy.
        if best_acc is None or accuracy > best_acc:
            best_acc = accuracy
            best_smoothing = smoothing
        scores[smoothing] = accuracy
    # Return the smoothing value itself, not the accuracy it achieved.
    return best_smoothing, scores
Пример #2
0
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """
    Pick the smoothing value whose model scores highest on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: the winning smoothing value (``None`` when no candidate scores
        above zero), and a dict mapping each smoothing value to its accuracy
    :rtype: float, dict

    """
    label_list = list(set(y_tr))
    accuracy_by_smoother = {}
    winner, winner_acc = None, 0

    for candidate in smoothers:
        theta = estimate_nb(x_tr, y_tr, candidate)
        predictions = clf_base.predict_all(x_dv, theta, label_list)
        candidate_acc = evaluation.acc(predictions, y_dv)
        accuracy_by_smoother[candidate] = candidate_acc
        # Strict comparison: on ties the earliest candidate stays the winner.
        if candidate_acc > winner_acc:
            winner, winner_acc = candidate, candidate_acc

    return winner, accuracy_by_smoother
Пример #3
0
def test_clf_base_d2_3():
    """Check hand-set weights on the dev data and the saved test predictions.

    The dev accuracy reported by ``evaluation.acc`` must be at least 0.41,
    and 'hand-test.preds' must hold as many entries as ``y_te``.
    """
    global x_dv, y_dv, y_te, labels

    dev_predictions = clf_base.predict_all(
        x_dv, hand_weights.theta_hand, labels)
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_greater_equal(dev_accuracy, .41)

    # Only the presence and length of the predictions file are checked here.
    test_predictions = evaluation.read_predictions('hand-test.preds')
    eq_(len(test_predictions), len(y_te))
def test_clf_base_d2_3():
    """Verify the hand-written classifier weights and their prediction file.

    Asserts that dev accuracy (via ``evaluation.acc``) is >= 0.41 and that
    the number of predictions read from 'hand-test.preds' equals
    ``len(y_te)``.
    """
    global x_dv, y_dv, y_te, labels

    theta = hand_weights.theta_hand
    predicted_dev = clf_base.predict_all(x_dv, theta, labels)
    assert_greater_equal(evaluation.acc(predicted_dev, y_dv), .41)

    # The file contents are not scored; it only has to exist and line up
    # with the test labels.
    predicted_te = evaluation.read_predictions('hand-test.preds')
    expected_count = len(y_te)
    eq_(len(predicted_te), expected_count)
Пример #5
0
def test_d2_2_predict():
    """Check ``clf_base.predict`` and ``predict_all`` with the hand weights.

    The first pruned training instance must be labelled '2000s' with the
    expected per-label scores, and dev accuracy must match .3422222 to five
    decimal places.
    """
    global x_tr_pruned, x_dv_pruned, y_dv

    first_label, first_scores = clf_base.predict(
        x_tr_pruned[0], hand_weights.theta_hand, labels)
    eq_(first_scores['pre-1980'], 0.1)
    assert_almost_equals(first_scores['2000s'], 1.3, places=5)
    eq_(first_label, '2000s')
    eq_(first_scores['1980s'], 0.0)

    dev_predictions = clf_base.predict_all(
        x_dv_pruned, hand_weights.theta_hand, labels)
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_almost_equals(dev_accuracy, .3422222, places=5)
Пример #6
0
def test_d2_2_predict():
    """Exercise single-instance and batch prediction with the hand weights.

    Single instance: ``x_tr_pruned[0]`` must be predicted as '2000s' with
    scores 0.1 ('pre-1980'), ~1.3 ('2000s') and 0.0 ('1980s').
    Batch: accuracy on the pruned dev data must be ~.3422222.
    """
    global x_tr_pruned, x_dv_pruned, y_dv

    theta = hand_weights.theta_hand

    predicted, label_scores = clf_base.predict(x_tr_pruned[0], theta, labels)
    eq_(label_scores['pre-1980'], 0.1)
    assert_almost_equals(label_scores['2000s'], 1.3, places=5)
    eq_(predicted, '2000s')
    eq_(label_scores['1980s'], 0.0)

    all_predicted = clf_base.predict_all(x_dv_pruned, theta, labels)
    assert_almost_equals(
        evaluation.acc(all_predicted, y_dv), .3422222, places=5)
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: sequence of smoothing values to try
    :returns: best smoothing value, scores of all smoothing values
    :rtype: float, dict
    :raises ValueError: if ``smoothers`` is empty
    """
    scores = {}
    labels = set(y_tr)
    for s in smoothers:
        theta_nb = estimate_nb(x_tr, y_tr, s)
        y_hat = clf_base.predict_all(x_dv, theta_nb, labels)
        scores[s] = evaluation.acc(y_hat, y_dv)
    # Choose by looking each candidate up in the score dict. Indexing
    # `smoothers` with a position taken from the dict's values is unsafe
    # because dict order need not match the order of `smoothers`.
    # Ties go to the earliest entry in `smoothers`.
    best = max(smoothers, key=scores.get)
    return best, scores
Пример #8
0
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: the smoothing value selected by ``clf_base.argmax`` over the
        scores, and a dict mapping each smoothing value to its dev accuracy
    :rtype: float, dict

    '''
    # The label set does not depend on the smoother, so build it once.
    labels = set(y_tr)
    score = {}
    for smoother in smoothers:
        theta_nb = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, theta_nb, labels)
        score[smoother] = evaluation.acc(y_hat, y_dv)
    # NOTE(review): clf_base.argmax presumably returns the key with the
    # highest value -- confirm against its definition.
    return clf_base.argmax(score), score
Пример #9
0
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values to try
    :returns: best smoothing value, scores of all smoothing values
    :rtype: float, dict
    :raises ValueError: if ``smoothers`` is empty

    """
    smoother_acc = {}
    # NOTE(review): the candidate labels come from the dev labels here, not
    # from y_tr -- confirm this is intended.
    labels = set(y_dv)
    for smoother in smoothers:
        theta = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, theta, labels)
        smoother_acc[smoother] = evaluation.acc(y_hat, y_dv)

    # Key with the highest accuracy. Unlike dict.iteritems(), this works on
    # both Python 2 and Python 3.
    best_smoother = max(smoother_acc, key=smoother_acc.get)
    return best_smoother, smoother_acc
Пример #10
0
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    Score every smoothing value on the dev data and pick the best one.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: the smoothing value chosen by ``clf_base.argmax`` over the
        scores, and a dict mapping each smoothing value to its dev accuracy
    :rtype: float, dict

    '''
    # Candidate labels are the distinct values found in the dev labels.
    dev_labels = set(y_dv)
    acc_by_smoother = {}
    for value in smoothers:
        theta = estimate_nb(x_tr, y_tr, value)
        predictions = clf_base.predict_all(x_dv, theta, dev_labels)
        acc_by_smoother[value] = evaluation.acc(predictions, y_dv)

    return clf_base.argmax(acc_by_smoother), acc_by_smoother
Пример #11
0
def find_best_smoother(x_tr_pruned, y_tr, x_dv_pruned, y_dv, smoothers):
    '''
    Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr_pruned: training instances
    :param y_tr: training labels
    :param x_dv_pruned: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: 1) best smoothing value, 2) a dictionary of smoothing values and dev set accuracy.
    :rtype: 1) float, 2) dictionary
    :raises ValueError: if ``smoothers`` is empty

    '''
    smther_dict = {}
    labels = set(y_tr)
    for smoother in smoothers:
        theta_nb = estimate_nb(x_tr_pruned, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv_pruned, theta_nb, labels)
        smther_dict[smoother] = evaluation.acc(y_hat, y_dv)
    # "Best" means highest accuracy, so take the max, and return the
    # smoothing value (the dict key), not the accuracy stored under it.
    best_smoother = max(smther_dict, key=smther_dict.get)

    return best_smoother, smther_dict
Пример #12
0
def find_best_smoother(x_tr,y_tr,x_dv,y_dv,smoothers):
    """Find the smoothing value that gives the best accuracy on the dev data.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values to try
    :returns: the smoothing value selected by ``clf_base.argmax`` over the
        scores, and a dict mapping each smoothing value to the number of dev
        instances predicted correctly
    :rtype: float, dict

    """
    labels = list(set(y_tr))
    smoother_scores = {}
    for smoother in smoothers:
        # Estimate on the training split and score on the dev split; doing it
        # the other way round would tune the smoother on the wrong data.
        nb = estimate_nb(x_tr, y_tr, smoother)
        predictions = clf_base.predict_all(x_dv, nb, labels)
        # Count of correct dev predictions. Plain zip works on both Python 2
        # and 3, unlike itertools.izip.
        smoother_scores[smoother] = sum(
            1 for prediction, target in zip(predictions, y_dv)
            if prediction == target)
    # NOTE(review): clf_base.argmax presumably returns the key with the
    # highest value -- confirm against its definition.
    return clf_base.argmax(smoother_scores), smoother_scores