Exemplo n.º 1
0
    def __init__(self, clf):
        """Keep a reference to ``clf`` and fit it on all labeled tweets.

        The tweets come from ``extract.extract_labeled_tweets()``; each
        record unpacks as ``(id, label, text)``.  Labels are converted to
        0/1 with ``convert_labels_to_binary`` before fitting.

        Args:
            clf: object exposing ``fit(samples, targets)``.
        """
        self.clf = clf

        # Materialise the records, then transpose them into parallel columns.
        records = list(extract.extract_labeled_tweets())
        _, raw_labels, raw_tweets = zip(*records)

        samples = np.asarray(raw_tweets)

        # remap non-negative ('X') to 1, negative 0
        targets = convert_labels_to_binary(np.asarray(raw_labels), ['X'])

        self.clf.fit(samples, targets)
Exemplo n.º 2
0
def evaluate_revised(scorer):
    """Evaluate a scorer using revised labels.

    Every record yielded by ``extract.extract_labeled_tweets()`` unpacks as
    ``(id, label, text)``.  The text is scored with
    ``scorer.get_document_score(text, normalize=False)``; a negative score
    is predicted as '-' and anything else as 'X'.  One line is printed with
    the first entry of the precision, recall and F-score results returned by
    ``precision_recall_fscore_support`` plus the overall accuracy.

    Args:
        scorer: object exposing ``get_document_score(text, normalize=...)``
            whose result can be compared with 0.

    Raises:
        ValueError: if there are no labeled tweets to evaluate.
    """

    def get_score(text):
        """Turn the scorer's raw document score into a '-' / 'X' label."""
        score = scorer.get_document_score(text, normalize=False)
        return '-' if score < 0 else 'X'

    res = [(label, get_score(text)) for _id, label, text in
           extract.extract_labeled_tweets()]
    if not res:
        # zip(*[]) would fail below with an opaque unpacking ValueError.
        raise ValueError('no labeled tweets to evaluate')
    y_true, y_pred = zip(*res)

    # NOTE(review): index 0 below is whichever label the metric function
    # reports first -- presumably '-', which sorts before 'X'; confirm
    # against the data's label set.
    p, r, f, _support = precision_recall_fscore_support(
        y_true, y_pred, pos_label='-')
    correct = sum(1 for label, pred in zip(y_true, y_pred) if label == pred)
    accuracy = float(correct) / len(y_true)
    # A parenthesised single argument prints the same under Python 2 and 3.
    print('precision=%.3f recall=%.3f fscore=%.3f accuracy=%.3f' % (
        p[0], r[0], f[0], accuracy))
Exemplo n.º 3
0
        analyzer = super(TfidfVectorizer, self).build_analyzer()
        return lambda doc: [stemmer.stem(x) for x in analyzer(doc)]

def convert_labels_to_binary(Y, one_label_list):
    """Convert string labels to zero or ones.

    Args:
        Y: array-like of labels (a NumPy array or anything ``np.asarray``
            accepts).
        one_label_list: labels that map to 1; every other label maps to 0.
            An empty list yields all zeros.

    Returns:
        A new integer NumPy array with the same shape as ``Y`` holding 1
        where the label is in ``one_label_list`` and 0 elsewhere.  ``Y`` is
        not modified.
    """
    Y = np.asarray(Y)

    # Start from an all-False mask so zero, one or many labels share one path.
    pos = np.zeros(Y.shape, dtype=bool)
    for label in one_label_list:
        pos |= Y == label

    # The builtin ``int`` is the dtype the removed ``np.int`` alias stood for.
    return pos.astype(int)

if __name__ == "__main__":
    # Script entry point: load the labeled tweets, binarise the labels,
    # split into train/test sets, fit a classifier and predict on the
    # test set.
    # NOTE(review): this block appears to continue past the visible excerpt
    # (y_preds and target_names are not used within it).

    # Each extracted record unpacks as (id, label, tweet text).
    results = list(extract.extract_labeled_tweets())
    _ids, _labels, _tweets = zip(*results)

    tweets = np.asarray(_tweets)
    labels = np.asarray(_labels)

    # remap 'X' to 1, everything else to 0
    labels = convert_labels_to_binary(labels, ['X'])

    # test_size=0.2 and random_state=0 are passed to train_test_split --
    # presumably a 20% held-out test set with a fixed seed; confirm against
    # the imported splitter.
    X_train, X_test, y_train, y_test = train_test_split(
        tweets, labels, test_size=0.2, random_state=0)
    clf = create_logistic_regression_classifier()
    clf.fit(X_train, y_train)

    y_preds = clf.predict(X_test)
    # Presumably display names for binary labels 0 and 1, in that order;
    # the consumer is outside this view -- TODO confirm.
    target_names = ['Negative', 'Non-negative']