Пример #1
0
import pprint

import featurize
import util

if __name__ == "__main__":
    # Exploratory script: load the comments stored in
    # data/hiphopheads.csv and report summary figures about their scores.
    # compsci = util.load_comments_from_file("data/compsci.csv")
    hiphop = util.load_comments_from_file("data/hiphopheads.csv")
    # int() is applied to every "score" so the ordering is numeric;
    # reverse=True puts the highest-scoring comment first.
    hiphop = sorted(hiphop, key=lambda x: int(x["score"]), reverse=True)

    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on 3.
    print('Number of comments: {}'.format(len(hiphop)))
    # NOTE: hiphop[0] raises IndexError when no comments were loaded.
    print('Highest score: {}'.format(hiphop[0]["score"]))
    print('Top five comments:')

Пример #2
0
def load_data(input_file_name):
    """Load the comments in `input_file_name` and return them transformed.

    The comments are read with `util.load_comments_from_file` and handed
    to `f.transform`; the result of that call is returned as-is.  `f` is
    defined outside this function (presumably a featurizer -- confirm).
    """
    comments = util.load_comments_from_file(input_file_name)
    return f.transform(comments)
Пример #3
0
    # Train a linear regression on the comments in args.input_file and
    # report its mean squared error on a held-out portion of the data.
    # print(...) with a single argument is used throughout so the code
    # runs unchanged on both Python 2 and Python 3.
    args = parser.parse_args()
    print('Loading and processing comments...')
    data = load_data(args.input_file)
    # The last column of `data` is taken as the target score; every
    # other column is treated as a feature.
    features = data[:, :-1]
    scores = data[:, -1]
    N = features.shape[0]
    # The first 60% of the rows are used for training, the rest for
    # testing.
    # NOTE(review): rows are split in the order load_data returns them,
    # with no shuffling -- confirm the input order does not bias the split.
    split = int(math.floor(0.6 * N))
    print('Loaded and processed {} comments'.format(N))

    train_features = features[:split, :]
    train_scores = scores[:split]
    test_features = features[split:, :]
    test_scores = scores[split:]

    print('Training model...')
    clf = linear_model.LinearRegression()
    clf.fit(train_features, train_scores)

    print('Mean Squared Error: ')
    error = mean_squared_error(test_scores, clf.predict(test_features))
    print(error)

    # DEBUG
    # Re-read the raw comments so the last five can be shown next to the
    # model's prediction for each of them.
    raw = util.load_comments_from_file(args.input_file)
    for c in raw[-5:]:
        util.pretty_print_comment(c)
        # NOTE(review): assumes f.transform_comment(c) yields only the
        # feature columns (no score column) -- confirm against `f`.
        prediction = clf.predict(f.transform_comment(c))
        print('Real score: {}, Predicted: {}'.format(c.get("score"),
                                                     prediction))
Пример #4
0
def load_data(input_file_name):
    """Return the transformed comments read from `input_file_name`.

    Reads the file through `util.load_comments_from_file`, then passes
    the loaded comments to `f.transform` and returns whatever it
    produces.  `f` lives outside this function (presumably a
    featurizer -- confirm).
    """
    raw_comments = util.load_comments_from_file(input_file_name)
    transformed = f.transform(raw_comments)
    return transformed
Пример #5
0
    # Fit a linear regression to the comments in args.input_file and
    # print the mean squared error measured on the held-out rows.
    # Every print uses the single-argument call form, which behaves the
    # same on Python 2 and Python 3.
    args = parser.parse_args()
    print('Loading and processing comments...')
    data = load_data(args.input_file)
    # All columns but the last are features; the last column is the
    # score being predicted.
    features = data[:, :-1]
    scores = data[:, -1]
    N = features.shape[0]
    # 60/40 train/test split by row position.
    # NOTE(review): no shuffling happens before the split -- confirm the
    # row order returned by load_data is acceptable for evaluation.
    split = int(math.floor(0.6 * N))
    print('Loaded and processed {} comments'.format(N))

    train_features = features[:split, :]
    train_scores = scores[:split]
    test_features = features[split:, :]
    test_scores = scores[split:]

    print('Training model...')
    clf = linear_model.LinearRegression()
    clf.fit(train_features, train_scores)

    print('Mean Squared Error: ')
    error = mean_squared_error(test_scores,
                               clf.predict(test_features))
    print(error)

    # DEBUG
    # Load the raw comments again and print the final five together with
    # the score the fitted model predicts for each.
    raw = util.load_comments_from_file(args.input_file)
    for c in raw[-5:]:
        util.pretty_print_comment(c)
        # NOTE(review): assumes f.transform_comment(c) returns just the
        # feature values expected by clf -- confirm against `f`.
        prediction = clf.predict(f.transform_comment(c))
        print('Real score: {}, Predicted: {}'.format(c.get("score"),
                                                     prediction))
Пример #6
0
import pprint

import featurize
import util

if __name__ == "__main__":
    # Quick look at the comments in data/hiphopheads.csv: how many there
    # are and what the best score is.
    # compsci = util.load_comments_from_file("data/compsci.csv")
    hiphop = util.load_comments_from_file("data/hiphopheads.csv")
    # Sort by the integer value of "score", largest first.
    hiphop = sorted(hiphop, key=lambda x: int(x["score"]), reverse=True)

    # print(...) with one argument works identically on Python 2 and 3;
    # the bare print statement does not parse on Python 3.
    print('Number of comments: {}'.format(len(hiphop)))
    # NOTE: an empty comment list makes hiphop[0] raise IndexError.
    print('Highest score: {}'.format(hiphop[0]["score"]))
    print('Top five comments:')