예제 #1
0
def classify(processed_csv, test_file=True, **params):
    """Label every tweet in a CSV by comparing positive and negative word hits.

    Args:
        processed_csv: Path passed to ``open``; the file is read as
            ISO-8859-1 through ``csv.DictReader`` and must provide the
            ``ItemID`` and ``SentimentText`` columns, plus ``Sentiment``
            when ``test_file`` is false.
        test_file: When true, each result is ``(tweet_id, prediction)``.
            Otherwise each result is ``(tweet_id, sentiment, prediction)``,
            where ``sentiment`` is the raw string read from the file.
        **params: Must contain ``positive_words`` and ``negative_words``;
            both are popped and handed to ``utils.file_to_wordset``.

    Returns:
        A list of tuples in file order. ``prediction`` is 1 when the tweet
        has at least as many positive as negative word hits (ties count as
        positive), otherwise 0.
    """
    pos_lexicon = utils.file_to_wordset(params.pop('positive_words'))
    neg_lexicon = utils.file_to_wordset(params.pop('negative_words'))
    results = []

    with open(processed_csv, 'r', encoding='ISO-8859-1') as handle:
        for record in csv.DictReader(handle):
            item_id = record['ItemID']
            gold = None if test_file else record['Sentiment']
            tokens = record['SentimentText'].split()

            # +1 per positive hit, -1 per negative hit. A word found in the
            # positive set is never checked against the negative set.
            balance = sum(
                1 if tok in pos_lexicon else -1 if tok in neg_lexicon else 0
                for tok in tokens
            )
            guess = int(balance >= 0)

            if test_file:
                results.append((item_id, guess))
            else:
                results.append((item_id, gold, guess))
    return results
예제 #2
0
def classify(processed_csv, test_file=True, **params):
    """Predict a binary sentiment for each comma-separated line of a file.

    Each line is expected to be ``tweet_id,tweet`` when ``test_file`` is
    true and ``tweet_id,label,tweet`` otherwise. Only the leading fields
    are split off, so commas inside the tweet text are kept as part of the
    tweet instead of breaking the unpacking. Blank lines are skipped.

    Args:
        processed_csv: Path passed to ``open``.
        test_file: Selects the two-field (unlabelled) or three-field
            (labelled) line layout described above.
        **params: Must contain ``positive_words`` and ``negative_words``;
            both are popped and handed to ``utils.file_to_wordset``.

    Returns:
        A list of ``(tweet_id, prediction)`` tuples for unlabelled input,
        or ``(tweet_id, int(label), prediction)`` tuples for labelled
        input. ``prediction`` is 1 when positive hits are at least as
        numerous as negative hits (ties count as positive), otherwise 0.

    Raises:
        ValueError: If a non-blank line has fewer fields than the layout
            requires, or if a label cannot be converted to ``int``.
    """
    positive_words = utils.file_to_wordset(params.pop('positive_words'))
    negative_words = utils.file_to_wordset(params.pop('negative_words'))
    # Number of separators that delimit the leading fields; everything after
    # the last one belongs to the tweet text, commas included.
    max_splits = 1 if test_file else 2
    predictions = []
    # The handle is not called `csv`, so the csv module name is not shadowed.
    with open(processed_csv, 'r') as infile:
        for raw_line in infile:
            line = raw_line.strip()
            if not line:
                continue
            fields = line.split(',', max_splits)
            if test_file:
                tweet_id, tweet = fields
            else:
                tweet_id, label, tweet = fields
            pos_count, neg_count = 0, 0
            for word in tweet.split():
                if word in positive_words:
                    pos_count += 1
                elif word in negative_words:
                    neg_count += 1
            prediction = 1 if pos_count >= neg_count else 0
            if test_file:
                predictions.append((tweet_id, prediction))
            else:
                predictions.append((tweet_id, int(label), prediction))
    return predictions
def classify(processed_csv, service_port, test_file=True, **params):
    """Predict a binary sentiment for each row yielded by ``parse_csv``.

    Args:
        processed_csv: Forwarded unchanged to ``parse_csv``.
        service_port: Forwarded unchanged to ``parse_csv`` and to both
            ``file_to_wordset`` calls. NOTE(review): presumably identifies
            a helper service those functions talk to; confirm against
            their definitions.
        test_file: When true, each result is ``(tweet_id, prediction)``.
            Otherwise the label is read from ``row.field[0]`` and each
            result is ``(tweet_id, int(label), prediction)``.
        **params: Must contain ``positive_words`` and ``negative_words``;
            both are popped and handed to ``file_to_wordset``.

    Returns:
        A list of tuples in the order rows are yielded. The tweet id comes
        from ``row.field[1]`` and the text from ``row.field[2]``.
        ``prediction`` is 1 when positive hits are at least as numerous as
        negative hits (ties count as positive), otherwise 0.
    """
    pos_lexicon = file_to_wordset(params.pop('positive_words'), service_port)
    neg_lexicon = file_to_wordset(params.pop('negative_words'), service_port)
    labelled = not test_file
    results = []
    for record in parse_csv(processed_csv, service_port):
        item_id = record.field[1]
        text = record.field[2]
        gold = record.field[0] if labelled else None

        # +1 per positive hit, -1 per negative hit. A word found in the
        # positive set is never checked against the negative set.
        balance = sum(
            1 if tok in pos_lexicon else -1 if tok in neg_lexicon else 0
            for tok in text.split()
        )
        guess = int(balance >= 0)

        if labelled:
            results.append((item_id, int(gold), guess))
        else:
            results.append((item_id, guess))
    return results