Exemplo n.º 1
0
def calculate_accuracy(emoji='',
                       metrics=None,
                       spam=None,
                       sentiment_api_columns=None,
                       db_name="sentiment_db"):
    """
    Call update_single_performance once for every combination of
    sentiment api column, spam setting and emoji setting.

    Open three database connections:
        - one to fetch comment sentiment
        - one to fetch comment emoji sentiment
        - one to store accuracy in sentiment api stats table

    All three connections are closed when the function exits, also when
    one of the calculations raises.

    Arguments:
        emoji: iterable of values handed on as consider_emoji
        metrics: handed on unchanged to update_single_performance
        spam: iterable of values handed on as consider_spam
        sentiment_api_columns: iterable of sentiment api column names
        db_name: database name given to every connection

    An iterable argument left as None is treated as empty, so no
    calculation is run for it.
    """
    # The None defaults cannot be iterated; treat them like the empty
    # default of `emoji`, which already results in zero iterations.
    if sentiment_api_columns is None:
        sentiment_api_columns = ()
    if spam is None:
        spam = ()
    if emoji is None:
        emoji = ()

    db = CommentSentimentDbConnection(db=db_name)
    db.connect()

    db_emoji = CommentEmojiSentimentDbConnection(db=db_name)
    db_emoji.connect()

    db_update = SentimentApiStatsDbConnection(db=db_name)
    db_update.connect()

    try:
        for sentiment_api_column in sentiment_api_columns:
            for consider_spam in spam:
                for consider_emoji in emoji:
                    update_single_performance(db, db_emoji, db_update,
                                              consider_spam, consider_emoji,
                                              metrics, sentiment_api_column)

        print_horizontal_rule()
    finally:
        # Release the connections even if a calculation failed.
        db_update.close()
        db_emoji.close()
        db.close()
def mark_if_comment_has_emojis(id_selection="", db_name="sentiment_db"):
    """
    Set the 'has_emoji' column to 1 for every selected comment whose
    english translation contains an emoji.

    Open two database connections:
        - one to fetch comment sentiment
        - another (held by EmojiStats) to update comment_sentiment_emoji
          records

    Both are closed when the function exits, also when processing raises.

    Arguments:
        id_selection: where clause written against 'id'; every occurrence
            of the substring 'id' is replaced by 'idcommento' before use
        db_name: database name given to the comment sentiment connection
    """
    emoji_stats = EmojiStats()

    db_sentiment = CommentSentimentDbConnection(db=db_name)
    db_sentiment.connect()

    try:
        id_selection = id_selection.replace('id', 'idcommento')

        results = db_sentiment.fetch_all(
            select='idcommento, english_translation',
            where=id_selection)

        for row in results:
            print_horizontal_rule()
            comment_id = row[0]
            english_translation = row[1]

            print("Comment_id: %s" % comment_id)
            print("Translation: %s" % english_translation)

            if emoji_stats.contains_emoji(english_translation):
                emoji_stats.update_db(value=1,
                                      column='has_emoji',
                                      comment_id=comment_id)
            else:
                print("Didn't detect any emojis... Not updating db")

        print_horizontal_rule()
    finally:
        # Release the connections even if fetching or updating failed.
        db_sentiment.close()
        emoji_stats.close_db_connection()
def run_sentiment_api_batch(api=None,
                            id_selection="",
                            db_name="sentiment_db",
                            original_language=False):
    """
    Send the selected comments to a sentiment api and store the
    sentiment stats it returns.

    Open two database connections:
        - one to fetch comment records
        - another to update comment_sentiment records

    Both are closed when the function exits, also when processing raises.

    Only comments with a non-empty english translation are fetched, in
    ascending id order. Processing stops at the first request that the
    api reports as unsuccessful.

    Arguments:
        api: sentiment api object; its sentiment_api_column names the
            column that receives the JSON-encoded sentiment stats
        id_selection: optional where clause written against 'id'; every
            occurrence of the substring 'id' is replaced by 'c.id'
        db_name: database name given to both connections
        original_language: when true the original comment content is sent
            to the api, otherwise its english translation
    """
    db = Database(db=db_name)
    db.connect()

    db_sentiment = CommentSentimentDbConnection(db=db_name)
    db_sentiment.connect()

    try:
        print('\nUsing %s ' % api)

        if id_selection != '':
            id_selection = id_selection.replace('id', 'c.id') + ' AND '

        # The backslash continuations sit inside the string literal, so the
        # leading spaces of the continued lines are part of the SQL text.
        results = db.fetch_all(
            select='c.id, c.content, s.english_translation',
            from_clause='im_commento AS c JOIN   \
        im_commento_sentiment AS s\
        ON c.id = s.idcommento',
            where=id_selection + "s.english_translation != ''",
            order_by='c.id ASC')

        for row in results:
            print_horizontal_rule()
            comment_id = row[0]
            content = row[1]
            english_translation = row[2]

            print("Comment_id: %s" % comment_id)
            print("Content: %s" % content)
            print("Translation: %s" % english_translation)

            api.set_data(content if original_language else english_translation)
            api.post()

            if not api.is_request_successful():
                print("API request was NOT successful: returned %d status code" %
                      api.get_status_code())
                # Give up on the rest of the batch after the first failure.
                break

            api.update_sentiment_stats()
            print("Predicted sentiment: %s" %
                  json.dumps(api.get_sentiment_stats(), indent=2))

            db_sentiment.update(comment_id=comment_id,
                                column=api.sentiment_api_column,
                                value=json.dumps(api.get_sentiment_stats()))

        print_horizontal_rule()
    finally:
        # Release the connections even if the api or a query raised.
        db.close()
        db_sentiment.close()
Exemplo n.º 4
0
def update_single_performance(db, db_emoji, db_update, consider_spam,
                              consider_emoji, metrics, sentiment_api_column):
    """
    Compare the stored real sentiment with the sentiment predicted by one
    api column, print the resulting stats and store one value per metric.

    Arguments:
        db: connection used to fetch idcommento, real_sentiment and the
            api column
        db_emoji: connection queried through fetch_sentiment_by_comment_id
            when consider_emoji is true
        db_update: connection whose update() receives every metric value
        consider_spam: when false, only rows matching
            spam REGEXP "false" are fetched
        consider_emoji: when true, the predicted sentiment is taken from
            db_emoji and '_emoji' is appended to the stored api id
        metrics: iterable of metric names, also passed to Metric()
        sentiment_api_column: name of the column holding the prediction

    Rows whose real or predicted sentiment cannot be parsed as JSON, or
    parses to an empty value, are skipped.
    """
    print_horizontal_rule()
    print_horizontal_rule()

    api_id = sentiment_api_column + ('_emoji' if consider_emoji else '')

    print("Calculations for api: %s %s spam" %
          (api_id, 'with' if consider_spam else 'without'))

    # NOTE(review): because of the OR, the empty-string test has no effect
    # (any non-null value already satisfies the first operand). Empty
    # real_sentiment values reach the loop below and are dropped there,
    # since json.loads raises ValueError for them.
    where_clause = '(real_sentiment is not null OR real_sentiment != "") AND %s !="{}" ' % sentiment_api_column
    if not consider_spam:
        where_clause += ' AND spam REGEXP "false"'

    metric = Metric(metrics)
    results = db.fetch_all(select='idcommento, real_sentiment, %s' %
                           sentiment_api_column,
                           where=where_clause)

    for row in results:
        comment_id = row[0]
        try:
            real_sentiment = json.loads(row[1])
            api_sentiment = json.loads(row[2])
        except ValueError:
            continue
        if not real_sentiment or not api_sentiment:
            continue

        if consider_emoji:
            try:
                emoji_rows = db_emoji.fetch_sentiment_by_comment_id(
                    comment_id=comment_id,
                    sentiment=sentiment_api_column)
                api_sentiment = json.loads(emoji_rows[0][0])
            except (IndexError, TypeError, ValueError):
                # No emoji record (IndexError), a missing value (TypeError)
                # or a non-JSON value (ValueError): skip the comment, as is
                # done above for unparsable sentiment.
                continue

        metric.update_stats(real_sentiment=real_sentiment,
                            predicted_sentiment=api_sentiment)

    metric.print_real_sentiment_distribution()
    metric.calculate_stats()
    metric.print_stats()

    for metric_name in metrics:
        print_sparse_horizontal_rule()
        db_update.update(
            column=metric.get_db_column(metric_name, consider_spam),
            value=metric.get_db_safe_stats(metric_name),
            api_id=api_id)
def detect_spam(api=None, id_selection="", use_en="", db_name="sentiment_db"):
    """
    Ask a spam api whether each selected comment is spam and store the
    JSON-encoded verdict.

    Open three database connections:
        - one to fetch comment records
        - one to fetch the english translation
        - another to update comment_spam records

    All three are closed when the function exits, also when processing
    raises.

    Arguments:
        api: spam api object providing is_spam() and get_db_column()
        id_selection: where clause used to select the comments
        use_en: when true the english translation is checked instead of
            the original comment content
        db_name: database name given to every connection

    The stored value has the keys 'is_spam' (the api verdict) and 'type'
    (always 'other').
    """
    db_spam = CommentSpamDbConnection(db=db_name)
    db_spam.connect()

    db_comment = CommentDbConnection(db=db_name)
    db_comment.connect()

    db_sentiment = CommentSentimentDbConnection(db=db_name)
    db_sentiment.connect()

    try:
        results = db_comment.fetch_all(where=id_selection)

        spam_json = {'is_spam': False, 'type': 'other'}

        for row in results:
            print_horizontal_rule()
            comment_id = row[0]
            comment = row[1]
            post_id = row[2]
            comment_author = row[3]
            # Third field of the first comment_sentiment row of this comment.
            english_translation = db_sentiment.fetch_by_comment_id(
                comment_id)[0][2]
            content = english_translation if use_en else comment

            content_description = 'English translation' if use_en else 'Original content'
            print("Comment_id: %s" % comment_id)
            print("%s: %s" % (content_description, content))
            if not use_en:
                print("English translation: %s" % english_translation)

            # Work on a copy so the template is not shared between comments.
            spam = deepcopy(spam_json)
            spam['is_spam'] = api.is_spam(content=content,
                                          post_id=post_id,
                                          comment_author=comment_author)

            print("%s  detected as: %s spam" %
                  (content_description, '' if spam['is_spam'] else 'NOT'))

            db_spam.update(comment_id=comment_id,
                           column=api.get_db_column(use_en),
                           value=json.dumps(spam))

        print_horizontal_rule()
    finally:
        # Release every connection, including the sentiment connection
        # that was previously left open.
        db_comment.close()
        db_spam.close()
        db_sentiment.close()
Exemplo n.º 6
0
def mark_comments_as_spam(spam_json, rows, db):
    """
    Write spam_json, encoded as JSON, to the 'spam' column of every
    comment in rows.

    Arguments:
        spam_json: value that is JSON-encoded and stored
        rows: iterable of rows; index 1 is read as the comment id and
            index 2 as the english translation
        db: object whose update() is called once per row
    """
    for record in rows:
        print_horizontal_rule()
        comment_id, english_translation = record[1], record[2]

        print("Comment id: %s" % comment_id)
        print("Translation: %s" % english_translation)

        encoded_spam = json.dumps(spam_json)
        print(encoded_spam)

        db.update(column='spam', value=encoded_spam, comment_id=comment_id)

        print_horizontal_rule()
def set_default_post_sentiment(subquery, where_clause, db,
                               sentiment_api_columns):
    """
    Set every given sentiment api column of im_post_sentiment to the
    JSON-encoded SENTIMENT_DEFAULT_STATS for posts not in subquery.

    Arguments:
        subquery: SQL text appended after 'idpost not in '
        where_clause: SQL text placed in front of that condition; every
            occurrence of the substring 'id' is replaced by 'idpost'
        db: object whose update() performs the write
        sentiment_api_columns: names of the columns to set
    """
    print_horizontal_rule()
    column_listing = '\n '.join(sentiment_api_columns)
    print('Setting columns %s of commentless posts to default to JSON:' %
          column_listing)
    print(json.dumps(SENTIMENT_DEFAULT_STATS, indent=2))

    defaults_by_column = {}
    for column_name in sentiment_api_columns:
        defaults_by_column[column_name] = json.dumps(SENTIMENT_DEFAULT_STATS)

    post_filter = where_clause.replace('id', 'idpost')
    post_filter = post_filter + 'idpost not in ' + subquery

    db.update(table='im_post_sentiment',
              set=defaults_by_column,
              where=post_filter)
    print_horizontal_rule()
def detect_lang_and_translate_content(id_selection="", db_name="sentiment_db"):
    """
    Detect the language of every selected comment with GoogleTranslateAPI
    and store the detected language and the english translation.

    Open two database connections:
        - one to fetch comment records
        - another to update comment language records

    Both are closed when the function exits, also when processing raises.
    Processing stops at the first request that the api reports as
    unsuccessful.

    Arguments:
        id_selection: where clause used to select the comments
        db_name: database name given to both connections
    """
    db_comment = CommentDbConnection(db=db_name)
    db_comment.connect()

    db_language = Database(db=db_name)
    db_language.connect()

    try:
        api = GoogleTranslateAPI()
        print('\nUsing GoogleTranslateAPI')

        results = db_comment.fetch_all(where=id_selection)

        for row in results:
            print_horizontal_rule()
            comment_id = row[0]
            content = row[1]

            print("Comment_id: %s" % comment_id)
            print("Content: %s" % content)

            api.set_text(content)
            api.get()

            if not api.is_request_successful():
                print("API request was NOT successful: returned %d status code" %
                      api.get_status_code())
                # Give up on the rest of the batch after the first failure.
                break

            print("Detected language: %s" % api.get_detected_language())
            print("English translation: %s" % api.get_translation())
            # NOTE(review): only single quotes are escaped by hand here;
            # confirm how Database.update builds its statement.
            db_language.update(table="im_commento_sentiment",
                               where="idcommento = %d" % comment_id,
                               set={
                                   api.language_column:
                                   api.get_detected_language(),
                                   api.english_translation_column:
                                   api.get_translation().replace("'", "\\'")
                               })

        print_horizontal_rule()
    finally:
        # Release the connections even if the api or a query raised.
        db_comment.close()
        db_language.close()
Exemplo n.º 9
0
def update_sentiment_emoji_stats(api=None, id_selection="", db_name="sentiment_db"):
    """
    Combine the stored sentiment of every selected comment with the emoji
    stats of its english translation and store the result.

    Open a comment sentiment connection to fetch the stored sentiment and
    use EmojiStats both to compute the emoji stats and to store the
    result. The connection is closed and
    EmojiStats.close_db_connection() is called when the function exits,
    also when processing raises.

    Arguments:
        api: sentiment api object; its sentiment_api_column names the
            column that is read and written
        id_selection: where clause written against 'id'; every occurrence
            of the substring 'id' is replaced by 'idcommento'
        db_name: database name given to the comment sentiment connection

    Rows whose stored sentiment is not valid JSON are reported and
    skipped. Every other row is written, whether or not its translation
    contains emojis.
    """
    emoji_stats = EmojiStats()
    db_sentiment = CommentSentimentDbConnection(db=db_name)
    db_sentiment.connect()

    try:
        print('\nUsing %s ' % api)
        id_selection = id_selection.replace('id', 'idcommento')

        results = db_sentiment.fetch_all(
            select='idcommento, %s , english_translation' % api.sentiment_api_column,
            where=id_selection)

        for row in results:
            print_horizontal_rule()
            comment_id = row[0]
            english_translation = row[2]
            print("Comment_id: %s" % comment_id)
            print("Translation: %s" % english_translation)

            try:
                api.sentiment_stats = json.loads(row[1])
                print("Current sentiment: %s" % json.dumps(api.get_sentiment_stats(), indent=2))
            except ValueError:
                print("ValueError: Not a Json Object: %s" % row[1])
                continue

            if emoji_stats.contains_emoji(english_translation):
                comment_emoji_stats = emoji_stats.get_comment_emoji_stats(english_translation)
                api.update_emoji_sentiment_stats(comment_emoji_stats)
                print("Updated sentiment: %s" % json.dumps(api.get_sentiment_stats(), indent=2))

            emoji_stats.update_db(
                comment_id=comment_id,
                column=api.sentiment_api_column,
                value=json.dumps(api.get_sentiment_stats()))

        print_horizontal_rule()
    finally:
        # Release the connections even if a query or the api raised.
        db_sentiment.close()
        emoji_stats.close_db_connection()
def convert_sentiment_api_labels(sentiment_api_column='', id_selection='', db_name="sentiment_db"):
    """
    Rewrite the stored sentiment of every selected comment so that it
    has a 'sentiment_stats' entry and a matching 'sentiment_label'.

    Open one database connection:
        - to fetch comment sentiment
        - and to store the converted json

    The connection is closed when the function exits, also when
    processing raises.

    Arguments:
        sentiment_api_column: name of the column that is read and written
        id_selection: where clause written against 'id'; every occurrence
            of the substring 'id' is replaced by 'idcommento'
        db_name: database name given to the connection

    Rows whose stored sentiment parses to an empty value are skipped. A
    value without a 'sentiment_stats' key is wrapped into a copy of
    SENTIMENT_DEFAULT_STATS under that key. The label is the one returned
    by get_most_frequent_label, or 'neutral' when is_neutral says so.
    """

    db = CommentSentimentDbConnection(db=db_name)
    db.connect()

    try:
        results = db.fetch_all(
            select='idcommento, %s' % sentiment_api_column,
            where=id_selection.replace('id', 'idcommento'))

        for row in results:
            print_horizontal_rule()
            comment_id = row[0]
            sentiment = json.loads(row[1])

            print("Comment id: %d" % comment_id)
            print("%s: %s" % (sentiment_api_column, json.dumps(sentiment, indent=2)))

            if not sentiment:
                continue
            if 'sentiment_stats' not in sentiment:
                sentiment_stats = deepcopy(sentiment)
                sentiment = deepcopy(SENTIMENT_DEFAULT_STATS)
                sentiment['sentiment_stats'] = sentiment_stats
                # NOTE(review): this waits for the operator to press Enter
                # before each wrapped value is stored, and raw_input only
                # exists on Python 2 - confirm the pause is still wanted.
                raw_input()

            # The label is computed once here for both the wrapped and the
            # already converted form.
            sentiment['sentiment_label'] = get_most_frequent_label(deepcopy(sentiment['sentiment_stats']))
            if is_neutral(sentiment['sentiment_stats']):
                sentiment['sentiment_label'] = 'neutral'
            print("Converting to...")
            print("%s: %s" % (sentiment_api_column, json.dumps(sentiment, indent=2)))

            db.update(
                column=sentiment_api_column,
                value=json.dumps(sentiment),
                comment_id=comment_id)

            print_horizontal_rule()
    finally:
        # Release the connection even if parsing or updating raised.
        db.close()
def determine_and_update_post_sentiment(rows, db, sentiment_api_columns):
    """
    For every post in rows and every given api column, store the result
    of count_comment_sentiment_labels, encoded as JSON, in
    im_post_sentiment.

    Arguments:
        rows: iterable of rows; index 0 is read as the post id
        db: object passed to count_comment_sentiment_labels and whose
            update() performs the write
        sentiment_api_columns: names of the api columns to process
    """
    for post_row in rows:
        print_horizontal_rule()
        post_id = post_row[0]

        for column_name in sentiment_api_columns:
            label_counts = count_comment_sentiment_labels(
                db=db, post_id=post_id, api_column=column_name)

            print("Post id: %s" % post_id)
            print("Api column: %s" % column_name)
            print(json.dumps(label_counts, indent=2))

            new_values = {column_name: json.dumps(label_counts)}
            db.update(table='im_post_sentiment',
                      set=new_values,
                      where='idpost=%d' % post_id)

            print_horizontal_rule()
def convert_comment_sentiment_to_json(sentiment_api_column='',
                                      id_selection='',
                                      db_name="sentiment_db"):
    """
    Convert the plain sentiment value stored for every selected comment
    to JSON sentiment stats.

    Open one database connection:
        - to fetch comment sentiment
        - and to store it in json form

    The connection is closed when the function exits, also when
    processing raises.

    Arguments:
        sentiment_api_column: name of the column that is read and written
        id_selection: where clause written against 'id'; every occurrence
            of the substring 'id' is replaced by 'idcommento'
        db_name: database name given to the connection

    Empty values and values that is_json accepts are left untouched. Any
    other value is used as a key in a copy of SENTIMENT_DEFAULT_STATS and
    set to 1.0.
    """

    db = CommentSentimentDbConnection(db=db_name)
    db.connect()

    try:
        results = db.fetch_all(select='idcommento, %s' % sentiment_api_column,
                               where=id_selection.replace('id', 'idcommento'))

        for row in results:
            print_horizontal_rule()
            comment_id = row[0]
            api_sentiment = row[1]

            print("Comment id: %d" % comment_id)
            print("%s: %s" % (sentiment_api_column, api_sentiment))

            if api_sentiment is None or api_sentiment == '':
                print('No record (nothing to convert)')
                print_horizontal_rule()
                continue
            if is_json(api_sentiment):
                print('Already in json format')
                print_horizontal_rule()
                continue

            sentiment_stats = deepcopy(SENTIMENT_DEFAULT_STATS)
            sentiment_stats[api_sentiment] = 1.0
            print("Converting to...")
            print("%s: %s" % (sentiment_api_column, json.dumps(sentiment_stats)))

            db.update(column=sentiment_api_column,
                      value=json.dumps(sentiment_stats),
                      comment_id=comment_id)

            print_horizontal_rule()
    finally:
        # Release the connection even if fetching or updating raised.
        db.close()
Exemplo n.º 13
0
def update_real_sentiment_batch(id_selection="", db_name="sentiment_db"):
    """
    Walk through the selected comments and determine and store, for each
    one, whether it is a mention, whether it is spam and its real
    sentiment.

    Open two database connections:
        - one to fetch comment records
        - another to update comment_sentiment records

    Both are closed when the function exits, also when processing raises.

    Arguments:
        id_selection: where clause used to select the comments
        db_name: database name given to both connections

    Comments determined to be spam get the default real sentiment via
    store_default_real_sentiment; all others go through
    determine_and_store_real_sentiment with the stored real sentiment.
    """
    db_comment = CommentDbConnection(db=db_name)
    db_comment.connect()

    db_sentiment = CommentSentimentDbConnection(db=db_name)
    db_sentiment.connect()

    try:
        results = db_comment.fetch_all(where=id_selection)
        for row in results:
            print_horizontal_rule()
            print_horizontal_rule()
            comment_id = row[0]
            content = row[1]
            # First comment_sentiment row of this comment: index 2 is read
            # as the english translation, index 3 as the real sentiment.
            comment_sentiment_row = db_sentiment.fetch_by_comment_id(comment_id)[0]
            english_translation = comment_sentiment_row[2]
            real_sentiment = comment_sentiment_row[3]

            print("Comment_id: %s" % comment_id)
            print("Content: %s" % content.encode('utf-8'))
            print("English translation: %s" % english_translation.encode('utf-8'))

            print_sparse_horizontal_rule()
            determine_and_store_is_mention(db=db_sentiment, comment_id=comment_id)

            print_sparse_horizontal_rule()
            spam = determine_and_store_spam(db=db_sentiment, comment_id=comment_id)

            print_sparse_horizontal_rule()
            if spam['is_spam']:
                store_default_real_sentiment(db=db_sentiment,
                                             comment_id=comment_id)
            else:
                determine_and_store_real_sentiment(db=db_sentiment,
                                                   comment_id=comment_id,
                                                   real_sentiment=real_sentiment)

        print_horizontal_rule()
    finally:
        # Release the connections even if a step raised.
        db_sentiment.close()
        db_comment.close()