示例#1
0
def insert_into_db(metaData):
    """Inserts the supplied articles into the database.
    Duplicate articles are ignored."""
    print('Trying to insert %d elements into the database.' % len(metaData))
    conn = database.get_connection()
    cur = conn.cursor()
    try:
        insert_categories(metaData, cur)
        article_category_sql = 'insert into article_categories values(%s,%s)'

        for i, (article_id, article) in enumerate(metaData.items()):
            insert_article(cur, article_id, article)

            if cur.rowcount == 0:  # Ignore article already in database
                continue
            for category in article['categories']:
                cur.execute(article_category_sql, (article_id, category))
            for author in article['authors']:
                insert_author(cur, article_id, author['firstname'],
                              author['lastname'], author['affiliations'])

            conn.commit()
            print('\rInserted {} elements.'.format(i), end='')
        print('\nSuccessfully inserted the elements.')
    finally:
        cur.close()
        conn.close()
示例#2
0
def get_user_suggested_topics(user_id):
    """Gets suggested topics per system for one user.
    Returns {system_id:[topic_id, topic_id, topic_id, ... ], ...
        } with score decending"""
    connection = database.get_connection()
    with closing(connection.cursor(dictionary=True)) as cur:
        sql = '''select topic_recommendations.topic_id, 
                topic_recommendations.system_id from 
                topic_recommendations inner join topics on 
                topics.topic_id = topic_recommendations.topic_id 
                left join user_topics on user_topics.topic_id =
                topic_recommendations.topic_id and user_topics.user_id = 
                topic_recommendations.user_id where 
                user_topics.state is NULL and topic_recommendations.user_id
                = %s and topic_recommendations.interleaving_batch is NULL 
                order by topic_recommendations.system_score DESC'''
        cur.execute(sql, (user_id, ))

        rankings = defaultdict(list)
        for row in cur.fetchall():
            topic_id = row['topic_id']
            system_id = row['system_id']

            rankings[system_id].append(topic_id)
        return rankings
示例#3
0
def get_users_for_suggestion_generation(limit, offset, all_users=False):
    """Gets the users that have not provided Semantic Scholar profile links and have not previously accepted/declined
    any suggestions.

    :param limit: Number of users to retrieve.
    :param offset: An offset to the first user returned.
    :param all_users: Return all users, regardless of whether they have provided Semantic Scholar profile links
    and/or have previously accepted/declined any suggestions.
    :return: A dictionary {user_id: {firstname, lastname}}.
    """
    with closing(database.get_connection().cursor(dictionary=True)) as cur:
        sql = '''SELECT u.user_id, u.firstname, u.lastname
                 FROM users u LEFT JOIN semantic_scholar_suggestion_log log ON u.user_id=log.user_id
                 WHERE %s OR (semantic_scholar_profile = "" AND log.user_id IS NULL)
                 ORDER BY user_id
                 LIMIT %s OFFSET %s'''
        cur.execute(sql, (all_users, limit, offset))
        users = {u['user_id']: u for u in cur.fetchall()}
        for user_id, user_data in users.items():
            sql = '''SELECT t.topic FROM user_topics ut 
                     NATURAL JOIN topics t WHERE user_id = %s AND NOT t.filtered
                     and ut.state in ('USER_ADDED','SYSTEM_RECOMMENDED_ACCEPTED')'''
            cur.execute(sql, (user_id, ))
            user_data['topics'] = cur.fetchall()
        return users
示例#4
0
def get_number_of_users_for_suggestion_generation():
    """Gets the number of users that have not provided links to their Semantic Scholar profiles and have not
    previously accepted/declined any suggestions."""
    with closing(database.get_connection().cursor()) as cur:
        sql = '''SELECT count(*)
                 FROM users u LEFT JOIN semantic_scholar_suggestion_log log ON u.user_id=log.user_id
                 WHERE semantic_scholar_profile = "" AND log.user_id IS NULL'''
        cur.execute(sql)
        return cur.fetchone()[0]
示例#5
0
def insert_topic_suggestions(suggested_topics):
    """Inserts the score for the interleaved topic suggestions into
    the database along with the datetime they were interleaved"""
    connection = database.get_connection()
    with closing(connection.cursor(dictionary=True)) as cur:
        sql = '''update topic_recommendations set interleaving_order = %s,
              interleaving_batch = %s where user_id = %s and topic_id = %s
              and system_id = %s'''
        cur.executemany(sql, suggested_topics)
        connection.commit()
示例#6
0
def assign_unsubscribe_trace(user_id):
    """Gives a user an unsubscribe trace if they dont have one."""
    trace = str(uuid4())
    connection = database.get_connection()
    with closing(connection.cursor(dictionary=True)) as cur:
        sql = '''update users set unsubscribe_trace = %s where user_id = %s'''
        cur.execute(sql, (trace, user_id))

    connection.commit()
    return trace
示例#7
0
def get_article_data():
    """Returns a dictionary of article_ids: title and authors."""
    with closing(database.get_connection().cursor()) as cur:
        sql = '''SELECT article_id,title, 
                 GROUP_CONCAT(concat(firstname,' ',lastname)  SEPARATOR ', ')
                 FROM articles NATURAL LEFT JOIN article_authors
                 WHERE datestamp >=DATE_SUB(UTC_DATE(),INTERVAL 8 DAY)
                 GROUP BY article_id'''
        cur.execute(sql)
        return {x[0]: {'title': x[1], 'authors': x[2]} for x in cur.fetchall()}
示例#8
0
def get_users(limit, offset):
    """Fetches users in batches.

    :param limit: Number of users to retrieve.
    :param offset: An offset to the first user returned.
    :return: A dictionary of user_ids: {email, name, notification_interval}.
    """
    with closing(database.get_connection().cursor(dictionary=True)) as cur:
        sql = '''SELECT u.user_id, u.email, u.firstname as name, 
                 u.notification_interval, unsubscribe_trace FROM users u ORDER BY user_id 
                 LIMIT %s OFFSET %s'''
        cur.execute(sql, (limit, offset))
        return {u.pop('user_id'): u for u in cur.fetchall()}
def insert_article_recommendations(recommendations):
    """Inserts the recommended articles into the article feedback table."""
    connection = database.get_connection()
    with closing(connection.cursor(dictionary=True)) as cur:
        sql = '''INSERT INTO article_feedback (user_id, article_id, system_id, 
                 explanation, score, recommendation_date)      
                 VALUES(%s, %s, %s, %s, %s, %s)'''
        cur.executemany(sql, recommendations)

        users = list({x[0] for x in recommendations})
        sql = '''UPDATE users SET last_recommendation_date=UTC_DATE() 
                 WHERE user_id in ({})'''.format(','.join(['%s'] * len(users)))
        cur.execute(sql, users)
        connection.commit()
示例#10
0
def insert_mail_trackers(article_traces):
    """Inserts mail trackers into the article feedback table."""
    connection = database.get_connection()
    with closing(connection.cursor(dictionary=True)) as cur:
        sql = '''UPDATE article_feedback af, users u
                 SET af.seen_email = CURRENT_TIMESTAMP,
                 af.trace_click_email = %(click_trace)s,
                 af.trace_save_email= %(save_trace)s,
                 u.last_email_date=UTC_DATE()
                 WHERE af.user_id=%(user_id)s AND af.article_id=%(article_id)s
                 AND u.user_id = %(user_id)s'''
        cur.executemany(sql, article_traces)

    connection.commit()
def update_semantic_scholar_suggestions(suggestions, timestamp: datetime):
    """Deletes existing Semantic Scholar profile suggestions and inserts new ones for a set of users."""
    conn = database.get_connection()
    with closing(conn.cursor()) as cur:
        for user_id, user_suggestions in suggestions.items():
            cur.execute(
                'DELETE FROM semantic_scholar_suggestions WHERE user_id = %s',
                (user_id, ))
            for author_id, suggestion in user_suggestions.items():
                sql = '''INSERT INTO semantic_scholar_suggestions (semantic_scholar_id, name, score, user_id, created)
                         VALUES (%s, %s, %s, %s, %s)'''
                cur.execute(sql, (author_id, suggestion["name"],
                                  suggestion["score"], user_id, timestamp))
                sql = 'UPDATE users SET show_semantic_scholar_popup=true WHERE user_id=%s'
                cur.execute(sql, (user_id, ))
        conn.commit()
def get_article_recommendations(limit, offset):
    """Fetches article recommendation for articles released the past week.

    :param limit: Number of users to retrieve recommendations for.
    :param offset: An offset to the first user returned.
    :return: Article recommendations (rankings) in the following format
    sorted by score descending:
     {user_id:
        {system_id:[article_id, article_id, article_id, ... ], ...
        }, ...
     }
    And explanations in the format:
         {user_id: {system_id: {article_id: explanation, ...}, ...}, ...}
    """
    with closing(database.get_connection().cursor(dictionary=True)) as cur:
        sql = '''SELECT ar.user_id, ar.system_id, ar.article_id, ar.explanation 
        FROM article_recommendations ar Left JOIN article_feedback  af  
        on ar.article_id = af.article_id AND ar.user_id = af.user_id 
        JOIN users u on ar.user_id = u.user_id
        JOIN articles a on a.article_id = ar.article_id
        RIGHT JOIN (SELECT user_id FROM users ORDER BY user_id 
        LIMIT %s OFFSET %s) limit_u on limit_u.user_id = ar.user_id 
        WHERE af.score is null 
        AND a.datestamp > DATE_SUB(UTC_DATE(), INTERVAL 7 DAY) 
        AND u.last_recommendation_date < UTC_DATE() ORDER BY ar.score DESC;'''
        cur.execute(sql, (limit, offset))

        rankings = defaultdict(lambda: defaultdict(list))
        explanations = defaultdict(lambda: defaultdict(dict))
        for row in cur.fetchall():
            user_id = row['user_id']
            article_id = row['article_id']
            system_id = row['system_id']
            explanation = row['explanation']

            rankings[user_id][system_id].append(article_id)
            explanations[user_id][system_id][article_id] = explanation
        return rankings, explanations
示例#13
0
def get_multileaved_articles(limit, offset):
    """Fetches multileaved article recommendations.

    :param limit: Number of users to retrieve recommendations for.
    :param offset: An offset to the first user returned.
    :return: Nested dict in format user_id: date: article_id: recommendation
    """
    with closing(database.get_connection().cursor(dictionary=True)) as cur:
        sql = '''SELECT user_id, DATE(recommendation_date) as date, 
                 article_id, score, explanation 
                 FROM article_feedback NATURAL JOIN users
                 NATURAL RIGHT JOIN 
                 (SELECT user_id FROM users ORDER BY user_id 
                 LIMIT %s OFFSET %s) limit_u 
                 WHERE DATE(recommendation_date) > 
                 DATE_SUB(UTC_DATE(), INTERVAL 7 DAY) 
                 AND last_email_date < UTC_DATE()'''
        cur.execute(sql, (limit, offset))

        result = defaultdict(lambda: defaultdict(dict))
        for r in cur.fetchall():
            result[r.pop('user_id')][r.pop('date')][r.pop('article_id')] = r
        return result
示例#14
0
def get_number_of_users():
    """This method returns the number of users in the database."""
    with closing(database.get_connection().cursor()) as cur:
        cur.execute('SELECT count(*) FROM users')
        return cur.fetchone()[0]