示例#1
0
def insert_text_data(data_source, source_url, text, time_posted, session):
    """Create a text-source row, stage it on the session and return it.

    The new model is only added to ``session``; no commit is issued here.

    Keyword arguments:
    data_source -- value stored as the row's source_key. It identifies the
    source; see interns.models.models.AllowedSources
    source_url -- A string indicating the url the text was pulled from
    text -- the raw text data pulled from the url
    time_posted -- either a datetime object or a value date_parse can convert
    session -- active db session the new model is added to
    """
    # Anything that is not already a datetime goes through date_parse.
    posted_at = (
        time_posted
        if isinstance(time_posted, datetime)
        else date_parse(time_posted)
    )

    eleanor_logger.debug('Inserting text data into postgres')

    column_values = {
        'source_key': data_source,
        'source_url': source_url,
        'written_text': text,
        'time_posted': posted_at,
    }
    text_source = models.TextSource(**column_values)

    session.add(text_source)
    return text_source
示例#2
0
def last_twitter_user_entry_id(screen_name):
    """
    Returns the latest tweet id associated with screen_name, or None when no
    tweet is stored for that user.

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    # Decide before opening a session: the membership helper manages its own
    # session, so there is no reason to hold a second one open while it runs.
    if not is_twitter_user_in_interns(screen_name):
        return None

    with GetDBSession() as db_session:
        # TODO: filter out retweets here once that is required.
        newest_row = db_session.query(
            twitter_models.TwitterSource.tweet_id
        ).filter_by(
            tweeter_user_name=screen_name
        ).order_by(
            desc(twitter_models.TwitterSource.tweet_id)
        ).first()

        # first() yields None when no row matches (for example if the rows
        # vanished between the membership check and this query); return None
        # instead of failing on attribute access.
        if newest_row is None:
            return None

        last_tweet_id = newest_row.tweet_id
        eleanor_logger.debug(
            'Last tweet id from twitter user %s is %s',
            screen_name,
            last_tweet_id
        )
        return last_tweet_id
示例#3
0
def insert_text_data(data_source, source_url, text, time_posted, session):
    """Build the base text-source entry, add it to the session, return it.

    Nothing is committed here; the caller owns the session's transaction.

    Keyword arguments:
    data_source -- value stored as the row's source_key. It identifies the
    source; see interns.models.models.AllowedSources
    source_url -- A string indicating the url the text was pulled from
    text -- the raw text data pulled from the url
    time_posted -- either a datetime object or a value date_parse can convert
    session -- active db session the new model is added to
    """
    already_datetime = isinstance(time_posted, datetime)
    if already_datetime:
        posted_at = time_posted
    else:
        # Non-datetime values are converted with date_parse.
        posted_at = date_parse(time_posted)

    eleanor_logger.debug('Inserting text data into postgres')

    text_entry = models.TextSource(
        source_key=data_source,
        source_url=source_url,
        written_text=text,
        time_posted=posted_at,
    )
    session.add(text_entry)

    return text_entry
示例#4
0
def is_twitter_user_in_interns(screen_name):
    """
    Checks to see if a twitter user exists within the database. Returns True
    if the screen_name is present in the database else returns False.

    For example checking to see if the user '@NASA' exists within the database
    the method would be called like so: is_twitter_user_in_interns('NASA')

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as db_session:
        distinct_screen_names = db_session.query(
            distinct(twitter_models.TwitterSource.tweeter_user_name)
        )
        # The query object is lazy: it only hits the database when iterated.
        # Consume it while the session context is still open rather than
        # after the ``with`` block has exited.
        screen_names = {row[0] for row in distinct_screen_names}

    is_user_tracked = screen_name in screen_names
    eleanor_logger.debug(
        'Twitter username %s is currently being tracked by interns is: %s',
        screen_name,
        is_user_tracked
    )
    return is_user_tracked
示例#5
0
def insert_non_retweet_data(tweet_data):
    """Insert a non-retweet tweet and its related entities into the database.

    A text-source row and a TwitterSource row (is_retweet=False) are built
    from tweet_data, linked together, handed to add_user_mentions,
    add_hashtags and add_urls, and then committed. Commit failures are logged
    rather than raised.

    Arguments:
    tweet_data -- mapping describing the tweet. This function reads the keys
    'url', 'tweet_text', 'tweet_created', 'user_name' and 'tweet_id'; the
    add_* helpers receive the same mapping.
    """
    eleanor_logger.debug('Inserting tweet data')
    with GetDBSession() as db_session:
        tweet_text_model = insert_text_data(
            models.AllowedSources.twitter.name,
            tweet_data['url'],
            tweet_data['tweet_text'],
            date_parse(tweet_data['tweet_created']),
            db_session
        )

        tweet_model = twitter_models.TwitterSource(
            tweeter_user_name=tweet_data['user_name'],
            tweet_id=tweet_data['tweet_id'],
            is_retweet=False
        )
        tweet_text_model.twitter_source = tweet_model

        add_user_mentions(tweet_data, tweet_model)
        add_hashtags(tweet_data, tweet_model)
        add_urls(tweet_data, tweet_model)

        try:
            db_session.commit()
        except IntegrityError as e:
            # str(e) instead of e.message: exceptions have no ``message``
            # attribute on Python 3, so reading it here would raise
            # AttributeError from inside the handler.
            if 'duplicate key value' in str(e):
                # We've already captured this so, moving on
                eleanor_logger.info(
                    'Duplicate tweet is already in the database, skipping')
            else:
                eleanor_logger.critical(
                    ('A database error occurred while attempting '
                     'to insert tweet %s'), e)
        except Exception as e:
            # Any other failure is logged and swallowed so the caller can
            # keep going (best-effort insert).
            eleanor_logger.critical(
                ('An error has occurred while inserting a tweet into '
                 'the database %s'), e)
示例#6
0
def get_tracked_twitter_tl_users():
    """
    Return the user_name of every PolledTimelineUsers row as a list, i.e.
    the twitter users currently being polled by the interns.
    """
    eleanor_logger.debug('Getting listing of tracked twitter users')
    user_names = []
    with GetDBSession() as session:
        polled_rows = session.query(twitter_models.PolledTimelineUsers)
        # Iterate while the session context is open.
        user_names.extend(row.user_name for row in polled_rows)
    return user_names
示例#7
0
def begin_tracking_twitter_user(username):
    """
    Add a twitter user to be tracked to the database.

    A PolledTimelineUsers row is created for the name and committed in its
    own session; a debug message is logged once the commit has returned.

    Arguments:
    username -- Twitter username/screen_name to be added, without the
    leading '@'. For example, to start polling '@NASA':
    begin_tracking_twitter_user('NASA')
    """
    polled_user = twitter_models.PolledTimelineUsers(user_name=username)

    with GetDBSession() as session:
        session.add(polled_user)
        session.commit()

    eleanor_logger.debug('Adding twitter user %s to be tracked', username)
示例#8
0
def get_tracked_twitter_tl_users():
    """
    List the twitter users being polled by the interns.

    Returns the user_name of each PolledTimelineUsers row, in the order the
    query yields them.
    """
    eleanor_logger.debug('Getting listing of tracked twitter users')
    names = []
    with GetDBSession() as session:
        polled_users = session.query(twitter_models.PolledTimelineUsers)
        # Read the rows while the session context is still open.
        names.extend(polled.user_name for polled in polled_users)
    return names
示例#9
0
def begin_tracking_twitter_user(username):
    """
    Start tracking a twitter user by storing it in the database.

    Creates a PolledTimelineUsers row for the name, commits it in a
    dedicated session, then logs a debug message.

    Arguments:
    username -- Twitter username/screen_name to be added, without the
    leading '@'. For example, to have '@NASA' polled:
    begin_tracking_twitter_user('NASA')
    """
    row_values = {'user_name': username}
    tracked_entry = twitter_models.PolledTimelineUsers(**row_values)

    with GetDBSession() as session:
        session.add(tracked_entry)
        session.commit()

    eleanor_logger.debug('Adding twitter user %s to be tracked', username)
示例#10
0
def insert_non_retweet_data(tweet_data):
    """Insert a non-retweet tweet and its related entities into the database.

    A text-source row and a TwitterSource row (is_retweet=False) are built
    from tweet_data, linked together, handed to add_user_mentions,
    add_hashtags and add_urls, and then committed. Commit failures are logged
    rather than raised.

    Arguments:
    tweet_data -- mapping describing the tweet. This function reads the keys
    'url', 'tweet_text', 'tweet_created', 'user_name' and 'tweet_id'; the
    add_* helpers receive the same mapping.
    """
    eleanor_logger.debug('Inserting tweet data')
    with GetDBSession() as db_session:
        tweet_text_model = insert_text_data(
            models.AllowedSources.twitter.name,
            tweet_data['url'],
            tweet_data['tweet_text'],
            date_parse(tweet_data['tweet_created']),
            db_session
        )

        tweet_model = twitter_models.TwitterSource(
            tweeter_user_name=tweet_data['user_name'],
            tweet_id=tweet_data['tweet_id'],
            is_retweet=False
        )
        tweet_text_model.twitter_source = tweet_model

        add_user_mentions(tweet_data, tweet_model)
        add_hashtags(tweet_data, tweet_model)
        add_urls(tweet_data, tweet_model)

        try:
            db_session.commit()
        except IntegrityError as e:
            # str(e) instead of e.message: exceptions have no ``message``
            # attribute on Python 3, so reading it here would raise
            # AttributeError from inside the handler.
            if 'duplicate key value' in str(e):
                # We've already captured this so, moving on
                eleanor_logger.info(
                    'Duplicate tweet is already in the database, skipping'
                )
            else:
                eleanor_logger.critical(
                    (
                        'A database error occurred while attempting '
                        'to insert tweet %s'
                    ),
                    e
                )
        except Exception as e:
            # Any other failure is logged and swallowed so the caller can
            # keep going (best-effort insert).
            eleanor_logger.critical(
                (
                    'An error has occurred while inserting a tweet into '
                    'the database %s'
                ),
                e
            )
示例#11
0
def last_twitter_user_entry_id(screen_name):
    """
    Returns the latest tweet id associated with screen_name, or None when no
    tweet is stored for that user.

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    # Decide before opening a session: the membership helper manages its own
    # session, so there is no reason to hold a second one open while it runs.
    if not is_twitter_user_in_interns(screen_name):
        return None

    with GetDBSession() as db_session:
        # TODO: filter out retweets here once that is required.
        newest_row = db_session.query(
            twitter_models.TwitterSource.tweet_id
        ).filter_by(tweeter_user_name=screen_name).order_by(
            desc(twitter_models.TwitterSource.tweet_id)).first()

        # first() yields None when no row matches (for example if the rows
        # vanished between the membership check and this query); return None
        # instead of failing on attribute access.
        if newest_row is None:
            return None

        last_tweet_id = newest_row.tweet_id
        eleanor_logger.debug('Last tweet id from twitter user %s is %s',
                             screen_name, last_tweet_id)
        return last_tweet_id
示例#12
0
def is_twitter_user_in_interns(screen_name):
    """
    Checks to see if a twitter user exists within the database. Returns True
    if the screen_name is present in the database else returns False.

    For example checking to see if the user '@NASA' exists within the database
    the method would be called like so: is_twitter_user_in_interns('NASA')

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as db_session:
        distinct_screen_names = db_session.query(
            distinct(twitter_models.TwitterSource.tweeter_user_name))
        # The query object is lazy: it only hits the database when iterated.
        # Consume it while the session context is still open rather than
        # after the ``with`` block has exited.
        screen_names = {row[0] for row in distinct_screen_names}

    is_user_tracked = screen_name in screen_names
    eleanor_logger.debug(
        'Twitter username %s is currently being tracked by interns is: %s',
        screen_name, is_user_tracked)
    return is_user_tracked