def insert_text_data(data_source, source_url, text, time_posted, session):
    """Create the base text-source entry, add it to the session, return it.

    The new model is only added to ``session``; this function does not
    commit, so persisting the row is left to the caller.

    Keyword arguments:
    data_source -- An enum indicating source. The enum is located in
                   interns.models.models.AllowedSources
    source_url -- A string indicating the url the text was pulled from
    text -- the raw text data pulled from the url
    time_posted -- either a datetime object or a datetime string
    session -- active db session
    """
    # Anything that is not already a datetime is handed to date_parse.
    posted_at = (
        time_posted
        if isinstance(time_posted, datetime)
        else date_parse(time_posted)
    )
    eleanor_logger.debug('Inserting text data into postgres')
    text_entry = models.TextSource(
        source_key=data_source,
        source_url=source_url,
        written_text=text,
        time_posted=posted_at,
    )
    session.add(text_entry)
    return text_entry
def last_twitter_user_entry_id(screen_name):
    """
    Return the highest stored tweet id for screen_name, or None.

    None is returned when is_twitter_user_in_interns reports that the user
    has no entries, and also when the lookup itself finds no row.

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as db_session:
        if not is_twitter_user_in_interns(screen_name):
            return None

        # TODO: this query does not yet exclude retweets; add a filter on
        # the retweet flag once that behaviour is wanted.
        latest_row = db_session.query(
            twitter_models.TwitterSource.tweet_id
        ).filter_by(
            tweeter_user_name=screen_name
        ).order_by(
            desc(twitter_models.TwitterSource.tweet_id)
        ).first()

        # first() yields None when nothing matches. The existence check
        # above is a separate query, so do not assume a row is still here.
        if latest_row is None:
            return None

        latest_tweet_id = latest_row.tweet_id
        eleanor_logger.debug(
            'Last tweet id from twitter user %s is %s',
            screen_name,
            latest_tweet_id
        )
        return latest_tweet_id
def insert_text_data(data_source, source_url, text, time_posted, session):
    """Build a TextSource entry, register it with the session and return it.

    Nothing is committed here; the model is only added to ``session``.

    Keyword arguments:
    data_source -- An enum indicating source. The enum is located in
                   interns.models.models.AllowedSources
    source_url -- A string indicating the url the text was pulled from
    text -- the raw text data pulled from the url
    time_posted -- either a datetime object or a datetime string
    session -- active db session
    """
    already_datetime = isinstance(time_posted, datetime)
    if not already_datetime:
        # Non-datetime values (e.g. strings) go through date_parse first.
        time_posted = date_parse(time_posted)

    eleanor_logger.debug('Inserting text data into postgres')

    column_values = {
        'source_key': data_source,
        'source_url': source_url,
        'written_text': text,
        'time_posted': time_posted,
    }
    new_text_source = models.TextSource(**column_values)
    session.add(new_text_source)
    return new_text_source
def is_twitter_user_in_interns(screen_name):
    """
    Report whether a twitter user already has entries in the database.

    The distinct tweeter_user_name values stored for TwitterSource are
    fetched and screen_name is looked up among them. Returns True when it
    is present, otherwise False.

    For example, to check whether the user '@NASA' exists within the
    database:

    is_twitter_user_in_interns('NASA')

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as db_session:
        name_rows = db_session.query(
            distinct(twitter_models.TwitterSource.tweeter_user_name)
        )
        # Each result row is a one-element tuple holding the user name.
        known_names = [row[0] for row in name_rows]

    is_user_tracked = screen_name in known_names
    eleanor_logger.debug(
        'Twitter username %s is currently being tracked by interns is: %s',
        screen_name,
        is_user_tracked
    )
    return is_user_tracked
def insert_non_retweet_data(tweet_data):
    """Insert a single (non-retweet) tweet into the database.

    A TextSource entry is created via insert_text_data, a TwitterSource
    entry (with is_retweet=False) is attached to it, and the tweet's user
    mentions, hashtags and urls are added before committing.

    Errors raised by the commit are logged rather than propagated; a
    duplicate-key IntegrityError is treated as "already captured" and only
    logged at info level.

    Arguments:
    tweet_data -- parsed JSON tweet; the keys read here are 'url',
                  'tweet_text', 'tweet_created', 'user_name' and 'tweet_id'.
                  It is also passed on to add_user_mentions, add_hashtags
                  and add_urls.
    """
    eleanor_logger.debug('Inserting tweet data')
    with GetDBSession() as db_session:
        text_model = insert_text_data(
            models.AllowedSources.twitter.name,
            tweet_data['url'],
            tweet_data['tweet_text'],
            date_parse(tweet_data['tweet_created']),
            db_session
        )
        tweet_model = twitter_models.TwitterSource(
            tweeter_user_name=tweet_data['user_name'],
            tweet_id=tweet_data['tweet_id'],
            is_retweet=False
        )
        text_model.twitter_source = tweet_model

        add_user_mentions(tweet_data, tweet_model)
        add_hashtags(tweet_data, tweet_model)
        add_urls(tweet_data, tweet_model)

        try:
            db_session.commit()
        except IntegrityError as e:
            # Use str(e) rather than e.message: exceptions have no
            # ``message`` attribute on Python 3, so reading it here would
            # raise AttributeError from inside this handler.
            if 'duplicate key value' in str(e):
                # We've already captured this so, moving on
                eleanor_logger.info(
                    'Duplicate tweet is already in the database, skipping'
                )
            else:
                eleanor_logger.critical(
                    'A database error occurred while attempting '
                    'to insert tweet %s',
                    e
                )
        except Exception as e:
            # Something real bad happened
            eleanor_logger.critical(
                'An error has occurred while inserting a tweet into '
                'the database %s',
                e
            )
def get_tracked_twitter_tl_users():
    """
    Return the user names of every twitter user being polled by the interns.

    The names are read from the user_name attribute of each
    PolledTimelineUsers row and returned as a list.
    """
    eleanor_logger.debug('Getting listing of tracked twitter users')
    with GetDBSession() as db_session:
        polled_rows = db_session.query(twitter_models.PolledTimelineUsers)
        user_names = [row.user_name for row in polled_rows]
    return user_names
def begin_tracking_twitter_user(username):
    """
    Add a twitter user to be tracked to the database.

    A PolledTimelineUsers row is created for the user and committed.

    Arguments:
    username -- Twitter username/screen_name to be added.

    For example, to add username '@NASA' to be polled:

    begin_tracking_twitter_user('NASA')
    """
    tracked_user = twitter_models.PolledTimelineUsers(user_name=username)
    with GetDBSession() as session:
        session.add(tracked_user)
        session.commit()
    # Logged only after the commit above has gone through.
    eleanor_logger.debug('Adding twitter user %s to be tracked', username)
def get_tracked_twitter_tl_users():
    """
    Pull the list of twitter users that is being polled by the interns.

    Returns a list holding the user_name of every PolledTimelineUsers row.
    """
    eleanor_logger.debug('Getting listing of tracked twitter users')
    with GetDBSession() as session:
        polled_users = session.query(twitter_models.PolledTimelineUsers)
        names = [polled_user.user_name for polled_user in polled_users]
    return names
def insert_non_retweet_data(tweet_data):
    """Take the passed in JSON tweet_data and insert it into the database.

    The tweet is stored as a TextSource entry (created by insert_text_data)
    with an attached TwitterSource entry flagged is_retweet=False. User
    mentions, hashtags and urls are added before the commit.

    Commit failures are logged and not re-raised. An IntegrityError whose
    text contains 'duplicate key value' means the tweet is already stored
    and is logged at info level only.

    Arguments:
    tweet_data -- parsed JSON tweet; this function reads the keys 'url',
                  'tweet_text', 'tweet_created', 'user_name' and
                  'tweet_id', and forwards the whole object to
                  add_user_mentions, add_hashtags and add_urls.
    """
    eleanor_logger.debug('Inserting tweet data')
    with GetDBSession() as db_session:
        tweet_text_model = insert_text_data(
            models.AllowedSources.twitter.name,
            tweet_data['url'],
            tweet_data['tweet_text'],
            date_parse(tweet_data['tweet_created']),
            db_session
        )
        tweet_model = twitter_models.TwitterSource(
            tweeter_user_name=tweet_data['user_name'],
            tweet_id=tweet_data['tweet_id'],
            is_retweet=False
        )
        tweet_text_model.twitter_source = tweet_model

        add_user_mentions(tweet_data, tweet_model)
        add_hashtags(tweet_data, tweet_model)
        add_urls(tweet_data, tweet_model)

        try:
            db_session.commit()
        except IntegrityError as e:
            # Python 3 exceptions carry no ``message`` attribute, so the
            # text is taken from str(e); reading e.message would raise
            # AttributeError inside this handler and escape uncaught.
            error_text = str(e)
            if 'duplicate key value' in error_text:
                # We've already captured this so, moving on
                eleanor_logger.info(
                    'Duplicate tweet is already in the database, skipping'
                )
            else:
                eleanor_logger.critical(
                    'A database error occurred while attempting '
                    'to insert tweet %s',
                    e
                )
        except Exception as e:
            # Something real bad happened
            eleanor_logger.critical(
                'An error has occurred while inserting a tweet into '
                'the database %s',
                e
            )
def last_twitter_user_entry_id(screen_name):
    """
    Return the latest tweet id associated with screen_name, else None.

    "Latest" is the first row when the user's TwitterSource entries are
    ordered by tweet_id descending. None is returned when
    is_twitter_user_in_interns reports no entries for the user, or when
    the lookup finds no row.

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as db_session:
        if not is_twitter_user_in_interns(screen_name):
            return None

        # TODO: retweets are not excluded yet; change this to filter on
        # whether the entry is a retweet.
        newest = db_session.query(
            twitter_models.TwitterSource.tweet_id
        ).filter_by(tweeter_user_name=screen_name).order_by(
            desc(twitter_models.TwitterSource.tweet_id)
        ).first()

        # Guard against an empty result: first() returns None in that case
        # and dereferencing .tweet_id would raise AttributeError.
        if newest is None:
            return None

        tweet_id = newest.tweet_id
        eleanor_logger.debug('Last tweet id from twitter user %s is %s',
                             screen_name, tweet_id)
        return tweet_id
def is_twitter_user_in_interns(screen_name):
    """
    Check whether a twitter user exists within the database.

    Returns True if screen_name is among the distinct tweeter_user_name
    values stored for TwitterSource, else returns False.

    For example, checking whether the user '@NASA' exists within the
    database is done like so:

    is_twitter_user_in_interns('NASA')

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as session:
        distinct_rows = session.query(
            distinct(twitter_models.TwitterSource.tweeter_user_name))
        # Rows come back as one-element tuples; keep just the name.
        stored_names = [name_row[0] for name_row in distinct_rows]

    found = screen_name in stored_names
    eleanor_logger.debug(
        'Twitter username %s is currently being tracked by interns is: %s',
        screen_name, found)
    return found