def get_tweets_for_nonprofit(nonprofits_id):
    """Retrieve tweets for the given nonprofit and store them in the DB.

    Only tweets newer than the largest ``tweet_id`` already stored for the
    nonprofit's ``twitter_name`` are requested (passed as ``since_id``; 1 is
    used when nothing is stored yet). Tweets are fetched by ``twitter_id``
    when it is set, otherwise by ``twitter_name``; when neither is set no
    tweets are fetched. Every fetched tweet is added as a ``Tweet`` row and
    the session is committed once at the end.

    If no nonprofit exists for ``nonprofits_id`` a warning is logged and the
    function returns without fetching or storing anything.
    """
    logger.debug('Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        # Query.get() yields None for an unknown primary key; without this
        # guard the attribute accesses below would raise AttributeError.
        logger.warning('No nonprofit found for nonprofits_id {0}; skipping tweet retrieval'.format(nonprofits_id))
        return

    # Highest tweet ID already stored for this account. tweet_id is cast to
    # an integer so that max() compares numerically.
    max_tweet = (
        DBSession.query(func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id'))
        .filter(Tweet.twitter_name == nonprofit.twitter_name)
        .first()
    )
    if max_tweet is None or max_tweet.max_tweet_id is None:
        max_tweet_id = 1
    else:
        max_tweet_id = max_tweet.max_tweet_id

    # NOTE(review): the meaning of the positional ``True`` is defined by the
    # givinggraph.twitter.tweets helpers -- confirm there.
    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(nonprofit.twitter_id, True, since_id=max_tweet_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(nonprofit.twitter_name, True, since_id=max_tweet_id)
    else:
        tweets = []

    for tweet in tweets:
        entities = tweet['entities']
        mentions = entities['user_mentions']
        DBSession.add(Tweet(
            tweet['user']['screen_name'],
            tweet['id_str'],
            tweet['created_at'],
            tweet['text'].encode('utf-8'),
            tweet['lang'],
            tweet['retweet_count'],
            tweet['favorite_count'],
            # Multi-valued entities are flattened to ', '-separated strings.
            ', '.join([mention['id_str'] for mention in mentions]),
            ', '.join([mention['screen_name'] for mention in mentions]),
            ', '.join([hashtag['text'] for hashtag in entities['hashtags']]),
            ', '.join([url['expanded_url'] for url in entities['urls']]),
            tweet['in_reply_to_screen_name'],
            tweet['in_reply_to_user_id_str'],
            tweet['in_reply_to_status_id_str'],
        ))
    DBSession.commit()
def add_guidestar_info_to_db(ein):
    """Insert or refresh the nonprofit with the given EIN from GuideStar data.

    The nonprofit is looked up in the DB by ``ein`` and fetched from
    GuideStar via ``givinggraph.guidestar.search.get_nonprofit``. When a row
    already exists its name, NTEE code, mission, description, city, state
    and ZIP are overwritten; otherwise a new ``Nonprofit`` is added. The
    GuideStar mission text is stored as both mission and description.

    Returns:
        The inserted or updated ``Nonprofit``, or ``None`` (with no DB
        changes) when the GuideStar lookup returns ``None``.
    """
    logger.debug('Inside add_guidestar_info_to_db({0})'.format(ein))
    existing = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    fetched = givinggraph.guidestar.search.get_nonprofit(ein)
    if fetched is None:
        return None

    if existing is not None:
        # (DB attribute, GuideStar attribute) pairs, applied in order.
        field_map = (
            ('name', 'name'),
            ('ntee_code', 'ntee_code'),
            ('mission', 'mission'),
            ('description', 'mission'),
            ('city', 'city'),
            ('state', 'state'),
            ('ZIP', 'zip'),
        )
        for db_attr, gs_attr in field_map:
            setattr(existing, db_attr, getattr(fetched, gs_attr))
        record = existing
    else:
        # NOTE(review): the two None arguments fill constructor slots whose
        # meaning is not visible here -- see the Nonprofit model.
        record = Nonprofit(
            fetched.name,
            fetched.ein,
            fetched.ntee_code,
            fetched.mission,
            fetched.mission,
            None,
            None,
            fetched.city,
            fetched.state,
            fetched.zip,
        )
        DBSession.add(record)

    DBSession.commit()
    return record
def add_similarity_scores_for_nonprofit_tweets():
    """Recompute pairwise tweet-text similarity between Twitter accounts.

    Stored tweets are grouped by ``twitter_name`` with their text combined
    through ``group_concat``. The combined texts go to
    ``similarity.get_similarity_scores_all_pairs``. All existing
    ``Nonprofits_Similarity_By_Tweets`` rows are deleted, then one row is
    added for each pair of accounts (m < n) and the session is committed
    once at the end.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    # NOTE(review): if the backend is MySQL, GROUP_CONCAT output is capped by
    # group_concat_max_len, which could truncate the text -- confirm.
    grouped = (
        DBSession.query(Tweet.twitter_name, func.group_concat(Tweet.text).label('text'))
        .group_by(Tweet.twitter_name)
        .all()
    )
    texts = [row.text for row in grouped]
    scores = similarity.get_similarity_scores_all_pairs(texts)

    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()

    size = len(scores)
    for m in xrange(size - 1):
        score_row = scores[m]
        first_name = grouped[m].twitter_name
        # Only entries above the diagonal: each pair is stored once.
        for n in xrange(m + 1, size):
            DBSession.add(Nonprofits_Similarity_By_Tweets(first_name, grouped[n].twitter_name, score_row[n]))
    DBSession.commit()
def add_similarity_scores_for_nonprofit_descriptions():
    """Recompute pairwise description similarity between nonprofits.

    Nonprofits with a non-NULL description are loaded and their
    descriptions passed to ``similarity.get_similarity_scores_all_pairs``.
    All existing ``Nonprofits_Similarity_By_Description`` rows are deleted,
    then one row is added for each pair of nonprofits (m < n), keyed by
    their ``nonprofits_id`` values. The session is committed once at the
    end.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    # ``!= None`` is deliberate: SQLAlchemy turns it into IS NOT NULL.
    has_description = Nonprofit.description != None  # nopep8
    described = DBSession.query(Nonprofit).filter(has_description).all()
    descriptions = [entry.description for entry in described]
    scores = similarity.get_similarity_scores_all_pairs(descriptions)

    DBSession.query(Nonprofits_Similarity_By_Description).delete()

    size = len(scores)
    for m in xrange(size - 1):
        score_row = scores[m]
        first_id = described[m].nonprofits_id
        # Only entries above the diagonal: each pair is stored once.
        for n in xrange(m + 1, size):
            DBSession.add(Nonprofits_Similarity_By_Description(first_id, described[n].nonprofits_id, score_row[n]))
    DBSession.commit()
def add_similarity_scores_for_nonprofit_descriptions():
    """Calculate and store a similarity score for every pair of descriptions.

    Works on the nonprofits whose description is not NULL. The previous
    contents of ``Nonprofits_Similarity_By_Description`` are deleted and
    replaced with one row per pair of nonprofits, holding both
    ``nonprofits_id`` values and the score returned by
    ``similarity.get_similarity_scores_all_pairs``. Everything is committed
    in a single transaction at the end.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    query = DBSession.query(Nonprofit)
    # SQLAlchemy needs ``!= None`` (not ``is not None``) to emit IS NOT NULL.
    query = query.filter(Nonprofit.description != None)  # nopep8
    candidates = query.all()

    matrix = similarity.get_similarity_scores_all_pairs(
        [candidate.description for candidate in candidates])

    DBSession.query(Nonprofits_Similarity_By_Description).delete()

    total = len(matrix)
    # Index pairs (m, n) with m < n, in row-major order.
    index_pairs = ((m, n) for m in xrange(total - 1) for n in xrange(m + 1, total))
    for m, n in index_pairs:
        pair_row = Nonprofits_Similarity_By_Description(
            candidates[m].nonprofits_id,
            candidates[n].nonprofits_id,
            matrix[m][n])
        DBSession.add(pair_row)

    DBSession.commit()
def get_tweets_for_nonprofit(nonprofits_id):
    """Retrieve tweets for the given nonprofit and store them in the DB.

    The largest ``tweet_id`` already stored for the nonprofit's
    ``twitter_name`` is passed to the Twitter helper as ``since_id`` (1 when
    no tweets are stored). The lookup uses ``twitter_id`` when present and
    falls back to ``twitter_name``; with neither, nothing is fetched. Each
    returned tweet becomes a ``Tweet`` row, and the session is committed
    once after all rows are added.

    An unknown ``nonprofits_id`` is logged as a warning and the function
    returns without fetching or storing anything.
    """
    logger.debug(
        'Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))

    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        # Query.get() returns None when no row has this primary key; bail
        # out instead of failing with AttributeError further down.
        logger.warning(
            'No nonprofit found for nonprofits_id {0}; skipping tweet retrieval'.format(nonprofits_id))
        return

    # tweet_id is cast to an integer so max() compares numerically.
    newest_stored = func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id')
    max_tweet = DBSession.query(newest_stored).filter(
        Tweet.twitter_name == nonprofit.twitter_name).first()
    if max_tweet is None or max_tweet.max_tweet_id is None:
        max_tweet_id = 1
    else:
        max_tweet_id = max_tweet.max_tweet_id

    # NOTE(review): the positional ``True`` is interpreted by the
    # givinggraph.twitter.tweets helpers; its meaning is not visible here.
    tweets = []
    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(
            nonprofit.twitter_id, True, since_id=max_tweet_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(
            nonprofit.twitter_name, True, since_id=max_tweet_id)

    def joined(items, key):
        # Flatten one field of a list of entity dicts into 'a, b, c'.
        return ', '.join([item[key] for item in items])

    for tweet in tweets:
        entities = tweet['entities']
        user_mentions = entities['user_mentions']
        DBSession.add(
            Tweet(
                tweet['user']['screen_name'],
                tweet['id_str'],
                tweet['created_at'],
                tweet['text'].encode('utf-8'),
                tweet['lang'],
                tweet['retweet_count'],
                tweet['favorite_count'],
                joined(user_mentions, 'id_str'),
                joined(user_mentions, 'screen_name'),
                joined(entities['hashtags'], 'text'),
                joined(entities['urls'], 'expanded_url'),
                tweet['in_reply_to_screen_name'],
                tweet['in_reply_to_user_id_str'],
                tweet['in_reply_to_status_id_str']))
    DBSession.commit()
def add_similarity_scores_for_nonprofit_tweets():
    """Calculate and store a similarity score for every pair of tweet sets.

    Tweets are grouped per ``twitter_name`` with their text combined by
    ``group_concat``. The previous contents of
    ``Nonprofits_Similarity_By_Tweets`` are deleted and replaced with one
    row per pair of accounts, holding both Twitter names and the score
    returned by ``similarity.get_similarity_scores_all_pairs``. Everything
    is committed in a single transaction at the end.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    combined_text = func.group_concat(Tweet.text).label('text')
    per_account = DBSession.query(Tweet.twitter_name, combined_text).group_by(
        Tweet.twitter_name).all()

    matrix = similarity.get_similarity_scores_all_pairs(
        [account.text for account in per_account])

    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()

    total = len(matrix)
    # Index pairs (m, n) with m < n, in row-major order.
    index_pairs = ((m, n) for m in xrange(total - 1) for n in xrange(m + 1, total))
    for m, n in index_pairs:
        pair_row = Nonprofits_Similarity_By_Tweets(
            per_account[m].twitter_name,
            per_account[n].twitter_name,
            matrix[m][n])
        DBSession.add(pair_row)

    DBSession.commit()