def add_guidestar_info_to_db(ein):
    """Insert or refresh the nonprofit with the given EIN from GuideStar data.

    The EIN is looked up both in the DB and on GuideStar. When GuideStar
    returns nothing, None is returned and the DB is left untouched. Otherwise
    the existing Nonprofit row is overwritten with the GuideStar values, or a
    new row is added when none exists; the session is committed and the
    Nonprofit object is returned.
    """
    logger.debug('Inside add_guidestar_info_to_db({0})'.format(ein))
    existing = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    fetched = givinggraph.guidestar.search.get_nonprofit(ein)
    if fetched is None:
        return None

    if existing is not None:
        record = existing
        record.name = fetched.name
        record.ntee_code = fetched.ntee_code
        record.mission = fetched.mission
        # The mission text doubles as the description.
        record.description = fetched.mission
        record.city = fetched.city
        record.state = fetched.state
        record.ZIP = fetched.zip
    else:
        # The mission is passed twice; presumably the second slot is the
        # description, mirroring the update branch above -- TODO confirm
        # against the Nonprofit constructor.
        record = Nonprofit(
            fetched.name,
            fetched.ein,
            fetched.ntee_code,
            fetched.mission,
            fetched.mission,
            None,
            None,
            fetched.city,
            fetched.state,
            fetched.zip,
        )
        DBSession.add(record)

    DBSession.commit()
    return record
def add_nonprofit_company_news_article_connections(article_ids, companies):
    """Link news articles to the companies they mention supportively.

    Takes a list of IDs of news articles and a list of Company objects. For
    every article, each company whose name is mentioned in the article text
    with supportive wording is appended to the article's companies. The
    session is committed once after all articles have been processed.

    An article ID that cannot be loaded is reported on stdout, the lookup is
    retried once after a pause, and the article is skipped if it is still
    missing (previously this case crashed on ``article.text``).
    """
    logger.debug('Inside add_nonprofit_company_news_article_connections(news_articles, companies)')
    company_count = len(companies)
    for article_id in article_ids:
        article = DBSession.query(News_Article).get(article_id)
        if article is None:
            print('***************************')
            print('***************************')
            print(article_id)
            print('***************************')
            print('***************************')
            # Give the DB time to make the freshly inserted article visible,
            # then look once more before giving up on this ID.
            time.sleep(180)
            article = DBSession.query(News_Article).get(article_id)
            if article is None:
                logger.warning('News article {0} could not be loaded, skipping it.'.format(article_id))
                continue

        for position, company in enumerate(companies, 1):
            if position % 100 == 0:
                print('Processing article {0} for company {1} of {2}...'.format(article_id, position, company_count))
            mentions = news_parser.get_company_mentions_in_text(article.text, company.name.encode('utf-8'))
            for mention in mentions:
                if news_parser.contains_supportive_wording(mention):
                    # One supportive mention is enough to link the company.
                    article.companies.append(company)
                    break
    DBSession.commit()
def add_new_nonprofit(ein):
    """Run the full ingestion pipeline for a nonprofit that is not in the DB yet.

    Does nothing when a nonprofit with this EIN already exists, or when
    GuideStar has no record for it. Otherwise the GuideStar info is stored,
    news articles are fetched and linked to companies, and the Twitter name,
    tweets and followers steps are run, all sequentially. Always returns None.
    """
    already_stored = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    if already_stored is not None:
        return

    # lookup guidestar info before doing anything else.
    nonprofit = add_guidestar_info_to_db(ein)
    if nonprofit is None:
        print('Guidestar returned nothing for EIN {0}, exiting.'.format(ein))
        return None

    logger.debug('Getting companies...')
    companies = DBSession.query(Company).all()
    logger.debug('Companies retrieved.')

    # twitter_chain = chain(update_nonprofit_twitter_name.si(nonprofit.nonprofits_id),
    #                       group(get_tweets_for_nonprofit.si(nonprofit.nonprofits_id),
    #                             get_followers_for_nonprofit.si(nonprofit.nonprofits_id)))
    # add_news_articles_to_db_for_nonprofit returns a list of articles, which will get
    # passed as the 2nd argument to add_nonprofit_company_news_article_connections
    # NOTE: Commented out because of synchronization issue: articles passed to
    # add_nonprofit_company_news_article_connections(...) are in the DB, but SQLAlchemy
    # doesn't see them.
    # news_chain = chain(add_news_articles_to_db_for_nonprofit.si(nonprofit.nonprofits_id),
    #                    add_nonprofit_company_news_article_connections.s(companies))
    new_article_ids = add_news_articles_to_db_for_nonprofit(nonprofit.nonprofits_id)
    add_nonprofit_company_news_article_connections(new_article_ids, companies)

    for step in (update_nonprofit_twitter_name,
                 get_tweets_for_nonprofit,
                 get_followers_for_nonprofit):
        step(nonprofit.nonprofits_id)
def add_new_nonprofit(ein):
    """Run the full ingestion pipeline for a nonprofit that is not in the DB yet.

    Does nothing when a nonprofit with this EIN already exists, or when
    GuideStar has no record for it. Otherwise the GuideStar info is stored,
    news articles are fetched and linked to companies, and the Twitter name,
    tweets and followers steps are run, all sequentially. Always returns None.
    """
    already_stored = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    if already_stored is not None:
        return

    # lookup guidestar info before doing anything else.
    nonprofit = add_guidestar_info_to_db(ein)
    if nonprofit is None:
        print('Guidestar returned nothing for EIN {0}, exiting.'.format(ein))
        return None

    logger.debug('Getting companies...')
    companies = DBSession.query(Company).all()
    logger.debug('Companies retrieved.')

    # twitter_chain = chain(update_nonprofit_twitter_name.si(nonprofit.nonprofits_id),
    #                       group(get_tweets_for_nonprofit.si(nonprofit.nonprofits_id),
    #                             get_followers_for_nonprofit.si(nonprofit.nonprofits_id)))
    # add_news_articles_to_db_for_nonprofit returns a list of articles, which will get
    # passed as the 2nd argument to add_nonprofit_company_news_article_connections
    # NOTE: Commented out because of synchronization issue: articles passed to
    # add_nonprofit_company_news_article_connections(...) are in the DB, but SQLAlchemy
    # doesn't see them.
    # news_chain = chain(add_news_articles_to_db_for_nonprofit.si(nonprofit.nonprofits_id),
    #                    add_nonprofit_company_news_article_connections.s(companies))
    new_article_ids = add_news_articles_to_db_for_nonprofit(nonprofit.nonprofits_id)
    add_nonprofit_company_news_article_connections(new_article_ids, companies)

    for step in (update_nonprofit_twitter_name,
                 get_tweets_for_nonprofit,
                 get_followers_for_nonprofit):
        step(nonprofit.nonprofits_id)
def add_news_articles_to_db_for_nonprofits():
    """Look up news articles for every nonprofit in the DB, and store any
    news articles containing company names.

    All companies are loaded once up front; then, per nonprofit, new articles
    are stored and linked to the companies they mention.
    """
    logger.debug('Inside add_news_articles_to_db_for_nonprofits()')
    logger.debug('Getting companies...')
    companies = DBSession.query(Company).all()
    logger.debug('Done loading companies...')
    for nonprofit in DBSession.query(Nonprofit).all():
        # add_news_articles_to_db_for_nonprofit takes the nonprofit's ID (not
        # the object) and returns the IDs of the articles it stored.
        article_ids = add_news_articles_to_db_for_nonprofit(nonprofit.nonprofits_id)
        # Argument order is (article_ids, companies).
        add_nonprofit_company_news_article_connections(article_ids, companies)
def show_topics_for_tweets():
    """Experimental code for displaying topics generated by topic modeling.

    Builds one document per distinct Twitter name (that account's tweet texts
    joined by newlines) and hands the documents to ``lda.get_topics``.
    """
    name_rows = DBSession.query(Tweet.twitter_name).group_by(Tweet.twitter_name).all()
    twitter_names = [name_row.twitter_name for name_row in name_rows]

    documents = []
    print('Retrieving tweets...')
    for twitter_name in twitter_names:
        text_rows = DBSession.query(Tweet.text).filter(Tweet.twitter_name == twitter_name).all()
        documents.append('\n'.join([text_row.text for text_row in text_rows]))

    print('Getting topics...')
    lda.get_topics(documents)
def sector_stats(ntee):
    """
    Return a dict of mean graph metrics (clustering coefficient, hub,
    authority) over the nonprofits whose NTEE code starts with ``ntee``.

    Only nonprofits that have a Twitter name and a matching
    Nonprofit_Twitter_Attributes row contribute; with no contributors the
    dict is empty.
    """
    candidates = DBSession.query(Nonprofit).filter(Nonprofit.ntee_code.like(ntee + '%')).all()
    samples = defaultdict(list)
    for nonprofit in candidates:
        if not nonprofit.twitter_name:
            continue
        attributes = DBSession.query(Nonprofit_Twitter_Attributes).filter(
            Nonprofit_Twitter_Attributes.id == nonprofit.twitter_name).first()
        if not attributes:
            continue
        samples['clustering_coefficient'].append(float(attributes.clustering_coefficient))
        samples['hub'].append(float(attributes.hub))
        samples['authority'].append(float(attributes.authority))

    averages = {}
    for metric, values in samples.iteritems():
        averages[metric] = np.mean(values)
    return averages
def update_null_nonprofit_twitter_ids():
    """Fill in the Twitter user ID of nonprofits that have a Twitter name but
    no Twitter user ID.

    Screen names are resolved in one call; names that cannot be resolved are
    reported on stdout and left unchanged. The session is committed at the end.
    """
    logger.debug('Inside update_null_nonprofit_twitter_ids()')
    # The comparisons against None are deliberate (hence nopep8): they build
    # SQL column expressions, which 'is None' cannot do.
    id_missing = Nonprofit.twitter_id == None  # nopep8
    name_present = Nonprofit.twitter_name != None  # nopep8
    nonprofits = DBSession.query(Nonprofit).filter(id_missing).filter(name_present).all()

    id_by_screen_name = givinggraph.twitter.users.get_screen_name_to_id_map(
        [nonprofit.twitter_name for nonprofit in nonprofits])

    for nonprofit in nonprofits:
        # The map is probed with the lower-cased screen name.
        if nonprofit.twitter_name.lower() not in id_by_screen_name:
            print('"{0}" was not found, the account may have been deleted or the screen name may have changed.'.format(nonprofit.twitter_name))
            continue
        nonprofit.twitter_id = id_by_screen_name[nonprofit.twitter_name.lower()]
    DBSession.commit()
def update_nonprofit_twitter_name(nonprofits_id):
    """Takes the ID of a nonprofit and uses Yahoo to try to find the Twitter
    name for that nonprofit.

    Only the first search result is considered. With no results at all the
    function returns without touching the DB. Otherwise the nonprofit's
    twitter_name is set to the text following 'twitter.com/' in that result,
    or to None when the result is not a twitter.com URL, and the session is
    committed.
    """
    logger.debug('Inside update_nonprofit_twitter_name(nonprofits_id) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    hits = givinggraph.yahoo.search.get_search_results('twitter ' + nonprofit.name)
    if len(hits) == 0:
        return

    # Drop the scheme so the host is at the very start of the string.
    top_hit = hits[0].replace('http://', '').replace('https://', '')
    if top_hit.startswith('twitter.com'):
        # Skip 'twitter.com' (11 characters) plus the separator after it.
        found_name = top_hit[12:]
    else:
        found_name = None
    nonprofit.twitter_name = found_name
    DBSession.commit()
def show_topics_for_tweets():
    """Experimental code for displaying topics generated by topic modeling.

    Builds one document per distinct Twitter name (that account's tweet texts
    joined by newlines) and hands the documents to ``lda.get_topics``.
    """
    name_rows = DBSession.query(Tweet.twitter_name).group_by(Tweet.twitter_name).all()
    twitter_names = [name_row.twitter_name for name_row in name_rows]

    documents = []
    print('Retrieving tweets...')
    for twitter_name in twitter_names:
        text_rows = DBSession.query(Tweet.text).filter(Tweet.twitter_name == twitter_name).all()
        documents.append('\n'.join([text_row.text for text_row in text_rows]))

    print('Getting topics...')
    lda.get_topics(documents)
def sector_stats(ntee):
    """
    Return a dict of mean graph metrics (clustering coefficient, hub,
    authority) over the nonprofits whose NTEE code starts with ``ntee``.

    Only nonprofits that have a Twitter name and a matching
    Nonprofit_Twitter_Attributes row contribute; with no contributors the
    dict is empty.
    """
    candidates = DBSession.query(Nonprofit).filter(Nonprofit.ntee_code.like(ntee + "%")).all()
    samples = defaultdict(list)
    for nonprofit in candidates:
        if not nonprofit.twitter_name:
            continue
        attributes = DBSession.query(Nonprofit_Twitter_Attributes).filter(
            Nonprofit_Twitter_Attributes.id == nonprofit.twitter_name).first()
        if not attributes:
            continue
        samples["clustering_coefficient"].append(float(attributes.clustering_coefficient))
        samples["hub"].append(float(attributes.hub))
        samples["authority"].append(float(attributes.authority))

    averages = {}
    for metric, values in samples.iteritems():
        averages[metric] = np.mean(values)
    return averages
def update_nonprofit_twitter_name(nonprofits_id):
    """Takes the ID of a nonprofit and uses Yahoo to try to find the Twitter
    name for that nonprofit.

    Only the first search result is considered. With no results at all the
    function returns without touching the DB. Otherwise the nonprofit's
    twitter_name is set to the text following 'twitter.com/' in that result,
    or to None when the result is not a twitter.com URL, and the session is
    committed.
    """
    logger.debug('Inside update_nonprofit_twitter_name(nonprofits_id) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    hits = givinggraph.yahoo.search.get_search_results('twitter ' + nonprofit.name)
    if len(hits) == 0:
        return

    # Drop the scheme so the host is at the very start of the string.
    top_hit = hits[0].replace('http://', '').replace('https://', '')
    if top_hit.startswith('twitter.com'):
        # Skip 'twitter.com' (11 characters) plus the separator after it.
        found_name = top_hit[12:]
    else:
        found_name = None
    nonprofit.twitter_name = found_name
    DBSession.commit()
def update_null_nonprofit_twitter_ids():
    """Fill in the Twitter user ID of nonprofits that have a Twitter name but
    no Twitter user ID.

    Screen names are resolved in one call; names that cannot be resolved are
    reported on stdout and left unchanged. The session is committed at the end.
    """
    logger.debug('Inside update_null_nonprofit_twitter_ids()')
    # The comparisons against None are deliberate (hence nopep8): they build
    # SQL column expressions, which 'is None' cannot do.
    id_missing = Nonprofit.twitter_id == None  # nopep8
    name_present = Nonprofit.twitter_name != None  # nopep8
    nonprofits = DBSession.query(Nonprofit).filter(id_missing).filter(name_present).all()

    id_by_screen_name = givinggraph.twitter.users.get_screen_name_to_id_map(
        [nonprofit.twitter_name for nonprofit in nonprofits])

    for nonprofit in nonprofits:
        # The map is probed with the lower-cased screen name.
        if nonprofit.twitter_name.lower() not in id_by_screen_name:
            print('"{0}" was not found, the account may have been deleted or the screen name may have changed.'.format(nonprofit.twitter_name))
            continue
        nonprofit.twitter_id = id_by_screen_name[nonprofit.twitter_name.lower()]
    DBSession.commit()
def possible_partners():
    """Return the possible donors given a nonprofit, as a JSON string.

    Reads two query-string arguments from the current request:
      * ``attr`` -- which stored procedure to use: 'description', 'homepage'
        or 'tweets'.
      * ``id`` -- the nonprofit's ID; it is converted with int().

    Raises ValueError when ``attr`` is missing or not one of the supported
    values (previously this surfaced as an UnboundLocalError on ``query``).
    """
    procedures = {
        'description': 'from_id_to_companies_by_desc',
        'homepage': 'from_id_to_companies_by_home',
        'tweets': 'from_id_to_companies_by_tweets',
    }
    attr = request.args.get('attr')
    if attr not in procedures:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedures))))

    # int() guarantees that only digits reach the SQL text.
    query = "call %s('%d')" % (procedures[attr], int(request.args.get('id')))
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
def get_tweets_for_nonprofit(nonprofits_id):
    """Retrieve tweets for the given nonprofit and store them in the DB.

    Only tweets newer than the highest tweet ID already stored for the
    nonprofit's Twitter name are requested (1 is used when none are stored).
    Tweets are fetched by Twitter user ID when available, otherwise by
    Twitter name; with neither, nothing is fetched. The session is committed
    at the end in every case.
    """
    logger.debug('Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)

    # Tweet IDs are cast to integers so that max() compares them numerically.
    newest_id_column = func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id')
    newest = DBSession.query(newest_id_column).filter(Tweet.twitter_name == nonprofit.twitter_name).first()
    since_id = 1
    if newest is not None and newest.max_tweet_id is not None:
        since_id = newest.max_tweet_id

    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(nonprofit.twitter_id, True, since_id=since_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(nonprofit.twitter_name, True, since_id=since_id)
    else:
        tweets = []

    def joined(tweet, entity_kind, field):
        """Comma-join one field of every entity of the given kind."""
        return ', '.join([entity[field] for entity in tweet['entities'][entity_kind]])

    for tweet in tweets:
        DBSession.add(Tweet(
            tweet['user']['screen_name'],
            tweet['id_str'],
            tweet['created_at'],
            tweet['text'].encode('utf-8'),
            tweet['lang'],
            tweet['retweet_count'],
            tweet['favorite_count'],
            joined(tweet, 'user_mentions', 'id_str'),
            joined(tweet, 'user_mentions', 'screen_name'),
            joined(tweet, 'hashtags', 'text'),
            joined(tweet, 'urls', 'expanded_url'),
            tweet['in_reply_to_screen_name'],
            tweet['in_reply_to_user_id_str'],
            tweet['in_reply_to_status_id_str'],
        ))
    DBSession.commit()
def sector_summary():
    """Return the summary of a given NTEE code, as a JSON string.

    The NTEE code is read from the ``ntee`` query-string argument and passed
    to the ``sector_summary`` stored procedure. The JSON object contains:
      * ``avg_<metric>`` -- the mean of each graph metric over the returned
        nonprofits (0 when there are none);
      * ``tw_communities``, ``web_communities``, ``desc_communities`` -- for
        each community value, the number of nonprofits belonging to it.
    """
    # The NTEE code comes straight from the request, so it is sent as a bound
    # parameter rather than interpolated into the SQL text.
    rows = DBSession.execute('call sector_summary(:ntee)',
                             {'ntee': request.args.get('ntee')})
    nonprofits = procedure_to_json(rows)['results']

    metrics = ('closeness_centrality', 'clustering_coefficient', 'degree',
               'hubAuth', 'weighted_degree', 'eccentricity')
    community_fields = (('tw_communities', 'tw_community'),
                        ('web_communities', 'web_community'),
                        ('desc_communities', 'desc_community'))

    summary = dict(('avg_' + metric, 0) for metric in metrics)
    communities = dict((output_key, {}) for output_key, _ in community_fields)

    total = float(len(nonprofits))
    for nonprofit in nonprofits:
        # Each metric is accumulated exactly once; the clustering coefficient
        # used to be added twice, which doubled its reported average.
        for metric in metrics:
            summary['avg_' + metric] += nonprofit[metric] / total
        # The first occurrence of a community counts as 1 (it used to start
        # at 0, leaving every count one too low).
        for output_key, field in community_fields:
            counts = communities[output_key]
            counts[nonprofit[field]] = counts.get(nonprofit[field], 0) + 1

    summary.update(communities)
    return json.dumps(summary)
def get_followers_for_nonprofit(nonprofits_id):
    """Retrieve followers for the given nonprofit and store them in the DB.

    The retrieval itself is currently disabled (see the commented-out code):
    the function only logs, loads the nonprofit and inspects its twitter_id.
    """
    logger.debug('Inside get_followers_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit.twitter_id is None:
        return
    # Disabled follower download, kept for reference:
    # follower_ids = givinggraph.twitter.users.get_followers(nonprofit.twitter_id)
    # DBSession.query(Nonprofits_Follower).filter(Nonprofits_Follower.nonprofit_handle == nonprofit.twitter_name).delete()
    # for follower_id in follower_ids:
    #     DBSession.add(Nonprofits_Follower(nonprofit.twitter_name, follower_id))
    # DBSession.commit()
def add_similarity_scores_for_nonprofit_descriptions():
    """Calculate similarity scores for every pair of nonprofit descriptions
    and store them in the DB.

    All existing Nonprofits_Similarity_By_Description rows are deleted first;
    one row is then added per unordered pair of nonprofits that have a
    description, and the session is committed.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')
    has_description = Nonprofit.description != None  # nopep8
    nonprofits = DBSession.query(Nonprofit).filter(has_description).all()
    descriptions = [nonprofit.description for nonprofit in nonprofits]
    scores = similarity.get_similarity_scores_all_pairs(descriptions)

    DBSession.query(Nonprofits_Similarity_By_Description).delete()
    size = len(scores)
    # Visit each pair once: the second index always stays above the first.
    for first in xrange(size - 1):
        for second in xrange(first + 1, size):
            pair = Nonprofits_Similarity_By_Description(
                nonprofits[first].nonprofits_id,
                nonprofits[second].nonprofits_id,
                scores[first][second])
            DBSession.add(pair)
    DBSession.commit()
def add_similarity_scores_for_nonprofit_tweets():
    """Calculate similarity scores for every pair of nonprofit tweets and
    store them in the DB.

    Tweets are grouped per Twitter name with their texts concatenated by the
    database. All existing Nonprofits_Similarity_By_Tweets rows are deleted
    first; one row is then added per unordered pair of Twitter names, and the
    session is committed.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')
    combined_text = func.group_concat(Tweet.text).label('text')
    accounts = DBSession.query(Tweet.twitter_name, combined_text).group_by(Tweet.twitter_name).all()
    scores = similarity.get_similarity_scores_all_pairs([account.text for account in accounts])

    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()
    size = len(scores)
    # Visit each pair once: the second index always stays above the first.
    for first in xrange(size - 1):
        for second in xrange(first + 1, size):
            pair = Nonprofits_Similarity_By_Tweets(
                accounts[first].twitter_name,
                accounts[second].twitter_name,
                scores[first][second])
            DBSession.add(pair)
    DBSession.commit()
def possible_partners():
    """Return the possible donors given a nonprofit, as a JSON string.

    Reads two query-string arguments from the current request:
      * ``attr`` -- which stored procedure to use: 'description', 'homepage'
        or 'tweets'.
      * ``id`` -- the nonprofit's ID; it is converted with int().

    Raises ValueError when ``attr`` is missing or not one of the supported
    values (previously this surfaced as an UnboundLocalError on ``query``).
    """
    procedures = {
        'description': 'from_id_to_companies_by_desc',
        'homepage': 'from_id_to_companies_by_home',
        'tweets': 'from_id_to_companies_by_tweets',
    }
    attr = request.args.get('attr')
    if attr not in procedures:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedures))))

    # int() guarantees that only digits reach the SQL text.
    query = "call %s('%d')" % (procedures[attr], int(request.args.get('id')))
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
def add_nonprofit_company_news_article_connections(article_ids, companies):
    """Link news articles to the companies they mention supportively.

    Takes a list of IDs of news articles and a list of Company objects. For
    every article, each company whose name is mentioned in the article text
    with supportive wording is appended to the article's companies. The
    session is committed once after all articles have been processed.

    An article ID that cannot be loaded is reported on stdout, the lookup is
    retried once after a pause, and the article is skipped if it is still
    missing (previously this case crashed on ``article.text``).
    """
    logger.debug('Inside add_nonprofit_company_news_article_connections(news_articles, companies)')
    company_count = len(companies)
    for article_id in article_ids:
        article = DBSession.query(News_Article).get(article_id)
        if article is None:
            print('***************************')
            print('***************************')
            print(article_id)
            print('***************************')
            print('***************************')
            # Give the DB time to make the freshly inserted article visible,
            # then look once more before giving up on this ID.
            time.sleep(180)
            article = DBSession.query(News_Article).get(article_id)
            if article is None:
                logger.warning('News article {0} could not be loaded, skipping it.'.format(article_id))
                continue

        for position, company in enumerate(companies, 1):
            if position % 100 == 0:
                print('Processing article {0} for company {1} of {2}...'.format(article_id, position, company_count))
            mentions = news_parser.get_company_mentions_in_text(article.text, company.name.encode('utf-8'))
            for mention in mentions:
                if news_parser.contains_supportive_wording(mention):
                    # One supportive mention is enough to link the company.
                    article.companies.append(company)
                    break
    DBSession.commit()
def similarity():
    """Return the most similar nonprofits given a nonprofit and a metric, as
    a JSON string.

    Reads three query-string arguments from the current request:
      * ``attr`` -- the metric: 'description', 'homepage', 'tweets' or
        'followers'.
      * ``id`` -- the nonprofit's ID; it is converted with int().
      * ``top`` -- optional number of results; defaults to 10.

    Raises ValueError when ``attr`` is missing or not one of the supported
    values (previously this surfaced as an UnboundLocalError on ``query``).
    """
    procedures = {
        'description': 'from_nonprofit_id_to_similar_charities_by_description',
        'homepage': 'from_nonprofit_id_to_similar_charities_by_homepage',
        'tweets': 'from_nonprofit_id_to_similar_charities_by_tweets',
        'followers': 'from_nonprofit_id_to_similar_charities_by_followers',
    }
    requested_top = request.args.get('top')
    top = 10 if requested_top is None else int(requested_top)

    attr = request.args.get('attr')
    if attr not in procedures:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedures))))

    # Both numeric arguments go through int(), so only digits reach the SQL text.
    query = 'call %s(%d, %d)' % (procedures[attr], int(request.args.get('id')), top)
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
def get_followers_for_nonprofit(nonprofits_id):
    """Retrieve followers for the given nonprofit and store them in the DB.

    The retrieval itself is currently disabled (see the commented-out code):
    the function only logs, loads the nonprofit and inspects its twitter_id.
    """
    logger.debug('Inside get_followers_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit.twitter_id is None:
        return
    # Disabled follower download, kept for reference:
    # follower_ids = givinggraph.twitter.users.get_followers(nonprofit.twitter_id)
    # DBSession.query(Nonprofits_Follower).filter(Nonprofits_Follower.nonprofit_handle == nonprofit.twitter_name).delete()
    # for follower_id in follower_ids:
    #     DBSession.add(Nonprofits_Follower(nonprofit.twitter_name, follower_id))
    # DBSession.commit()
def sector_summary():
    """Return the summary of a given NTEE code, as a JSON string.

    The NTEE code is read from the ``ntee`` query-string argument and passed
    to the ``sector_summary`` stored procedure. The JSON object contains:
      * ``avg_<metric>`` -- the mean of each graph metric over the returned
        nonprofits (0 when there are none);
      * ``tw_communities``, ``web_communities``, ``desc_communities`` -- for
        each community value, the number of nonprofits belonging to it.
    """
    # The NTEE code comes straight from the request, so it is sent as a bound
    # parameter rather than interpolated into the SQL text.
    rows = DBSession.execute('call sector_summary(:ntee)',
                             {'ntee': request.args.get('ntee')})
    nonprofits = procedure_to_json(rows)['results']

    metrics = ('closeness_centrality', 'clustering_coefficient', 'degree',
               'hubAuth', 'weighted_degree', 'eccentricity')
    community_fields = (('tw_communities', 'tw_community'),
                        ('web_communities', 'web_community'),
                        ('desc_communities', 'desc_community'))

    summary = dict(('avg_' + metric, 0) for metric in metrics)
    communities = dict((output_key, {}) for output_key, _ in community_fields)

    total = float(len(nonprofits))
    for nonprofit in nonprofits:
        # Each metric is accumulated exactly once; the clustering coefficient
        # used to be added twice, which doubled its reported average.
        for metric in metrics:
            summary['avg_' + metric] += nonprofit[metric] / total
        # The first occurrence of a community counts as 1 (it used to start
        # at 0, leaving every count one too low).
        for output_key, field in community_fields:
            counts = communities[output_key]
            counts[nonprofit[field]] = counts.get(nonprofit[field], 0) + 1

    summary.update(communities)
    return json.dumps(summary)
def add_similarity_scores_for_nonprofit_descriptions():
    """Calculate similarity scores for every pair of nonprofit descriptions
    and store them in the DB.

    All existing Nonprofits_Similarity_By_Description rows are deleted first;
    one row is then added per unordered pair of nonprofits that have a
    description, and the session is committed.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')
    has_description = Nonprofit.description != None  # nopep8
    nonprofits = DBSession.query(Nonprofit).filter(has_description).all()
    descriptions = [nonprofit.description for nonprofit in nonprofits]
    scores = similarity.get_similarity_scores_all_pairs(descriptions)

    DBSession.query(Nonprofits_Similarity_By_Description).delete()
    size = len(scores)
    # Visit each pair once: the second index always stays above the first.
    for first in xrange(size - 1):
        for second in xrange(first + 1, size):
            pair = Nonprofits_Similarity_By_Description(
                nonprofits[first].nonprofits_id,
                nonprofits[second].nonprofits_id,
                scores[first][second])
            DBSession.add(pair)
    DBSession.commit()
def add_news_articles_to_db_for_nonprofit(nonprofits_id):
    """Searches the web for news articles related to the nonprofit and stores
    them in the DB. Returns the IDs of the news articles found.

    URLs of articles already stored for this nonprofit are passed to the
    searcher as URLs to ignore. The returned IDs are read after the commit.
    """
    logger.debug('Inside add_news_articles_to_db_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)

    stored_articles = DBSession.query(News_Article).filter(News_Article.nonprofits_id == nonprofits_id).all()
    known_urls = [stored.url for stored in stored_articles]

    found = news_searcher.find_news_articles(nonprofit.name, urls_to_ignore=known_urls)
    new_articles = [
        News_Article(nonprofit.nonprofits_id, hit.url, hit.headline, hit.body)
        for hit in found
    ]
    DBSession.add_all(new_articles)
    DBSession.commit()
    return [new_article.news_articles_id for new_article in new_articles]
def similarity():
    """Return the most similar nonprofits given a nonprofit and a metric, as
    a JSON string.

    Reads three query-string arguments from the current request:
      * ``attr`` -- the metric: 'description', 'homepage', 'tweets' or
        'followers'.
      * ``id`` -- the nonprofit's ID; it is converted with int().
      * ``top`` -- optional number of results; defaults to 10.

    Raises ValueError when ``attr`` is missing or not one of the supported
    values (previously this surfaced as an UnboundLocalError on ``query``).
    """
    procedures = {
        'description': 'from_nonprofit_id_to_similar_charities_by_description',
        'homepage': 'from_nonprofit_id_to_similar_charities_by_homepage',
        'tweets': 'from_nonprofit_id_to_similar_charities_by_tweets',
        'followers': 'from_nonprofit_id_to_similar_charities_by_followers',
    }
    requested_top = request.args.get('top')
    top = 10 if requested_top is None else int(requested_top)

    attr = request.args.get('attr')
    if attr not in procedures:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedures))))

    # Both numeric arguments go through int(), so only digits reach the SQL text.
    query = 'call %s(%d, %d)' % (procedures[attr], int(request.args.get('id')), top)
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
def add_similarity_scores_for_nonprofit_tweets():
    """Calculate similarity scores for every pair of nonprofit tweets and
    store them in the DB.

    Tweets are grouped per Twitter name with their texts concatenated by the
    database. All existing Nonprofits_Similarity_By_Tweets rows are deleted
    first; one row is then added per unordered pair of Twitter names, and the
    session is committed.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')
    combined_text = func.group_concat(Tweet.text).label('text')
    accounts = DBSession.query(Tweet.twitter_name, combined_text).group_by(Tweet.twitter_name).all()
    scores = similarity.get_similarity_scores_all_pairs([account.text for account in accounts])

    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()
    size = len(scores)
    # Visit each pair once: the second index always stays above the first.
    for first in xrange(size - 1):
        for second in xrange(first + 1, size):
            pair = Nonprofits_Similarity_By_Tweets(
                accounts[first].twitter_name,
                accounts[second].twitter_name,
                scores[first][second])
            DBSession.add(pair)
    DBSession.commit()
def get_tweets_for_nonprofit(nonprofits_id):
    """Retrieve tweets for the given nonprofit and store them in the DB.

    Only tweets newer than the highest tweet ID already stored for the
    nonprofit's Twitter name are requested (1 is used when none are stored).
    Tweets are fetched by Twitter user ID when available, otherwise by
    Twitter name; with neither, nothing is fetched. The session is committed
    at the end in every case.
    """
    logger.debug('Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)

    # Tweet IDs are cast to integers so that max() compares them numerically.
    newest_id_column = func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id')
    newest = DBSession.query(newest_id_column).filter(Tweet.twitter_name == nonprofit.twitter_name).first()
    since_id = 1
    if newest is not None and newest.max_tweet_id is not None:
        since_id = newest.max_tweet_id

    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(nonprofit.twitter_id, True, since_id=since_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(nonprofit.twitter_name, True, since_id=since_id)
    else:
        tweets = []

    def joined(tweet, entity_kind, field):
        """Comma-join one field of every entity of the given kind."""
        return ', '.join([entity[field] for entity in tweet['entities'][entity_kind]])

    for tweet in tweets:
        DBSession.add(Tweet(
            tweet['user']['screen_name'],
            tweet['id_str'],
            tweet['created_at'],
            tweet['text'].encode('utf-8'),
            tweet['lang'],
            tweet['retweet_count'],
            tweet['favorite_count'],
            joined(tweet, 'user_mentions', 'id_str'),
            joined(tweet, 'user_mentions', 'screen_name'),
            joined(tweet, 'hashtags', 'text'),
            joined(tweet, 'urls', 'expanded_url'),
            tweet['in_reply_to_screen_name'],
            tweet['in_reply_to_user_id_str'],
            tweet['in_reply_to_status_id_str'],
        ))
    DBSession.commit()
def add_news_articles_to_db_for_nonprofit(nonprofits_id):
    """Searches the web for news articles related to the nonprofit and stores
    them in the DB. Returns the IDs of the news articles found.

    URLs of articles already stored for this nonprofit are passed to the
    searcher as URLs to ignore. The returned IDs are read after the commit.
    """
    logger.debug('Inside add_news_articles_to_db_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)

    stored_articles = DBSession.query(News_Article).filter(News_Article.nonprofits_id == nonprofits_id).all()
    known_urls = [stored.url for stored in stored_articles]

    found = news_searcher.find_news_articles(nonprofit.name, urls_to_ignore=known_urls)
    new_articles = [
        News_Article(nonprofit.nonprofits_id, hit.url, hit.headline, hit.body)
        for hit in found
    ]
    DBSession.add_all(new_articles)
    DBSession.commit()
    return [new_article.news_articles_id for new_article in new_articles]
def related_companies():
    """Return the companies that are mentioned with a nonprofit in news
    articles, as a JSON string.

    The nonprofit's ID is read from the ``id`` query-string argument and
    converted with int() before it is placed into the procedure call.
    """
    nonprofit_id = int(request.args.get('id'))
    rows = DBSession.execute('call related_companies(%d)' % nonprofit_id)
    return json.dumps(procedure_to_json(rows))
def twitter():
    """Return twitter-related information given a nonprofit, as a JSON string.

    The nonprofit's ID is read from the ``id`` query-string argument and
    converted with int() before it is placed into the procedure call.
    """
    nonprofit_id = int(request.args.get('id'))
    rows = DBSession.execute('call from_nonprofit_id_to_twitter(%d)' % nonprofit_id)
    return json.dumps(procedure_to_json(rows))
def graph_stats():
    """Return the SNA indexes given a nonprofit, as a JSON string.

    The nonprofit's ID is read from the ``id`` query-string argument and
    converted with int() before it is placed into the procedure call.
    """
    nonprofit_id = int(request.args.get('id'))
    rows = DBSession.execute('call from_nonprofit_id_to_sna(%d)' % nonprofit_id)
    return json.dumps(procedure_to_json(rows))
def by_ein(ein_id):
    """Lookup nonprofit by EIN.

    The first matching Nonprofit (or None when there is no match) is handed
    to result2json, whose return value is returned.
    """
    matches = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein_id)
    return result2json(matches.first())
def by_id(nonprofit_id):
    """Lookup nonprofit by our internal id.

    The first matching Nonprofit (or None when there is no match) is handed
    to result2json, whose return value is returned.
    """
    matches = DBSession.query(Nonprofit).filter(Nonprofit.nonprofits_id == nonprofit_id)
    return result2json(matches.first())
def by_ein(ein_id):
    """Lookup nonprofit by EIN.

    The first matching Nonprofit (or None when there is no match) is handed
    to result2json, whose return value is returned.
    """
    matches = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein_id)
    return result2json(matches.first())
def by_id(nonprofit_id):
    """Lookup nonprofit by our internal id.

    The first matching Nonprofit (or None when there is no match) is handed
    to result2json, whose return value is returned.
    """
    matches = DBSession.query(Nonprofit).filter(Nonprofit.nonprofits_id == nonprofit_id)
    return result2json(matches.first())
def twitter():
    """Return twitter-related information given a nonprofit, as a JSON string.

    The nonprofit's ID is read from the ``id`` query-string argument and
    converted with int() before it is placed into the procedure call.
    """
    nonprofit_id = int(request.args.get('id'))
    rows = DBSession.execute('call from_nonprofit_id_to_twitter(%d)' % nonprofit_id)
    return json.dumps(procedure_to_json(rows))