Example no. 1
0
def update_all_partner_posts_task():
    """
    Fetch all partners and, for each one, pass the partner to
    update_posts_for_feed_task to pull in new posts.

    Side effects:
      - stamps each Partner row's date_feed_updated with the current time
      - if any new posts arrived: expires the home and early bird page
        caches, best-effort clears external site caches (production only),
        and prunes old posts via delete_old_posts_tasks()
    """
    logger.debug("Running update_all_partner_posts_task.")

    number_of_new_posts = 0

    partners = Partner.objects.all()
    for partner in partners:
        # find all the posts in the current partner feeds and update them
        post_count = update_posts_for_feed_task(partner)
        number_of_new_posts += post_count
        # Record when this partner feed was last retrieved.
        Partner.objects.filter(pk=partner.pk).update(date_feed_updated=datetime.now())

    # clear home and early bird page cache and delete old posts if there is a change
    if number_of_new_posts > 0:
        logger.debug("Clearing site cache")
        expire_cache_by_path('/', is_view=False)
        expire_cache_by_path('/news/earlybird/', is_view=False)
        try:
            # Clear the NG Homepage cache by running a helper script that makes
            # HTTP requests to views which clear the page cache by path.
            if not settings.DEBUG and settings.SITE_URL == 'http://www.govexec.com':
                clear_cache_cmd = os.path.join(settings.PROJECT_ROOT, 'support/clear_cache_for_external_sites.sh')
                # BUG FIX: '2>&1', '>/dev/null' and '&' were previously passed as
                # literal argv entries (shell=False), so they were never treated
                # as redirections/backgrounding — the script just received three
                # junk arguments. Discard output explicitly instead.
                with open(os.devnull, 'w') as devnull:
                    subprocess.call([clear_cache_cmd], stdout=devnull, stderr=devnull)
        except Exception:
            # Best-effort cache clear: never let it break the update task,
            # but log the failure instead of silently swallowing it.
            logger.exception("Failed to clear external site caches")
        # set num_posts_to_keep to a high number to prevent clearing of active posts
        # that are then re-entered on next update
        delete_old_posts_tasks()
    logger.debug("Finished running update_all_partner_posts_task")
Example no. 2
0
def update_all_partner_posts_task():
    """
    Fetch all partners and, for each one, pass the partner to
    update_posts_for_feed_task to pull in new posts.

    Side effects:
      - stamps each Partner row's date_feed_updated with the current time
      - if any new posts arrived: expires the home and early bird page
        caches, best-effort clears external site caches (production only),
        and prunes old posts via delete_old_posts_tasks()
    """
    logger.debug("Running update_all_partner_posts_task.")

    number_of_new_posts = 0

    partners = Partner.objects.all()
    for partner in partners:
        # find all the posts in the current partner feeds and update them
        post_count = update_posts_for_feed_task(partner)
        number_of_new_posts += post_count
        # Record when this partner feed was last retrieved.
        Partner.objects.filter(pk=partner.pk).update(
            date_feed_updated=datetime.now())

    # clear home and early bird page cache and delete old posts if there is a change
    if number_of_new_posts > 0:
        logger.debug("Clearing site cache")
        expire_cache_by_path('/', is_view=False)
        expire_cache_by_path('/news/earlybird/', is_view=False)
        try:
            # Clear the NG Homepage cache by running a helper script that makes
            # HTTP requests to views which clear the page cache by path.
            if not settings.DEBUG and settings.SITE_URL == 'https://www.govexec.com':  # TODO: stringly typed!!!
                clear_cache_cmd = os.path.join(
                    settings.PROJECT_ROOT,
                    'support/clear_cache_for_external_sites.sh')
                # BUG FIX: '2>&1', '>/dev/null' and '&' were previously passed as
                # literal argv entries (shell=False), so they were never treated
                # as redirections/backgrounding — the script just received three
                # junk arguments. Discard output explicitly instead.
                with open(os.devnull, 'w') as devnull:
                    subprocess.call([clear_cache_cmd], stdout=devnull, stderr=devnull)
        except Exception:
            # Best-effort cache clear: never let it break the update task,
            # but log the failure instead of silently swallowing it.
            logger.exception("Failed to clear external site caches")
        # set num_posts_to_keep to a high number to prevent clearing of active posts
        # that are then re-entered on next update
        delete_old_posts_tasks()
    logger.debug("Finished running update_all_partner_posts_task")
    def cache_tweets(self):
        """
        Fetch recent tweets for every active FollowAccount via the Twitter
        API (tweepy), linkify their entities (mentions, hashtags, URLs) into
        HTML anchors, and save each one as a Tweet row, expiring the
        tweet-feed cache path after each save.

        Items carrying a "delete" notice remove the matching Tweet row from
        the DB instead (best-effort).

        NOTE(review): written for Python 2 (`except Exception, err` syntax,
        u"" literals, force_unicode).
        """
        users = FollowAccount.objects.filter(active=True)

        '''
        A note on parameters:

        track=words: This feature can be added.  It will receive tweets with the keywords provided in them.
        This can be used to find posts with specific hashtags (i.e. track=["#innovation",]).
        Note: it does NOT filter out tweets by "followed" (e.g. follow=users) users that don't contain the text.
        For that feature, we would have to implement filtering in our own DB queries.

        follow=users: This is a list of user IDs to be pulled into the stream.  The CMS automatically fetches ID based
        on screen_name.
        '''
        # Authenticate against Twitter with the app/user credentials.
        auth = tweepy.auth.OAuthHandler(TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET)
        auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET)
        api = tweepy.API(auth)

        for user in users:
            for streamtweet in api.user_timeline(user.screen_name):
                #import pdb;pdb.set_trace()
                # "delete" notices: remove the tweet from our DB rather than save it.
                # NOTE(review): this branch mixes attribute access
                # (streamtweet.delete.status) with dict subscription
                # (streamtweet["delete"]["status"]) on the same object —
                # confirm the tweepy model supports both.
                if hasattr(streamtweet, "delete"):
                    if hasattr(streamtweet.delete, "status") and "user_id" in streamtweet["delete"]["status"]:
                        # NOTE(review): `user_id in users` tests membership against
                        # FollowAccount model instances, not their user IDs — this
                        # likely never matches; verify intent.
                        if streamtweet.delete.status.user_id in users:

                            # client.captureMessage(u"Deleting tweet from %-16s\t( tweet %d, rate %.1f tweets/sec)" % (streamtweet.delete.status.user_id, stream.count, stream.rate))
                            try:
                                tweet = Tweet.objects.get(external_tweet_id=int(streamtweet.delete.status.id))
                                tweet.delete()
                                expire_cache_by_path('/data/twitter_feed_cache/tweets/', is_view=False)
                            # Best-effort delete: a missing Tweet row (or any other
                            # failure) is silently ignored.
                            except Exception, err:
                                pass
                                # client.captureMessage(u"Failed to delete tweet: %s\n%s"
                                #    % (sys.exc_info()[0], str(err),))
                        else:
                            pass
                            # client.captureMessage("Bypassing delete tweet from %-16s\t( tweet %d, rate %.1f tweets/sec)" % (streamtweet.delete.status.user_id, stream.count, stream.rate))

                else:
                    # Normal tweet: normalize author fields to unicode.
                    user_screen_name = force_unicode(streamtweet.user.screen_name)
                    user_name = force_unicode(streamtweet.user.name)
                    # client.captureMessage(u"Saving tweet from %-16s" % (user_screen_name, ))

                    # Parse data — convert Twitter's UTC timestamp to local time.
                    created_at = utc_to_local_datetime(streamtweet.created_at)

                    # Add links to tweet: escape non-ASCII to XML character
                    # references, then replace entity spans with HTML anchors.
                    text = mark_safe(streamtweet.text).encode('ascii','xmlcharrefreplace')
                    if hasattr(streamtweet, "entities") and streamtweet.entities:
                        if "user_mentions" in streamtweet.entities:
                            # reset already_processed — each distinct @mention is
                            # linkified only once (replace() already rewrites all
                            # occurrences in one call).
                            already_processed = []
                            for mention in streamtweet.entities["user_mentions"]:
                                mention["screen_name"] = force_unicode(mention["screen_name"])
                                if not mention["screen_name"] in already_processed:
                                    already_processed.append(mention["screen_name"])
                                    # replace @screen_name with link
                                    link = u"<a href=\"http://www.twitter.com/%s\" rel=\"external\">@%s</a>" % (mention["screen_name"], mention["screen_name"])
                                    text = text.replace(u"@%s" % mention["screen_name"], link)

                        # NOTE(review): hashtags are accessed with attribute style
                        # here, unlike "user_mentions"/"urls" above/below which use
                        # dict subscription — confirm both forms work on this
                        # tweepy version.
                        if hasattr(streamtweet.entities, "hashtags") and streamtweet.entities.hashtags:
                            # reset already_processed
                            already_processed = []
                            for hashtag in streamtweet.entities.hashtags:
                                hashtag.text = force_unicode(hashtag.text)
                                if not hashtag.text in already_processed:
                                    already_processed.append(hashtag.text)
                                    # replace #hash_tag with link
                                    link = u"<a href=\"https://twitter.com/search/?src=hash&q=%%23%s\" rel=\"external\">#%s</a>" % (hashtag.text, hashtag.text)
                                    text = text.replace(u"#%s" % hashtag.text, link)

                        if "urls" in streamtweet.entities:
                            # reset already_processed
                            already_processed = []
                            for url in streamtweet.entities["urls"]:
                                # Only linkify when all three URL fields are present
                                # and non-empty.
                                if "display_url" in url and "url" in url and "expanded_url" in url:
                                    if url["display_url"] and url["url"] and url["expanded_url"]:
                                        url["display_url"] = force_unicode(url["display_url"])
                                        url["url"] = force_unicode(url["url"])
                                        url["expanded_url"] = force_unicode(url["expanded_url"])
                                        if not url["display_url"] in already_processed:
                                            already_processed.append(url["display_url"])
                                            # replace the t.co URL with an anchor showing
                                            # the display URL (title = expanded URL)
                                            link = u"<a href=\"%s\" rel=\"external\" title=\"%s\">%s</a>" % (url["url"], url["expanded_url"], url["display_url"])
                                            text = text.replace(url["url"], link)

                    # If tweet exists, don't save twice — reuse the existing row
                    # (upsert keyed on external_tweet_id).
                    try:
                        tweet = Tweet.objects.get(external_tweet_id=int(streamtweet.id))
                    # NOTE(review): bare except also hides DB errors, not just
                    # DoesNotExist — any failure falls through to creating a new row.
                    except:
                        tweet = Tweet()

                    # Save tweet to DB

                    # Tweet data
                    tweet.external_tweet_id = streamtweet.id
                    tweet.text = text
                    tweet.created_at = created_at
                    # Posted by data
                    tweet.posted_by_user_id = streamtweet.user.id
                    tweet.posted_by_name = user_name
                    tweet.posted_by_screen_name = user_screen_name
                    # In reply to data — store None for falsy reply fields.
                    tweet.in_reply_to_user_id = None if not streamtweet.in_reply_to_user_id else \
                        streamtweet.in_reply_to_user_id
                    tweet.in_reply_to_screen_name = None if not streamtweet.in_reply_to_screen_name else \
                        force_unicode(streamtweet.in_reply_to_screen_name)
                    tweet.in_reply_to_status_id = None if not streamtweet.in_reply_to_status_id else \
                        streamtweet.in_reply_to_status_id
                    # save tweet
                    tweet.save()

                    # clear cache for tweet feed
                    expire_cache_by_path('/data/twitter_feed_cache/tweets/', is_view=False)