Example #1
def main():

    # Assumes billy's module-level imports: sys, re, logging,
    # collections.defaultdict / collections.Counter, plus the db,
    # settings, and fix_bill_id globals from the billy package.
    abbrs = sys.argv[1:] or [x['abbreviation'] for x in db.metadata.find()]
    logger = logging.getLogger('billy.purge_committee_ids')
    logger.setLevel(logging.INFO)
    tally = defaultdict(Counter)

    for abbr in abbrs:
        abbr_tally = tally[abbr]
        spec = {
            settings.LEVEL_FIELD: abbr,
            'related_bills': {
                '$exists': True,
                '$ne': []
            },
        }
        for event in db.events.find(spec):
            fixed = False
            for bill in event['related_bills']:

                bill_id = bill.get('bill_id')
                if bill_id is not None:

                    # If "bill_id" is a big id, rename it.
                    if re.match(r'[A-Z]{2}B\d{8}', bill_id):
                        _id = bill.pop('bill_id')
                        bill['id'] = _id
                        logger.info('Renamed "bill_id" to "id"')
                        abbr_tally['bill_id --> id'] += 1

                    # Otherwise, run fix_bill_id to normalize
                    # malformed old ids.
                    else:
                        bill['bill_id'] = fix_bill_id(bill['bill_id'])
                        logger.info('Fixed an un-fixed bill_id')
                        abbr_tally['fix_bill_id'] += 1

                    fixed = True

                if '_scraped_bill_id' in bill:
                    bill_id = fix_bill_id(bill.pop('_scraped_bill_id'))
                    bill['bill_id'] = bill_id
                    logger.info('Renamed "_scraped_bill_id" to "bill_id"')
                    abbr_tally['_scraped_bill_id --> bill_id'] += 1

                    fixed = True

            if fixed:
                msg = 'Updating related_bills on event %r.'
                logger.debug(msg % event['_id'])
                db.events.save(event)

        logger.info(abbr)
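A quick sanity check of the "big id" pattern the loop above branches on (the id values here are illustrative):

import re

assert re.match(r'[A-Z]{2}B\d{8}', 'TXB00001234')   # internal big id: rename to 'id'
assert not re.match(r'[A-Z]{2}B\d{8}', 'HB 27')     # display id: run fix_bill_id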
Example #2
def main():

    abbrs = sys.argv[1:] or [x['abbreviation'] for x in db.metadata.find()]
    logger = logging.getLogger('billy.purge_committee_ids')
    logger.setLevel(logging.INFO)
    tally = defaultdict(Counter)

    for abbr in abbrs:
        abbr_tally = tally[abbr]
        spec = {
            settings.LEVEL_FIELD: abbr,
            'related_bills': {'$exists': True, '$ne': []},
            }
        for event in db.events.find(spec):
            fixed = False
            for bill in event['related_bills']:

                bill_id = bill.get('bill_id')
                if bill_id is not None:

                    # If "bill_id" is a big id, rename it.
                    if re.match(r'[A-Z]{2}B\d{8}', bill_id):
                        _id = bill.pop('bill_id')
                        bill['id'] = _id
                        logger.info('Renamed "bill_id" to "id"')
                        abbr_tally['bill_id --> id'] += 1

                    # Otherwise, run fix_bill_id to normalize
                    # malformed old ids.
                    else:
                        bill['bill_id'] = fix_bill_id(bill['bill_id'])
                        logger.info('Fixed an un-fixed bill_id')
                        abbr_tally['fix_bill_id'] += 1

                    fixed = True

                if '_scraped_bill_id' in bill:
                    bill_id = fix_bill_id(bill.pop('_scraped_bill_id'))
                    bill['bill_id'] = bill_id
                    logger.info('Renamed "_scraped_bill_id" to "bill_id"')
                    abbr_tally['_scraped_bill_id --> bill_id'] += 1

                    fixed = True

            if fixed:
                msg = 'Updating related_bills on event %r.'
                logger.debug(msg % event['_id'])
                db.events.save(event)

        logger.info(abbr)
Example #3
def main():

    import sys
    abbr = sys.argv[1]

    logger = logging.getLogger('purge_committee_ids')
    spec = {settings.LEVEL_FIELD: abbr}
    committee_ids = [c['_id'] for c in db.committees.find(spec, fields=['_id'])]

    # Events with committee participants.
    spec = {
        settings.LEVEL_FIELD: abbr,
        'participants.committee_id': {'$nin': committee_ids}
        }
    for event in db.events.find(spec):
        found = False
        # Iterate over a copy so participants can be removed safely.
        for participant in list(event['participants']):
            for id_key in 'committee_id', 'id':
                _id = participant.get(id_key, None)
                type_ = participant.get('participant_type')
                if id_key == 'id' and type_ != 'committee':
                    continue
                if _id and (_id not in committee_ids):
                    found = True
                    msg = 'Removing participant %r from event %r'
                    logger.info(msg % (participant[id_key], event['_id']))
                    event['participants'].remove(participant)
        if found:
            # This draft only detects stale participants; the event
            # is not saved.
            pass

    # Bill actions.
    spec = {
        settings.LEVEL_FIELD: abbr,
        'actions.related_entities.type': 'committee'
        }
    for bill in db.bills.find(spec):
        found = False
        for action in bill['actions']:
            # Iterate over a copy so entities can be removed safely.
            for entity in list(action['related_entities']):
                if entity['type'] == 'committee':
                    if entity['id'] not in committee_ids:
                        found = True

                        msg = 'Removing entity %r from action in %r'
                        logger.info(msg % (entity['id'], bill['bill_id']))
                        action['related_entities'].remove(entity)
        if found:
            # Likewise, stale entities are only detected here; the
            # bill is not saved.
            pass
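The remove() calls above are why both inner loops iterate over a copy: deleting from a list while iterating over it skips elements. A minimal demonstration:

items = [1, 2, 2, 3]
for x in list(items):       # iterate over a snapshot; mutating items is safe
    if x == 2:
        items.remove(x)
assert items == [1, 3]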
Example #4
def main():

    import sys
    abbrs = sys.argv[1:] or [x['abbreviation'] for x in db.metadata.find()]
    logger = logging.getLogger('purge_committee_ids')
    logger.setLevel(logging.DEBUG)

    for abbr in abbrs:
        spec = {settings.LEVEL_FIELD: abbr}
        committee_ids = [
            c['_id'] for c in db.committees.find(spec, fields=['_id'])
        ]

        # Events with committee participants.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'participants.committee_id': {
                '$nin': committee_ids
            }
        }
        for event in db.events.find(spec):
            old_ids = set()
            count = 0
            found = False
            for participant in event['participants']:
                for id_key in 'committee_id', 'id':
                    _id = participant.get(id_key, None)
                    type_ = participant.get('participant_type')
                    if id_key == 'id' and type_ != 'committee':
                        continue
                    if _id and (_id not in committee_ids):
                        found = True
                        count += 1
                        old_ids.add(_id)
                        msg = 'Removing participant %r from event %r'
                        logger.info(msg % (participant[id_key], event['_id']))

                        # Leave the participant in but set their id to None.
                        # Text will still be displayed without a hyperlink.
                        participant[id_key] = None

            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, event['_id']))
                db.events.save(event, safe=True)

        # Related committees in bill actions.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'actions.related_entities.type': 'committee'
        }
        for bill in db.bills.find(spec):
            old_ids = set()
            count = 0
            found = False
            for action in bill['actions']:
                # Iterate over a copy so entities can be removed safely.
                for entity in list(action['related_entities']):
                    if entity['type'] == 'committee':
                        if entity['id'] and (entity['id']
                                             not in committee_ids):
                            found = True
                            count += 1
                            old_ids.add(entity['id'])
                            msg = 'Removing entity %r from action in %r'
                            logger.debug(msg % (entity['id'], bill['bill_id']))

                            # Completely remove the related entity. Without an
                            # id it has no other purpose.
                            action['related_entities'].remove(entity)
            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, bill['_id']))
                db.bills.save(bill, safe=True)

        # Legislator old roles.
        spec = {settings.LEVEL_FIELD: abbr, 'old_roles': {'$exists': True}}
        for leg in db.legislators.find(spec):
            old_ids = set()
            count = 0
            found = False
            for role in leg['old_roles']:
                if 'committee_id' in role:
                    _id = role['committee_id']
                    if _id and (_id not in committee_ids):
                        found = True
                        count += 1
                        old_ids.add(_id)
                        msg = 'Removing id %r from old_role in %r'
                        logger.info(msg %
                                    (role['committee_id'], leg['full_name']))
                        # Set the id to None.
                        role['committee_id'] = None
            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, leg['_id']))
                db.legislators.save(leg, safe=True)

        # Related entities in feeds.
        spec = {settings.LEVEL_FIELD: abbr, 'entity_ids': {'$ne': None}}
        for entry in feeds_db.entries.find(spec):
            old_ids = set()
            count = 0
            found = False
            # Iterate over a copy so the paired deletions below are safe.
            for entity_id in list(entry['entity_ids']):
                if entity_id[2] == 'C':
                    if entity_id not in committee_ids:
                        found = True
                        count += 1
                        msg = 'Removing id %r from feed %r'
                        logger.info(msg % (entity_id, entry['_id']))

                        # Delete the entity from the feed.
                        old_ids.add(entity_id)
                        index = entry['entity_ids'].index(entity_id)
                        del entry['entity_ids'][index]
                        del entry['entity_strings'][index]
            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, entry['_id']))
                feeds_db.entries.save(entry, safe=True)

        # Nuke any committee sponsors of bills.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'sponsors.committee_id': {
                '$nin': committee_ids
            }
        }
        for bill in db.bills.find(spec):
            count = 0
            found = False
            old_ids = set()
            for sponsor in bill.get('sponsors', []):
                if 'committee_id' in sponsor:
                    _id = sponsor['committee_id']
                    # Only drop ids that no longer resolve to a committee.
                    if _id not in committee_ids:
                        old_ids.add(_id)
                        found = True
                        count += 1
                        del sponsor['committee_id']

            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, bill['_id']))
                db.bills.save(bill)
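The feed cleanup in this example deletes from two parallel lists (entity_ids and entity_strings) by shared index. A minimal illustration of the technique, with made-up ids (the source checks entity_id[2] == 'C' because billy committee ids carry 'C' at index 2):

entity_ids = ['EXC000001', 'EXL000002']
entity_strings = ['Committee on X', 'Jane Doe']
for entity_id in list(entity_ids):      # snapshot, so deletion below is safe
    if entity_id[2] == 'C':
        index = entity_ids.index(entity_id)
        del entity_ids[index]
        del entity_strings[index]
assert entity_ids == ['EXL000002']
assert entity_strings == ['Jane Doe']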
Example #5
def main():

    import sys
    abbrs = sys.argv[1:] or [x['abbreviation'] for x in db.metadata.find()]
    logger = logging.getLogger('purge_committee_ids')
    logger.setLevel(logging.DEBUG)

    for abbr in abbrs:
        spec = {settings.LEVEL_FIELD: abbr}
        committee_ids = [c['_id'] for c in db.committees.find(spec, fields=['_id'])]

        # Events with committee participants.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'participants.committee_id': {'$nin': committee_ids}
            }
        for event in db.events.find(spec):
            old_ids = set()
            count = 0
            found = False
            for participant in event['participants']:
                for id_key in 'committee_id', 'id':
                    _id = participant.get(id_key, None)
                    type_ = participant.get('participant_type')
                    if id_key == 'id' and type_ != 'committee':
                        continue
                    if _id and (_id not in committee_ids):
                        found = True
                        count += 1
                        old_ids.add(_id)
                        msg = 'Removing participant %r from event %r'
                        logger.info(msg % (participant[id_key], event['_id']))

                        # Leave the participant in but set their id to None.
                        # Text will still be displayed without a hyperlink.
                        participant[id_key] = None

            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, event['_id']))
                db.events.save(event, safe=True)

        # Related committees in bill actions.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'actions.related_entities.type': 'committee'
            }
        for bill in db.bills.find(spec):
            old_ids = set()
            count = 0
            found = False
            for action in bill['actions']:
                # Iterate over a copy so entities can be removed safely.
                for entity in list(action['related_entities']):
                    if entity['type'] == 'committee':
                        if entity['id'] and (entity['id'] not in committee_ids):
                            found = True
                            count += 1
                            old_ids.add(entity['id'])
                            msg = 'Removing entity %r from action in %r'
                            logger.debug(msg % (entity['id'], bill['bill_id']))

                            # Completely remove the related entity. Without an
                            # id it has no other purpose.
                            action['related_entities'].remove(entity)
            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, bill['_id']))
                db.bills.save(bill, safe=True)

        # Legislator old roles.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'old_roles': {'$exists': True}
            }
        for leg in db.legislators.find(spec):
            old_ids = set()
            count = 0
            found = False
            for role in leg['old_roles']:
                if 'committee_id' in role:
                    _id = role['committee_id']
                    if _id and (_id not in committee_ids):
                        found = True
                        count += 1
                        old_ids.add(_id)
                        msg = 'Removing id %r from old_role in %r'
                        logger.info(msg % (role['committee_id'], leg['full_name']))
                        # Set the id to None.
                        role['committee_id'] = None
            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, leg['_id']))
                db.legislators.save(leg, safe=True)

        # Related entities in feeds.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'entity_ids': {'$ne': None}
            }
        for entry in feeds_db.entries.find(spec):
            old_ids = set()
            count = 0
            found = False
            # Iterate over a copy so the paired deletions below are safe.
            for entity_id in list(entry['entity_ids']):
                if entity_id[2] == 'C':
                    if entity_id not in committee_ids:
                        found = True
                        count += 1
                        msg = 'Removing id %r from feed %r'
                        logger.info(msg % (entity_id, entry['_id']))

                        # Delete the entity from the feed.
                        old_ids.add(entity_id)
                        index = entry['entity_ids'].index(entity_id)
                        del entry['entity_ids'][index]
                        del entry['entity_strings'][index]
            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, entry['_id']))
                feeds_db.entries.save(entry, safe=True)

        # Nuke any committee sponsors of bills.
        spec = {
            settings.LEVEL_FIELD: abbr,
            'sponsors.committee_id': {'$nin': committee_ids}
            }
        for bill in db.bills.find(spec):
            count = 0
            found = False
            old_ids = set()
            for sponsor in bill.get('sponsors', []):
                if 'committee_id' in sponsor:
                    _id = sponsor['committee_id']
                    # Only drop ids that no longer resolve to a committee.
                    if _id not in committee_ids:
                        old_ids.add(_id)
                        found = True
                        count += 1
                        del sponsor['committee_id']

            if found:
                msg = 'Removed %d old committee ids %r from %r'
                logger.info(msg % (count, old_ids, bill['_id']))
                db.bills.save(bill)
Example #6
import os
import sys
from os.path import dirname, abspath, join
import shutil

from billy.core import logging

from models import Feed
from entities import Extractor

if __name__ == '__main__':

    level = logging.DEBUG

    logging.getLogger('billy.feed-model').setLevel(level)
    logging.getLogger('billy.entry-model').setLevel(level)
    logging.getLogger('billy.extractor').setLevel(level)

    # The path where the news/blogs code and urls files are located.
    PATH = dirname(abspath(__file__))

    # Collect the per-jurisdiction url files, skipping editor backup files.
    filenames = os.listdir(join(PATH, 'urls'))
    filenames = [s for s in filenames if '~' not in s]

    for urls_filename in filenames:
        abbr = urls_filename.lower().replace('.txt', '')

        # If abbrs are specified on the command line, scrape only those.
        if sys.argv[1:] and (abbr not in sys.argv[1:]):
            continue
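The filename-to-jurisdiction mapping above is just a lowercased strip of the '.txt' extension; a quick illustration with a hypothetical urls file:

assert 'NC.txt'.lower().replace('.txt', '') == 'nc'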
Example #7
class Feed(object):
    '''This model handles fetching the rss feed and recording any errors
    that occur for post-mortem reporting. It also has an instance-level
    report dictionary that gets augmented each time one of the feed's
    entries is scanned for relevant entities.
    '''
    request_defaults = dict(requests_per_minute=0, cache_write_only=False)

    session = scrapelib.Scraper(**_request_defaults(request_defaults))
    logger = logging.getLogger('billy.feed-model')

    def __init__(self, url):
        self.url = url
        self.succeeded = None
        self.default_report = {
            'entries': {
                'count': 0,
                'new': 0,
                'old': 0,
                'relevant': 0,
            },
            'entities': {
                'count': 0,
            }
        }
        self.report = {
            'url': url,

            # The info is stored under a jurisdiction key to avoid
            # over-writing data for feeds with national scope that are
            # scanned for multiple jurisdictions. 'ex' is example data.
            'ex': self.default_report
        }

        # Delete the example data.
        del self.report['ex']

        self._initial_save()

    def _initial_save(self):
        '''Perform the initial save (to get the mongo_id if none exists yet).
        '''
        spec = dict(url=self.url)
        update = {'$set': spec}
        self.logger.info('feed._initial_save %r' % self.url)
        doc = feeds_db.feeds.find_and_modify(
            spec, update, upsert=True, new=True)
        self.mongo_id = doc['_id']

    def _get_feed(self):
        '''Try to fetch the feed and parse it. If the fetch fails, log
        the exception. Finally, update the report with details of the
        success/failure of the fetch.
        '''
        self.logger.info('feed GET %r' % self.url)
        try:
            text = self.session.get(self.url).text
        except Exception:
            tb = traceback.format_exc()
            self._handle_fetch_exception(tb)
            return

        self.succeeded = True

        # XXX: This will fail if the link doesn't point to a valid feed.
        data = feedparser.parse(text)
        self._data = data

        self._update_report_after_fetch()
        return data

    @property
    def data(self):
        '''The parsed feed contents.
        '''
        data = getattr(self, '_data', None)
        return data or self._get_feed()

    def _handle_fetch_exception(self, _traceback):
        '''If the fetch fails, log the exception and store the traceback for
        the report.
        '''
        self.traceback = _traceback
        self.logger.exception(_traceback)
        self.succeeded = False

    def _update_report_after_fetch(self):
        '''Update the feed's report with whether the fetch operation
        succeeded or failed, including a formatted traceback if it failed.
        '''
        last_fetch = {
            'succeeded': self.succeeded,
            'datetime': datetime.datetime.utcnow()
        }
        if not self.succeeded:
            last_fetch['traceback'] = self.traceback
        report = {'url': self.url, 'last_fetch': last_fetch}
        self.report.update(report)

    def entries(self):
        '''A generator of wrapped entries for this feed.
        '''
        for entry in self.data['entries']:
            yield Entry(entry, feed=self)

    def serializable(self):
        '''Returns metadata about the feed (url, etc) and report information
        that can be saved in mongo.
        '''
        return {'$set': self.report}

    def finish_report(self):
        '''Extra stuff to go in the report goes here.
        '''

    def save(self):
        '''Update the feed record with the latest report.
        '''
        spec = dict(url=self.url)
        feeds_db.feeds.find_and_modify(spec, self.serializable(), upsert=True)
        self.logger.info('feed.save: %r' % self.url)
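A minimal usage sketch for this model; the URL is hypothetical and a configured billy mongo connection is assumed:

feed = Feed('http://example.com/news.rss')
data = feed.data            # first access lazily fetches and parses the feed
if feed.succeeded:
    feed.save()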
Example #8
class Entry(object):
    '''Wrap a parsed feed entry dictionary thingy from feedparser.
    '''
    request_defaults = dict(requests_per_minute=0, cache_write_only=False)

    session = scrapelib.Scraper(**_request_defaults(request_defaults))
    logger = logging.getLogger('billy.entry-model')

    def __init__(self, entry, feed):
        self.entry = entry
        self.feed = feed
        self.report = {}

        # Whether a fetch of the full text was tried and succeeded.
        self.tried = False
        self.succeeded = None

    def mongo_id(self):
        '''Get a unique mongo id based on this entry's url and title.
        '''
        s = self.entry['link'] + self.entry['title']
        return hashlib.md5(s).hexdigest()

    def is_new(self):
        '''Guess whether this entry is new (i.e., previously unseen)
        or old.
        '''
        mongo_id = self.mongo_id()
        is_new = feeds_db.entries.find_one(mongo_id) is None
        self.logger.info('is_new? %r --> %r' % (mongo_id, is_new))
        return is_new

    def _get_full_text(self):
        '''Just for experimenting at this point. Fetch the full text,
        log any exception that occurs, and store the details regarding the
        outcome of the fetch on the object.
        '''
        self.logger.info('entry GET %r' % self.entry.link)
        try:
            html = self.session.get(self.entry.link).text
        except Exception:
            tb = traceback.format_exc()
            self._handle_fetch_exception(tb)
            return

        self.succeeded = True
        self.tried = True
        self.html = html

        self._update_report_after_fetch()

        return html

    def _handle_fetch_exception(self, _traceback):
        '''If the fetch failed, log the failure and store the traceback
        for the report.
        '''
        self.traceback = _traceback
        self.logger.exception(_traceback)
        self.succeeded = False

    def _update_report_after_fetch(self):
        '''After fetching the entry's full text (if at all), update
        the entry's report with the outcome of the fetch operation, including
        a traceback if it failed.
        '''
        report = {'url': self.entry.link,
                  'entity_count': len(self.entry.get('entity_ids', []))}
        if self.tried:
            last_fetch = {
                'succeeded': self.succeeded,
                'datetime': datetime.datetime.utcnow()
            }
            if not self.succeeded:
                last_fetch['traceback'] = self.traceback
            report.update(last_fetch=last_fetch)
        self.report.update(report)

    def serializable(self):
        '''Replace feedparser's time.struct_time values with datetime
        objects that can be serialized.
        '''
        # Add the feed's id to make the entry and its feed joinable.
        ret = dict(feed_id=self.feed.mongo_id)

        # Convert unserializable timestructs into datetimes.
        for k, v in self.entry.items():
            if isinstance(v, time.struct_time):
                t = time.mktime(self.entry[k])
                dt = datetime.datetime.fromtimestamp(t)
                ret[k] = dt
        return ret

    def save_if_entities_found(self):
        '''If the entry is previously unseen and the extractor finds entities
        have been mentioned, save, otherwise do nothing.
        '''
        if self.is_new() and self.entry['entity_ids']:
            feeds_db.entries.save(self.serializable())
            self.logger.info(
                'entry.save_if_entities_found: %r' % self.entry.link)

    def finish_report(self, abbr):
        '''After attempting to extract entities, update the report and the
        report of this entry's feed with relevant information.

        Two things happen in this function: the entry's report gets updated,
        and the report object on the entry's feed gets updated.

        The feed's default report for a jurisdiction has this basic shape:
            {
            'entries': {
                'count': 0,
                'new': 0,
                'old': 0,
                'relevant': 0,
                },
            'entities': {
                'count' : 0,
                }
            }

        `abbr` is the jurisdiction abbreviation this info will be stored under
        in the feed's report object.
        '''
        # Update the feed's report.
        feed_report = self.feed.report
        report = feed_report.get(abbr, self.feed.default_report)

        report['entries']['count'] += 1

        # If the entry's full text was fetched (i.e., it was new)...
        if self.tried:
            report['entries']['new'] += 1
            if self.entry['entity_ids']:
                report['entries']['relevant'] += 1
            report['entities']['count'] += len(self.entry['entity_ids'])
        else:
            report['entries']['old'] += 1
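A sketch of how Entry combines with the Feed model above, for a hypothetical URL and jurisdiction 'ex'; it assumes the entities Extractor has already stored entity_ids on each entry:

feed = Feed('http://example.com/news.rss')
for entry in feed.entries():
    entry.save_if_entities_found()
    entry.finish_report('ex')   # rolls entry counts up into feed.report
feed.save()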
Example #9
class Feed(object):
    '''This model handles fetching the rss feed and recording any errors
    that occur for post-mortem reporting. It also has an instance-level
    report dictionary that gets augmented each time one of the feed's
    entries is scanned for relevant entities.
    '''

    request_defaults = dict(
        cache_obj=FileCache(FEEDS_CACHE),
        requests_per_minute=0,
        cache_write_only=False)

    session = scrapelib.Scraper(
        **_request_defaults(request_defaults))
    logger = logging.getLogger('billy.feed-model')

    def __init__(self, url, jurisdiction):
        self.url = url
        self.jurisdiction = jurisdiction

        self.succeeded = None
        self.default_report = {
            'entries': {
                'count': 0,
                'new': 0,
                'old': 0,
                'relevant': 0,
                },
            'entities': {
                'count': 0,
                }
            }
        self.report = {
            'url': url,

            # The info is stored under the jurisdiction key
            # to avoid over-writing data for feeds with national scope that
            # are scanned for multiple jurisdictions.
            jurisdiction: self.default_report
            }


        # Make sure this feed has a mongo id.
        self._initial_save()

    @staticmethod
    def blast_cache():
        '''Remove the scrapelib.Scraper fastmode cache for feed retrieval.
        Done before a scrape, but not between jurisdictions in a single
        run, in case a feed of national scope needs to get processed for
        each state.
        '''
        shutil.rmtree(FEEDS_CACHE)

    def _initial_save(self):
        '''Perform the initial save (to get the mongo_id if none exists yet).
        '''
        spec = dict(url=self.url)
        update = {'$set': spec}
        self.logger.debug('feed._initial_save %r' % self.url)
        doc = feeds_db.feeds.find_and_modify(
            spec, update, upsert=True, new=True)
        self.mongo_id = doc['_id']

    def _get_feed(self):
        '''Try to fetch the feed and parse it. If the fetch fails, log
        the exception. Finally, update the report with details of the
        success/failure of the fetch.
        '''
        try:
            text = self.session.get(self.url).text
        except Exception:
            tb = traceback.format_exc()
            self._handle_fetch_exception(tb)
            self._update_report_after_fetch()
        else:
            self.succeeded = True

            # XXX: This will fail if the text isn't a valid feed.
            data = feedparser.parse(text)
            self._data = data
            self._update_report_after_fetch()
            return data

    @property
    def data(self):
        '''The parsed feed contents.
        '''
        data = getattr(self, '_data', None)
        return data or self._get_feed() or {}

    def is_valid(self):
        '''Does this hot garbage contain the keys we expect?
        '''
        return 'title' in self.data.get('feed', {})

    def _handle_fetch_exception(self, _traceback):
        '''If the fetch fails, log the exception and store the traceback for
        the report.
        '''
        self.traceback = _traceback
        self.logger.exception(_traceback)
        self.succeeded = False

    def _update_report_after_fetch(self):
        '''Update the feed's report with whether the fetch operation
        succeeded or failed, including a formatted traceback if it failed.
        '''
        last_fetch = {
            'succeeded': self.succeeded,
            'datetime': datetime.datetime.utcnow()
            }
        if not self.succeeded:
            last_fetch['traceback'] = self.traceback
        self.report[self.jurisdiction].update(last_fetch=last_fetch)

    def entries(self):
        '''A generator of wrapped entries for this feed.
        '''
        data = self.data or {}
        entries = data.get('entries', [])
        for entry in entries:
            yield Entry(entry, feed=self)

    def serializable(self):
        '''Returns metadata about the feed (url, etc) and report information
        that can be saved in mongo.
        '''
        return {'$set': self.report}

    def finish_report(self):
        '''Extra stuff to go in the report goes here.
        '''

    def save(self):
        '''Update the feed record with the latest report.
        '''
        if not self.is_valid():
            return
        spec = dict(url=self.url)
        update = {'$set': self.report}
        self.logger.debug('feed.save %r' % self.url)
        feeds_db.feeds.find_and_modify(spec, update, upsert=True, new=True)
        self.logger.info('feed.save: %r' % self.url)
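A sketch of a per-jurisdiction run against this version of the model; the URL and 'ex' jurisdiction are hypothetical, and FEEDS_CACHE plus the mongo connection are assumed to be configured:

Feed.blast_cache()              # start the scrape from a cold cache
feed = Feed('http://example.com/news.rss', 'ex')
if feed.is_valid():             # triggers the fetch and checks the parse
    for entry in feed.entries():
        entry.finish_report('ex')
    feed.finish_report()
    feed.save()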