def all_aliases():
    """Yield (alias, entity_id) pairs for every filtered alias of every entity."""
    import itertools
    from regs_common.util import get_db
    db = get_db()

    return itertools.chain.from_iterable(
        [(alias, entity['_id']) for alias in entity.get('filtered_aliases', [])]
        for entity in db.entities.find()
    )
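
# run() below calls ensure_directory(), extract() and dthandler, which are
# defined elsewhere in this module. Minimal sketches of what they presumably
# look like, inferred from the call sites (assumptions, not the canonical
# implementations):
#
#   def ensure_directory(path):
#       # create path (and any missing parents) if it doesn't already exist
#       if not os.path.exists(path):
#           os.makedirs(path)
#
#   def extract(record, keys):
#       # project a record down to the listed keys, skipping missing ones
#       return dict((key, record[key]) for key in keys if key in record)
#
#   # json.dumps default= hook that serializes datetimes as ISO 8601 strings
#   dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
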
def run(options, args):
    """Dump all scraped, non-deleted documents as per-docket zip archives, grouped by agency."""
    # 'import os' inside a function normally binds locally; the global
    # statement makes it bind at module level instead, presumably so
    # module-level helpers (like ensure_directory) can see it.
    global os
    import settings
    import datetime
    import os
    import pymongo
    import itertools
    import json
    from regs_common.util import get_db
    import zipfile

    print 'Starting dump...'
    
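    # Only dump documents that have been scraped and not deleted; the docket
    # and agency options optionally narrow the dump further.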
    query = {'scraped': True, 'deleted': False}
    
    if options.docket:
        query['docket_id'] = options.docket
    if options.agency:
        query['agency'] = options.agency
    print 'Query: %s' % query
    
    db = get_db()
    
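    # Layout: DATA_DIR/bulk/regulations-<date>/<agency>/<docket_id>.zip, with
    # one directory per document inside each docket archive.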
    export_dir = os.path.join(settings.DATA_DIR, 'bulk', 'regulations-%s' % datetime.date.today())
    ensure_directory(export_dir)
        
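    # itertools.groupby only merges *consecutive* items, so this relies on
    # document IDs embedding the agency and docket ID as prefixes: sorting by
    # document_id then clusters each agency's (and each docket's) documents
    # together. IDs that don't follow that pattern would split into multiple
    # groups.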
    for agency, agency_docs in itertools.groupby(db.docs.find(query, sort=[('document_id', pymongo.ASCENDING)]), key=lambda d: d['agency']):
        print 'Starting agency %s...' % agency
        agency_dir = os.path.join(export_dir, agency)
        ensure_directory(agency_dir)
        
        for docket, docket_docs in itertools.groupby(agency_docs, key=lambda d: d['docket_id']):
            print 'Starting docket %s...' % docket
            zip_path = os.path.join(agency_dir, '%s.zip' % docket)
            
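            # Mode 'a' appends to any existing archive (re-running the dump can
            # therefore add duplicate entries rather than replace them), and
            # allowZip64 lets archives grow past zipfile's 2 GB default limit.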
            with zipfile.ZipFile(zip_path, 'a', zipfile.ZIP_DEFLATED, allowZip64=True) as docket_zip:
                docket_record = db.dockets.find_one({'_id': docket})
                
                if docket_record:
                    docket_zip.writestr(
                        'metadata.json',
                        json.dumps(
                            extract(
                                docket_record,
                                ['docket_id', 'title', 'agency', 'rin', 'details', 'year']
                            ),
                            default=dthandler
                        )
                    )
                
                for doc in docket_docs:
                    files = []
                    
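                    # Collect every rendition of the document: its own views
                    # plus the views of each attachment, tagged by kind so the
                    # extracted-text filenames distinguish them.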
                    views = [('view', view) for view in doc['views']]
                    if 'attachments' in doc:
                        for attachment in doc['attachments']:
                            views.extend([('attachment', view) for view in attachment['views']])
                    
                    for view_type, view in views:
                        file_record = {'url': view['url']}
                        if view['extracted'] == True:
                            filename = '%s_%s.txt' % (view_type, view['file'].split('/')[-1].replace('.', '_'))
                            file_record['filename'] = filename
                            
                            docket_zip.writestr(os.path.join(doc['document_id'], filename), view['text'].encode('utf8'))
                            
                        files.append(file_record)
                        
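                    # Write a per-document metadata.json next to the extracted
                    # text files, with the file manifest embedded.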
                    metadata = extract(
                        doc,
                        ['document_id', 'title', 'agency', 'docket_id', 'type', 'topics', 'details', 'comment_on', 'rin']
                    )
                    metadata['files'] = files
                    
                    docket_zip.writestr(os.path.join(doc['document_id'], 'metadata.json'), json.dumps(metadata, default=dthandler))
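
# Rough sketch of how this command might be driven standalone. The real entry
# point lives in the surrounding command framework; the option names below are
# assumptions inferred from how `options` is used above:
#
#   from optparse import OptionParser
#
#   if __name__ == '__main__':
#       parser = OptionParser()
#       parser.add_option('--docket', dest='docket', default=None)
#       parser.add_option('--agency', dest='agency', default=None)
#       options, args = parser.parse_args()
#       run(options, args)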