Пример #1
0
def main():
    """Command-line entry point: import previously scraped data.

    Parses the command line, hands the parsed options to
    ``settings.update``, configures logging, and then runs the requested
    importers against ``settings.BILLY_DATA_DIR``.

    Metadata is always imported.  Legislators, bills and committees are
    imported when their own flag or ``--alldata`` is given.  Events are
    imported only when ``--events`` is passed explicitly.

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when no data type was selected, which matches how argparse itself
    reports every other command-line mistake.
    """
    parser = argparse.ArgumentParser(
        description='Import scraped data into database.',
        parents=[base_arg_parser],
    )

    parser.add_argument('abbreviation', type=str,
                        help='the short name of the data to import')
    # NOTE(review): args.rpm is not read anywhere in this function;
    # presumably it is consumed through settings.update(args) -- confirm.
    parser.add_argument('-r', '--rpm', type=int, default=60,
                        help=('maximum number of documents to download '
                              'per minute'))
    parser.add_argument('--bills', action='store_true',
                        help='import bill data')
    parser.add_argument('--legislators', action='store_true',
                        help='import legislator data')
    parser.add_argument('--committees', action='store_true',
                        help='import (separate) committee data')
    parser.add_argument('--events', action='store_true',
                        help='import event data')
    parser.add_argument('--alldata', action='store_true',
                        help='import all available data (except events)')

    args = parser.parse_args()

    selected_any = (args.bills or args.legislators or args.committees or
                    args.events or args.alldata)
    if not selected_any:
        # Report this like any other usage error instead of a traceback.
        parser.error("Must specify at least one type: --bills, "
                     "--legislators, --committees, --events, "
                     "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)
Пример #2
0
def _do_imports(abbrev, args):
    """Run the importers selected by ``args.types`` for ``abbrev``.

    Metadata is imported unconditionally.  Every other importer is called
    with ``settings.BILLY_DATA_DIR`` when its type name appears in
    ``args.types``; events are also imported whenever ``speeches`` is
    requested.

    Returns a dict keyed by type name holding each importer's return value.
    """
    # Deferred imports, so that scrape doesn't depend on mongo.
    from billy.importers.metadata import import_metadata
    from billy.importers.bills import import_bills
    from billy.importers.legislators import import_legislators
    from billy.importers.committees import import_committees
    from billy.importers.events import import_events
    from billy.importers.speeches import import_speeches

    import_metadata(abbrev)

    # (report key, type names that trigger the step, importer), in run order.
    steps = (
        ('legislators', ('legislators',), import_legislators),
        ('bills', ('bills',), import_bills),
        ('committees', ('committees',), import_committees),
        ('events', ('events', 'speeches'), import_events),
        ('speeches', ('speeches',), import_speeches),
    )

    report = {}
    for key, triggers, importer in steps:
        if any(name in args.types for name in triggers):
            report[key] = importer(abbrev, settings.BILLY_DATA_DIR)

    return report
Пример #3
0
def _do_imports(abbrev, args):
    """Import metadata, districts and the requested data types for ``abbrev``.

    Metadata is always imported.  When
    ``<BILLY_MANUAL_DATA_DIR>/districts/<abbrev>.csv`` exists, the stored
    districts for ``abbrev`` are replaced by the rows of that file;
    otherwise a warning is logged and the import continues.  After that,
    each type named in ``args.types`` is imported from
    ``settings.BILLY_DATA_DIR``.  Requesting ``speeches`` also imports
    events.

    Returns a dict mapping each imported type name to the value returned by
    its importer.
    """
    # do imports here so that scrape doesn't depend on mongo
    from billy.importers.metadata import import_metadata
    from billy.importers.bills import import_bills
    from billy.importers.legislators import import_legislators
    from billy.importers.committees import import_committees
    from billy.importers.events import import_events
    from billy.importers.speeches import import_speeches

    # always import metadata and districts
    import_metadata(abbrev)

    dist_filename = os.path.join(settings.BILLY_MANUAL_DATA_DIR, 'districts',
                                 '%s.csv' % abbrev)
    if os.path.exists(dist_filename):
        # Parse and convert every row before touching the database, so a
        # malformed row cannot leave the collection wiped or half-filled.
        # The context manager guarantees the file handle is closed, and the
        # csv reader is given a binary-mode file as it expects.
        districts = []
        with open(dist_filename, 'rb') as dist_file:
            for dist in unicodecsv.DictReader(dist_file):
                dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist
                # the boundary_id column is a %-template filled from the row
                dist['boundary_id'] = dist['boundary_id'] % dist
                dist['num_seats'] = int(dist['num_seats'])
                districts.append(dist)

        db.districts.remove({'abbr': abbrev})
        for dist in districts:
            _log.debug(dist)
            db.districts.save(dist, safe=True)
    else:
        logging.getLogger('billy').warning(
            "%s not found, continuing without districts", dist_filename)

    data_dir = settings.BILLY_DATA_DIR
    report = {}

    if 'legislators' in args.types:
        report['legislators'] = import_legislators(abbrev, data_dir)

    if 'bills' in args.types:
        report['bills'] = import_bills(abbrev, data_dir)

    if 'committees' in args.types:
        report['committees'] = import_committees(abbrev, data_dir)

    # speeches are imported after events, so requesting speeches also
    # triggers the events import
    if 'events' in args.types or 'speeches' in args.types:
        report['events'] = import_events(abbrev, data_dir)

    if 'speeches' in args.types:
        report['speeches'] = import_speeches(abbrev, data_dir)

    return report
Пример #4
0
def _do_imports(abbrev, args):
    """Import metadata, districts and the requested data types for ``abbrev``.

    Metadata is always imported.  When
    ``<BILLY_MANUAL_DATA_DIR>/districts/<abbrev>.csv`` exists, the stored
    districts for ``abbrev`` are replaced by the rows of that file;
    otherwise a warning is logged and the import continues.  After that,
    each type named in ``args.types`` is imported from
    ``settings.BILLY_DATA_DIR``.  Requesting ``speeches`` also imports
    events.

    Returns a dict mapping each imported type name to the value returned by
    its importer.
    """
    # do imports here so that scrape doesn't depend on mongo
    from billy.importers.metadata import import_metadata
    from billy.importers.bills import import_bills
    from billy.importers.legislators import import_legislators
    from billy.importers.committees import import_committees
    from billy.importers.events import import_events
    from billy.importers.speeches import import_speeches

    # always import metadata and districts
    import_metadata(abbrev)

    dist_filename = os.path.join(settings.BILLY_MANUAL_DATA_DIR, 'districts',
                                 '%s.csv' % abbrev)
    if os.path.exists(dist_filename):
        # Parse and convert every row before touching the database, so a
        # malformed row cannot leave the collection wiped or half-filled.
        # The context manager guarantees the file handle is closed, and the
        # csv reader is given a binary-mode file as it expects.
        districts = []
        with open(dist_filename, 'rb') as dist_file:
            for dist in unicodecsv.DictReader(dist_file):
                dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist
                # the boundary_id column is a %-template filled from the row
                dist['boundary_id'] = dist['boundary_id'] % dist
                dist['num_seats'] = int(dist['num_seats'])
                districts.append(dist)

        db.districts.remove({'abbr': abbrev})
        for dist in districts:
            db.districts.save(dist, safe=True)
    else:
        logging.getLogger('billy').warning(
            "%s not found, continuing without districts", dist_filename)

    data_dir = settings.BILLY_DATA_DIR
    report = {}

    if 'legislators' in args.types:
        report['legislators'] = import_legislators(abbrev, data_dir)

    if 'bills' in args.types:
        report['bills'] = import_bills(abbrev, data_dir)

    if 'committees' in args.types:
        report['committees'] = import_committees(abbrev, data_dir)

    # speeches are imported after events, so requesting speeches also
    # triggers the events import
    if 'events' in args.types or 'speeches' in args.types:
        report['events'] = import_events(abbrev, data_dir)

    if 'speeches' in args.types:
        report['speeches'] = import_speeches(abbrev, data_dir)

    return report
Пример #5
0
    # configure logger
    if args.verbose == 0:
        verbosity = logging.WARNING
    elif args.verbose == 1:
        verbosity = logging.INFO
    else:
        verbosity = logging.DEBUG

    logging.basicConfig(level=verbosity,
                    format="%(asctime)s %(name)s %(levelname)s %(message)s",
                    datefmt="%H:%M:%S")

    # always import metadata
    import_metadata(args.state, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.state, data_dir)
    if args.bills or args.alldata:
        import_bills(args.state, data_dir)
    if args.committees or args.alldata:
        import_committees(args.state, data_dir)
    if args.votes or args.alldata:
        import_votes(args.state, data_dir)

    # events and versions currently excluded from --alldata
    if args.events:
        import_events(args.state, data_dir)
    if args.versions:
        import_versions(args.state, args.rpm)
Пример #6
0
def main():
    """Command-line entry point: import previously scraped data.

    Parses the command line, hands the parsed options to
    ``settings.update``, configures logging, and then runs the requested
    importers against ``settings.BILLY_DATA_DIR``.

    Metadata is always imported.  Legislators, bills and committees are
    imported when their own flag or ``--alldata`` is given.  Events are
    imported only when ``--events`` is passed explicitly.

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when no data type was selected, which matches how argparse itself
    reports every other command-line mistake.
    """
    parser = argparse.ArgumentParser(
        description='Import scraped data into database.',
        parents=[base_arg_parser],
    )

    parser.add_argument('abbreviation',
                        type=str,
                        help='the short name of the data to import')
    # NOTE(review): args.rpm is not read anywhere in this function;
    # presumably it is consumed through settings.update(args) -- confirm.
    parser.add_argument('-r',
                        '--rpm',
                        type=int,
                        default=60,
                        help=('maximum number of documents to download '
                              'per minute'))
    parser.add_argument('--bills',
                        action='store_true',
                        help='import bill data')
    parser.add_argument('--legislators',
                        action='store_true',
                        help='import legislator data')
    parser.add_argument('--committees',
                        action='store_true',
                        help='import (separate) committee data')
    parser.add_argument('--events',
                        action='store_true',
                        help='import event data')
    parser.add_argument('--alldata',
                        action='store_true',
                        help='import all available data (except events)')

    args = parser.parse_args()

    selected_any = (args.bills or args.legislators or args.committees
                    or args.events or args.alldata)
    if not selected_any:
        # Report this like any other usage error instead of a traceback.
        parser.error("Must specify at least one type: --bills, "
                     "--legislators, --committees, --events, "
                     "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)
Пример #7
0
                        default=False, help="import all available data")

    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or
            args.events or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                           "--legislators, --committees, --events, "
                           "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)
Пример #8
0
    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or args.events
            or args.versions or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                        "--legislators, --committees, --events, "
                        "--versions,  --alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.state)

    # always import metadata
    import_metadata(args.state, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.state, data_dir)
    if args.bills or args.alldata:
        import_bills(args.state, data_dir)
    if args.committees or args.alldata:
        import_committees(args.state, data_dir)

    # events and versions currently excluded from --alldata
    if args.events:
        import_events(args.state, data_dir)
    if args.versions:
        import_versions(args.state, args.rpm)
Пример #9
0
                        help="import all available data")

    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or args.events
            or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                        "--legislators, --committees, --events, "
                        "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)