Example #1
    def do_scrape(self, juris, args, scrapers):
        # make output and cache dirs
        utils.makedirs(settings.CACHE_DIR)
        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        utils.makedirs(datadir)
        # clear json from data dir
        for f in glob.glob(datadir + '/*.json'):
            os.remove(f)

        report = {}

        # do jurisdiction
        jscraper = JurisdictionScraper(juris,
                                       datadir,
                                       strict_validation=args.strict,
                                       fastmode=args.fastmode)
        report['jurisdiction'] = jscraper.do_scrape()

        for scraper_name, scrape_args in scrapers.items():
            ScraperCls = juris.scrapers[scraper_name]
            scraper = ScraperCls(juris,
                                 datadir,
                                 strict_validation=args.strict,
                                 fastmode=args.fastmode)
            report[scraper_name] = scraper.do_scrape(**scrape_args)

        return report
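A minimal sketch of the inputs this variant expects: an argparse-style namespace carrying module, strict, and fastmode, plus a name-to-kwargs mapping for the scrapers. The names and values below are illustrative, not from the source:

    # Hypothetical input shapes for Example #1 (values are illustrative)
    from types import SimpleNamespace

    args = SimpleNamespace(module='my_state',   # subdir created under SCRAPED_DATA_DIR
                           strict=True,         # fail hard on validation errors
                           fastmode=False)      # reuse cached responses when True
    scrapers = {'bills': {'session': '2023'}}   # name -> kwargs for that scraper's do_scrape
    # report = self.do_scrape(juris, args, scrapers)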
Example #2
    def do_scrape(self, juris, args, scrapers):
        # make output and cache dirs
        utils.makedirs(settings.CACHE_DIR)
        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        utils.makedirs(datadir)
        # clear json from data dir
        for f in glob.glob(datadir + '/*.json'):
            os.remove(f)

        report = {}

        for scraper_name, scrape_args in scrapers.items():
            ScraperCls = juris.scrapers[scraper_name]
            scraper = ScraperCls(juris, datadir, args.strict, args.fastmode)
            report[scraper_name] = scraper.do_scrape(**scrape_args)

        return report
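Example #2 drops the JurisdictionScraper step and passes strict/fastmode positionally, but the scrapers mapping is consumed the same way: each entry's dict is forwarded as keyword arguments through **scrape_args. A small runnable sketch of that expansion (the kwargs shown are hypothetical):

    # Illustrative expansion of the **scrape_args forwarding
    scrapers = {'bills': {'session': '2023', 'chamber': 'upper'},
                'events': {}}
    for scraper_name, scrape_args in scrapers.items():
        # each call is equivalent to e.g. scraper.do_scrape(session='2023', chamber='upper')
        print(scraper_name, scrape_args)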
Example #3
    def do_scrape(self, juris, args):
        # make output and cache dirs
        utils.makedirs(args.cachedir)
        utils.makedirs(args.datadir)
        # clear json from data dir
        for f in glob.glob(args.datadir + '/*.json'):
            os.remove(f)

        report = {}

        # run scrapers
        for session in args.sessions:
            # build mapping of ScraperClass -> scraper types it handles
            session_scrapers = defaultdict(list)
            for scraper_type in args.scrapers:
                ScraperCls = juris.get_scraper(args.term, session,
                                               scraper_type)
                if not ScraperCls:
                    raise Exception('no scraper for term={0} session={1} '
                                    'type={2}'.format(args.term, session,
                                                      scraper_type))
                session_scrapers[ScraperCls].append(scraper_type)

            report[session] = {}

            # instantiate each scraper class once, then run every scrape
            # method its assigned types call for; independent `if` tests
            # (rather than `elif`) let a class mapped to several types
            # run them all
            for ScraperCls, scraper_types in session_scrapers.items():
                scraper = ScraperCls(juris, session, args.datadir,
                                     args.cachedir, args.strict,
                                     args.fastmode)
                if 'people' in scraper_types:
                    report[session].update(scraper.scrape_people())
                if 'bills' in scraper_types:
                    report[session].update(scraper.scrape_bills())
                if 'events' in scraper_types:
                    report[session].update(scraper.scrape_events())
                if 'votes' in scraper_types:
                    report[session].update(scraper.scrape_votes())
                if 'speeches' in scraper_types:
                    report[session].update(scraper.scrape_speeches())

        return report
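The defaultdict in Example #3 groups the requested scraper types by the class that implements them, so a class serving several types is instantiated once per session. A standalone sketch of that grouping step, with hypothetical stub classes:

    from collections import defaultdict

    class LegislatorScraper: pass   # hypothetical stubs, for illustration only
    class BillScraper: pass

    # one class may back several scraper types
    type_to_class = {'people': LegislatorScraper,
                     'votes': LegislatorScraper,
                     'bills': BillScraper}

    session_scrapers = defaultdict(list)
    for scraper_type in ('people', 'votes', 'bills'):
        session_scrapers[type_to_class[scraper_type]].append(scraper_type)

    # session_scrapers now maps LegislatorScraper -> ['people', 'votes'] and
    # BillScraper -> ['bills'], so one instance handles both people and votes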