def committee_summaries(year=2013):
    """Scrape the committee summaries for ``year`` and patch them into the DB.

    The scraper is asked for the summaries with ``get_pdf_text=True`` and the
    resulting records are handed to a ``TablePatcher`` for
    ``models.CommitteeSummary``, keyed on the ``pdf_url`` column.
    """
    from mptracker.scraper.committee_summaries import SummaryScraper

    summary_patcher = TablePatcher(
        models.CommitteeSummary,
        models.db.session,
        key_columns=['pdf_url'],
    )

    # Two cached sessions: the default one and the 'question-pdf' one.
    default_session = get_cached_session()
    pdf_session = get_cached_session('question-pdf')
    scraper = SummaryScraper(default_session, pdf_session)

    summaries = scraper.fetch_summaries(year, get_pdf_text=True)
    summary_patcher.update(summaries)
def load(name, include_columns=None, create=True, remove=False):
    """Patch table ``name`` with JSON records read line by line from stdin.

    :param name: table name passed to ``TableLoader``.
    :param include_columns: optional comma-separated column names; when
        given, every record is reduced to just those keys.
    :param create: forwarded to ``TablePatcher.update``.
    :param remove: forwarded to ``TablePatcher.update``.
    """
    wanted_columns = None
    if include_columns:
        wanted_columns = set(include_columns.split(','))

    def keep_wanted(record):
        # Without a column filter the record is passed through untouched.
        if wanted_columns is None:
            return record
        return {key: record[key] for key in record if key in wanted_columns}

    table_loader = TableLoader(name)
    table_patcher = TablePatcher(
        table_loader.model,
        db.session,
        key_columns=['id'],
    )

    def iter_records(stream):
        # Each input line is one JSON document describing a single row.
        for raw_line in stream:
            decoded = table_loader.decode_dict(flask.json.loads(raw_line))
            yield keep_wanted(decoded)

    table_patcher.update(iter_records(sys.stdin), create=create, remove=remove)
def committee_summaries(year=2013):
    """Scrape the committee summaries for ``year``, patch and commit them.

    Records are fetched with ``get_pdf_text=True``, patched into
    ``models.CommitteeSummary`` (keyed on ``pdf_url``) through a
    ``TablePatcher``, and the database session is committed afterwards.
    """
    from mptracker.scraper.committee_summaries import SummaryScraper

    session = models.db.session
    summary_patcher = TablePatcher(
        models.CommitteeSummary,
        session,
        key_columns=['pdf_url'],
    )

    # Two cached sessions: the default one and the 'question-pdf' one.
    default_session = get_cached_session()
    pdf_session = get_cached_session('question-pdf')
    scraper = SummaryScraper(default_session, pdf_session)

    summaries = scraper.fetch_summaries(year, get_pdf_text=True)
    summary_patcher.update(summaries)

    # Re-read the session attribute for the commit, as the original does.
    models.db.session.commit()
def load(name, include_columns=None, create=True, remove=False, _file=None):
    """Patch table ``name`` with JSON records read line by line, then commit.

    :param name: table name passed to ``TableLoader``.
    :param include_columns: optional comma-separated column names; when
        given, every record is reduced to just those keys.
    :param create: forwarded to ``TablePatcher.update``.
    :param remove: forwarded to ``TablePatcher.update``.
    :param _file: iterable of JSON lines; defaults to ``sys.stdin``.
    """
    # Resolve the default at call time: a ``_file=sys.stdin`` default would be
    # bound once at definition time and miss any later replacement of
    # ``sys.stdin`` (test harnesses, CLI runners).
    if _file is None:
        _file = sys.stdin

    if include_columns:
        include_columns = set(include_columns.split(','))

        def filter_record(r):
            return {k: r[k] for k in r if k in include_columns}
    else:
        def filter_record(r):
            return r

    loader = TableLoader(name)
    patcher = TablePatcher(loader.model, db.session, key_columns=['id'])

    # Lazily decode one record per input line.
    records = (
        filter_record(loader.decode_dict(flask.json.loads(line)))
        for line in _file
    )
    patcher.update(records, create=create, remove=remove)
    db.session.commit()
def people(year='2012'):
    """Scrape people for ``year`` and patch them into ``models.Person``.

    Each scraped row has its ``county_name`` entry removed. When that name is
    non-empty it is title-cased, passed through ``fix_local_chars`` and looked
    up in ``models.County``; a match is stored under ``row['county']``, a miss
    is logged as a warning. Rows are patched keyed on ``cdep_id``.

    NOTE(review): this function does not commit the session itself; confirm
    whether ``TablePatcher.update`` or the caller is expected to do so.
    """
    from mptracker.scraper.people import PersonScraper

    patcher = TablePatcher(models.Person,
                           models.db.session,
                           key_columns=['cdep_id'])

    def get_people():
        person_scraper = PersonScraper(get_cached_session())
        for row in person_scraper.fetch_people(year):
            county_name = row.pop('county_name')
            if county_name:
                ok_name = fix_local_chars(county_name.title())
                # Presumably the County table stores this name without the
                # hyphen -- TODO confirm.
                if ok_name == "Bistrița-Năsăud":
                    ok_name = "Bistrița Năsăud"
                county = models.County.query.filter_by(name=ok_name).first()
                if county is None:
                    # ``warning`` replaces the deprecated ``warn`` alias.
                    logger.warning("Can't match county name %r", ok_name)
                else:
                    row['county'] = county

            # Every row is yielded, with or without a matched county.
            yield row

    patcher.update(get_people())