def to_csv(abbreviations, upload):
    """
    Export legislature YAML files to per-state CSV files, optionally uploading them to S3.

    For every abbreviation (all of them, via ``get_all_abbreviations``, when
    none are given) the ``legislature/*.yml`` files in the state's data
    directory are passed, sorted, to ``write_csv`` together with the
    jurisdiction id and the output name ``<abbr>.csv``.

    When ``upload`` is true the resulting file is also uploaded to the
    ``data.openstates.org`` bucket under ``people/current/<abbr>.csv`` with a
    ``text/csv`` content type and a ``public-read`` ACL.
    """
    if not abbreviations:
        abbreviations = get_all_abbreviations()

    # The S3 client is only created when an upload was requested; binding the
    # name unconditionally keeps it defined on every path.
    s3 = boto3.client("s3") if upload else None

    for abbr in abbreviations:
        click.secho("==== {} ====".format(abbr), bold=True)
        directory = get_data_dir(abbr)
        jurisdiction_id = get_jurisdiction_id(abbr)
        person_files = sorted(glob.glob(os.path.join(directory, "legislature/*.yml")))
        fname = f"{abbr}.csv"
        write_csv(person_files, jurisdiction_id, fname)

        if not upload:
            continue

        s3.upload_file(
            fname,
            "data.openstates.org",
            f"people/current/{abbr}.csv",
            ExtraArgs={"ContentType": "text/csv", "ACL": "public-read"},
        )
        click.secho(
            f"uploaded to data.openstates.org/people/current/{abbr}.csv",
            fg="green",
        )
def match_ids(abbreviations):
    """
    Add ``legacy_openstates`` identifiers to people YAML files that lack them.

    For every abbreviation (all of them, via ``get_all_abbreviations``, when
    none are given) each ``people/*.yml`` file is loaded.  A person that
    already has an ``other_identifiers`` entry whose scheme is
    ``legacy_openstates`` is skipped.  Otherwise every id returned by
    ``Matcher.match`` is appended as a ``legacy_openstates`` identifier and
    the file is rewritten with ``dump_obj``.  Files with no match are left
    untouched.
    """
    if not abbreviations:
        abbreviations = get_all_abbreviations()

    for abbr in abbreviations:
        click.secho("==== {} ====".format(abbr), bold=True)
        matcher = Matcher(abbr)

        for fname in glob.glob(os.path.join(get_data_dir(abbr), "people/*.yml")):
            # Close the file before any rewrite of the same path below.
            with open(fname) as f:
                person = load_yaml(f)

            already_done = any(
                oid["scheme"] == "legacy_openstates"
                for oid in person.get("other_identifiers", [])
            )
            if already_done:
                continue

            exact = matcher.match(person)
            if not exact:
                continue

            identifiers = person.setdefault("other_identifiers", [])
            # ``legacy_id`` rather than ``id`` so the builtin is not shadowed.
            for legacy_id in exact:
                identifiers.append(
                    {"scheme": "legacy_openstates", "identifier": legacy_id}
                )
            dump_obj(person, filename=fname)
def match_ids(abbreviations):
    """
    Add ``legacy_openstates`` identifiers to people YAML files that lack them.

    Each ``people/*.yml`` file of every requested state (all states, via
    ``get_all_abbreviations``, when none are given) is loaded.  People that
    already carry an ``other_identifiers`` entry with the
    ``legacy_openstates`` scheme are skipped; for the rest, the ids returned
    by ``Matcher.match`` are appended under that scheme and the file is
    written back with ``dump_obj``.  Nothing is written when there is no match.
    """
    if not abbreviations:
        abbreviations = get_all_abbreviations()

    for abbr in abbreviations:
        click.secho('==== {} ===='.format(abbr), bold=True)
        matcher = Matcher(abbr)
        pattern = os.path.join(get_data_dir(abbr), 'people/*.yml')

        for fname in glob.glob(pattern):
            # Finish reading (and close) the file before it may be rewritten.
            with open(fname) as f:
                person = load_yaml(f)

            if any(
                oid['scheme'] == 'legacy_openstates'
                for oid in person.get('other_identifiers', [])
            ):
                continue

            exact = matcher.match(person)
            if not exact:
                continue

            if 'other_identifiers' not in person:
                person['other_identifiers'] = []
            # ``legacy_id`` rather than ``id`` so the builtin is not shadowed.
            for legacy_id in exact:
                person['other_identifiers'].append({
                    'scheme': 'legacy_openstates',
                    'identifier': legacy_id,
                })
            dump_obj(person, filename=fname)
def to_database(abbreviations, purge, safe):
    """
    Sync YAML files to DB.

    For each state (all of them when ``abbreviations`` is empty) the posts are
    created from that state's settings, then the ``people`` and ``retired``
    person files and the ``organizations`` files are loaded with
    ``load_directory``, all inside one ``transaction.atomic()`` block.

    With ``safe`` set, ``CancelTransaction`` is raised at the end of the
    atomic block and swallowed here, and processing continues with the next
    state.
    """
    init_django()

    states = abbreviations or get_all_abbreviations()
    settings = get_settings()

    for abbr in states:
        banner = '==== {} ===='.format(abbr)
        click.secho(banner, bold=True)

        directory = get_data_dir(abbr)
        jurisdiction_id = get_jurisdiction_id(abbr)

        person_files = []
        for pattern in ('people/*.yml', 'retired/*.yml'):
            person_files.extend(glob.glob(os.path.join(directory, pattern)))
        committee_files = glob.glob(os.path.join(directory, 'organizations/*.yml'))

        if safe:
            click.secho('running in safe mode, no changes will be made', fg='magenta')

        state_settings = settings[abbr]

        try:
            with transaction.atomic():
                create_posts(jurisdiction_id, state_settings)
                load_directory(person_files, 'person', jurisdiction_id, purge=purge)
                load_directory(
                    committee_files, 'organization', jurisdiction_id, purge=purge
                )
                if safe:
                    click.secho('ran in safe mode, no changes were made', fg='magenta')
                    # Leaving the atomic block via an exception discards the work.
                    raise CancelTransaction()
        except CancelTransaction:
            # Cancellation is expected; move on to the next state.
            pass
def sync_images(abbreviations, skip_existing):
    """
    Download images and sync them to S3.

    <ABBR> can be provided to restrict to single state.

    When no abbreviations are given, every abbreviation returned by
    ``get_all_abbreviations`` is processed.  The work for each state is
    delegated to ``download_state_images(abbr, skip_existing)``.
    """
    states = abbreviations or get_all_abbreviations()
    for state in states:
        download_state_images(state, skip_existing)
def to_database(abbreviations, create, verbose, clear_old_roles):
    """
    Sync YAML files to legacy MongoDB.

    Every requested state (all states when ``abbreviations`` is empty) is
    announced with a banner and then handed to ``dir_to_mongo``.
    """
    states = abbreviations or get_all_abbreviations()

    for state in states:
        banner = '==== {} ===='.format(state)
        click.secho(banner, bold=True)
        # Positional order expected by dir_to_mongo differs from this signature.
        dir_to_mongo(state, create, clear_old_roles, verbose)
def to_csv(abbreviations):
    """
    Export people YAML files to per-state CSV files.

    For every abbreviation (all of them, via ``get_all_abbreviations``, when
    none are given) the ``people/*.yml`` files in the state's data directory
    are passed, sorted, to ``write_csv`` together with the jurisdiction id and
    the output path ``csv/<abbr>_legislators.csv``.
    """
    if not abbreviations:
        abbreviations = get_all_abbreviations()

    for abbr in abbreviations:
        click.secho('==== {} ===='.format(abbr), bold=True)
        directory = get_data_dir(abbr)
        jurisdiction_id = get_jurisdiction_id(abbr)
        # Sorted so the files are handed to write_csv in a stable order.
        person_files = sorted(glob.glob(os.path.join(directory, 'people/*.yml')))
        write_csv(person_files, jurisdiction_id, f"csv/{abbr}_legislators.csv")
def summarize(abbreviations, verbose, summary, municipal):
    """
    Print a summary of the data for the given states.

    <ABBR> can be provided to restrict the summary to single state's files;
    otherwise every abbreviation from ``get_all_abbreviations`` is used.

    One ``Summarizer`` is created, ``process_legislature`` is called on it for
    each state, and ``print_summary`` is called once afterwards.

    ``verbose``, ``summary`` and ``municipal`` are accepted but are not used
    by this function.
    """
    if not abbreviations:
        abbreviations = get_all_abbreviations()

    summarizer = Summarizer()
    for abbr in abbreviations:
        summarizer.process_legislature(abbr)
    summarizer.print_summary()
def process_unmatched(abbreviations, dump, match, dry):
    """
    Run historical matching and/or CSV archiving for each state.

    All states from ``get_all_abbreviations`` are used when ``abbreviations``
    is empty.  For each state:

    * if ``match`` is set, ``check_historical_matches`` is called (with
      ``dry`` forwarded); an ``AbortTransaction`` is reported in red and does
      not stop the run;
    * if ``dump`` is set, ``archive_leg_to_csv`` is called.
    """
    states = abbreviations or get_all_abbreviations()

    for state in states:
        if match:
            if dry:
                click.secho("dry run, nothing will be saved", fg="blue")
            try:
                check_historical_matches(state, dry=dry)
            except AbortTransaction as e:
                # Report the abort and keep going with the remaining work.
                click.secho(f"{e}\ntransaction aborted!", fg="red")

        if dump:
            archive_leg_to_csv(state)
def to_database(abbreviations, purge, safe):
    """
    Sync YAML files to DB.

    After ``init_django`` and ``create_parties``, each state (all of them when
    ``abbreviations`` is empty) has its municipalities created in one atomic
    block.  Its person files (``legislature``, ``executive``,
    ``municipalities``, ``retired``) and ``organizations`` files are then
    loaded with ``load_directory`` in a second atomic block.

    With ``safe`` set, ``CancelTransaction`` is raised at the end of the
    second block.  Any ``CancelTransaction`` escaping that block terminates
    the process with exit status 1.
    """
    init_django()
    create_parties()

    states = abbreviations or get_all_abbreviations()

    for abbr in states:
        banner = "==== {} ====".format(abbr)
        click.secho(banner, bold=True)

        directory = get_data_dir(abbr)
        jurisdiction_id = get_jurisdiction_id(abbr)
        municipalities = load_municipalities(abbr)

        with transaction.atomic():
            create_municipalities(municipalities)

        person_files = []
        for pattern in (
            "legislature/*.yml",
            "executive/*.yml",
            "municipalities/*.yml",
            "retired/*.yml",
        ):
            person_files += glob.glob(os.path.join(directory, pattern))
        committee_files = glob.glob(os.path.join(directory, "organizations/*.yml"))

        if safe:
            click.secho("running in safe mode, no changes will be made", fg="magenta")

        try:
            with transaction.atomic():
                load_directory(person_files, "person", jurisdiction_id, purge=purge)
                load_directory(
                    committee_files, "organization", jurisdiction_id, purge=purge
                )
                if safe:
                    click.secho("ran in safe mode, no changes were made", fg="magenta")
                    # Leaving the atomic block via an exception discards the work.
                    raise CancelTransaction()
        except CancelTransaction:
            sys.exit(1)
def lint(abbreviations, verbose, municipal, date):
    """
    Lint YAML files.

    <ABBR> can be provided to restrict linting to single state's files.

    The values returned by ``process_dir`` for each state are summed; if the
    total is non-zero it is reported in red and the process exits with
    status 99.
    """
    states = abbreviations or get_all_abbreviations()

    error_count = 0
    for state in states:
        banner = "==== {} ====".format(state)
        click.secho(banner, bold=True)
        error_count += process_dir(state, verbose, municipal, date)

    if not error_count:
        return

    click.secho(f"exiting with {error_count} errors", fg="red")
    sys.exit(99)
def manual_data(abbreviations, missing_id, filename, fields, other_identifiers):
    """
    Import & Export Manual Data CSV Files

    Two independent steps, each run only when its options are given:

    * ``missing_id``: call ``generate_template_csv`` for the requested states
      (all states when ``abbreviations`` is empty), writing to ``filename``.
    * ``fields`` or ``other_identifiers``: call ``update_from_csv`` on
      ``filename``.
    """
    abbreviations = abbreviations or get_all_abbreviations()

    if missing_id:
        export_msg = f'generating {filename} with all legislators missing {missing_id}'
        click.secho(export_msg)
        generate_template_csv(abbreviations, filename, missing_id=missing_id)

    if not (fields or other_identifiers):
        return

    import_msg = f'loading {fields} and other_ids{other_identifiers} from {filename}'
    click.secho(import_msg)
    update_from_csv(filename, fields, other_identifiers)
def lint(abbreviations, verbose, summary):
    """
    Lint YAML files, optionally also providing a summary of state's data.

    <ABBR> can be provided to restrict linting to single state's files.

    Settings come from ``get_settings`` and are passed to ``process_dir`` for
    every state.  The values it returns are summed; a non-zero total is
    reported in red and the process exits with status 99.
    """
    settings = get_settings()
    states = abbreviations or get_all_abbreviations()

    error_count = 0
    for state in states:
        banner = "==== {} ====".format(state)
        click.secho(banner, bold=True)
        error_count += process_dir(state, verbose, summary, settings)

    if not error_count:
        return

    click.secho(f"exiting with {error_count} errors", fg="red")
    sys.exit(99)
def lint(abbreviations, verbose, summary):
    """
    Lint YAML files, optionally also providing a summary of state's data.

    <ABBR> can be provided to restrict linting to single state's files.

    Settings are read from ``../settings.yml`` relative to this module's
    directory and passed to ``process_dir`` for every state.  The values it
    returns are summed; a non-zero total is reported in red and the process
    exits with status 99.
    """
    module_dir = os.path.dirname(__file__)
    settings_file = os.path.join(module_dir, '../settings.yml')
    with open(settings_file) as settings_fh:
        settings = load_yaml(settings_fh)

    states = abbreviations or get_all_abbreviations()

    error_count = 0
    for state in states:
        banner = '==== {} ===='.format(state)
        click.secho(banner, bold=True)
        error_count += process_dir(state, verbose, summary, settings)

    if not error_count:
        return

    click.secho(f'exiting with {error_count} errors', fg='red')
    sys.exit(99)