Example #1
def entrypoint(incoming, old, new, retirement):
    """
    Script to assist with merging legislator files.

    Can be used in two modes: incoming or file merge.

    Incoming mode analyzes incoming/ directory files (generated with to_yaml.py)
    and discovers identical & similar files to assist with merging.

    File merge mode merges two legislator files.
    """
    if incoming:
        abbr = incoming
        existing_people = []
        for filename in glob.glob(
            os.path.join(get_data_dir(abbr), "legislature/*.yml")
        ) + glob.glob(os.path.join(get_data_dir(abbr), "retired/*.yml")):
            with open(filename) as f:
                existing_people.append(load_yaml(f))

        new_people = []
        incoming_dir = get_data_dir(abbr).replace("data", "incoming")
        for filename in glob.glob(os.path.join(incoming_dir, "legislature/*.yml")):
            with open(filename) as f:
                new_people.append(load_yaml(f))

        click.secho(
            f"analyzing {len(existing_people)} existing people and {len(new_people)} incoming"
        )

        unmatched = incoming_merge(abbr, existing_people, new_people, retirement)
        click.secho(f"{len(unmatched)} people were unmatched")

    if old and new:
        with open(old) as f:
            old_obj = load_yaml(f)
        with open(new) as f:
            new_obj = load_yaml(f)
        keep_both_ids = True
        if "incoming" in new:
            keep_both_ids = False
        merged = merge_people(old_obj, new_obj, keep_both_ids=keep_both_ids)
        dump_obj(merged, filename=old)
        os.remove(new)
        click.secho(f"merged files into {old}\ndeleted {new}\ncheck git diff before committing")
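All of the Open States-style examples on this page hand their result to dump_obj, either with filename= (rewrite one specific YAML file) or with output_dir= (derive the file name from the object itself). A minimal sketch of what such a helper could look like, assuming PyYAML and a hypothetical get_filename() slug builder; this is an illustration, not the project's actual implementation:

import os
from collections import OrderedDict

import yaml

# allow OrderedDict payloads (several examples above build them) to be dumped as plain maps
yaml.SafeDumper.add_representer(
    OrderedDict,
    lambda dumper, data: dumper.represent_dict(data.items()),
)


def get_filename(obj):
    # hypothetical slug builder: "<name>-<short id>.yml"
    slug = obj["name"].lower().replace(" ", "-")
    return f"{slug}-{obj['id'].split('/')[-1]}.yml"


def dump_obj(obj, *, filename=None, output_dir=None):
    # write obj as YAML to an explicit filename, or into output_dir using get_filename()
    if filename is None:
        filename = os.path.join(output_dir, get_filename(obj))
    with open(filename, "w") as f:
        yaml.safe_dump(obj, f, default_flow_style=False, sort_keys=False)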
Example #2
    def test_out_threshold(self, activations):
        stat_for_threshold = {}
        pred_for_threshold = {}
        for threshold in [0.5, 0.52, 0.55, 0.6]:
            preds, stat = self.kernel.predict_rle_from_acts_with_threshold(
                activations, threshold)

            stat_for_threshold[threshold] = stat
            pred_for_threshold[threshold] = preds
            # my_trace()
            # print(self.metric_cal(preds))

        utils.dump_obj(stat_for_threshold,
                       "stat_for_threshold.pkl",
                       force=True)
        utils.dump_obj(pred_for_threshold,
                       "pred_for_threshold.pkl",
                       force=True)
Example #3
def merge_scraped_coms(abbr, old, new):
    old_by_key = {(c["parent"], c["name"]): c for c in old}
    for c in new:
        old_com = old_by_key.pop((c["parent"], c["name"]), None)
        if old_com:
            old_com["sources"] = c["sources"]
            old_com["memberships"] = c["memberships"]
            fname = os.path.join(get_data_dir(abbr), "organizations",
                                 get_filename(old_com))
            dump_obj(old_com, filename=fname)
            click.secho(f"updated {fname}")
            os.remove(f"incoming/{abbr}/organizations/{get_filename(c)}")
        else:
            copy_new_incoming(abbr, c, "organizations")

    # remove unmatched old committees
    for com in old_by_key.values():
        fn = get_filename(com)
        click.secho(f"removing {fn}", fg="yellow")
        os.remove(os.path.join(get_data_dir(abbr), "organizations", fn))
Example #4
def update_from_csv(filename, fields, other_identifiers):
    with open(filename) as f:
        for line in csv.DictReader(f):
            yaml_filename = find_by_id(line['id'])
            with open(yaml_filename) as yf:
                person = load_yaml(yf)

            for field in fields:
                person[field] = line[field]

            if other_identifiers and 'other_identifiers' not in person:
                person['other_identifiers'] = []
            for scheme in other_identifiers:
                # TODO: check for duplicates among what was already there
                for id in line[scheme].split(';'):
                    if id:
                        person['other_identifiers'].append({
                            'scheme': scheme,
                            'identifier': id
                        })
            dump_obj(person, filename=yaml_filename)
Example #5
def update_from_csv(filename, fields, other_identifiers):
    with open(filename) as f:
        for line in csv.DictReader(f):
            yaml_filename = find_file(line["id"])
            with open(yaml_filename) as yf:
                person = load_yaml(yf)

            for field in fields:
                person[field] = line[field]

            if other_identifiers and "other_identifiers" not in person:
                person["other_identifiers"] = []
            for scheme in other_identifiers:
                # TODO: check for duplicates among what was already there
                for id in line[scheme].split(";"):
                    if id:
                        person["other_identifiers"].append({
                            "scheme": scheme,
                            "identifier": id
                        })
            dump_obj(person, filename=yaml_filename)
Example #6
def create_person(fname, lname, name, state, district, party, rtype, url,
                  image, email, start_date):
    role = {
        "type": rtype,
        "district": district,
        "jurisdiction": get_jurisdiction_id(state),
        "start_date": start_date,
    }
    if rtype in ("upper", "lower", "legislature"):
        directory = "legislature"
    elif rtype in ("mayor", ):
        directory = "municipalities"
        role.pop("district")
    elif rtype in ("governor", "lt_governor"):
        directory = "executive"
        role.pop("district")
    else:
        raise ValueError(f"unknown role type {rtype}")

    person = OrderedDict({
        "id": ocd_uuid("person"),
        "name": name or f"{fname} {lname}",
        "given_name": fname,
        "family_name": lname,
        "image": image,
        "email": email,
        "party": [{
            "name": party
        }],
        "roles": [role],
        "links": [{
            "url": url
        }],
        "sources": [{
            "url": url
        }],
    })

    output_dir = get_data_dir(state)
    dump_obj(person, output_dir=os.path.join(output_dir, directory))
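For reference, a hypothetical call to the create_person() above (every argument value is made up; in the real project the function is presumably exposed as a CLI command). With rtype="lower" the role keeps its district and the record is dumped into the state's legislature directory:

# hypothetical invocation; all values below are illustrative only
create_person(
    fname="Jane",
    lname="Doe",
    name="",                 # empty, so the name falls back to "Jane Doe"
    state="vt",
    district="Washington-1",
    party="Democratic",
    rtype="lower",
    url="https://legislature.example/jane-doe",
    image="",
    email="",
    start_date="2023-01-04",
)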
Example #7
def process_dir(input_dir, output_dir, jurisdiction_id):
    person_memberships = defaultdict(list)

    # collect memberships
    for filename in glob.glob(os.path.join(input_dir, "membership_*.json")):
        with open(filename) as f:
            membership = json.load(f)

        if membership["person_id"].startswith("~"):
            raise ValueError(membership)
        person_memberships[membership["person_id"]].append(membership)

    # process people
    for filename in glob.glob(os.path.join(input_dir, "person_*.json")):
        with open(filename) as f:
            person = json.load(f)

        scrape_id = person["_id"]
        person["memberships"] = person_memberships[scrape_id]
        person = process_person(person, jurisdiction_id)

        dump_obj(person, output_dir=os.path.join(output_dir, "legislature"))
Example #8
def retire(end_date, filename, reason, death):
    """
    Retire a legislator, given END_DATE and FILENAME.

    Will set end_date on active roles.
    """
    # end the person's active roles & re-save
    with open(filename) as f:
        person = load_yaml(f)
    if death:
        reason = "Deceased"
    person, num = retire_person(person, end_date, reason, death)
    dump_obj(person, filename=filename)

    if num == 0:
        click.secho("no active roles to retire", fg="red")
    elif num == 1:
        click.secho("retired person")
    else:
        click.secho(f"retired person from {num} roles")

    move_file(filename)
Example #9
def create_person(fname, lname, name, state, district, party, rtype, url, image,
                  start_date):
    person = OrderedDict({
        'id': ocd_uuid('person'),
        'name': name or f'{fname} {lname}',
        'given_name': fname,
        'family_name': lname,
        'image': image,
        'party': [{'name': party}],
        'roles': [
            {'type': rtype,
             'district': district,
             'jurisdiction': get_jurisdiction_id(state),
             'start_date': start_date,
             }
        ],
        'links': [{'url': url}],
        'sources': [{'url': url}],
    })

    output_dir = get_data_dir(state)
    dump_obj(person, output_dir=os.path.join(output_dir, 'people'))
Example #10
def create_committee(*, name, state, parent, url):
    members = []
    click.echo("Enter members, type 'done' to stop.")
    while True:
        mname = click.prompt("Member name ('done' to stop)")
        if mname == "done":
            break
        members.append({"name": mname})
    com = OrderedDict(
        {
            "id": ocd_uuid("organization"),
            "name": name,
            "classification": "committee",
            "jurisdiction": get_jurisdiction_id(state),
            "parent": parent,
            "sources": [{"url": url}],
            "links": [{"url": url}],
            "memberships": members,
        }
    )

    output_dir = get_data_dir(state)
    dump_obj(com, output_dir=os.path.join(output_dir, "organizations"))
Example #11
def directory_merge(abbr, existing_people, new_people, remove_identical, copy_new, interactive):
    perfect_matched = set()
    matches = []
    id_to_new_filename = {}

    for new in new_people:
        best_similarity = 0
        best_match = None

        id_to_new_filename[new["id"]] = get_filename(new)

        for existing in existing_people:
            similarity = calculate_similarity(existing, new)
            if similarity > 0.999:
                perfect_matched.add(new["id"])
                continue

            if similarity > best_similarity:
                best_similarity = similarity
                best_match = existing

        matches.append((best_similarity, new, best_match))

    click.secho(f"{len(perfect_matched)} were perfect matches", fg="green")

    if remove_identical:
        for id in perfect_matched:
            fname = id_to_new_filename[id]
            fname = f"incoming/{abbr}/people/{fname}"
            click.secho("removing " + fname, fg="red")
            os.remove(fname)

    unmatched = set(p["id"] for p in new_people) - perfect_matched

    for sim, new, old in sorted(matches, reverse=True, key=lambda x: x[0]):
        if sim < 0.001:
            break
        unmatched.remove(new["id"])
        oldfname = "data/{}/people/{}".format(abbr, get_filename(old))
        newfname = "incoming/{}/people/{}".format(abbr, get_filename(new))
        click.secho(" {:.2f} {} {}".format(sim, oldfname, newfname), fg="yellow")
        if interactive:
            differences = compare_objects(old, new)

            for difference in differences:
                click.echo("    " + str(difference))
            ch = "~"
            while ch not in "onsa":
                click.secho("Keep (o)ld? Keep (n)ew? (s)kip? (a)bort?", bold=True)
                ch = click.getchar()
                if ch == "a":
                    raise SystemExit(-1)
                elif ch == "o":
                    keep_on_conflict = "old"
                elif ch == "n":
                    keep_on_conflict = "new"
                elif ch == "s":
                    continue
                merged = merge_people(
                    old, new, keep_both_ids=False, keep_on_conflict=keep_on_conflict
                )
                dump_obj(merged, filename=oldfname)
                os.remove(newfname)

    click.secho(f"{len(unmatched)} were unmatched")
    for id in unmatched:
        fname = id_to_new_filename[id]
        oldfname = f"incoming/{abbr}/people/{fname}"
        if copy_new:
            newfname = f"data/{abbr}/people/{fname}"
            click.secho(f"moving {oldfname} to {newfname}", fg="yellow")
            os.rename(oldfname, newfname)
Example #12
def train():
    algo = sys.argv[1]
    print('[INFO] Chosen algo is:', algo)
    print('[INFO] Loading data')
    train_data, train_labels, train_query_lens = load_data(train_file)
    print('[INFO] Training set loaded')
    valid_data, valid_labels, valid_query_lens = load_data(valid_file)
    print('[INFO] Validation set loaded')
    test_data, test_labels, test_query_lens = load_data(test_file)
    print('[INFO] Testing set loaded')

    eval_set = [(train_data, train_labels),
                (valid_data, valid_labels),
                (test_data, test_labels)]
    eval_group = [train_query_lens, valid_query_lens, test_query_lens]
    eval_names = ['train', 'valid', 'test']
    params = {
        'objective': 'rank_xendcg',
        'learning_rate': 0.05,
        'num_leaves': 64,
        'metric': ['ndcg'],
        'ndcg_eval_at': 10,
        'force_row_wise': True,
        'max_bin': 127
    }

    strategies = ('fixed', 'random_iter', 'random_query', 'decay',
                  'false_positives', 'equal_size', 'delta', 'limit_resample')
    n_estimators = 1000
    early_stopping_rounds = 100
    n_iter_sample = 1
    verbose = 5
    print('[INFO] Starting training')

    if algo in ('lgbm base', 'lgbm goss', 'lambdarank'):
        if algo == 'lambdarank':
            params['objective'] = 'lambdarank'
        elif algo == 'lgbm goss':
            params['boosting'] = 'goss'
        train_set = lgb.Dataset(train_data, label=train_labels, group=train_query_lens)
        eval_results = {}
        valid_sets = [train_set]
        for i, data in enumerate(eval_set[1:]):
            ds = lgb.Dataset(data[0], data[1], group=eval_group[1:][i], reference=train_set)
            valid_sets.append(ds)
        model = lgb.train(params, train_set, num_boost_round=n_estimators,
                          valid_sets=valid_sets, valid_names=eval_names,
                          verbose_eval=verbose, evals_result=eval_results,
                          early_stopping_rounds=early_stopping_rounds)
        dump_obj(eval_results, results_path, algo)
    elif algo in strategies:
        if algo == 'fixed':
            print('[INFO] Starting fitting, tuning')
            eval_results = {}
            for p in (0.5, 0.25, 0.1, 0.05, 0.01):
                print('[INFO] p value:', p)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, p=p, method=algo)
                model.fit(train_data, train_labels, train_query_lens,
                          eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                          verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[p] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
        elif algo in ('equal_size', 'false_positives'):
            print('[INFO] Starting fitting, no tuning')
            model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, method=algo)
            model.fit(train_data, train_labels, train_query_lens,
                      eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                      verbose=verbose, early_stopping_rounds=early_stopping_rounds)
            eval_results = model.get_eval_result()
            dump_obj(eval_results, results_path, algo)
        elif algo.startswith('random'):
            print('[INFO] Starting fitting, with tuning')
            eval_results = {}
            for max_p in (0.5, 0.25, 0.1, 0.05, 0.02):
                print('[INFO] max_p value:', max_p)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, max_p=max_p, method=algo)
                model.fit(train_data, train_labels, train_query_lens,
                          eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                          verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[max_p] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
        elif algo == 'delta':
            print('[INFO] Starting fitting, with tuning')
            eval_results = {}
            for delta_pos in (3, 5, 10):
                delta = 0.25
                print('[INFO] delta_pos:', delta_pos)
                print('[INFO] delta:', delta)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample,
                                  delta_pos=delta_pos, delta=delta, method=algo)
                model.fit(train_data, train_labels, train_query_lens,
                          eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                          verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(delta_pos, delta)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)

            for delta in (1, 0.5, 0.1):
                delta_pos = 5
                print('[INFO] delta_pos:', delta_pos)
                print('[INFO] delta:', delta)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample,
                                  delta_pos=delta_pos, delta=delta, method=algo)
                model.fit(train_data, train_labels, train_query_lens,
                          eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                          verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(delta_pos, delta)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)

        elif algo == 'decay':
            print('[INFO] Starting fitting, with tuning')
            eval_results = {}
            for p in (0.5, 0.75):
                k = 0.985
                print('[INFO] p:', p)
                print('[INFO] k:', k)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample,
                                  p=p, k_factor=k, method=algo)
                model.fit(train_data, train_labels, train_query_lens,
                          eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                          verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(p, k)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
            for k in (0.98, 0.99):
                p = 0.5
                print('[INFO] p:', p)
                print('[INFO] k:', k)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample,
                                  p=p, k_factor=k, method=algo)
                model.fit(train_data, train_labels, train_query_lens,
                          eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                          verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(p, k)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)

        elif algo == 'limit_resample':
            print('[INFO] Starting fitting, with tuning')
            eval_results = {}
            algo = 'limit_resample_rndq'
            for max_resample in [250, 500, 0.5, 0.75]:
                print('[INFO] max_resample', max_resample)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample,
                                  max_resample=max_resample, method='random_query', max_p=0.05)
                model.fit(train_data, train_labels, train_query_lens,
                          eval_set=eval_set, eval_group=eval_group, eval_names=eval_names,
                          verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[max_resample] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
    else:
        raise ValueError('algo parameter is wrong')
    print('FITTING OVER!')
    dump_obj(eval_results, results_path, algo)
    print('dumped results')
    model.save_model(os.path.join(models_path, algo + '.txt'))
    print('saved model')
Example #13
    def save_object(self, obj, output_dir):
        dump_obj(obj.to_dict(), output_dir=output_dir)
Example #14
    def save_eval_result(self, path, filename=None):
        if filename is not None:
            dump_obj(self.get_eval_result(), path, filename)
        else:
            # mirrors the branch above; the original snippet dropped the object argument
            dump_obj(self.get_eval_result(), path, 'selgb-' + self.method)
Example #15
def process_old_file(filename, metadata):
    with open(filename) as f:
        data = json.load(f)
    if data["leg_id"] != data["_id"]:
        raise Exception()
    if data.get("active"):
        print(data)
        return
    if data.get("roles", []):
        raise Exception()

    # remove unused fields
    for k in (
            "_yearly_contributions",
            "nimsp_candidate_id",
            "votesmart_id",
            "_contributions_start_year",
            "_scraped_name",
            "_total_contributions",
            "transparencydata_id",
            "_locked_fields",
            "level",
            "nimsp_id",
            "_type",
            "country",
            "updated_at",
            "_id",
            "active",
            "roles",
            "offices",
            "notice",
            "nickname",
            "district",
            "party",
            "chamber",
            "csrfmiddlewaretoken",
            "email",
            "created_at",
            "office_address",
            "office_phone",
            "occupation",
            "_guid",
            "_code",
            "all_ids",
            "2008-2011",
    ):
        data.pop(k, None)

    # remove plus fields
    for k in [k for k in data.keys() if k.startswith("+")]:
        data.pop(k)

    leg_obj = OrderedDict({"id": ocd_uuid("person")})

    leg_obj["name"] = data.pop("full_name")
    first_name = data.pop("first_name")
    middle_name = data.pop("middle_name")
    last_name = data.pop("last_name")
    suffixes = data.pop("suffixes", "")
    suffix = data.pop("suffix", "")
    if first_name:
        leg_obj["given_name"] = first_name
    if last_name:
        leg_obj["family_name"] = last_name
    if middle_name:
        leg_obj["middle_name"] = middle_name
    if suffix:
        leg_obj["suffix"] = suffixes or suffix

    state = data.pop("state")
    jurisdiction_id = get_jurisdiction_id(state)

    # pull useful fields
    old_roles = data.pop("old_roles", {})
    parties = set()
    new_roles = []
    for session, roles in old_roles.items():
        for role in roles:
            if role["type"] in (
                    "committee member",
                    "Minority Floor Leader",
                    "Majority Floor Leader",
                    "Majority Caucus Chair",
                    "Minority Caucus Chair",
                    "Speaker Pro Tem",
                    "President Pro Tem",
                    "Senate President",
                    "Speaker of the House",
                    "Minority Whip",
                    "Majority Whip",
                    "Lt. Governor",
            ) or role.get("committee"):
                continue
            parties.add(role["party"])
            new_roles.append({
                "term": role["term"],
                "chamber": role["chamber"],
                "district": role["district"]
            })

    leg_obj["party"] = [{"name": party} for party in parties]

    # add these to leg_obj
    roles = terms_to_roles(new_roles, metadata["terms"])
    formatted_roles = []
    for chamber, district, start, end in roles:
        formatted_roles.append(
            OrderedDict({
                "district": district,
                "jurisdiction": jurisdiction_id,
                "type": chamber,
                "start_date": f"{start}-01-01",
                "end_date": f"{end}-12-31",
            }))
    leg_obj["roles"] = formatted_roles

    all_ids = data.pop("_all_ids")
    leg_id = data.pop("leg_id")
    if leg_id not in all_ids:
        all_ids.append(leg_id)

    image = data.pop("photo_url", "")
    if image:
        leg_obj["image"] = image
    url = data.pop("url", "")
    if url:
        leg_obj["links"] = [{"url": url}]
    leg_obj["sources"] = data.pop("sources")
    leg_obj["other_identifiers"] = [{
        "identifier": id_,
        "scheme": "legacy_openstates"
    } for id_ in all_ids]

    if data:
        print(data)
        raise Exception()

    output_dir = get_data_dir(state)
    dump_obj(leg_obj, output_dir=os.path.join(output_dir, "retired"))
Example #16
    def save(self):
        dump_obj(self.data, filename=self.filename)
Example #17
    def prepare_train_dev_data(self):
        data = utils.get_obj_or_dump("data0.bin")
        if data is None:
            data = self._get_fold_data(0)
            utils.dump_obj(data, "data0.bin")
        self.data0 = data
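Example #17 above, together with the other utils.dump_obj(..., force=True) calls on this page, suggests a second, pickle-flavoured dump_obj paired with get_obj_or_dump as a small on-disk cache. A rough sketch inferred from those call sites (an assumption, not the real utils module):

import os
import pickle


def dump_obj(obj, filename, force=False):
    # skip writing if the dump already exists, unless force=True
    if os.path.exists(filename) and not force:
        return
    with open(filename, "wb") as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)


def get_obj_or_dump(filename):
    # return the cached object if the dump exists, otherwise None so the
    # caller can build the data and dump_obj() it (as Example #17 does)
    if os.path.exists(filename):
        with open(filename, "rb") as f:
            return pickle.load(f)
    return None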
Example #18
from utils import load_yaml, dump_obj
import sys

for fn in sys.argv[1:]:
    with open(fn) as f:
        data = load_yaml(f)
    data.pop("contact_details")
    dump_obj(data, filename=fn)
Example #19
import csv
from utils import find_file, load_yaml, dump_obj

with open("nyleg.csv") as f:
    for row in csv.DictReader(f):
        os_id = row["osid"]
        fname = find_file(os_id)
        with open(fname) as lf:
            obj = load_yaml(lf)
            for cd in obj["contact_details"]:
                if cd["note"] == "Capitol Office":
                    cd["voice"] = row["Capitol Phone"].replace("(", "").replace(") ", "-")
                if cd["note"] == "District Office":
                    cd["voice"] = row["District Phone"].replace("(", "").replace(") ", "-")
            obj["email"] = row["email"]
            if row["twitter"] and "ids" not in obj:
                obj["ids"] = {"twitter": row["twitter"].replace("@", "")}
        dump_obj(obj, filename=fname)
Example #20
    valid_actions_indices = []  # assumed: initialized earlier in the function this excerpt comes from
    new_reward_indices = defaultdict(list)
    for data_i in range(input_meta['size']):
        if is_valid(input_episode_file.actions[data_i]):
            new_reward_indices[int(input_episode_file.rewards[data_i])].append(
                len(valid_actions_indices))
            valid_actions_indices.append(data_i)

    if len(valid_actions_indices) > 0:
        output_file_dir = f'{output_data_files_dir}/{inter_dir}'
        Path(output_file_dir).mkdir(parents=True, exist_ok=True)

        output_meta_file = f'{output_file_dir}/{file_name}.meta'
        dump_obj(
            {
                'max_size': len(valid_actions_indices),
                'size': len(valid_actions_indices),
                'example': input_meta['example'],
                'reward_indices': new_reward_indices
            }, output_meta_file)
        output_episode_file = EpisodeFile(f'{output_file_dir}/{file_name}',
                                          len(valid_actions_indices),
                                          input_meta['example'], 'w+')
        for data_i in range(len(valid_actions_indices)):
            output_episode_file.set(
                input_episode_file.get(valid_actions_indices[data_i]), data_i)

        output_episode_file.flush()
        output_episode_file.close()

    input_episode_file.close()
Example #21
    def render(self):
        read_game = self.env.read_game
        frame = self.env.frame
        #if not read_game.is_in_game or not keys["KEY_INSPECTOR"]: return
        if keys["KEY_INSPECTOR"]: 
            for i in range(PLAYERMAX):
                print "Player #%i: %s" % (i, read_game.player[i].name)
            for idx in range(ENTITIESMAX):
                e = read_game.mw2_entity.arr[idx]
                spot = read_game.world_to_screen(e.pos)
                if spot:
                    cur_angle_dist = self.sq(spot.x - read_game.screen_center_x, spot.y - read_game.screen_center_y)
                    if cur_angle_dist < 50 * 50:      # not too far from center
                        s = "[idx=%i(%x), typ=%i, weap=%i]" % (idx, idx, e.type, e.WeaponNum)
                        draw_string_center(frame.font, spot.x, spot.y, 0xFFFFFFFF, s)
                        print s
                        print dump_obj(e)
#                        if e.owner_scr1 >= 0 and e.owner_scr1 < 2047:
#                            ee = read_game.mw2_entity.arr[e.owner_scr1]
#                            print "[idx=%i(%x), typ=%i, weap=%i]" % (e.owner_scr1, e.owner_scr1, ee.type, ee.WeaponNum)
#                            print dump_obj(ee)
#                        if e.owner_scr2 >= 0 and e.owner_scr2 < 2047:
#                            ee = read_game.mw2_entity.arr[e.owner_scr2]
#                            print "[idx=%i(%x), typ=%i, weap=%i]" % (e.owner_scr2, e.owner_scr2, ee.type, ee.WeaponNum)
#                            print dump_obj(ee)
                        #=======================================================
                        # if e.type == ET_EXPLOSIVE:
                        #    print "dump explo"
                        #    print dump_obj(e)
                        #=======================================================
        
        if keys["KEY_INSPECT_POS"]:                 # print my player's position
            pos = read_game.mw2_mypos
            print "pos= (%.2f, %.2f, %.2f)" % (pos.x, pos.y, pos.z)
            ang = read_game.view_angles
            print "angles= (%.2f, %.2f, %.2f)" % (ang.x, ang.y, ang.z)
        
        if keys["KEY_INSPECT_DUMP"]:                # dump some memory structures
            #mem = dumped()
            #read_game._RPM(0x6727F13, mem)
            #read_game._RPM(0x6727F10, mem)
            #print dump_obj(mem)
            #read_game._RPM(0x64DA350, mem)
            for i in range(8):
                print "player #%i" % i
                print dump_obj(read_game.mw2_entity.arr[i])
                print "client info"
                print dump_obj(read_game.mw2_clientinfo.arr[i])
            #del mem
        
        if keys["KEY_INSPECT_DUMP_PLAYERS"]:
            for i in range(PLAYERMAX):
                print "Player #%i: %s" % (i, read_game.player[i].name)
            #===================================================================
            # print "refdef"
            # print dump_obj(read_game.mw2_refdef)
            # print "viewy"
            # print dump_obj(read_game.mw2_viewy)
            #===================================================================
        
        if False and read_game.is_in_game:
            print "time=%8i, pos2=%.1f %.1f %.1f, pos3=%.1f %.1f %.1f" % (read_game.game_time,
                                                                         read_game.my_player.pos2.x,
                                                                         read_game.my_player.pos2.y,
                                                                         read_game.my_player.pos2.z,
                                                                         read_game.my_player.pos3.x,
                                                                         read_game.my_player.pos3.y,
                                                                         read_game.my_player.pos3.z,
                                                                         )
            
        if False and read_game.is_in_game:
            print "time=%8i, motion=%.1f %.1f %.1f, abs=%.1f" % (read_game.game_time,
                                                                         read_game.my_player.motion.x,
                                                                         read_game.my_player.motion.y,
                                                                         read_game.my_player.motion.z,
                                                                         read_game.my_player.motion.length()
                                                                         )
        if False:
            for e in read_game.mw2_entity.arr:
                if e.type == ET_EXPLOSIVE and e.alive & ALIVE_FLAG:
                    print "time=%8i, pos=%.1f %.1f %.1f" % (read_game.game_time,
                                                                                 e.pos.x,
                                                                                 e.pos.y,
                                                                                 e.pos.z,
                                                                                 )
        if False and read_game.is_in_game:
            print "kills=%i, deaths=%i" % (read_game.kills, read_game.deaths)
                                          
        if False and read_game.is_in_game:
            print "weapon=%i" % read_game.my_player.weapon_num
            
        if keys["KEY_INSPECT_MOVE_MOUSE"]:
            self.env.sched.new(self.move_sequence())
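Example #21 uses dump_obj very differently: there it simply renders every field of a game-memory structure as text for print-style debugging. A hedged sketch of such an introspection helper (hypothetical; the page does not show the original):

import ctypes


def dump_obj(obj):
    # render each declared field of a ctypes.Structure (or each attribute of a
    # plain object) as "name = value", one per line, for quick inspection
    lines = []
    if isinstance(obj, ctypes.Structure):
        for field in obj._fields_:
            name = field[0]
            lines.append("%s = %r" % (name, getattr(obj, name)))
    else:
        for name, value in sorted(vars(obj).items()):
            lines.append("%s = %r" % (name, value))
    return "\n".join(lines)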
Example #22
def fix_offices(filename):
    with open(filename) as file:
        data = load_yaml(file)

    # office_type -> key -> set of values seen
    all_details = defaultdict(lambda: defaultdict(set))
    email = set()

    for office in data.get("contact_details", []):
        for key, value in office.items():
            if key == "note":
                continue
            if key == "email":
                email.add(value)
            else:
                otype = office["note"]
                # # Florida fixes
                # if "/fl" in filename and key == "address":
                #     value = "; ".join([v.strip() for v in value.split(";")])
                # if "/fl/" in filename and key == "voice" and not value.startswith("850-"):
                #     otype = "District Office"
                # if "/fl/" in filename and key == "address" and "32399-1300" not in value:
                #     otype = "District Office"
                all_details[otype][key].add(value)

    reformatted = defaultdict(dict)
    error = False

    for office_type, office_details in all_details.items():
        for ctype, values in office_details.items():
            if len(values) == 1:
                reformatted[office_type][ctype] = values.pop()
            else:
                click.secho(
                    f"multiple values for {office_type} {ctype}: {values}",
                    fg="red")
                error = True

    if len(email) == 1:
        email = email.pop()
    elif len(email) > 1:
        emails = list(email)
        if "leg.state.vt.us" in emails[0]:
            email = emails[0]
        elif "leg.state.vt.us" in emails[1]:
            email = emails[0]
        elif emails[0].lower() == emails[1].lower():
            email = emails[0]
        else:
            click.secho(f"multiple values for email: {email}", fg="red")
            error = True

    if not error:
        if email:
            data["email"] = email
        data["contact_details"] = []
        for otype in ("Capitol Office", "District Office", "Primary Office"):
            if otype in reformatted:
                data["contact_details"].append(
                    OrderedDict(note=otype, **reformatted[otype]))
        # click.echo(f"rewrite contact details as {data['contact_details']}")
        dump_obj(data, filename=filename)
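To make the consolidation in fix_offices() concrete, here is a made-up before/after: duplicate entries for the same office note are merged as long as no key has conflicting values, and the email moves to the top level of the person record:

# hypothetical contact_details as they might appear before fix_offices
before = [
    {"note": "Capitol Office", "voice": "802-555-0100"},
    {"note": "Capitol Office", "address": "115 State St; Montpelier, VT 05633"},
    {"note": "Capitol Office", "email": "jane.doe@example.gov"},
]

# what the rewritten record would hold afterwards
after_contact_details = [
    {
        "note": "Capitol Office",
        "voice": "802-555-0100",
        "address": "115 State St; Montpelier, VT 05633",
    }
]
after_email = "jane.doe@example.gov"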
Example #23
def make_mayors(state_to_import):
    all_municipalities = []
    os.makedirs(f"data/{state_to_import}/municipalities")
    with open("mayors.csv") as f:
        data = csv.DictReader(f)
        for line in data:
            state = line["Postal Code"].lower()
            if state != state_to_import:
                continue
            city = line["City"].strip()
            given_name = line["First"].strip()
            family_name = line["Last"].strip()
            name = f"{given_name} {family_name}"
            email = line["Email"].strip()
            webform = line["Web Form"].strip()
            phone = reformat_phone_number(line["Phone"])
            fax = reformat_phone_number(line["Fax"])
            address1 = line["Address 1"].strip()
            address2 = line["Address 2"].strip()
            zipcode = line["Zip Code"].strip()
            if line["Zip Plus 4"].strip():
                zipcode += "-" + line["Zip Plus 4"].strip()
            if not line["Term End"]:
                term_end = "2021-01-01"  # temporary term end date for the unknowns
            else:
                term_end = datetime.datetime.strptime(
                    line["Term End"], "%m/%d/%Y").strftime("%Y-%m-%d")

            if address2:
                full_address = f"{address1};{address2};{city}, {state.upper()} {zipcode}"
            else:
                full_address = f"{address1};{city}, {state.upper()} {zipcode}"

            contact = {"note": "Primary Office"}
            if full_address:
                contact["address"] = full_address
            if fax:
                contact["fax"] = fax
            if phone:
                contact["voice"] = phone
            if email:
                contact["email"] = email

            jid = city_to_jurisdiction(city, state)
            all_municipalities.append(OrderedDict({"name": city, "id": jid}))

            obj = OrderedDict({
                "id": ocd_uuid("person"),
                "name": name,
                "given_name": given_name,
                "family_name": family_name,
                "roles": [{
                    "jurisdiction": jid,
                    "type": "mayor",
                    "end_date": term_end,
                }],
                "contact_details": [contact],
                "sources": [{"url": webform}] if webform else [],
                "links": [{"url": webform}] if webform else [],
            })
            dump_obj(obj, output_dir=f"data/{state}/municipalities/")
        dump_obj(all_municipalities,
                 filename=f"data/{state_to_import}/municipalities.yml")
Example #24
import glob
from utils import load_yaml, dump_obj, role_is_active

for file in glob.glob("data/ca/legislature/*.yml"):
    with open(file) as inf:
        data = load_yaml(inf)
        for role in data["roles"]:
            if role_is_active(role):
                letter = "A" if role["type"] == "lower" else "S"
                district = int(role["district"])
        url = f"https://lcmspubcontact.lc.ca.gov/PublicLCMS/ContactPopup.php?district={letter}D{district:02d}&inframe=N"
        data["links"].append({"url": url, "note": "Contact Form"},)
        dump_obj(data, filename=file)
Example #25
def make_governors():
    with open("governors.csv") as f:
        data = csv.DictReader(f)
        for line in data:
            state = line["state"]
            name = line["name"]
            given_name = line["first_name"]
            family_name = line["last_name"]
            party = line["party"]
            birth_date = line["birth_date"]
            start_date = line["start_date"]
            end_date = line["end_date"]
            website = line["website"]
            twitter = line["twitter"]
            webform = line["webform"]

            full_address = "; ".join(
                [n.strip() for n in line["address"].splitlines()])
            phone = line["phone"]
            email = line["email"]
            fax = line["fax"]

            contact = {"note": "Capitol Office"}
            if full_address:
                contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            jid = metadata.lookup(name=state).jurisdiction_id
            abbr = metadata.lookup(name=state).abbr.lower()

            obj = OrderedDict({
                "id": ocd_uuid("person"),
                "name": name,
                "given_name": given_name,
                "family_name": family_name,
                "birth_date": birth_date,
                "party": [{"name": party}],
                "roles": [{
                    "jurisdiction": jid,
                    "type": "governor",
                    "start_date": start_date,
                    "end_date": end_date,
                }],
                "contact_details": [contact],
                "ids": ids,
                "sources": [{"url": website}],
                "links": [
                    {"url": website},
                    {"url": webform, "note": "webform"},
                ],
            })
            outdir = f"data/{abbr}/executive/"
            os.makedirs(outdir)
            dump_obj(obj, output_dir=outdir)
Example #26
def make_ceos():
    with open("ceo.csv") as f:
        data = csv.DictReader(f)
        for line in data:
            state = line["State"].strip()
            given_name = line["First"]
            family_name = line["Last"]
            name = f"{given_name} {family_name}"
            role = line["Role"].strip().lower()
            addr1 = line["Address 1"]
            addr2 = line["Address 2"]
            city = line["City"]
            state_abbr = line["Postal Code"]
            zip5 = line["Zip Code"]
            zip4 = line["Zip Plus 4"]
            phone = line["Phone"]
            email = line["Email"]
            fax = line["Fax"]
            contact_form = line["Contact Form"]
            source = line["Source"]
            twitter = line["Twitter"]
            party = line["Party"]

            if party == "R":
                party = "Republican"
            elif party == "D":
                party = "Democratic"
            else:
                party = "Independent"

            if role != "secretary of state":
                role = "chief election officer"

            full_address = "; ".join([addr1, addr2, f"{city}, {state_abbr} {zip5}-{zip4}"])

            contact = {"note": "Capitol Office"}
            contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            try:
                jid = metadata.lookup(name=state).jurisdiction_id
            except KeyError:
                continue
            abbr = metadata.lookup(name=state).abbr.lower()

            links = [{"url": source}]
            if contact_form:
                links.append({"url": contact_form, "note": "webform"})
            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "roles": [
                        {
                            "jurisdiction": jid,
                            "type": role.strip().lower(),
                            "end_date": "2021-12-31",
                        },
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": source}],
                    "links": links,
                    "party": [{"name": party}],
                }
            )
            outdir = f"data/{abbr}/executive/"
            # os.makedirs(outdir)
            dump_obj(obj, output_dir=outdir)
Example #27
    def render(self):
        if not DEBUG:   return
        
        read_game = self.env.read_game
        frame = self.env.frame
        #if not read_game.is_in_game or not keys["KEY_INSPECTOR"]: return
        if keys["KEY_INSPECTOR"]: 
#            for i in range(PLAYERMAX):
#                print "Player #%i: %s" % (i, read_game.player[i].name)
            for idx in range(ENTITIESMAX):
                e = read_game.cod7_entity.arr[idx]
                spot = read_game.world_to_screen(e.pos)
                if spot:
                    cur_angle_dist = self.sq(spot.x - read_game.screen_center_x, spot.y - read_game.screen_center_y)
                    if cur_angle_dist < 50 * 50:      # not too far from center
                        s = "[idx=%i(%x), typ=%i, weap=%i]" % (idx, idx, e.type, e.weapon)
                        draw_string_center(frame.font, spot.x, spot.y, 0xFFFFFFFF, s)
                        print s
                        #print dump_obj(e)
#                        if e.owner_scr1 >= 0 and e.owner_scr1 < 2047:
#                            ee = read_game.mw2_entity.arr[e.owner_scr1]
#                            print "[idx=%i(%x), typ=%i, weap=%i]" % (e.owner_scr1, e.owner_scr1, ee.type, ee.WeaponNum)
#                            print dump_obj(ee)
#                        if e.owner_scr2 >= 0 and e.owner_scr2 < 2047:
#                            ee = read_game.mw2_entity.arr[e.owner_scr2]
#                            print "[idx=%i(%x), typ=%i, weap=%i]" % (e.owner_scr2, e.owner_scr2, ee.type, ee.WeaponNum)
#                            print dump_obj(ee)
                        #=======================================================
                        # if e.type == ET_EXPLOSIVE:
                        #    print "dump explo"
                        #    print dump_obj(e)
                        #=======================================================
        
        if keys["KEY_INSPECT_POS"]:                 # print my player's position NUMPAD1
            pos = read_game.my_pos
            ang = read_game.view_angles
            print "time=%i ticks=%i perk=%i pos=(%.2f, %.2f, %.2f) angles=(%.2f, %.2f, %.2f)" % (read_game.game_time, self.env.ticks, read_game.my_player.perk,
                                                                                           pos.x, pos.y, pos.z, ang.x, ang.y, ang.z)
        
        if keys["KEY_INSPECT_DUMP"]:                # dump some memory structures NUMPAD9
            #mem = dumped()
            #read_game._RPM(0x6727F13, mem)
            #read_game._RPM(0x6727F10, mem)
            #print dump_obj(mem)
            #read_game._RPM(0x64DA350, mem)
            i = 0
            print "player #%i" % i
            print dump_obj(read_game.cod7_entity.arr[i])
            print "client info"
            print dump_obj(read_game.cod7_clientinfo.arr[i])
            #del mem
            
        if keys["KEY_INSPECT_DUMP_CG"]:        # NUMPAD 8
            #i = read_game.local_client_num
            #print "Current player, pose=%x, isalive=%x|%x" % (read_game.cod7_clientinfo.arr[i].pose, read_game.cod7_entity.arr[i].isalive, read_game.cod7_entity.arr[i].isalive2)
            #print dump_obj(read_game.cgs)
            mem = dumped()
            read_game._RPM(self.env.offsets.RXCD_T, mem)
            print dump_obj(mem)
            
        if keys["KEY_INSPECT_DUMP_PLAYERS"]:        # NUMPAD 7
            print "local_client=%i" % read_game.local_client_num
            for i in range(PLAYERMAX):
                print "Player #%i: %s, Team:%i" % (i, read_game.player[i].name, read_game.player[i].team)

        if keys["KEY_INSPECT_AMMO"]:
            for i in range(AMMOMAX):
                ammo = read_game.cg.ammos[i]
                print "Ammo[%i] (%i)%s = %i" % (i, ammo.weapon_id, self.env.weapon_names.get_weapon_model(ammo.weapon_id), ammo.ammo)
Example #28
    def dump_state(self, exec_flag=False):
        self.logger.debug("state %s" % self._stage)
        if exec_flag:
            self.logger.debug("dumping state to file for %s" % self._stage)
            # dump_obj(self, 'run_state.pkl', force=True)  # too large
            utils.dump_obj(self, "run_state_%s.pkl" % self._stage, force=True)
Example #29
def directory_merge(abbr, existing_people, new_people, remove_identical,
                    copy_new, interactive):
    perfect_matched = set()
    matches = []
    id_to_new_filename = {}

    for new in new_people:
        best_similarity = 0
        best_match = None

        id_to_new_filename[new['id']] = get_filename(new)

        for existing in existing_people:
            similarity = calculate_similarity(existing, new)
            if similarity > 0.999:
                perfect_matched.add(new['id'])
                continue

            if similarity > best_similarity:
                best_similarity = similarity
                best_match = existing

        matches.append((best_similarity, new, best_match))

    click.secho(f'{len(perfect_matched)} were perfect matches', fg='green')

    if remove_identical:
        for id in perfect_matched:
            fname = id_to_new_filename[id]
            fname = f'incoming/{abbr}/people/{fname}'
            click.secho('removing ' + fname, fg='red')
            os.remove(fname)

    unmatched = set(p['id'] for p in new_people) - perfect_matched

    for sim, new, old in sorted(matches, reverse=True, key=lambda x: x[0]):
        if sim < 0.001:
            break
        unmatched.remove(new['id'])
        oldfname = 'data/{}/people/{}'.format(abbr, get_filename(old))
        newfname = 'incoming/{}/people/{}'.format(abbr, get_filename(new))
        click.secho(' {:.2f} {} {}'.format(sim, oldfname, newfname),
                    fg='yellow')
        if interactive:
            differences = compare_objects(old, new)

            for difference in differences:
                click.echo('    ' + str(difference))
            ch = '~'
            while ch not in 'onsa':
                click.secho('Keep (o)ld? Keep (n)ew? (s)kip? (a)bort?',
                            bold=True)
                ch = click.getchar()
                if ch == 'a':
                    raise SystemExit(-1)
                elif ch == 'o':
                    keep_on_conflict = 'old'
                elif ch == 'n':
                    keep_on_conflict = 'new'
                elif ch == 's':
                    continue
                merged = merge_people(old,
                                      new,
                                      keep_both_ids=False,
                                      keep_on_conflict=keep_on_conflict)
                dump_obj(merged, filename=oldfname)
                os.remove(newfname)

    click.secho(f'{len(unmatched)} were unmatched')
    for id in unmatched:
        fname = id_to_new_filename[id]
        oldfname = f'incoming/{abbr}/people/{fname}'
        if copy_new:
            newfname = f'data/{abbr}/people/{fname}'
            click.secho(f'moving {oldfname} to {newfname}', fg='yellow')
            os.rename(oldfname, newfname)
Example #30
def dir_to_mongo(abbr, create, clear_old_roles, verbose):
    db = pymongo.MongoClient(os.environ.get('BILLY_MONGO_HOST',
                                            'localhost'))['fiftystates']

    metadata = db.metadata.find({'_id': abbr})[0]
    latest_term = metadata['terms'][-1]['name']

    active_ids = []

    for person, filename in iter_objects(abbr, 'people'):

        legacy_ids = [
            oid['identifier'] for oid in person.get('other_identifiers', [])
            if oid['scheme'] == 'legacy_openstates'
        ]
        if not legacy_ids:
            if create:
                # get next ID
                new_id = get_next_id(db, abbr)
                legacy_ids = [new_id]
                if 'other_identifiers' not in person:
                    person['other_identifiers'] = []
                person['other_identifiers'].append({
                    'scheme': 'legacy_openstates',
                    'identifier': new_id
                })
                dump_obj(person, filename=filename)
            else:
                click.secho(
                    f'{filename} does not have legacy ID, run with --create',
                    fg='red')
                sys.exit(1)

        active_ids.append(legacy_ids[0])

        # handle name
        prefix, first_name, last_name, suffixes = name_tools.split(
            person['name'])

        # get chamber, district, party
        for role in person['roles']:
            if role_is_active(role):
                chamber = role['type']
                district = role['district']
                break
        for role in person['party']:
            if role_is_active(role):
                party = role['name']

        url = person['links'][0]['url']
        email = ''

        offices = []
        for cd in person.get('contact_details', []):
            office = {
                'fax': cd.get('fax'),
                'phone': cd.get('voice'),
                'address': cd.get('address'),
                'email': cd.get('email'),
                'name': cd['note'],
                'type': 'capitol' if 'capitol' in cd['note'].lower() else 'district',
            }
            offices.append(office)
            if office['email'] and not email:
                email = office['email']

        # NE & DC
        if chamber == 'legislature':
            chamber = 'upper'

        # get some old data to keep around
        created_at = datetime.datetime.utcnow()
        old_roles = {}
        old_person = None
        try:
            old_person = db.legislators.find({'_id': legacy_ids[0]})[0]
            created_at = old_person['created_at']
            if not clear_old_roles:
                old_roles = old_person.get('old_roles', {})
        except IndexError:
            pass

        mongo_person = {
            '_id': legacy_ids[0],
            'leg_id': legacy_ids[0],
            '_all_ids': legacy_ids,
            '_type': 'person',
            'active': True,
            'full_name': person['name'],
            '_scraped_name': person['name'],
            'photo_url': person.get('image'),
            'state': abbr,
            'district': district,
            'chamber': chamber,
            'party': party,
            'email': email,
            'url': url,
            'offices': offices,
            'created_at': created_at,
            'first_name': first_name,
            'middle_name': '',
            'last_name': last_name,
            'suffixes': suffixes,
            'sources': person['sources'],
            'old_roles': old_roles,
            'roles': [
                {
                    'term': latest_term,
                    'district': district,
                    'chamber': chamber,
                    'state': abbr,
                    'party': party,
                    'type': 'member',
                    'start_date': None,
                    'end_date': None,
                },
            ],
        }
        # TODO: committee info
        # { "term" : "2017-2018", "committee_id" : "NCC000233", "chamber" : "lower",
        # "state" : "nc", "subcommittee" : null, "committee" : "State and Local Government II",
        # "position" : "member", "type" : "committee member" },

        # compare
        if old_person:
            old_person.pop('updated_at', None)
        if old_person == mongo_person:
            if verbose:
                click.secho(f'no updates to {mongo_person["_id"]}')
        else:
            # print(mongo_person, old_person)
            # raise Exception()
            click.secho(f'updating {mongo_person["_id"]}', fg='green')
            mongo_person['updated_at'] = datetime.datetime.utcnow()
            try:
                db.legislators.save(mongo_person)
            except Exception as e:
                print(e)
                continue

    to_retire = db.legislators.find({
        '_id': {
            '$nin': active_ids
        },
        'state': abbr
    })
    click.secho(f'going to try to retire {to_retire.count()}')
    for leg in to_retire:
        retire_person(db, leg)
Example #31
    def save(self, directory):
        dump_obj(self.to_dict(), output_dir=directory)
Example #32
def update_municipalities(municipalities, state):
    fname = f"data/{state}/municipalities.yml"
    with open(fname, 'r') as f:
        contents = load_yaml(f)
    dump_obj(contents + municipalities, filename=fname)