示例#1
0
def test_insert_with_idlevels():
    """The generated id is prefixed by the abbreviation of the object's level.

    Both objects carry a state and a country; only ``level`` differs, and the
    id prefix is expected to follow it ('EX' for state, 'US' for country).
    """
    shared = {'_type': 'person', 'state': 'ex', 'country': 'us'}
    cases = (('state', 'EX'), ('country', 'US'))

    for level, prefix in cases:
        obj = dict(shared, level=level)
        assert utils.insert_with_id(obj).startswith(prefix)
示例#2
0
def test_insert_with_id_increments():
    """Two consecutive inserts yield distinct, well-formed legislator ids.

    Also checks that the stored document lists its own id in ``_all_ids`` and
    that ``created_at``/``updated_at`` are equal datetimes right after insert.
    """
    pattern = re.compile(r'^EXL\d{6,6}$')
    first = {'full_name': 'a test legislator', '_type': 'person',
             'level': 'state', 'state': 'ex'}
    second = {'full_name': 'another legislator', '_type': 'person',
              'level': 'state', 'state': 'ex'}

    first_id = utils.insert_with_id(first)
    assert pattern.match(first_id)
    stored = db.legislators.find_one({'_id': first_id})
    assert stored['_all_ids'] == [first_id]

    second_id = utils.insert_with_id(second)
    assert pattern.match(second_id)
    assert second_id != first_id
    stored = db.legislators.find_one({'_id': second_id})
    assert stored
    assert stored['_all_ids'] == [second_id]

    # the freshly inserted document must carry matching timestamps
    assert stored['created_at'] == stored['updated_at']
    assert isinstance(stored['created_at'], datetime.datetime)
示例#3
0
文件: test_utils.py 项目: JT5D/billy
def test_insert_with_id_increments():
    """Each insert mints a new ``EXL``-prefixed id and stamps timestamps."""
    id_pattern = re.compile(r'^EXL\d{6,6}$')
    people = [{'full_name': name, '_type': 'person', 'state': 'ex'}
              for name in ('a test legislator', 'another legislator')]

    id_a = utils.insert_with_id(people[0])
    assert id_pattern.match(id_a)
    doc = db.legislators.find_one({'_id': id_a})
    assert doc['_all_ids'] == [id_a]

    id_b = utils.insert_with_id(people[1])
    assert id_pattern.match(id_b)
    assert id_b != id_a
    doc = db.legislators.find_one({'_id': id_b})
    assert doc
    assert doc['_all_ids'] == [id_b]

    # timestamps are created on insert and start out identical
    assert doc['created_at'] == doc['updated_at']
    assert isinstance(doc['created_at'], datetime.datetime)
示例#4
0
def import_committees_from_legislators(current_term, abbr):
    """Create or extend committees from legislators' committee roles.

    Every legislator with a role in ``current_term`` for ``abbr`` is scanned.
    For each 'committee member' role that has no ``committee_id`` yet, the
    matching committee is looked up (and inserted when missing), the
    legislator is appended to its members if not already listed, and the role
    is linked to the committee. Each scanned legislator is saved afterwards.
    """
    level_field = settings.LEVEL_FIELD
    current_legislators = db.legislators.find({
        'roles': {'$elemMatch': {'term': current_term, level_field: abbr}},
    })

    for legislator in current_legislators:
        for role in legislator['roles']:
            # skip anything that is not an unlinked committee membership
            if role['type'] != 'committee member' or 'committee_id' in role:
                continue

            spec = {
                level_field: abbr,
                'chamber': role['chamber'],
                'committee': role['committee'],
            }
            if 'subcommittee' in role:
                spec['subcommittee'] = role['subcommittee']

            committee = db.committees.find_one(spec)
            if not committee:
                # the lookup spec doubles as the skeleton of the new document
                committee = spec
                committee['_type'] = 'committee'
                # copy LEVEL_FIELD from legislator to committee
                committee[level_field] = legislator[level_field]
                committee['members'] = []
                committee['sources'] = []
                committee.setdefault('subcommittee', None)
                insert_with_id(committee)

            already_listed = any(
                member['leg_id'] == legislator['leg_id']
                for member in committee['members'])
            if already_listed:
                continue

            committee['members'].append({
                'name': legislator['full_name'],
                'leg_id': legislator['leg_id'],
                'role': role.get('position') or 'member',
            })
            # carry over the legislator's sources without duplicating any
            for source in legislator['sources']:
                if source not in committee['sources']:
                    committee['sources'].append(source)
            db.committees.save(committee, safe=True)

            role['committee_id'] = committee['_id']

        db.legislators.save(legislator, safe=True)
示例#5
0
def import_legislator(data):
    """Insert a scraped legislator or merge it into an existing record.

    An existing legislator is matched on level, abbreviation, scraped name
    and a role in the scraped term or one of its neighbouring terms.

    Returns "update" when an existing record was updated, "insert" otherwise.
    """
    data = prepare_obj(data)
    data['_scraped_name'] = data['full_name']

    for role in data['roles']:
        # Rename 'role' -> 'type'
        if 'role' in role:
            role['type'] = role.pop('role')

        # copy over country and/or state into role
        # TODO: base this on all possible level fields
        role['level'] = data['level']
        for field in ('country', 'state'):
            if field in data:
                role[field] = data[field]

    current_role = data['roles'][0]
    term = current_role['term']

    level = data['level']
    abbrev = data[level]

    prev_term = get_previous_term(abbrev, term)
    next_term = get_next_term(abbrev, term)

    role_spec = {level: abbrev,
                 'type': current_role['type'],
                 'term': {'$in': [term, prev_term, next_term]}}
    for key in ('district', 'chamber'):
        if key in current_role:
            role_spec[key] = current_role[key]

    existing = db.legislators.find_one({
        'level': level,
        level: abbrev,
        '_scraped_name': data['full_name'],
        'roles': {'$elemMatch': role_spec},
    })

    if not existing:
        insert_with_id(data)
        return "insert"

    if 'old_roles' not in existing:
        existing['old_roles'] = {}

    stored_term = existing['roles'][0]['term']
    if stored_term == prev_term:
        # stored roles belong to the previous term: archive them
        existing['old_roles'][stored_term] = existing['roles']
    elif stored_term == next_term:
        # stored roles are newer: archive the scraped ones, keep the stored
        existing['old_roles'][term] = data['roles']
        data['roles'] = existing['roles']

    update(existing, data, db.legislators)
    return "update"
示例#6
0
def import_legislator(data):
    """Store a scraped legislator, updating a matching record if one exists.

    Roles are normalised ('role' key renamed to 'type', level fields copied
    in) before looking for an existing legislator with the same scraped name
    and a role in the scraped, previous or next term.
    """
    data = prepare_obj(data)
    full_name = data['full_name']
    data['_scraped_name'] = full_name

    for role in data['roles']:
        # Rename 'role' -> 'type'
        if 'role' in role:
            role['type'] = role.pop('role')

        # copy over country and/or state into role
        # TODO: base this on all possible level fields
        role['level'] = data['level']
        if 'country' in data:
            role['country'] = data['country']
        if 'state' in data:
            role['state'] = data['state']

    head_role = data['roles'][0]
    term = head_role['term']

    level = data['level']
    abbrev = data[level]

    prev_term = get_previous_term(abbrev, term)
    next_term = get_next_term(abbrev, term)

    spec = {level: abbrev,
            'type': head_role['type'],
            'term': {'$in': [term, prev_term, next_term]}}
    for optional in ('district', 'chamber'):
        if optional in head_role:
            spec[optional] = head_role[optional]

    query = {'level': level, level: abbrev,
             '_scraped_name': data['full_name'],
             'roles': {'$elemMatch': spec}}
    leg = db.legislators.find_one(query)

    if not leg:
        insert_with_id(data)
        return

    leg.setdefault('old_roles', {})
    leg_term = leg['roles'][0]['term']

    if leg_term == prev_term:
        # Move to old
        leg['old_roles'][leg_term] = leg['roles']
    elif leg_term == next_term:
        leg['old_roles'][term] = data['roles']
        data['roles'] = leg['roles']

    update(leg, data, db.legislators)
示例#7
0
def test_insert_with_id_types():
    """The id prefix letter follows ``_type``; unknown types raise ValueError."""
    expectations = (
        ('person', 'EXL'),
        ('person', 'EXL'),
        ('committee', 'EXC'),
        ('bill', 'EXB'),
    )
    for type_name, prefix in expectations:
        obj = {'_type': type_name, 'level': 'state', 'state': 'ex'}
        assert utils.insert_with_id(obj).startswith(prefix)

    unknown = {'_type': 'other', 'level': 'state', 'state': 'ex'}
    assert_raises(ValueError, utils.insert_with_id, unknown)
示例#8
0
文件: test_utils.py 项目: JT5D/billy
def test_insert_with_id_types():
    """Each supported ``_type`` maps to its id prefix; others are rejected."""
    def _make(type_name):
        return {'_type': type_name, 'state': 'ex'}

    for type_name, prefix in (('person', 'EXL'), ('person', 'EXL'),
                              ('committee', 'EXC'), ('bill', 'EXB')):
        new_id = utils.insert_with_id(_make(type_name))
        assert new_id.startswith(prefix)

    assert_raises(ValueError, utils.insert_with_id, _make('other'))
示例#9
0
def test_activate_legislators():
    """Only a current-term legislator without an end date gets activated.

    Three legislators are inserted: one from the previous term, one from the
    current term with no end date, and one from the current term whose role
    already ended. After activation only the second one should have the
    role's fields promoted to the top level.
    """
    def _legislator(term, district, end_date=None):
        return {'_type': 'person', 'level': 'state', 'state': 'ex',
                'roles': [{'type': 'member', 'chamber': 'upper',
                           'level': 'state', 'state': 'ex',
                           'term': term, 'district': district,
                           'party': 'Democrat',
                           'start_date': None, 'end_date': end_date}]}

    previous_id = utils.insert_with_id(_legislator('2009-2010', '1'))
    current_id = utils.insert_with_id(_legislator('2011-2012', '2'))
    ended_id = utils.insert_with_id(
        _legislator('2011-2012', '3', datetime.datetime(2011, 1, 1)))

    legislators.activate_legislators('2011-2012', 'ex', 'state')

    promoted_fields = ('active', 'district', 'chamber', 'party')

    # previous term: untouched
    previous = db.legislators.find_one({'_id': previous_id})
    for field in promoted_fields:
        assert field not in previous

    # current term, still serving: activated
    current = db.legislators.find_one({'_id': current_id})
    assert current['active'] == True
    assert current['district'] == '2'
    assert current['chamber'] == 'upper'
    assert current['party'] == 'Democrat'

    # current term, role ended: untouched
    ended = db.legislators.find_one({'_id': ended_id})
    for field in promoted_fields:
        assert field not in ended
示例#10
0
def import_legislator(data):
    """Insert a scraped legislator or fold it into a matching stored one.

    A stored legislator matches when it shares the state and scraped name and
    has a role of the same type (and district/chamber, when scraped) in the
    scraped term or the term directly before or after it.
    """
    data = prepare_obj(data)
    data['_scraped_name'] = data['full_name']

    # Rename 'role' -> 'type'
    for role in data['roles']:
        if 'role' in role:
            role['type'] = role.pop('role')

    newest_role = data['roles'][0]
    term = newest_role['term']
    state = data['state']
    prev_term = get_previous_term(state, term)
    next_term = get_next_term(state, term)

    role_spec = {
        'state': state,
        'type': newest_role['type'],
        'term': {'$in': [term, prev_term, next_term]},
    }
    for key in ('district', 'chamber'):
        if key in newest_role:
            role_spec[key] = newest_role[key]

    match = db.legislators.find_one({
        'state': state,
        '_scraped_name': data['full_name'],
        'roles': {'$elemMatch': role_spec},
    })

    if not match:
        insert_with_id(data)
        return

    match.setdefault('old_roles', {})
    stored_term = match['roles'][0]['term']

    if stored_term == prev_term:
        # Move to old
        match['old_roles'][stored_term] = match['roles']
    elif stored_term == next_term:
        match['old_roles'][term] = data['roles']
        data['roles'] = match['roles']

    update(match, data, db.legislators)
示例#11
0
def import_committees_from_legislators(current_term, level, abbr):
    """Create committees from legislators that have committee roles.

    Scans legislators of the given ``level`` with a role in ``current_term``
    for ``abbr``. Every 'committee member' role without a ``committee_id`` is
    attached to its committee, which is inserted first if it does not exist.
    """
    current_legislators = db.legislators.find({
        'level': level,
        'roles': {'$elemMatch': {'term': current_term, level: abbr}},
    })

    for legislator in current_legislators:
        for role in legislator['roles']:
            # only committee memberships that are not linked yet
            if role['type'] != 'committee member' or 'committee_id' in role:
                continue

            spec = {'level': level,
                    level: abbr,
                    'chamber': role['chamber'],
                    'committee': role['committee']}
            if 'subcommittee' in role:
                spec['subcommittee'] = role['subcommittee']

            committee = db.committees.find_one(spec)
            if not committee:
                # reuse the lookup spec as the new committee document
                committee = spec
                committee['_type'] = 'committee'
                # copy required fields from legislator to committee
                for field in settings.BILLY_LEVEL_FIELDS:
                    committee[field] = legislator[field]
                committee['members'] = []
                committee['sources'] = []
                committee.setdefault('subcommittee', None)
                insert_with_id(committee)

            is_member = any(member['leg_id'] == legislator['leg_id']
                            for member in committee['members'])
            if is_member:
                continue

            committee['members'].append({
                'name': legislator['full_name'],
                'leg_id': legislator['leg_id'],
                'role': role.get('position') or 'member',
            })
            db.committees.save(committee, safe=True)

            role['committee_id'] = committee['_id']

        db.legislators.save(legislator, safe=True)
示例#12
0
def test_update():
    """Exercise ``utils.update``: no-op update, real update, locked fields.

    A bill is inserted with explicit, equal timestamps. Updating it with
    itself must leave ``updated_at`` alone; updating it with changed values
    must bump ``updated_at`` and overwrite ``field1`` but not the locked
    ``field2``.
    """
    dt = datetime.datetime.utcnow()
    obj1 = {'_type': 'bill', 'state': 'ex', 'field1': 'stuff',
            'field2': 'original',
            # A list of field names, as in the other test_update variants.
            # A bare string would make any `in` check a substring match, so
            # the test would pass only by coincidence.
            '_locked_fields': ['field2'],
            'created_at': dt, 'updated_at': dt}

    id1 = utils.insert_with_id(obj1)
    obj1 = db.bills.find_one(id1)

    # Updating a bill with itself shouldn't cause 'updated_at' to be changed
    utils.update(obj1, obj1, db.bills)
    obj2 = db.bills.find_one({'_id': id1})
    assert obj2['created_at'] == obj2['updated_at']
    assert obj1['updated_at'] == obj2['updated_at']

    utils.update(obj1, {'_type': 'bill', 'field1': 'more stuff',
                        'field2': 'a change', 'state': 'ex'},
                 db.bills)
    obj2 = db.bills.find_one({'_id': id1})
    assert obj2['created_at'] != obj2['updated_at']
    assert obj1['updated_at'] != obj2['updated_at']
    assert obj2['field1'] == 'more stuff'

    # make sure locked fields don't get overwritten
    assert obj2['field2'] == 'original'
示例#13
0
def test_update():
    """``utils.update`` bumps timestamps on change and honours locked fields."""
    original = {'_type': 'bill', 'level': 'state', 'state': 'ex',
                'field1': 'stuff', 'field2': 'original',
                '_locked_fields': ['field2']}

    bill_id = utils.insert_with_id(original)
    stored = db.bills.find_one(bill_id)

    # a self-update is a no-op and must not touch 'updated_at'
    utils.update(stored, stored, db.bills)
    reloaded = db.bills.find_one({'_id': bill_id})
    assert (reloaded['created_at'] == reloaded['updated_at']
            == stored['updated_at'])

    created_before = reloaded['created_at']   # compared against later

    # pause briefly so the new 'updated_at' cannot equal 'created_at'
    time.sleep(0.005)
    utils.update(stored, {'field1': 'more stuff', 'field2': 'a change'},
                 db.bills)
    reloaded = db.bills.find_one({'_id': bill_id})

    # timestamps moved forward
    assert reloaded['created_at'] < reloaded['updated_at']
    assert created_before < reloaded['updated_at']

    # unlocked field1 is overwritten, locked field2 is preserved
    assert reloaded['field1'] == 'more stuff'
    assert reloaded['field2'] == 'original'
示例#14
0
def test_update():
    """Exercise ``utils.update``: no-op update, real update, locked fields.

    The bill starts with explicit, equal timestamps. A self-update must not
    change ``updated_at``; an update with new values must change it and
    overwrite ``field1`` while leaving the locked ``field2`` untouched.
    """
    dt = datetime.datetime.utcnow()
    obj1 = {
        "_type": "bill",
        "state": "ex",
        "field1": "stuff",
        "field2": "original",
        # A list of field names, as in the other test_update variants.
        # A bare string would make any `in` check a substring match, so the
        # test would pass only by coincidence.
        "_locked_fields": ["field2"],
        "created_at": dt,
        "updated_at": dt,
    }

    id1 = utils.insert_with_id(obj1)
    obj1 = db.bills.find_one(id1)

    # Updating a bill with itself shouldn't cause 'updated_at' to be changed
    utils.update(obj1, obj1, db.bills)
    obj2 = db.bills.find_one({"_id": id1})
    assert obj2["created_at"] == obj2["updated_at"]
    assert obj1["updated_at"] == obj2["updated_at"]

    changes = {"_type": "bill", "field1": "more stuff",
               "field2": "a change", "state": "ex"}
    utils.update(obj1, changes, db.bills)
    obj2 = db.bills.find_one({"_id": id1})
    assert obj2["created_at"] != obj2["updated_at"]
    assert obj1["updated_at"] != obj2["updated_at"]
    assert obj2["field1"] == "more stuff"

    # make sure locked fields don't get overwritten
    assert obj2["field2"] == "original"
示例#15
0
def test_update():
    """Exercise ``utils.update``: no-op update, real update, locked fields.

    The bill starts with explicit, equal timestamps. A self-update must not
    change ``updated_at``; an update with new values must change it and
    overwrite ``field1`` while leaving the locked ``field2`` untouched.
    """
    dt = datetime.datetime.utcnow()
    obj1 = {
        '_type': 'bill',
        'state': 'ex',
        'field1': 'stuff',
        'field2': 'original',
        # A list of field names, as in the other test_update variants.
        # A bare string would make any `in` check a substring match, so the
        # test would pass only by coincidence.
        '_locked_fields': ['field2'],
        'created_at': dt,
        'updated_at': dt
    }

    id1 = utils.insert_with_id(obj1)
    obj1 = db.bills.find_one(id1)

    # Updating a bill with itself shouldn't cause 'updated_at' to be changed
    utils.update(obj1, obj1, db.bills)
    obj2 = db.bills.find_one({'_id': id1})
    assert obj2['created_at'] == obj2['updated_at']
    assert obj1['updated_at'] == obj2['updated_at']

    utils.update(
        obj1, {
            '_type': 'bill',
            'field1': 'more stuff',
            'field2': 'a change',
            'state': 'ex'
        }, db.bills)
    obj2 = db.bills.find_one({'_id': id1})
    assert obj2['created_at'] != obj2['updated_at']
    assert obj1['updated_at'] != obj2['updated_at']
    assert obj2['field1'] == 'more stuff'

    # make sure locked fields don't get overwritten
    assert obj2['field2'] == 'original'
示例#16
0
def test_update_sneaky_filter():
    """A per-field filter decides whether a changed value counts as a change.

    ``set_field`` is compared as a set, so a reordered list must not trigger
    an update while a list with a new element must.
    """
    def _differs_as_set(old, new):
        return set(old) != set(new)

    sneaky_filter = {'set_field': _differs_as_set}

    bill_id = utils.insert_with_id({
        '_type': 'bill',
        'state': 'ex',
        'normal_field': 1,
        'set_field': [1, 2, 3],
    })
    obj = db.bills.find_one(bill_id)

    # same elements, different order: nothing should change
    reordered = {'set_field': [3, 2, 1]}
    utils.update(obj, reordered, db.bills, sneaky_filter)
    assert obj['set_field'] == [1, 2, 3]
    assert obj['updated_at'] == obj['created_at']

    # an extra element: the field and the timestamp should change
    extended = {'set_field': [4, 3, 2, 1]}
    utils.update(obj, extended, db.bills, sneaky_filter)
    assert obj['set_field'] == [4, 3, 2, 1]
    assert obj['updated_at'] > obj['created_at']
示例#17
0
文件: test_utils.py 项目: JT5D/billy
def test_update():
    """Check timestamp handling and locked-field protection of ``utils.update``."""
    seed = {'_type': 'bill', 'state': 'ex', 'field1': 'stuff',
            'field2': 'original', '_locked_fields': ['field2']}

    bill_id = utils.insert_with_id(seed)
    before = db.bills.find_one(bill_id)

    # updating a bill with itself must leave 'updated_at' unchanged
    utils.update(before, before, db.bills)
    after = db.bills.find_one({'_id': bill_id})
    assert after['created_at'] == after['updated_at'] == before['updated_at']

    first_stamp = after['created_at']   # kept for the comparison below

    # change two fields, one of them locked
    new_values = {'field1': 'more stuff', 'field2': 'a change'}
    time.sleep(0.005)   # long enough that updated_at differs from created_at
    utils.update(before, new_values, db.bills)
    after = db.bills.find_one({'_id': bill_id})

    # 'updated_at' has advanced past the creation time
    assert after['created_at'] < after['updated_at']
    assert first_stamp < after['updated_at']

    # field1 is overwritten, the locked field2 is not
    assert after['field1'] == 'more stuff'
    assert after['field2'] == 'original'
示例#18
0
def test_insert_with_id():
    """Inserted legislators get unique ``EXL``-prefixed, six-digit ids."""
    id_re = r"^EXL\d{6,6}$"
    first = {"full_name": "a test legislator",
             "_type": "person",
             "state": "ex"}
    second = {"full_name": "another legislator",
              "_type": "person",
              "state": "ex"}

    first_id = utils.insert_with_id(first)
    assert re.match(id_re, first_id)
    stored = db.legislators.find_one({"_id": first_id})
    assert stored["_all_ids"] == [first_id]

    second_id = utils.insert_with_id(second)
    assert re.match(id_re, second_id)
    assert second_id != first_id
    stored = db.legislators.find_one({"_id": second_id})
    assert stored
    assert stored["_all_ids"] == [second_id]
示例#19
0
def test_deactivate_legislators():
    """Legislators without a current-term role are deactivated.

    The previous-term legislator should lose its promoted fields and have its
    roles archived under ``old_roles``; the current-term legislator should be
    left exactly as inserted.
    """
    def _active_legislator(term, district):
        roles = [{'type': 'member', 'chamber': 'upper', 'state': 'ex',
                  'term': term, 'district': district,
                  'party': 'Democrat',
                  'start_date': None, 'end_date': None}]
        return {'_type': 'person', 'state': 'ex',
                'roles': roles,
                'active': True,
                'district': district,
                'chamber': 'upper',
                'party': 'Democrat'}

    # Previous term
    former = _active_legislator('2009-2010', '1')
    former_roles = former['roles']

    # Current term, no end date
    sitting = _active_legislator('2011-2012', '2')
    sitting_roles = sitting['roles']

    former_id = utils.insert_with_id(former)
    sitting_id = utils.insert_with_id(sitting)

    legislators.deactivate_legislators('ex', '2011-2012')

    former = db.legislators.find_one({'_id': former_id})
    assert former['active'] == False
    for field in ('chamber', 'district', 'party'):
        assert field not in former
    assert former['roles'] == []
    assert former['old_roles']['2009-2010'] == former_roles

    sitting = db.legislators.find_one({'_id': sitting_id})
    assert sitting['active'] == True
    assert sitting['chamber'] == 'upper'
    assert sitting['district'] == '2'
    assert sitting['party'] == 'Democrat'
    assert sitting['roles'] == sitting_roles
    assert 'old_roles' not in sitting
示例#20
0
def import_legislator(data):
    """Import one scraped legislator, merging with a stored record if found.

    The stored record is looked up by state, scraped name and a role matching
    the scraped one in the same, previous or next term. Roles from an older
    term are archived under ``old_roles`` keyed by term.
    """
    data = prepare_obj(data)
    scraped_name = data['full_name']
    data['_scraped_name'] = scraped_name

    # Rename 'role' -> 'type'
    for role in data['roles']:
        if 'role' in role:
            role['type'] = role.pop('role')

    cur_role = data['roles'][0]
    term = cur_role['term']
    state = data['state']
    prev_term = get_previous_term(state, term)
    next_term = get_next_term(state, term)

    spec = {'state': state,
            'type': cur_role['type'],
            'term': {'$in': [term, prev_term, next_term]}}
    if 'district' in cur_role:
        spec['district'] = cur_role['district']
    if 'chamber' in cur_role:
        spec['chamber'] = cur_role['chamber']

    query = {'state': state,
             '_scraped_name': data['full_name'],
             'roles': {'$elemMatch': spec}}
    leg = db.legislators.find_one(query)

    if not leg:
        insert_with_id(data)
        return

    if 'old_roles' not in leg:
        leg['old_roles'] = {}

    leg_term = leg['roles'][0]['term']
    if leg_term == prev_term:
        # Move to old
        leg['old_roles'][leg_term] = leg['roles']
    elif leg_term == next_term:
        # scraped roles are the older ones; keep the stored roles current
        leg['old_roles'][term] = data['roles']
        data['roles'] = leg['roles']

    update(leg, data, db.legislators)
示例#21
0
def test_insert_with_id():
    """``insert_with_id`` returns distinct ids of the form EXL + six digits."""
    id_re = r'^EXL\d{6,6}$'
    legislator_a = {'full_name': 'a test legislator',
                    '_type': 'person',
                    'state': 'ex'}
    legislator_b = {'full_name': 'another legislator',
                    '_type': 'person',
                    'state': 'ex'}

    id_a = utils.insert_with_id(legislator_a)
    assert re.match(id_re, id_a)
    doc = db.legislators.find_one({'_id': id_a})
    assert doc['_all_ids'] == [id_a]

    id_b = utils.insert_with_id(legislator_b)
    assert re.match(id_re, id_b)
    assert id_b != id_a
    doc = db.legislators.find_one({'_id': id_b})
    assert doc
    assert doc['_all_ids'] == [id_b]
示例#22
0
def test_insert_with_id():
    """Two people inserted in a row receive different, well-formed ids."""
    id_re = r'^EXL\d{6,6}$'
    people = [
        {'full_name': full_name, '_type': 'person', 'state': 'ex'}
        for full_name in ('a test legislator', 'another legislator')
    ]

    earlier_id = utils.insert_with_id(people[0])
    assert re.match(id_re, earlier_id)
    record = db.legislators.find_one({'_id': earlier_id})
    assert record['_all_ids'] == [earlier_id]

    later_id = utils.insert_with_id(people[1])
    assert re.match(id_re, later_id)
    assert later_id != earlier_id
    record = db.legislators.find_one({'_id': later_id})
    assert record
    assert record['_all_ids'] == [later_id]
示例#23
0
def test_update_sneaky_filter():
    """Order-only changes to ``set_field`` are ignored when a filter says so."""
    sneaky_filter = {
        'set_field': lambda old, new: set(old) != set(new),
    }

    bill_id = utils.insert_with_id(
        {'_type': 'bill', 'level': 'state', 'state': 'ex',
         'normal_field': 1, 'set_field': [1, 2, 3]})
    obj = db.bills.find_one(bill_id)

    # the set will be the same, shouldn't update
    utils.update(obj, {'set_field': [3, 2, 1]}, db.bills, sneaky_filter)
    assert obj['set_field'] == [1, 2, 3]
    assert obj['updated_at'] == obj['created_at']

    # the set now differs, should update
    utils.update(obj, {'set_field': [4, 3, 2, 1]}, db.bills, sneaky_filter)
    assert obj['set_field'] == [4, 3, 2, 1]
    assert obj['updated_at'] > obj['created_at']
示例#24
0
def import_bills(state, data_dir):
    """Import every scraped bill JSON file for ``state`` into ``db.bills``.

    Reads ``<data_dir>/<state>/bills/*.json``. For each file it normalises the
    bill id, resolves sponsor and voter names to legislator ids, resolves vote
    committees to committee ids, records the bill's term, merges version
    titles into ``alternate_titles`` and then inserts the bill or updates the
    stored one matching state/session/chamber/bill_id.

    :param state: abbreviation used as the metadata ``_id`` and in lookups
    :param data_dir: directory that contains one sub-directory per state
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, 'bills', '*.json')

    meta = db.metadata.find_one({'_id': state})

    # Build a session to term mapping
    sessions = {}
    for term in meta['terms']:
        for session in term['sessions']:
            sessions[session] = term['name']

    paths = glob.glob(pattern)

    for path in paths:
        with open(path) as f:
            data = prepare_obj(json.load(f))

        # clean up bill_id
        data['bill_id'] = fix_bill_id(data['bill_id'])

        # only keep subjects when the scrape actually produced some
        subjects = data.pop('subjects', None)
        if subjects:
            data['scraped_subjects'] = subjects

        bill = db.bills.find_one({'state': data['state'],
                                  'session': data['session'],
                                  'chamber': data['chamber'],
                                  'bill_id': data['bill_id']})

        for sponsor in data['sponsors']:
            sponsor['leg_id'] = get_legislator_id(state, data['session'],
                                                  None, sponsor['name'])

        for vote in data['votes']:
            if 'committee' in vote:
                vote['committee_id'] = get_committee_id(state,
                                                        vote['chamber'],
                                                        vote['committee'])

            # replace each list of voter names with name/leg_id pairs
            for vtype in ('yes_votes', 'no_votes', 'other_votes'):
                vote[vtype] = [
                    {'name': voter,
                     'leg_id': get_legislator_id(state, data['session'],
                                                 vote['chamber'], voter)}
                    for voter in vote[vtype]
                ]

        data['_term'] = sessions[data['session']]

        # Merge any version titles into the alternate_titles list
        alt_titles = set(data.get('alternate_titles', []))
        for version in data['versions']:
            if 'title' in version:
                alt_titles.add(version['title'])
            if '+short_title' in version:
                alt_titles.add(version['+short_title'])
        # Make sure the primary title isn't included in the alternate
        # title list (a bill without a title is left as-is)
        if 'title' in data:
            alt_titles.discard(data['title'])
        data['alternate_titles'] = list(alt_titles)

        # keywords are derived the same way for new and existing bills
        data['_keywords'] = list(bill_keywords(data))
        if not bill:
            insert_with_id(data)
        else:
            update(bill, data, db.bills)

    # parenthesised form prints the same text on Python 2 and Python 3
    print('imported %s bill files' % len(paths))

    populate_current_fields(state)
    ensure_indexes()
示例#25
0
def import_legislator(data):
    """Insert a scraped legislator or merge it into an existing record.

    Up to three lookups are tried, all restricted to the same LEVEL_FIELD
    value and ``_scraped_name``:

    1. a legislator whose current ``roles`` contain the scraped role in the
       scraped term;
    2. a legislator whose ``old_roles`` for the scraped term contain the
       scraped role;
    3. a legislator whose current ``roles`` contain the scraped role in any
       term.

    Depending on which lookup matched, the scraped roles are either kept as
    the current roles or filed under ``old_roles`` keyed by term.

    Returns "update" if an existing legislator was updated, "insert" if a new
    one was inserted.
    """
    data = prepare_obj(data)

    # keep a scraper-provided '_scraped_name'; otherwise fall back to full_name
    if data.get('_scraped_name') is None:
        data['_scraped_name'] = data['full_name']

    # Rename 'role' -> 'type'
    for role in data['roles']:
        if 'role' in role:
            role['type'] = role.pop('role')

        # copy over LEVEL_FIELD into role
        if settings.LEVEL_FIELD in data:
            role[settings.LEVEL_FIELD] = data[settings.LEVEL_FIELD]

    # the first scraped role drives all matching below
    scraped_role = data['roles'][0]
    scraped_term = scraped_role['term']

    abbr = data[settings.LEVEL_FIELD]

    # role-level criteria shared by all three lookups ('term' is dropped
    # again after the first one)
    spec = {
        settings.LEVEL_FIELD: abbr,
        'type': scraped_role['type'],
        'term': scraped_term
    }
    if 'district' in scraped_role:
        spec['district'] = scraped_role['district']
    if 'chamber' in scraped_role:
        spec['chamber'] = scraped_role['chamber']

    # find matching legislator in current term
    leg = db.legislators.find_one({
        settings.LEVEL_FIELD: abbr,
        '_scraped_name': data['_scraped_name'],
        'roles': {
            '$elemMatch': spec
        }
    })

    # legislator with a matching old_role
    if not leg:
        # the term is now expressed by the 'old_roles.<term>' key, so it is
        # removed from the per-role criteria
        spec.pop('term')
        leg = db.legislators.find_one({
            settings.LEVEL_FIELD: abbr,
            '_scraped_name': data['_scraped_name'],
            'old_roles.%s' % scraped_term: {
                '$elemMatch': spec
            }
        })

        if leg:
            if 'old_roles' not in data:
                data['old_roles'] = leg.get('old_roles', {})
            # put scraped roles into their old_roles
            data['old_roles'][scraped_term] = data['roles']
            data['roles'] = leg['roles']  # don't overwrite their current roles

    # active matching legislator from different term
    if not leg:
        # 'term' was already removed by the previous lookup; the default
        # keeps this pop from raising
        spec.pop('term', None)
        leg = db.legislators.find_one({
            settings.LEVEL_FIELD: abbr,
            '_scraped_name': data['_scraped_name'],
            'roles': {
                '$elemMatch': spec
            }
        })
        if leg:
            if 'old_roles' not in data:
                data['old_roles'] = leg.get('old_roles', {})

            # scraped_term < leg's term
            if term_older_than(abbr, scraped_term, leg['roles'][0]['term']):
                # move scraped roles into old_roles
                data['old_roles'][scraped_term] = data['roles']
                data['roles'] = leg['roles']
            else:
                # otherwise archive the stored roles under their own term and
                # let the scraped roles become the current ones
                data['old_roles'][leg['roles'][0]['term']] = leg['roles']

    # NOTE(review): `filters` is not defined in this function; presumably a
    # module-level setting -- confirm where it comes from
    data = apply_filters(filters, data)

    if leg:
        update(leg, data, db.legislators)
        return "update"
    else:
        insert_with_id(data)
        return "insert"
示例#26
0
文件: bills.py 项目: VersaHQ/billy
def import_bill(data, standalone_votes, categorizer):
    """
        Insert or update a bill.

        data - raw bill JSON; normalized in place (bill ids, subjects,
               companions, document ids, sponsor ids, action dates,
               related votes, alternate titles) before being saved
        standalone_votes - votes scraped separately, keyed by
               (chamber, session, bill_id); the entry matching this
               bill is popped off and merged with the bill's own votes
        categorizer - SubjectCategorizer (None - no categorization)

        Returns "insert" when no prior version of the bill was found in
        db.bills, "update" otherwise.
    """
    abbr = data[settings.LEVEL_FIELD]

    # clean up bill_ids
    data['bill_id'] = fix_bill_id(data['bill_id'])
    if 'alternate_bill_ids' in data:
        data['alternate_bill_ids'] = [fix_bill_id(bid) for bid in
                                      data['alternate_bill_ids']]

    # move subjects to scraped_subjects
    # NOTE: intentionally doesn't copy blank lists of subjects
    # this avoids the problem where a bill is re-run but we can't
    # get subjects anymore (quite common)
    subjects = data.pop('subjects', None)
    if subjects:
        data['scraped_subjects'] = subjects

    # update categorized subjects
    if categorizer:
        categorizer.categorize_bill(data)

    # companions
    for companion in data['companions']:
        companion['bill_id'] = fix_bill_id(companion['bill_id'])
        # query based on companion
        spec = companion.copy()
        spec[settings.LEVEL_FIELD] = abbr
        # a falsy chamber means "unknown": don't constrain the lookup by it
        if not spec['chamber']:
            spec.pop('chamber')
        companion_obj = db.bills.find_one(spec)
        if companion_obj:
            companion['internal_id'] = companion_obj['_id']
        else:
            logger.warning('Unknown companion: {chamber} {session} {bill_id}'
                           .format(**companion))

    # look for a prior version of this bill
    bill = db.bills.find_one({settings.LEVEL_FIELD: abbr,
                              'session': data['session'],
                              'chamber': data['chamber'],
                              'bill_id': data['bill_id']})

    # keep doc ids consistent
    doc_matcher = DocumentMatcher(abbr)
    if bill:
        doc_matcher.learn_ids(bill['versions'] + bill['documents'])
    doc_matcher.set_ids(data['versions'] + data['documents'])

    # match sponsor leg_ids
    match_sponsor_ids(abbr, data)

    # process votes ############

    # pull votes off bill
    bill_votes = data.pop('votes', [])

    # grab the external bill votes if present
    if metadata(abbr).get('_partial_vote_bill_id'):
        # this is a hack initially added for Rhode Island where we can't
        # determine the full bill_id, if this key is in the metadata
        # we just use the numeric portion, not ideal as it won't work
        # where HB/SBs overlap, but in RI they never do
        # pull off numeric portion of bill_id
        # (second whitespace-separated token of the bill_id)
        numeric_bill_id = data['bill_id'].split()[1]
        bill_votes += standalone_votes.pop((data['chamber'], data['session'],
                                            numeric_bill_id), [])
    else:
        # add loaded votes to data
        bill_votes += standalone_votes.pop((data['chamber'], data['session'],
                                            data['bill_id']), [])

    # do id matching and other vote prep
    # (a brand new bill has no _id yet, so None is passed instead)
    if bill:
        prepare_votes(abbr, data['session'], bill['_id'], bill_votes)
    else:
        prepare_votes(abbr, data['session'], None, bill_votes)

    # process actions ###########

    dates = {'first': None, 'last': None, 'passed_upper': None,
             'passed_lower': None, 'signed': None}

    # action types for which we try to find a matching vote
    # NOTE(review): "committee:passed:failed" looks like it may have been
    # meant to be "committee:failed" -- confirm against the action type list
    vote_flags = {
        "bill:passed",
        "bill:failed",
        "bill:veto_override:passed",
        "bill:veto_override:failed",
        "amendment:passed",
        "amendment:failed",
        "committee:passed",
        "committee:passed:favorable",
        "committee:passed:unfavorable",
        "committee:passed:failed"
    }
    # vote ids attached to at least one action so far
    already_linked = set()
    # vote ids that ended up attached to more than one action
    remove_vote = set()

    for action in data['actions']:
        adate = action['date']

        # resolvers close over the current action so lookups use its actor
        def _match_committee(name):
            return get_committee_id(abbr, action['actor'], name)

        def _match_legislator(name):
            return get_legislator_id(abbr,
                                     data['session'],
                                     action['actor'],
                                     name)

        resolvers = {
            "committee": _match_committee,
            "legislator": _match_legislator
        }

        # attach an internal id to every related entity of a known type
        if "related_entities" in action:
            for entity in action['related_entities']:
                try:
                    resolver = resolvers[entity['type']]
                except KeyError as e:
                    # We don't know how to deal.
                    logger.error("I don't know how to sort a %s" % e)
                    continue

                id = resolver(entity['name'])
                entity['id'] = id

        # first & last dates
        if not dates['first'] or adate < dates['first']:
            dates['first'] = adate
        if not dates['last'] or adate > dates['last']:
            dates['last'] = adate

        # passed & signed dates
        # (only the first matching action for each key is recorded)
        if (not dates['passed_upper'] and action['actor'] == 'upper'
                and 'bill:passed' in action['type']):
            dates['passed_upper'] = adate
        elif (not dates['passed_lower'] and action['actor'] == 'lower'
                and 'bill:passed' in action['type']):
            dates['passed_lower'] = adate
        elif (not dates['signed'] and 'governor:signed' in action['type']):
            dates['signed'] = adate

        # vote-action matching
        action_attached = False
        # only attempt vote matching if action has a date and is one of the
        # designated vote action types
        if set(action['type']).intersection(vote_flags) and action['date']:
            for vote in bill_votes:
                if not vote['date']:
                    continue

                # a vote matches when it happened within 20 hours of the
                # action and in the chamber that performed the action
                delta = abs(vote['date'] - action['date'])
                if (delta < datetime.timedelta(hours=20) and
                        vote['chamber'] == action['actor']):
                    if action_attached:
                        # multiple votes match, we can't guess
                        action.pop('related_votes', None)
                    else:
                        related_vote = vote['vote_id']
                        # seen on an earlier action too: flag it so the
                        # ambiguous link is removed from every action below
                        if related_vote in already_linked:
                            remove_vote.add(related_vote)

                        already_linked.add(related_vote)
                        action['related_votes'] = [related_vote]
                        action_attached = True

    # remove related_votes that we linked to multiple actions
    for action in data['actions']:
        for vote in remove_vote:
            if vote in action.get('related_votes', []):
                action['related_votes'].remove(vote)

    # save action dates to data
    data['action_dates'] = dates

    data['_term'] = term_for_session(abbr, data['session'])

    alt_titles = set(data.get('alternate_titles', []))

    for version in data['versions']:
        # add/update tracked_versions collection
        track_version(data, version)

        # Merge any version titles into the alternate_titles list
        if 'title' in version:
            alt_titles.add(version['title'])
        if '+short_title' in version:
            alt_titles.add(version['+short_title'])
    try:
        # Make sure the primary title isn't included in the
        # alternate title list
        alt_titles.remove(data['title'])
    except KeyError:
        pass
    data['alternate_titles'] = list(alt_titles)
    data = apply_filters(filters, data)

    # votes are saved against whichever object was written to db.bills
    if not bill:
        insert_with_id(data)
        git_add_bill(data)
        save_votes(data, bill_votes)
        return "insert"
    else:
        update(bill, data, db.bills)
        git_add_bill(bill)
        save_votes(bill, bill_votes)
        return "update"
示例#27
0
def import_bill(data, votes):
    """Insert a scraped bill into db.bills, or update the stored copy.

    data - scraped bill dict; normalized in place before it is saved
    votes - separately loaded votes keyed by (chamber, session, bill_id);
            the entry for this bill is popped off and appended to the
            bill's own votes
    """
    level = data['level']
    abbr = data[level]

    # normalize the bill_id before it is used as a lookup key
    data['bill_id'] = fix_bill_id(data['bill_id'])

    # subjects are stored under scraped_subjects
    # NOTE: an empty subject list is deliberately dropped rather than
    # copied, so a re-run that can no longer fetch subjects (quite
    # common) doesn't wipe the ones we already have
    scraped_subjects = data.pop('subjects', None)
    if scraped_subjects:
        data['scraped_subjects'] = scraped_subjects

    # fold the separately loaded votes into the bill
    chamber, session, bill_id = (data['chamber'], data['session'],
                                 data['bill_id'])
    data['votes'].extend(votes.pop((chamber, session, bill_id), []))

    # previously imported version of this bill, if any
    bill = db.bills.find_one({'level': level, level: abbr,
                              'session': session,
                              'chamber': chamber,
                              'bill_id': bill_id})

    # reuse the vote ids of the stored bill where possible
    matcher = VoteMatcher(abbr)
    if bill:
        matcher.learn_vote_ids(bill['votes'])
    matcher.set_vote_ids(data['votes'])

    # resolve sponsors to legislator ids
    for sponsor in data['sponsors']:
        sponsor['leg_id'] = get_legislator_id(abbr, session, None,
                                              sponsor['name'])

    for vote in data['votes']:
        # resolve the committee, when the vote names one
        if 'committee' in vote:
            vote['committee_id'] = get_committee_id(level, abbr,
                                                    vote['chamber'],
                                                    vote['committee'])

        # replace each list of voter names with name/leg_id records
        for vtype in ('yes_votes', 'no_votes', 'other_votes'):
            vote[vtype] = [
                {'name': voter,
                 'leg_id': get_legislator_id(abbr, session,
                                             vote['chamber'], voter)}
                for voter in vote[vtype]
            ]

    data['_term'] = term_for_session(abbr, session)

    # alternate titles = existing alternates + every version title,
    # minus the primary title
    alt_titles = set(data.get('alternate_titles', []))
    for version in data['versions']:
        for title_key in ('title', '+short_title'):
            if title_key in version:
                alt_titles.add(version[title_key])
    if 'title' in data:
        alt_titles.discard(data['title'])
    data['alternate_titles'] = list(alt_titles)

    # refresh search keywords
    data['_keywords'] = list(bill_keywords(data))

    if bill:
        update(bill, data, db.bills)
    else:
        insert_with_id(data)
示例#28
0
文件: bills.py 项目: annerajb/billy
def import_bill(data, votes, categorizer):
    """Insert or update a single scraped bill.

    data - scraped bill dict; normalized in place (bill ids, subjects,
           votes, sponsor/committee ids, action dates, alternate titles)
    votes - separately loaded votes keyed by (chamber, session, bill_id);
            the entry for this bill is popped off and appended to the
            bill's own votes
    categorizer - object with a categorize_bill(data) method, or a falsy
                  value to skip subject categorization

    Returns "insert" when the bill was not yet in db.bills, "update"
    otherwise.
    """
    level = data['level']
    abbr = data[level]

    # clean up bill_ids
    data['bill_id'] = fix_bill_id(data['bill_id'])
    if 'alternate_bill_ids' in data:
        data['alternate_bill_ids'] = [fix_bill_id(bid) for bid in
                                      data['alternate_bill_ids']]

    # move subjects to scraped_subjects
    # NOTE: intentionally doesn't copy blank lists of subjects
    # this avoids the problem where a bill is re-run but we can't
    # get subjects anymore (quite common)
    subjects = data.pop('subjects', None)
    if subjects:
        data['scraped_subjects'] = subjects

    # update categorized subjects
    if categorizer:
        categorizer.categorize_bill(data)

    # this is a hack added for Rhode Island where we can't
    # determine the full bill_id, if this key is in the metadata
    # we just use the numeric portion, not ideal as it won't work
    # in states where HB/SBs overlap, but in RI they never do
    if metadata(abbr).get('_partial_vote_bill_id'):
        # pull off numeric portion of bill_id
        numeric_bill_id = data['bill_id'].split()[1]
        bill_votes = votes.pop((data['chamber'], data['session'],
                                numeric_bill_id), [])
    else:
        # add loaded votes to data
        bill_votes = votes.pop((data['chamber'], data['session'],
                                data['bill_id']), [])

    data['votes'].extend(bill_votes)

    # look for a prior version of this bill
    bill = db.bills.find_one({'level': level, level: abbr,
                              'session': data['session'],
                              'chamber': data['chamber'],
                              'bill_id': data['bill_id']})

    # keep vote/doc ids consistent
    vote_matcher = VoteMatcher(abbr)
    doc_matcher = DocumentMatcher(abbr)
    if bill:
        vote_matcher.learn_ids(bill['votes'])
        doc_matcher.learn_ids(bill['versions'] + bill['documents'])
    vote_matcher.set_ids(data['votes'])
    doc_matcher.set_ids(data['versions'] + data['documents'])

    # match sponsor leg_ids; a sponsor that is not a known legislator
    # may be a committee, so try that as a fallback
    for sponsor in data['sponsors']:
        leg_id = get_legislator_id(abbr, data['session'], None,
                                   sponsor['name'])
        sponsor['leg_id'] = leg_id
        if leg_id is None:
            cid = get_committee_id(level, abbr, data['chamber'],
                                   sponsor['name'])
            if cid is not None:
                sponsor['committee_id'] = cid

    # process votes
    for vote in data['votes']:

        # committee_ids
        if 'committee' in vote:
            committee_id = get_committee_id(level, abbr, vote['chamber'],
                                            vote['committee'])
            vote['committee_id'] = committee_id

        # vote leg_ids
        for vtype in ('yes_votes', 'no_votes', 'other_votes'):
            svlist = []
            for svote in vote[vtype]:
                leg_id = get_legislator_id(abbr, data['session'],
                                           vote['chamber'], svote)
                svlist.append({'name': svote, 'leg_id': leg_id})

            vote[vtype] = svlist

    # process actions
    dates = {'first': None, 'last': None, 'passed_upper': None,
             'passed_lower': None, 'signed': None}
    for action in data['actions']:

        # We'll try to recover some Committee IDs here.
        if "committee" in action:
            cid = get_committee_id(level, abbr, data['chamber'],
                                   action['committee'])
            # keep the scraped name around; 'committee' itself only
            # survives when it could be resolved to an id
            action['_scraped_committee_name'] = action['committee']
            if cid is not None:
                action['committee'] = cid
            else:
                del action['committee']

        adate = action['date']

        # first & last
        # These must be two independent checks: with an elif the action
        # that sets 'first' could never also set 'last', so a bill with a
        # single action (or with actions in descending date order) ended
        # up with last == None.
        if not dates['first'] or adate < dates['first']:
            dates['first'] = adate
        if not dates['last'] or adate > dates['last']:
            dates['last'] = adate

        # passed & signed (only the first matching action is recorded)
        if (not dates['passed_upper'] and action['actor'] == 'upper'
                and 'bill:passed' in action['type']):
            dates['passed_upper'] = adate
        elif (not dates['passed_lower'] and action['actor'] == 'lower'
                and 'bill:passed' in action['type']):
            dates['passed_lower'] = adate
        elif (not dates['signed'] and 'governor:signed' in action['type']):
            dates['signed'] = adate

    # save action dates to data
    data['action_dates'] = dates

    data['_term'] = term_for_session(abbr, data['session'])

    alt_titles = set(data.get('alternate_titles', []))

    for version in data['versions']:
        # push versions to oyster
        if settings.ENABLE_OYSTER and 'url' in version:
            oysterize_version(data, version)

        # Merge any version titles into the alternate_titles list
        if 'title' in version:
            alt_titles.add(version['title'])
        if '+short_title' in version:
            alt_titles.add(version['+short_title'])
    try:
        # Make sure the primary title isn't included in the
        # alternate title list
        alt_titles.remove(data['title'])
    except KeyError:
        pass
    data['alternate_titles'] = list(alt_titles)

    if not bill:
        bill_id = insert_with_id(data)
        denormalize_votes(data, bill_id)
        return "insert"
    else:
        update(bill, data, db.bills)
        denormalize_votes(data, bill['_id'])
        return "update"
示例#29
0
def import_legislator(data):
    """Insert a scraped legislator, or merge it into a stored one.

    A stored legislator is looked up by scraped name plus the first
    scraped role, trying in order: a current role in the scraped term, an
    old role filed under the scraped term, and finally a current role in
    any term.

    Returns "update" when a stored legislator was matched, "insert"
    otherwise.
    """
    data = prepare_obj(data)

    # fall back to the full name when no scraped name was recorded
    if data.get('_scraped_name') is None:
        data['_scraped_name'] = data['full_name']

    for role in data['roles']:
        # the role's kind is stored under 'type'; rename a 'role' key
        if 'role' in role:
            role['type'] = role.pop('role')

        # every role carries the legislator's LEVEL_FIELD value
        if settings.LEVEL_FIELD in data:
            role[settings.LEVEL_FIELD] = data[settings.LEVEL_FIELD]

    scraped_role = data['roles'][0]
    scraped_term = scraped_role['term']

    abbr = data[settings.LEVEL_FIELD]

    # $elemMatch criteria built from the first scraped role
    spec = {settings.LEVEL_FIELD: abbr,
            'type': scraped_role['type'],
            'term': scraped_term}
    for optional_key in ('district', 'chamber'):
        if optional_key in scraped_role:
            spec[optional_key] = scraped_role[optional_key]

    def find_by_role(roles_field):
        # spec is read at call time, so 'term' removals below take effect
        return db.legislators.find_one({
            settings.LEVEL_FIELD: abbr,
            '_scraped_name': data['_scraped_name'],
            roles_field: {'$elemMatch': spec},
        })

    # 1) same legislator, role in the scraped term
    leg = find_by_role('roles')

    # 2) legislator whose old_roles for the scraped term match
    if not leg:
        spec.pop('term')
        leg = find_by_role('old_roles.%s' % scraped_term)

        if leg:
            data.setdefault('old_roles', leg.get('old_roles', {}))
            # scraped roles are historical; leave current roles untouched
            data['old_roles'][scraped_term] = data['roles']
            data['roles'] = leg['roles']

    # 3) active legislator whose matching role is from a different term
    if not leg:
        spec.pop('term', None)
        leg = find_by_role('roles')

        if leg:
            data.setdefault('old_roles', leg.get('old_roles', {}))

            stored_term = leg['roles'][0]['term']
            if term_older_than(abbr, scraped_term, stored_term):
                # scraped term is the older one: file it under old_roles
                data['old_roles'][scraped_term] = data['roles']
                data['roles'] = leg['roles']
            else:
                # stored roles become the old ones
                data['old_roles'][stored_term] = leg['roles']

    data = apply_filters(filters, data)

    if not leg:
        insert_with_id(data)
        return "insert"

    update(leg, data, db.legislators)
    return "update"
示例#30
0
def import_committee(data, current_session, current_term):
    """Insert or update one committee and sync its members' roles.

    data - scraped committee dict
    current_session - session used to resolve member names to legislators
    current_term - term recorded on any committee-member role added

    Each named member is resolved to a legislator id; legislators that
    don't yet have a matching committee-member role for the current term
    get one appended and are saved.  The committee is saved at the end.
    """
    level = data['level']
    abbr = data[level]

    lookup = {'level': level,
              level: abbr,
              'chamber': data['chamber'],
              'committee': data['committee']}
    if 'subcommittee' in data:
        lookup['subcommittee'] = data['subcommittee']

    # insert/update the actual committee object
    committee = db.committees.find_one(lookup)
    if committee:
        update(committee, data, db.committees)
    else:
        insert_with_id(data)
        committee = data

    # deal with the members, add roles
    for member in committee['members']:
        name = member['name']
        if not name:
            continue

        leg_id = get_legislator_id(abbr, current_session,
                                   data['chamber'], name)

        if not leg_id:
            print("No matches for %s" % name.encode('ascii', 'ignore'))
            member['leg_id'] = None
            continue

        legislator = db.legislators.find_one({'_id': leg_id})
        member['leg_id'] = leg_id

        # nothing to do when the legislator already has this role
        has_role = any(
            existing['type'] == 'committee member' and
            existing['term'] == current_term and
            existing.get('committee_id') == committee['_id']
            for existing in legislator['roles'])
        if has_role:
            continue

        membership = {'type': 'committee member',
                      'committee': committee['committee'],
                      'term': current_term,
                      'chamber': committee['chamber'],
                      'committee_id': committee['_id'],
                      'level': level}
        # copy over all necessary fields from committee
        for field in settings.BILLY_LEVEL_FIELDS:
            membership[field] = committee[field]

        if 'subcommittee' in committee:
            membership['subcommittee'] = committee['subcommittee']

        legislator['roles'].append(membership)
        legislator['updated_at'] = datetime.datetime.utcnow()
        db.legislators.save(legislator, safe=True)

    db.committees.save(committee, safe=True)
示例#31
0
文件: bills.py 项目: rzuck/openstates
def import_bills(state, data_dir):
    """Load every scraped bill JSON file for ``state`` into db.bills.

    Files are read from <data_dir>/<state>/bills/*.json.  Each bill is
    normalized, merged with its separately imported votes, and then
    inserted or used to update the stored copy.  Votes that could not be
    matched to any bill are reported at the end.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, "bills", "*.json")

    meta = db.metadata.find_one({"_id": state})

    # session name -> name of the term that contains it
    term_for = dict(
        (session, term["name"])
        for term in meta["terms"]
        for session in term["sessions"]
    )

    votes = import_votes(state, data_dir)

    paths = glob.glob(pattern)

    for path in paths:
        with open(path) as bill_file:
            data = prepare_obj(json.load(bill_file))

        # normalize the bill_id before it is used as a lookup key
        data["bill_id"] = fix_bill_id(data["bill_id"])

        # subjects are stored under scraped_subjects
        # NOTE: an empty subject list is deliberately dropped, so a
        # re-run that can no longer fetch subjects (quite common in
        # fact) doesn't wipe the ones we already have
        scraped_subjects = data.pop("subjects", None)
        if scraped_subjects:
            data["scraped_subjects"] = scraped_subjects

        # fold the separately loaded votes into the bill
        chamber, session, bill_id = (data["chamber"], data["session"],
                                     data["bill_id"])
        data["votes"].extend(votes.pop((chamber, session, bill_id), []))

        # previously imported version of this bill, if any
        bill = db.bills.find_one({
            "state": data["state"],
            "session": session,
            "chamber": chamber,
            "bill_id": bill_id,
        })

        # reuse the vote ids of the stored bill where possible
        matcher = VoteMatcher(data["state"])
        if bill:
            matcher.learn_vote_ids(bill["votes"])
        matcher.set_vote_ids(data["votes"])

        # resolve sponsors to legislator ids
        for sponsor in data["sponsors"]:
            sponsor["leg_id"] = get_legislator_id(state, session, None,
                                                  sponsor["name"])

        for vote in data["votes"]:
            # resolve the committee, when the vote names one
            if "committee" in vote:
                vote["committee_id"] = get_committee_id(
                    state, vote["chamber"], vote["committee"])

            # replace each list of voter names with name/leg_id records
            for vtype in ("yes_votes", "no_votes", "other_votes"):
                vote[vtype] = [
                    {"name": voter,
                     "leg_id": get_legislator_id(state, session,
                                                 vote["chamber"], voter)}
                    for voter in vote[vtype]
                ]

        data["_term"] = term_for[session]

        # alternate titles = existing alternates + every version title,
        # minus the primary title
        alt_titles = set(data.get("alternate_titles", []))
        for version in data["versions"]:
            for title_key in ("title", "+short_title"):
                if title_key in version:
                    alt_titles.add(version[title_key])
        if "title" in data:
            alt_titles.discard(data["title"])
        data["alternate_titles"] = list(alt_titles)

        # keywords are recomputed for new and existing bills alike
        data["_keywords"] = list(bill_keywords(data))
        if bill:
            update(bill, data, db.bills)
        else:
            insert_with_id(data)

    print("imported %s bill files" % len(paths))

    # whatever is left in votes never matched a bill
    for unmatched in votes.keys():
        print("Failed to match vote %s %s %s" % tuple(
            part.encode("ascii", "replace") for part in unmatched))

    populate_current_fields(state)
    ensure_indexes()
示例#32
0
def import_committees(state, data_dir):
    """Import the committees of ``state`` and sync legislator roles.

    Committee files are read from <data_dir>/<state>/committees/*.json.
    The member list of every stored committee of the state is cleared
    first.  Then:

    * when there are no committee files, committees are derived from the
      "committee member" roles of legislators that have a role in the
      current term;
    * otherwise each file is inserted/updated, its members are resolved
      to legislators, and a committee-member role for the current term is
      added to every legislator that doesn't have one yet.

    The current term and session are the last ones listed in the state's
    metadata.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, 'committees', '*.json')

    meta = db.metadata.find_one({'_id': state})
    current_term = meta['terms'][-1]['name']
    current_session = meta['terms'][-1]['sessions'][-1]

    paths = glob.glob(pattern)

    # membership is rebuilt from scratch below
    for committee in db.committees.find({'state': state}):
        committee['members'] = []
        db.committees.save(committee)

    if not paths:
        # Not standalone committees
        for legislator in db.legislators.find({
            'roles': {'$elemMatch': {'term': current_term,
                                     'state': state}}}):

            for role in legislator['roles']:
                # only roles not yet linked to a committee need work
                if (role['type'] == 'committee member' and
                        'committee_id' not in role):

                    spec = {'state': role['state'],
                            'chamber': role['chamber'],
                            'committee': role['committee']}
                    if 'subcommittee' in role:
                        spec['subcommittee'] = role['subcommittee']

                    committee = db.committees.find_one(spec)

                    if not committee:
                        # first time we see this committee: create it
                        committee = spec
                        committee['_type'] = 'committee'
                        committee['members'] = []
                        committee['sources'] = []
                        if 'subcommittee' not in committee:
                            committee['subcommittee'] = None
                        insert_with_id(committee)

                    for member in committee['members']:
                        if member['leg_id'] == legislator['leg_id']:
                            break
                    else:
                        committee['members'].append(
                            {'name': legislator['full_name'],
                             'leg_id': legislator['leg_id'],
                             'role': role.get('position') or 'member'})
                        db.committees.save(committee, safe=True)

                        role['committee_id'] = committee['_id']

            db.legislators.save(legislator, safe=True)

    for path in paths:
        with open(path) as f:
            data = prepare_obj(json.load(f))

        spec = {'state': state,
                'chamber': data['chamber'],
                'committee': data['committee']}
        if 'subcommittee' in data:
            spec['subcommittee'] = data['subcommittee']

        committee = db.committees.find_one(spec)

        if not committee:
            insert_with_id(data)
            committee = data
        else:
            update(committee, data, db.committees)

        for member in committee['members']:
            if not member['name']:
                continue

            leg_id = get_legislator_id(state, current_session,
                                       data['chamber'],
                                       member['name'])

            if not leg_id:
                print("No matches for %s" % member['name'].encode(
                    'ascii', 'ignore'))
                member['leg_id'] = None
                continue

            legislator = db.legislators.find_one({'_id': leg_id})

            member['leg_id'] = leg_id

            for role in legislator['roles']:
                # .get(): a committee-member role may not be linked to a
                # committee yet (the branch above handles exactly such
                # roles); indexing it directly raised KeyError and
                # aborted the whole import
                if (role['type'] == 'committee member' and
                        role['term'] == current_term and
                        role.get('committee_id') == committee['_id']):
                    break
            else:
                new_role = {'type': 'committee member',
                            'committee': committee['committee'],
                            'term': current_term,
                            'chamber': committee['chamber'],
                            'committee_id': committee['_id'],
                            'state': state}
                if 'subcommittee' in committee:
                    new_role['subcommittee'] = committee['subcommittee']
                legislator['roles'].append(new_role)
                legislator['updated_at'] = datetime.datetime.utcnow()
                db.legislators.save(legislator, safe=True)

        db.committees.save(committee, safe=True)

    print('imported %s committee files' % len(paths))

    link_parents(state)

    ensure_indexes()
示例#33
0
def import_bill(data, votes):
    """Normalize a scraped bill and insert it or update the stored one.

    data - scraped bill dict, modified in place
    votes - dict of separately loaded votes keyed by
            (chamber, session, bill_id); this bill's entry is popped off
            and appended to data['votes']
    """
    level = data['level']
    abbr = data[level]

    # canonical bill_id first: it is part of every key used below
    data['bill_id'] = fix_bill_id(data['bill_id'])

    # subjects live under scraped_subjects
    # NOTE: blank subject lists are intentionally not copied; a bill is
    # often re-run when its subjects can't be fetched anymore (quite
    # common) and the stored ones should survive that
    found_subjects = data.pop('subjects', None)
    if found_subjects:
        data['scraped_subjects'] = found_subjects

    # add loaded votes to data
    chamber, session, bill_id = (data['chamber'], data['session'],
                                 data['bill_id'])
    extra_votes = votes.pop((chamber, session, bill_id), [])
    data['votes'].extend(extra_votes)

    stored_bill = db.bills.find_one({
        'level': level,
        level: abbr,
        'session': session,
        'chamber': chamber,
        'bill_id': bill_id
    })

    # keep vote ids stable across imports
    vote_ids = VoteMatcher(abbr)
    if stored_bill:
        vote_ids.learn_vote_ids(stored_bill['votes'])
    vote_ids.set_vote_ids(data['votes'])

    # match sponsor leg_ids
    for sponsor in data['sponsors']:
        sponsor['leg_id'] = get_legislator_id(abbr, session, None,
                                              sponsor['name'])

    for vote in data['votes']:
        # committee_ids
        if 'committee' in vote:
            vote['committee_id'] = get_committee_id(
                level, abbr, vote['chamber'], vote['committee'])

        # vote leg_ids: names become {'name', 'leg_id'} records
        for bucket in ('yes_votes', 'no_votes', 'other_votes'):
            resolved = []
            for voter_name in vote[bucket]:
                resolved.append({
                    'name': voter_name,
                    'leg_id': get_legislator_id(abbr, session,
                                                vote['chamber'],
                                                voter_name),
                })
            vote[bucket] = resolved

    data['_term'] = term_for_session(abbr, session)

    # merge version titles into the alternate titles, then make sure the
    # primary title is not listed as an alternate
    alternates = set(data.get('alternate_titles', []))
    for version in data['versions']:
        for key in ('title', '+short_title'):
            if key in version:
                alternates.add(version[key])
    if 'title' in data:
        alternates.discard(data['title'])
    data['alternate_titles'] = list(alternates)

    # update keywords
    data['_keywords'] = list(bill_keywords(data))

    if stored_bill:
        update(stored_bill, data, db.bills)
    else:
        insert_with_id(data)
示例#34
0
def test_deactivate_legislators():
    """Exercise ``legislators.deactivate_legislators('T2', 'ex')``.

    Three legislators are inserted: one whose only role is in term T1, one
    with an open-ended T2 role, and one whose T2 role carries an end date.
    The test expects the first and third to come back inactive with their
    roles filed under ``old_roles``, and the second to be left untouched.
    """

    def member_role(term, district, end_date=None):
        # Upper-chamber member role for 'ex'; only these three values
        # differ between the fixtures.
        return {
            'type': 'member',
            'chamber': 'upper',
            'state': 'ex',
            'term': term,
            'district': district,
            'party': 'Democrat',
            'start_date': None,
            'end_date': end_date,
        }

    def fetch(leg_id):
        return db.legislators.find_one({'_id': leg_id})

    def check_deactivated(doc, term, expected_roles):
        assert doc['active'] is False
        for field in ('chamber', 'district', 'party'):
            assert field not in doc
        assert doc['roles'] == []
        assert doc['old_roles'][term] == expected_roles

    # Previous term
    leg1 = {
        '_type': 'person',
        'state': 'ex',
        'roles': [member_role('T1', '1')],
        'active': True,
        'district': '1',
        'chamber': 'upper',
        'party': 'Democrat',
    }
    leg1_roles = leg1['roles']

    # Current term, no end date
    leg2 = {
        '_type': 'person',
        'state': 'ex',
        'roles': [member_role('T2', '2')],
        'active': True,
        'district': '2',
        'chamber': 'upper',
        'party': 'Democrat',
    }
    leg2_roles = leg2['roles']

    # Current term, with end date
    leg3 = {
        '_type': 'person',
        'state': 'ex',
        'roles': [member_role('T2', '3', datetime.datetime(2012, 1, 1))],
    }
    leg3_roles = leg3['roles']

    id1, id2, id3 = [utils.insert_with_id(leg) for leg in (leg1, leg2, leg3)]

    legislators.deactivate_legislators('T2', 'ex')

    check_deactivated(fetch(id1), 'T1', leg1_roles)

    still_active = fetch(id2)
    assert still_active['active'] is True
    assert still_active['chamber'] == 'upper'
    assert still_active['district'] == '2'
    assert still_active['party'] == 'Democrat'
    assert still_active['roles'] == leg2_roles
    assert 'old_roles' not in still_active

    check_deactivated(fetch(id3), 'T2', leg3_roles)
示例#35
0
def import_bills(state, data_dir):
    """Import every scraped bill JSON file for ``state`` into ``db.bills``.

    Reads ``<data_dir>/<state>/bills/*.json``, attaches the separately
    loaded votes, resolves legislator and committee ids, then inserts new
    bills or updates the matching existing one.  The number of files handled
    and any votes that were never attached to a bill are printed to stdout.

    state - abbreviation used as the metadata ``_id`` and for id lookups
    data_dir - directory holding one sub-directory per state
    """
    state_dir = os.path.join(data_dir, state)
    bill_glob = os.path.join(state_dir, 'bills', '*.json')

    meta = db.metadata.find_one({'_id': state})

    # session -> name of the term that contains it
    term_by_session = {
        session: term['name']
        for term in meta['terms']
        for session in term['sessions']
    }

    votes = import_votes(state, state_dir)

    paths = glob.glob(bill_glob)

    for path in paths:
        with open(path) as bill_file:
            data = prepare_obj(json.load(bill_file))

        # normalise the bill_id
        data['bill_id'] = fix_bill_id(data['bill_id'])

        # scraped subjects live under their own key; empty values are
        # dropped rather than copied
        subjects = data.pop('subjects', None)
        if subjects:
            data['scraped_subjects'] = subjects

        # pull in votes that were loaded separately for this bill
        vote_key = (data['chamber'], data['session'], data['bill_id'])
        bill_votes = votes.pop(vote_key, [])
        data['votes'].extend(bill_votes)

        bill = db.bills.find_one({
            'state': data['state'],
            'session': data['session'],
            'chamber': data['chamber'],
            'bill_id': data['bill_id'],
        })

        vote_matcher = VoteMatcher(data['state'])
        if bill:
            vote_matcher.learn_vote_ids(bill['votes'])
        vote_matcher.set_vote_ids(data['votes'])

        # sponsors -> leg_ids (no chamber restriction)
        for sponsor in data['sponsors']:
            sponsor['leg_id'] = get_legislator_id(
                state, data['session'], None, sponsor['name'])

        for vote in data['votes']:
            # committee name -> committee_id
            if 'committee' in vote:
                vote['committee_id'] = get_committee_id(
                    state, vote['chamber'], vote['committee'])

            # each voter name becomes a {'name', 'leg_id'} record
            for vtype in ('yes_votes', 'no_votes', 'other_votes'):
                vote[vtype] = [
                    {'name': voter,
                     'leg_id': get_legislator_id(state, data['session'],
                                                 vote['chamber'], voter)}
                    for voter in vote[vtype]
                ]

        data['_term'] = term_by_session[data['session']]

        # alternate titles = existing alternates + any version titles ...
        alt_titles = set(data.get('alternate_titles', []))
        for version in data['versions']:
            for title_key in ('title', '+short_title'):
                if title_key in version:
                    alt_titles.add(version[title_key])
        try:
            # ... minus the primary title itself
            alt_titles.remove(data['title'])
        except KeyError:
            pass
        data['alternate_titles'] = list(alt_titles)

        # keywords are recomputed for inserts and updates alike
        data['_keywords'] = list(bill_keywords(data))
        if bill:
            update(bill, data, db.bills)
        else:
            insert_with_id(data)

    print('imported %s bill files' % len(paths))

    # whatever is still in ``votes`` was never claimed by a bill
    for remaining in votes:
        print('Failed to match vote %s %s %s' % tuple(
            part.encode('ascii', 'replace') for part in remaining))

    populate_current_fields(state)
    ensure_indexes()
示例#36
0
文件: bills.py 项目: msabramo/billy
def import_bill(data, votes, categorizer):
    """Insert or update one bill and report which of the two happened.

    data - bill dict; modified in place (ids resolved, votes attached, ...)
    votes - separately loaded votes keyed by (chamber, session, bill_id);
            the entry belonging to this bill is popped off
    categorizer - object with ``categorize_bill`` (falsy to skip)

    Returns "insert" for a new bill, "update" when one already existed.
    """
    level = data["level"]
    abbr = data[level]

    # normalise the bill_id and any alternates
    data["bill_id"] = fix_bill_id(data["bill_id"])
    if "alternate_bill_ids" in data:
        data["alternate_bill_ids"] = [
            fix_bill_id(alt_id) for alt_id in data["alternate_bill_ids"]
        ]

    # subjects move to scraped_subjects
    # NOTE: blank subject lists are deliberately not copied, so a re-run
    # that can no longer fetch subjects does not wipe the earlier ones
    scraped = data.pop("subjects", None)
    if scraped:
        data["scraped_subjects"] = scraped

    # update categorized subjects
    if categorizer:
        categorizer.categorize_bill(data)

    # Rhode Island hack: when the metadata carries _partial_vote_bill_id the
    # full bill_id can't be determined for votes, so they are keyed on the
    # numeric portion only.  Not ideal -- it breaks where HB/SB numbers
    # overlap -- but in RI they never do.
    if metadata(abbr).get("_partial_vote_bill_id"):
        vote_bill_id = data["bill_id"].split()[1]
    else:
        vote_bill_id = data["bill_id"]
    bill_votes = votes.pop(
        (data["chamber"], data["session"], vote_bill_id), [])

    data["votes"].extend(bill_votes)

    bill = db.bills.find_one(
        {
            "level": level,
            level: abbr,
            "session": data["session"],
            "chamber": data["chamber"],
            "bill_id": data["bill_id"],
        }
    )

    # keep vote/doc ids consistent with what is already stored
    vote_matcher = VoteMatcher(abbr)
    doc_matcher = DocumentMatcher(abbr)
    if bill:
        vote_matcher.learn_ids(bill["votes"])
        doc_matcher.learn_ids(bill["versions"] + bill["documents"])
    vote_matcher.set_ids(data["votes"])
    doc_matcher.set_ids(data["versions"] + data["documents"])

    # sponsors -> leg_ids (no chamber restriction)
    for sponsor in data["sponsors"]:
        sponsor["leg_id"] = get_legislator_id(
            abbr, data["session"], None, sponsor["name"])

    for vote in data["votes"]:
        # committee name -> committee_id
        if "committee" in vote:
            vote["committee_id"] = get_committee_id(
                level, abbr, vote["chamber"], vote["committee"])

        # each voter name becomes a {"name", "leg_id"} record
        for vtype in ("yes_votes", "no_votes", "other_votes"):
            vote[vtype] = [
                {"name": voter,
                 "leg_id": get_legislator_id(abbr, data["session"],
                                             vote["chamber"], voter)}
                for voter in vote[vtype]
            ]

    data["_term"] = term_for_session(abbr, data["session"])

    alt_titles = set(data.get("alternate_titles", []))

    for version in data["versions"]:
        # push versions to oyster
        if settings.ENABLE_OYSTER and "url" in version:
            oysterize_version(data, version)

        # fold version titles into the alternate titles
        for title_key in ("title", "+short_title"):
            if title_key in version:
                alt_titles.add(version[title_key])
    try:
        # the primary title must not appear among the alternates
        alt_titles.remove(data["title"])
    except KeyError:
        pass
    data["alternate_titles"] = list(alt_titles)

    if bill:
        update(bill, data, db.bills)
        return "update"

    insert_with_id(data)
    return "insert"
示例#37
0
def import_committee(data, current_session, current_term):
    """Insert or update a committee and sync its members' legislator roles.

    data - committee dict; must carry 'level', the level's abbreviation,
           'chamber', 'committee' and (via the stored/inserted committee)
           'members'
    current_session - session used when matching member names to legislators
    current_term - term recorded on any committee-member role that is added

    Returns "insert" when the committee was new, "update" otherwise.
    """
    level = data['level']
    abbr = data[level]

    spec = {
        'level': level,
        level: abbr,
        'chamber': data['chamber'],
        'committee': data['committee'],
    }
    if 'subcommittee' in data:
        spec['subcommittee'] = data['subcommittee']

    # insert/update the committee object itself
    committee = db.committees.find_one(spec)
    if committee:
        update(committee, data, db.committees)
        committee_return_status = "update"
    else:
        insert_with_id(data)
        committee = data
        committee_return_status = "insert"

    # resolve each member and make sure the legislator has a matching role
    for member in committee['members']:
        if not member['name']:
            continue

        leg_id = get_legislator_id(abbr, current_session, data['chamber'],
                                   member['name'])
        if not leg_id:
            unmatched = member['name'].encode('ascii', 'ignore')
            logger.debug("No matches for %s" % unmatched)
            member['leg_id'] = None
            continue

        legislator = db.legislators.find_one({'_id': leg_id})
        if not legislator:
            logger.warning('No legislator with ID %s' % leg_id)
            member['leg_id'] = None
            continue

        member['leg_id'] = leg_id

        # nothing to add when a role for this committee and term exists
        has_role = any(
            role['type'] == 'committee member'
            and role['term'] == current_term
            and role.get('committee_id') == committee['_id']
            for role in legislator['roles'])
        if has_role:
            continue

        new_role = {
            'type': 'committee member',
            'committee': committee['committee'],
            'term': current_term,
            'chamber': committee['chamber'],
            'committee_id': committee['_id'],
            'level': level,
        }
        # copy over all necessary fields from committee
        for field in settings.BILLY_LEVEL_FIELDS:
            new_role[field] = committee[field]

        if 'subcommittee' in committee:
            new_role['subcommittee'] = committee['subcommittee']

        legislator['roles'].append(new_role)
        legislator['updated_at'] = datetime.datetime.utcnow()
        db.legislators.save(legislator, safe=True)

    # members now carry leg_ids, so persist the committee again
    db.committees.save(committee, safe=True)
    return committee_return_status
示例#38
0
def import_bills(state, data_dir):
    """Load all bill JSON files for ``state`` and store them in ``db.bills``.

    Files are read from ``<data_dir>/<state>/bills/*.json``.  Each bill gets
    its separately loaded votes attached and its legislator/committee ids
    resolved, then is inserted or used to update the existing record.  The
    file count and any votes left unmatched are printed to stdout.

    state - abbreviation used as the metadata ``_id`` and for id lookups
    data_dir - directory holding one sub-directory per state
    """
    state_dir = os.path.join(data_dir, state)
    bill_glob = os.path.join(state_dir, 'bills', '*.json')

    meta = db.metadata.find_one({'_id': state})

    # session -> name of the term that contains it
    term_by_session = {
        session: term['name']
        for term in meta['terms']
        for session in term['sessions']
    }

    votes = import_votes(state, state_dir)

    paths = glob.glob(bill_glob)

    for path in paths:
        with open(path) as bill_file:
            data = prepare_obj(json.load(bill_file))

        # normalise the bill_id
        data['bill_id'] = fix_bill_id(data['bill_id'])

        # subjects move to scraped_subjects
        # NOTE: blank subject lists are deliberately not copied, so a
        # re-run that can no longer fetch subjects (quite common) does
        # not wipe the earlier ones
        subjects = data.pop('subjects', None)
        if subjects:
            data['scraped_subjects'] = subjects

        # pull in votes that were loaded separately for this bill
        vote_key = (data['chamber'], data['session'], data['bill_id'])
        bill_votes = votes.pop(vote_key, [])
        data['votes'].extend(bill_votes)

        bill = db.bills.find_one({
            'state': data['state'],
            'session': data['session'],
            'chamber': data['chamber'],
            'bill_id': data['bill_id'],
        })

        vote_matcher = VoteMatcher(data['state'])
        if bill:
            vote_matcher.learn_vote_ids(bill['votes'])
        vote_matcher.set_vote_ids(data['votes'])

        # sponsors -> leg_ids (no chamber restriction)
        for sponsor in data['sponsors']:
            sponsor['leg_id'] = get_legislator_id(
                state, data['session'], None, sponsor['name'])

        for vote in data['votes']:
            # committee name -> committee_id
            if 'committee' in vote:
                vote['committee_id'] = get_committee_id(
                    state, vote['chamber'], vote['committee'])

            # each voter name becomes a {'name', 'leg_id'} record
            for vtype in ('yes_votes', 'no_votes', 'other_votes'):
                vote[vtype] = [
                    {'name': voter,
                     'leg_id': get_legislator_id(state, data['session'],
                                                 vote['chamber'], voter)}
                    for voter in vote[vtype]
                ]

        data['_term'] = term_by_session[data['session']]

        # alternate titles = existing alternates + any version titles ...
        alt_titles = set(data.get('alternate_titles', []))
        for version in data['versions']:
            for title_key in ('title', '+short_title'):
                if title_key in version:
                    alt_titles.add(version[title_key])
        try:
            # ... minus the primary title itself
            alt_titles.remove(data['title'])
        except KeyError:
            pass
        data['alternate_titles'] = list(alt_titles)

        # keywords are recomputed for inserts and updates alike
        data['_keywords'] = list(bill_keywords(data))
        if bill:
            update(bill, data, db.bills)
        else:
            insert_with_id(data)

    print('imported %s bill files' % len(paths))

    # whatever is still in ``votes`` was never claimed by a bill
    for remaining in votes:
        print('Failed to match vote %s %s %s' % tuple(
            part.encode('ascii', 'replace') for part in remaining))

    populate_current_fields(state)
    ensure_indexes()
示例#39
0
def import_committee(data, current_session, current_term):
    """Insert or update a committee and sync its members' legislator roles.

    data - committee dict; must carry the ``settings.LEVEL_FIELD`` key,
           'chamber', 'committee' and (via the stored/inserted committee)
           'members', each member having 'name' and 'role'
    current_session - session used when matching member names to legislators
    current_term - term recorded on any committee-member role that is added

    Returns "insert" when the committee was new, "update" otherwise.
    """
    level_field = settings.LEVEL_FIELD
    abbr = data[level_field]

    spec = {
        level_field: abbr,
        'chamber': data['chamber'],
        'committee': data['committee'],
    }
    if 'subcommittee' in data:
        spec['subcommittee'] = data['subcommittee']

    # insert/update the committee object itself
    committee = db.committees.find_one(spec)
    if committee:
        update(committee, data, db.committees)
        committee_return_status = "update"
    else:
        insert_with_id(data)
        committee = data
        committee_return_status = "insert"

    # resolve each member and make sure the legislator has a matching role
    for member in committee['members']:
        if not member['name']:
            continue

        leg_id = get_legislator_id(abbr, current_session, data['chamber'],
                                   member['name'])
        if not leg_id:
            unmatched = member['name'].encode('ascii', 'ignore')
            logger.debug("No matches for %s" % unmatched)
            member['leg_id'] = None
            continue

        # look the id up in _all_ids and store the document's own _id
        legislator = db.legislators.find_one({'_all_ids': leg_id})
        if not legislator:
            logger.warning('No legislator with ID %s' % leg_id)
            member['leg_id'] = None
            continue

        member['leg_id'] = legislator['_id']

        # first existing role for this committee in the current term
        existing_role = next(
            (role for role in legislator['roles']
             if role['type'] == 'committee member'
             and role['term'] == current_term
             and role.get('committee_id') == committee['_id']),
            None)

        if existing_role is not None:
            # if the position hadn't been copied over before, copy it now
            if existing_role.get('position') != member['role']:
                existing_role['position'] = member['role']
                db.legislators.save(legislator, safe=True)
            continue

        new_role = {
            'type': 'committee member',
            'committee': committee['committee'],
            'term': current_term,
            'chamber': committee['chamber'],
            'committee_id': committee['_id'],
            'position': member['role'],
        }
        # copy over all necessary fields from committee
        new_role[level_field] = committee[level_field]

        if 'subcommittee' in committee:
            new_role['subcommittee'] = committee['subcommittee']

        legislator['roles'].append(new_role)
        legislator['updated_at'] = datetime.datetime.utcnow()
        db.legislators.save(legislator, safe=True)

    # members now carry leg_ids, so persist the committee again
    db.committees.save(committee, safe=True)
    return committee_return_status
示例#40
0
def test_deactivate_legislators():
    """Exercise ``legislators.deactivate_legislators('T2', 'ex')``.

    Inserts a legislator from the previous term (T1), one with an
    open-ended current-term (T2) role, and one whose T2 role has an end
    date.  Only the open-ended T2 legislator is expected to stay active;
    the other two should lose their current fields and have their roles
    filed under ``old_roles``.
    """

    def build(term, district, end_date=None, with_current_fields=True):
        # Fixture legislator holding a single upper-chamber member role.
        leg = {
            '_type': 'person',
            'state': 'ex',
            'roles': [{
                'type': 'member',
                'chamber': 'upper',
                'state': 'ex',
                'term': term,
                'district': district,
                'party': 'Democrat',
                'start_date': None,
                'end_date': end_date,
            }],
        }
        if with_current_fields:
            leg['active'] = True
            leg['district'] = district
            leg['chamber'] = 'upper'
            leg['party'] = 'Democrat'
        return leg

    def check_deactivated(leg_id, term, expected_roles):
        stored = db.legislators.find_one({'_id': leg_id})
        assert stored['active'] is False
        for field in ('chamber', 'district', 'party'):
            assert field not in stored
        assert stored['roles'] == []
        assert stored['old_roles'][term] == expected_roles

    # Previous term
    leg1 = build('T1', '1')
    leg1_roles = leg1['roles']

    # Current term, no end date
    leg2 = build('T2', '2')
    leg2_roles = leg2['roles']

    # Current term, with end date
    leg3 = build('T2', '3', end_date=datetime.datetime(2012, 1, 1),
                 with_current_fields=False)
    leg3_roles = leg3['roles']

    id1 = utils.insert_with_id(leg1)
    id2 = utils.insert_with_id(leg2)
    id3 = utils.insert_with_id(leg3)

    legislators.deactivate_legislators('T2', 'ex')

    check_deactivated(id1, 'T1', leg1_roles)

    stored2 = db.legislators.find_one({'_id': id2})
    assert stored2['active'] is True
    assert stored2['chamber'] == 'upper'
    assert stored2['district'] == '2'
    assert stored2['party'] == 'Democrat'
    assert stored2['roles'] == leg2_roles
    assert 'old_roles' not in stored2

    check_deactivated(id3, 'T2', leg3_roles)
示例#41
0
def import_bill(data, standalone_votes, categorizer):
    """
        insert or update a bill

        data - raw bill JSON; modified in place (bill ids cleaned, subjects
               moved, companions/sponsors/actions annotated, 'votes' popped
               off) and finally passed through apply_filters
        standalone_votes - votes scraped separately; the entry keyed by
               (chamber, session, bill_id) for this bill is popped off
        categorizer - SubjectCategorizer (None - no categorization)

        Returns "insert" when no stored bill matched, "update" otherwise.
    """
    abbr = data[settings.LEVEL_FIELD]

    # clean up bill_ids
    data['bill_id'] = fix_bill_id(data['bill_id'])
    if 'alternate_bill_ids' in data:
        data['alternate_bill_ids'] = [
            fix_bill_id(bid) for bid in data['alternate_bill_ids']
        ]

    # move subjects to scraped_subjects
    # NOTE: intentionally doesn't copy blank lists of subjects
    # this avoids the problem where a bill is re-run but we can't
    # get subjects anymore (quite common)
    subjects = data.pop('subjects', None)
    if subjects:
        data['scraped_subjects'] = subjects

    # update categorized subjects
    if categorizer:
        categorizer.categorize_bill(data)

    # companions
    for companion in data['companions']:
        companion['bill_id'] = fix_bill_id(companion['bill_id'])
        # query based on companion: every field of the companion entry is
        # used as a search criterion, plus the level field
        spec = companion.copy()
        spec[settings.LEVEL_FIELD] = abbr
        # a falsy chamber means "any chamber", so drop it from the query
        if not spec['chamber']:
            spec.pop('chamber')
        companion_obj = db.bills.find_one(spec)
        if companion_obj:
            companion['internal_id'] = companion_obj['_id']
        else:
            # the format call needs chamber, session and bill_id keys on
            # the companion entry
            logger.warning(
                'Unknown companion: {chamber} {session} {bill_id}'.format(
                    **companion))

    # look for a prior version of this bill
    bill = db.bills.find_one({
        settings.LEVEL_FIELD: abbr,
        'session': data['session'],
        'chamber': data['chamber'],
        'bill_id': data['bill_id']
    })

    # keep doc ids consistent
    doc_matcher = DocumentMatcher(abbr)
    if bill:
        doc_matcher.learn_ids(bill['versions'] + bill['documents'])
    doc_matcher.set_ids(data['versions'] + data['documents'])

    # match sponsor leg_ids
    match_sponsor_ids(abbr, data)

    # process votes ############

    # pull votes off bill (they are handed to save_votes at the end rather
    # than staying on the bill dict)
    bill_votes = data.pop('votes', [])

    # grab the external bill votes if present
    if metadata(abbr).get('_partial_vote_bill_id'):
        # this is a hack initially added for Rhode Island where we can't
        # determine the full bill_id, if this key is in the metadata
        # we just use the numeric portion, not ideal as it won't work
        # where HB/SBs overlap, but in RI they never do
        # pull off numeric portion of bill_id
        # (second whitespace-separated token of the bill_id)
        numeric_bill_id = data['bill_id'].split()[1]
        bill_votes += standalone_votes.pop(
            (data['chamber'], data['session'], numeric_bill_id), [])
    else:
        # add loaded votes to data
        bill_votes += standalone_votes.pop(
            (data['chamber'], data['session'], data['bill_id']), [])

    # do id matching and other vote prep
    # (the stored bill's _id is passed when there is one, None otherwise)
    if bill:
        prepare_votes(abbr, data['session'], bill['_id'], bill_votes)
    else:
        prepare_votes(abbr, data['session'], None, bill_votes)

    # process actions ###########

    # milestone dates collected while walking the actions below
    dates = {
        'first': None,
        'last': None,
        'passed_upper': None,
        'passed_lower': None,
        'signed': None
    }

    # action types for which linking a vote is attempted
    # NOTE(review): "committee:passed:failed" looks unusual next to the
    # other entries -- confirm it is the intended action type
    vote_flags = {
        "bill:passed", "bill:failed", "bill:veto_override:passed",
        "bill:veto_override:failed", "amendment:passed", "amendment:failed",
        "committee:passed", "committee:passed:favorable",
        "committee:passed:unfavorable", "committee:passed:failed"
    }
    # vote ids attached to at least one action so far
    already_linked = set()
    # vote ids that got attached to more than one action; stripped again
    # after the loop
    remove_vote = set()

    for action in data['actions']:
        adate = action['date']

        # the resolvers close over the current action so the action's
        # actor is used as the chamber for the lookup
        def _match_committee(name):
            return get_committee_id(abbr, action['actor'], name)

        def _match_legislator(name):
            return get_legislator_id(abbr, data['session'], action['actor'],
                                     name)

        resolvers = {
            "committee": _match_committee,
            "legislator": _match_legislator
        }

        if "related_entities" in action:
            for entity in action['related_entities']:
                try:
                    resolver = resolvers[entity['type']]
                except KeyError as e:
                    # We don't know how to deal.
                    logger.error("I don't know how to sort a %s" % e)
                    continue

                id = resolver(entity['name'])
                entity['id'] = id

        # first & last dates
        if not dates['first'] or adate < dates['first']:
            dates['first'] = adate
        if not dates['last'] or adate > dates['last']:
            dates['last'] = adate

        # passed & signed dates
        # (only the first matching action in list order sets each date, and
        # a single action can set at most one of the three)
        if (not dates['passed_upper'] and action['actor'] == 'upper'
                and 'bill:passed' in action['type']):
            dates['passed_upper'] = adate
        elif (not dates['passed_lower'] and action['actor'] == 'lower'
              and 'bill:passed' in action['type']):
            dates['passed_lower'] = adate
        elif (not dates['signed'] and 'governor:signed' in action['type']):
            dates['signed'] = adate

        # vote-action matching
        action_attached = False
        # only attempt vote matching if action has a date and is one of the
        # designated vote action types
        if set(action['type']).intersection(vote_flags) and action['date']:
            for vote in bill_votes:
                if not vote['date']:
                    continue

                # a vote matches when it is within 20 hours of the action
                # and was taken in the chamber that is the action's actor
                delta = abs(vote['date'] - action['date'])
                if (delta < datetime.timedelta(hours=20)
                        and vote['chamber'] == action['actor']):
                    if action_attached:
                        # multiple votes match, we can't guess
                        action.pop('related_votes', None)
                    else:
                        related_vote = vote['vote_id']
                        # seen on an earlier action too: mark for removal
                        if related_vote in already_linked:
                            remove_vote.add(related_vote)

                        already_linked.add(related_vote)
                        action['related_votes'] = [related_vote]
                        action_attached = True

    # remove related_votes that we linked to multiple actions
    for action in data['actions']:
        for vote in remove_vote:
            if vote in action.get('related_votes', []):
                action['related_votes'].remove(vote)

    # save action dates to data
    data['action_dates'] = dates

    data['_term'] = term_for_session(abbr, data['session'])

    alt_titles = set(data.get('alternate_titles', []))

    for version in data['versions']:
        # Merge any version titles into the alternate_titles list
        if 'title' in version:
            alt_titles.add(version['title'])
        if '+short_title' in version:
            alt_titles.add(version['+short_title'])
    try:
        # Make sure the primary title isn't included in the
        # alternate title list
        alt_titles.remove(data['title'])
    except KeyError:
        pass
    data['alternate_titles'] = list(alt_titles)
    # NOTE(review): `filters` is not defined in this function; presumably a
    # module-level name -- confirm
    data = apply_filters(filters, data)

    if not bill:
        insert_with_id(data)
        elasticsearch_push(data)
        git_add_bill(data)
        save_votes(data, bill_votes)
        return "insert"
    else:
        # the stored `bill` (not `data`) is pushed afterwards; presumably
        # update() merges data into bill -- confirm
        update(bill, data, db.bills)
        elasticsearch_push(bill)
        git_add_bill(bill)
        save_votes(bill, bill_votes)
        return "update"
示例#42
0
def import_bill(data, votes, categorizer):
    """Import one scraped bill into ``db.bills``, inserting or updating.

    ``data`` is modified in place: bill ids are normalized with
    ``fix_bill_id``, ``subjects`` is moved to ``scraped_subjects``, votes
    found in ``votes`` are appended, legislator/committee ids are filled in,
    ``_term`` is set, and version titles are merged into
    ``alternate_titles``.

    Args:
        data: dict describing the bill.  Must contain at least ``level``,
            the key named by ``level`` (the abbreviation), ``bill_id``,
            ``session``, ``chamber``, ``votes``, ``versions``,
            ``documents`` and ``sponsors``.
        votes: dict keyed by ``(chamber, session, bill_id)`` whose values
            are lists of vote dicts.  The entry matching this bill is
            popped from it.
        categorizer: object with a ``categorize_bill(data)`` method, or a
            falsy value to skip subject categorization.

    Returns:
        ``"insert"`` when no matching bill existed, ``"update"`` otherwise.
    """
    level = data['level']
    abbr = data[level]

    # clean up bill_ids
    data['bill_id'] = fix_bill_id(data['bill_id'])
    if 'alternate_bill_ids' in data:
        data['alternate_bill_ids'] = [
            fix_bill_id(bid) for bid in data['alternate_bill_ids']
        ]

    # move subjects to scraped_subjects
    # NOTE: intentionally doesn't copy blank lists of subjects
    # this avoids the problem where a bill is re-run but we can't
    # get subjects anymore (quite common)
    subjects = data.pop('subjects', None)
    if subjects:
        data['scraped_subjects'] = subjects

    # update categorized subjects
    if categorizer:
        categorizer.categorize_bill(data)

    # this is a hack added for Rhode Island where we can't
    # determine the full bill_id, if this key is in the metadata
    # we just use the numeric portion, not ideal as it won't work
    # in states where HB/SBs overlap, but in RI they never do
    if metadata(abbr).get('_partial_vote_bill_id'):
        # pull off numeric portion of bill_id
        vote_bill_id = data['bill_id'].split()[1]
    else:
        vote_bill_id = data['bill_id']

    # add loaded votes to data (the entry is removed from ``votes``)
    bill_votes = votes.pop(
        (data['chamber'], data['session'], vote_bill_id), [])
    data['votes'].extend(bill_votes)

    bill = db.bills.find_one({
        'level': level,
        level: abbr,
        'session': data['session'],
        'chamber': data['chamber'],
        'bill_id': data['bill_id']
    })

    # keep vote/doc ids consistent
    vote_matcher = VoteMatcher(abbr)
    doc_matcher = DocumentMatcher(abbr)
    if bill:
        vote_matcher.learn_ids(bill['votes'])
        doc_matcher.learn_ids(bill['versions'] + bill['documents'])
    vote_matcher.set_ids(data['votes'])
    doc_matcher.set_ids(data['versions'] + data['documents'])

    # match sponsor leg_ids (sponsors are looked up without a chamber)
    for sponsor in data['sponsors']:
        sponsor['leg_id'] = get_legislator_id(abbr, data['session'], None,
                                              sponsor['name'])

    for vote in data['votes']:

        # committee_ids
        if 'committee' in vote:
            vote['committee_id'] = get_committee_id(level, abbr,
                                                    vote['chamber'],
                                                    vote['committee'])

        # vote leg_ids: replace each bare name with a name/leg_id dict
        for vtype in ('yes_votes', 'no_votes', 'other_votes'):
            matched = []
            for svote in vote[vtype]:
                leg_id = get_legislator_id(abbr, data['session'],
                                           vote['chamber'], svote)
                matched.append({'name': svote, 'leg_id': leg_id})
            vote[vtype] = matched

    data['_term'] = term_for_session(abbr, data['session'])

    alt_titles = set(data.get('alternate_titles', []))

    for version in data['versions']:
        # push versions to oyster
        if settings.ENABLE_OYSTER and 'url' in version:
            oysterize_version(data, version)

        # Merge any version titles into the alternate_titles list
        if 'title' in version:
            alt_titles.add(version['title'])
        if '+short_title' in version:
            alt_titles.add(version['+short_title'])

    # Make sure the primary title isn't included in the alternate title
    # list; a bill without a title, or whose title was never an alternate,
    # is left untouched.
    if 'title' in data:
        alt_titles.discard(data['title'])
    data['alternate_titles'] = list(alt_titles)

    if not bill:
        insert_with_id(data)
        return "insert"

    update(bill, data, db.bills)
    return "update"