Exemplo n.º 1
0
def import_bills(abbr, data_dir):
    """Import all scraped bill JSON files for ``abbr`` and tally the results.

    Bill files are read from ``<data_dir>/<abbr>/bills/*.json``. Each one is
    decoded, passed through ``prepare_obj`` and handed to ``import_bill``
    together with the standalone votes and the (optional) subject
    categorizer. The run is bracketed by ``git_prelod(abbr)`` at the start
    and ``git_commit("Import Update")`` at the end.

    Args:
        abbr: Abbreviation naming the data sub-directory; also passed to
            ``git_prelod``, ``SubjectCategorizer`` and
            ``populate_current_fields``.
        data_dir: Root directory that contains the per-``abbr`` directory.

    Returns:
        A dict with the keys ``"update"``, ``"insert"`` and ``"total"``.
        ``"total"`` counts the files processed; the other entries are
        incremented using the value returned by ``import_bill`` as the key,
        so that value must be one of the dict's keys.
    """
    source_dir = os.path.join(data_dir, abbr)
    bill_glob = os.path.join(source_dir, 'bills', '*.json')

    git_prelod(abbr)

    counts = dict(update=0, insert=0, total=0)

    votes = load_standalone_votes(source_dir)

    # The categorizer is optional: any failure while building it is logged
    # and the import carries on with None in its place.
    categorizer = None
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as e:
        logger.debug('Proceeding without subject categorizer: %s' % e)

    bill_files = glob.glob(bill_glob)
    for bill_file in bill_files:
        with open(bill_file) as handle:
            bill = prepare_obj(json.load(handle))

        counts["total"] += 1
        outcome = import_bill(bill, votes, categorizer)
        counts[outcome] += 1

    logger.info('imported %s bill files' % len(bill_files))

    # Every key still present in ``votes`` is logged as a vote that failed
    # to match (presumably import_bill removes the ones it matches --
    # confirm against import_bill).
    for unmatched in votes.keys():
        parts = tuple(piece.encode('ascii', 'replace') for piece in unmatched)
        logger.debug('Failed to match vote %s %s %s' % parts)

    populate_current_fields(abbr)

    git_commit("Import Update")

    return counts
Exemplo n.º 2
0
def test_basic_categorization():
    """Check ``SubjectCategorizer.categorize_bill`` on individual bill dicts.

    Three subject mappings are inserted for the ``'ex'`` abbreviation, then
    bill dicts are categorized in place and inspected. Where the whole bill
    is compared, its ``'subjects'`` list is sorted first so the assertion
    does not depend on the order in which the categorizer emits subjects,
    while still catching duplicate or unexpected entries and extra keys.
    """

    def normalized(categorized):
        # Copy of the bill with 'subjects' sorted, for order-insensitive
        # comparison of the complete dict.
        return dict(categorized, subjects=sorted(categorized['subjects']))

    db.subjects.insert({'abbr': 'ex', 'remote': 'AK-47',
                        'normal': ['Guns', 'Crime']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # simple
    bill = {'scraped_subjects': ['AK-47']}
    categorizer.categorize_bill(bill)
    assert_equal(normalized(bill), {'scraped_subjects': ['AK-47'],
                                    'subjects': [u'Crime', u'Guns']})

    # no subjects
    bill = {'scraped_subjects': ['Welfare']}
    categorizer.categorize_bill(bill)
    assert_equal(bill, {'scraped_subjects': ['Welfare'],
                        'subjects': []})

    # two subjects
    bill = {'scraped_subjects': ['AK-47', 'Candy']}
    categorizer.categorize_bill(bill)
    assert_equal(set(bill['subjects']), set([u'Guns', u'Crime', u'Food']))

    # avoid duplicates: 'Guns' is mapped from both scraped subjects but must
    # appear only once; a sorted-list comparison would expose a repeat.
    bill = {'scraped_subjects': ['AK-47', 'Hunting']}
    categorizer.categorize_bill(bill)
    assert_equal(normalized(bill), {'scraped_subjects': ['AK-47', 'Hunting'],
                                    'subjects': [u'Crime', u'Guns']})
Exemplo n.º 3
0
def import_bills(abbr, data_dir):
    """Load every bill JSON file for ``abbr`` and report import tallies.

    Files matching ``<data_dir>/<abbr>/bills/*.json`` are decoded, run
    through ``prepare_obj`` and passed to ``import_bill`` along with the
    result of ``import_votes`` and the (optional) subject categorizer.
    Afterwards the ``level`` stored in the metadata document for ``abbr`` is
    used to call ``populate_current_fields``, and ``ensure_indexes`` is run.

    Args:
        abbr: Abbreviation naming the data sub-directory; also used as the
            ``_id`` of the metadata document that is looked up.
        data_dir: Root directory that contains the per-``abbr`` directory.

    Returns:
        A dict with the keys ``"update"``, ``"insert"`` and ``"total"``.
        ``"total"`` is the number of files processed; the remaining entries
        are incremented under whatever key ``import_bill`` returns, which
        therefore has to be one of the dict's keys.
    """
    abbr_dir = os.path.join(data_dir, abbr)
    json_pattern = os.path.join(abbr_dir, 'bills', '*.json')

    counts = dict.fromkeys(("update", "insert", "total"), 0)

    votes = import_votes(abbr_dir)

    # Building the categorizer is best-effort; on any error the import
    # proceeds with categorizer set to None.
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as e:
        categorizer = None
        logger.debug('Proceeding without subject categorizer: %s' % e)

    json_files = glob.glob(json_pattern)
    for json_file in json_files:
        with open(json_file) as fh:
            decoded = json.load(fh)
            prepared = prepare_obj(decoded)

        counts["total"] += 1
        result_key = import_bill(prepared, votes, categorizer)
        counts[result_key] += 1

    logger.info('imported %s bill files' % len(json_files))

    # Each key remaining in ``votes`` is logged as a vote that could not be
    # matched (presumably matched votes are removed by import_bill --
    # confirm against import_bill).
    for leftover in votes.keys():
        ascii_fields = tuple([field.encode('ascii', 'replace')
                              for field in leftover])
        logger.debug('Failed to match vote %s %s %s' % ascii_fields)

    level = db.metadata.find_one({'_id': abbr})['level']
    populate_current_fields(level, abbr)

    ensure_indexes()

    return counts
Exemplo n.º 4
0
def test_all_bills_categorization():
    """Check ``SubjectCategorizer.categorize_bills`` against stored bills.

    Three subject mappings and four dummy bills are inserted for the
    ``'ex'`` abbreviation, ``categorize_bills()`` is run once, and each bill
    is then read back from ``db.bills`` to verify its ``'subjects'``.
    """

    db.subjects.insert({
        'abbr': 'ex',
        'remote': 'AK-47',
        'normal': ['Guns', 'Crime']
    })
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # can insert dummy bills w/ state
    bills = [{
        'scraped_subjects': ['AK-47'],
        'bill_id': '1',
        'state': 'ex'
    }, {
        'scraped_subjects': ['Welfare'],
        'bill_id': '2',
        'state': 'ex'
    }, {
        'scraped_subjects': ['AK-47', 'Candy'],
        'bill_id': '3',
        'state': 'ex'
    }, {
        'scraped_subjects': ['AK-47', 'Hunting'],
        'bill_id': '4',
        'state': 'ex'
    }]
    # Explicit loop rather than map(): on Python 3 map() is lazy, so using
    # it purely for side effects would insert nothing.
    for dummy_bill in bills:
        db.bills.insert(dummy_bill)

    # run categorization on all bills
    categorizer.categorize_bills()

    # simple
    bill = db.bills.find_one({'bill_id': '1'})
    assert_equal(set(bill['subjects']), set([u'Guns', u'Crime']))

    # no subjects
    bill = db.bills.find_one({'bill_id': '2'})
    assert_equal(bill['subjects'], [])

    # two subjects
    bill = db.bills.find_one({'bill_id': '3'})
    assert_equal(set(bill['subjects']), set([u'Guns', u'Crime', u'Food']))

    # avoid duplicates: compare a sorted list, since a set comparison would
    # silently collapse a repeated 'Guns' entry.
    bill = db.bills.find_one({'bill_id': '4'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])