Exemplo n.º 1
0
    def read(self, request, **kwargs):
        """Run the parent handler's read and decorate every returned row.

        The ``name`` query-string parameter (empty string when absent) is
        forwarded to the parent ``read``. Each row then gains standardized
        and slugified forms of the lobbyist, firm and recipient names, plus
        ``total_amount_standardized`` produced by Django's ``intcomma``.
        """
        kwargs.update({'name': request.GET.get('name', '')})

        out = super(DetailExplorerHandler, self).read(request, **kwargs)

        # Imported at call time, after the parent read has completed.
        from name_cleaver import OrganizationNameCleaver, IndividualNameCleaver, PoliticianNameCleaver
        from django.contrib.humanize.templatetags.humanize import intcomma
        from django.template.defaultfilters import slugify

        for row in out:
            # Falsy lobbyist names are passed through untouched.
            lobbyist = row['lobbyist_name']
            if lobbyist:
                lobbyist = IndividualNameCleaver(lobbyist).parse()
            row['lobbyist_name_standardized'] = lobbyist
            row['lobbyist_name_slug'] = slugify(lobbyist)

            # Same pass-through rule for falsy firm names.
            firm = row['firm_name']
            if firm:
                firm = OrganizationNameCleaver(firm).parse()
            row['firm_name_standardized'] = firm
            row['firm_name_slug'] = slugify(firm)

            # Rows with a recipient_id are cleaned as politicians; all other
            # recipients are cleaned as organizations.
            if row['recipient_id']:
                recipient_cleaner = PoliticianNameCleaver
            else:
                recipient_cleaner = OrganizationNameCleaver
            recipient = recipient_cleaner(row['recipient_name']).parse()
            row['recipient_name_standardized'] = recipient
            row['recipient_name_slug'] = slugify(recipient)

            row['total_amount_standardized'] = intcomma(row['total_amount'])
        return out
Exemplo n.º 2
0
 def test_capitalizes_letter_after_slash(self):
     # Each pair is (raw input, expected str() of the parsed name); the
     # letter right after a slash is expected to be capitalized.
     cases = (
         ('HEALTH SERVICES/HMOS', 'Health Services/Hmos'),
         ('LAWYERS/LAW FIRMS', 'Lawyers/Law Firms'),
     )
     for raw, expected in cases:
         cleaned = OrganizationNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))
Exemplo n.º 3
0
 def test_capitalize_scottish_names(self):
     # Both the "Mc" and "Mac" prefixes should keep the following letter
     # capitalized. Pairs are (raw input, expected str() of the parse).
     cases = (
         ('MCDONNELL DOUGLAS', 'McDonnell Douglas'),
         ('MACDONNELL DOUGLAS', 'MacDonnell Douglas'),
     )
     for raw, expected in cases:
         cleaned = OrganizationNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))
Exemplo n.º 4
0
 def test_expand(self):
     # expand() should spell out abbreviated tokens in the name.
     # Pairs are (raw input, expected expanded form).
     cases = (
         ('Raytheon Corp.', 'Raytheon Corporation'),
         ('Massachusetts Inst. of Technology',
          'Massachusetts Institute of Technology'),
     )
     for raw, expected in cases:
         parsed = OrganizationNameCleaver(raw).parse()
         self.assertEqual(expected, parsed.expand())
Exemplo n.º 5
0
 def test_dont_strip_after_hyphens_too_soon_in_a_name(self):
     # A hyphen near the start of the name must not cause kernel() to drop
     # the text after it. Pairs are (raw input, expected kernel).
     cases = (
         ('US-Russia Business Council', 'US-Russia Business Council'),
         ('Wal-Mart Stores, Inc.', 'Wal-Mart Stores'),
     )
     for raw, expected in cases:
         parsed = OrganizationNameCleaver(raw).parse()
         self.assertEqual(expected, parsed.kernel())
Exemplo n.º 6
0
def normalize_organization(alias):
    """Return the distinct normalized spellings of an organization alias.

    The alias is parsed in safe mode. When the parser hands back a plain
    string, that string is returned alone in a list. Otherwise the list
    holds the standardized form, followed by the expanded form when the two
    differ.
    """
    cleaned = OrganizationNameCleaver(alias).parse(safe=True)

    # A plain string result has no expand(), so return it as the only entry.
    if isinstance(cleaned, (str, unicode)):
        return [cleaned]

    short_form = cleaned.__str__()
    long_form = cleaned.expand()

    return [short_form] if short_form == long_form else [short_form, long_form]
Exemplo n.º 7
0
def normalize_organization(alias):
    """Return the distinct normalized spellings of an organization alias.

    The alias is parsed in safe mode. When the parser hands back a plain
    string, that string is returned alone in a list. Otherwise the list
    holds the standardized form, followed by the expanded form when the two
    differ.
    """
    cleaned = OrganizationNameCleaver(alias).parse(safe=True)

    # A plain string result has no expand(), so return it as the only entry.
    if isinstance(cleaned, (str, unicode)):
        return [cleaned]

    short_form = cleaned.__str__()
    long_form = cleaned.expand()

    return [short_form] if short_form == long_form else [short_form, long_form]
def process_file(filingnum, csvwriter, name):
    """Download one filing and write its '^SB' rows to ``csvwriter``.

    Filings whose form type is rejected by ``fp.is_allowed_form`` are
    reported on stdout and skipped. Every parsed row is extended with
    ``orgname_parsed`` (the cleaned payee organization name) and
    ``committee_name`` (the ``name`` argument) before it is written.
    """
    current_filing = filing(filingnum)
    current_filing.download()
    form = current_filing.get_form_type()
    version = current_filing.get_version()

    # Only parse forms that the parser is set up to read.
    if not fp.is_allowed_form(form):
        print("Not a parseable form: %s - %s" % (form, filingnum))
        return

    print("Found form: %s - %s" % (form, filingnum))
    for row in current_filing.get_rows('^SB'):
        # The last line is empty, so there is nothing in it to parse.
        if len(row) <= 1:
            continue

        parsed_line = fp.parse_form_line(row, version)
        # Double quotes are stripped from the payee name before cleaning.
        orgname = parsed_line['payee_organization_name'].replace('"', '')
        cleaned_orgname = OrganizationNameCleaver(orgname).parse()

        parsed_line['orgname_parsed'] = str(cleaned_orgname)
        parsed_line['committee_name'] = name
        csvwriter.writerow(parsed_line)
Exemplo n.º 9
0
 def test_strip_hyphens_more_than_three_characters_into_a_name(self):
     # The result is not ideal for this particular name, but no single rule
     # gives the best outcome for every case.
     raw = 'F. HOFFMANN-LA ROCHE LTD and its Affiliates'
     kernel = OrganizationNameCleaver(raw).parse().kernel()
     self.assertEqual('F Hoffmann', kernel)
Exemplo n.º 10
0
    def test_kernel(self):
        """
        kernel() is meant to keep only the unique/meaningful words of a name
        """
        # Pairs are (raw input, expected kernel).
        cases = (
            ('Massachusetts Inst. of Technology', 'Massachusetts Technology'),
            ('Massachusetts Institute of Technology',
             'Massachusetts Technology'),
            ('The Walsh Group', 'Walsh'),
            ('Health Net Inc', 'Health Net'),
            ('Health Net, Inc.', 'Health Net'),
            ('Distilled Spirits Council of the U.S., Inc.',
             'Distilled Spirits Council'),
        )
        for raw, expected in cases:
            parsed = OrganizationNameCleaver(raw).parse()
            self.assertEqual(expected, parsed.kernel())
Exemplo n.º 11
0
from django.http import Http404
from django.template.defaultfilters import slugify
from settings import api, LATEST_CYCLE, DEFAULT_CYCLE
import datetime
import googleanalytics
import re
from django.utils.datastructures import SortedDict
from name_cleaver import PoliticianNameCleaver, OrganizationNameCleaver, \
        IndividualNameCleaver
from name_cleaver.names import PoliticianName


# Lookup table from entity type to a callable that parses a raw name with the
# matching name_cleaver class. Industry names go through the organization
# cleaner, the same as organization names.
_standardizers = dict(
    politician=lambda n: PoliticianNameCleaver(n).parse(),
    individual=lambda n: IndividualNameCleaver(n).parse(),
    industry=lambda n: OrganizationNameCleaver(n).parse(),
    organization=lambda n: OrganizationNameCleaver(n).parse(),
)

def standardize_name(name, type):
    """Parse ``name`` with the cleaner registered for ``type``.

    When the parsed name has a truthy ``honorific`` attribute, it is wrapped
    in parentheses in place. Parsed names without that attribute are
    returned unchanged.

    The name is parsed exactly once. Only the ``honorific`` lookup is
    guarded, so an AttributeError raised inside the parser itself is not
    swallowed and retried.

    Raises:
        KeyError: if ``type`` has no entry in ``_standardizers``.
    """
    standardized_name = _standardizers[type](name)

    try:
        honorific = standardized_name.honorific
    except AttributeError:
        # This kind of parsed name carries no honorific; nothing to decorate.
        return standardized_name

    if honorific:
        standardized_name.honorific = "(" + honorific + ")"
    return standardized_name

def bar_validate(data):
    ''' take a dict formatted for submission to the barchart
     generation function, and make sure there's data worth displaying.
Exemplo n.º 12
0
    def build_section_data(self):
        """Populate the contribution-section attributes from ``self.data``.

        Sets the JSON payloads for the candidate and organization recipient
        bar charts and the party pie chart, decides whether the contribution
        graphs should be suppressed (and why), and collects the external
        links and bundling rows.

        The emptiness check is made on the validated chart data, not on its
        JSON encoding: ``json.dumps`` always returns a non-empty string, so
        testing the serialized attributes could never report an empty
        section.
        """
        entity = self.entity
        cycle = entity.cycle
        standardized_name = entity.standardized_name
        external_ids = entity.external_ids
        self.contributions_data = True

        # Validated data stays in locals so the suppression check below can
        # inspect it; only the serialized form is stored on self.
        candidates_barchart_data = bar_validate([{
            'key':
            generate_label(
                str(
                    PoliticianNameCleaver(
                        record['recipient_name']).parse().plus_metadata(
                            record['party'], record['state']))),
            'value':
            record['amount'],
            'href':
            barchart_href(record, cycle, entity_type="politician"),
        } for record in self.data['recipient_candidates']])
        self.candidates_barchart_data = json.dumps(candidates_barchart_data)

        orgs_barchart_data = bar_validate([{
            'key':
            generate_label(
                str(OrganizationNameCleaver(
                    record['recipient_name']).parse())),
            'value':
            record['amount'],
            'href':
            barchart_href(record, cycle, entity_type="organization"),
        } for record in self.data['recipient_orgs']])
        self.orgs_barchart_data = json.dumps(orgs_barchart_data)

        # Each breakdown value is indexed at [1] and coerced to float.
        # Existing keys are reassigned in place, so the dict size does not
        # change while it is being iterated.
        for key, values in self.data['party_breakdown'].iteritems():
            self.data['party_breakdown'][key] = float(values[1])
        party_breakdown = pie_validate(self.data['party_breakdown'])
        self.party_breakdown = json.dumps(party_breakdown)

        # If none of the charts have data, or if the aggregate total
        # was negative, suppress the whole content section except the
        # overview bar.
        amount = int(
            float(entity.metadata['entity_info']['totals']
                  ['contributor_amount']))
        if amount < 0:
            self.suppress_contrib_graphs = True
            self.reason = "negative"
        elif not any((candidates_barchart_data, orgs_barchart_data,
                      party_breakdown)):
            self.suppress_contrib_graphs = True
            self.reason = 'empty'

        self.external_links = external_sites.get_contribution_links(
            'individual', standardized_name, external_ids, cycle)

        bundling_fields = (
            'recipient_entity recipient_name recipient_type firm_entity firm_name amount'
            .split())
        self.bundling_data = [[x[key] for key in bundling_fields]
                              for x in self.data['bundling']]
Exemplo n.º 13
0
 def test_expand_with_two_tokens_to_expand(self):
     # Both abbreviated tokens ("Co." and "Inc.") should be spelled out.
     parsed = OrganizationNameCleaver('Merck & Co., Inc.').parse()
     self.assertEqual('Merck & Company Incorporated', parsed.expand())
Exemplo n.º 14
0
 def test_organization(self):
     # str() of a parsed non-ASCII name is expected to equal the UTF-8
     # encoding of the original unicode text.
     raw = u'\u00C6tna, Inc.'
     expected = raw.encode('utf-8')
     cleaned = OrganizationNameCleaver(raw).parse()
     self.assertEqual(expected, str(cleaned))
Exemplo n.º 15
0
 def test_dont_capitalize_just_anything_starting_with_mac(self):
     # "Machinists" starts with "Mac" but must be title-cased as an
     # ordinary word.
     raw = 'MACHINISTS/AEROSPACE WORKERS UNION'
     cleaned = OrganizationNameCleaver(raw).parse()
     self.assertEqual('Machinists/Aerospace Workers Union', str(cleaned))
Exemplo n.º 16
0
 def test_doesnt_bother_names_containing_string_pac(self):
     # "PACIFIC" merely contains the letters "PAC" and must be title-cased
     # like any other word.
     cleaned = OrganizationNameCleaver('PACIFIC TRUST').parse()
     self.assertEqual('Pacific Trust', str(cleaned))
Exemplo n.º 17
0
    def build_section_data(self):
        """Populate the contribution-section attributes from ``self.data``.

        Sets the JSON payloads for the top-organizations chart (industries
        only), the politician and PAC recipient bar charts, and the party
        and level pie charts. It then decides whether the contribution
        graphs should be suppressed, collects external links and bundling
        rows, and attaches the FEC summary data when a specific cycle is
        selected.

        The emptiness check is made on the validated chart data, not on its
        JSON encoding: ``json.dumps`` always returns a non-empty string, so
        testing the serialized attributes could never report an empty
        section.
        """
        entity = self.entity
        cycle = entity.cycle
        # Named entity_type locally so the builtin ``type`` is not shadowed.
        entity_type = entity.type
        standardized_name = entity.standardized_name
        external_ids = entity.external_ids
        amount = int(
            float(entity.metadata['entity_info']['totals']
                  ['contributor_amount']))

        if entity_type == 'industry':
            self.top_orgs = json.dumps([{
                'key':
                generate_label(
                    str(OrganizationNameCleaver(org['name']).parse())),
                'value':
                org['total_amount'],
                'value_employee':
                org['employee_amount'],
                'value_pac':
                org['direct_amount'],
                'href':
                barchart_href(org, cycle, 'organization')
            } for org in self.data['industry_orgs']])

        self.contributions_data = True

        # Validated data stays in locals so the suppression check below can
        # inspect it; only the serialized form is stored on self.
        pol_recipients_barchart_data = bar_validate([{
            'key':
            generate_label(
                str(
                    PoliticianNameCleaver(
                        record['name']).parse().plus_metadata(
                            record['party'], record['state']))),
            'value':
            record['total_amount'],
            'value_employee':
            record['employee_amount'],
            'value_pac':
            record['direct_amount'],
            'href':
            barchart_href(record, cycle, entity_type='politician')
        } for record in self.data['recipients']])
        self.pol_recipients_barchart_data = json.dumps(
            pol_recipients_barchart_data)

        pacs_barchart_data = bar_validate([{
            'key':
            generate_label(
                str(OrganizationNameCleaver(record['name']).parse())),
            'value':
            record['total_amount'],
            'value_employee':
            record['employee_amount'],
            'value_pac':
            record['direct_amount'],
            'href':
            barchart_href(record, cycle, entity_type="organization"),
        } for record in self.data['recipient_pacs']])
        self.pacs_barchart_data = json.dumps(pacs_barchart_data)

        # Each breakdown value is indexed at [1] and coerced to float.
        # Existing keys are reassigned in place, so the dict size does not
        # change while it is being iterated.
        for key, values in self.data['party_breakdown'].iteritems():
            self.data['party_breakdown'][key] = float(values[1])
        party_breakdown = pie_validate(self.data['party_breakdown'])
        self.party_breakdown = json.dumps(party_breakdown)

        for key, values in self.data['level_breakdown'].iteritems():
            self.data['level_breakdown'][key] = float(values[1])
        level_breakdown = pie_validate(self.data['level_breakdown'])
        self.level_breakdown = json.dumps(level_breakdown)

        # If none of the charts have data, or if the aggregate total
        # was not positive, suppress the whole content section except the
        # overview bar. A zero total suppresses without setting a reason.
        if amount <= 0:
            self.suppress_contrib_graphs = True
            if amount < 0:
                self.reason = "negative"
        elif not any((pol_recipients_barchart_data, party_breakdown,
                      level_breakdown, pacs_barchart_data)):
            self.suppress_contrib_graphs = True
            self.reason = 'empty'

        self.external_links = external_sites.get_contribution_links(
            entity_type, standardized_name, external_ids, cycle)

        bundling_fields = (
            'recipient_entity recipient_name recipient_type lobbyist_entity lobbyist_name firm_name amount'
            .split())
        self.bundling_data = [[x[key] for key in bundling_fields]
                              for x in self.data['bundling']]

        # FEC data is only attached when cycle is not -1.
        if int(cycle) != -1:
            self.fec_indexp = self.data['fec_indexp']

            fec_summary = self.data['fec_summary']
            if (fec_summary and fec_summary['num_committee_filings'] > 0
                    and fec_summary.get('first_filing_date')):
                self.fec_summary = fec_summary
                self.fec_summary['clean_date'] = datetime.datetime.strptime(
                    self.fec_summary['first_filing_date'], "%Y-%m-%d")
                # Only contributions of at least 100000 are charted.
                top_contribs_data = [
                    dict(key=generate_label(
                        row['contributor_name'] or '<Name Missing>', 27),
                         value=row['amount'],
                         href='') for row in self.data['fec_top_contribs']
                    if float(row['amount']) >= 100000
                ]
                if top_contribs_data:
                    self.fec_top_contribs_data = json.dumps(top_contribs_data)

            if getattr(self, 'fec_indexp', False) or getattr(
                    self, 'fec_summary', False):
                self.include_fec = True
Exemplo n.º 18
0
 def test_parse_safe__organization(self):
     # A safe parse of None is expected to give back an empty string.
     result = OrganizationNameCleaver(None).parse(safe=True)
     self.assertEqual('', result)
Exemplo n.º 19
0
 def test_handles_empty_names(self):
     # An empty input should parse to a name whose str() is also empty.
     cleaned = OrganizationNameCleaver('').parse()
     self.assertEqual('', str(cleaned))
def standardize_industry_name_filter(name):
    """Return ``name`` cleaned by OrganizationNameCleaver, as a ``str``."""
    cleaned = OrganizationNameCleaver(name).parse()
    return str(cleaned)
Exemplo n.º 21
0
 def test_capitalize_pac(self):
     # A standalone "PAC" token stays fully upper-case while the other
     # words are title-cased.
     cleaned = OrganizationNameCleaver('NANCY PELOSI LEADERSHIP PAC').parse()
     self.assertEqual('Nancy Pelosi Leadership PAC', str(cleaned))
Exemplo n.º 22
0
 def test_make_single_word_names_ending_in_pac_all_uppercase(self):
     # A one-word name that ends in "PAC" is expected to stay upper-case.
     cleaned = OrganizationNameCleaver('ECEPAC').parse()
     self.assertEqual('ECEPAC', str(cleaned))
Exemplo n.º 23
0
 def test_overrides_dumb_python_titlecasing_for_apostrophes(self):
     # The letter after an apostrophe must stay lower-case ("Women's").
     raw = "PHOENIX WOMEN'S HEALTH CENTER"
     cleaned = OrganizationNameCleaver(raw).parse()
     self.assertEqual("Phoenix Women's Health Center", str(cleaned))
Exemplo n.º 24
0
 def test_names_starting_with_PAC(self):
     # A leading "PAC" token stays upper-case.
     # Pairs are (raw input, expected str() of the parse).
     cases = (
         ('PAC FOR ENGINEERS', 'PAC For Engineers'),
         ('PAC 102', 'PAC 102'),
     )
     for raw, expected in cases:
         cleaned = OrganizationNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))
Exemplo n.º 25
0
 def test_capitalizes_letter_after_hyphen(self):
     # The letter right after a hyphen is expected to be capitalized.
     # Pairs are (raw input, expected str() of the parse).
     cases = (
         ('NON-PROFIT INSTITUTIONS', 'Non-Profit Institutions'),
         ('PRO-ISRAEL', 'Pro-Israel'),
     )
     for raw, expected in cases:
         cleaned = OrganizationNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))
Exemplo n.º 26
0
    def build_section_data(self):
        """Populate the contribution-section attributes from ``self.data``.

        Sets the JSON payloads for the top-contributor and top-industry bar
        charts and the local/entity pie charts, decides whether the
        contribution graphs should be suppressed, computes ``pct_known``,
        collects external links and bundling rows, and builds the FEC
        timelines when ``self.fec_summary`` is truthy.
        """
        entity = self.entity
        entity_id = entity.entity_id
        standardized_name = entity.standardized_name
        cycle = entity.cycle
        external_ids = entity.external_ids

        self.contributions_data = True

        contributors_barchart_data = bar_validate([{
            'key':
            generate_label(
                str(OrganizationNameCleaver(record['name']).parse())),
            'value':
            record['total_amount'],
            'value_employee':
            record['employee_amount'],
            'value_pac':
            record['direct_amount'],
            'href':
            barchart_href(record, cycle, 'organization')
        } for record in self.data['top_contributors']])
        self.contributors_barchart_data = json.dumps(
            contributors_barchart_data)

        industries_barchart_data = bar_validate([{
            'key':
            generate_label(
                str(OrganizationNameCleaver(record['name']).parse())),
            'href':
            barchart_href(record, cycle, 'industry'),
            'value':
            record['amount'],
        } for record in self.data['top_industries']])
        self.industries_barchart_data = json.dumps(industries_barchart_data)

        # Breakdown values arrive as [count, amount]; keep only the amount as
        # a float, then store the validated dict back on self.data.
        local_breakdown = self.data['local_breakdown']
        for key, values in local_breakdown.iteritems():
            local_breakdown[key] = float(values[1])
        self.data['local_breakdown'] = pie_validate(local_breakdown)
        self.local_breakdown = json.dumps(self.data['local_breakdown'])

        entity_breakdown = self.data['entity_breakdown']
        for key, values in entity_breakdown.iteritems():
            entity_breakdown[key] = float(values[1])
        self.data['entity_breakdown'] = pie_validate(entity_breakdown)
        self.entity_breakdown = json.dumps(self.data['entity_breakdown'])

        # Suppress the whole content section (except the overview bar) when
        # the aggregate total received is negative or every chart is empty.
        amount = int(
            float(entity.metadata['entity_info']['totals']
                  ['recipient_amount']))
        if amount < 0:
            self.suppress_contrib_graphs = True
            self.reason = "negative"
        elif not (industries_barchart_data or contributors_barchart_data
                  or self.data['local_breakdown']
                  or self.data['entity_breakdown']):
            self.suppress_contrib_graphs = True
            self.reason = 'empty'

        # Share of the total whose industry is known, as a whole percentage.
        pct_unknown = 0
        if amount:
            unknown_amount = float(
                self.data['industries_unknown_amount'].get('amount', 0))
            pct_unknown = unknown_amount * 100 / amount
        self.pct_known = int(round(100 - pct_unknown))

        self.external_links = external_sites.get_contribution_links(
            'politician', standardized_name.name_str(), external_ids, cycle)
        if self.partytime_link:
            self.external_links.append({
                'url': self.partytime_link,
                'text': 'Party Time'
            })

        bundling_fields = (
            'lobbyist_entity lobbyist_name firm_entity firm_name amount'
            .split())
        self.bundling_data = [[x[key] for key in bundling_fields]
                              for x in self.data['bundling']]

        if not self.fec_summary:
            return

        self.include_fec = True

        if 'date' in self.fec_summary:
            self.fec_summary['clean_date'] = datetime.datetime.strptime(
                self.fec_summary['date'], "%Y-%m-%d")

        timelines = []
        for pol in self.data['fec_timeline']:
            candidate_slug = slugify(
                PoliticianNameCleaver(
                    pol['candidate_name']).parse().name_str())
            tl = {
                'name':
                pol['candidate_name'],
                'party':
                pol['party'],
                'is_this':
                pol['entity_id'] == entity_id,
                # Negative timeline entries are clamped to zero.
                'timeline':
                [item if item >= 0 else 0 for item in pol['timeline']],
                'href':
                '/politician/%s/%s?cycle=%s' % (candidate_slug,
                                                pol['entity_id'], cycle)
            }
            tl['sum'] = sum(tl['timeline'])
            timelines.append(tl)

        # This politician's own timeline sorts first, then the others by
        # descending sum.
        timelines = sorted(timelines,
                           key=lambda t: (int(t['is_this']), t['sum']),
                           reverse=True)

        # Restrict to the top 5, and only those with more than 10% of the
        # first (this politician's) total.
        if timelines:
            threshold = 0.1 * timelines[0]['sum']
            timelines = [t for t in timelines if t['sum'] > threshold][:5]

        self.fec_timelines = json.dumps(timelines)