def do_logic(self, raw_string):
    """
    Turn one raw register entry into an EmploymentItem and store it.

    Entries without the marker 'Hours:' are skipped and nothing is stored.
    For every other entry the value returned by regex_for_amount() is
    multiplied by 12 when the text mentions 'monthly salary' or 'a month',
    and the resulting EmploymentItem is appended to self.items.

    raw_string -- the raw text of a single register entry
    """
    # NOTE: the register usually uses 'Hours' to note time taken - clunky,
    #       but it works for the moment
    # TODO: parse the hours values into mins, hours, days - basically
    #       interpret whatever was filled in on the form
    # TODO: decipher whether the amount earned was donated to party,
    #       charity etc
    # TODO: if possible find the employer - could also check companies house
    #       to investigate further the links of the employer, other
    #       directors, other MPs, relatives etc
    if 'Hours:' not in raw_string:
        return

    amount = regex_for_amount(raw_string)

    # both monthly phrasings get the same multiplication by twelve
    lowered = raw_string.lower()
    if 'monthly salary' in lowered or 'a month' in lowered:
        amount = amount * 12

    # ids are 1-based positions in self.items, zero padded to four digits
    item_id = '%04d' % (len(self.items) + 1)

    # not much to split on: keep whatever precedes ' (Registered'
    pretty = raw_string.split(' (Registered')[0]
    registered = regex_for_registered(raw_string)

    entry = EmploymentItem(item_id, self.category_id, raw_string, pretty,
                           registered, amount)
    self.items.append(entry)
def do_logic(self, raw_string):
    """
    Parse one raw register entry into a VisitsOutsideUKItem and store it.

    The entry text is searched with a list of (start label, end label)
    pairs via get_regex_pair_search(); group(1) of each successful search
    is stored according to the start label:

      'Name of donor'        -> item.donor
      'Address of donor'     -> item.address
      'Destination of visit' -> item.destination
      'Date(s) of visit'     -> item.dates
      'Purpose of visit'     -> used as the item's pretty text

    When several pairs share a start label, the last pair that matches
    wins. Fields with no match stay None; pretty falls back to the text
    preceding ' (Registered'. The finished item is appended to self.items.

    raw_string -- the raw text of a single register entry
    """
    # ids are 1-based positions in self.items, zero padded to four digits
    next_id = len(self.items) + 1
    item_id = '%04d' % next_id

    # not much we can really split on
    pretty = raw_string.split(' (Registered')[0]
    registered = regex_for_registered(raw_string)

    # list of [start, end] search pairs; the value between them is regexed out
    regex_pairs = [
        ['Name of donor: ', 'Address of donor: '],
        ['Address of donor: ', 'Amount of donation '],
        ['Address of donor: ', 'Estimate of the probable value'],
        ['Destination of visit: ', 'Dates of visit: '],
        ['Destination of visit: ', 'Date of visit: '],
        ['Dates of visit: ', 'Purpose of visit: '],
        ['Date of visit: ', 'Purpose of visit: '],
        ['Purpose of visit: ', r'\(Registered '],
    ]

    amount = regex_for_amount(raw_string)

    donor = None
    address = None
    destination = None
    dates = None

    for pair in regex_pairs:
        regex_search = get_regex_pair_search(pair, raw_string)
        if not regex_search:
            continue

        value = regex_search.group(1)
        label = pair[0].lower()

        if 'name of donor' in label:
            donor = value
        elif 'address of donor' in label:
            address = value
        elif 'destination' in label:
            destination = value
        elif 'date' in label:
            # 'date' is a substring of 'dates', so this single test handles
            # both the 'Date of visit' and the 'Dates of visit' labels
            dates = value
        elif 'purpose' in label:
            # the purpose of the visit doubles as the display text
            pretty = value

    item = VisitsOutsideUKItem(item_id, self.category_id, raw_string, pretty,
                               registered, amount)
    item.donor = donor
    item.address = address
    item.destination = destination
    item.dates = dates

    self.items.append(item)
def do_logic(self, raw_string, raw_data):
    """
    Build a GiftsItem from one register entry and store it.

    The donor defaults to raw_data['raw_string']; address and status
    default to ''. Keys of raw_data are then matched case-insensitively:

      contains 'name of donor' -> donor (also used as the pretty text)
      contains 'address'       -> address
      contains 'status'        -> status

    The item keeps a reference to raw_data, has lookup() called on it and
    is appended to self.items.

    raw_string -- the raw text of a single register entry
    raw_data   -- mapping of field label to value for the same entry;
                  must contain the key 'raw_string'
    """
    # ids are 1-based positions in self.items, zero padded to four digits
    item_id = '%04d' % (len(self.items) + 1)

    # display text defaults to everything before ' (Registered'
    pretty = raw_string.split(' (Registered')[0]
    registered = regex_for_registered(raw_string)
    amount = regex_for_amount(raw_string)

    donor = raw_data['raw_string']
    address = ''
    status = ''

    for label, value in raw_data.items():
        lowered = label.lower()
        if 'name of donor' in lowered:
            # a named donor replaces both the default donor and the pretty text
            donor = value
            pretty = value
        elif 'address' in lowered:
            address = value
        elif 'status' in lowered:
            status = value

    gift = GiftsItem(item_id, self.category_id, raw_string, pretty,
                     registered, amount)
    gift.donor = donor
    gift.address = address
    gift.status = status
    gift.raw_data = raw_data
    gift.lookup()

    self.items.append(gift)
def do_logic(self, raw_string):
    """
    Parse one raw register entry into an IndirectDonationsItem and store it.

    The entry text is searched with a list of [start label, end label]
    pairs via get_regex_pair_search(); group(1) of each successful search
    is stored according to the start label:

      'Name of donor'    -> item.donor (also used as the pretty text)
      'Address of donor' -> item.address
      'Donor status'     -> item.status

    The 'Amount of donation ...' pair is searched as well, but its value is
    not stored anywhere. Unmatched fields stay None and pretty falls back
    to the whole raw_string. The item is appended to self.items.

    raw_string -- the raw text of a single register entry
    """
    # ids are 1-based positions in self.items, zero padded to four digits
    item_id = '%04d' % (len(self.items) + 1)

    # nothing sensible to split on here, so start from the full entry text
    pretty = raw_string
    registered = regex_for_registered(raw_string)

    # [start, end] search pairs; the text between the two labels is captured
    regex_pairs = [
        ['Name of donor: ', 'Address of donor: '],
        [
            'Address of donor: ',
            'Amount of donation or nature and value if donation in kind: '
        ],
        [
            'Amount of donation or nature and value if donation in kind:',
            'Date received: '
        ],
        ['Donor status: ', r'\(Registered '],
    ]

    amount = regex_for_amount(raw_string)

    # lower-cased marker in the start label -> name of the captured field
    markers = (
        ('name of donor', 'donor'),
        ('address of donor', 'address'),
        ('donor status', 'status'),
    )
    captured = {'donor': None, 'address': None, 'status': None}

    for pair in regex_pairs:
        found = get_regex_pair_search(pair, raw_string)
        if not found:
            continue

        value = found.group(1)
        start_label = pair[0].lower()

        # first marker contained in the start label decides the field
        for marker, field in markers:
            if marker in start_label:
                captured[field] = value
                if field == 'donor':
                    # the donor name doubles as the display text
                    pretty = value
                break

    donation = IndirectDonationsItem(item_id, self.category_id, raw_string,
                                     pretty, registered, amount)
    donation.donor = captured['donor']
    donation.address = captured['address']
    donation.status = captured['status']

    self.items.append(donation)
def do_logic(self, raw_string, raw_data):
    """
    Wrap one raw register entry in a MiscellaneousItem and store it.

    raw_string -- the raw text of a single register entry
    raw_data   -- accepted for interface compatibility; not read here
    """
    # ids are 1-based positions in self.items, zero padded to four digits
    item_id = '%04d' % (len(self.items) + 1)

    # there is little to split on: keep whatever precedes ' (Registered'
    pretty = raw_string.split(' (Registered')[0]
    registered = regex_for_registered(raw_string)
    amount = regex_for_amount(raw_string)

    entry = MiscellaneousItem(item_id, self.category_id, raw_string, pretty,
                              registered, amount)
    self.items.append(entry)
def do_logic(self, raw_string, raw_data):
    """
    Build a VisitsOutsideUKItem from one register entry and store it.

    The donor defaults to raw_data['raw_string']; address, destination and
    purpose default to '' and status is always 'visit'. Keys of raw_data
    are matched case-insensitively:

      contains 'purpose'       -> purpose
      contains 'destination'   -> destination
      contains 'name of donor' -> donor (also used as the pretty text)
      contains 'address'       -> address

    The item has lookup() called on it and is appended to self.items.

    raw_string -- the raw text of a single register entry
    raw_data   -- mapping of field label to value for the same entry;
                  must contain the key 'raw_string'
    """
    # ids are 1-based positions in self.items, zero padded to four digits
    next_id = len(self.items) + 1
    item_id = '%04d' % next_id

    # not much we can really split on
    pretty = raw_string.split(' (Registered')[0]
    registered = regex_for_registered(raw_string)
    amount = regex_for_amount(raw_string)

    donor = raw_data['raw_string']
    address = ''
    destination = ''
    purpose = ''
    status = 'visit'

    for key, value in raw_data.items():
        label = key.lower()

        # The two chains below are deliberately independent (they were two
        # separate passes over raw_data), so one key may feed both groups.
        if 'purpose' in label:
            purpose = value
        elif 'destination' in label:
            destination = value

        if 'name of donor' in label:
            # TODO: name of donor might be: (1) Policy Network (2) Les Gracques
            #       - split into a list of donors
            donor = value
            pretty = value
        elif 'address' in label:
            address = value

    item = VisitsOutsideUKItem(item_id, self.category_id, raw_string, pretty,
                               registered, amount)
    item.donor = donor
    item.address = address
    item.destination = destination
    item.purpose = purpose
    item.status = status
    item.lookup()

    self.items.append(item)
def do_logic(self, raw_string, raw_data):
    """
    Build a ShareholdingsItem for one register entry and store it.

    Steps:
      1. Work out the amount from self.category_description:
         'Shareholdings' uses regex_for_percent(); 'Other Shareholdings'
         uses regex_for_amount() with a fallback of 70000 when that
         returns 0. Any other description leaves the amount at 0.
      2. Resolve the company record: a pre-defined patch from `urls` is
         used when raw_string has one, otherwise a
         CompaniesHouseCompanySearch is run and the first result accepted
         by check_match() is taken.
      3. Build the company url from the record's 'self' link, or fall back
         to a placeholder record (with an empty url) when nothing matched.

    Here we only look for the company link.
    TODO: verify that the MP has significant control or is an officer of
          the matched company, to double check the match.

    raw_string -- the raw text of a single register entry
    raw_data   -- accepted for interface compatibility; not read here
    """
    # ids are 1-based positions in self.items, zero padded to four digits
    next_id = len(self.items) + 1
    item_id = '%04d' % next_id

    # not much we can really split on
    pretty = raw_string.split(' (Registered')[0]
    registered = regex_for_registered(raw_string)

    # default so that an unexpected category_description cannot leave
    # `amount` unbound when the item is built at the end
    amount = 0
    if self.category_description == 'Shareholdings':
        amount = regex_for_percent(raw_string)
    elif self.category_description == 'Other Shareholdings':
        amount = regex_for_amount(raw_string)
        if amount == 0:
            amount = 70000

    company = None
    url = ''

    # check if we have pre-defined this
    if raw_string in urls:
        company = patched_company(urls[raw_string])

        # test the patch url, may have been set to ''
        if 'company_name' in company:
            company['title'] = company['company_name']
        else:
            # spoof the record
            company['title'] = pretty
            company['links'] = {'self': ''}

    if company is None:
        # no patches have been applied, so search for the company instead
        company_search_string = make_search_string(raw_string)

        # longer search strings (more than five words) get a higher limit
        limit = '50'
        if len(company_search_string.split(' ')) > 5:
            limit = '100'

        companies = CompaniesHouseCompanySearch(
            queries=[company_search_string], limit=limit)

        for candidate in companies.data:
            # check for matches, when one is found, stop looking
            if check_match(candidate, company_search_string, self.month,
                           self.year, self.first, self.middle, self.last,
                           self.display):
                company = candidate
                break

    if company:
        link = company['links']['self']
        url = 'https://beta.companieshouse.gov.uk%s' % link
    else:
        # nothing patched and nothing matched: placeholder record, empty url
        company = {
            'title': raw_string,
            'links': {
                'self': ''
            },
            'company_status': 'active',
            'company_name': raw_string,
        }

    self.items.append(
        ShareholdingsItem(item_id, self.category_id, raw_string, pretty,
                          registered, amount, company, url))