def _residence_addresses(address_text):
    """Return one 'Residence' address entry per piece of *address_text*.

    ``None`` yields an empty list; any string (including '') is split on a
    single space exactly as the inline code this helper replaces did.
    """
    if address_text is None:
        return []
    # NOTE(review): the separator is a single space as it appears in this
    # file, which breaks an address into individual words -- confirm this is
    # the intended delimiter.
    return [
        {'type': 'Residence', 'address_string': piece}
        for piece in address_text.split(' ')
    ]


def build_registration(rows, name_type, name_data):
    """Build the registration structure for one index row.

    Args:
        rows: Mapping for a single source row (despite the plural name),
            indexed by column name. Columns read: 'class_type',
            'property_county', 'counties', 'property', 'parish_district',
            'registration_date', 'registration_no', 'amendment_info',
            'address' and, for PAB/WOB only, 'occupation'.
        name_type: Value stored under the 'type' key of ``name_data``.
        name_data: Dict describing the party's name. It is used as-is (not
            copied) as the only entry of the first party's 'names' list, and
            its 'type' key is overwritten with ``name_type``.

    Returns:
        A dict with 'class_of_charge', 'registration', 'parties',
        'applicant', 'additional_information' and 'migration_data'.
        Registrations other than PAB/WOB also get 'particulars'.
    """
    coc = class_without_brackets(rows['class_type'])
    is_bankruptcy = coc in ['PAB', 'WOB']

    county_text = rows['property_county'].strip()
    logging.info(' County_text is "%s"', county_text)

    banks_county = ''
    if county_text in ['BANKS', ''] and coc in ['PA', 'WO', 'DA']:
        # Special case for <1% of the data...
        banks_county = rows['counties']
        # Log the value just captured; county_text was already logged above
        # and can only be 'BANKS' or '' at this point.
        logging.info(' BANKS county of "%s"', banks_county)

    # Placeholder values are not real counties: blank them out.
    if county_text in ['NO COUNTY', 'NO COUNTIES', 'BANKS']:
        county_text = ''

    party = {"type": "Debtor" if is_bankruptcy else "Estate Owner"}
    registration = {
        "class_of_charge": coc,
        "registration": {
            "date": rows['registration_date'],
            # Keep only the digits of the registration number.
            "registration_no": re.sub("[^0-9]", "", str(rows['registration_no']))
        },
        "parties": [party],
        "applicant": {
            'name': '',
            'address': '',
            'key_number': '',
            'reference': ''
        },
        "additional_information": "",
        "migration_data": {
            'unconverted_reg_no': rows['registration_no'],
            'amend_info': rows['amendment_info'],
            'flags': [],
            'bankruptcy_county': banks_county
        }
    }

    amend = parse_amend_info(rows['amendment_info'])
    registration['additional_information'] = amend['additional_information']

    if is_bankruptcy:
        party['occupation'] = rows['occupation']
        party['trading_name'] = ''
        party['residence_withheld'] = False
        party['case_reference'] = amend['reference']
        # A missing (None) address gives an empty list instead of raising.
        party['addresses'] = _residence_addresses(rows['address'])

        if amend['court'] is not None:
            registration['parties'].append({
                'type': 'Court',
                'names': [{
                    'type': 'Other',
                    'other': amend['court']
                }]
            })
    else:
        address = rows['address']
        if address is not None and address != '':
            # Some old registers have addresses on non-PAB/WOB regns
            party['addresses'] = _residence_addresses(address)

        registration['particulars'] = {
            'counties': [reformat_county(county_text)],
            'district': rows['parish_district'],
            'description': rows['property']
        }

    # name_data is stored by reference, so the caller's dict gains 'type'.
    party['names'] = [name_data]
    name_data['type'] = name_type
    return registration
def _residence_addresses(address_text):
    """Return one 'Residence' address entry per piece of *address_text*.

    ``None`` yields an empty list; any string (including '') is split on a
    single space exactly as the inline code this helper replaces did.
    """
    if address_text is None:
        return []
    # NOTE(review): the separator is a single space as it appears in this
    # file, which breaks an address into individual words -- confirm this is
    # the intended delimiter.
    return [
        {'type': 'Residence', 'address_string': piece}
        for piece in address_text.split(' ')
    ]


def build_registration(rows, name_type, name_data):
    """Build the registration structure for one index row.

    Args:
        rows: Mapping for a single source row (despite the plural name),
            indexed by column name. Columns read: 'class_type',
            'property_county', 'counties', 'property', 'parish_district',
            'registration_date', 'registration_no', 'amendment_info',
            'address' and, for PAB/WOB only, 'occupation'.
        name_type: Value stored under the 'type' key of ``name_data``.
        name_data: Dict describing the party's name. It is used as-is (not
            copied) as the only entry of the first party's 'names' list, and
            its 'type' key is overwritten with ``name_type``.

    Returns:
        A dict with 'class_of_charge', 'registration', 'parties',
        'applicant', 'additional_information' and 'migration_data'.
        Registrations other than PAB/WOB also get 'particulars'.
    """
    coc = class_without_brackets(rows['class_type'])
    is_bankruptcy = coc in ['PAB', 'WOB']

    county_text = rows['property_county'].strip()
    logging.info(' County_text is "%s"', county_text)

    banks_county = ''
    if county_text in ['BANKS', ''] and coc in ['PA', 'WO', 'DA']:
        # Special case for <1% of the data...
        banks_county = rows['counties']
        # Log the value just captured; county_text was already logged above
        # and can only be 'BANKS' or '' at this point.
        logging.info(' BANKS county of "%s"', banks_county)

    # Placeholder values are not real counties: blank them out.
    if county_text in ['NO COUNTY', 'NO COUNTIES', 'BANKS']:
        county_text = ''

    party = {"type": "Debtor" if is_bankruptcy else "Estate Owner"}
    registration = {
        "class_of_charge": coc,
        "registration": {
            "date": rows['registration_date'],
            # Keep only the digits of the registration number.
            "registration_no": re.sub("[^0-9]", "", str(rows['registration_no']))
        },
        "parties": [party],
        "applicant": {
            'name': '',
            'address': '',
            'key_number': '',
            'reference': ''
        },
        "additional_information": "",
        "migration_data": {
            'unconverted_reg_no': rows['registration_no'],
            'amend_info': rows['amendment_info'],
            'flags': [],
            'bankruptcy_county': banks_county
        }
    }

    amend = parse_amend_info(rows['amendment_info'])
    registration['additional_information'] = amend['additional_information']

    if is_bankruptcy:
        party['occupation'] = rows['occupation']
        party['trading_name'] = ''
        party['residence_withheld'] = False
        party['case_reference'] = amend['reference']
        # A missing (None) address gives an empty list instead of raising.
        party['addresses'] = _residence_addresses(rows['address'])

        if amend['court'] is not None:
            registration['parties'].append({
                'type': 'Court',
                'names': [{
                    'type': 'Other',
                    'other': amend['court']
                }]
            })
    else:
        address = rows['address']
        if address is not None and address != '':
            # Some old registers have addresses on non-PAB/WOB regns
            party['addresses'] = _residence_addresses(address)

        registration['particulars'] = {
            'counties': [reformat_county(county_text)],
            'district': rows['parish_district'],
            'description': rows['property']
        }

    # name_data is stored by reference, so the caller's dict gains 'type'.
    party['names'] = [name_data]
    name_data['type'] = name_type
    return registration
def handle_additional_rows(registration, rows, app_type):
    """Fold the extra index rows of a registration into *registration*.

    It's possible for the data to contain multiple index entries for one
    registration. This is uncommon (about 1.3% of the entries), but 1.3% of
    several million is still a good number of rows. Identified cases:

    * additional county          -> add the county to the existing registration
    * additional name            -> add the extra names
    * additional class of charge -> produce a new registration

    Every row after the first is compared with ``rows[0]`` using
    ``whats_different``.

    Args:
        registration: Registration dict built from ``rows[0]``; updated in
            place. Must contain 'migration_data'; 'parties' and
            'particulars' are touched only when a row requires it.
        rows: Sequence of row mappings; ``rows[0]`` is the head entry.
        app_type: Passed through unchanged to ``extract_data``.

    Returns:
        A list with one element per extra row whose 'class_type' differs
        from the head row; each element is whatever
        ``extract_data([row], app_type)`` returned.
    """
    # Columns whose differing values are only recorded, grouped by column.
    recorded_columns = (
        'amendment_info', 'priority_notice', 'parish_district', 'address',
        'property', 'name', 'occupation', 'priority_notice_ref', 'counties',
    )
    name_columns = ('reverse_name', 'remainder_name', 'punctuation_code')

    add_regs = []
    additional_data = {}

    for row in rows[1:]:
        changes = whats_different(row, rows[0])

        if "class_type" in changes:
            # A different class of charge is a registration in its own right.
            add_regs.append(extract_data([row], app_type))
            continue

        for column in recorded_columns:
            if column in changes:
                additional_data.setdefault(column, []).append(row[column])

        if any(column in changes for column in name_columns):
            # Re-extract the row on its own and copy over its names.
            alt_regn = extract_data([row], app_type)[0]
            if len(alt_regn['parties']) > 0:
                registration['parties'][0]['names'].extend(
                    alt_regn['parties'][0]['names'])

        if "property_county" in changes and 'particulars' in registration:
            # NOTE(review): build_registration strips property_county before
            # comparing it with these placeholders; the value here is used
            # unstripped -- confirm whether padding can occur.
            raw_county = row['property_county']
            counties = registration['particulars']['counties']
            if (raw_county not in ['BANKS', 'NO COUNTY', 'NO COUNTIES']
                    and raw_county not in counties):
                county = reformat_county(raw_county)
                # The list holds reformatted names, so the reformatted value
                # must be checked too or the same county is added twice.
                if county not in counties:
                    counties.append(county)

    registration['migration_data']['additional_rows'] = additional_data
    return add_regs
def handle_additional_rows(registration, rows, app_type):
    """Fold the extra index rows of a registration into *registration*.

    It's possible for the data to contain multiple index entries for one
    registration. This is uncommon (about 1.3% of the entries), but 1.3% of
    several million is still a good number of rows. Identified cases:

    * additional county          -> add the county to the existing registration
    * additional name            -> add the extra names
    * additional class of charge -> produce a new registration

    Every row after the first is compared with ``rows[0]`` using
    ``whats_different``.

    Args:
        registration: Registration dict built from ``rows[0]``; updated in
            place. Must contain 'migration_data'; 'parties' and
            'particulars' are touched only when a row requires it.
        rows: Sequence of row mappings; ``rows[0]`` is the head entry.
        app_type: Passed through unchanged to ``extract_data``.

    Returns:
        A list with one element per extra row whose 'class_type' differs
        from the head row; each element is whatever
        ``extract_data([row], app_type)`` returned.
    """
    # Columns whose differing values are only recorded, grouped by column.
    recorded_columns = (
        'amendment_info', 'priority_notice', 'parish_district', 'address',
        'property', 'name', 'occupation', 'priority_notice_ref', 'counties',
    )
    name_columns = ('reverse_name', 'remainder_name', 'punctuation_code')

    add_regs = []
    additional_data = {}

    for row in rows[1:]:
        changes = whats_different(row, rows[0])

        if "class_type" in changes:
            # A different class of charge is a registration in its own right.
            add_regs.append(extract_data([row], app_type))
            continue

        for column in recorded_columns:
            if column in changes:
                additional_data.setdefault(column, []).append(row[column])

        if any(column in changes for column in name_columns):
            # Re-extract the row on its own and copy over its names.
            alt_regn = extract_data([row], app_type)[0]
            if len(alt_regn['parties']) > 0:
                registration['parties'][0]['names'].extend(
                    alt_regn['parties'][0]['names'])

        if "property_county" in changes and 'particulars' in registration:
            # NOTE(review): build_registration strips property_county before
            # comparing it with these placeholders; the value here is used
            # unstripped -- confirm whether padding can occur.
            raw_county = row['property_county']
            counties = registration['particulars']['counties']
            if (raw_county not in ['BANKS', 'NO COUNTY', 'NO COUNTIES']
                    and raw_county not in counties):
                county = reformat_county(raw_county)
                # The list holds reformatted names, so the reformatted value
                # must be checked too or the same county is added twice.
                if county not in counties:
                    counties.append(county)

    registration['migration_data']['additional_rows'] = additional_data
    return add_regs