def parse_row(self, record):
    """Turn one local-authority row into an ``Organisation`` record.

    The row is first passed through ``self.clean_fields``.  Every record is
    given the cached ``local-authority`` organisation type; when the row has
    a ``local-authority-type`` value a second type is registered through
    ``self.add_org_type``, using the ``LA_TYPES`` translation of that value
    (or the raw value when ``LA_TYPES`` has no entry for it).

    The resulting ``Organisation`` is appended to ``self.records``; nothing
    is returned.
    """
    record = self.clean_fields(record)

    org_types = [self.orgtype_cache["local-authority"]]
    la_type = record.get("local-authority-type")
    if la_type:
        org_types.append(self.add_org_type(LA_TYPES.get(la_type, la_type)))

    org_ids = [self.get_org_id(record)]
    # @TODO: map local authority code to GSS to add locations
    locations = []

    organisation = Organisation(
        org_id=self.get_org_id(record),
        name=record.get("name"),
        charityNumber=None,
        companyNumber=None,
        streetAddress=None,
        addressLocality=None,
        addressRegion=None,
        addressCountry="Northern Ireland",
        postalCode=None,
        telephone=None,
        alternateName=[],
        email=None,
        description=None,
        organisationType=[org_type.slug for org_type in org_types],
        organisationTypePrimary=org_types[0],
        url=record.get("website"),
        location=locations,
        latestIncome=None,
        dateModified=datetime.datetime.now(),
        dateRegistered=record.get("start-date"),
        dateRemoved=record.get("end-date"),
        # a row without an end date is treated as still active
        active=record.get("end-date") is None,
        parent=None,
        orgIDs=org_ids,
        scrape=self.scrape,
        source=self.source,
        spider=self.name,
        org_id_scheme=self.orgid_scheme,
    )
    self.records.append(organisation)
def propose_properties(request):
    """Return the properties that can be requested for ``Organization``.

    Two query-string parameters are read from ``request.GET``:

    * ``type`` -- defaults to ``"Organization"``; any other value is
      rejected.
    * ``limit`` -- defaults to ``"500"`` and is echoed back as an integer.

    Returns:
        A ``JsonResponse`` with the keys ``limit``, ``type`` and
        ``properties`` (taken from
        ``Organisation.get_fields_as_properties()``).  When ``limit`` is not
        a valid integer, a ``JsonResponse`` with status 400 and an ``error``
        message is returned instead.

    Raises:
        Http404: if ``type`` is anything other than ``"Organization"``.
    """
    type_ = request.GET.get("type", "Organization")
    if type_ != "Organization":
        raise Http404("type must be Organization")

    # ``limit`` comes straight from the query string, so a non-numeric value
    # must be reported to the client rather than surfacing as a server error.
    try:
        limit = int(request.GET.get("limit", "500"))
    except (TypeError, ValueError):
        return JsonResponse({"error": "limit must be an integer"}, status=400)

    return JsonResponse({
        "limit": limit,
        "type": type_,
        "properties": Organisation.get_fields_as_properties(),
    })
def parse_row(self, record):
    """Parse one row from either the company lookup or the club list.

    Rows that contain a ``casc_orgid`` key are lookup rows: the mapping
    ``casc_orgid -> ch_orgid`` is stored in ``self.coynos`` (created on first
    use) and no organisation is produced.

    Any other row is turned into an ``Organisation`` and appended to
    ``self.records``.  The ``address`` value is split on commas into at most
    three parts, used as street address, locality and region.  When the
    row's ``id`` appears in the lookup, the linked identifier is added to
    ``orgIDs`` and the cached ``registered-company`` type is added.

    Returns ``None`` in both cases.
    """
    record = self.clean_fields(record)

    if "casc_orgid" in record:
        if not hasattr(self, "coynos"):
            self.coynos = {}
        self.coynos[record["casc_orgid"]] = record["ch_orgid"]
        return

    # The lookup only exists once a lookup row has been seen; fall back to
    # an empty mapping so club rows can still be parsed without it.
    coynos = getattr(self, "coynos", {})

    address_parts = [
        part.strip() for part in record["address"].split(",", maxsplit=2)
    ]
    # index -> part, so missing trailing parts simply come back as None
    address = dict(enumerate(address_parts))

    org_ids = [record["id"]]
    orgtypes = [
        self.orgtype_cache["community-amateur-sports-club"],
        self.orgtype_cache["sports-club"],
    ]
    if record["id"] in coynos:
        org_ids.append(coynos[record["id"]])
        orgtypes.append(self.orgtype_cache["registered-company"])

    self.records.append(
        Organisation(
            org_id=record["id"],
            name=record["name"],
            charityNumber=None,
            companyNumber=None,
            streetAddress=address.get(0),
            addressLocality=address.get(1),
            addressRegion=address.get(2),
            addressCountry=None,
            postalCode=self.parse_postcode(record["postcode"]),
            telephone=None,
            alternateName=[],
            email=None,
            description=None,
            organisationType=[o.slug for o in orgtypes],
            organisationTypePrimary=orgtypes[0],
            url=None,
            location=[],
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=None,
            dateRemoved=None,
            active=True,
            parent=None,
            orgIDs=org_ids,
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
    )
def parse_file(self, response, source_url):
    """Create an ``Organisation`` for every row of the ``heps-table`` table.

    ``source_url`` is recorded through ``self.set_download_url``.  For each
    ``tr`` in the table body the cell texts are read by position: cell 0
    forms the ``GB-UKPRN`` identifier, cell 1 the primary identifier
    (prefixed with ``self.org_id_prefix``), cell 2 the name and cell 4 the
    provider type, which is translated through ``self.hesa_org_types`` when
    a translation exists.

    Each organisation is appended to ``self.records``; nothing is returned.
    """
    self.set_download_url(source_url)

    for table_row in response.html.find("table#heps-table tbody tr"):
        cells = [cell.text for cell in table_row.find("td")]

        org_id = "-".join([self.org_id_prefix, str(cells[1])])
        ukprn_id = "-".join(["GB-UKPRN", str(cells[0])])

        provider_type = cells[4].strip()
        org_types = [
            self.orgtype_cache["higher-education"],
            self.add_org_type(
                self.hesa_org_types.get(provider_type, provider_type)
            ),
        ]

        organisation = Organisation(
            org_id=org_id,
            name=cells[2].strip(),
            charityNumber=None,
            companyNumber=None,
            streetAddress=None,
            addressLocality=None,
            addressRegion=None,
            addressCountry=None,
            postalCode=None,
            telephone=None,
            alternateName=[],
            email=None,
            description=None,
            organisationType=[org_type.slug for org_type in org_types],
            organisationTypePrimary=org_types[0],
            url=None,
            location=[],
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=None,
            dateRemoved=None,
            active=True,
            parent=None,
            orgIDs=[org_id, ukprn_id],
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
        self.records.append(organisation)
def parse_row(self, record):
    """Turn one school row into an ``Organisation`` record.

    Rows whose ``self.id_field`` value is the literal ``"UNKNOWN"`` are
    skipped before any cleaning.  Otherwise the row is cleaned, the
    non-empty ``Address Line 1``..``Address Line 3`` values are joined with
    ``", "`` into the street address, and three organisation types are
    attached: the cached ``education`` type, one for the ``Management``
    value and one for ``"<Type> School"``.  The last of these is used as
    the primary type.

    The organisation is appended to ``self.records``; nothing is returned.
    """
    if record.get(self.id_field) == "UNKNOWN":
        return

    record = self.clean_fields(record)

    address_lines = [
        record.get("Address Line {}".format(line_no)) for line_no in (1, 2, 3)
    ]
    address = ", ".join(line for line in address_lines if line)

    # ``record.get("Type", "")`` still yields None when the key exists but
    # is empty, which would make the concatenation below raise TypeError.
    school_type = record.get("Type") or ""
    org_types = [
        self.orgtype_cache["education"],
        self.add_org_type(record.get("Management")),
        self.add_org_type(school_type + " School"),
    ]

    org_id = self.get_org_id(record)

    self.records.append(
        Organisation(
            org_id=org_id,
            name=record.get("Institution Name"),
            charityNumber=None,
            companyNumber=None,
            streetAddress=address,
            addressLocality=record.get("Town"),
            # NOTE(review): the key is "Count" in the original code; this
            # may be intended to be "County" -- confirm against the source
            # data before changing it.
            addressRegion=record.get("Count"),
            addressCountry="Northern Ireland",
            postalCode=self.parse_postcode(record.get("Postcode")),
            telephone=record.get("Telephone"),
            alternateName=[],
            email=record.get("Email"),
            description=None,
            organisationType=[o.slug for o in org_types],
            # the "<Type> School" type is the primary one
            organisationTypePrimary=org_types[2],
            url=None,
            location=[],
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=None,
            dateRemoved=record.get("Date Closed"),
            active=record.get("Status") == "Open",
            parent=None,
            orgIDs=[org_id],
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
    )
def test_fullorganisation_sortname(self):
    """``FullOrganisation.prepare_sortname`` returns the expected sort key.

    Each case pairs a raw organisation name with the value
    ``prepare_sortname`` must return for an ``Organisation`` of that name.
    """
    cases = [
        # (raw name, expected sort name)
        ("the charity the name", "charity the name"),
        ("charity the name", "charity the name"),
        (" the charity the name", "charity the name"),
        ("the charity the name", "charity the name"),
        ("' charity the name'", "charity the name"),
        ("' charity the name'", "charity the name"),
        (" charity the name", "charity the name"),
        ("(2charity the name", "2charity the name"),
        ("Above Us Only Sky?", "above us only sky"),
    ]
    for raw_name, expected in cases:
        organisation = Organisation(name=raw_name)
        document = FullOrganisation()
        assert document.prepare_sortname(organisation) == expected
def parse_row(self, record):
    """Turn one school row into an ``Organisation`` record.

    After cleaning, the non-empty ``Address 3`` and ``Address 4`` values are
    joined with ``", "`` to form the region, and the organisation types are
    obtained from ``self.get_org_types``.  Rows lacking either a
    ``School Name`` or a ``School Number`` are then skipped.

    Accepted rows are appended to ``self.records``; nothing is returned.
    """
    record = self.clean_fields(record)

    region_parts = []
    for line_no in (3, 4):
        value = record.get("Address {}".format(line_no))
        if value:
            region_parts.append(value)
    region = ", ".join(region_parts)

    org_types = self.get_org_types(record)

    has_name = bool(record.get("School Name"))
    has_number = bool(record.get("School Number"))
    if not (has_name and has_number):
        return

    organisation = Organisation(
        org_id=self.get_org_id(record),
        name=record.get("School Name"),
        charityNumber=None,
        companyNumber=None,
        streetAddress=record.get("Address 1"),
        addressLocality=record.get("Address 2"),
        addressRegion=region,
        addressCountry="Wales",
        postalCode=self.parse_postcode(record.get("Postcode")),
        telephone=record.get("Phone Number"),
        alternateName=[],
        email=None,
        description=None,
        organisationType=[org_type.slug for org_type in org_types],
        organisationTypePrimary=org_types[0],
        url=None,
        location=self.get_locations(record),
        latestIncome=None,
        dateModified=datetime.datetime.now(),
        dateRegistered=None,
        dateRemoved=None,
        active=True,
        parent=None,
        orgIDs=[self.get_org_id(record)],
        scrape=self.scrape,
        source=self.source,
        spider=self.name,
        org_id_scheme=self.orgid_scheme,
    )
    self.records.append(organisation)
def do_extend_query(ids, properties):
    """Look up the requested properties for a list of organisation ids.

    Args:
        ids: organisation ids, matched against ``Organisation.org_id``.
        properties: iterable of dicts with an ``id`` key naming the wanted
            property.  Ids that are not among
            ``Organisation.get_fields_as_properties()`` are ignored.

    Returns:
        A dict with two keys: ``meta``, the property descriptions for the
        accepted properties in request order, and ``rows``, a mapping of
        org id to ``{property id: value}``.  Ids with no matching
        organisation get a row in which every property is ``None``.
    """
    known_properties = {
        prop["id"]: prop for prop in Organisation.get_fields_as_properties()
    }
    fields = [
        requested["id"]
        for requested in properties
        if requested["id"] in known_properties
    ]

    rows = {}
    matches = Organisation.objects.filter(org_id__in=ids).values(
        "org_id", *fields
    )
    for match in matches:
        # keep only the requested columns (``org_id`` is always selected)
        rows[match["org_id"]] = {
            key: value for key, value in match.items() if key in fields
        }

    # add in rows for any data that is missing
    for org_id in ids:
        if org_id not in rows:
            rows[org_id] = {key: None for key in fields}

    return {
        "meta": [known_properties[field] for field in fields],
        "rows": rows,
    }
def parse_row(self, record):
    """Turn one establishment row into an ``Organisation`` record.

    Three organisation types are attached: the cached ``education`` type,
    and one each for the de-pluralised ``EstablishmentTypeGroup (name)`` and
    ``TypeOfEstablishment (name)`` values.  The type group is used as the
    primary type.

    The organisation is appended to ``self.records``; nothing is returned.
    """
    record = self.clean_fields(record)

    education_type = self.orgtype_cache["education"]
    group_type = self.add_org_type(
        self.depluralise(record.get("EstablishmentTypeGroup (name)"))
    )
    establishment_type = self.add_org_type(
        self.depluralise(record.get("TypeOfEstablishment (name)"))
    )
    org_types = [education_type, group_type, establishment_type]

    fields = {
        "org_id": self.get_org_id(record),
        "name": record.get("EstablishmentName"),
        "charityNumber": None,
        "companyNumber": None,
        "streetAddress": record.get("Street"),
        "addressLocality": record.get("Locality"),
        "addressRegion": record.get("Address3"),
        "addressCountry": record.get("Country (name)"),
        "postalCode": self.parse_postcode(record.get("Postcode")),
        "telephone": record.get("TelephoneNum"),
        "alternateName": [],
        "email": None,
        "description": None,
        "organisationType": [org_type.slug for org_type in org_types],
        "organisationTypePrimary": org_types[1],
        "url": self.parse_url(record.get("SchoolWebsite")),
        "location": self.get_locations(record),
        "latestIncome": None,
        "dateModified": datetime.datetime.now(),
        "dateRegistered": record.get("OpenDate"),
        "dateRemoved": record.get("CloseDate"),
        "active": record.get("EstablishmentStatus (name)") != "Closed",
        "parent": record.get("PropsName"),
        "orgIDs": self.get_org_ids(record),
        "scrape": self.scrape,
        "source": self.source,
        "spider": self.name,
        "org_id_scheme": self.orgid_scheme,
    }
    self.records.append(Organisation(**fields))
def test_organisation_url(self):
    """``Organisation.cleanUrl`` and ``displayUrl`` are derived from ``url``.

    Each case lists the stored url followed by the expected ``cleanUrl``
    and ``displayUrl`` values.
    """
    cases = [
        # (original url, cleanUrl, displayUrl)
        ("university.ac.uk", "http://university.ac.uk", "university.ac.uk"),
        (
            "http://www.charity.org.uk/",
            "http://www.charity.org.uk/",
            "charity.org.uk",
        ),
        (
            "https://www.charity.org.uk/",
            "https://www.charity.org.uk/",
            "charity.org.uk",
        ),
        ("https://charity.org.uk/", "https://charity.org.uk/", "charity.org.uk"),
        ("//charity.org.uk/", "//charity.org.uk/", "charity.org.uk"),
        (
            "https://www.charity.org.uk/www.html",
            "https://www.charity.org.uk/www.html",
            "charity.org.uk/www.html",
        ),
        (
            "www.charity.org.uk/www.html",
            "http://www.charity.org.uk/www.html",
            "charity.org.uk/www.html",
        ),
    ]
    for original, expected_clean, expected_display in cases:
        organisation = Organisation(url=original)
        assert organisation.cleanUrl == expected_clean
        assert organisation.displayUrl == expected_display
def parse_row(self, record):
    """Turn one register row into an ``Organisation`` record.

    Every key of ``self.orgtype_cache`` is listed as an organisation type,
    with the cached ``government-organisation`` entry as the primary type.
    A row without an ``end-date`` is marked active.

    The organisation is appended to ``self.records``; nothing is returned.
    """
    record = self.clean_fields(record)

    end_date = record.get("end-date")
    organisation = Organisation(
        org_id=self.get_org_id(record),
        name=record.get("name"),
        charityNumber=None,
        companyNumber=None,
        streetAddress=None,
        addressLocality=None,
        addressRegion=None,
        addressCountry=None,
        postalCode=None,
        telephone=None,
        alternateName=[],
        email=None,
        description=None,
        organisationType=list(self.orgtype_cache.keys()),
        organisationTypePrimary=self.orgtype_cache["government-organisation"],
        url=record.get("website"),
        location=[],
        latestIncome=None,
        dateModified=datetime.datetime.now(),
        dateRegistered=record.get("start-date"),
        dateRemoved=end_date,
        active=end_date is None,
        parent=None,
        orgIDs=[self.get_org_id(record)],
        scrape=self.scrape,
        source=self.source,
        spider=self.name,
        org_id_scheme=self.orgid_scheme,
    )
    self.records.append(organisation)
def parse_row(self, record):
    """Parse one row from either the other-names list or the charity list.

    Rows that contain a ``Charity_number`` key are other-name rows: the
    ``Other_names`` value is collected in ``self.extra_names`` (a
    ``defaultdict(list)`` created on first use, keyed by charity number) and
    no organisation is produced.

    Any other row is treated as a charity.  Its ``Public address`` is split
    into address parts and postcode with ``self.split_address``; a parsed
    ``Company number`` adds the "Registered Company" type and a
    ``GB-COH-`` identifier.  The cleaned row, extended with ``address`` and
    ``postcode`` keys, is appended to ``self.raw_records`` and the
    ``Organisation`` is handed to ``self.add_org_record``.

    Returns ``None`` in both cases.
    """
    record = self.clean_fields(record)

    if "Charity_number" in record:
        if not hasattr(self, "extra_names"):
            self.extra_names = defaultdict(list)
        self.extra_names[record["Charity_number"]].append(
            record["Other_names"])
        return

    # The lookup only exists once an other-name row has been seen; fall back
    # to an empty mapping so charities can still be parsed without it.
    extra_names = getattr(self, "extra_names", {})

    address, postcode = self.split_address(record.get("Public address", ""))

    org_types = [
        self.orgtype_cache["registered-charity"],
        self.orgtype_cache["registered-charity-northern-ireland"],
    ]
    org_id = self.get_org_id(record)
    org_ids = [org_id]

    coyno = self.parse_company_number(record.get("Company number"))
    if coyno:
        org_types.append(self.add_org_type("Registered Company"))
        org_ids.append("GB-COH-{}".format(coyno))

    self.raw_records.append({
        **record,
        "address": ", ".join([a for a in address if a]),
        "postcode": postcode,
    })

    total_income = record.get("Total income")

    self.add_org_record(
        Organisation(
            org_id=org_id,
            name=record.get("Charity name").replace("`", "'"),
            charityNumber="NIC{}".format(record.get(self.id_field)),
            companyNumber=coyno,
            streetAddress=address[0],
            addressLocality=address[1],
            addressRegion=address[2],
            addressCountry="Northern Ireland",
            postalCode=postcode,
            telephone=record.get("Telephone"),
            alternateName=extra_names.get(record.get(self.id_field), []),
            email=record.get("Email"),
            description=None,
            organisationType=[o.slug for o in org_types],
            organisationTypePrimary=self.orgtype_cache["registered-charity"],
            url=self.parse_url(record.get("Website")),
            location=[],
            latestIncome=int(total_income) if total_income else None,
            dateModified=datetime.datetime.now(),
            dateRegistered=record.get("Date registered"),
            dateRemoved=None,
            active=record.get("Status") != "Removed",
            parent=None,
            orgIDs=org_ids,
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
    )
def parse_row(self, record):
    """Turn one charity row into an ``Organisation`` record.

    Besides the two cached charity types, further organisation types are
    added from the row:

    * ``Regulatory Type``, unless empty or ``"Standard"``;
    * "Designated religious body" when that column is ``"Yes"``;
    * one or two types derived from ``Constitutional Form``.  Known forms
      are translated to shorter type names; any other non-empty form except
      ``"Other"`` is used verbatim.

    The cleaned row is appended to ``self.raw_records`` and the
    ``Organisation`` to ``self.records``; nothing is returned.
    """
    # Constitutional forms with a fixed translation, in registration order.
    known_forms = {
        "SCIO (Scottish Charitable Incorporated Organisation)": (
            "Scottish Charitable Incorporated Organisation",
        ),
        "CIO (Charitable Incorporated Organisation, E&W)": (
            "Charitable Incorporated Organisation",
        ),
        "Company (the charity is registered with Companies House)": (
            "Registered Company",
            "Incorporated Charity",
        ),
        "Trust (founding document is a deed of trust) (other than educational endowment)": (
            "Trust",
        ),
    }

    record = self.clean_fields(record)
    address, _ = self.split_address(
        record.get("Principal Office/Trustees Address", ""),
        get_postcode=False,
    )

    org_types = [
        self.orgtype_cache["registered-charity"],
        self.orgtype_cache["registered-charity-scotland"],
    ]

    regulatory_type = record.get("Regulatory Type")
    if regulatory_type != "Standard" and regulatory_type:
        org_types.append(self.add_org_type(regulatory_type))

    if record.get("Designated religious body") == "Yes":
        org_types.append(self.add_org_type("Designated religious body"))

    form = record.get("Constitutional Form")
    if form in known_forms:
        for type_name in known_forms[form]:
            org_types.append(self.add_org_type(type_name))
    elif form != "Other" and form:
        org_types.append(self.add_org_type(form))

    org_ids = [self.get_org_id(record)]
    self.raw_records.append(record)

    known_as = record.get("Known As")
    income = record.get("Most recent year income")

    organisation = Organisation(
        org_id=self.get_org_id(record),
        name=record.get("Charity Name"),
        charityNumber=record.get(self.id_field),
        companyNumber=None,
        streetAddress=address[0],
        addressLocality=address[1],
        addressRegion=address[2],
        addressCountry="Scotland",
        postalCode=self.parse_postcode(record.get("Postcode")),
        telephone=None,
        alternateName=[known_as] if known_as else [],
        email=None,
        description=record.get("Objectives"),
        organisationType=[org_type.slug for org_type in org_types],
        organisationTypePrimary=org_types[0],
        url=self.parse_url(record.get("Website")),
        location=[],
        latestIncome=int(income) if income else None,
        dateModified=datetime.datetime.now(),
        dateRegistered=record.get("Registered Date"),
        dateRemoved=record.get("Ceased Date"),
        active=record.get("Charity Status") != "Removed",
        # @TODO: More sophisticated getting of parent charities here
        parent=record.get("Parent Charity Name"),
        orgIDs=org_ids,
        scrape=self.scrape,
        source=self.source,
        spider=self.name,
        org_id_scheme=self.orgid_scheme,
    )
    self.records.append(organisation)
def parse_row(self, row):
    """Turn one company row into an ``Organisation`` record.

    Column names are stripped and have ``.`` replaced by ``_`` before the
    row is cleaned.  A ``CompanyCategory`` listed in ``self.clg_types`` is
    normalised to "Company Limited by Guarantee".  The flat columns are then
    regrouped:

    * ``PreviousName_<n>_<field>`` columns with a value are collected into
      one dict per ``<n>``; ``CONDATE`` values are parsed as ``dd/mm/YYYY``
      dates and also record ``nameno``.
    * ``SICCode_*`` columns with a value other than "None Supplied" are
      split on the first ``" - "`` into ``code`` and ``name``.
    * every other column is copied through unchanged.

    The ``Organisation`` is handed to ``self.add_org_record``; nothing is
    returned.
    """
    row = {k.strip().replace(".", "_"): row[k] for k in row}
    row = self.clean_fields(row)

    if row.get("CompanyCategory") in self.clg_types:
        row["CompanyCategory"] = "Company Limited by Guarantee"

    previous_names = {}
    sic_codes = []
    record = {}
    for key, value in row.items():
        if key.startswith("PreviousName_"):
            if not value:
                continue
            parts = key.split("_")
            name_no, field = parts[1], parts[2]
            entry = previous_names.setdefault(name_no, {})
            if field == "CONDATE":
                entry[field] = datetime.datetime.strptime(
                    value, "%d/%m/%Y").date()
                entry["nameno"] = name_no
            else:
                entry[field] = value
        elif key.startswith("SICCode_"):
            if value and value.replace("None Supplied", "") != "":
                # partition() tolerates a value with no " - " separator,
                # leaving the name empty instead of raising IndexError.
                code, _, name = value.partition(" - ")
                sic_codes.append({
                    "code": code.strip(),
                    "name": name.strip(),
                })
        else:
            record[key] = value

    record["previous_names"] = list(previous_names.values())
    record["sic_codes"] = sic_codes

    address_fields = (
        "RegAddress_CareOf",
        "RegAddress_POBox",
        "RegAddress_AddressLine1",
        "RegAddress_AddressLine2",
    )
    address1 = [record.get(f) for f in address_fields if record.get(f)]

    category_type = self.add_org_type(record.get("CompanyCategory"))
    orgtypes = [self.orgtype_cache["registered-company"], category_type]

    org_id = self.get_org_id(record)

    # A company counts as active only if its status is not a closed one
    # and no dissolution date is recorded.
    inactive_statuses = ["Dissolved", "Inactive", "Converted / Closed"]
    is_active = (record.get("CompanyStatus") not in inactive_statuses
                 and not record.get("DissolutionDate"))

    self.add_org_record(
        Organisation(
            org_id=org_id,
            name=record.get("CompanyName"),
            charityNumber=None,
            companyNumber=record.get(self.id_field),
            streetAddress=", ".join(address1),
            addressLocality=record.get("RegAddress_PostTown"),
            addressRegion=record.get("RegAddress_County"),
            addressCountry=record.get("RegAddress_Country"),
            postalCode=record.get("RegAddress_PostCode"),
            telephone=None,
            # NOTE(review): this passes the previous-name dicts rather than
            # a list of name strings -- confirm that is what the model
            # expects.
            alternateName=record["previous_names"],
            email=None,
            description=None,
            organisationType=[o.slug for o in orgtypes],
            organisationTypePrimary=category_type,
            url=None,
            location=[],
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=record.get("IncorporationDate"),
            dateRemoved=record.get("DissolutionDate"),
            active=is_active,
            parent=None,
            orgIDs=[org_id],
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
    )
def parse_row(self, record):
    """Turn one society row into an ``Organisation`` record.

    Each org id is processed only once: ids already in
    ``self.org_ids_seen`` (created on first use) are skipped, as are rows
    without a ``Society Name``.

    If the last two space-separated tokens of ``Society Address`` match
    ``self.postcode_regex`` they are taken as the postcode (stored
    upper-cased) and removed from the address before it is split with
    ``self.split_address``.  Rows registered as a "Community Benefit
    Society" or "Co-operative Society" additionally get a ``GB-COH-RS``
    identifier built from the zero-padded registration number.

    The organisation is appended to ``self.records``; nothing is returned.
    """
    if not hasattr(self, "org_ids_seen"):
        self.org_ids_seen = []

    record = self.clean_fields(record)

    org_id = self.get_org_id(record)
    if org_id in self.org_ids_seen:
        return
    self.org_ids_seen.append(org_id)

    if not record.get("Society Name"):
        return

    raw_address = record["Society Address"]
    postcode = None
    if isinstance(raw_address, str):
        tokens = raw_address.strip().split(" ")
        if len(tokens) > 2:
            candidate = " ".join(tokens[-2:])
            if self.postcode_regex.match(candidate):
                postcode = candidate.upper()
                # Remove the postcode exactly as written: replacing the
                # upper-cased form would leave a lower-case postcode in
                # the address.
                record["Society Address"] = raw_address.replace(
                    candidate, "")

    address, _ = self.split_address(record["Society Address"],
                                    get_postcode=False)
    # index -> part, so missing trailing parts simply come back as None
    address = dict(enumerate(address))

    org_ids = [org_id]
    orgtypes = [
        self.orgtype_cache["mutual"],
        self.add_org_type(record.get("Registered As")),
    ]

    description = ""
    if record.get("Registration Act"):
        description = "Registered under {}".format(
            record.get("Registration Act"))

    # add org ids for companies
    if record.get("Registered As") in [
            "Community Benefit Society",
            "Co-operative Society",
    ]:
        # "Registation" (sic) is the key as spelled in the source data
        org_ids.append("GB-COH-RS{}".format(
            record["Full Registation Number"].zfill(6)))

    org_record = {
        "org_id": org_id,
        "name": record.get("Society Name"),
        "charityNumber": None,
        "companyNumber": None,
        "streetAddress": address.get(0),
        "addressLocality": address.get(1),
        "addressRegion": address.get(2),
        "addressCountry": None,
        "postalCode": postcode,
        "telephone": None,
        "alternateName": [],
        "email": None,
        "description": description,
        "organisationType": [o.slug for o in orgtypes],
        # the "Registered As" type is the primary one
        "organisationTypePrimary": orgtypes[1],
        "url": None,
        "location": [],
        "latestIncome": None,
        "dateModified": datetime.datetime.now(),
        "dateRegistered": record.get("Registration Date"),
        "dateRemoved": record.get("Deregistration Date"),
        "active": record.get("Society Status", "") != "Deregistered",
        "parent": None,
        "orgIDs": org_ids,
        "scrape": self.scrape,
        "source": self.source,
        "spider": self.name,
        "org_id_scheme": self.orgid_scheme,
    }
    self.records.append(Organisation(**org_record))
def parse_row(self, record):
    """Turn one nested organisation record into an ``Organisation``.

    Address details are read from the first entry of ``addresses``: the
    non-empty ``line_1``..``line_3`` values are joined with ``", "`` into
    the street address.  The first entry of ``links`` becomes the url and
    the first entry of ``types`` the organisation type (defaulting to
    "Education").  A relationship of type ``"Parent"`` sets the parent to
    ``<self.org_id_prefix>-<id>``; if several are present the last one
    wins.

    Missing, ``None`` or empty ``addresses``, ``relationships``, ``types``,
    ``aliases`` and ``acronyms`` values are all treated as "no data".

    The organisation is appended to ``self.records``; nothing is returned.
    """
    record = self.clean_fields(record)

    # ``.get(key, default)`` only covers a missing key; ``or`` also covers
    # an empty list or None, which would otherwise fail on ``[0]``.
    first_address = (record.get("addresses") or [{}])[0]

    address = [
        first_address.get(line)
        for line in ("line_1", "line_2", "line_3")
        if first_address.get(line)
    ]
    postcode = first_address.get("postcode") or None

    links = record.get("links")
    url = links[0] if links else None

    parent = None
    for relationship in record.get("relationships") or []:
        if relationship.get("type") == "Parent":
            parent = self.org_id_prefix + "-" + relationship.get("id")

    types = record.get("types") or []
    orgtype = self.add_org_type(types[0] if types else "Education")

    org_id = self.get_org_id(record)
    alternate_names = (record.get("aliases") or []) + (
        record.get("acronyms") or [])

    self.records.append(
        Organisation(
            org_id=org_id,
            name=record.get("name"),
            charityNumber=None,
            companyNumber=None,
            streetAddress=", ".join(address),
            addressLocality=first_address.get("city"),
            addressRegion=first_address.get("state"),
            addressCountry=first_address.get("country"),
            postalCode=postcode,
            telephone=None,
            alternateName=alternate_names,
            email=record.get("email_address"),
            description=None,
            organisationType=[orgtype.slug],
            organisationTypePrimary=orgtype,
            url=url,
            location=[],
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=None,
            dateRemoved=None,
            active=record.get("status") == "active",
            parent=parent,
            orgIDs=[org_id] + self.get_org_ids(
                record.get("external_ids", {})),
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
    )
def parse_row(self, record):
    """Turn one local-authority row into an ``Organisation`` record.

    Every key of ``self.orgtype_cache`` is listed as an organisation type,
    with the cached ``local-authority`` entry as the primary type.  A row
    without an ``end-date`` is marked active.

    The organisation is appended to ``self.records``; nothing is returned.
    """
    record = self.clean_fields(record)

    org_ids = [self.get_org_id(record)]
    # @TODO: map local authority code to GSS to add locations
    locations = []

    end_date = record.get("end-date")
    organisation = Organisation(
        org_id=self.get_org_id(record),
        name=record.get("official-name"),
        charityNumber=None,
        companyNumber=None,
        streetAddress=None,
        addressLocality=None,
        addressRegion=None,
        addressCountry="Wales",
        postalCode=None,
        telephone=None,
        alternateName=[],
        email=None,
        description=None,
        organisationType=list(self.orgtype_cache.keys()),
        organisationTypePrimary=self.orgtype_cache["local-authority"],
        url=None,
        location=locations,
        latestIncome=None,
        dateModified=datetime.datetime.now(),
        dateRegistered=record.get("start-date"),
        dateRemoved=end_date,
        active=end_date is None,
        parent=None,
        orgIDs=org_ids,
        scrape=self.scrape,
        source=self.source,
        spider=self.name,
        org_id_scheme=self.orgid_scheme,
    )
    self.records.append(organisation)
def parse_row(self, record):
    """Turn one social-housing provider row into an ``Organisation``.

    Every record gets the "Registered Provider of Social Housing" type.
    Further types depend on ``Corporate Form``:

    * ``"Company"`` adds "Registered Company" and
      ``"<Designation> <Corporate Form>"``;
    * the charity-related forms add fixed sets of types;
    * any other non-empty form is used verbatim.

    When there is no corporate form, a non-empty ``Designation`` is used as
    the type instead.  Rows designated ``"Local Authority"`` that have an
    entry in ``LA_LOOKUP`` also receive a ``GB-LAE-`` identifier and one
    location built from that entry.

    The organisation is appended to ``self.records``; nothing is returned.
    """
    # Corporate forms that translate to a fixed list of type names.
    fixed_form_types = {
        "CIO-Charitable Incorporated Organisation": (
            "Charitable Incorporated Organisation",
            "Registered Charity",
        ),
        "Charitable Company": (
            "Registered Company",
            "Incorporated Charity",
            "Registered Charity",
        ),
        "Unincorporated Charity": ("Registered Charity",),
    }

    record = self.clean_fields(record)

    org_types = [self.add_org_type("Registered Provider of Social Housing")]

    corporate_form = record.get("Corporate Form")
    if corporate_form:
        if corporate_form == "Company":
            org_types.append(self.add_org_type("Registered Company"))
            org_types.append(
                self.add_org_type(
                    "{} {}".format(record["Designation"], corporate_form)
                )
            )
        else:
            type_names = fixed_form_types.get(
                corporate_form, (corporate_form,)
            )
            for type_name in type_names:
                org_types.append(self.add_org_type(type_name))
    elif record.get("Designation"):
        org_types.append(self.add_org_type(record["Designation"]))

    org_ids = [self.get_org_id(record)]
    locations = []
    if record.get("Designation") == "Local Authority":
        la_codes = LA_LOOKUP.get(record.get(self.id_field))
        if la_codes:
            org_ids.append(
                "GB-LAE-{}".format(la_codes.get("register-code"))
            )
            gss_code = la_codes.get("GSS")
            locations.append(
                {
                    "id": gss_code,
                    "name": la_codes.get("name"),
                    "geoCode": gss_code,
                    # the first three characters of the code select the
                    # area type
                    "geoCodeType": AREA_TYPES.get(
                        gss_code[0:3], "Local Authority"
                    ),
                }
            )

    organisation = Organisation(
        org_id=self.get_org_id(record),
        name=record.get("Organisation Name"),
        charityNumber=None,
        companyNumber=None,
        streetAddress=None,
        addressLocality=None,
        addressRegion=None,
        addressCountry="England",
        postalCode=None,
        telephone=None,
        alternateName=[],
        email=None,
        description=None,
        organisationType=[org_type.slug for org_type in org_types],
        organisationTypePrimary=org_types[0],
        url=None,
        location=locations,
        latestIncome=None,
        dateModified=datetime.datetime.now(),
        dateRegistered=record.get("Registration Date"),
        dateRemoved=None,
        active=True,
        parent=None,
        orgIDs=org_ids,
        scrape=self.scrape,
        source=self.source,
        spider=self.name,
        org_id_scheme=self.orgid_scheme,
    )
    self.records.append(organisation)
def parse_row(self, record, org_type=None):
    """Turn one row into an ``Organisation`` tagged ``health`` and ``nhs``.

    Args:
        record: the raw row; it is cleaned with ``self.clean_fields``.
        org_type: optional extra organisation type name.  When given it is
            registered through ``self.add_org_type``.  It is also the key
            used to look up the record's source in ``self.sources``.

    ``Address Line 1`` and ``Address Line 2`` are combined into the street
    address, ``Address Line 3`` and ``Address Line 4`` into the locality;
    ``Address Line 5`` is the region.  A row without a ``Close Date`` is
    marked active.

    The organisation is appended to ``self.records``; nothing is returned.
    """

    def combine(first, second):
        """Join two address parts with ", ", tolerating an empty part."""
        if not second:
            return first
        if first:
            return first + ", {}".format(second)
        return second

    record = self.clean_fields(record)

    org_types = [self.orgtype_cache["health"], self.orgtype_cache["nhs"]]
    if org_type:
        org_types.append(self.add_org_type(org_type))

    street = combine(
        record.get("Address Line 1"), record.get("Address Line 2")
    )
    locality = combine(
        record.get("Address Line 3"), record.get("Address Line 4")
    )

    organisation = Organisation(
        org_id=self.get_org_id(record),
        name=record.get("Name"),
        charityNumber=None,
        companyNumber=None,
        streetAddress=street,
        addressLocality=locality,
        addressRegion=record.get("Address Line 5"),
        addressCountry=None,
        postalCode=record.get("Postcode"),
        telephone=record.get("Contact Telephone Number"),
        alternateName=[],
        email=None,
        description=None,
        organisationType=[entry.slug for entry in org_types],
        organisationTypePrimary=org_types[0],
        url=None,
        location=[],
        latestIncome=None,
        dateModified=datetime.datetime.now(),
        dateRegistered=record.get("Open Date"),
        dateRemoved=record.get("Close Date"),
        active=record.get("Close Date") is None,
        parent=record.get("Parent Organisation Code"),
        orgIDs=[self.get_org_id(record)],
        scrape=self.scrape,
        source=self.sources[org_type],
        spider=self.name,
        org_id_scheme=self.orgid_scheme,
    )
    self.records.append(organisation)