Пример #1
0
    def parse_row(self, record):
        """Build an ``Organisation`` from one source row and queue it.

        The row is normalised with ``self.clean_fields``. The organisation
        always gets the cached ``local-authority`` type (which is also its
        primary type) and, when the row has a ``local-authority-type`` value,
        the type registered for that value as well. The result is appended to
        ``self.records``; nothing is returned.
        """
        record = self.clean_fields(record)

        org_types = [self.orgtype_cache["local-authority"]]
        la_type = record.get("local-authority-type")
        if la_type:
            # Values with an entry in LA_TYPES are translated; anything else
            # is registered under the raw value.
            org_types.append(self.add_org_type(LA_TYPES.get(la_type, la_type)))

        org_ids = [self.get_org_id(record)]

        # @TODO: map local authority code to GSS to add locations
        locations = []

        organisation = Organisation(
            org_id=self.get_org_id(record),
            name=record.get("name"),
            charityNumber=None,
            companyNumber=None,
            streetAddress=None,
            addressLocality=None,
            addressRegion=None,
            addressCountry="Northern Ireland",
            postalCode=None,
            telephone=None,
            alternateName=[],
            email=None,
            description=None,
            organisationType=[org_type.slug for org_type in org_types],
            organisationTypePrimary=org_types[0],
            url=record.get("website"),
            location=locations,
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=record.get("start-date"),
            dateRemoved=record.get("end-date"),
            # A row without an end date is treated as still active.
            active=record.get("end-date") is None,
            parent=None,
            orgIDs=org_ids,
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
        self.records.append(organisation)
Пример #2
0
def propose_properties(request):
    """Return the properties that can be proposed for the Organization type.

    Reads ``type`` (default ``"Organization"``) and ``limit`` (default
    ``500``) from the query string and answers with a JSON object holding
    ``limit``, ``type`` and ``properties``. ``limit`` is only echoed back;
    the property list itself is not truncated here.

    Raises:
        Http404: if ``type`` is anything other than ``"Organization"``, or
            if ``limit`` cannot be parsed as an integer.
    """
    type_ = request.GET.get("type", "Organization")
    if type_ != "Organization":
        raise Http404("type must be Organization")

    # ``limit`` is caller-supplied text: reject junk the same way as a bad
    # ``type`` instead of letting ``int()`` escape as an unhandled ValueError.
    try:
        limit = int(request.GET.get("limit", "500"))
    except ValueError:
        raise Http404("limit must be an integer") from None

    return JsonResponse({
        "limit": limit,
        "type": type_,
        "properties": Organisation.get_fields_as_properties(),
    })
Пример #3
0
    def parse_row(self, record):
        """Handle one row from either of the two inputs this parser receives.

        Rows carrying a ``casc_orgid`` key are lookup rows: the
        ``casc_orgid`` -> ``ch_orgid`` pair is remembered in ``self.coynos``
        and nothing else happens. Every other row becomes an ``Organisation``
        appended to ``self.records``; when its ``id`` has a remembered
        ``ch_orgid``, that identifier and the ``registered-company`` type are
        added to it.

        Returns:
            None in both cases.
        """
        record = self.clean_fields(record)

        if "casc_orgid" in record:
            if not hasattr(self, "coynos"):
                self.coynos = {}
            self.coynos[record["casc_orgid"]] = record["ch_orgid"]
            return

        # Split into at most three comma-separated parts, keyed by position so
        # that a short address yields None for the missing parts via ``.get``.
        address = dict(
            enumerate(
                part.strip()
                for part in record["address"].split(",", maxsplit=2)
            )
        )

        org_id = record["id"]
        org_ids = [org_id]
        orgtypes = [
            self.orgtype_cache["community-amateur-sports-club"],
            self.orgtype_cache["sports-club"],
        ]

        # ``self.coynos`` only exists once a lookup row has been seen; treat
        # "no lookup rows yet" as an empty mapping instead of failing with an
        # AttributeError.
        coynos = getattr(self, "coynos", {})
        if org_id in coynos:
            org_ids.append(coynos[org_id])
            orgtypes.append(self.orgtype_cache["registered-company"])

        self.records.append(
            Organisation(
                org_id=org_id,
                name=record["name"],
                charityNumber=None,
                companyNumber=None,
                streetAddress=address.get(0),
                addressLocality=address.get(1),
                addressRegion=address.get(2),
                addressCountry=None,
                postalCode=self.parse_postcode(record["postcode"]),
                telephone=None,
                alternateName=[],
                email=None,
                description=None,
                organisationType=[o.slug for o in orgtypes],
                organisationTypePrimary=orgtypes[0],
                url=None,
                location=[],
                latestIncome=None,
                dateModified=datetime.datetime.now(),
                dateRegistered=None,
                dateRemoved=None,
                active=True,
                parent=None,
                orgIDs=org_ids,
                scrape=self.scrape,
                source=self.source,
                spider=self.name,
                org_id_scheme=self.orgid_scheme,
            )
        )
Пример #4
0
    def parse_file(self, response, source_url):
        """Turn every body row of the ``heps-table`` table into an ``Organisation``.

        ``source_url`` is passed to ``self.set_download_url``. For each
        ``<tr>`` the text of its ``<td>`` cells is read by position: cell 0
        feeds the ``GB-UKPRN`` identifier, cell 1 the identifier under
        ``self.org_id_prefix``, cell 2 the name and cell 4 the provider type.
        The organisations are appended to ``self.records``.
        """
        self.set_download_url(source_url)

        for table_row in response.html.find("table#heps-table tbody tr"):
            cells = [cell.text for cell in table_row.find("td")]

            org_id = "-".join([self.org_id_prefix, str(cells[1])])
            ukprn_id = "-".join(["GB-UKPRN", str(cells[0])])
            orgids = [org_id, ukprn_id]

            # Provider types without an entry in ``self.hesa_org_types`` are
            # registered under the raw cell text.
            provider_type = cells[4].strip()
            org_types = [
                self.orgtype_cache["higher-education"],
                self.add_org_type(
                    self.hesa_org_types.get(provider_type, provider_type)
                ),
            ]

            organisation = Organisation(
                org_id=org_id,
                name=cells[2].strip(),
                charityNumber=None,
                companyNumber=None,
                streetAddress=None,
                addressLocality=None,
                addressRegion=None,
                addressCountry=None,
                postalCode=None,
                telephone=None,
                alternateName=[],
                email=None,
                description=None,
                organisationType=[org_type.slug for org_type in org_types],
                organisationTypePrimary=org_types[0],
                url=None,
                location=[],
                latestIncome=None,
                dateModified=datetime.datetime.now(),
                dateRegistered=None,
                dateRemoved=None,
                active=True,
                parent=None,
                orgIDs=orgids,
                scrape=self.scrape,
                source=self.source,
                spider=self.name,
                org_id_scheme=self.orgid_scheme,
            )
            self.records.append(organisation)
    def parse_row(self, record):
        """Convert one school row into an ``Organisation`` in ``self.records``.

        Rows whose ``self.id_field`` value is ``"UNKNOWN"`` are skipped before
        any cleaning. Otherwise the row is cleaned, its three address lines
        are joined into the street address, and three organisation types are
        attached: the cached ``education`` type, the ``Management`` value and
        ``"<Type> School"`` (the last one is the primary type).

        Returns:
            None.
        """
        if record.get(self.id_field) == "UNKNOWN":
            return

        record = self.clean_fields(record)

        address_lines = (
            record.get("Address Line {}".format(n)) for n in (1, 2, 3)
        )
        address = ", ".join(line for line in address_lines if line)

        # ``.get("Type", "")`` only covers a missing key; a key that is
        # present with a None value would make the concatenation raise
        # TypeError, so treat None like a missing value.
        school_type = (record.get("Type") or "") + " School"
        org_types = [
            self.orgtype_cache["education"],
            self.add_org_type(record.get("Management")),
            self.add_org_type(school_type),
        ]

        org_id = self.get_org_id(record)

        # NOTE(review): the original read a "Count" column, which looks like
        # a truncated "County". Prefer "County" when the row has it and keep
        # "Count" as the fallback -- confirm against the source headers.
        region = record.get("County") or record.get("Count")

        self.records.append(
            Organisation(
                org_id=org_id,
                name=record.get("Institution Name"),
                charityNumber=None,
                companyNumber=None,
                streetAddress=address,
                addressLocality=record.get("Town"),
                addressRegion=region,
                addressCountry="Northern Ireland",
                postalCode=self.parse_postcode(record.get("Postcode")),
                telephone=record.get("Telephone"),
                alternateName=[],
                email=record.get("Email"),
                description=None,
                organisationType=[o.slug for o in org_types],
                # The "<Type> School" entry is the primary type.
                organisationTypePrimary=org_types[2],
                url=None,
                location=[],
                latestIncome=None,
                dateModified=datetime.datetime.now(),
                dateRegistered=None,
                dateRemoved=record.get("Date Closed"),
                active=record.get("Status") == "Open",
                parent=None,
                orgIDs=[org_id],
                scrape=self.scrape,
                source=self.source,
                spider=self.name,
                org_id_scheme=self.orgid_scheme,
            )
        )
Пример #6
0
 def test_fullorganisation_sortname(self):
     """Check ``FullOrganisation.prepare_sortname`` against a table of names."""
     # Each pair is (organisation name, expected sort name).
     cases = [
         ("the charity the name", "charity the name"),
         ("charity the name", "charity the name"),
         (" the charity the name", "charity the name"),
         ("the  charity the name", "charity the name"),
         ("' charity the name'", "charity the name"),
         ("'  charity the name'", "charity the name"),
         (" charity the name", "charity the name"),
         ("(2charity the name", "2charity the name"),
         ("Above Us Only Sky?", "above us only sky"),
     ]
     for raw_name, expected in cases:
         organisation = Organisation(name=raw_name)
         document = FullOrganisation()
         assert document.prepare_sortname(organisation) == expected
Пример #7
0
    def parse_row(self, record):
        """Convert one school row into an ``Organisation`` in ``self.records``.

        The row is cleaned, "Address 3" and "Address 4" are joined into the
        region, and the organisation types come from ``self.get_org_types``.
        Rows lacking a "School Name" or a "School Number" are dropped without
        creating a record. Nothing is returned.
        """
        record = self.clean_fields(record)

        region_lines = (record.get("Address {}".format(n)) for n in (3, 4))
        address4 = ", ".join(line for line in region_lines if line)
        org_types = self.get_org_types(record)

        # Both the name and the number are required to record a school.
        if not (record.get("School Name") and record.get("School Number")):
            return

        organisation = Organisation(
            org_id=self.get_org_id(record),
            name=record.get("School Name"),
            charityNumber=None,
            companyNumber=None,
            streetAddress=record.get("Address 1"),
            addressLocality=record.get("Address 2"),
            addressRegion=address4,
            addressCountry="Wales",
            postalCode=self.parse_postcode(record.get("Postcode")),
            telephone=record.get("Phone Number"),
            alternateName=[],
            email=None,
            description=None,
            organisationType=[org_type.slug for org_type in org_types],
            organisationTypePrimary=org_types[0],
            url=None,
            location=self.get_locations(record),
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=None,
            dateRemoved=None,
            active=True,
            parent=None,
            orgIDs=[self.get_org_id(record)],
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
        self.records.append(organisation)
Пример #8
0
def do_extend_query(ids, properties):
    """Look up the requested properties for a set of organisation ids.

    Args:
        ids: organisation ids, matched against ``org_id``.
        properties: iterable of mappings with an ``"id"`` key naming a field.
            Ids that ``Organisation.get_fields_as_properties()`` does not
            list are ignored.

    Returns:
        A dict with ``"meta"`` (the property descriptions of the accepted
        fields, in request order) and ``"rows"`` (``org_id`` -> ``{field:
        value}``). Ids without a matching organisation get a row in which
        every accepted field is ``None``.
    """
    available = {
        prop["id"]: prop for prop in Organisation.get_fields_as_properties()
    }
    fields = [p["id"] for p in properties if p["id"] in available]
    meta = [available[field] for field in fields]

    rows = {}
    matches = Organisation.objects.filter(org_id__in=ids).values(
        "org_id", *fields
    )
    for match in matches:
        # ``org_id`` is fetched to key the row; it only stays in the row
        # itself when it was explicitly requested.
        rows[match["org_id"]] = {
            key: value for key, value in match.items() if key in fields
        }

    # add in rows for any data that is missing
    for org_id in ids:
        if org_id not in rows:
            rows[org_id] = {field: None for field in fields}

    return {"meta": meta, "rows": rows}
    def parse_row(self, record):
        """Convert one establishment row into an ``Organisation``.

        The row is cleaned and given three organisation types: the cached
        ``education`` type plus the depluralised establishment type group and
        type of establishment. The type group is the primary type. The
        organisation is appended to ``self.records``; nothing is returned.
        """
        record = self.clean_fields(record)

        education = self.orgtype_cache["education"]
        type_group = self.add_org_type(
            self.depluralise(record.get("EstablishmentTypeGroup (name)"))
        )
        establishment_type = self.add_org_type(
            self.depluralise(record.get("TypeOfEstablishment (name)"))
        )
        org_types = [education, type_group, establishment_type]

        fields = {
            "org_id": self.get_org_id(record),
            "name": record.get("EstablishmentName"),
            "charityNumber": None,
            "companyNumber": None,
            "streetAddress": record.get("Street"),
            "addressLocality": record.get("Locality"),
            "addressRegion": record.get("Address3"),
            "addressCountry": record.get("Country (name)"),
            "postalCode": self.parse_postcode(record.get("Postcode")),
            "telephone": record.get("TelephoneNum"),
            "alternateName": [],
            "email": None,
            "description": None,
            "organisationType": [org_type.slug for org_type in org_types],
            "organisationTypePrimary": org_types[1],
            "url": self.parse_url(record.get("SchoolWebsite")),
            "location": self.get_locations(record),
            "latestIncome": None,
            "dateModified": datetime.datetime.now(),
            "dateRegistered": record.get("OpenDate"),
            "dateRemoved": record.get("CloseDate"),
            # Any status other than "Closed" counts as active.
            "active": record.get("EstablishmentStatus (name)") != "Closed",
            "parent": record.get("PropsName"),
            "orgIDs": self.get_org_ids(record),
            "scrape": self.scrape,
            "source": self.source,
            "spider": self.name,
            "org_id_scheme": self.orgid_scheme,
        }
        self.records.append(Organisation(**fields))
Пример #10
0
 def test_organisation_url(self):
     """Check ``cleanUrl`` and ``displayUrl`` for a table of input URLs."""
     # Each entry is (original url, expected cleanUrl, expected displayUrl).
     cases = [
         ("university.ac.uk", "http://university.ac.uk", "university.ac.uk"),
         (
             "http://www.charity.org.uk/",
             "http://www.charity.org.uk/",
             "charity.org.uk",
         ),
         (
             "https://www.charity.org.uk/",
             "https://www.charity.org.uk/",
             "charity.org.uk",
         ),
         ("https://charity.org.uk/", "https://charity.org.uk/", "charity.org.uk"),
         ("//charity.org.uk/", "//charity.org.uk/", "charity.org.uk"),
         (
             "https://www.charity.org.uk/www.html",
             "https://www.charity.org.uk/www.html",
             "charity.org.uk/www.html",
         ),
         (
             "www.charity.org.uk/www.html",
             "http://www.charity.org.uk/www.html",
             "charity.org.uk/www.html",
         ),
     ]
     for original, clean, display in cases:
         organisation = Organisation(url=original)
         assert organisation.cleanUrl == clean
         assert organisation.displayUrl == display
Пример #11
0
    def parse_row(self, record):
        """Convert one cleaned row into an ``Organisation`` in ``self.records``.

        Only the identifier, name, website and start/end dates come from the
        row; the primary type is the cached ``government-organisation`` type.
        Nothing is returned.
        """
        record = self.clean_fields(record)

        organisation = Organisation(
            org_id=self.get_org_id(record),
            name=record.get("name"),
            charityNumber=None,
            companyNumber=None,
            streetAddress=None,
            addressLocality=None,
            addressRegion=None,
            addressCountry=None,
            postalCode=None,
            telephone=None,
            alternateName=[],
            email=None,
            description=None,
            # NOTE(review): this lists every key currently held in
            # ``self.orgtype_cache`` -- confirm the cache only contains types
            # that apply to this source.
            organisationType=list(self.orgtype_cache.keys()),
            organisationTypePrimary=self.orgtype_cache["government-organisation"],
            url=record.get("website"),
            location=[],
            latestIncome=None,
            dateModified=datetime.datetime.now(),
            dateRegistered=record.get("start-date"),
            dateRemoved=record.get("end-date"),
            # A row without an end date is treated as still active.
            active=record.get("end-date") is None,
            parent=None,
            orgIDs=[self.get_org_id(record)],
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
        self.records.append(organisation)
Пример #12
0
    def parse_row(self, record):
        """Handle one row from either of the two inputs this parser receives.

        Rows with a ``Charity_number`` key are "other names" rows: the
        ``Other_names`` value is collected under that charity number in
        ``self.extra_names`` and nothing else happens. Every other row is a
        charity: the cleaned row (plus the split address and postcode) is
        appended to ``self.raw_records`` and an ``Organisation`` is handed to
        ``self.add_org_record``. A parsed company number adds the
        "Registered Company" type and a ``GB-COH-`` identifier.

        Returns:
            None in both cases.
        """
        record = self.clean_fields(record)

        if "Charity_number" in record:
            if not hasattr(self, "extra_names"):
                self.extra_names = defaultdict(list)
            self.extra_names[record["Charity_number"]].append(
                record["Other_names"]
            )
            return

        address, postcode = self.split_address(record.get("Public address", ""))

        org_types = [
            self.orgtype_cache["registered-charity"],
            self.orgtype_cache["registered-charity-northern-ireland"],
        ]

        org_id = self.get_org_id(record)
        org_ids = [org_id]
        coyno = self.parse_company_number(record.get("Company number"))
        if coyno:
            org_types.append(self.add_org_type("Registered Company"))
            org_ids.append("GB-COH-{}".format(coyno))

        self.raw_records.append({
            **record,
            "address": ", ".join([a for a in address if a]),
            "postcode": postcode,
        })

        # ``self.extra_names`` is only created once an "other names" row has
        # been seen; without any such rows, fall back to no alternate names
        # instead of failing with an AttributeError.
        extra_names = getattr(self, "extra_names", {})

        total_income = record.get("Total income")

        self.add_org_record(
            Organisation(
                org_id=org_id,
                name=record.get("Charity name").replace("`", "'"),
                charityNumber="NIC{}".format(record.get(self.id_field)),
                companyNumber=coyno,
                streetAddress=address[0],
                addressLocality=address[1],
                addressRegion=address[2],
                addressCountry="Northern Ireland",
                postalCode=postcode,
                telephone=record.get("Telephone"),
                alternateName=extra_names.get(record.get(self.id_field), []),
                email=record.get("Email"),
                description=None,
                organisationType=[o.slug for o in org_types],
                organisationTypePrimary=self.orgtype_cache["registered-charity"],
                url=self.parse_url(record.get("Website")),
                location=[],
                latestIncome=int(total_income) if total_income else None,
                dateModified=datetime.datetime.now(),
                dateRegistered=record.get("Date registered"),
                dateRemoved=None,
                active=record.get("Status") != "Removed",
                parent=None,
                orgIDs=org_ids,
                scrape=self.scrape,
                source=self.source,
                spider=self.name,
                org_id_scheme=self.orgid_scheme,
            )
        )
Пример #13
0
    def parse_row(self, record):
        """Convert one charity row into an ``Organisation``.

        The cleaned row is appended to ``self.raw_records`` and the resulting
        organisation to ``self.records``. On top of the two cached charity
        types, extra organisation types are derived from the "Regulatory
        Type", "Designated religious body" and "Constitutional Form" columns.
        Nothing is returned.
        """
        record = self.clean_fields(record)

        address, _ = self.split_address(
            record.get("Principal Office/Trustees Address", ""),
            get_postcode=False,
        )

        org_types = [
            self.orgtype_cache["registered-charity"],
            self.orgtype_cache["registered-charity-scotland"],
        ]

        regulatory_type = record.get("Regulatory Type")
        if regulatory_type != "Standard" and regulatory_type:
            org_types.append(self.add_org_type(regulatory_type))
        if record.get("Designated religious body") == "Yes":
            org_types.append(self.add_org_type("Designated religious body"))

        # Constitutional forms with a fixed translation, checked in order.
        known_forms = (
            (
                "SCIO (Scottish Charitable Incorporated Organisation)",
                ("Scottish Charitable Incorporated Organisation",),
            ),
            (
                "CIO (Charitable Incorporated Organisation, E&W)",
                ("Charitable Incorporated Organisation",),
            ),
            (
                "Company (the charity is registered with Companies House)",
                ("Registered Company", "Incorporated Charity"),
            ),
            (
                "Trust (founding document is a deed of trust) (other than educational endowment)",
                ("Trust",),
            ),
        )
        form = record.get("Constitutional Form")
        for label, mapped_names in known_forms:
            if form == label:
                type_names = mapped_names
                break
        else:
            # Any other non-empty form except "Other" is used as its own type.
            type_names = (form,) if form != "Other" and form else ()
        for type_name in type_names:
            org_types.append(self.add_org_type(type_name))

        org_ids = [self.get_org_id(record)]

        self.raw_records.append(record)

        known_as = record.get("Known As")
        income = record.get("Most recent year income")

        organisation = Organisation(
            org_id=self.get_org_id(record),
            name=record.get("Charity Name"),
            charityNumber=record.get(self.id_field),
            companyNumber=None,
            streetAddress=address[0],
            addressLocality=address[1],
            addressRegion=address[2],
            addressCountry="Scotland",
            postalCode=self.parse_postcode(record.get("Postcode")),
            telephone=None,
            alternateName=[known_as] if known_as else [],
            email=None,
            description=record.get("Objectives"),
            organisationType=[org_type.slug for org_type in org_types],
            organisationTypePrimary=org_types[0],
            url=self.parse_url(record.get("Website")),
            location=[],
            latestIncome=int(income) if income else None,
            dateModified=datetime.datetime.now(),
            dateRegistered=record.get("Registered Date"),
            dateRemoved=record.get("Ceased Date"),
            active=record.get("Charity Status") != "Removed",
            # @TODO: More sophisticated getting of parent charities here
            parent=record.get("Parent Charity Name"),
            orgIDs=org_ids,
            scrape=self.scrape,
            source=self.source,
            spider=self.name,
            org_id_scheme=self.orgid_scheme,
        )
        self.records.append(organisation)
    def parse_row(self, row):
        """Convert one company row into an ``Organisation``.

        Column names are stripped and have ``.`` replaced by ``_`` before the
        row is cleaned. ``PreviousName_<n>_<field>`` columns are grouped into
        one dict per ``<n>``, ``SICCode_*`` columns into ``{"code", "name"}``
        dicts, and all remaining columns are copied into the record that the
        organisation is built from. Categories listed in ``self.clg_types``
        are normalised to "Company Limited by Guarantee". The organisation is
        handed to ``self.add_org_record``; nothing is returned.
        """
        row = {k.strip().replace(".", "_"): row[k] for k in row}
        row = self.clean_fields(row)

        if row.get("CompanyCategory") in self.clg_types:
            row["CompanyCategory"] = "Company Limited by Guarantee"

        previous_names = {}
        sic_codes = []
        record = {}
        for key, value in row.items():
            if key.startswith("PreviousName_"):
                if not value:
                    continue
                parts = key.split("_")
                name_no, field = parts[1], parts[2]
                entry = previous_names.setdefault(name_no, {})
                if field == "CONDATE":
                    # Change-of-name dates arrive as dd/mm/yyyy text.
                    entry[field] = datetime.datetime.strptime(
                        value, "%d/%m/%Y").date()
                    entry["nameno"] = name_no
                else:
                    entry[field] = value

            elif key.startswith("SICCode_"):
                if value and value.replace("None Supplied", "") != "":
                    # "<code> - <name>"; ``partition`` leaves the name empty
                    # when the separator is missing instead of raising
                    # IndexError as an indexed ``split`` would.
                    code, _, name = value.partition(" - ")
                    sic_codes.append({
                        "code": code.strip(),
                        "name": name.strip(),
                    })
            else:
                record[key] = value

        record["previous_names"] = list(previous_names.values())
        record["sic_codes"] = sic_codes

        address_fields = (
            "RegAddress_CareOf",
            "RegAddress_POBox",
            "RegAddress_AddressLine1",
            "RegAddress_AddressLine2",
        )
        address1 = [record.get(f) for f in address_fields if record.get(f)]

        # The category type is both the second listed type and the primary
        # type, so register it once and reuse the result.
        company_type = self.add_org_type(record.get("CompanyCategory"))
        orgtypes = [self.orgtype_cache["registered-company"], company_type]

        org_id = self.get_org_id(record)

        self.add_org_record(
            Organisation(
                org_id=org_id,
                name=record.get("CompanyName"),
                charityNumber=None,
                companyNumber=record.get(self.id_field),
                streetAddress=", ".join(address1),
                addressLocality=record.get("RegAddress_PostTown"),
                addressRegion=record.get("RegAddress_County"),
                addressCountry=record.get("RegAddress_Country"),
                postalCode=record.get("RegAddress_PostCode"),
                telephone=None,
                # NOTE(review): this passes the grouped previous-name dicts,
                # not plain name strings -- confirm the model expects that.
                alternateName=record["previous_names"],
                email=None,
                description=None,
                organisationType=[o.slug for o in orgtypes],
                organisationTypePrimary=company_type,
                url=None,
                location=[],
                latestIncome=None,
                dateModified=datetime.datetime.now(),
                dateRegistered=record.get("IncorporationDate"),
                dateRemoved=record.get("DissolutionDate"),
                active=(
                    record.get("CompanyStatus")
                    not in ["Dissolved", "Inactive", "Converted / Closed"]
                    and not record.get("DissolutionDate")
                ),
                parent=None,
                orgIDs=[org_id],
                scrape=self.scrape,
                source=self.source,
                spider=self.name,
                org_id_scheme=self.orgid_scheme,
            )
        )
    def parse_row(self, record):
        """Convert one society row into an ``Organisation``.

        Each organisation id is processed once: ids already listed in
        ``self.org_ids_seen`` are skipped, as are rows without a
        "Society Name". When the last two space-separated words of
        "Society Address" match ``self.postcode_regex`` they are taken as the
        postcode and removed from the address before it is split. Rows
        registered as a "Community Benefit Society" or "Co-operative Society"
        also get a ``GB-COH-RS`` identifier. The organisation is appended to
        ``self.records``; nothing is returned.
        """
        if not hasattr(self, "org_ids_seen"):
            self.org_ids_seen = []

        record = self.clean_fields(record)

        org_id = self.get_org_id(record)
        if org_id in self.org_ids_seen:
            return
        self.org_ids_seen.append(org_id)

        if not record.get("Society Name"):
            return

        raw_address = record["Society Address"]
        postcode = None
        if isinstance(raw_address, str):
            words = raw_address.strip().split(" ")
            if len(words) > 2:
                candidate = " ".join(words[-2:])
                if self.postcode_regex.match(candidate):
                    postcode = candidate.upper()
                    # Remove the postcode exactly as it is written in the
                    # address: removing the upper-cased form would leave a
                    # lower- or mixed-case postcode behind.
                    record["Society Address"] = raw_address.replace(
                        candidate, "")

        address, _ = self.split_address(record["Society Address"],
                                        get_postcode=False)
        address = dict(enumerate(address))

        org_ids = [org_id]
        orgtypes = [
            self.orgtype_cache["mutual"],
            self.add_org_type(record.get("Registered As")),
        ]

        description = ""
        if record.get("Registration Act"):
            description = "Registered under {}".format(
                record.get("Registration Act"))

        # add org ids for companies
        if record.get("Registered As") in [
                "Community Benefit Society",
                "Co-operative Society",
        ]:
            # NOTE(review): the key spelling ("Registation") is kept as-is;
            # presumably it matches the source column header -- verify.
            org_ids.append("GB-COH-RS{}".format(
                record["Full Registation Number"].zfill(6)))

        org_record = {
            "org_id": org_id,
            "name": record.get("Society Name"),
            "charityNumber": None,
            "companyNumber": None,
            "streetAddress": address.get(0),
            "addressLocality": address.get(1),
            "addressRegion": address.get(2),
            "addressCountry": None,
            "postalCode": postcode,
            "telephone": None,
            "alternateName": [],
            "email": None,
            "description": description,
            "organisationType": [o.slug for o in orgtypes],
            # The "Registered As" type is the primary type.
            "organisationTypePrimary": orgtypes[1],
            "url": None,
            "location": [],
            "latestIncome": None,
            "dateModified": datetime.datetime.now(),
            "dateRegistered": record.get("Registration Date"),
            "dateRemoved": record.get("Deregistration Date"),
            "active": record.get("Society Status", "") != "Deregistered",
            "parent": None,
            "orgIDs": org_ids,
            "scrape": self.scrape,
            "source": self.source,
            "spider": self.name,
            "org_id_scheme": self.orgid_scheme,
        }
        self.records.append(Organisation(**org_record))
Пример #16
0
    def parse_row(self, record):
        """Convert one raw record into an ``Organisation`` and queue it.

        The record is cleaned with ``self.clean_fields`` and the resulting
        ``Organisation`` is appended to ``self.records``; nothing is returned.

        Only the first entry of ``addresses`` is used. List-valued fields
        (``addresses``, ``relationships``, ``aliases``, ``acronyms``) that are
        missing, ``None`` or empty are treated as empty instead of raising.
        """
        record = self.clean_fields(record)

        # `.get(key, default)` only falls back when the key is absent, so an
        # explicit empty list (or None) used to blow up on `[0]`. `or [{}]`
        # covers all three cases and lets us look the address up just once.
        first_address = (record.get("addresses") or [{}])[0]

        address_lines = (
            first_address.get(key) for key in ("line_1", "line_2", "line_3")
        )
        street_address = ", ".join(line for line in address_lines if line)

        # Normalise a blank postcode to None.
        postcode = first_address.get("postcode") or None

        links = record.get("links")
        url = links[0] if links else None

        # If several "Parent" relationships are present, the last one wins.
        parent = None
        for relationship in record.get("relationships") or []:
            if relationship.get("type") == "Parent":
                parent = self.org_id_prefix + "-" + relationship.get("id")

        # Fall back to "Education" when the record declares no types.
        types = record.get("types")
        orgtype = self.add_org_type(types[0] if types else "Education")

        alternate_names = (record.get("aliases") or []) + (
            record.get("acronyms") or []
        )

        org_record = {
            "org_id": self.get_org_id(record),
            "name": record.get("name"),
            "charityNumber": None,
            "companyNumber": None,
            "streetAddress": street_address,
            "addressLocality": first_address.get("city"),
            "addressRegion": first_address.get("state"),
            "addressCountry": first_address.get("country"),
            "postalCode": postcode,
            "telephone": None,
            "alternateName": alternate_names,
            "email": record.get("email_address"),
            "description": None,
            "organisationType": [orgtype.slug],
            "organisationTypePrimary": orgtype,
            "url": url,
            "location": [],
            "latestIncome": None,
            "dateModified": datetime.datetime.now(),
            "dateRegistered": None,
            "dateRemoved": None,
            "active": record.get("status") == "active",
            "parent": parent,
            "orgIDs": [self.get_org_id(record)]
            + self.get_org_ids(record.get("external_ids", {})),
            "scrape": self.scrape,
            "source": self.source,
            "spider": self.name,
            "org_id_scheme": self.orgid_scheme,
        }
        self.records.append(Organisation(**org_record))
Пример #17
0
    def parse_row(self, record):
        """Build an ``Organisation`` from one register row and queue it.

        The row is first passed through ``self.clean_fields``. The resulting
        organisation is appended to ``self.records``; nothing is returned.
        """
        row = self.clean_fields(record)
        identifiers = [self.get_org_id(row)]

        # @TODO: map local authority code to GSS to add locations
        areas = []

        organisation_fields = {
            "org_id": self.get_org_id(row),
            "name": row.get("official-name"),
            "charityNumber": None,
            "companyNumber": None,
            "streetAddress": None,
            "addressLocality": None,
            "addressRegion": None,
            "addressCountry": "Wales",
            "postalCode": None,
            "telephone": None,
            "alternateName": [],
            "email": None,
            "description": None,
            # Every key currently held in the org-type cache is listed.
            "organisationType": list(self.orgtype_cache.keys()),
            "organisationTypePrimary": self.orgtype_cache["local-authority"],
            "url": None,
            "location": areas,
            "latestIncome": None,
            "dateModified": datetime.datetime.now(),
            "dateRegistered": row.get("start-date"),
            "dateRemoved": row.get("end-date"),
            # A row without an end date is treated as still active.
            "active": row.get("end-date") is None,
            "parent": None,
            "orgIDs": identifiers,
            "scrape": self.scrape,
            "source": self.source,
            "spider": self.name,
            "org_id_scheme": self.orgid_scheme,
        }
        self.records.append(Organisation(**organisation_fields))
Пример #18
0
    def parse_row(self, record):
        """Turn one provider row into an ``Organisation`` and queue it.

        The row is cleaned with ``self.clean_fields``. Organisation types are
        derived from the "Corporate Form" column, falling back to
        "Designation" when no corporate form is given. Rows designated
        "Local Authority" that are found in ``LA_LOOKUP`` also gain a
        ``GB-LAE-`` identifier and a location entry. The finished
        organisation is appended to ``self.records``; nothing is returned.
        """
        row = self.clean_fields(record)

        # Corporate forms that translate to a fixed list of organisation
        # types, in the order those types are registered.
        fixed_form_types = {
            "CIO-Charitable Incorporated Organisation": (
                "Charitable Incorporated Organisation",
                "Registered Charity",
            ),
            "Charitable Company": (
                "Registered Company",
                "Incorporated Charity",
                "Registered Charity",
            ),
            "Unincorporated Charity": ("Registered Charity",),
        }

        type_list = [self.add_org_type("Registered Provider of Social Housing")]
        corporate_form = row.get("Corporate Form")
        if corporate_form:
            if corporate_form == "Company":
                # Companies get a second type that combines designation and
                # corporate form.
                type_list.append(self.add_org_type("Registered Company"))
                combined_name = "{} {}".format(row["Designation"], corporate_form)
                type_list.append(self.add_org_type(combined_name))
            else:
                # Unknown forms are registered under their own name.
                type_names = fixed_form_types.get(corporate_form, (corporate_form,))
                for type_name in type_names:
                    type_list.append(self.add_org_type(type_name))
        elif row.get("Designation"):
            type_list.append(self.add_org_type(row["Designation"]))

        identifiers = [self.get_org_id(row)]
        areas = []
        la_codes = None
        if row.get("Designation") == "Local Authority":
            la_codes = LA_LOOKUP.get(row.get(self.id_field))
        if la_codes:
            register_code = la_codes.get("register-code")
            identifiers.append("GB-LAE-{}".format(register_code))
            gss_code = la_codes.get("GSS")
            areas.append(
                {
                    "id": gss_code,
                    "name": la_codes.get("name"),
                    "geoCode": gss_code,
                    # The first three characters of the GSS code select the
                    # area type.
                    "geoCodeType": AREA_TYPES.get(gss_code[0:3], "Local Authority"),
                }
            )

        organisation_fields = {
            "org_id": self.get_org_id(row),
            "name": row.get("Organisation Name"),
            "charityNumber": None,
            "companyNumber": None,
            "streetAddress": None,
            "addressLocality": None,
            "addressRegion": None,
            "addressCountry": "England",
            "postalCode": None,
            "telephone": None,
            "alternateName": [],
            "email": None,
            "description": None,
            "organisationType": [org_type.slug for org_type in type_list],
            "organisationTypePrimary": type_list[0],
            "url": None,
            "location": areas,
            "latestIncome": None,
            "dateModified": datetime.datetime.now(),
            "dateRegistered": row.get("Registration Date"),
            "dateRemoved": None,
            "active": True,
            "parent": None,
            "orgIDs": identifiers,
            "scrape": self.scrape,
            "source": self.source,
            "spider": self.name,
            "org_id_scheme": self.orgid_scheme,
        }
        self.records.append(Organisation(**organisation_fields))
Пример #19
0
    def parse_row(self, record, org_type=None):
        """Turn one row into an ``Organisation`` and queue it.

        The row is cleaned with ``self.clean_fields``. Every organisation
        gets the cached "health" and "nhs" types; when ``org_type`` is given
        it is registered through ``self.add_org_type`` and added as a third
        type. ``org_type`` is also the key used to pick the record's source
        from ``self.sources``. The finished organisation is appended to
        ``self.records``; nothing is returned.
        """

        def join_lines(first, second):
            """Return ``first`` if ``second`` is empty, ``second`` if ``first``
            is empty, otherwise both joined by ", "."""
            if not second:
                return first
            if not first:
                return second
            return first + ", {}".format(second)

        row = self.clean_fields(record)

        type_list = [self.orgtype_cache["health"], self.orgtype_cache["nhs"]]
        if org_type:
            type_list.append(self.add_org_type(org_type))

        # Lines 1+2 make up the street address, lines 3+4 the locality and
        # line 5 the region.
        street = join_lines(row.get("Address Line 1"), row.get("Address Line 2"))
        locality = join_lines(row.get("Address Line 3"), row.get("Address Line 4"))
        region = row.get("Address Line 5")

        organisation_fields = {
            "org_id": self.get_org_id(row),
            "name": row.get("Name"),
            "charityNumber": None,
            "companyNumber": None,
            "streetAddress": street,
            "addressLocality": locality,
            "addressRegion": region,
            "addressCountry": None,
            "postalCode": row.get("Postcode"),
            "telephone": row.get("Contact Telephone Number"),
            "alternateName": [],
            "email": None,
            "description": None,
            "organisationType": [entry.slug for entry in type_list],
            "organisationTypePrimary": type_list[0],
            "url": None,
            "location": [],
            "latestIncome": None,
            "dateModified": datetime.datetime.now(),
            "dateRegistered": row.get("Open Date"),
            "dateRemoved": row.get("Close Date"),
            # A row without a close date is treated as still active.
            "active": row.get("Close Date") is None,
            "parent": row.get("Parent Organisation Code"),
            "orgIDs": [self.get_org_id(row)],
            "scrape": self.scrape,
            "source": self.sources[org_type],
            "spider": self.name,
            "org_id_scheme": self.orgid_scheme,
        }
        self.records.append(Organisation(**organisation_fields))