def parse(context, data):
    """Stream the cached tab-separated file and emit entities row by row."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res, open(res.file_path, "r") as fh:
        # The source file is tab-delimited, not comma-delimited.
        reader = csv.DictReader(fh, delimiter="\t")
        for record in reader:
            parse_row(emitter, record)
    emitter.finalize()
def parse(context, data):
    """Emit a LegalEntity and its Sanction from one World Bank debarment record."""
    emitter = EntityEmitter(context)

    raw_name = data.get("SUPP_NAME")
    entity = emitter.make("LegalEntity")
    entity.make_id("WBDEBAR", raw_name, data.get("SUPP_ID"))
    # The first cleaned form is the primary name; remaining forms are aliases.
    name_forms = clean_name(raw_name)
    entity.add("name", name_forms[0])
    entity.add("address", data.get("SUPP_ADDR"))
    entity.add("address", data.get("SUPP_CITY"))
    entity.add("country", data.get("COUNTRY_NAME"))
    for alias in name_forms[1:]:
        entity.add("alias", alias)

    sanction = emitter.make("Sanction")
    sanction.make_id("Sanction", entity.id)
    sanction.add("authority", "World Bank Debarrment")
    sanction.add("program", data.get("DEBAR_REASON"))
    sanction.add("startDate", clean_date(data.get("DEBAR_FROM_DATE")))
    sanction.add("endDate", clean_date(data.get("DEBAR_TO_DATE")))
    sanction.add("sourceUrl", SOURCE)

    emitter.emit(entity)
    emitter.emit(sanction)
    emitter.finalize()
def parse(context, data):
    """Emit a LegalEntity plus Sanction for a single debarment row."""
    emitter = EntityEmitter(context)
    raw_name = data.get('SUPP_NAME')
    country = data.get('COUNTRY_NAME')

    entity = emitter.make('LegalEntity')
    entity.make_id(raw_name, data.get('SUPP_ID'), country)
    # clean_name() yields the primary name first, then alternate spellings.
    name_forms = clean_name(raw_name)
    entity.add('name', name_forms[0])
    entity.add('address', data.get('SUPP_ADDR'))
    entity.add('address', data.get('SUPP_CITY'))
    entity.add('country', normalize_country(country))
    for alias in name_forms[1:]:
        entity.add('alias', alias)

    sanction = emitter.make('Sanction')
    sanction.make_id('Sanction', entity.id)
    sanction.add('authority', 'World Bank Debarrment')
    sanction.add('program', data.get('DEBAR_REASON'))
    sanction.add('startDate', clean_date(data.get('DEBAR_FROM_DATE')))
    sanction.add('endDate', clean_date(data.get('DEBAR_TO_DATE')))
    sanction.add('sourceUrl', SOURCE)

    emitter.emit(entity)
    emitter.emit(sanction)
    emitter.finalize()
def parse(context, data):
    """Parse the downloaded XLS workbook, grouping rows by cleaned reference.

    Cells are normalised by xlrd ctype: numbers (2) become integer strings,
    dates (3) become ISO timestamps, empty cells (0) become None, everything
    else keeps its raw value.
    """
    emitter = EntityEmitter(context)
    references = defaultdict(list)
    with context.http.rehash(data) as res:
        xls = xlrd.open_workbook(res.file_path)
        ws = xls.sheet_by_index(0)
        # First sheet row holds the column names; slugify them for dict keys.
        headers = [slugify(h, sep="_") for h in ws.row_values(0)]
        for r in range(1, ws.nrows):
            row = dict(zip(headers, ws.row(r)))
            for header, cell in row.items():
                if cell.ctype == 2:
                    row[header] = str(int(cell.value))
                elif cell.ctype == 3:
                    date = xldate_as_datetime(cell.value, xls.datemode)
                    row[header] = date.isoformat()
                elif cell.ctype == 0:
                    row[header] = None
                else:
                    # BUG FIX: previously this assignment ran unconditionally
                    # (no `else:`), clobbering the converted number/date/None
                    # values above. The sibling XLS parser uses `else:` here.
                    row[header] = cell.value
            reference = clean_reference(row.get("reference"))
            references[reference].append(row)
        for ref, rows in references.items():
            parse_reference(emitter, ref, rows)
    emitter.finalize()
def fsf_parse(context, data):
    """Walk every sanctionEntity element of the EU FSF XML feed."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        # NS maps the "default" prefix for the namespaced findall query.
        entries = res.xml.findall(".//default:sanctionEntity", NS)
        for entry in entries:
            parse_entry(emitter, entry)
    emitter.finalize()
def parse(context, data):
    """Dispatch natural and legal persons from the Kyrgyz sanctions XML."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        root = res.xml
        for node in root.findall(".//KyrgyzPhysicPerson"):
            parse_person(emitter, node)
        for node in root.findall(".//KyrgyzLegalPerson"):
            parse_legal(emitter, node)
    emitter.finalize()
def parse(context, data):
    """Process the consolidated list: individuals first, then entities."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        # Same traversal for both record types, different handlers.
        handlers = (
            ('.//INDIVIDUAL', parse_individual),
            ('.//ENTITY', parse_entity),
        )
        for query, handler in handlers:
            for node in res.xml.findall(query):
                handler(emitter, node)
    emitter.finalize()
def parse(context, data):
    """Route table rows by width: 5 columns = organisation, 9 = individual."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        for row in extract_rows(res.xml):
            width = len(row)
            if width == 5:
                parse_organisation(emitter, row)
            if width == 9:
                parse_individual(emitter, row)
    emitter.finalize()
def parse(context, data):
    """Parse the namespaced sanctions XML: parties, entries, relationships."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        doc = res.xml
        for node in doc.findall(qpath('DistinctParty')):
            parse_party(emitter, doc, node)
        for node in doc.findall(qpath('SanctionsEntry')):
            parse_entry(emitter, doc, node)
        for node in doc.findall(qpath('ProfileRelationship')):
            parse_relation(emitter, doc, node)
    emitter.finalize()
def parse(context, data):
    """Group CSV rows by their 'Group ID' column, then parse each group."""
    emitter = EntityEmitter(context)
    grouped = defaultdict(list)
    with context.http.rehash(data) as res:
        with open(res.file_path, 'r', encoding='iso-8859-1') as fh:
            # The first physical line precedes the real CSV header; skip it.
            next(fh)
            for record in csv.DictReader(fh):
                group_id = record.pop('Group ID')
                if group_id is not None:
                    grouped[group_id].append(record)
    for group_id, records in grouped.items():
        parse_entry(emitter, group_id, records)
    emitter.finalize()
def seco_parse(context, data):
    """Parse the Swiss SECO sanctions XML: programs, places, then targets."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        updated_at = res.xml.getroot().get("date")
        # Map sanctions-set ssid -> English program name.
        programs = {}
        for node in res.xml.findall(".//sanctions-program"):
            key = node.find("./sanctions-set").get("ssid")
            programs[key] = node.findtext('./program-name[@lang="eng"]')
        # Map place ssid -> parsed address.
        places = {
            node.get("ssid"): parse_address(node)
            for node in res.xml.findall(".//place")
        }
        for target in res.xml.findall("./target"):
            parse_entry(emitter, target, programs, places, updated_at)
    emitter.finalize()
def parse(context, data):
    """Parse a JSON dump of persons, organizations and their memberships."""
    emitter = EntityEmitter(context)
    country = data.get('country', {}).get('code')
    with context.http.rehash(data) as res:
        payload = res.json
        # Record source-ID -> FtM-ID mappings so memberships can resolve
        # both endpoints after the fact.
        persons = {}
        for person in payload.get('persons', []):
            src_id, ftm_id = parse_person(emitter, person, country)
            persons[src_id] = ftm_id
        organizations = {}
        for org in payload.get('organizations', []):
            src_id, ftm_id = parse_organization(emitter, org, country)
            organizations[src_id] = ftm_id
        for membership in payload.get('memberships', []):
            parse_membership(emitter, membership, persons, organizations)
    emitter.finalize()
def parse_reference(emitter: EntityEmitter, row: dict):
    """Emit a Person entity for one assets-freezing record."""
    first_name = row.pop("firstname")
    last_name = row.pop("lastname")
    birth_date = row.pop("birthdate")
    person = emitter.make("Person")
    # Name and birth date together make the record identifier.
    person.make_id("FREEZING", "{}{}{}".format(first_name, last_name, birth_date))
    person.add("status", NATURES.get(row.pop("nature")))
    person.add("birthDate", birth_date)
    person.add("birthPlace", row.pop("birthplace"))
    person.add("name", "{} {}".format(first_name, last_name))
    person.add("alias", row.pop("aliases"))
    person.add("keywords", "ASSETS_FREEZING")
    emitter.emit(person)
def officer(context, data):
    """Fetch one UK Companies House officer record and emit a Person,
    plus one Sanction per disqualification.

    data: expects an "officer_id" key used to build the API URL.
    """
    emitter = EntityEmitter(context)
    officer_id = data.get("officer_id")
    url = API_URL % officer_id
    with context.http.get(url, auth=AUTH) as res:
        # Any non-200 response is logged and the record skipped entirely.
        if res.status_code != 200:
            context.log.info("CoH error: %r", res.json)
            return
        data = res.json
        person = emitter.make("Person")
        person.make_id(officer_id)
        source_url = urljoin(WEB_URL, data.get("links", {}).get("self", "/"))
        person.add("sourceUrl", source_url)
        last_name = data.pop("surname", None)
        person.add("lastName", last_name)
        forename = data.pop("forename", None)
        person.add("firstName", forename)
        other_forenames = data.pop("other_forenames", None)
        person.add("middleName", other_forenames)
        person.add("name", jointext(forename, other_forenames, last_name))
        person.add("title", data.pop("title", None))
        person.add("nationality", data.pop("nationality", None))
        person.add("birthDate", data.pop("date_of_birth", None))
        person.add("topics", "crime")
        for disqual in data.pop("disqualifications", []):
            case = disqual.get("case_identifier")
            sanction = emitter.make("Sanction")
            sanction.make_id(person.id, case)
            sanction.add("entity", person)
            sanction.add("authority", "UK Companies House")
            sanction.add("program", case)
            from_date = disqual.pop("disqualified_from", None)
            # NOTE(review): with several disqualifications, created_at keeps
            # the start date of the last one iterated — confirm intended.
            person.context["created_at"] = from_date
            sanction.add("startDate", from_date)
            sanction.add("endDate", disqual.pop("disqualified_until", None))
            emitter.emit(sanction)
            # Each disqualification carries its own address; all of them are
            # folded onto the person as extra address values.
            address = disqual.pop("address", {})
            locality = address.get("locality")
            locality = jointext(locality, address.get("postal_code"))
            street = address.get("address_line_1")
            premises = address.get("premises")
            street = jointext(street, premises)
            address = jointext(
                street,
                address.get("address_line_2"),
                locality,
                address.get("region"),
                sep=", ",
            )
            person.add("address", address)
        emitter.emit(person)
def parse_notice(context, data):
    """Turn one Interpol red-notice JSON record into a Person entity."""
    with context.http.rehash(data) as res:
        notice = res.json
        first_name = notice["forename"] or ""
        last_name = notice["name"] or ""
        # Pair every arrest warrant's charge with its issuing country.
        warrants = [
            (w["charge"], w["issuing_country_id"])
            for w in notice["arrest_warrants"]
        ]
        emitter = EntityEmitter(context)
        entity = emitter.make("Person")
        entity.make_id("INTERPOL", first_name, last_name, notice["entity_id"])
        entity.add("name", first_name + " " + last_name)
        entity.add("firstName", first_name)
        entity.add("lastName", last_name)
        entity.add("nationality", notice["nationalities"])
        for charge, country in warrants:
            entity.add("program", country)
            entity.add("summary", charge)
        entity.add("gender", SEXES.get(notice["sex_id"]))
        entity.add("birthPlace", notice["place_of_birth"])
        entity.add("birthDate", parse_date(notice["date_of_birth"]))
        entity.add("sourceUrl", notice["_links"]["self"]["href"])
        entity.add("keywords", "REDNOTICE")
        entity.add("topics", "crime")
        emitter.emit(entity)
        emitter.finalize()
def parse_notice(context, data):
    """Turn one Interpol notice JSON record into a Person entity."""
    with context.http.rehash(data) as res:
        notice = res.json
        first_name = notice['forename'] or ''
        last_name = notice['name'] or ''
        # Pair every arrest warrant's charge with its issuing country.
        warrants = [
            (w['charge'], w['issuing_country_id'])
            for w in notice['arrest_warrants']
        ]
        emitter = EntityEmitter(context)
        entity = emitter.make('Person')
        entity.make_id(first_name, last_name, notice['entity_id'])
        entity.add('name', first_name + ' ' + last_name)
        entity.add('firstName', first_name)
        entity.add('lastName', last_name)
        entity.add('nationality', notice['nationalities'])
        for charge, country in warrants:
            entity.add('program', country)
            entity.add('summary', charge)
        entity.add('gender', SEXES.get(notice['sex_id']))
        entity.add('birthPlace', notice['place_of_birth'])
        entity.add('birthDate', parse_date(notice['date_of_birth']))
        entity.add('sourceUrl', notice['_links']['self']['href'])
        entity.add('keywords', 'REDNOTICE')
        entity.add('keywords', 'CRIME')
        emitter.emit(entity)
        emitter.finalize()
def parse(context, data):
    """Scrape a country leadership page, emitting a Person per officeholder."""
    emitter = EntityEmitter(context)
    url = data.get('url')
    country = normalize_country(data.get('country'))
    with context.http.rehash(data) as res:
        listing = res.html.find('.//div[@id="countryOutput"]')
        # No listing container means there is nothing to parse on this page.
        if listing is None:
            return
        for item in listing.findall('.//li'):
            function = element_text(item.find('.//span[@class="title"]'))
            if function is None:
                continue
            name = element_text(item.find('.//span[@class="cos_name"]'))
            if name is None:
                continue
            person = emitter.make('Person')
            person.make_id(country, name, function)
            person.add('name', name)
            person.add('country', country)
            person.add('position', function)
            person.add('sourceUrl', url)
            emitter.emit(person)
    emitter.finalize()
def crawl_country(context, path, country):
    """Scrape one country page of the leadership site and emit its officials."""
    emitter = EntityEmitter(context)
    source_url = UI_URL % path
    context.log.info("Crawling country: %s", country)
    res = requests.get(DATA_URL % path)
    page = res.json().get("result", {}).get("data", {}).get("page", {})
    block = page.get("acf", {}).get("blocks", [{}])[0]
    markup = block.get("free_form_content", []).get("content")
    tree = html.fromstring(markup)
    role = None
    for node in tree.getchildren():
        label = node.text_content().strip()
        if node.tag == "h2":
            # Top-level section headings carry no person data.
            continue
        if node.tag == "h3":
            # h3 headings name the office held by the people that follow.
            role = label
            continue
        holder = label.replace("(Acting)", "")
        person = emitter.make("Person")
        person.make_id(source_url, holder, role)
        person.add("name", holder)
        person.add("country", country)
        person.add("position", role)
        person.add("sourceUrl", source_url)
        person.add("topics", "role.pep")
        emitter.emit(person)
    emitter.finalize()
def parse(context, data):
    """Follow the page's download link, read the XLS inside the ZIP, and
    parse each data row into a freezing-list reference.

    Cells are normalised by xlrd ctype: numbers (2) -> integer strings,
    dates (3) -> ISO timestamps, empty (0) -> None, others keep raw value.
    """
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        doc = res.html
        url_file = doc.find(".//main//ul//li//a[@href]").get("href")
        content = requests.get(url_file).content
        # FIX: open the archive in a context manager so the ZipFile handle
        # is always closed; previously it was left open.
        with zipfile.ZipFile(io.BytesIO(content), "r") as zfp:
            fp = zfp.read(zfp.filelist[0])
        xls = xlrd.open_workbook(file_contents=fp)
        ws = xls.sheet_by_index(1)
        headers = [
            "firstname",
            "lastname",
            "aliases",
            "birthdate",
            "birthplace",
            "other informations",
            "nature",
        ]
        # Data starts on the fourth sheet row; earlier rows are titles.
        for r in range(3, ws.nrows):
            row = dict(zip(headers, ws.row(r)))
            for header, cell in row.items():
                if cell.ctype == 2:
                    row[header] = str(int(cell.value))
                elif cell.ctype == 3:
                    date = xldate_as_datetime(cell.value, xls.datemode)
                    row[header] = date.isoformat()
                elif cell.ctype == 0:
                    row[header] = None
                else:
                    row[header] = cell.value
            parse_reference(emitter, row)
    emitter.finalize()
def parse(context, data):
    """Parse the OFAC advanced XML: load reference tables, then all records."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        doc = remove_namespace(res.xml)
        # Reference values, locations and ID documents must be loaded before
        # any record parsing, since the parsers look values up in them.
        context.log.info("Loading reference values...")
        load_ref_values(doc)
        context.log.info("Loading locations...")
        locations = load_locations(doc)
        context.log.info("Loading ID reg documents...")
        documents = load_documents(doc)
        for node in doc.findall(".//DistinctParty"):
            parse_party(emitter, doc, node, locations, documents)
        for node in doc.findall(".//SanctionsEntry"):
            parse_entry(emitter, doc, node)
        for node in doc.findall(".//ProfileRelationship"):
            parse_relation(emitter, doc, node)
    emitter.finalize()
def store(context, data):
    """Link an entity to the Aleph document that evidences it."""
    emitter = EntityEmitter(context)
    entity = model.get_proxy(data["entity"])
    doc_link = emitter.make("Documentation")
    doc_link.make_id("Documentation", data["entity"]["id"], data["aleph_id"])
    doc_link.add("entity", entity)
    doc_link.add("document", data["aleph_id"])
    emitter.emit(doc_link)
    emitter.finalize()
def parse(context, data):
    """Crawl the A-Z member index, visiting each pagination link once."""
    emitter = EntityEmitter(context)
    seen = set()
    for letter in string.ascii_uppercase:
        url = URL % letter
        while url is not None:
            context.log.info("URL: %s", url)
            doc = context.http.get(url).html
            for member in doc.findall('.//ul[@class="member-results"]/li'):
                parse_entry(emitter, member)
            seen.add(url)
            # Continue with any pagination link not yet visited, or stop.
            url = None
            for a in doc.findall('.//div[@id="pagination"]//a'):
                next_url = urljoin(URL, a.get("href"))
                if next_url not in seen:
                    url = next_url
    emitter.finalize()
def officer(context, data):
    """Fetch one UK Companies House officer record and emit a Person,
    plus one Sanction per disqualification.

    data: expects an "officer_id" key used to build the API URL.
    """
    emitter = EntityEmitter(context)
    officer_id = data.get('officer_id')
    url = API_URL % officer_id
    with context.http.get(url, auth=AUTH) as res:
        # Any non-200 response is skipped without logging.
        if res.status_code != 200:
            return
        data = res.json
        person = emitter.make('Person')
        person.make_id(officer_id)
        source_url = urljoin(WEB_URL, data.get('links', {}).get('self', '/'))
        person.add('sourceUrl', source_url)
        last_name = data.pop('surname', None)
        person.add('lastName', last_name)
        forename = data.pop('forename', None)
        person.add('firstName', forename)
        other_forenames = data.pop('other_forenames', None)
        person.add('middleName', other_forenames)
        person.add('name', jointext(forename, other_forenames, last_name))
        person.add('title', data.pop('title', None))
        nationality = normalize_country(data.pop('nationality', None))
        person.add('nationality', nationality)
        person.add('birthDate', data.pop('date_of_birth', None))
        for disqual in data.pop('disqualifications', []):
            case = disqual.get('case_identifier')
            sanction = emitter.make('Sanction')
            sanction.make_id(person.id, case)
            sanction.add('entity', person)
            sanction.add('authority', 'UK Companies House')
            sanction.add('program', case)
            sanction.add('startDate', disqual.pop('disqualified_from', None))
            sanction.add('endDate', disqual.pop('disqualified_until', None))
            emitter.emit(sanction)
            # Each disqualification carries its own address; all of them
            # end up as extra address values on the person.
            address = disqual.pop('address', {})
            locality = address.get('locality')
            locality = jointext(locality, address.get('postal_code'))
            street = address.get('address_line_1')
            premises = address.get('premises')
            street = jointext(street, premises)
            address = jointext(street, address.get('address_line_2'),
                               locality, address.get('region'), sep=', ')
            person.add('address', address)
        emitter.emit(person)
def parse(context, data):
    """Scrape a leadership listing page; emit a Person per officeholder,
    stamped with the page's last-update date when it parses."""
    emitter = EntityEmitter(context)
    url = data.get("url")
    country = data.get("country")
    with context.http.rehash(data) as res:
        page = res.html
        listing = page.find('.//div[@id="countryOutput"]')
        # Without the listing container there is nothing to parse.
        if listing is None:
            return
        for item in listing.findall(".//li"):
            function = element_text(item.find('.//span[@class="title"]'))
            if function is None:
                continue
            name = element_text(item.find('.//span[@class="cos_name"]'))
            if name is None:
                continue
            person = emitter.make("Person")
            person.make_id(url, country, name, function)
            person.add("name", name)
            person.add("country", country)
            person.add("position", function)
            person.add("sourceUrl", url)
            person.add("topics", "role.pep")
            raw_updated = page.findtext('.//span[@id="lastUpdateDate"]')
            updated_at = parse_updated(raw_updated)
            if updated_at is not None:
                person.add("modifiedAt", updated_at)
                person.context["updated_at"] = updated_at.isoformat()
            emitter.emit(person)
    emitter.finalize()
def parse_notice(context, data):
    """Turn one Interpol yellow-notice JSON record into a Person entity."""
    with context.http.rehash(data) as res:
        notice = res.json
        first_name = notice["forename"] or ""
        last_name = notice["name"] or ""
        emitter = EntityEmitter(context)
        entity = emitter.make("Person")
        entity.make_id("INTERPOL", first_name, last_name, notice["entity_id"])
        entity.add("name", first_name + " " + last_name)
        entity.add("firstName", first_name)
        entity.add("lastName", last_name)
        entity.add("nationality", notice["nationalities"])
        entity.add("gender", SEXES.get(notice["sex_id"]))
        entity.add("birthPlace", notice["place_of_birth"])
        entity.add("birthDate", parse_date(notice["date_of_birth"]))
        entity.add("sourceUrl", notice["_links"]["self"]["href"])
        entity.add("keywords", "YELLOWNOTICE")
        entity.add("topics", "researched")
        emitter.emit(entity)
        emitter.finalize()
def parse(context, data):
    """Scrape one diplomat detail page into a Person entity.

    The page's detail list is folded into the `infos` dict (keyed by the
    label text with its trailing colon stripped), from which name, address,
    contact and country fields are extracted; whatever remains is serialised
    into the notes property as JSON.
    """
    emitter = EntityEmitter(context)
    url = data.get("url")
    with context.http.rehash(data) as res:
        doc = res.html
        divs = doc.findall('.//div[@class="regular-details"]/div')
        # First detail column holds the portrait image.
        image_link = "{}{}".format(ROOT_URL, divs[0].find(".//img").attrib["src"])
        infos = {}
        infos["phone"] = []
        # Second column is a label/value list; the first child of each <li>
        # is the label element, the rest (if any) are the values.
        for li in divs[1].findall('.//ul[@class="no-bullet"]/li'):
            children = li.getchildren()
            title = children[0]
            if len(children) > 1:
                # Multi-child item: collect one value per child by tag type.
                # title.text.strip()[0:-1] drops the label's trailing colon.
                infos[title.text.strip()[0:-1]] = []
                for child in children:
                    if child.tag == "a":
                        infos[title.text.strip()[0:-1]].append(child.text)
                    if child.tag == "ul":
                        for li_in in child.findall("./li"):
                            infos[title.text.strip()[0:-1]].append(li_in.text)
                    if child.tag == "b":
                        # Values live in the text nodes that follow the label.
                        for item in title.xpath(
                                "following-sibling::*/text()|following-sibling::text()"
                        ):
                            item = item.strip()
                            if item:
                                infos[title.text.strip()[0:-1]].append(item)
                    if child.tag == "img":
                        infos[title.text.strip()[0:-1]].append(
                            "image: {}{}".format(ROOT_URL, child.attrib["src"]))
            elif title.tag == "b" or title.tag == "i":
                if title.tag == "i" and not title.attrib:
                    # A bare <i> with no attributes is free-form description.
                    infos["description"] = title.text
                else:
                    for item in title.xpath(
                            "following-sibling::*/text()|following-sibling::text()"
                    ):
                        item = item.strip()
                        if item:
                            if title.tag == "b":
                                infos[title.text.strip()[0:-1]] = item
                            elif title.tag == "i":
                                # Icon class distinguishes fax from phone.
                                phone_type = "phone"
                                if title.attrib["class"] == "fa fa-fax":
                                    phone_type = "fax"
                                infos["phone"].append("{}: {}".format(
                                    phone_type, item))
        # "Full name" is rendered "Lastname, Firstname".
        first_name = infos["Full name"].split(", ")[1]
        last_name = infos["Full name"].split(", ")[0]
        if "Languages" in infos:
            infos["Languages"] = [
                info.strip() for info in infos["Languages"].split(",")
            ]
        person = emitter.make("Person")
        person.make_id(url, first_name, last_name)
        person.add("sourceUrl", url)
        person.add("firstName", first_name)
        person.add("lastName", last_name)
        person.add("name", infos.pop("Full name"))
        person.add("description", infos.get("description"))
        street = infos.get("Street", "")
        city = infos.get("City", "")
        postal_code = infos.get("Postal code", "")
        country = infos.get("Country", "")
        person.add("address", "{} {} {} {}".format(street, city, postal_code,
                                                   country))
        # NOTE(review): the `email=` keyword argument looks wrong — other
        # calls pass values positionally; confirm add() accepts this kwarg.
        person.add("email", email=infos.get("Emails"))
        person.add("country", infos.get("Represented Country"))
        person.add("phone", infos.get("phone"))
        # TODO: make political party into an entity
        # TODO: don't have any left-over JSON stuff :)
        infos["Photo"] = image_link
        person.add("notes", json.dumps({key: value
                                        for key, value in infos.items()}))
        emitter.emit(person)
    emitter.finalize()
def parse(context, data):
    """Scrape one politician profile page (schema.org microdata) into a
    Person plus, when present, their party and a Membership linking them.
    """
    url = data["url"]
    response = context.http.rehash(data)
    html = response.html
    emitter = EntityEmitter(context)
    person = emitter.make("Person")
    title = _get_itemprop(html, 'http://schema.org/honorificPrefix')
    firstName = _get_itemprop(html, "http://schema.org/givenName")
    familyName = _get_itemprop(html, "http://schema.org/familyName")
    # Pages without both names are skipped entirely (nothing is emitted).
    if not firstName or not familyName:
        return
    context.log.info("Parsing Person '" + firstName + " " + familyName +
                     "' found at: " + url)
    birthDate = _extract_birth_date(_get_itemprop(html, "birthDate"))
    birthPlace = _get_itemprop(html, "birthPlace")
    telephone = _get_itemprop(html, "http://schema.org/telephone")
    # NOTE(review): faxNumber and image are extracted but never added to the
    # entity below — confirm whether that is intentional.
    faxNumber = _get_itemprop(html, "http://schema.org/faxNumber")
    image = _extract_img_src(html)
    email = _get_itemprop(html, "http://schema.org/email", "*")
    _extract_personal_websites(context, person, html)
    person.add("title", title)
    person.add("firstName", firstName)
    person.add("lastName", familyName)
    person.add("name", " ".join([firstName, familyName]))
    person.add("birthDate", birthDate)
    person.add("birthPlace", birthPlace)
    # Source is Austrian; country is hard-coded.
    person.add("country", "at")
    _extract_social_media(html, person, context)
    _extract_addresses(context, html, person)
    person.add("phone", telephone)
    person.add("email", email)
    person.add("sourceUrl", url)
    person.make_id(url)
    # Side tables: mandates, clubs/societies, companies and affiliations.
    _parse_info_table(emitter, context, person, html, make_mandates, "mandate")
    _parse_info_table(emitter, context, person, html, _make_societies,
                      "vereine")
    _parse_info_table(emitter, context, person, html,
                      _make_work_and_affiliates, "firmenfunktionen")
    party = _make_party(context, data, emitter, html)
    emitter.emit(person)
    # Without a party there is no membership to emit; finish early.
    if not party:
        emitter.finalize()
        return
    emitter.emit(party)
    membership = emitter.make("Membership")
    membership.make_id(person.id, party.id)
    membership.add("member", person.id)
    membership.add("organization", party.id)
    membership.add("sourceUrl", url)
    emitter.emit(membership)
    emitter.finalize()
def parse(context, data):
    """Process each acount-list element of the source XML."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        # "acount-list" spelling matches the tag used by the source feed.
        for node in res.xml.findall(".//acount-list"):
            parse_entry(emitter, node)
    emitter.finalize()
def eeas_parse(context, data):
    """Process each ENTITY element of the EEAS sanctions XML."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        for node in res.xml.findall(".//ENTITY"):
            parse_entry(emitter, node)
    emitter.finalize()
def parse(context, data):
    """Process each mep (Member of the European Parliament) element."""
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        for entry in res.xml.findall('.//mep'):
            parse_node(emitter, entry)
    emitter.finalize()