Пример #1
0
def load_dict(meta_dict):
    """Wrap a metadata dictionary in the class named by its "rec_class" key.

    Args:
        meta_dict: Mapping that may carry a "rec_class" entry.

    Returns:
        An instance of the class whose name equals meta_dict["rec_class"]
        (Document, Person, Orgunit, Project, Event, Family, Field, Resource,
        Target, Type or Collection), built from meta_dict. When the key is
        missing, or its value is not one of those names, a Common instance is
        returned instead; the unknown case is also logged.
    """
    if "rec_class" not in meta_dict:
        return Common(meta_dict)

    rec_class = meta_dict["rec_class"]
    if rec_class == "Document":
        return Document(meta_dict)
    if rec_class == "Person":
        return Person(meta_dict)
    if rec_class == "Orgunit":
        return Orgunit(meta_dict)
    if rec_class == "Project":
        return Project(meta_dict)
    if rec_class == "Event":
        return Event(meta_dict)
    if rec_class == "Family":
        return Family(meta_dict)
    if rec_class == "Field":
        return Field(meta_dict)
    if rec_class == "Resource":
        return Resource(meta_dict)
    if rec_class == "Target":
        return Target(meta_dict)
    if rec_class == "Type":
        return Type(meta_dict)
    if rec_class == "Collection":
        return Collection(meta_dict)

    # Unknown class name: log the record and fall back to the generic wrapper.
    logging.debug(jsonbson.dumps_bson(meta_dict))
    # The placeholder used to be "{O}" (letter O), which made str.format raise
    # KeyError instead of emitting the warning.
    logging.warning("Unknown rec_class: %s", rec_class)
    return Common(meta_dict)
Пример #2
0
def get_rml_relationships(rml):
    """relationship -> relationships

    Reads every rml "relationship" child of *rml* and returns
    {"relationships": [...]} when at least one was converted, or an empty
    dict otherwise.
    """
    rml_relationships = rml.findall(xmletree.prefixtag("rml", "relationship"))
    if rml_relationships is None:
        return {}

    collected = []
    for rml_relationship in rml_relationships:
        if rml_relationship is None:
            continue

        # name -> agent.name
        agent_name = get_rml_element_text(rml_relationship, "name")
        entry = creator_service.formatted_name_to_creator(agent_name, constants.REC_CLASS_PERSON, None)
        if entry is None:
            # No usable name: start from an empty person
            entry = {"agent": Person()}

        # identifier -> agent.rec_id & agent.identifiers
        entry["agent"].update(get_rml_identifiers(rml_relationship))

        # relationType -> relation_type
        entry.update(get_rml_element_text_and_set_key(rml_relationship, "relationType", "relation_type"))

        # descriptions -> descriptions
        entry.update(get_rml_textlangs_and_set_key(rml_relationship, "description", "descriptions"))

        collected.append(entry)

    if not collected:
        return {}
    return {"relationships": collected}
Пример #3
0
def convert_creators(sum_doc, sum_authors_key, sum_affiliations_key, creator_type, role):
    """Convert the authors stored in *sum_doc* into a list of creators.

    Args:
        sum_doc: Mapping holding the author and affiliation lists.
        sum_authors_key: Key of the author list in sum_doc.
        sum_affiliations_key: Key of the affiliation list in sum_doc.
        creator_type: rec_class passed to
            creator_service.formatted_name_to_creator for authors that only
            have a "fullname" or "name".
        role: Optional role; when truthy it is stored as the creator's roles.

    Returns:
        A list of creators, empty when sum_doc has no (or an empty) author
        list. Authors for which no creator could be built are skipped.
    """
    creators = []
    if sum_authors_key not in sum_doc:
        return creators
    sum_authors = sum_doc[sum_authors_key]
    if not sum_authors:
        return creators

    # Affiliations are addressed by their 1-based position in the list;
    # an author's "sequence" value is looked up in this table.
    affiliations_dict = {}
    if sum_affiliations_key in sum_doc:
        sum_affiliations = sum_doc[sum_affiliations_key]
        if sum_affiliations:
            for index, affiliation in enumerate(sum_affiliations):
                affiliations_dict[index + 1] = affiliation

    for author in sum_authors:
        if "surname" in author and "givenname" in author:
            person = Person()
            person.set_key_if_not_none("name_family", author["surname"])
            person.set_key_if_not_none("name_given", author["givenname"])
            creator = Creator()
            creator["agent"] = person
            if role:
                creator["roles"] = [role]
        else:
            formatted_name = ""
            if "fullname" in author:
                formatted_name = author["fullname"]
            elif "name" in author:
                formatted_name = author["name"]

            # Returns None when formatted_name is empty
            creator = creator_service.formatted_name_to_creator(formatted_name, creator_type, role)

        # Guard against None: assigning a key on it used to raise TypeError
        # for nameless authors that carry a known "sequence".
        if creator is not None and "sequence" in author and author["sequence"] in affiliations_dict:
            creator["affiliation"] = affiliations_dict[author["sequence"]]

        # TODO: location in the case of DissertationSchool_xml
        if creator:
            creators.append(creator)

    return creators
Пример #4
0
def formatted_name_to_creator(formatted_name, rec_class, role):
    """Build a Creator from a free-text name.

    Args:
        formatted_name: Name to parse. For persons and families the accepted
            shapes are "Family, Given" or "Given Family", optionally followed
            by "/affiliation", ", Jr." and "(birth-death)".
        rec_class: One of constants.REC_CLASS_EVENT, REC_CLASS_FAMILY,
            REC_CLASS_ORGUNIT or REC_CLASS_PERSON. Any other value triggers a
            keyword search of the name against creator_event_terms and
            creator_orgunit_terms; None falls back to REC_CLASS_PERSON when
            nothing matches.
        role: Optional role; when truthy stored as creator["roles"] = [role].

    Returns:
        None when formatted_name is falsy. Otherwise a Creator whose "agent"
        is an Event, Orgunit, Person or Family. An unrecognised, non-None
        rec_class that matches no keyword yields a Creator without "agent".
    """
    if not formatted_name:
        return None

    formatted_name = formatted_name.strip()

    # rec_class determination
    known_classes = (
        constants.REC_CLASS_EVENT, constants.REC_CLASS_FAMILY,
        constants.REC_CLASS_ORGUNIT, constants.REC_CLASS_PERSON,
    )
    if rec_class is None or rec_class not in known_classes:
        lowered_name = formatted_name.lower()
        if any(event_term in lowered_name for event_term in creator_event_terms):
            rec_class = constants.REC_CLASS_EVENT
        # Evaluated after the event terms, so an orgunit match wins when both match
        if any(orgunit_term in lowered_name for orgunit_term in creator_orgunit_terms):
            rec_class = constants.REC_CLASS_ORGUNIT
        if rec_class is None:
            rec_class = constants.REC_CLASS_PERSON

    creator = Creator()
    if role:
        creator["roles"] = [role]

    if rec_class == constants.REC_CLASS_EVENT:
        event = Event()
        event["title"] = formatted_name
        creator["agent"] = event

    elif rec_class == constants.REC_CLASS_ORGUNIT:
        orgunit = Orgunit()
        orgunit["name"] = formatted_name
        creator["agent"] = orgunit

    elif rec_class == constants.REC_CLASS_PERSON or rec_class == constants.REC_CLASS_FAMILY:
        # class is "Person" or "Family"
        name_given = ""
        name_middle = ""
        name_family = ""
        name_prefix = ""
        name_terms_of_address = ""
        date_birth = ""
        date_death = ""
        # Initialised here so no vars() lookup is needed further down
        affiliation_name = ""

        parenthesis_index = formatted_name.rfind("(")
        if parenthesis_index != -1:
            # may be like: name (date_birth-date_death)
            # The slice drops the last character, assumed to be ")"
            dates_part = formatted_name[parenthesis_index + 1:-1].strip()
            date_birth = dates_part[:4]
            date_death = dates_part[5:]
            if date_death == "....":
                date_death = ""
            formatted_name = formatted_name[:parenthesis_index].strip()

        slash_index = formatted_name.find("/")
        if slash_index != -1:
            # like: name/affiliation
            affiliation_name = formatted_name[slash_index + 1:].strip()
            formatted_name = formatted_name[:slash_index].strip()

        commaspacejrdot_index = formatted_name.rfind(", Jr.")
        if commaspacejrdot_index != -1:
            # like "Paul B. Harvey, Jr." ("Jr." is kept in name_middle)
            formatted_name = formatted_name[:commaspacejrdot_index].strip()
            name_middle = "Jr."

        # Is it formatted like "Family, Given" or "Given Family" ?
        comma_index = formatted_name.find(",")
        if comma_index == -1:
            space_index = formatted_name.rfind(" ")
            if space_index != -1:
                # like Given Family
                name_given = formatted_name[:space_index].strip()
                name_family = formatted_name[space_index + 1:].strip()
            else:
                # like Family
                name_family = formatted_name.strip()
        else:
            # like Family, Given
            name_family = formatted_name[:comma_index].strip()
            name_given = formatted_name[comma_index + 1:].strip()

        # manage the terms_of_address and particule
        for term_of_address in creator_person_terms_of_address:
            term_length = len(term_of_address)
            if name_family and name_family.lower().startswith(term_of_address + " "):
                name_terms_of_address = name_family[:term_length]
                name_family = name_family[term_length:].strip()
            if name_given:
                if name_given.lower().endswith(" " + term_of_address):
                    name_terms_of_address = name_given[-term_length:]
                    name_given = name_given[:-term_length].strip()
                if name_given.lower().startswith(term_of_address + " "):
                    name_terms_of_address = name_given[:term_length]
                    name_given = name_given[term_length:].strip()
                if name_given.lower() == term_of_address:
                    # The given name was only the term itself
                    name_terms_of_address = name_given
                    name_given = None

        # Be careful with a particule inside the name like: Viveiros de Castro, Eduardo
        for particule in creator_particule:
            particule_length = len(particule)
            if name_family and name_family.lower().startswith(particule + " "):
                name_prefix = name_family[:particule_length]
                name_family = name_family[particule_length:].strip()
            if name_given:
                if name_given.lower().endswith(" " + particule):
                    name_prefix = name_given[-particule_length:]
                    name_given = name_given[:-particule_length].strip()
                if name_given.lower().startswith(particule + " "):
                    name_prefix = name_given[:particule_length]
                    name_given = name_given[particule_length:].strip()
                if name_given.lower() == particule:
                    # The given name was only the particule itself
                    name_prefix = name_given
                    name_given = None

        if rec_class == constants.REC_CLASS_PERSON:
            person = Person()
            person.set_key_if_not_none("name_family", name_family)
            person.set_key_if_not_none("name_given", name_given)
            person.set_key_if_not_none("name_middle", name_middle)
            person.set_key_if_not_none("name_terms_of_address", name_terms_of_address)
            person.set_key_if_not_none("name_prefix", name_prefix)
            person.set_key_if_not_none("date_birth", date_birth)
            person.set_key_if_not_none("date_death", date_death)

            creator["agent"] = person

            if affiliation_name:
                # todo manage as an object
                affiliation = Orgunit()
                affiliation["name"] = affiliation_name
                creator["affiliation"] = affiliation

        elif rec_class == constants.REC_CLASS_FAMILY:
            family = Family()
            family.set_key_if_not_none("name_family", name_family)
            creator["agent"] = family

    return creator
Пример #5
0
def formatted_name_to_creator(formatted_name, rec_class, role):
    """Turn a free-text name into a Creator.

    Args:
        formatted_name: Name string to interpret.
        rec_class: Requested agent class (one of the constants.REC_CLASS_*
            values for event, family, orgunit or person). Other values are
            resolved by keyword search, see _resolve_creator_rec_class.
        role: Optional role; when truthy stored as creator["roles"] = [role].

    Returns:
        None when formatted_name is falsy, otherwise a Creator. Its "agent"
        is an Event (title = name), an Orgunit (name = name), a Person or a
        Family; a Person may also get an "affiliation" Orgunit. If the class
        cannot be resolved to one of the four known ones, the Creator is
        returned without an "agent".
    """
    if not formatted_name:
        return None

    formatted_name = formatted_name.strip()
    rec_class = _resolve_creator_rec_class(formatted_name, rec_class)

    creator = Creator()
    if role:
        creator["roles"] = [role]

    if rec_class == constants.REC_CLASS_EVENT:
        event = Event()
        event["title"] = formatted_name
        creator["agent"] = event

    elif rec_class == constants.REC_CLASS_ORGUNIT:
        orgunit = Orgunit()
        orgunit["name"] = formatted_name
        creator["agent"] = orgunit

    elif rec_class == constants.REC_CLASS_PERSON or rec_class == constants.REC_CLASS_FAMILY:
        parts = _parse_personal_name(formatted_name)

        if rec_class == constants.REC_CLASS_PERSON:
            person = Person()
            for key in ("name_family", "name_given", "name_middle",
                        "name_terms_of_address", "name_prefix",
                        "date_birth", "date_death"):
                person.set_key_if_not_none(key, parts[key])
            creator["agent"] = person

            if parts["affiliation_name"]:
                # todo manage as an object
                affiliation = Orgunit()
                affiliation["name"] = parts["affiliation_name"]
                creator["affiliation"] = affiliation

        elif rec_class == constants.REC_CLASS_FAMILY:
            family = Family()
            family.set_key_if_not_none("name_family", parts["name_family"])
            creator["agent"] = family

    return creator


def _resolve_creator_rec_class(formatted_name, rec_class):
    """Return rec_class unchanged when it is a known class, else guess it from keywords."""
    known_classes = (
        constants.REC_CLASS_EVENT, constants.REC_CLASS_FAMILY,
        constants.REC_CLASS_ORGUNIT, constants.REC_CLASS_PERSON,
    )
    if rec_class is not None and rec_class in known_classes:
        return rec_class

    lowered_name = formatted_name.lower()
    if any(event_term in lowered_name for event_term in creator_event_terms):
        rec_class = constants.REC_CLASS_EVENT
    # Tested after the event terms, so an orgunit keyword takes precedence
    if any(orgunit_term in lowered_name for orgunit_term in creator_orgunit_terms):
        rec_class = constants.REC_CLASS_ORGUNIT
    if rec_class is None:
        rec_class = constants.REC_CLASS_PERSON
    return rec_class


def _parse_personal_name(formatted_name):
    """Split a personal name into a dict of name parts, dates and affiliation name."""
    parts = {
        "name_given": "",
        "name_middle": "",
        "name_family": "",
        "name_prefix": "",
        "name_terms_of_address": "",
        "date_birth": "",
        "date_death": "",
        "affiliation_name": "",
    }
    name = formatted_name

    open_paren_index = name.rfind("(")
    if open_paren_index != -1:
        # may be like: name (date_birth-date_death)
        # The slice drops the last character, assumed to be ")"
        dates_part = name[open_paren_index + 1:-1].strip()
        parts["date_birth"] = dates_part[:4]
        date_death = dates_part[5:]
        parts["date_death"] = "" if date_death == "...." else date_death
        name = name[:open_paren_index].strip()

    slash_index = name.find("/")
    if slash_index != -1:
        # like: name/affiliation
        parts["affiliation_name"] = name[slash_index + 1:].strip()
        name = name[:slash_index].strip()

    junior_index = name.rfind(", Jr.")
    if junior_index != -1:
        # like "Paul B. Harvey, Jr." ("Jr." is kept in name_middle)
        name = name[:junior_index].strip()
        parts["name_middle"] = "Jr."

    # Is it formatted like "Family, Given" or "Given Family" ?
    comma_index = name.find(",")
    if comma_index != -1:
        # like Family, Given
        name_family = name[:comma_index].strip()
        name_given = name[comma_index + 1:].strip()
    else:
        space_index = name.rfind(" ")
        if space_index != -1:
            # like Given Family
            name_given = name[:space_index].strip()
            name_family = name[space_index + 1:].strip()
        else:
            # like Family
            name_given = ""
            name_family = name.strip()

    # Terms of address first, then particules, each working on what the
    # previous pass left over.
    terms_of_address, name_family, name_given = _strip_name_affixes(
        name_family, name_given, creator_person_terms_of_address)
    # Be careful with a particule inside the name like: Viveiros de Castro, Eduardo
    prefix, name_family, name_given = _strip_name_affixes(
        name_family, name_given, creator_particule)

    parts["name_terms_of_address"] = terms_of_address
    parts["name_prefix"] = prefix
    parts["name_family"] = name_family
    parts["name_given"] = name_given
    return parts


def _strip_name_affixes(name_family, name_given, affixes):
    """Remove each affix found at the edges of the names; return (last affix found, family, given)."""
    found = ""
    for affix in affixes:
        affix_length = len(affix)
        if name_family and name_family.lower().startswith(affix + " "):
            found = name_family[:affix_length]
            name_family = name_family[affix_length:].strip()
        if name_given:
            if name_given.lower().endswith(" " + affix):
                found = name_given[-affix_length:]
                name_given = name_given[:-affix_length].strip()
            if name_given.lower().startswith(affix + " "):
                found = name_given[:affix_length]
                name_given = name_given[affix_length:].strip()
            if name_given.lower() == affix:
                # The given name was only the affix itself
                found = name_given
                name_given = None
    return found, name_family, name_given
Пример #6
0
def rml_person_to_metajson(rml_person, source, rec_id_prefix):
    """person -> person

    Builds a Person from an rml person element by merging, in a fixed order,
    the dict returned by each extraction helper. *source*, when truthy, is
    stored under "rec_source". *rec_id_prefix* is accepted but not used here.
    """
    person = Person()

    # source
    if source:
        person["rec_source"] = source

    text = get_rml_element_text_and_set_key
    textlangs = get_rml_textlangs_and_set_key

    # (extractor, extra positional arguments), applied top to bottom so that
    # later results overwrite earlier ones on key collisions.
    extraction_steps = (
        (get_rml_titles, ()),  # academicTitle, honorificTitle -> titles
        (get_rml_addresses, ()),  # address -> addresses
        (get_rml_affiliations, ()),  # affiliation -> affiliations
        (textlangs, ("award", "awards")),  # award -> awards
        (textlangs, ("biography", "biographies")),  # biography -> biographies
        (text, ("dateOfBirth", "date_birth")),  # dateOfBirth -> date_birth
        (text, ("dateOfDeath", "date_death")),  # dateOfDeath -> date_death
        (get_rml_degrees, ()),  # degree -> degrees
        (get_rml_emails, ()),  # email -> emails
        # @fictitious -> fictitious
        (xmletree.get_element_attribute_as_boolean_and_set_key, ("fictitious", "fictitious")),
        (text, ("firstname", "name_given")),  # firstname -> name_given
        (get_rml_identifiers, ()),  # identifier -> identifiers & rec_id
        (get_rml_images, ("picture",)),  # image -> resources[i]
        (get_rml_instant_messages, ()),  # instantMessage -> instant_messages
        (get_rml_language_capabilities, ()),  # languageCapability -> language_capabilities
        (text, ("lastname", "name_family")),  # lastname -> name_family
        (text, ("lastnamePrefix", "name_prefix")),  # lastnamePrefix -> name_prefix
        (text, ("lastnameSuffix", "name_suffix")),  # lastnameSuffix -> name_suffix
        (text, ("middlename", "name_middle")),  # middlename -> name_middle
        (text, ("nationality", "nationality")),  # nationality -> nationality
        (text, ("nickname", "name_nick")),  # nickname -> name_nick
        (textlangs, ("note", "notes")),  # note -> notes
        (textlangs, ("olBiography", "biographies_short")),  # olBiography -> biographies_short
        (get_rml_ongoing_researches, ()),  # ongoingResearch -> ongoing_researches
        (get_rml_phones, ()),  # phone -> phones
        (get_rml_relationships, ()),  # relationship -> relationships
        (get_rml_research_coverages, ()),  # researchCoverage -> research_coverages
        (textlangs, ("responsability", "responsabilities")),  # responsability -> responsabilities
        (text, ("sex", "gender")),  # sex -> gender
        (textlangs, ("skill", "skills")),  # skill -> skills
        (get_rml_teachings, ()),  # teaching -> teachings
        (get_rml_uris, ()),  # uri -> urls
    )
    for extractor, extra_args in extraction_steps:
        person.update(extractor(rml_person, *extra_args))

    return person