def extract_convert_identifiers(sum_doc):
    """Extract the identifiers of a Summon document, split by what they identify.

    Every key of ``summon_identifier_type_to_metajson_identifier_type`` that is
    present in ``sum_doc`` is converted with ``metajson.create_identifier``.
    Identifiers of type "issn", "eissn" and "isbn" are collected as
    identifiers of the containing publication; every other type is collected
    as an identifier of the item itself.

    :param sum_doc: Summon document; each identifier key is iterated, so its
        value is expected to be a list of values.
    :returns: a tuple ``(identifiers_item, identifiers_is_part_of, has_isbn,
        has_eissn)`` where the first two are lists of identifiers and the last
        two are booleans telling whether an ISBN / eISSN was found.
    """
    # NOTE(review): the previous implementation ended with ``return identifiers``,
    # a name that is never bound in this function, so it could only raise
    # NameError. Returning every computed value is an assumption about the
    # intended contract -- confirm against callers.
    has_isbn = False
    has_eissn = False
    identifiers_item = []
    identifiers_is_part_of = []
    for sum_key in summon_identifier_type_to_metajson_identifier_type:
        if sum_key not in sum_doc:
            continue
        # The target type only depends on the key: look it up once per key.
        id_type = summon_identifier_type_to_metajson_identifier_type[sum_key]
        for id_value in sum_doc[sum_key]:
            identifier = metajson.create_identifier(id_type, id_value)
            if id_type == "eissn":
                has_eissn = True
            elif id_type == "isbn":
                has_isbn = True
            if id_type in ("issn", "eissn", "isbn"):
                identifiers_is_part_of.append(identifier)
            else:
                identifiers_item.append(identifier)
    return identifiers_item, identifiers_is_part_of, has_isbn, has_eissn
def convert_mods_name_to_contributor(mods_name, dai_dict):
    """Convert a MODS ``name`` element into a Contributor.

    Only names whose ``type`` attribute is "personal" are populated: a Person
    is built from the ``namePart`` children (given name, family name, dates,
    terms of address) and stored under the contributor's "person" key.
    For any other name type an empty Contributor is returned. The role,
    affiliation and description children are not mapped by this function.

    :param mods_name: the MODS name element (must provide ``get`` and
        ``findall``; presumably an ElementTree element), or None.
    :param dai_dict: optional mapping from a name ``ID`` attribute to a dict
        holding "authority" and "value"; when the name ID is found, the two
        are joined with "/" and added to the person as a "uri" identifier.
    :returns: a Contributor, or None when ``mods_name`` is None.
    """
    if mods_name is None:
        return None

    contributor = Contributor()
    if mods_name.get("type") != "personal":
        return contributor

    person = Person()

    name_id = mods_name.get("ID")
    if name_id is not None and dai_dict is not None and name_id in dai_dict:
        dai = dai_dict[name_id]
        id_value = dai["authority"] + "/" + dai["value"]
        identifier = metajson.create_identifier("uri", id_value)
        person.add_item_to_key(identifier, "identifiers")

    for name_part in mods_name.findall(prefixtag("mods", "namePart")):
        part_type = name_part.get("type")
        if part_type == "given":
            person["name_given"] = name_part.text
        elif part_type == "family":
            person["name_family"] = name_part.text
        elif part_type == "date":
            if name_part.text is None:
                # Empty <namePart type="date"/>: nothing to parse.
                continue
            # Dates may be wrapped in parentheses, e.g. "(1900-1980)".
            date = name_part.text.replace("(", "").replace(")", "")
            birth, dash, death = date.partition("-")
            person["date_of_birth"] = birth
            if dash:
                person["date_of_death"] = death
        elif part_type == "termsOfAddress":
            # "termsOfAddress" is a value of the "type" attribute, not an
            # attribute of its own (the previous test could never match).
            person["name_terms_of_address"] = name_part.text

    contributor["person"] = person
    return contributor
def convert_summon_json_document_to_metajson_document(sum_doc, source):
    """Convert one Summon JSON document into a MetaJSON Document.

    The record id, source and type are set first, then languages, the
    containing publication ("is_part_of"), series, classifications, the
    simple bibliographic properties and finally the subjects.

    When a type can be determined for the containing publication, the
    publisher information, peer-review flag and ISSN / eISSN / ISBN
    identifiers are attached to a nested "is_part_of" Document; otherwise
    they are attached to the document itself.

    :param sum_doc: Summon document; "ID" and "ContentType" are read as
        ``sum_doc[key][0]`` and must therefore be present and non-empty.
    :param source: value stored as the document's "rec_source".
    :returns: the populated Document.
    :raises KeyError: presumably, when "ID" / "ContentType" is missing or the
        content type has no entry in
        ``summon_document_type_to_metajson_document_type`` (plain subscript
        lookups are used).
    """
    document = Document()

    # Extract Summon properties
    rec_id = sum_doc["ID"][0].replace("FETCH-", "")
    sum_type = sum_doc["ContentType"][0]
    rec_type = summon_document_type_to_metajson_document_type[sum_type]

    # rec_id, rec_source, rec_type
    document["rec_id"] = rec_id
    document["rec_source"] = source
    document["rec_type"] = rec_type

    # languages: the first successfully converted language is the main one,
    # used below as the language of the abstract, notes and table of contents
    main_language = None
    if "Language" in sum_doc:
        languages = []
        for sum_lang in sum_doc["Language"]:
            lang = language_service.convert_english_to_rfc5646(sum_lang)
            if lang:
                languages.append(lang)
        if languages:
            main_language = languages[0]
            document["languages"] = languages

    # extract summon properties
    contributors = extract_contributors(sum_doc)
    copyright_statement = extract_value(sum_doc, "Copyright")
    date_issued = extract_date_issued(sum_doc)
    degree = extract_value(sum_doc, "DissertationDegree")
    descriptions = extract_convert_langstrings(sum_doc, "Abstract", main_language)
    edition = extract_value(sum_doc, "Edition")
    extent_pages = extract_value(sum_doc, "PageCount")
    genre = extract_value(sum_doc, "Genre")
    is_part_of_edition = extract_value(sum_doc, "PublicationEdition")
    is_part_of_title = extract_value(sum_doc, "PublicationTitle")
    is_part_of_title_sub = extract_value(sum_doc, "PublicationSubtitle")
    notes = extract_convert_langstrings(sum_doc, "Notes", main_language)
    part_issue = extract_value(sum_doc, "Issue")
    part_page_end = extract_value(sum_doc, "EndPage")
    part_page_start = extract_value(sum_doc, "StartPage")
    part_volume = extract_value(sum_doc, "Volume")
    peer_reviewed = extract_boolean_value(sum_doc, "IsPeerReviewed")
    publisher = extract_value(sum_doc, "Publisher")
    publisher_place = extract_value(sum_doc, "PublicationPlace")
    scholarly = extract_boolean_value(sum_doc, "IsScholarly")
    series_title = extract_value(sum_doc, "PublicationSeriesTitle")
    subject_keywords = extract_value(sum_doc, "Keywords", True)
    subject_names = convert_contributors(sum_doc, "RelatedPersons", None, "person", None)
    subject_terms = extract_value(sum_doc, "SubjectTerms", True)
    table_of_contents = extract_convert_langstrings(sum_doc, "TableOfContents", main_language)
    title = extract_value(sum_doc, "Title")
    title_sub = extract_value(sum_doc, "Subtitle")

    # identifiers: ISSN / eISSN / ISBN describe the containing publication,
    # everything else describes the item itself
    has_isbn = False
    has_eissn = False
    identifiers_item = []
    identifiers_is_part_of = []
    for sum_key in summon_identifier_type_to_metajson_identifier_type:
        if sum_key not in sum_doc:
            continue
        # The target type only depends on the key: look it up once per key.
        id_type = summon_identifier_type_to_metajson_identifier_type[sum_key]
        for id_value in sum_doc[sum_key]:
            identifier = metajson.create_identifier(id_type, id_value)
            if id_type == "eissn":
                has_eissn = True
            elif id_type == "isbn":
                has_isbn = True
            if id_type in ("issn", "eissn", "isbn"):
                identifiers_is_part_of.append(identifier)
            else:
                identifiers_item.append(identifier)

    # is_part_of_type determination: explicit mapping first, then heuristics
    is_part_of_type = None
    if sum_type in summon_document_type_to_metajson_document_is_part_of_type:
        is_part_of_type = summon_document_type_to_metajson_document_is_part_of_type[sum_type]

    elif is_part_of_title and is_part_of_title != title and rec_type not in ["Book", "Journal", "Magazine", "Newspaper", "Periodical"]:
        if has_isbn:
            is_part_of_type = "Book"
        elif has_eissn:
            is_part_of_type = "Journal"
        else:
            lowered_title = is_part_of_title.lower()
            if "conference" in lowered_title:
                is_part_of_type = "Book"
            # Membership test on purpose: str.find() returns -1 (truthy) when
            # the word is absent, so the former "find(..) or find(..)" test
            # was always true and hid the two branches below.
            elif "review" in lowered_title or "journal" in lowered_title:
                is_part_of_type = "Journal"
            elif rec_type == "Dataset":
                is_part_of_type = "Periodical"
            else:
                # Single-argument parenthesised print: same output with the
                # Python 2 print statement and the Python 3 print function.
                print("unknown is_part_of_type for rec_type: %s" % rec_type)

    # is_part_of
    if is_part_of_type:
        is_part_of = Document()
        is_part_of.set_key_if_not_none("rec_type", is_part_of_type)
        is_part_of.set_key_if_not_none("edition", is_part_of_edition)
        is_part_of.add_items_to_key(identifiers_is_part_of, "identifiers")
        is_part_of.set_key_if_not_none("peer_reviewed", peer_reviewed)
        is_part_of.set_key_if_not_none("publisher", publisher)
        is_part_of.set_key_if_not_none("publisher_place", publisher_place)
        is_part_of.set_key_if_not_none("title", is_part_of_title)
        is_part_of.set_key_if_not_none("title_sub", is_part_of_title_sub)

        document.add_items_to_key(identifiers_item, "identifiers")

        document.add_items_to_key([is_part_of], "is_part_of")
    else:
        # No containing publication: everything belongs to the document itself
        document.set_key_if_not_none("peer_reviewed", peer_reviewed)
        document.set_key_if_not_none("publisher", publisher)
        document.set_key_if_not_none("publisher_place", publisher_place)
        document.add_items_to_key(identifiers_is_part_of, "identifiers")
        document.add_items_to_key(identifiers_item, "identifiers")

    # series
    if series_title:
        series = Document()
        series.set_key_if_not_none("title", series_title)

        document.add_items_to_key([series], "series")

    # classifications
    extract_convert_add_classifications(sum_doc, document, "DEWEY", "ddc")
    extract_convert_add_classifications(sum_doc, document, "Discipline", "discipline")
    extract_convert_add_classifications(sum_doc, document, "NAICS", "NAICS")

    # set properties
    document.set_key_if_not_none("contributors", contributors)
    document.set_key_if_not_none("copyright_statement", copyright_statement)
    document.set_key_if_not_none("date_issued", date_issued)
    document.set_key_if_not_none("degree", degree)
    document.set_key_if_not_none("descriptions", descriptions)
    document.set_key_if_not_none("edition", edition)
    document.set_key_if_not_none("extent_pages", extent_pages)
    document.set_key_if_not_none("genre", genre)
    document.set_key_if_not_none("notes", notes)
    document.set_key_if_not_none("part_issue", part_issue)
    document.set_key_if_not_none("part_page_end", part_page_end)
    document.set_key_if_not_none("part_page_start", part_page_start)
    document.set_key_if_not_none("part_volume", part_volume)
    document.set_key_if_not_none("scholarly", scholarly)
    document.set_key_if_not_none("table_of_contents", table_of_contents)
    document.set_key_if_not_none("title", title)
    document.set_key_if_not_none("title_sub", title_sub)

    # subject
    subject = Subject()
    if subject_keywords:
        subject["keywords"] = subject_keywords
    if subject_names:
        subject["names"] = subject_names
    if subject_terms:
        subject["terms"] = subject_terms
    if subject:
        document["subjects"] = subject

    # Conversion trace; always enabled for now
    debug = True
    if debug:
        related_items_msg = "\t\t\t\t\t\t"
        if is_part_of_type:
            related_items_msg = "\tis_part_of: {} ".format(is_part_of_type)
        print("{}\t->\titem: {} {}\t:\t{}\t:\t{}".format(sum_type, rec_type, related_items_msg, rec_id, title))

    return document
def convert_mods_identifier(mods_identifier):
    """Turn a MODS ``identifier`` element into a MetaJSON identifier.

    The element's ``type`` attribute and its text are passed to
    ``metajson.create_identifier``. Returns None when no element is given.
    """
    if mods_identifier is None:
        return None
    id_type = mods_identifier.get("type")
    id_value = mods_identifier.text
    return metajson.create_identifier(id_type, id_value)