def convert_endnote_record_to_metajson_document(record, source):
    """Convert one EndNote XML record into a metajson ``Document``.

    The function first reads every EndNote field it knows about from
    ``record`` and then maps those values onto the metajson document,
    creating nested ``is_part_of``, ``series``, ``original``, ``review_of``,
    ``archive`` and ``resources`` entries where the EndNote reference type
    calls for them.

    Args:
        record: element-like object exposing ``find()``; it is also handed to
            ``extract_text`` / ``extract_contributors`` together with
            path expressions such as ``"./titles/title/style"``.
        source: value stored unchanged under the ``rec_source`` key.

    Returns:
        The populated ``Document``.

    Raises:
        KeyError: if the record's ``ref-type`` has no entry in
            ``endnote_record_type_to_metajson_document_type``
            (assuming that mapping is a dict -- TODO confirm).
        AttributeError: presumably when ``rec-number`` or ``ref-type`` is
            missing, since ``.text`` is read directly on the ``find()``
            result -- verify against the XML library in use.
    """
    document = Document()

    # TODO
    # translated_contributors: /contributors/translated-authors/author/style
    # auth_address: /auth-address/style
    # label: /label/style
    # custom1

    # Extract endnote properties

    rec_id = record.find("rec-number").text
    endnote_type = record.find("ref-type").text
    rec_type = endnote_record_type_to_metajson_document_type[endnote_type]

    primary_contributors = extract_contributors(None, "aut", record, "./contributors/authors/author/style")
    secondary_contributors = extract_contributors(None, "edt", record, "./contributors/secondary-authors/author/style")

    # The role given to tertiary / subsidiary authors depends on the
    # reference type. Default to None so the names are always bound, even for
    # types that do not use them.
    tertiary_contributors = None
    if endnote_type in [TYPE_BOOK, TYPE_BOOK_SECTION]:
        tertiary_contributors = extract_contributors(None, "edt", record, "./contributors/tertiary-authors/author/style")
    elif endnote_type == TYPE_THESIS:
        tertiary_contributors = extract_contributors(None, "ths", record, "./contributors/tertiary-authors/author/style")
    elif endnote_type == TYPE_FILM_OR_BROADCAST:
        tertiary_contributors = extract_contributors(None, "pro", record, "./contributors/tertiary-authors/author/style")

    subsidiary_contributors = None
    if endnote_type in [TYPE_BOOK, TYPE_BOOK_SECTION]:
        subsidiary_contributors = extract_contributors(None, "trl", record, "./contributors/subsidiary-authors/author/style")
    elif endnote_type == TYPE_FILM_OR_BROADCAST:
        subsidiary_contributors = extract_contributors(None, "act", record, "./contributors/subsidiary-authors/author/style")

    translated_contributors = extract_contributors(None, "trl", record, "./contributors/translated-authors/author/style")
    # NOTE: auth_address, date_pub, label and custom1-3/5-7 are extracted but
    # not mapped to any document key further down (see the TODO above).
    auth_address = extract_text(record, "./auth-address/style")

    title = extract_text(record, "./titles/title/style")
    title_secondary = extract_text(record, "./titles/secondary-title/style")
    title_tertiary = extract_text(record, "./titles/tertiary-title/style")
    title_alternative = extract_text(record, "./titles/alt-title/style")
    title_abbreviated = extract_text(record, "./titles/short-title/style")
    title_translated = extract_text(record, "./titles/translated-title/style")

    pages = extract_text(record, "./pages/style")
    part_volume = extract_text(record, "./volume/style")
    part_number = extract_text(record, "./number/style")
    extent_volumes = extract_text(record, "./num-vols/style")
    edition = extract_text(record, "./edition/style")
    part_section = extract_text(record, "./section/style")
    reprint_edition = extract_text(record, "./reprint-edition/style")
    keywords = extract_text(record, "./keywords/keyword/style")
    date_year = extract_text(record, "./dates/year/style")
    date_pub = extract_text(record, "./dates/pub-dates/date/style")
    publisher_place = extract_text(record, "./pub-location/style")
    publisher = extract_text(record, "./publisher/style")
    orig_pub = extract_text(record, "./orig-pub/style")
    isbn_or_issn = extract_text(record, "./isbn/style")
    accessionnumber = extract_text(record, "./accession-num/style")
    callnumber = extract_text(record, "./call-num/style")
    # For web pages the abstract is read from the "pages" field.
    if endnote_type == TYPE_WEB_PAGE:
        abstract = extract_text(record, "./pages/style")
    else:
        abstract = extract_text(record, "./abstract/style")
    label = extract_text(record, "./label/style")
    caption = extract_text(record, "./caption/style")
    note = extract_text(record, "./notes/style")
    reviewed_item = extract_text(record, "./reviewed-item/style")
    rec_type_description = extract_text(record, "./work-type/style")
    remote_url = extract_text(record, "./urls/related-urls/url/style")
    custom1 = extract_text(record, "./custom1/style")
    custom2 = extract_text(record, "./custom2/style")
    custom3 = extract_text(record, "./custom3/style")
    custom4 = extract_text(record, "./custom4/style")
    custom5 = extract_text(record, "./custom5/style")
    custom6 = extract_text(record, "./custom6/style")
    custom7 = extract_text(record, "./custom7/style")
    doi = extract_text(record, "./electronic-resource-num/style")
    remote_database_name = extract_text(record, "./remote-database-name/style")
    remote_database_provider = extract_text(record, "./remote-database-provider/style")
    research_notes = extract_text(record, "./research-notes/style")
    language = extract_text(record, "./language/style")
    access_date = extract_text(record, "./access-date/style")

    # rec_id, rec_source
    document["rec_id"] = rec_id
    document["rec_source"] = source

    # publisher, publisher_place: carriage returns become "; " separators
    if publisher:
        publisher = publisher.replace("\r", "; ")
    if publisher_place:
        publisher_place = publisher_place.replace("\r", "; ")

    # type, is_part_of.type, is_part_of.is_part_of.type
    # Only a missing mapping entry means "no container type"; any other
    # error must surface instead of being silently swallowed.
    try:
        is_part_of_type = endnote_record_type_to_metajson_document_is_part_of_type[endnote_type]
    except KeyError:
        is_part_of_type = None

    is_part_of_is_part_of_type = None
    if title_secondary is not None:
        if endnote_type == TYPE_FIGURE:
            # how to determine the is_part_of type ?
            # if there is a volume or an issue number, it's a JournalArticle, else it's a Book or BookChapter
            if part_volume is not None or part_number is not None:
                is_part_of_type = "Article"
                is_part_of_is_part_of_type = "Journal"
            elif title_translated is not None:
                is_part_of_type = "BookPart"
                is_part_of_is_part_of_type = "Book"
            else:
                is_part_of_type = "Book"
        elif endnote_type == TYPE_FILM_OR_BROADCAST:
            rec_type = "VideoPart"
            is_part_of_type = "VideoRecording"

    document["rec_type"] = rec_type
    document.set_key_if_not_none("rec_type_description", rec_type_description)

    # issn or isbn ?
    if is_part_of_type in ["Journal", "Newspaper", "Article"]:
        isbn_or_issn_type = "issn"
    else:
        isbn_or_issn_type = "isbn"

    # is_part_of, is_part_of.is_part_of
    if is_part_of_type is not None:
        is_part_of = Document()
        is_part_of.set_key_if_not_none("rec_type", is_part_of_type)
        is_part_of.set_key_if_not_none("title", title_secondary)

        if is_part_of_is_part_of_type is not None:
            # is_part_of in case of is_part_of.is_part_of
            # contributors with role aut
            is_part_of.add_contributors(contributor_service.change_contibutors_role(secondary_contributors, "aut"))

            # is_part_of.is_part_of: publisher data and identifier live on
            # the outermost container
            is_part_of_is_part_of = Document()
            is_part_of_is_part_of.set_key_if_not_none("rec_type", is_part_of_is_part_of_type)
            is_part_of_is_part_of.set_key_if_not_none("title", title_translated)
            # contributors with role edt
            is_part_of_is_part_of.add_contributors(contributor_service.change_contibutors_role(translated_contributors, "edt"))
            is_part_of_is_part_of.set_key_if_not_none("publisher", publisher)
            is_part_of_is_part_of.set_key_if_not_none("publisher_place", publisher_place)
            is_part_of_is_part_of.set_key_with_value_type_in_list("identifiers", isbn_or_issn, isbn_or_issn_type)

            is_part_of.add_items_to_key([is_part_of_is_part_of], "is_part_of")

        else:
            # is_part_of in case of no is_part_of.is_part_of
            # contributors with role edt
            is_part_of.add_contributors(secondary_contributors)
            is_part_of.set_key_if_not_none("publisher", publisher)
            is_part_of.set_key_if_not_none("publisher_place", publisher_place)
            is_part_of.set_key_with_value_type_in_list("identifiers", isbn_or_issn, isbn_or_issn_type)

        document.add_items_to_key([is_part_of], "is_part_of")

    else:
        # No container: identifier and publisher data belong to the item.
        document.set_key_with_value_type_in_list("identifiers", isbn_or_issn, isbn_or_issn_type)
        if publisher:
            # For theses and films the "publisher" field is stored as an
            # organisational contributor with role "dgg" / "dst" instead.
            if endnote_type == TYPE_THESIS:
                document.add_contributors([contributor_service.convert_formatted_name_to_contributor(publisher, "orgunit", "dgg")])
            elif endnote_type == TYPE_FILM_OR_BROADCAST:
                document.add_contributors([contributor_service.convert_formatted_name_to_contributor(publisher, "orgunit", "dst")])
            else:
                document.set_key_if_not_none("publisher", publisher)
        document.set_key_if_not_none("publisher_place", publisher_place)

    # series[]
    if endnote_type in [TYPE_BOOK, TYPE_BOOK_SECTION]:
        series = Document()
        if endnote_type == TYPE_BOOK and title_secondary:
            series.set_key_if_not_none("title", title_secondary)
            series.add_contributors(secondary_contributors)
            series.set_key_if_not_none("part_volume", part_number)
        if endnote_type == TYPE_BOOK_SECTION and title_tertiary:
            series.set_key_if_not_none("title", title_tertiary)
            series.add_contributors(tertiary_contributors)
            series.set_key_if_not_none("part_volume", part_number)
        # Only attach a series that has a title and more than two keys.
        if "title" in series and len(series) > 2:
            document.add_items_to_key([series], "series")

    # originals[]
    if (reprint_edition or orig_pub) and endnote_type in [TYPE_BOOK, TYPE_BOOK_SECTION, TYPE_JOURNAL_ARTICLE, TYPE_FILM_OR_BROADCAST]:
        original = Document()
        # reprint_edition takes precedence over orig_pub
        original_title = reprint_edition or orig_pub
        original.set_key_if_not_none("title", original_title)
        original.set_key_if_not_none("rec_type", rec_type)
        document.add_items_to_key([original], "original")

    # review_of[]
    if reviewed_item and endnote_type in [TYPE_BOOK_SECTION, TYPE_JOURNAL_ARTICLE]:
        review_of = Document()
        review_of.set_key_if_not_none("title", reviewed_item)
        review_of.set_key_if_not_none("rec_type", "Book")
        document.add_items_to_key([review_of], "review_of")

    # abstracts[0].value
    if abstract:
        document["abstracts"] = [{"value": abstract}]

    # archive
    if endnote_type == TYPE_FIGURE and remote_database_provider:
        archive = Document()
        archive["title"] = remote_database_provider
        document.add_items_to_key([archive], "archive")

    # caption
    document.set_key_if_not_none("caption", caption)

    # contributors[]
    document.add_contributors(primary_contributors)
    if endnote_type in [TYPE_BOOK, TYPE_THESIS, TYPE_FILM_OR_BROADCAST]:
        document.add_contributors(tertiary_contributors)
    if endnote_type in [TYPE_BOOK, TYPE_BOOK_SECTION, TYPE_FILM_OR_BROADCAST]:
        document.add_contributors(subsidiary_contributors)
    if custom4:
        # custom4 is converted to person contributors with role "rev"
        document.add_contributors(convert_endnote_authors_to_contributors(custom4, "person", "rev"))
    if endnote_type == TYPE_FIGURE and remote_database_name:
        # for figures, remote_database_name is converted with role "cph"
        document.add_contributors(convert_endnote_authors_to_contributors(remote_database_name, None, "cph"))

    # edition
    if endnote_type in [TYPE_BOOK, TYPE_BOOK_SECTION, TYPE_FILM_OR_BROADCAST, TYPE_WEB_PAGE] and edition:
        document["edition"] = edition

    # extent_pages, extent_volumes
    if endnote_type in [TYPE_BOOK, TYPE_THESIS] and pages:
        document["extent_pages"] = pages.replace("p.", "").strip()
    if endnote_type in [TYPE_BOOK, TYPE_BOOK_SECTION] and extent_volumes:
        document["extent_volumes"] = extent_volumes

    # date_issued, date_issued_first
    if date_year:
        date_issued = ""
        date_issued_first = ""
        # A bracketed part of the year field (e.g. "2001 [1950]") is taken
        # as the first issue date; the remainder is the issue date.
        orig_index_start = date_year.find("[")
        orig_index_end = date_year.find("]")
        if orig_index_start != -1 and orig_index_end != -1:
            date_issued_first = date_year[orig_index_start + 1: orig_index_end]
            date_issued = date_year.replace("[" + date_issued_first + "]", "").strip()
        else:
            date_issued = date_year.strip()

        # When the item sits inside a Book (directly or two levels up), the
        # dates are attached to that Book instead of the item itself.
        if "is_part_of" in document:
            container = document["is_part_of"][0]
            if container["rec_type"] == "Book":
                container.set_key_if_not_none("date_issued", date_issued)
                container.set_key_if_not_none("date_issued_first", date_issued_first)
            elif "is_part_of" in container and container["is_part_of"][0]["rec_type"] == "Book":
                container["is_part_of"][0].set_key_if_not_none("date_issued", date_issued)
                container["is_part_of"][0].set_key_if_not_none("date_issued_first", date_issued_first)
            else:
                document.set_key_if_not_none("date_issued_first", date_issued_first)
                document.set_key_if_not_none("date_issued", date_issued)
        else:
            document.set_key_if_not_none("date_issued_first", date_issued_first)
            document.set_key_if_not_none("date_issued", date_issued)

    # identifiers[]
    document.set_key_with_value_type_in_list("identifiers", accessionnumber, "accessionnumber")
    document.set_key_with_value_type_in_list("identifiers", callnumber, "callnumber")
    document.set_key_with_value_type_in_list("identifiers", doi, "doi")

    # language
    if language:
        iso639_2 = language_service.convert_unknown_format_to_iso639_2(language)
        if iso639_2:
            document["languages"] = [{"authority": "iso639-2b", "value": iso639_2}]

    # note (import of each kind is switched by a module-level flag)
    if endnote_import_note and note:
        document.set_key_with_value_type_in_list("notes", note, "general")
    if endnote_import_research_note and research_notes:
        document.set_key_with_value_type_in_list("notes", research_notes, "user")

    # part_page_start & part_page_end
    if endnote_type in [TYPE_BOOK_SECTION, TYPE_FIGURE, TYPE_JOURNAL_ARTICLE] and pages:
        hyphen_index = pages.find("-")
        if hyphen_index == -1:
            document["part_page_start"] = pages.replace("p.", "").strip()
        else:
            document["part_page_start"] = pages[:hyphen_index].replace("p.", "").strip()
            document["part_page_end"] = pages[hyphen_index+1:].replace("p.", "").strip()

    if endnote_type in [TYPE_JOURNAL_ARTICLE]:
        document.set_key_if_not_none("part_issue", part_number)
    elif endnote_type in [TYPE_FIGURE]:
        document.set_key_if_not_none("part_number", part_number)
    document.set_key_if_not_none("part_section", part_section)
    document.set_key_if_not_none("part_volume", part_volume)

    # resources[0]
    if remote_url is not None:
        resource = Resource()
        resource.set_key_if_not_none("remote_url", remote_url)
        # For web pages the last-access date is read from the "number" field.
        if endnote_type == TYPE_WEB_PAGE:
            resource.set_key_if_not_none("date_last_accessed", part_number)
        else:
            resource.set_key_if_not_none("date_last_accessed", access_date)
        document["resources"] = [resource]

    # subjects[]: the keyword text is split on whitespace, one topic each
    if endnote_import_keywords and keywords:
        for keyword in keywords.split():
            document.set_key_with_value_type_in_list("subjects", keyword, "topic")

    # title, title_alternative, title_abbreviated, title_translated
    document["title"] = title
    if title_alternative:
        document["title_alternative"] = [{"title": title_alternative}]
    if title_abbreviated:
        document["title_abbreviated"] = [{"title": title_abbreviated}]
    if not is_part_of_is_part_of_type and title_translated:
        # the title_translated is used for the is_part_of.is_part_of_type.title
        document["title_translated"] = [{"title": title_translated}]

    debug = True
    if debug:
        related_items_msg = ""
        if is_part_of_type:
            related_items_msg = "\tis_part_of: {} ".format(is_part_of_type)
        if is_part_of_is_part_of_type:
            related_items_msg += "\tis_part_of.is_part_of: {} ".format(is_part_of_is_part_of_type)
        # NOTE(review): the title is not part of this trace line. The format
        # string never had a placeholder for it; adding one could raise on
        # non-ASCII titles under Python 2 -- confirm before including it.
        print("# {}\t:\t{}\t:\t{}\t->\titem: {} {}".format(source, rec_id, endnote_type, rec_type, related_items_msg))

    return document
def convert_summon_json_document_to_metajson_document(sum_doc, source):
    """Convert one Summon JSON document into a metajson ``Document``.

    Args:
        sum_doc: mapping for a single Summon record. ``"ID"`` and
            ``"ContentType"`` are read as ``sum_doc[key][0]`` and must be
            present; ``"Language"`` and the identifier keys are optional and
            iterated as lists. All other fields go through the ``extract_*``
            helpers.
        source: value stored unchanged under the ``rec_source`` key.

    Returns:
        The populated ``Document``. When a container type can be determined,
        publisher data, peer-review flag and ISSN/eISSN/ISBN identifiers are
        put on a nested ``is_part_of`` document; otherwise they are set on
        the document itself.

    Raises:
        KeyError: if ``"ID"`` or ``"ContentType"`` is missing, or if the
            content type has no entry in
            ``summon_document_type_to_metajson_document_type`` (assuming that
            mapping is a dict -- TODO confirm).
    """
    document = Document()

    # Extract Summon properties
    rec_id = sum_doc["ID"][0].replace("FETCH-", "")
    sum_type = sum_doc["ContentType"][0]
    rec_type = summon_document_type_to_metajson_document_type[sum_type]

    # rec_id, rec_source, rec_type
    document["rec_id"] = rec_id
    document["rec_source"] = source
    document["rec_type"] = rec_type

    # languages: the first successfully converted language is the main one
    main_language = None
    if "Language" in sum_doc:
        languages = []
        for sum_lang in sum_doc["Language"]:
            lang = language_service.convert_english_to_rfc5646(sum_lang)
            if lang:
                languages.append(lang)
        if languages:
            main_language = languages[0]
            document["languages"] = languages

    # extract summon properties
    contributors = extract_contributors(sum_doc)
    copyright_statement = extract_value(sum_doc, "Copyright")
    date_issued = extract_date_issued(sum_doc)
    degree = extract_value(sum_doc, "DissertationDegree")
    descriptions = extract_convert_langstrings(sum_doc, "Abstract", main_language)
    edition = extract_value(sum_doc, "Edition")
    extent_pages = extract_value(sum_doc, "PageCount")
    genre = extract_value(sum_doc, "Genre")
    is_part_of_edition = extract_value(sum_doc, "PublicationEdition")
    is_part_of_title = extract_value(sum_doc, "PublicationTitle")
    is_part_of_title_sub = extract_value(sum_doc, "PublicationSubtitle")
    notes = extract_convert_langstrings(sum_doc, "Notes", main_language)
    part_issue = extract_value(sum_doc, "Issue")
    part_page_end = extract_value(sum_doc, "EndPage")
    part_page_start = extract_value(sum_doc, "StartPage")
    part_volume = extract_value(sum_doc, "Volume")
    peer_reviewed = extract_boolean_value(sum_doc, "IsPeerReviewed")
    publisher = extract_value(sum_doc, "Publisher")
    publisher_place = extract_value(sum_doc, "PublicationPlace")
    scholarly = extract_boolean_value(sum_doc, "IsScholarly")
    series_title = extract_value(sum_doc, "PublicationSeriesTitle")
    subject_keywords = extract_value(sum_doc, "Keywords", True)
    subject_names = convert_contributors(sum_doc, "RelatedPersons", None, "person", None)
    subject_terms = extract_value(sum_doc, "SubjectTerms", True)
    table_of_contents = extract_convert_langstrings(sum_doc, "TableOfContents", main_language)
    title = extract_value(sum_doc, "Title")
    title_sub = extract_value(sum_doc, "Subtitle")

    # identifiers: ISSN / eISSN / ISBN are collected separately because they
    # are attached to the is_part_of document when there is one.
    has_isbn = False
    has_eissn = False
    identifiers_item = []
    identifiers_is_part_of = []
    for sum_key in summon_identifier_type_to_metajson_identifier_type:
        if sum_key not in sum_doc:
            continue
        id_type = summon_identifier_type_to_metajson_identifier_type[sum_key]
        for id_value in sum_doc[sum_key]:
            identifier = metajson.create_identifier(id_type, id_value)
            if id_type in ("issn", "eissn", "isbn"):
                if id_type == "eissn":
                    has_eissn = True
                elif id_type == "isbn":
                    has_isbn = True
                identifiers_is_part_of.append(identifier)
            else:
                identifiers_item.append(identifier)

    # is_part_of_type determination
    is_part_of_type = None
    if sum_type in summon_document_type_to_metajson_document_is_part_of_type:
        is_part_of_type = summon_document_type_to_metajson_document_is_part_of_type[sum_type]

    elif is_part_of_title and is_part_of_title != title and rec_type not in ["Book", "Journal", "Magazine", "Newspaper", "Periodical"]:
        # No explicit mapping: guess the container type from the identifiers
        # present, then from words in the publication title.
        if has_isbn:
            is_part_of_type = "Book"
        elif has_eissn:
            is_part_of_type = "Journal"
        elif "conference" in is_part_of_title.lower():
            is_part_of_type = "Book"
        # Membership test, not str.find(): find() returns -1 (truthy) when
        # the word is absent, which made this branch match every title.
        elif "review" in is_part_of_title.lower() or "journal" in is_part_of_title.lower():
            is_part_of_type = "Journal"
        elif rec_type == "Dataset":
            is_part_of_type = "Periodical"
        else:
            print("unknown is_part_of_type for rec_type: %s" % rec_type)

    # is_part_of
    if is_part_of_type:
        is_part_of = Document()
        is_part_of.set_key_if_not_none("rec_type", is_part_of_type)
        is_part_of.set_key_if_not_none("edition", is_part_of_edition)
        is_part_of.add_items_to_key(identifiers_is_part_of, "identifiers")
        is_part_of.set_key_if_not_none("peer_reviewed", peer_reviewed)
        is_part_of.set_key_if_not_none("publisher", publisher)
        is_part_of.set_key_if_not_none("publisher_place", publisher_place)
        is_part_of.set_key_if_not_none("title", is_part_of_title)
        is_part_of.set_key_if_not_none("title_sub", is_part_of_title_sub)

        document.add_items_to_key(identifiers_item, "identifiers")

        document.add_items_to_key([is_part_of], "is_part_of")
    else:
        # No container: everything is set on the document itself.
        document.set_key_if_not_none("peer_reviewed", peer_reviewed)
        document.set_key_if_not_none("publisher", publisher)
        document.set_key_if_not_none("publisher_place", publisher_place)
        document.add_items_to_key(identifiers_is_part_of, "identifiers")
        document.add_items_to_key(identifiers_item, "identifiers")

    # series
    if series_title:
        series = Document()
        series.set_key_if_not_none("title", series_title)

        document.add_items_to_key([series], "series")

    # classifications
    extract_convert_add_classifications(sum_doc, document, "DEWEY", "ddc")
    extract_convert_add_classifications(sum_doc, document, "Discipline", "discipline")
    extract_convert_add_classifications(sum_doc, document, "NAICS", "NAICS")

    # set properties
    document.set_key_if_not_none("contributors", contributors)
    document.set_key_if_not_none("copyright_statement", copyright_statement)
    document.set_key_if_not_none("date_issued", date_issued)
    document.set_key_if_not_none("degree", degree)
    document.set_key_if_not_none("descriptions", descriptions)
    document.set_key_if_not_none("edition", edition)
    document.set_key_if_not_none("extent_pages", extent_pages)
    document.set_key_if_not_none("genre", genre)
    document.set_key_if_not_none("notes", notes)
    document.set_key_if_not_none("part_issue", part_issue)
    document.set_key_if_not_none("part_page_end", part_page_end)
    document.set_key_if_not_none("part_page_start", part_page_start)
    document.set_key_if_not_none("part_volume", part_volume)
    document.set_key_if_not_none("scholarly", scholarly)
    document.set_key_if_not_none("table_of_contents", table_of_contents)
    document.set_key_if_not_none("title", title)
    document.set_key_if_not_none("title_sub", title_sub)

    # subject: only stored when at least one of its parts was filled in
    subject = Subject()
    if subject_keywords:
        subject["keywords"] = subject_keywords
    if subject_names:
        subject["names"] = subject_names
    if subject_terms:
        subject["terms"] = subject_terms
    if subject:
        document["subjects"] = subject

    debug = True
    if debug:
        # Tab padding keeps the trace columns in place when there is no
        # is_part_of message.
        related_items_msg = "\t\t\t\t\t\t"
        if is_part_of_type:
            related_items_msg = "\tis_part_of: {} ".format(is_part_of_type)
        print("{}\t->\titem: {} {}\t:\t{}\t:\t{}".format(sum_type, rec_type, related_items_msg, rec_id, title))

    return document