示例#1
0
def test():
    result_periodical_by_issn = request_periodical_by_issn("0042-0980")
    openurl_crosswalk.openurl_xmletree_to_metajson_list(
        result_periodical_by_issn, None, False)

    result_periodical_by_title = request_periodical_by_title("Urban studies")
    openurl_crosswalk.openurl_xmletree_to_metajson_list(
        result_periodical_by_title, None, False)
def convert_xmletree(xmletree_root, input_format, source, rec_id_prefix, only_first_record):
    if xmletree_root is not None:
        if input_format is None:
            input_format = io_service.guess_format_from_xmletree(xmletree_root)

        if input_format is not None:
            logging.info("# input_format: {0}".format(input_format))

            if input_format == constants.FORMAT_DDI:
                # ddi
                return ddi_crosswalk.ddi_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_DIDL:
                # didl
                return didl_crosswalk.didl_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_ENDNOTEXML:
                # endnotexml
                return endnotexml_crosswalk.endnotexml_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_METS:
                # mets
                return mets_crosswalk.mets_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_MODS:
                # mods
                return mods_crosswalk.mods_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_OPENURL:
                # openurl
                return openurl_crosswalk.openurl_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_RESEARCHERML:
                # researcherml
                return researcherml_crosswalk.researcherml_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_TEI:
                # tei
                return tei_crosswalk.tei_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_UNIXREF:
                # unixref
                return unixref_crosswalk.unixref_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            else:
                logging.error("Error: {} input_format not managed".format(input_format))
示例#3
0
def convert_xmletree(xmletree_root, input_format, source, rec_id_prefix, only_first_record):
    if xmletree_root is not None:
        if input_format is None:
            input_format = io_service.guess_format_from_xmletree(xmletree_root)

        if input_format is not None:
            logging.info("# input_format: {0}".format(input_format))

            if input_format == constants.FORMAT_DDI:
                # ddi
                return ddi_crosswalk.ddi_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_DIDL:
                # didl
                return didl_crosswalk.didl_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_ENDNOTEXML:
                # endnotexml
                return endnotexml_crosswalk.endnotexml_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_METS:
                # mets
                return mets_crosswalk.mets_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_MODS:
                # mods
                return mods_crosswalk.mods_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_OPENURL:
                # openurl
                return openurl_crosswalk.openurl_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_RESEARCHERML:
                # researcherml
                return researcherml_crosswalk.researcherml_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_TEI:
                # tei
                return tei_crosswalk.tei_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            elif input_format == constants.FORMAT_UNIXREF:
                # unixref
                return unixref_crosswalk.unixref_xmletree_to_metajson_list(xmletree_root, source, rec_id_prefix, only_first_record)

            else:
                logging.error("Error: {} input_format not managed".format(input_format))
def validate_perios(documents, csv_file_path):
    if documents:
        source = "Serials Solutions"
        rec_id_prefix = ""
        issn_duplicated = {}
        # restore of the previous state
        previously_dict = {}
        if os.path.isfile(csv_file_path):
            with open(csv_file_path, "rb") as csv_file:
                csvreader = csv.DictReader(csv_file, delimiter=',')
                for csvdict in csvreader:
                    previously_dict[csvdict["rec_id"]] = csvdict["rec_id"]
        with open(csv_file_path, "wb") as csv_file:
            fieldnames = ["rec_id", "rec_type", "title_non_sort", "title", "title_sub", "issn", "issn_status", "rel_eissn", "rel_response", 
                          "856_1_u", "856_1_status", "856_2_u", "856_2_status", "856_3_u", "856_3_status", "856_4_u", "856_4_status", "856_5_u", "856_5_status", 
                          "856_6_u", "856_6_status", "856_7_u", "856_7_status", "856_8_u", "856_8_status", "856_9_u", "856_9_status", "856_10_u", "856_10_status", 
                          "856_11_u", "856_11_status", "856_12_u", "856_12_status", "856_13_u", "856_13_status", "856_14_u", "856_14_status", "856_15_u", "856_15_status", 
                          "856_16_u", "856_16_status", "856_17_u", "856_17_status", "856_18_u", "856_18_status", "856_19_u", "856_19_status", "856_20_u", "856_20_status", 
                          "856_21_u", "856_21_status", "856_22_u", "856_22_status", "856_23_u", "856_23_status", "856_24_u", "856_24_status", "856_25_u", "856_25_status"]
            csvwriter = csv.DictWriter(csv_file, delimiter=',', fieldnames=fieldnames)
            csvwriter.writeheader()
            for index, document in enumerate(documents):
                rec_id = document["rec_id"]
                if rec_id in previously_dict:
                    logging.info("# Document with index: {} and rec_id: {} - Previously verified".format(index, rec_id))
                else:
                    logging.info("# Document index: {} and rec_id: {} - Starting verification".format(index, rec_id))
                    csvdict = {}
                    csvdict["rec_id"] = document["rec_id"]
                    #logging.debug(csvdict["rec_id"])
                    csvdict["rec_type"] = document["rec_type"]
                    if "title_non_sort" in document:
                        csvdict["title_non_sort"] = document["title_non_sort"]
                    if "title" in document:
                        csvdict["title"] = document["title"]
                    if "title_sub" in document:
                        csvdict["title_sub"] = document["title_sub"]
                    if "identifiers" in document:
                        for identifier in  document["identifiers"]:
                            if identifier["id_type"] == "issn":
                                csvdict["issn"] = identifier["value"]
                                try:
                                    issn.validate(identifier["value"])
                                    if identifier["value"] in issn_duplicated:
                                        csvdict["issn_status"] = "DUPLICATED"
                                    else:
                                        issn_duplicated[identifier["value"]] = ""
                                        csvdict["issn_status"] = "OK"
                                except:
                                    csvdict["issn_status"] = "INVALID"
                                break
                    if "issn" not in csvdict:
                        csvdict["issn_status"] = "EMPTY"

                    # 856 : list, status
                    if "resources" in document:
                        for i, resource in enumerate(document["resources"]):
                            if "url" in resource:
                                csvdict["856_" + str(i+1) + "_u"] = resource["url"]
                                # test URL
                                res_dict  = resource_service.fetch_url(resource["url"])[0]
                                if res_dict["error"]:
                                    csvdict["856_" + str(i+1) + "_status"] = "ERROR"
                                else:
                                    csvdict["856_" + str(i+1) + "_status"] = "OK"
                            else:
                                csvdict["856_" + str(i+1) + "_u"] = "EMPTY"
                                csvdict["856_" + str(i+1) + "_status"] = "EMPTY"

                    # revues en ligne / openurl
                    if csvdict["issn_status"] == "OK":
                        openurl_response = openurl_client.request_periodical_by_issn(csvdict["issn"])
                        if openurl_response is not None:
                            openurl_documents = openurl_crosswalk.openurl_xmletree_to_metajson_list(openurl_response, source, rec_id_prefix, True)
                            if openurl_documents:
                                openurl_document = openurl_documents[0]
                                if "identifiers" in openurl_document:
                                    for identifier in openurl_document["identifiers"]:
                                        if identifier["id_type"] == "eissn":
                                            csvdict["rel_eissn"] = identifier["value"]
                                            break
                                if "resources" in openurl_document:
                                    rel_response = []
                                    for resource in openurl_document["resources"]:
                                        if rel_response:
                                            rel_response.append("\n")
                                        if "institution_name" in resource:
                                            rel_response.append(resource["institution_name"])
                                        if "service_name" in resource:
                                            rel_response.append(" - ")
                                            rel_response.append(resource["service_name"])
                                        if "period_begin" in resource or "period_end" in resource:
                                            rel_response.append(" (")
                                            if "period_begin" in resource:
                                                rel_response.append(resource["period_begin"])
                                            else:
                                                rel_response.append("....")
                                            if "period_end" in resource:
                                                rel_response.append(" - ")
                                                rel_response.append(resource["period_end"])
                                            else:
                                                rel_response.append(" - ....")
                                            rel_response.append(")")
                                    if rel_response:
                                        csvdict["rel_response"] = "".join(rel_response)
                    csvwriter.writerow(csvdict)
示例#5
0
def validate_perios(documents, csv_file_path):
    if documents:
        source = "Serials Solutions"
        rec_id_prefix = ""
        issn_duplicated = {}
        # restore of the previous state
        previously_dict = {}
        if os.path.isfile(csv_file_path):
            with open(csv_file_path, "rb") as csv_file:
                csvreader = csv.DictReader(csv_file, delimiter=',')
                for csvdict in csvreader:
                    previously_dict[csvdict["rec_id"]] = csvdict["rec_id"]
        with open(csv_file_path, "wb") as csv_file:
            fieldnames = [
                "rec_id", "rec_type", "title_non_sort", "title", "title_sub",
                "issn", "issn_status", "rel_eissn", "rel_response", "856_1_u",
                "856_1_status", "856_2_u", "856_2_status", "856_3_u",
                "856_3_status", "856_4_u", "856_4_status", "856_5_u",
                "856_5_status", "856_6_u", "856_6_status", "856_7_u",
                "856_7_status", "856_8_u", "856_8_status", "856_9_u",
                "856_9_status", "856_10_u", "856_10_status", "856_11_u",
                "856_11_status", "856_12_u", "856_12_status", "856_13_u",
                "856_13_status", "856_14_u", "856_14_status", "856_15_u",
                "856_15_status", "856_16_u", "856_16_status", "856_17_u",
                "856_17_status", "856_18_u", "856_18_status", "856_19_u",
                "856_19_status", "856_20_u", "856_20_status", "856_21_u",
                "856_21_status", "856_22_u", "856_22_status", "856_23_u",
                "856_23_status", "856_24_u", "856_24_status", "856_25_u",
                "856_25_status"
            ]
            csvwriter = csv.DictWriter(csv_file,
                                       delimiter=',',
                                       fieldnames=fieldnames)
            csvwriter.writeheader()
            for index, document in enumerate(documents):
                rec_id = document["rec_id"]
                if rec_id in previously_dict:
                    logging.info(
                        "# Document with index: {} and rec_id: {} - Previously verified"
                        .format(index, rec_id))
                else:
                    logging.info(
                        "# Document index: {} and rec_id: {} - Starting verification"
                        .format(index, rec_id))
                    csvdict = {}
                    csvdict["rec_id"] = document["rec_id"]
                    #logging.debug(csvdict["rec_id"])
                    csvdict["rec_type"] = document["rec_type"]
                    if "title_non_sort" in document:
                        csvdict["title_non_sort"] = document["title_non_sort"]
                    if "title" in document:
                        csvdict["title"] = document["title"]
                    if "title_sub" in document:
                        csvdict["title_sub"] = document["title_sub"]
                    if "identifiers" in document:
                        for identifier in document["identifiers"]:
                            if identifier["id_type"] == "issn":
                                csvdict["issn"] = identifier["value"]
                                try:
                                    issn.validate(identifier["value"])
                                    if identifier["value"] in issn_duplicated:
                                        csvdict["issn_status"] = "DUPLICATED"
                                    else:
                                        issn_duplicated[
                                            identifier["value"]] = ""
                                        csvdict["issn_status"] = "OK"
                                except:
                                    csvdict["issn_status"] = "INVALID"
                                break
                    if "issn" not in csvdict:
                        csvdict["issn_status"] = "EMPTY"

                    # 856 : list, status
                    if "resources" in document:
                        for i, resource in enumerate(document["resources"]):
                            if "url" in resource:
                                csvdict["856_" + str(i + 1) +
                                        "_u"] = resource["url"]
                                # test URL
                                res_dict = resource_service.fetch_url(
                                    resource["url"])[0]
                                if res_dict["error"]:
                                    csvdict["856_" + str(i + 1) +
                                            "_status"] = "ERROR"
                                else:
                                    csvdict["856_" + str(i + 1) +
                                            "_status"] = "OK"
                            else:
                                csvdict["856_" + str(i + 1) + "_u"] = "EMPTY"
                                csvdict["856_" + str(i + 1) +
                                        "_status"] = "EMPTY"

                    # revues en ligne / openurl
                    if csvdict["issn_status"] == "OK":
                        openurl_response = openurl_client.request_periodical_by_issn(
                            csvdict["issn"])
                        if openurl_response is not None:
                            openurl_documents = openurl_crosswalk.openurl_xmletree_to_metajson_list(
                                openurl_response, source, rec_id_prefix, True)
                            if openurl_documents:
                                openurl_document = openurl_documents[0]
                                if "identifiers" in openurl_document:
                                    for identifier in openurl_document[
                                            "identifiers"]:
                                        if identifier["id_type"] == "eissn":
                                            csvdict["rel_eissn"] = identifier[
                                                "value"]
                                            break
                                if "resources" in openurl_document:
                                    rel_response = []
                                    for resource in openurl_document[
                                            "resources"]:
                                        if rel_response:
                                            rel_response.append("\n")
                                        if "institution_name" in resource:
                                            rel_response.append(
                                                resource["institution_name"])
                                        if "service_name" in resource:
                                            rel_response.append(" - ")
                                            rel_response.append(
                                                resource["service_name"])
                                        if "period_begin" in resource or "period_end" in resource:
                                            rel_response.append(" (")
                                            if "period_begin" in resource:
                                                rel_response.append(
                                                    resource["period_begin"])
                                            else:
                                                rel_response.append("....")
                                            if "period_end" in resource:
                                                rel_response.append(" - ")
                                                rel_response.append(
                                                    resource["period_end"])
                                            else:
                                                rel_response.append(" - ....")
                                            rel_response.append(")")
                                    if rel_response:
                                        csvdict["rel_response"] = "".join(
                                            rel_response)
                    csvwriter.writerow(csvdict)
def test():
    result_periodical_by_issn = request_periodical_by_issn("0042-0980")
    openurl_crosswalk.openurl_xmletree_to_metajson_list(result_periodical_by_issn, None, False)

    result_periodical_by_title = request_periodical_by_title("Urban studies")
    openurl_crosswalk.openurl_xmletree_to_metajson_list(result_periodical_by_title, None, False)