Exemplo n.º 1
0
def validate_corpus(corpus, error_file_path):
    """Validate every document of a corpus and report the errors found.

    Each error returned by ``metajson_validation.validate_metajson_document``
    becomes one line of the form ``corpus:rec_source:rec_id:error`` followed
    by a newline. The line is written to the file at ``error_file_path`` and
    collected in the returned list. Documents without a ``rec_source`` key
    are reported with the placeholder source ``"empy"``.

    Returns the list of formatted error lines, or None (without opening the
    file) when ``corpus`` or ``error_file_path`` is falsy.
    """
    if not (corpus and error_file_path):
        return None

    # The report file is opened in "w" mode before the documents are fetched.
    with open(error_file_path, "w") as error_file:
        report_lines = []
        for document in repository_service.get_documents(corpus):
            record_id = document["rec_id"]
            origin = document["rec_source"] if "rec_source" in document else "empy"

            problems = metajson_validation.validate_metajson_document(document)
            for problem in problems:
                line = ":".join([corpus, origin, record_id, problem]) + "\n"
                report_lines.append(line)
                if error_file:
                    error_file.write(line)

        return report_lines
Exemplo n.º 2
0
def export_corpus(corpus,
                  output_file_path,
                  output_format,
                  all_in_one_file,
                  one_record_per_copy=False):
    """Fetch a corpus, convert it to ``output_format`` and write the result.

    The documents come from ``repository_service.get_documents``. When
    ``one_record_per_copy`` is true they are first passed through
    ``export_one_record_per_copy``. The converted items are then written with
    ``io_service.write_items_in_one_file`` if ``all_in_one_file`` is true and
    with ``io_service.write_items`` otherwise.

    Nothing is done when ``corpus`` or ``output_file_path`` is falsy.
    """
    if not corpus or not output_file_path:
        return

    # fetch, optionally expanding to one record per copy
    documents = repository_service.get_documents(corpus)
    if one_record_per_copy:
        documents = export_one_record_per_copy(documents)

    # convert
    converted = crosswalks_service.convert_metajson_list(
        documents, output_format, all_in_one_file)

    # export: both writers take the same arguments, so only the callee varies
    if all_in_one_file:
        writer = io_service.write_items_in_one_file
    else:
        writer = io_service.write_items
    writer(corpus, corpus, converted, output_file_path, output_format)
Exemplo n.º 3
0
def format_corpus(corpus, output_title, output_file_path, output_style):
    """Write the documents of a corpus as HTML via ``io_service.write_html``.

    The documents are fetched with ``repository_service.get_documents`` and
    handed over together with the title, output path and style.
    Nothing is done when ``corpus`` or ``output_file_path`` is falsy.
    """
    if not (corpus and output_file_path):
        return

    documents = repository_service.get_documents(corpus)
    io_service.write_html(
        corpus, output_title, documents, output_file_path, output_style)
Exemplo n.º 4
0
def format_corpus(corpus, output_title, output_file_path, output_style):
    """Fetch a corpus and pass it to ``io_service.write_html``.

    Returns immediately, doing nothing, unless both ``corpus`` and
    ``output_file_path`` are truthy.
    """
    if not corpus or not output_file_path:
        return

    # fetch
    fetched = repository_service.get_documents(corpus)

    # format and write
    io_service.write_html(corpus, output_title, fetched, output_file_path,
                          output_style)
Exemplo n.º 5
0
def export_corpus(corpus, output_file_path, output_format, all_in_one_file, one_record_per_copy=False):
    """Export the documents of a corpus in ``output_format``.

    Steps, in order: fetch the documents, optionally run them through
    ``export_one_record_per_copy``, convert them with
    ``crosswalks_service.convert_metajson_list``, then write them with the
    ``io_service`` writer selected by ``all_in_one_file``.

    Does nothing when ``corpus`` or ``output_file_path`` is falsy.
    """
    if not (corpus and output_file_path):
        return

    fetched = repository_service.get_documents(corpus)
    to_convert = export_one_record_per_copy(fetched) if one_record_per_copy else fetched

    items = crosswalks_service.convert_metajson_list(to_convert, output_format, all_in_one_file)

    # Same argument list for both writers; only the target function differs.
    write = io_service.write_items_in_one_file if all_in_one_file else io_service.write_items
    write(corpus, corpus, items, output_file_path, output_format)
Exemplo n.º 6
0
def validate_corpus(corpus, error_file_path):
    """Run the metajson validation over a corpus and log each error.

    For every document fetched from ``repository_service.get_documents``,
    each validation error is formatted as ``corpus:rec_source:rec_id:error``
    plus a trailing newline, written to ``error_file_path`` and appended to
    the result. ``"empy"`` stands in for a missing ``rec_source`` key.

    Returns the list of formatted lines. Returns None, leaving the file
    untouched, if ``corpus`` or ``error_file_path`` is falsy.
    """
    if not corpus or not error_file_path:
        return None

    with open(error_file_path, "w") as error_file:
        # fetch (after the report file has been opened)
        documents = repository_service.get_documents(corpus)

        # validate
        collected = []
        for doc in documents:
            doc_id = doc["rec_id"]
            if "rec_source" in doc:
                doc_source = doc["rec_source"]
            else:
                doc_source = "empy"

            for issue in metajson_validation.validate_metajson_document(doc):
                entry = "".join((corpus, ":", doc_source, ":", doc_id, ":", issue, "\n"))
                collected.append(entry)
                if error_file:
                    error_file.write(entry)

        return collected
Exemplo n.º 7
0

if __name__ == "__main__":
    # A timestamp is taken after each stage and handed to chrono.chrono_trace
    # together with the timestamp of the previous stage.
    date_begin = datetime.datetime.now()

    # conf params: corpus identity and source description
    corpus = "perio"
    source = "Sciences Po | la bibliothèque"
    rec_id_prefix = ""

    # conf params: input file and validation report location
    input_format = constants.FORMAT_UNIMARC
    input_file_path = os.path.join("data", "unimarc", "periouni.mrc")
    csv_file_name = "validation-" + corpus + ".csv"
    csv_file_path = os.path.join("data", "result", csv_file_name)

    # stage 1: clean, then configure the corpus
    corpus_service.clean_corpus(corpus)
    corpus_service.conf_corpus(corpus, "aime")
    date_clean = datetime.datetime.now()
    chrono.chrono_trace("Clean and conf corpus", date_begin, date_clean, None)

    # stage 2: import the metadata file
    corpus_service.import_metadata_file(
        corpus, input_file_path, input_format, source, rec_id_prefix, True, None)
    date_import = datetime.datetime.now()
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # stage 3: validate the imported documents into the CSV report path
    documents = repository_service.get_documents(corpus)
    validate_perios(documents, csv_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace("Validate perio", date_import, date_validate, None)
Exemplo n.º 8
0
if __name__ == "__main__":
    # Script entry point: clean/configure the corpus, import the file, then
    # validate it, tracing each stage with chrono.chrono_trace.
    date_begin = datetime.datetime.now()

    # conf params
    corpus = "perio"
    source = "Sciences Po | la bibliothèque"
    rec_id_prefix = ""
    input_format = constants.FORMAT_UNIMARC
    input_file_path = os.path.join("data", "unimarc", "periouni.mrc")
    csv_file_name = "validation-" + corpus + ".csv"
    csv_file_path = os.path.join("data", "result", csv_file_name)

    # conf corpus
    corpus_service.clean_corpus(corpus)
    corpus_service.conf_corpus(corpus, "aime")
    date_clean = datetime.datetime.now()
    chrono.chrono_trace(
        "Clean and conf corpus", date_begin, date_clean, None)

    # import
    corpus_service.import_metadata_file(
        corpus,
        input_file_path,
        input_format,
        source,
        rec_id_prefix,
        True,
        None)
    date_import = datetime.datetime.now()
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # validate perio
    documents = repository_service.get_documents(corpus)
    validate_perios(documents, csv_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace(
        "Validate perio", date_import, date_validate, None)