Exemplo n.º 1
0
def export_metadatas(args):
    """Export a corpus to a single file, driven by parsed arguments.

    Reads ``corpus``, ``output_format`` and ``output_file_path`` from
    *args*, logs each value, and hands them to
    ``corpus_service.export_corpus``. When ``args.corpus`` is falsy the
    module-level ``default_corpus`` is used instead.
    """
    # Fall back to the default corpus when none was supplied.
    target_corpus = args.corpus or default_corpus
    logging.info("corpus: {}".format(target_corpus))

    export_format = args.output_format
    logging.info("output_format: {}".format(export_format))

    destination = args.output_file_path
    logging.info("output_file_path: {}".format(destination))

    # The last positional argument is the "all in one file" flag, always on here.
    corpus_service.export_corpus(target_corpus, destination, export_format, True)
Exemplo n.º 2
0
def export_metadatas(args):
    """Log the export settings found on *args* and export the corpus.

    The corpus name comes from ``args.corpus``, or from ``default_corpus``
    when that attribute is falsy. The output format and output file path are
    taken from *args* as-is. The export is always requested with the
    "all in one file" flag set to ``True``.
    """
    chosen_corpus = args.corpus or default_corpus
    logging.info("corpus: {}".format(chosen_corpus))

    fmt = args.output_format
    logging.info("output_format: {}".format(fmt))

    out_path = args.output_file_path
    logging.info("output_file_path: {}".format(out_path))

    single_file = True
    corpus_service.export_corpus(chosen_corpus, out_path, fmt, single_file)
Exemplo n.º 3
0
    # Result file locations, all under <directory of this file>/../data/result.
    data_result_dir = os.path.join(os.path.dirname(__file__), os.pardir, "data", "result")
    logging.info("data_result_dir: {}".format(data_result_dir))
    error_file_path = os.path.join(data_result_dir, "result_validation_errors.txt")
    metajson_file_path = os.path.join(data_result_dir, "result_didl_metajson_spire.json")
    # NOTE(review): the MODS export target has a .json extension - confirm this is intended.
    mods_file_path = os.path.join(data_result_dir, "result_didl_mods_spire.json")
    repec_file_path = os.path.join(data_result_dir, "result_repec.txt")

    # Timestamp taken after path setup; used as the start mark of the validation trace.
    date_path = datetime.datetime.now()

    # Validate the corpus, handing the error file path to the validator.
    corpus_service.validate_corpus(corpus, error_file_path)
    date_validate = datetime.datetime.now()
    # chrono_trace receives a label plus the previous and current timestamps
    # (presumably to report the step duration - confirm in chrono).
    chrono.chrono_trace("Validate corpus", date_path, date_validate, None)

    # Export the corpus as MetaJSON (fourth argument True; presumably
    # "all in one file" - verify against export_corpus).
    corpus_service.export_corpus(corpus, metajson_file_path, constants.FORMAT_METAJSON, True)
    date_export_metajson = datetime.datetime.now()
    chrono.chrono_trace("Export corpus as MetaJSON", date_validate, date_export_metajson, None)

    # Export the corpus as MODS; the trace starts from the end of the MetaJSON export.
    corpus_service.export_corpus(corpus, mods_file_path, constants.FORMAT_MODS, True)
    date_export_mods = datetime.datetime.now()
    chrono.chrono_trace("Export corpus as MODS", date_export_metajson, date_export_mods, None)

    # Export the corpus as RePEc; the trace starts from the end of the MODS export.
    corpus_service.export_corpus(corpus, repec_file_path, constants.FORMAT_REPEC, True)
    date_export_repec = datetime.datetime.now()
    chrono.chrono_trace("Export corpus as RePEc", date_export_mods, date_export_repec, None)

Exemplo n.º 4
0
    # Import: list the input files matching input_format in input_dir_path,
    # then import them into the corpus.
    input_file_paths = io_service.get_relevant_file_list_by_format(
        input_dir_path, input_format)
    # NOTE(review): `results` is not read anywhere in the visible code - confirm
    # whether it is used further down or can be dropped.
    results = corpus_service.import_metadata_files(corpus, input_file_paths,
                                                   input_format, source,
                                                   rec_id_prefix, True, None)
    date_import = datetime.datetime.now()
    # chrono_trace receives a label plus the previous and current timestamps
    # (presumably to report the step duration - confirm in chrono).
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # Validate the corpus, handing the error file path to the validator.
    corpus_service.validate_corpus(corpus, error_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace("Validate corpus", date_import, date_validate, None)

    # Export as MODS into output_dir_path. The trailing (False, True) flags are
    # opaque from here - presumably "not all in one file" plus one more
    # option; verify against export_corpus.
    corpus_service.export_corpus(corpus, output_dir_path,
                                 constants.FORMAT_MODS, False, True)
    date_export_mods = datetime.datetime.now()
    chrono.chrono_trace("Export corpus mods", date_validate, date_export_mods,
                        None)

    # Export as oai_dc into the same directory, with the same flags as the MODS export.
    corpus_service.export_corpus(corpus, output_dir_path,
                                 constants.FORMAT_OAI_DC, False, True)
    date_export_oai_dc = datetime.datetime.now()
    chrono.chrono_trace("Export corpus oai_dc", date_export_mods,
                        date_export_oai_dc, None)

    # Export CSV (currently disabled; kept for reference)
    #corpus_service.export_corpus(corpus, output_dir_path, constants.FORMAT_CSV_METAJSON, True, True)
    #date_export_csv = datetime.datetime.now()
    #chrono.chrono_trace("Export corpus csv", date_export_oai_dc, date_export_csv, None)
Exemplo n.º 5
0
    # Clean the corpus, then configure it with the "aime" configuration name.
    corpus_service.clean_corpus(corpus)
    corpus_service.conf_corpus(corpus, "aime")
    date_clean = datetime.datetime.now()
    # chrono_trace receives a label plus the previous and current timestamps
    # (presumably to report the step duration - confirm in chrono).
    chrono.chrono_trace("Clean and conf corpus", date_begin, date_clean, None)

    # Import: list the input files matching input_format in input_dir_path,
    # then import them into the corpus.
    input_file_paths = io_service.get_relevant_file_list_by_format(input_dir_path, input_format)
    # NOTE(review): `results` is not read anywhere in the visible code - confirm
    # whether it is used further down or can be dropped.
    results = corpus_service.import_metadata_files(corpus, input_file_paths, input_format, source, rec_id_prefix, True, None)
    date_import = datetime.datetime.now()
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # Validate the corpus, handing the error file path to the validator.
    corpus_service.validate_corpus(corpus, error_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace("Validate corpus", date_import, date_validate, None)

    # Export as MODS into output_dir_path. The trailing (False, True) flags are
    # opaque from here - presumably "not all in one file" plus one more
    # option; verify against export_corpus.
    corpus_service.export_corpus(corpus, output_dir_path, constants.FORMAT_MODS, False, True)
    date_export_mods = datetime.datetime.now()
    chrono.chrono_trace("Export corpus mods", date_validate, date_export_mods, None)

    # Export as oai_dc into the same directory, with the same flags as the MODS export.
    corpus_service.export_corpus(corpus, output_dir_path, constants.FORMAT_OAI_DC, False, True)
    date_export_oai_dc = datetime.datetime.now()
    chrono.chrono_trace("Export corpus oai_dc", date_export_mods, date_export_oai_dc, None)

    # Export CSV (currently disabled; kept for reference)
    #corpus_service.export_corpus(corpus, output_dir_path, constants.FORMAT_CSV_METAJSON, True, True)
    #date_export_csv = datetime.datetime.now()
    #chrono.chrono_trace("Export corpus csv", date_export_oai_dc, date_export_csv, None)
Exemplo n.º 6
0
                                   "result_validation_errors.txt")
    # Remaining result file locations, all inside data_result_dir.
    metajson_file_path = os.path.join(data_result_dir,
                                      "result_didl_metajson_spire.json")
    # NOTE(review): the MODS export target has a .json extension - confirm this is intended.
    mods_file_path = os.path.join(data_result_dir,
                                  "result_didl_mods_spire.json")
    repec_file_path = os.path.join(data_result_dir, "result_repec.txt")

    # Timestamp taken after path setup; used as the start mark of the validation trace.
    date_path = datetime.datetime.now()

    # Validate the corpus, handing the error file path to the validator.
    corpus_service.validate_corpus(corpus, error_file_path)
    date_validate = datetime.datetime.now()
    # chrono_trace receives a label plus the previous and current timestamps
    # (presumably to report the step duration - confirm in chrono).
    chrono.chrono_trace("Validate corpus", date_path, date_validate, None)

    # Export the corpus as MetaJSON (fourth argument True; presumably
    # "all in one file" - verify against export_corpus).
    corpus_service.export_corpus(corpus, metajson_file_path,
                                 constants.FORMAT_METAJSON, True)
    date_export_metajson = datetime.datetime.now()
    chrono.chrono_trace("Export corpus as MetaJSON", date_validate,
                        date_export_metajson, None)

    # Export the corpus as MODS; the trace starts from the end of the MetaJSON export.
    corpus_service.export_corpus(corpus, mods_file_path, constants.FORMAT_MODS,
                                 True)
    date_export_mods = datetime.datetime.now()
    chrono.chrono_trace("Export corpus as MODS", date_export_metajson,
                        date_export_mods, None)

    # Export the corpus as RePEc into repec_file_path.
    corpus_service.export_corpus(corpus, repec_file_path,
                                 constants.FORMAT_REPEC, True)
    # End mark of the RePEc export (its trace call lies outside the visible lines).
    date_export_repec = datetime.datetime.now()