def migrate_isis_parser(sargs):
    parser = argparse.ArgumentParser(description="ISIS database migration tool")
    subparsers = parser.add_subparsers(title="Commands", metavar="", dest="command")

    extract_parser = subparsers.add_parser("extract", help="Extract MST files to JSON")
    extract_parser.add_argument(
        "mst_file_path", metavar="file", help="Path to the MST file that will be extracted"
    )
    extract_parser.add_argument("--output", required=True, help="The output file path")

    import_parser = subparsers.add_parser(
        "import",
        parents=[mongodb_parser(sargs)],
        help="Process JSON files and import them into the Kernel database",
    )
    import_parser.add_argument(
        "import_file",
        metavar="file",
        help="Path to a JSON file with the MST extraction result, e.g. collection-title.json",
    )
    import_parser.add_argument(
        "--type",
        help="Type of JSON file to load into the Kernel database",
        choices=["journal", "issue", "documents-bundles-link"],
        required=True,
    )

    link_parser = subparsers.add_parser(
        "link",
        help="Generate a JSON file of journals' ids and their issues, linked by ISSN",
    )
    link_parser.add_argument(
        "issues",
        help="Path to a JSON file with the MST extraction result, e.g. ~/json/collection-issues.json",
    )
    link_parser.add_argument("--output", required=True, help="The output file path")

    args = parser.parse_args(sargs)

    if args.command == "extract":
        extract_isis.create_output_dir(args.output)
        extract_isis.run(args.mst_file_path, args.output)
    elif args.command == "import":
        mongo = ds_adapters.MongoDB(uri=args.uri, dbname=args.db)
        Session = ds_adapters.Session.partial(mongo)

        if args.type == "journal":
            pipeline.import_journals(args.import_file, session=Session())
        elif args.type == "issue":
            pipeline.import_issues(args.import_file, session=Session())
        elif args.type == "documents-bundles-link":
            pipeline.import_documents_bundles_link_with_journal(
                args.import_file, session=Session()
            )
    elif args.command == "link":
        pipeline.link_documents_bundles_with_journals(args.issues, args.output)
    else:
        parser.print_help()
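# Hypothetical usage sketch: each call below exercises one of the subcommands
# defined above. The paths are invented for illustration, and the --uri/--db
# flags are assumed to be contributed by the mongodb_parser(sargs) parent.
#
#   migrate_isis_parser(["extract", "/data/title.mst", "--output", "/tmp/title.json"])
#   migrate_isis_parser(["link", "/tmp/issues.json", "--output", "/tmp/bundles.json"])
#   migrate_isis_parser(
#       ["import", "/tmp/title.json", "--type", "journal",
#        "--uri", "mongodb://localhost:27017", "--db", "kernel"]
#   )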
def link_documents_bundles_with_journals(issue_path: str, output_path: str):
    """Looks up the relationship between journals and issues in JSON files
    extracted from an MST database. The result is written to a JSON file
    containing an object (dict) whose keys are journal identifiers and whose
    values are arrays of the ids of the issues that make up each journal."""

    journals_bundles = {}
    extract_isis.create_output_dir(output_path)
    issues_as_json = reading.read_json_file(issue_path)
    issues = conversion.conversion_issues_to_xylose(issues_as_json)
    issues = filter_issues(issues)

    for issue in issues:
        # The journal identifier lives in field v35 of the issue record.
        journal_id = issue.data["issue"]["v35"][0]["_"]
        journals_bundles.setdefault(journal_id, [])
        _issue_id = issue_to_kernel(issue)["_id"]
        # Skip issues already linked to this journal.
        exist_item = len(
            list(filter(lambda d: d["id"] == _issue_id, journals_bundles[journal_id]))
        )

        if not exist_item:
            _creation_date = parse_date(issue.publication_date)
            _supplement = ""

            if issue.type == "supplement":
                _supplement = "0"

                if issue.supplement_volume:
                    _supplement = issue.supplement_volume
                elif issue.supplement_number:
                    _supplement = issue.supplement_number

            journals_bundles[journal_id].append(
                {
                    "id": _issue_id,
                    "order": issue.order,
                    "number": issue.number,
                    "volume": issue.volume,
                    "year": str(date_to_datetime(_creation_date).year),
                    "supplement": _supplement,
                }
            )

    with open(output_path, "w") as output:
        output.write(json.dumps(journals_bundles, indent=4, sort_keys=True))
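# Illustrative shape of the JSON written by the function above (the ids and
# values are invented for the example): keys are the journal identifiers taken
# from field v35, values are the issue entries assembled in the loop.
#
#   {
#       "0001-3714": [
#           {
#               "id": "issue-id-1",
#               "order": "199801",
#               "number": "1",
#               "volume": "29",
#               "year": "1998",
#               "supplement": ""
#           }
#       ]
#   }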
def link_documents_bundles_with_journals(
    journal_path: str, issue_path: str, output_path: str
):
    """Looks up the relationship between journals and issues in JSON files
    extracted from an MST database. The result is written to a JSON file
    containing an object (dict) whose keys are journal identifiers and whose
    values are arrays of the ids of the issues that make up each journal."""

    journals_bundles = {}
    extract_isis.create_output_dir(output_path)
    journals_as_json = reading.read_json_file(journal_path)
    issues_as_json = reading.read_json_file(issue_path)
    journals = conversion.conversion_journals_to_kernel(journals_as_json)
    issues = conversion.conversion_issues_to_xylose(issues_as_json)
    issues = filter_issues(issues)

    for journal in journals:
        # Delegate the journal/issue matching to find_documents_bundles.
        journals_bundles[journal["id"]] = find_documents_bundles(journal, issues)

    with open(output_path, "w") as output:
        output.write(json.dumps(journals_bundles, indent=4, sort_keys=True))
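# Hypothetical invocation of the three-argument variant (the file names are
# examples only):
#
#   link_documents_bundles_with_journals(
#       "/tmp/journals.json", "/tmp/issues.json", "/tmp/journals_issues.json"
#   )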
    # The mock arguments are injected by mock.patch decorators; the exact
    # patch targets were not shown in the original, so the ones below are
    # assumed (bottom decorator maps to the first mock parameter).
    @mock.patch("extract_isis.os.makedirs")
    @mock.patch("extract_isis.os.path.exists")
    def test_should_not_try_to_create_an_existing_dir(
        self, path_exists_mock, makedirs_mock
    ):
        path_exists_mock.return_value = True
        extract_isis.create_output_dir("/random/dir/file.json")
        makedirs_mock.assert_not_called()
    @mock.patch("extract_isis.os.makedirs")  # patch targets assumed, as above
    @mock.patch("extract_isis.os.path.exists")
    def test_should_create_an_output_dir(self, path_exists_mock, makedirs_mock):
        path_exists_mock.return_value = False
        extract_isis.create_output_dir("/random/dir/file.json")
        makedirs_mock.assert_called_once_with("/random/dir")
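# For reference, a minimal sketch of the behaviour both tests pin down,
# assuming create_output_dir derives the directory from the given file path:
#
#   def create_output_dir(path):
#       output_dir = os.path.dirname(path)
#       if not os.path.exists(output_dir):
#           os.makedirs(output_dir)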