def UpdateAllMarcFiles(orig_deletion_list):
    """Apply the BSZ differential update to all MARC-21 files in the current directory.

    Builds an augmented deletion list (the original BSZ list plus all IDs
    extracted from the "Diff*" files), deletes those IDs from every non-Diff
    *.mrc file, concatenates the Diff records onto the trimmed files, renames
    the results to include today's date (YYMMDD), moves them up one directory,
    and creates "...-current.mrc" symlinks pointing at the dated files.

    Args:
        orig_deletion_list: file name of the BSZ deletion list located in the
            parent directory, or None if no deletion list was delivered.
    Returns:
        A 2-tuple with the names of the "current" title and authority symlinks.
    Side effects:
        Deletes and creates files, changes the working directory to the parent
        directory, and aborts via util.Error() on any failure.
    """
    # Create a deletion list that consists of the original list from the
    # BSZ as well as all the ID's from the files starting w/ "Diff":
    util.Remove("augmented_deletion_list")
    if orig_deletion_list is None:
        # Create empty file.
        with open("augmented_deletion_list", "a") as _:
            pass
    else:
        shutil.copyfile("../" + orig_deletion_list, "augmented_deletion_list")
        EnsureFileIsEmptyOrEndsWithNewline("augmented_deletion_list")
    extract_IDs_script_path = GetPathOrDie("extract_IDs_in_erase_format.sh")
    for marc_file_name in glob.glob("*.mrc"):
        if not marc_file_name.startswith("Diff"):
            continue
        if process_util.Exec(extract_IDs_script_path, args=[marc_file_name, "augmented_deletion_list"],
                             timeout=100) != 0:
            util.Error("failed to append ID's from \"" + marc_file_name
                       + "\" to \"augmented_deletion_list\"!")
    util.Info("Created an augmented deletion list.")

    # Now delete ID's from the augmented deletion list from all MARC-21 files:
    delete_ids_path = GetPathOrDie("delete_ids")
    for marc_file_name in glob.glob("*.mrc"):
        if marc_file_name.startswith("Diff"):
            continue
        trimmed_marc_file = marc_file_name[:-4] + "-trimmed.mrc"
        if process_util.Exec(delete_ids_path,
                             args=["augmented_deletion_list", marc_file_name, trimmed_marc_file],
                             timeout=200, new_stdout=util.GetLogDirectory() + "/trimmed_marc.log",
                             new_stderr=util.GetLogDirectory() + "/trimmed_marc.log") != 0:
            # Fixed: the original message was missing the closing quote after
            # the trimmed file name.
            util.Error("failed to create \"" + trimmed_marc_file + "\" from \"augmented_deletion_list\" and "
                       "\"" + marc_file_name + "\"!")
        # The untrimmed original must go away so the rename step below only
        # sees the merged results.
        RemoveOrDie(marc_file_name)
    RemoveOrDie("augmented_deletion_list")
    util.Info("Deleted ID's from MARC files.")

    # Now concatenate the changed MARC records with the trimmed data sets:
    for marc_file_name in glob.glob("*-trimmed.mrc"):
        # Strip "-YYMMDD-trimmed.mrc" (19 characters) to recover the root name,
        # e.g. "GesamtTiteldaten-160501-trimmed.mrc" -> "GesamtTiteldaten".
        root_name = marc_file_name[:-19]
        diff_name = glob.glob("Diff" + root_name + "*.mrc")[0]
        if not util.ConcatenateFiles([marc_file_name, diff_name], root_name + ".mrc"):
            util.Error("We failed to concatenate \"" + marc_file_name + "\" and \"" + diff_name + "\"!")
        RemoveOrDie(marc_file_name)
        RemoveOrDie(diff_name)
    util.Info("Created concatenated MARC files.")

    # Rename files to include the current date and move them up a directory:
    current_date_str = datetime.datetime.now().strftime("%y%m%d")
    marc_files = glob.glob("*.mrc")
    for marc_file_name in marc_files:
        RenameOrDie(marc_file_name, "../" + marc_file_name[:-4] + "-" + current_date_str + ".mrc")
    os.chdir("..")
    util.Info("Renamed and moved files.")

    # Create symlinks with "current" instead of "YYMMDD" in the original files:
    for marc_file in marc_files:
        new_name = marc_file[:-4] + "-" + current_date_str + ".mrc"
        util.SafeSymlink(new_name, re.sub(r"\d\d\d\d\d\d", "current", new_name))
    util.Info("Symlinked files.")

    return ("GesamtTiteldaten-current.mrc", "Normdaten-current.mrc")
def DeleteMarcRecords(original_marc_file, deletion_list, processed_marc_file):
    """Strip the record IDs listed in deletion_list from original_marc_file.

    Runs the external "delete_ids" tool, writing the surviving records to
    processed_marc_file.  Any stale output file is removed first; failure of
    the tool aborts the program via util.Error().
    """
    util.Remove(processed_marc_file)
    exit_code = process_util.Exec("delete_ids",
                                  args=[deletion_list, original_marc_file, processed_marc_file],
                                  timeout=200)
    if exit_code != 0:
        util.Error("failed to create \"" + processed_marc_file + "\" from \"" + deletion_list
                   + "\" and \"" + original_marc_file + "\"!")
    util.Info("Successfully created \"" + processed_marc_file + "\".")
def AugmentDeletionList(orig_list, changed_marc_data, augmented_list):
    """Build augmented_list from orig_list plus the IDs found in changed_marc_data.

    Copies orig_list to augmented_list and then appends, in erase format, every
    record ID extracted from changed_marc_data by the external
    "extract_IDs_in_erase_format.sh" script.  Aborts via util.Error() if the
    script fails.
    """
    util.Remove(augmented_list)
    shutil.copyfile(orig_list, augmented_list)
    exit_code = process_util.Exec("extract_IDs_in_erase_format.sh",
                                  args=[changed_marc_data, augmented_list],
                                  timeout=100)
    if exit_code != 0:
        util.Error("failed to create \"" + augmented_list + "\" from \"" + changed_marc_data + "\"!")
    util.Info("Successfully created \"" + augmented_list + "\".")
def CleanUp(title_data_file, log_file_name):
    """Shut down the temporary refterm Solr instance and discard temporary title data.

    Args:
        title_data_file: path of the temporary title data file, or None if no
            temporary file was created.
        log_file_name: log file handed to the shutdown script.
    """
    # Terminate the temporary Solr instance.
    util.ExecOrDie("/usr/local/bin/shutdown_refterm_solr.sh", [], log_file_name)
    if title_data_file is None:
        return
    # Clean up the temporary title data.
    util.Remove(title_data_file)
def GetNewBNBNumbers(list_no):
    """Download BNB RDF list number list_no and return its British Library numbers.

    Retrieves bnbrdf_N<list_no>.zip from the British Library, unpacks it,
    parses the contained RDF file and collects every dcterms:identifier whose
    text starts with "GB".  Both the downloaded ZIP and the extracted RDF file
    are removed afterwards.

    Args:
        list_no: integer number of the BNB list to fetch.
    Returns:
        A list of "GB..." identifier strings, or an empty list if the URL does
        not exist (i.e. the list has not been published yet).
    Side effects:
        Aborts via util.Error() on any download failure other than a 404.
    """
    zipped_rdf_filename = "bnbrdf_N" + str(list_no) + ".zip"
    retcode = util.RetrieveFileByURL(
        "https://www.bl.uk/bibliographic/bnbrdf/bnbrdf_N%d.zip" % list_no, 200, ["application/zip"])
    if retcode == util.RetrieveFileByURLReturnCode.URL_NOT_FOUND:
        return []
    if retcode != util.RetrieveFileByURLReturnCode.SUCCESS:
        util.Error("util.RetrieveFileByURL() failed w/ return code " + str(retcode))
    print("Downloaded " + zipped_rdf_filename)
    with zipfile.ZipFile(zipped_rdf_filename, "r") as zip_file:
        zip_file.extractall()
    util.Remove(zipped_rdf_filename)
    rdf_filename = "bnbrdf_N" + str(list_no) + ".rdf"
    numbers = []
    print("About to parse " + rdf_filename)
    tree = ElementTree.parse(rdf_filename)
    for child in tree.iter('{http://purl.org/dc/terms/}identifier'):
        # ElementTree yields text == None for empty elements; the original
        # child.text[0:2] would raise TypeError in that case.
        if child.text is not None and child.text.startswith("GB"):
            numbers.append(child.text)
    util.Remove(rdf_filename)
    return numbers
def Main():
    """Collect Solr statistics for one system type and record them in MySQL.

    Expects two command-line arguments: a notification email address and the
    system type (one of "krimdok", "relbib" or "ixtheo").  On bad arguments an
    error email is sent and the process exits with -1.
    """
    if len(sys.argv) != 3:
        util.SendEmail(os.path.basename(sys.argv[0]),
                       "This script needs to be called with an email address and the system type!\n",
                       priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    system_type = sys.argv[2]
    if system_type not in ("krimdok", "relbib", "ixtheo"):
        util.SendEmail(os.path.basename(sys.argv[0]),
                       "This system_type must be one of {krimdok,relbib,ixtheo}!\n",
                       priority=1)
        sys.exit(-1)
    output_file = "/tmp/collect_solr_stats_data.csv"
    util.Remove(output_file)
    util.ExecOrDie("/usr/local/bin/collect_solr_stats_data",
                   [system_type, output_file],
                   "/usr/local/var/log/tuefind/collect_solr_stats_data.log")
    util.SendEmail("Solr Stats Collector",
                   "Successfully generated Solr statistics and updated Ingo's MySQL database.",
                   priority=5)
def RemoveOrDie(path):
    """Delete path, aborting the program via util.Error() if the deletion fails."""
    removed = util.Remove(path)
    if not removed:
        util.Error("Failed to delete \"" + path + "\"!")