def Main():
    """Download data from Crossref and upload it to the BSZ FTP server.

    Expects a single command-line argument: the default email recipient.
    Sends a success email when done; aborts via util.Error() on failure.
    """
    if len(sys.argv) != 2:
        util.SendEmail(os.path.basename(sys.argv[0]),
                       "This script needs to be called with an email address as the only argument!\n",
                       priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]

    # Read the FTP credentials from the config file.
    try:
        config = util.LoadConfigFile()
        ftp_host = config.get("FTP", "host")
        ftp_user = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")

    # Download data from Crossref:
    log_file_name = CreateLogFileName()
    crossref_xml_file = "/tmp/crossref.xml"
    # Remove a stale file from a previous run, if any.  (Bug fix: a bare
    # os.unlink() raised FileNotFoundError on the very first run.)
    if os.path.exists(crossref_xml_file):
        os.unlink(crossref_xml_file)
    util.ExecOrDie("/usr/local/bin/crossref_downloader", [crossref_xml_file],
                   log_file_name)

    # Upload the XML data to the BSZ FTP server:
    ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
    try:
        with open(crossref_xml_file, "rb") as xml_file:
            ftp.storbinary("STOR crossref.xml", xml_file)
    except Exception as e:
        # Bug fix: this branch previously reported "failed to read config
        # file!", a copy-paste of the earlier error message.
        util.Error("failed to upload \"" + crossref_xml_file
                   + "\" to the FTP server! (" + str(e) + ")")
    os.unlink(crossref_xml_file)

    util.SendEmail("Crossref Data Import",
                   "Successfully imported Crossref data and uploaded it to the BSZ FTP server.",
                   priority=5)
def Main():
    """Download Crossref records as MARC and upload them to the BSZ FTP server.

    Expects a single command-line argument: the default email recipient.
    Always sends a status email, including when there are no new records.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]

    # Pull the FTP credentials and the remote target directory from the config.
    try:
        cfg = util.LoadConfigFile()
        ftp_host = cfg.get("FTP", "host")
        ftp_user = cfg.get("FTP", "username")
        ftp_passwd = cfg.get("FTP", "password")
        directory_on_ftp_server = cfg.get("Upload", "directory_on_ftp_server")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")

    marc_filename = "/tmp/crossref_marc.xml"
    no_of_records = DownloadCrossrefData(marc_filename)
    if no_of_records == 0:
        email_msg_body = "No new records.\n\n"
    else:
        ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
        try:
            ftp.cwd(directory_on_ftp_server)
        except:
            util.Error("failed to change directory on the FTP server to \""
                       + directory_on_ftp_server + "\"!")
        UploadFile(ftp, marc_filename, GenerateRemoteFilename())
        email_msg_body = ("Uploaded " + str(no_of_records)
                         + " MARC records to the BSZ FTP server.\n\n")

    os.unlink(marc_filename)
    util.SendEmail("BSZ Crossref File Upload", email_msg_body, priority=5)
def Main():
    """Merge the current BSZ complete dump with the differential dump and
    deletion list (when present) into a new, dated complete-dump tarball.

    Expects one argument: the default email recipient.
    NOTE(review): util.Error() presumably aborts the script -- the code after
    the argument check relies on that; TODO confirm.
    """
    if len(sys.argv) != 2:
        util.Error("This script expects one argument: default_email_recipient")
    util.default_email_recipient = sys.argv[1]
    config = util.LoadConfigFile()
    try:
        # German config keys: deletion list, complete dump, differential dump.
        deletion_list = config.get("Files", "loesch_liste")
        complete_data = config.get("Files", "komplett_abzug")
        differential_data = config.get("Files", "differenz_abzug")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")
    # The complete dump is mandatory.
    if not os.access(complete_data, os.R_OK):
        util.Error("Fehlender oder nicht lesbarer Komplettabzug. ("
                   + complete_data + ")")
    deletion_list_is_readable = os.access(deletion_list, os.R_OK)
    if not deletion_list_is_readable:
        deletion_list = None  # signals "no deletion list" to UpdateAllMarcFiles()
    differential_data_is_readable = os.access(differential_data, os.R_OK)
    # At least one of deletion list / differential dump must be usable.
    if not deletion_list_is_readable and not differential_data_is_readable:
        util.Error(
            "Fehlende oder nicht lesbare Löschliste und Differenzabzug..")

    # Bail out if the most recent complete data set is at least as recent as the deletion list or the differential
    # data:
    complete_data_mtime = os.path.getmtime(complete_data)
    deletion_list_mtime = None
    if deletion_list_is_readable:
        deletion_list_mtime = os.path.getmtime(deletion_list)
    differential_data_mtime = None
    if differential_data_is_readable:
        differential_data_mtime = os.path.getmtime(differential_data)
    if ((deletion_list_mtime is not None
         and complete_data_mtime >= deletion_list_mtime)
            or (differential_data_mtime is not None
                and complete_data_mtime >= differential_data_mtime)):
        util.SendEmail(
            "Nichts zu tun!",
            "Komplettabzug ist neuer als eventuell vorhandene Differenzabzüge.\n",
            priority=5)
        sys.exit(0)

    data_dir = PrepareDataDirectory()  # After this we're in the data directory...
    # Archives live one level up because PrepareDataDirectory() chdir'ed us
    # into the data directory.
    util.ExtractAndRenameBSZFiles("../" + complete_data)
    # NOTE(review): the differential dump is extracted unconditionally, even
    # when differential_data_is_readable is False -- confirm this path cannot
    # be reached with an unreadable differential dump.
    util.ExtractAndRenameBSZFiles("../" + differential_data, "Diff")
    title_superior_norm_tuple = UpdateAllMarcFiles(
        deletion_list)  # ...and we're back in the original directory.
    # Name the new tarball after today's date by replacing "current" in the
    # configured complete-dump file name.
    new_tarball_name = complete_data.replace(
        "current", datetime.date.today().strftime("%y%m%d"))
    CreateNewTarballAndDeletePreviousTarball(new_tarball_name,
                                             title_superior_norm_tuple,
                                             complete_data)
    # Drop the now-obsolete data links (presumably title, superior and norm
    # data -- see UpdateAllMarcFiles()) and their targets.
    util.RemoveLinkTargetAndLink(title_superior_norm_tuple[0])
    util.RemoveLinkTargetAndLink(title_superior_norm_tuple[1])
    util.RemoveLinkTargetAndLink(title_superior_norm_tuple[2])
    util.Info("Successfully created updated MARC files.")
def Main():
    """Look up and print one entry from the config file.

    Usage: two arguments, the section name and the entry name.
    """
    if len(sys.argv) != 3:
        util.Info("usage: " + sys.argv[0] + " section entry", file=sys.stderr)
        sys.exit(-1)
    util.default_email_recipient = "*****@*****.**"
    section, entry = sys.argv[1], sys.argv[2]
    util.Info(util.LoadConfigFile().get(section, entry))
def Main():
    """Dump the translations SQL database, compress and encrypt the dump,
    move it into the web server's download area, and notify the requester.

    Expects one argument: the email address of the requesting user (also
    installed as the default email recipient).
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script requires an email address as the only argument\n",
            priority=1,
            recipient=util.default_email_recipient)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    user = sys.argv[1]

    # Credentials for the translations database:
    try:
        sql_config = util.LoadConfigFile(
            "/usr/local/var/lib/tuelib/translations.conf")
        sql_database = sql_config.get("Database", "sql_database")
        sql_username = sql_config.get("Database", "sql_username")
        sql_password = sql_config.get("Database", "sql_password")
    except Exception as e:
        util.Error("Failed to read sql_config file (" + str(e) + ")")

    # Password used to encrypt the archive:
    try:
        config = util.LoadConfigFile()
        archive_password = config.get("Passwords", "archive_password")
    except Exception as e:
        util.Error("Failed to read config file (" + str(e) + ")")

    # NOTE(review): tmp_file_path, output_filename, compressed_extension,
    # encrypted_extension and web_server_path are presumably module-level
    # constants -- confirm against the rest of the file.
    raw_dump_file = tmp_file_path + "/" + output_filename
    DumpTranslationsDB(sql_database, sql_username, sql_password, raw_dump_file)
    # Strip everything from the first dot onwards and append the archive
    # extensions.  (Bug fix: the regexes are now raw strings; '\..*$' as a
    # plain string is an invalid escape sequence, a SyntaxWarning in modern
    # Python.)
    compressed_and_encrypted_dump_file = re.sub(
        r'\..*$', '', raw_dump_file) + compressed_extension + encrypted_extension
    # The archive password may be stored quoted in the config file; strip any
    # surrounding double quotes before use.
    CompressAndEncryptFile(raw_dump_file, compressed_and_encrypted_dump_file,
                           re.sub(r'^"|"$', '', archive_password))
    MoveToDownloadPosition(compressed_and_encrypted_dump_file, web_server_path)
    CleanUp(raw_dump_file)
    servername = DetermineServerName()
    NotifyUser(user, servername,
               os.path.basename(compressed_and_encrypted_dump_file))
def Main():
    """Update the local OADOI (Unpaywall) data and derive the URL files used
    by the IxTheo and KrimDok pipelines.

    Expects one argument: the default email recipient.  Starts a local
    MongoDB instance, imports any missing change-list files, exports the
    DOI/URL mappings, dumps the database and stops MongoDB again.
    """
    util.default_email_sender = "*****@*****.**"
    util.default_email_recipient = "*****@*****.**"
    if len(sys.argv) != 2:
        # NOTE(review): the subject line "Create Refterm File (Kickoff
        # Failure)" looks like a copy-paste from the refterm script --
        # confirm and correct the subject if so.
        util.SendEmail("Create Refterm File (Kickoff Failure)",
                       "This script must be called with one argument,\n" +
                       "the default email recipient\n",
                       priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]

    # Download needed differential files.
    config = util.LoadConfigFile()
    # Bug fix: the assignment was duplicated ("log_file_name = log_file_name = ...").
    log_file_name = util.MakeLogFileName(sys.argv[0], util.GetLogDirectory())
    changelist_url = config.get("Unpaywall", "changelist_url")
    api_key = config.get("Unpaywall", "api_key")
    oadoi_download_directory = config.get("LocalConfig", "download_dir")
    oadoi_imported_directory = oadoi_download_directory + "/imported/"
    StartMongoDB()
    json_update_objects = GetChangelists(changelist_url, api_key)
    remote_update_files = GetRemoteUpdateFiles(json_update_objects)
    local_update_files = GetLocalUpdateFiles(config, oadoi_download_directory)
    download_lists = GetAllFilesStartingAtFirstMissingLocal(
        remote_update_files, local_update_files)
    DownloadUpdateFiles(download_lists['download'], json_update_objects,
                        api_key, oadoi_download_directory)

    # Update the database.
    ImportOADOIsToMongo(
        GetImportFiles(config, oadoi_download_directory,
                       oadoi_imported_directory), oadoi_download_directory,
        log_file_name)

    # Generate the files to be used by the pipeline.
    share_directory = config.get("LocalConfig", "share_directory")
    ixtheo_dois_file = config.get("LocalConfig", "ixtheo_dois_file")
    ixtheo_urls_file = config.get("LocalConfig", "ixtheo_urls_file")
    ExtractOADOIURLs(share_directory, ixtheo_dois_file, ixtheo_urls_file,
                     log_file_name)
    ShareOADOIURLs(share_directory, ixtheo_urls_file)
    krimdok_dois_file = config.get("LocalConfig", "krimdok_dois_file")
    krimdok_urls_file = config.get("LocalConfig", "krimdok_urls_file")
    ExtractOADOIURLs(share_directory, krimdok_dois_file, krimdok_urls_file,
                     log_file_name)
    ShareOADOIURLs(share_directory, krimdok_urls_file)

    DumpMongoDB(config, log_file_name)
    StopMongoDB()
    util.SendEmail("Update OADOI Data",
                   "Successfully created \"" + ixtheo_urls_file + "\" and \""
                   + krimdok_urls_file + "\" in " + share_directory,
                   priority=5)
def Main():
    """Fetch any Unpaywall change-list files missing locally and apply them
    to the database; exits early with a message when nothing is new."""
    config = util.LoadConfigFile()
    changelist_url = config.get("Unpaywall", "changelist_url")
    api_key = config.get("Unpaywall", "api_key")
    working_dir = config.get("LocalConfig", "working_dir")

    # Compare the remote change-list inventory against what we already have.
    json_update_objects = GetRemoteUpdateObjects(changelist_url, api_key)
    remote = GetRemoteUpdateFiles(json_update_objects)
    local = GetLocalUpdateFiles(config, working_dir)
    update_and_download_lists = GetAllFilesFromLastMissingLocal(remote, local)

    if not update_and_download_lists:
        print("Received empty list - so nothing to do")
        sys.exit(0)

    DownloadUpdateFiles(update_and_download_lists['download'],
                        json_update_objects, api_key, working_dir)
    UpdateDatabase(update_and_download_lists['update'], config)
def Main():
    """Download new BSZ data (complete dumps, differential dumps, deletion
    lists, etc.) from the FTP server and send a summary email.

    Expects one argument: the default email recipient.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        ftp_host = config.get("FTP", "host")
        ftp_user = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")
    ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
    msg = []  # collects per-step status lines for the summary email
    download_cutoff_date = IncrementStringDate(
        GetCutoffDateForDownloads(config))
    complete_data_filenames = DownloadCompleteData(config, ftp,
                                                   download_cutoff_date, msg)
    if complete_data_filenames is not None:
        # A new complete dump supersedes the configured cutoff date.
        download_cutoff_date = ExtractDateFromFilename(
            complete_data_filenames[0])
    DownloadData(config, "Differenzabzug", ftp, download_cutoff_date, msg)
    DownloadData(config, "Loeschlisten", ftp, download_cutoff_date, msg)
    if config.has_section("Loeschlisten2"):
        DownloadData(config, "Loeschlisten2", ftp, download_cutoff_date, msg)
    if config.has_section("Hinweisabzug"):
        DownloadData(config, "Hinweisabzug", ftp, "000000", msg)
    if config.has_section("Errors"):
        DownloadData(config, "Errors", ftp, download_cutoff_date, msg)
    # Incremental authority dumps lag behind the titles, hence the shifted cutoff.
    incremental_authority_cutoff_date = ShiftDateToTenDaysBefore(
        download_cutoff_date)
    if config.has_section("Normdatendifferenzabzug"):
        if (not CurrentIncrementalAuthorityDumpPresent(
                config, incremental_authority_cutoff_date)):
            DownloadData(config, "Normdatendifferenzabzug", ftp,
                         incremental_authority_cutoff_date, msg)
        else:
            msg.append(
                "Skipping Download of \"Normdatendifferenzabzug\" since already present\n"
            )
    CleanUpCumulativeCollection(config)
    # Bug fix: string.join() is Python-2-only (removed in Python 3); use the
    # str method, consistent with the sibling download script.
    util.SendEmail("BSZ File Update", "".join(msg), priority=5)
def Main():
    """Kick off the MARC-21 pipeline when a new BSZ data file has arrived.

    Expects two arguments: the default email recipient and the path of the
    pipeline script to execute.  Emails a status report in every case.
    """
    util.default_email_sender = "*****@*****.**"
    if len(sys.argv) != 3:
        print(
            "invalid arguments! usage: initiate_marc_pipeline.py <default email recipient> <MARC21 pipeline script name>"
        )
        util.SendEmail(
            "MARC-21 Pipeline Kick-Off (Failure)",
            "This script needs to be called with two arguments,\n"
            "the default email recipient and the name of the MARC-21\n"
            "pipeline script to be executed.\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    pipeline_script_name = sys.argv[2]

    if not os.access(pipeline_script_name, os.X_OK):
        print("Pipeline script not found or not executable: "
              + pipeline_script_name)
        util.SendEmail("MARC-21 Pipeline Kick-Off (Failure)",
                       "Pipeline script not found or not executable: \""
                       + pipeline_script_name + "\"\n",
                       priority=1)
        sys.exit(-1)

    config = util.LoadConfigFile()
    link_name = config.get("Misc", "link_name")
    if not FoundNewBSZDataFile(link_name):
        util.SendEmail("MARC-21 Pipeline Kick-Off", "No new data was found.",
                       priority=5)
        return

    if not FoundReftermMutex():
        util.Error("No Refterm Mutex found")
    bsz_data = util.ResolveSymlink(link_name)
    if not bsz_data.endswith(".tar.gz"):
        util.Error("BSZ data file must end in .tar.gz!")
    file_name_list = util.ExtractAndRenameBSZFiles(bsz_data)
    RunPipelineAndImportIntoSolr(pipeline_script_name, file_name_list[0],
                                 config)
    # NOTE(review): solrmarc_log_summary and import_log_summary are
    # presumably module-level globals set elsewhere -- confirm.
    util.SendEmail("MARC-21 Pipeline",
                   "Pipeline completed successfully.",
                   priority=5,
                   attachments=[solrmarc_log_summary, import_log_summary])
    util.WriteTimestamp()
    DeleteReftermMutex()
    WriteImportFinishedFile()
def Main():
    """Purge old generations of timestamped files in the current directory,
    keeping the configured number of most recent generations.

    Expects one argument: the default email recipient.
    """
    if len(sys.argv) != 2:
        util.Error("This script expects one argument: default_email_recipient")
    util.default_email_recipient = sys.argv[1]
    config = util.LoadConfigFile()
    try:
        generations_to_keep = config.getint("PurgeFiles",
                                            "generations_to_keep")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")
    if generations_to_keep < 1:
        util.Error("generations_to_keep must be at least 1!")
    # A file counts as "timestamped" if its name contains a six-digit
    # (YYMMDD-style) sequence.
    all_timestamped_files = glob.glob("*[0-9][0-9][0-9][0-9][0-9][0-9]*")
    if not all_timestamped_files:
        util.SendEmail("File Purge Failed", "No timestamped files found!",
                       priority=1)
        # Bug fix: previously execution fell through and called PurgeFiles()
        # with an empty list after reporting the failure.
        return
    PurgeFiles(generations_to_keep, all_timestamped_files)
def Main():
    """Transfer newly arrived fulltext directories to an SFTP server and
    clean up afterwards, reporting the result by email.

    Expects one argument: the default email recipient.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]

    # SFTP credentials and local/remote directories come from the config file.
    try:
        cfg = util.LoadConfigFile()
        sftp_host = cfg.get("SFTP", "host")
        sftp_user = cfg.get("SFTP", "username")
        sftp_keyfile = cfg.get("SFTP", "keyfile")
        local_directory = cfg.get("Upload", "local_directory")
        directory_on_sftp_server = cfg.get("Upload",
                                           "directory_on_sftp_server")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")

    # Check directories with new data.
    fulltext_files = GetExistingFiles(local_directory)
    dirs_to_transfer = GetFulltextDirectoriesToTransfer(
        local_directory, fulltext_files)

    # Nothing new?  Report and stop.
    if not dirs_to_transfer:
        util.SendEmail("Transfer Fulltexts", "No directories to transfer",
                       priority=5)
        return

    # Transfer the data via the helper shell script.
    script_args = [
        sftp_host, sftp_user, sftp_keyfile, local_directory,
        directory_on_sftp_server
    ] + list(dirs_to_transfer)
    util.ExecOrDie("/usr/local/bin/transfer_fulltext.sh", script_args)

    # Clean up on the server.
    CleanUpFiles(fulltext_files)
    email_msg_body = ("Found Files:\n\n" + '\n'.join(fulltext_files)
                      + "\n\nTransferred directories:\n\n"
                      + '\n'.join(dirs_to_transfer))
    util.SendEmail("Transfer Fulltexts", email_msg_body, priority=5)
def Main():
    """Create the refterm file (plus serial sort dates and the match DB) from
    newly arrived BSZ title data, running the three generation steps in
    parallel against a temporary Solr instance.

    Expects one argument: the default email recipient.  Emails a status
    report in every case.
    """
    util.default_email_sender = "*****@*****.**"
    util.default_email_recipient = "*****@*****.**"
    if len(sys.argv) != 2:
        util.SendEmail("Create Refterm File (Kickoff Failure)",
                       "This script must be called with one argument,\n" +
                       "the default email recipient\n", priority=1);
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    conf = util.LoadConfigFile()
    title_data_link_name = conf.get("Misc", "link_name")
    ref_data_pattern = conf.get("Hinweisabzug", "filename_pattern")
    # An empty pattern means reference data is not configured; otherwise pick
    # the most recent matching archive.  A configured-but-missing archive is
    # reported by email but processing continues with ref_data_archive = None.
    if ref_data_pattern != "":
        ref_data_archive = util.getMostRecentFileMatchingGlob(ref_data_pattern)
        if ref_data_archive is None:
            util.SendEmail("Create Refterm File (No Reference Data File Found)",
                           "No File matching pattern \"" + ref_data_pattern
                           + "\" found\n", priority=1)
    else:
        ref_data_archive = None
    if FoundNewBSZDataFile(title_data_link_name):
        start = datetime.datetime.now()  # for the duration report below
        log_file_name = CreateLogFile()
        title_data_file_orig = ExtractTitleDataMarcFile(title_data_link_name)
        date_string = GetDateFromFilename(title_data_file_orig)
        title_data_file = RenameTitleDataFile(title_data_file_orig,
                                              date_string)
        # Ensure the extracted title data and the log file are removed even if
        # a later step fails.
        atexit.register(CleanUp, title_data_file, log_file_name)
        SetupTemporarySolrInstance(title_data_file, conf, log_file_name)
        # The three generation steps are independent, so run them as parallel
        # processes.
        create_ref_term_process = multiprocessing.Process(
            target=CreateRefTermFile,
            name="Create Reference Terms File",
            args=[ref_data_archive, date_string, conf, log_file_name])
        create_serial_sort_term_process = multiprocessing.Process(
            target=CreateSerialSortDate,
            name="Serial Sort Date",
            args=[title_data_file, date_string, log_file_name])
        # The match-DB step writes its own, separate log file.
        create_match_db_log_file_name = util.MakeLogFileName(
            "create_match_db", util.GetLogDirectory())
        create_match_db_process = multiprocessing.Process(
            target=CreateMatchDB,
            name="Create Match DB",
            args=[title_data_file, create_match_db_log_file_name])
        ExecuteInParallel(create_ref_term_process,
                          create_serial_sort_term_process,
                          create_match_db_process)
        end = datetime.datetime.now()
        duration_in_minutes = str((end - start).seconds / 60.0)
        util.SendEmail("Create Refterm File",
                       "Refterm file successfully created in "
                       + duration_in_minutes + " minutes.", priority=5)
    else:
        util.SendEmail("Create Refterm File", "No new data was found.",
                       priority=5)
def Main():
    """Run every black-box test defined in the config file and email a
    failure notice for each one that does not pass.

    NOTE(review): sys.argv[1] (the default email recipient) is read without
    an argument-count check -- confirm callers always supply it.
    """
    util.default_email_recipient = sys.argv[1]
    config = util.LoadConfigFile()

    # Optionally disable SSL certificate validation globally.  (This pokes at
    # private attributes of the ssl module.)
    if (config.has_option("Global", "validate_ssl_certificates")
            and not config.getboolean("Global", "validate_ssl_certificates")):
        ssl._create_default_https_context = ssl._create_unverified_context

    # Every non-"Global" section describes one test: a URL plus optional
    # expected content and timeout.
    for section in config.sections():
        if section == "Global":
            continue
        url = config.get(section, "url")
        expected = (config.get(section, "expected")
                    if config.has_option(section, "expected") else None)
        timeout = (config.getfloat(section, "timeout")
                   if config.has_option(section, "timeout") else None)
        if not RunTest(section, url, timeout, expected):
            util.SendEmail(
                "Black Box Test Failed!", "Test " + section
                + " failed!\n\n--Your friendly black box monitor",
                "*****@*****.**",
                priority=1)
def Main():
    """Download new BSZ data files into a temporary directory, copy them into
    the invoking directory, and maintain the cumulative collection.

    Expects one argument: the default email recipient.  A summary email is
    sent when done.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        ftp_host = config.get("FTP", "host")
        ftp_user = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")
    ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
    msg = []  # collects per-step status lines for the summary email

    # Download into a temporary directory; remember where we started so the
    # results can be copied back afterwards.
    tempdir = tempfile.TemporaryDirectory()
    bsz_dir = os.getcwd()
    os.chdir(tempdir.name)

    download_cutoff_date = IncrementStringDate(
        GetCutoffDateForDownloads(config))
    complete_data_filenames = DownloadCompleteData(config, ftp,
                                                   download_cutoff_date, msg)
    all_downloaded_files = ([] if complete_data_filenames is None
                            else complete_data_filenames)
    downloaded_at_least_some_new_titles = False
    if complete_data_filenames is not None:
        # A new complete dump supersedes the configured cutoff date.
        download_cutoff_date = ExtractDateFromFilename(
            complete_data_filenames[0])
        downloaded_at_least_some_new_titles = True

    all_downloaded_files += DownloadData(config, "Differenzabzug", ftp,
                                         download_cutoff_date, msg)
    # Bug fix: the original tested "all_downloaded_files is not []" -- an
    # identity comparison against a fresh list literal that is always true,
    # so the flag was set even when nothing had been downloaded.
    if all_downloaded_files:
        downloaded_at_least_some_new_titles = True

    all_downloaded_files += DownloadData(config, "Loeschlisten", ftp,
                                         download_cutoff_date, msg)
    if config.has_section("Loeschlisten2"):
        all_downloaded_files += DownloadData(config, "Loeschlisten2", ftp,
                                             download_cutoff_date, msg)
    if config.has_section("Hinweisabzug"):
        DownloadData(config, "Hinweisabzug", ftp, "000000", msg)
    if config.has_section("Errors"):
        all_downloaded_files += DownloadData(config, "Errors", ftp,
                                             download_cutoff_date, msg)

    # Incremental authority dumps lag behind the titles, hence the shifted cutoff.
    incremental_authority_cutoff_date = ShiftDateToTenDaysBefore(
        download_cutoff_date)
    if config.has_section("Normdatendifferenzabzug"):
        if (not CurrentIncrementalAuthorityDumpPresent(
                config, incremental_authority_cutoff_date)):
            all_downloaded_files += DownloadData(
                config, "Normdatendifferenzabzug", ftp,
                incremental_authority_cutoff_date, msg)
        else:
            msg.append(
                "Skipping Download of \"Normdatendifferenzabzug\" since already present\n"
            )

    # Copy everything back to the directory we were invoked from.
    try:
        for downloaded_file in all_downloaded_files:
            shutil.copy(downloaded_file, bsz_dir)
    except Exception as e:
        util.Error(
            "Moving a downloaded file to the BSZ download directory failed! ("
            + str(e) + ")")
    AddToCumulativeCollection(all_downloaded_files, config)
    CleanUpCumulativeCollection(config)

    if downloaded_at_least_some_new_titles:
        util.Touch("/tmp/bsz_download_happened"
                   )  # Must be the same path as in the merge script!
    util.SendEmail("BSZ File Update", ''.join(msg), priority=5)