def test_md5_check(self):
    """
    Test md5 checking done by APS Harvester.

    Writes a small target file, records its md5 next to its file name in
    a keyfile (both created with directory="/tmp"), runs
    find_and_validate_md5_checksums() on the keyfile's folder and expects
    a result of length one.
    """
    def _write_text(path, text):
        # Explicit try/finally (rather than `with`) so the handle is
        # always closed without assuming which Python 2 release is used.
        handle = open(path, "w")
        try:
            handle.write(text)
        finally:
            handle.close()

    # Create temporary file to test with
    hashtarget_filepath = get_temporary_file(directory="/tmp")
    md5_keyfile = None
    try:
        _write_text(hashtarget_filepath, "this is a test")
        hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

        # Create a md5 keyfile looking like:
        # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
        md5_keyfile = get_temporary_file(directory="/tmp")
        _write_text(md5_keyfile,
                    "%s %s\n" % (hashtarget_md5,
                                 os.path.basename(hashtarget_filepath)))

        dirpath, filename = os.path.split(md5_keyfile)
        res = find_and_validate_md5_checksums(in_folder=dirpath,
                                              md5key_filename=filename)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(res), 1)
    finally:
        # Remove the scratch files so repeated runs do not pile up in /tmp.
        for scratch_path in (hashtarget_filepath, md5_keyfile):
            if scratch_path and os.path.exists(scratch_path):
                os.remove(scratch_path)
def test_md5_check(self):
    """
    Test md5 checking done by APS Harvester.

    Writes a small target file, records its md5 next to its file name in
    a keyfile (both created with directory="/tmp"), runs
    find_and_validate_md5_checksums() on the keyfile's folder and expects
    a result of length one.
    """
    def _write_text(path, text):
        # Explicit try/finally (rather than `with`) so the handle is
        # always closed without assuming which Python 2 release is used.
        handle = open(path, 'w')
        try:
            handle.write(text)
        finally:
            handle.close()

    # Create temporary file to test with
    hashtarget_filepath = get_temporary_file(directory="/tmp")
    md5_keyfile = None
    try:
        _write_text(hashtarget_filepath, "this is a test")
        hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

        # Create a md5 keyfile looking like:
        # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
        md5_keyfile = get_temporary_file(directory="/tmp")
        _write_text(md5_keyfile,
                    "%s %s\n" % (hashtarget_md5,
                                 os.path.basename(hashtarget_filepath)))

        dirpath, filename = os.path.split(md5_keyfile)
        res = find_and_validate_md5_checksums(in_folder=dirpath,
                                              md5key_filename=filename)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(res), 1)
    finally:
        # Remove the scratch files so repeated runs do not pile up in /tmp.
        for scratch_path in (hashtarget_filepath, md5_keyfile):
            if scratch_path and os.path.exists(scratch_path):
                os.remove(scratch_path)
stream=sys.stderr) write_message("No fulltext found for %s" % (record.recid or record.doi,)) yield record, msg continue raise finally: request_end = time.time() # Unzip the compressed file unzipped_folder = unzip(result_file, base_directory=self.out_folder) # Validate the checksum of the compressed fulltext file. try: checksum_validated_files = find_and_validate_md5_checksums( in_folder=unzipped_folder, md5key_filename=CFG_APSHARVEST_MD5_FILE) except APSFileChecksumError, e: info_msg = "Skipping %s in %s" % \ (record.recid or record.doi, unzipped_folder) msg = "Error while validating checksum: %s\n%s\n%s" % \ (info_msg, str(e), traceback.format_exc()[:-1]) write_message(msg) yield record, msg continue if not checksum_validated_files: write_message("Warning: No files found to perform checksum" " validation on inside %s" % (unzipped_folder,)) continue elif not [name for name in checksum_validated_files if name.endswith('fulltext.xml')]: msg = "Warning: No fulltext file found inside %s for %s" % \
stream=sys.stderr) write_message("No fulltext found for %s" % (record.recid or record.doi,)) yield record, msg continue raise finally: request_end = time.time() # Unzip the compressed file unzipped_folder = unzip(result_file, base_directory=self.out_folder) # Validate the checksum of the compressed fulltext file. try: checksum_validated_files = find_and_validate_md5_checksums( in_folder=unzipped_folder, md5key_filename=CFG_APSHARVEST_MD5_FILE) except APSFileChecksumError, e: info_msg = "Skipping %s in %s" % \ (record.recid or record.doi, unzipped_folder) msg = "Error while validating checksum: %s\n%s\n%s" % \ (info_msg, str(e), traceback.format_exc()[:-1]) write_message(msg) yield record, msg continue if not checksum_validated_files: write_message("Warning: No files found to perform checksum" " validation on inside %s" % (unzipped_folder,)) elif len(checksum_validated_files) != 1 or \ not 'fulltext.xml' in checksum_validated_files[0]: msg = "Warning: No fulltext file found inside %s for %s" % \