Пример #1
0
    def test_md5_check(self):
        """
        Test md5 checking done by APS Harvester.

        Writes a small target file, computes its md5 with
        calculate_md5_external, stores "<md5> <filename>" in a keyfile
        located in the same directory, and verifies that
        find_and_validate_md5_checksums reports exactly one entry.
        """
        # Create temporary file to test with
        hashtarget_filepath = get_temporary_file(directory="/tmp")
        md5_keyfile = None
        try:
            # "with" guarantees the handle is closed even if write() raises.
            with open(hashtarget_filepath, "w") as tmpfd:
                tmpfd.write("this is a test")
            # Only the basename goes into the keyfile; the directory is
            # taken from the keyfile location further down.
            target_name = os.path.basename(hashtarget_filepath)

            hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

            # Create a md5 keyfile looking like:
            # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
            md5_keyfile = get_temporary_file(directory="/tmp")
            with open(md5_keyfile, "w") as tmpfd:
                tmpfd.write("%s %s\n" % (hashtarget_md5, target_name))

            dirpath, filename = os.path.split(md5_keyfile)
            res = find_and_validate_md5_checksums(in_folder=dirpath, md5key_filename=filename)
            # assertEqual reports the actual length when the check fails.
            self.assertEqual(len(res), 1)
        finally:
            # Best-effort cleanup so repeated runs do not pile up files in /tmp.
            for leftover in (hashtarget_filepath, md5_keyfile):
                if leftover and os.path.exists(leftover):
                    try:
                        os.remove(leftover)
                    except OSError:
                        # A failed cleanup must not mask the test outcome.
                        pass
Пример #2
0
    def test_md5_check(self):
        """
        Test md5 checking done by APS Harvester.

        Writes a small target file, computes its md5 with
        calculate_md5_external, stores "<md5> <filename>" in a keyfile
        located in the same directory, and verifies that
        find_and_validate_md5_checksums reports exactly one entry.
        """
        # Create temporary file to test with
        hashtarget_filepath = get_temporary_file(directory="/tmp")
        md5_keyfile = None
        try:
            # "with" guarantees the handle is closed even if write() raises.
            with open(hashtarget_filepath, 'w') as tmpfd:
                tmpfd.write("this is a test")
            # Only the basename goes into the keyfile; the directory is
            # taken from the keyfile location further down.
            target_name = os.path.basename(hashtarget_filepath)

            hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

            # Create a md5 keyfile looking like:
            # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
            md5_keyfile = get_temporary_file(directory="/tmp")
            with open(md5_keyfile, 'w') as tmpfd:
                tmpfd.write("%s %s\n" % (hashtarget_md5, target_name))

            dirpath, filename = os.path.split(md5_keyfile)
            res = find_and_validate_md5_checksums(in_folder=dirpath,
                                                  md5key_filename=filename)
            # assertEqual reports the actual length when the check fails.
            self.assertEqual(len(res), 1)
        finally:
            # Best-effort cleanup so repeated runs do not pile up files in /tmp.
            for leftover in (hashtarget_filepath, md5_keyfile):
                if leftover and os.path.exists(leftover):
                    try:
                        os.remove(leftover)
                    except OSError:
                        # A failed cleanup must not mask the test outcome.
                        pass
Пример #3
0
                                  stream=sys.stderr)
                    write_message("No fulltext found for %s" %
                                 (record.recid or record.doi,))
                    yield record, msg
                    continue
                raise
            finally:
                request_end = time.time()

            # Unzip the compressed file
            unzipped_folder = unzip(result_file, base_directory=self.out_folder)

            # Validate the checksum of the compressed fulltext file.
            try:
                checksum_validated_files = find_and_validate_md5_checksums(
                    in_folder=unzipped_folder,
                    md5key_filename=CFG_APSHARVEST_MD5_FILE)
            except APSFileChecksumError, e:
                info_msg = "Skipping %s in %s" % \
                           (record.recid or record.doi, unzipped_folder)
                msg = "Error while validating checksum: %s\n%s\n%s" % \
                      (info_msg, str(e), traceback.format_exc()[:-1])
                write_message(msg)
                yield record, msg
                continue
            if not checksum_validated_files:
                write_message("Warning: No files found to perform checksum"
                              " validation on inside %s" % (unzipped_folder,))
                continue
            elif not [name for name in checksum_validated_files if name.endswith('fulltext.xml')]:
                msg = "Warning: No fulltext file found inside %s for %s" % \
Пример #4
0
                                  stream=sys.stderr)
                    write_message("No fulltext found for %s" %
                                 (record.recid or record.doi,))
                    yield record, msg
                    continue
                raise
            finally:
                request_end = time.time()

            # Unzip the compressed file
            unzipped_folder = unzip(result_file, base_directory=self.out_folder)

            # Validate the checksum of the compressed fulltext file.
            try:
                checksum_validated_files = find_and_validate_md5_checksums(
                    in_folder=unzipped_folder,
                    md5key_filename=CFG_APSHARVEST_MD5_FILE)
            except APSFileChecksumError, e:
                info_msg = "Skipping %s in %s" % \
                           (record.recid or record.doi, unzipped_folder)
                msg = "Error while validating checksum: %s\n%s\n%s" % \
                      (info_msg, str(e), traceback.format_exc()[:-1])
                write_message(msg)
                yield record, msg
                continue
            if not checksum_validated_files:
                write_message("Warning: No files found to perform checksum"
                              " validation on inside %s" % (unzipped_folder,))
            elif len(checksum_validated_files) != 1 or \
                    not 'fulltext.xml' in checksum_validated_files[0]:
                msg = "Warning: No fulltext file found inside %s for %s" % \