def MD5Sum(uri):
  """Compute or retrieve MD5 sum of uri.

  Supported for: local files, GS files.

  Args:
    uri: The /unix/path or gs:// uri to compute the md5sum on.

  Returns:
    A string representing the md5sum of the file/uri passed in.
    None if we do not understand the uri passed in or cannot compute
    the md5sum.
  """
  scheme = GetUriType(uri)

  if scheme == TYPE_GS:
    # Best-effort for GS: a gslib failure maps to None rather than raising.
    try:
      return gslib.MD5Sum(uri)
    except gslib.GSLibError:
      return None

  if scheme == TYPE_LOCAL:
    return filelib.MD5Sum(uri)

  # Colossus does not have a command for getting MD5 sum.  We could
  # copy the file to local disk and calculate it, but it seems better
  # to explicitly say it is not supported.
  raise NotSupportedForType(scheme)
def _StorePayloadJson(self, metadata_signatures):
  """Generate the payload description json file.

  The payload description contains a dictionary with the following
  fields populated.

  {
    "version": 2,
    "sha1_hex": <payload sha1 hash as a hex encoded string>,
    "sha256_hex": <payload sha256 hash as a hex encoded string>,
    "md5_hex": <payload md5 hash as a hex encoded string>,
    "metadata_size": <integer of payload metadata covered by signature>,
    "metadata_signature": <metadata signature as base64 encoded string or nil>
  }

  Args:
    metadata_signatures: A list of signatures in binary string format.
  """
  # Decide if we use the signed or unsigned payload file.
  payload_file = self.payload_file
  if self.signer:
    payload_file = self.signed_payload_file

  # Locate everything we put in the json.
  sha1_hex, sha256_hex = filelib.ShaSums(payload_file)
  md5_hex = filelib.MD5Sum(payload_file)

  metadata_signature = None
  if metadata_signatures:
    if len(metadata_signatures) != 1:
      self._GenerateSignerResultsError(
          'Received %d metadata signatures, only one supported.',
          len(metadata_signatures))
    # b64encode returns bytes under Python 3, which json.dumps cannot
    # serialize; decode to a native string (no-op change under Python 2).
    metadata_signature = base64.b64encode(
        metadata_signatures[0]).decode('utf-8')

  # Bundle it up in a map matching the Json format.
  # Increment DESCRIPTION_FILE_VERSION, if changing this map.
  payload_map = {
      'version': DESCRIPTION_FILE_VERSION,
      'sha1_hex': sha1_hex,
      'sha256_hex': sha256_hex,
      'md5_hex': md5_hex,
      'metadata_size': self._MetadataSize(payload_file),
      'metadata_signature': metadata_signature,
  }

  # Convert to Json.
  payload_json = json.dumps(payload_map, sort_keys=True)

  # Write out the results.
  osutils.WriteFile(self.description_file, payload_json)
def Cmp(path1, path2):
  """Return True if paths hold identical files, according to MD5 sum.

  Note that this function relies on MD5Sum, which means it also can
  only promise eventual consistency.  A recently uploaded file in
  Google Storage may behave badly in this comparison function.

  If either file is missing then always return False.

  Args:
    path1: URI to a file.  Local paths also supported.
    path2: URI to a file.  Local paths also supported.

  Returns:
    True if files are the same, False otherwise.
  """
  def _Checksum(path):
    # Go through the GS-aware MD5Sum only for GS URIs; plain paths can
    # use filelib directly.
    return MD5Sum(path) if IsGsURI(path) else filelib.MD5Sum(path)

  checksum1 = _Checksum(path1)
  if not checksum1:
    # First file missing/unreadable, so they cannot match.
    return False

  return checksum1 == _Checksum(path2)
def testMD5Sum(self):
  """Test urilib.MD5Sum dispatch for GS, local, and unsupported URIs."""
  gs_path = 'gs://bucket/some/path'
  local_path = '/some/local/path'
  http_path = 'http://host.domain/some/path'

  self.mox.StubOutWithMock(gslib, 'MD5Sum')
  self.mox.StubOutWithMock(filelib, 'MD5Sum')

  # Set up the test replay script.
  # Run 1, GS file.
  gslib.MD5Sum(gs_path).AndReturn('TheResult')
  # Run 2, local file.  (Fixed mislabeled comment; this is the second run.)
  filelib.MD5Sum(local_path).AndReturn('TheResult')
  self.mox.ReplayAll()

  # Run the test verification.
  # assertEqual, not the deprecated assertEquals alias, for consistency
  # with the other tests in this file.
  self.assertEqual('TheResult', urilib.MD5Sum(gs_path))
  self.assertEqual('TheResult', urilib.MD5Sum(local_path))
  # Run 3: unsupported URI type must raise rather than return a sum.
  self.assertRaises(urilib.NotSupportedForType, urilib.MD5Sum, http_path)
  self.mox.VerifyAll()
def testMD5Sum(self):
  """Test MD5Sum output with the /usr/bin/md5sum binary."""
  # Use this test file itself as a stable, always-present fixture.
  path = os.path.abspath(__file__)
  expected = self._MD5Sum(path)
  self.assertEqual(expected, filelib.MD5Sum(path))