def test_DownloadMismatchArchiveUponExtraction(self):
  """Tests that mismatching archive files are downloaded upon extraction.

  Archive 1 is missing entirely and archive 2 is present but corrupt, so
  extraction must re-download both archives before unpacking them.
  """
  with pynacl.working_directory.TemporaryWorkingDirectory() as work_dir:
    mock_file1 = self.GenerateMockFile(work_dir, mock_file='mockfile1.txt')
    mock_file2 = self.GenerateMockFile(work_dir, mock_file='mockfile2.txt')

    tar_dir = os.path.join(work_dir, 'tar_dir')
    dest_dir = os.path.join(work_dir, 'dest_dir')
    mock_tars_dir = os.path.join(work_dir, 'mock_tars')
    package_target = 'custom_package_target'
    package_name = 'custom_package'

    # Create mock tars and mock URLS where the tars can be downloaded from.
    os.makedirs(mock_tars_dir)
    mock_tar1 = os.path.join(mock_tars_dir, 'mock1.tar')
    mock_url1 = 'https://www.mock.com/tar1.tar'
    with tarfile.TarFile(mock_tar1, 'w') as f:
      f.add(mock_file1, arcname=os.path.basename(mock_file1))
    self._fake_downloader.StoreURL(mock_url1, mock_tar1)

    mock_tar2 = os.path.join(mock_tars_dir, 'mock2.tar')
    mock_url2 = 'https://www.mock.com/tar2.tar'
    with tarfile.TarFile(mock_tar2, 'w') as f:
      f.add(mock_file2, arcname=os.path.basename(mock_file2))
    self._fake_downloader.StoreURL(mock_url2, mock_tar2)

    # Have tar1 be missing, have tar2 be a file with invalid data.
    mismatch_tar2 = package_locations.GetLocalPackageArchiveFile(
        tar_dir,
        os.path.basename(mock_tar2),
        archive_info.GetArchiveHash(mock_tar2))
    os.makedirs(os.path.dirname(mismatch_tar2))
    # The file is opened in binary mode, so the payload must be bytes
    # (writing a str here raises TypeError on Python 3).
    with open(mismatch_tar2, 'wb') as f:
      f.write(b'mismatch tar')

    package_desc = self.GeneratePackageInfo(
        [mock_tar1, mock_tar2],
        url_dict={mock_tar1: mock_url1, mock_tar2: mock_url2})
    package_file = package_locations.GetLocalPackageFile(
        tar_dir, package_target, package_name)
    package_desc.SavePackageFile(package_file)

    package_version.ExtractPackageTargets(
        [(package_target, package_name)], tar_dir, dest_dir,
        downloader=self._fake_downloader.Download)
    self.assertEqual(
        self._fake_downloader.GetDownloadCount(), 2,
        "Expected to download exactly 2 mismatched archives.")

    full_dest_dir = package_locations.GetFullDestDir(
        dest_dir, package_target, package_name)
    dest_mock_file1 = os.path.join(full_dest_dir,
                                   os.path.basename(mock_file1))
    dest_mock_file2 = os.path.join(full_dest_dir,
                                   os.path.basename(mock_file2))

    with open(mock_file1, 'rb') as f:
      mock_contents1 = f.read()
    with open(mock_file2, 'rb') as f:
      mock_contents2 = f.read()
    with open(dest_mock_file1, 'rb') as f:
      dest_mock_contents1 = f.read()
    with open(dest_mock_file2, 'rb') as f:
      dest_mock_contents2 = f.read()

    # Extracted files must match the originals byte-for-byte.
    self.assertEqual(mock_contents1, dest_mock_contents1)
    self.assertEqual(mock_contents2, dest_mock_contents2)
def test_OverlayPackageTargets(self):
  """Tests that we can extract package targets with an overlay directory."""
  with pynacl.working_directory.TemporaryWorkingDirectory() as work_dir:
    mock_file1 = self.GenerateMockFile(work_dir, mock_file='mockfile1.txt')
    mock_file2 = self.GenerateMockFile(work_dir, mock_file='mockfile2.txt')
    mock_file3 = self.GenerateMockFile(work_dir, mock_file='mockfile3.txt')

    tar_dir = os.path.join(work_dir, 'tar_dir')
    overlay_dir = os.path.join(work_dir, 'overlay_dir')
    dest_dir = os.path.join(work_dir, 'dest_dir')
    package_target = 'custom_package_target'
    package_name = 'custom_package'
    package_revision = 10

    os.makedirs(tar_dir)
    os.makedirs(overlay_dir)

    def make_tar(archive_path, source_file):
      # Build a single-entry tar whose member is the file's basename.
      with tarfile.TarFile(archive_path, 'w') as tar:
        tar.add(source_file, arcname=os.path.basename(source_file))
      return archive_path

    # Tar1 (mockfile1) will be a regular archive within the tar directory,
    # while tar2 (mockfile2) will be overlaid and replaced by
    # overlay_tar2 (mockfile3).
    mock_tar1 = make_tar(os.path.join(tar_dir, 'archive_name1.tar'),
                         mock_file1)
    mock_tar2 = make_tar(os.path.join(tar_dir, 'archive_name2.tar'),
                         mock_file2)
    overlay_tar2 = make_tar(os.path.join(overlay_dir, 'archive_name2.tar'),
                            mock_file3)

    self.CopyToLocalArchiveFile(mock_tar1, tar_dir)
    self.CopyToLocalArchiveFile(mock_tar2, tar_dir)
    self.CopyToLocalArchiveFile(overlay_tar2, overlay_dir)

    # Generate the regular package file, along with the overlay package file.
    package_desc = self.GeneratePackageInfo([mock_tar1, mock_tar2])
    package_file = package_locations.GetLocalPackageFile(
        tar_dir, package_target, package_name)
    package_desc.SavePackageFile(package_file)

    overlay_package_desc = self.GeneratePackageInfo([overlay_tar2])
    overlay_package_file = package_locations.GetLocalPackageFile(
        overlay_dir, package_target, package_name)
    overlay_package_desc.SavePackageFile(overlay_package_file)

    package_version.ExtractPackageTargets(
        [(package_target, package_name)], tar_dir, dest_dir,
        downloader=self._fake_downloader.Download,
        overlay_tar_dir=overlay_dir)

    full_dest_dir = package_locations.GetFullDestDir(
        dest_dir, package_target, package_name)

    def dest_path(source_file):
      return os.path.join(full_dest_dir, os.path.basename(source_file))

    # mock_file2 should not exist in the destination since it was replaced.
    self.assertFalse(os.path.isfile(dest_path(mock_file2)))

    def read_contents(path):
      with open(path, 'rb') as f:
        return f.read()

    # The surviving files match their sources byte-for-byte.
    self.assertEqual(read_contents(mock_file1),
                     read_contents(dest_path(mock_file1)))
    self.assertEqual(read_contents(mock_file3),
                     read_contents(dest_path(mock_file3)))
def ExtractPackageTargets(package_target_packages, tar_dir, dest_dir,
                          downloader=None):
  """Extracts package targets from the tar directory to the destination.

  Each package archive within a package will be verified before being
  extracted. If a package archive does not exist or does not match the hash
  stored within the package file, it will be re-downloaded before being
  extracted.

  Args:
    package_target_packages: List of tuples of package target and package names.
    tar_dir: Source tar directory where package archives live.
    dest_dir: Root destination directory where packages will be extracted to.
    downloader: function which takes a url and a file path for downloading.

  Raises:
    IOError: If an archive fails verification and has no download URL, or if
        a downloaded archive still does not match its expected hash.
  """
  if downloader is None:
    downloader = gsd_storage.HttpDownload

  for package_target, package_name in package_target_packages:
    package_file = package_locations.GetLocalPackageFile(tar_dir,
                                                         package_target,
                                                         package_name)
    package_desc = package_info.PackageInfo(package_file)
    dest_package_dir = package_locations.GetFullDestDir(dest_dir,
                                                        package_target,
                                                        package_name)
    dest_package_file = package_locations.GetDestPackageFile(dest_dir,
                                                             package_target,
                                                             package_name)

    # Only do the extraction if the extract packages do not match.
    if os.path.isfile(dest_package_file):
      dest_package_desc = package_info.PackageInfo(dest_package_file)
      if dest_package_desc == package_desc:
        logging.debug('Skipping extraction for package (%s)', package_name)
        continue

    if os.path.isdir(dest_package_dir):
      logging.info('Deleting old package directory: %s', dest_package_dir)
      pynacl.file_tools.RemoveDir(dest_package_dir)

    logging.info('Extracting package (%s) to directory: %s',
                 package_name, dest_package_dir)
    for archive_desc in package_desc.GetArchiveList():
      archive_file = package_locations.GetLocalPackageArchiveFile(
          tar_dir, package_target, package_name, archive_desc.name)

      # Upon extraction, some files may not be downloaded (or have stale
      # files), we need to check the hash of each file and attempt to
      # download it if they do not match.
      archive_hash = package_info.GetArchiveHash(archive_file)
      if archive_hash != archive_desc.hash:
        # logging.warn is a deprecated alias; use logging.warning.
        logging.warning('Expected archive missing, downloading: %s',
                        archive_desc.name)
        if archive_desc.url is None:
          raise IOError('Invalid archive file and URL: %s' % archive_file)

        pynacl.file_tools.MakeParentDirectoryIfAbsent(archive_file)
        downloader(archive_desc.url, archive_file)
        archive_hash = package_info.GetArchiveHash(archive_file)
        if archive_hash != archive_desc.hash:
          raise IOError('Downloaded archive file does not match hash.'
                        ' [%s] Expected %s, received %s.' %
                        (archive_file, archive_desc.hash, archive_hash))

      destination_dir = os.path.join(dest_package_dir,
                                     archive_desc.extract_dir)
      logging.info('Extracting %s to %s...',
                   archive_desc.name, destination_dir)

      # Extract into a staging directory first, then merge into place.
      temp_dir = os.path.join(destination_dir, '.tmp')
      pynacl.file_tools.RemoveDir(temp_dir)
      os.makedirs(temp_dir)
      try:
        with tarfile.TarFile(archive_file, 'r') as f:
          f.extractall(temp_dir)
        temp_src_dir = os.path.join(temp_dir, archive_desc.tar_src_dir)
        pynacl.file_tools.MoveAndMergeDirTree(temp_src_dir, destination_dir)
      finally:
        # Always clean up the staging directory, even if extraction or the
        # merge fails part way through (previously it leaked on error).
        pynacl.file_tools.RemoveDir(temp_dir)

    pynacl.file_tools.MakeParentDirectoryIfAbsent(dest_package_file)
    shutil.copy(package_file, dest_package_file)
def test_ExtractPackageTargets(self):
  """Tests that we can extract package targets from the tar directory properly."""
  with pynacl.working_directory.TemporaryWorkingDirectory() as work_dir:
    mock_file1 = self.GenerateMockFile(work_dir, mock_file='mockfile1.txt')
    mock_file2 = self.GenerateMockFile(work_dir, mock_file='mockfile2.txt')
    mock_file3 = self.GenerateMockFile(work_dir, mock_file='mockfile3.txt')

    tar_dir = os.path.join(work_dir, 'tar_dir')
    dest_dir = os.path.join(work_dir, 'dest_dir')
    package_target = 'custom_package_target'
    package_name = 'custom_package'
    package_revision = 10

    def build_archive(archive_name, source_file, arcname=None):
      # Create a one-entry tar in the working directory.
      archive_path = os.path.join(work_dir, archive_name)
      if arcname is None:
        arcname = os.path.basename(source_file)
      with tarfile.TarFile(archive_path, 'w') as tar:
        tar.add(source_file, arcname=arcname)
      return archive_path

    mock_tar1 = build_archive('archive_name1.tar', mock_file1)
    mock_tar2 = build_archive('archive_name2.tar', mock_file2)
    # The third archive stores its member under a relative directory.
    mock_tar3 = build_archive(
        'archive_name3.tar', mock_file3,
        arcname=os.path.join('rel_dir', os.path.basename(mock_file3)))

    for tar_path in (mock_tar1, mock_tar2, mock_tar3):
      self.CopyToLocalArchiveFile(tar_path, tar_dir)

    package_desc = self.GeneratePackageInfo(
        [mock_tar1, mock_tar2, mock_tar3],
        dir_dict={mock_tar2: 'tar2_dir'},
        src_dir_dict={mock_tar3: 'rel_dir'})
    package_file = package_locations.GetLocalPackageFile(
        tar_dir, package_target, package_name)
    package_desc.SavePackageFile(package_file)

    package_version.ExtractPackageTargets(
        [(package_target, package_name)], tar_dir, dest_dir,
        downloader=self._fake_downloader.Download)
    self.assertEqual(
        self._fake_downloader.GetDownloadCount(), 0,
        "Extracting a package should not download anything.")

    full_dest_dir = package_locations.GetFullDestDir(
        dest_dir, package_target, package_name)
    dest_mock_file1 = os.path.join(full_dest_dir,
                                   os.path.basename(mock_file1))
    dest_mock_file2 = os.path.join(full_dest_dir, 'tar2_dir',
                                   os.path.basename(mock_file2))
    dest_mock_file3 = os.path.join(full_dest_dir,
                                   os.path.basename(mock_file3))

    def read_contents(path):
      with open(path, 'rb') as f:
        return f.read()

    # Each extracted file matches its source byte-for-byte.
    self.assertEqual(read_contents(mock_file1),
                     read_contents(dest_mock_file1))
    self.assertEqual(read_contents(mock_file2),
                     read_contents(dest_mock_file2))
    self.assertEqual(read_contents(mock_file3),
                     read_contents(dest_mock_file3))
def ExtractPackageTargets(package_target_packages, tar_dir, dest_dir,
                          downloader=None, skip_missing=False, quiet=False):
  """Extracts package targets from the tar directory to the destination.

  Each package archive within a package will be verified before being
  extracted. If a package archive does not exist or does not match the hash
  stored within the package file, it will be re-downloaded before being
  extracted.

  Args:
    package_target_packages: List of tuples of package target and package names.
    tar_dir: Source tar directory where package archives live.
    dest_dir: Root destination directory where packages will be extracted to.
    downloader: function which takes a url and a file path for downloading.
    skip_missing: When True, archives that fail verification and have no
        download URL are skipped instead of raising IOError.
    quiet: When True, suppresses verbose tar extraction output.

  Raises:
    IOError: If an archive fails verification and has no download URL (and
        skip_missing is False), or if a downloaded archive still does not
        match its expected hash.
  """
  if downloader is None:
    downloader = pynacl.gsd_storage.HttpDownload

  for package_target, package_name in package_target_packages:
    package_file = package_locations.GetLocalPackageFile(tar_dir,
                                                         package_target,
                                                         package_name)
    package_desc = package_info.PackageInfo(package_file,
                                            skip_missing=skip_missing)
    dest_package_dir = package_locations.GetFullDestDir(dest_dir,
                                                        package_target,
                                                        package_name)
    dest_package_file = package_locations.GetDestPackageFile(dest_dir,
                                                             package_target,
                                                             package_name)

    # Only do the extraction if the extract packages do not match.
    if os.path.isfile(dest_package_file):
      try:
        dest_package_desc = package_info.PackageInfo(dest_package_file)
        if dest_package_desc == package_desc:
          logging.debug('Skipping extraction for package (%s)', package_name)
          continue
      except Exception:
        # Destination package file cannot be trusted, if invalid re-extract.
        # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
        # still propagate.
        pass

      # Delete the old package file before we extract.
      os.unlink(dest_package_file)

    if os.path.isdir(dest_package_dir):
      logging.debug('Deleting old package directory: %s', dest_package_dir)
      pynacl.file_tools.RemoveDir(dest_package_dir)

    logging.info('Extracting package (%s) to directory: %s',
                 package_name, dest_package_dir)
    archive_list = package_desc.GetArchiveList()
    num_archives = len(archive_list)
    for index, archive_obj in enumerate(archive_list):
      archive_desc = archive_obj.GetArchiveData()
      archive_file = package_locations.GetLocalPackageArchiveFile(
          tar_dir, package_target, package_name, archive_desc.name)

      # Upon extraction, some files may not be downloaded (or have stale
      # files), we need to check the hash of each file and attempt to
      # download it if they do not match.
      archive_hash = archive_info.GetArchiveHash(archive_file)
      if archive_hash != archive_desc.hash:
        if archive_desc.url is None:
          if skip_missing:
            logging.info('Skipping extraction of missing archive: %s' %
                         archive_file)
            continue
          raise IOError('Invalid archive file and URL: %s' % archive_file)

        # logging.warn is a deprecated alias; use logging.warning.
        logging.warning('Expected archive missing, downloading: %s',
                        archive_desc.name)

        pynacl.file_tools.MakeParentDirectoryIfAbsent(archive_file)
        downloader(archive_desc.url, archive_file)
        archive_hash = archive_info.GetArchiveHash(archive_file)
        if archive_hash != archive_desc.hash:
          raise IOError('Downloaded archive file does not match hash.'
                        ' [%s] Expected %s, received %s.' %
                        (archive_file, archive_desc.hash, archive_hash))

      destination_dir = os.path.join(dest_package_dir,
                                     archive_desc.extract_dir)
      logging.info('Extracting %s (%d/%d)' %
                   (archive_desc.name, index + 1, num_archives))

      # Extract into a staging directory, then merge into the destination.
      temp_dir = os.path.join(destination_dir, '.tmp')
      pynacl.file_tools.RemoveDir(temp_dir)
      os.makedirs(temp_dir)

      tar_output = not quiet
      tar = cygtar.CygTar(archive_file, 'r:*', verbose=tar_output)
      # CygTar extracts relative to the current directory; restore the
      # original working directory even if extraction fails.
      curdir = os.getcwd()
      os.chdir(temp_dir)
      try:
        tar.Extract()
        tar.Close()
      finally:
        os.chdir(curdir)

      temp_src_dir = os.path.join(temp_dir, archive_desc.tar_src_dir)
      pynacl.file_tools.MoveAndMergeDirTree(temp_src_dir, destination_dir)
      pynacl.file_tools.RemoveDir(temp_dir)

    pynacl.file_tools.MakeParentDirectoryIfAbsent(dest_package_file)
    package_desc.SavePackageFile(dest_package_file)