def update_bootstrap(self):
    """Zip the local mirror and upload it to Google Storage.

    The archive is named <git number>.zip so callers can find the most
    recent generation by sorting the bucket listing.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'],
        cwd=self.mirror_path).strip()
    self.RunGit(['gc'])  # Run Garbage Collect to compress packfile.
    # Creating a temp file and then deleting it ensures we can use this name.
    _, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    dest_name = 'gs://%s/%s/%s.zip' % (
        self.bootstrap_bucket, self.basedir, gen_number)
    try:
        gsutil.call('cp', tmp_zipfile, dest_name)
    finally:
        # BUG FIX: always delete the local archive, even if the upload
        # raised, so failed runs do not leak large zip files in the temp dir.
        if os.path.exists(tmp_zipfile):
            os.remove(tmp_zipfile)
def update_bootstrap(self):
    """Archive this mirror as <git number>.zip and push it to Google Storage."""
    # The generation number of master names the uploaded archive.
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    # Compress the packfile before zipping.
    self.RunGit(['gc'])
    # Reserve a unique filename, then let `zip` recreate it.
    _, zip_path = tempfile.mkstemp(suffix='.zip')
    os.remove(zip_path)
    subprocess.call(['zip', '-r', zip_path, '.'], cwd=self.mirror_path)
    uploader = Gsutil(path=self.gsutil_exe, boto_path=None)
    destination = 'gs://%s/%s/%s.zip' % (self.bootstrap_bucket, self.basedir,
                                         gen_number)
    uploader.call('cp', zip_path, destination)
    os.remove(zip_path)
def update_bootstrap(self, prune=False, gc_aggressive=False):
    """Upload the mirror directory to GS under its <git number> folder.

    A `<folder>.ready` marker is uploaded after the directory so readers can
    tell a complete upload from an in-progress one. When `prune` is True,
    every older generation except the most recent complete one is deleted.
    `gc_aggressive` runs `git gc --aggressive` before uploading.
    """
    # The folder is <git number>
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    src_name = self.mirror_path
    dest_prefix = '%s/%s' % (self._gs_path, gen_number)
    # ls_out lists contents in the format: gs://blah/blah/123...
    _, ls_out, _ = gsutil.check_call('ls', self._gs_path)
    # Check to see if folder already exists in gs
    ls_out_set = set(ls_out.strip().splitlines())
    if (dest_prefix + '/' in ls_out_set and
            dest_prefix + '.ready' in ls_out_set):
        print('Cache %s already exists.' % dest_prefix)
        return
    # Run Garbage Collect to compress packfile.
    # It appears core.deltaBaseCacheLimit restricts the file size of the pack
    # files produced, while we want 1 big pack file for efficiency.
    try:
        # BUG FIX: the original passed `cwd=cwd`, but no local `cwd` exists,
        # raising NameError. RunGit already operates on the mirror path, as
        # every other RunGit call in this method demonstrates.
        self.RunGit(['config', '--unset-all', 'core.deltaBaseCacheLimit'])
    except subprocess.CalledProcessError:
        pass  # Nothing to unset is fine.
    gc_args = ['gc', '--prune=all']
    if gc_aggressive:
        gc_args.append('--aggressive')
    self.RunGit(gc_args)
    gsutil.call('-m', 'cp', '-r', src_name, dest_prefix)
    # Create .ready file and upload
    _, ready_file_name = tempfile.mkstemp(suffix='.ready')
    try:
        gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
    finally:
        os.remove(ready_file_name)
    # remove all other directory/.ready files in the same gs_path
    # except for the directory/.ready file previously created
    # which can be used for bootstrapping while the current one is
    # being uploaded
    if not prune:
        return
    prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
    if not prev_dest_prefix:
        return
    for path in ls_out_set:
        if (path == prev_dest_prefix + '/' or
                path == prev_dest_prefix + '.ready'):
            continue
        if path.endswith('.ready'):
            gsutil.call('rm', path)
            continue
        gsutil.call('-m', 'rm', '-r', path)
def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Args:
        directory: local path the bootstrap zip is extracted into.

    Returns:
        True if the cache was populated from GS, False otherwise.
    """
    python_fallback = False
    if sys.platform.startswith("win") and not gclient_utils.FindExecutable("7z"):
        python_fallback = True
    elif sys.platform.startswith("darwin"):
        # The OSX version of unzip doesn't support zip64.
        python_fallback = True
    elif not gclient_utils.FindExecutable("unzip"):
        python_fallback = True

    gs_folder = "gs://%s/%s" % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call("ls", gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
        # This repo is not on Google Storage.
        return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    # BUG FIX: mkdtemp used to run inside the try block, so a failure there
    # made the finally clause reference an unbound `tempdir` (NameError that
    # masked the real error).
    tempdir = tempfile.mkdtemp(prefix="_cache_tmp", dir=self.GetCachePath())
    try:
        self.print("Downloading %s" % latest_checkout)
        code = gsutil.call("cp", latest_checkout, tempdir)
        if code:
            return False
        filename = os.path.join(tempdir, latest_checkout.split("/")[-1])

        # Unpack the file with 7z on Windows, unzip on linux, or fallback.
        if not python_fallback:
            if sys.platform.startswith("win"):
                cmd = ["7z", "x", "-o%s" % directory, "-tzip", filename]
            else:
                cmd = ["unzip", filename, "-d", directory]
            retcode = subprocess.call(cmd)
        else:
            try:
                with zipfile.ZipFile(filename, "r") as f:
                    f.printdir()
                    f.extractall(directory)
            except Exception as e:
                self.print("Encountered error: %s" % str(e), file=sys.stderr)
                retcode = 1
            else:
                retcode = 0
    finally:
        # Clean up the downloaded zipfile.
        gclient_utils.rm_file_or_tree(tempdir)

    if retcode:
        self.print("Extracting bootstrap zipfile %s failed.\n"
                   "Resuming normal operations." % filename)
        return False
    return True
def update_bootstrap(self, prune=False):
    """Upload a <git number>.zip of the mirror to GS.

    When `prune` is True, all other files in the bucket folder are removed
    after the new archive is uploaded.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    self.RunGit(['gc'])  # Run Garbage Collect to compress packfile.
    # Creating a temp file and then deleting it ensures we can use this name.
    _, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    try:
        gsutil.call('cp', tmp_zipfile, dest_name)
    finally:
        # BUG FIX: delete the local archive even if the upload raised, so
        # failed runs do not leak large zip files in the temp dir.
        if os.path.exists(tmp_zipfile):
            os.remove(tmp_zipfile)
    # Remove all other files in the same directory.
    if prune:
        _, ls_out, _ = gsutil.check_call('ls', gs_folder)
        for filename in ls_out.splitlines():
            if filename == dest_name:
                continue
            gsutil.call('rm', filename)
def update_bootstrap(self, prune=False):
    """Archive the mirror as <git number>.zip in GS; optionally prune old zips."""
    # Name the archive after the generation number of master.
    gen_number = subprocess.check_output(
        [self.git_exe, "number", "master"], cwd=self.mirror_path).strip()
    # Compress the packfile before archiving; drop unreachable objects.
    self.RunGit(["gc", "--prune=all"])
    # Reserve a unique filename, then let `zip` recreate it.
    _, zip_path = tempfile.mkstemp(suffix=".zip")
    os.remove(zip_path)
    subprocess.call(["zip", "-r", zip_path, "."], cwd=self.mirror_path)
    uploader = Gsutil(path=self.gsutil_exe, boto_path=None)
    bucket_dir = "gs://%s/%s" % (self.bootstrap_bucket, self.basedir)
    upload_target = "%s/%s.zip" % (bucket_dir, gen_number)
    uploader.call("cp", zip_path, upload_target)
    os.remove(zip_path)
    if not prune:
        return
    # Drop every sibling file except the archive just uploaded.
    _, listing, _ = uploader.check_call("ls", bucket_dir)
    for entry in listing.splitlines():
        if entry != upload_target:
            uploader.call("rm", entry)
def update_bootstrap(self, prune=False):
    """Upload the mirror directory to GS under its <git number> folder.

    A `<folder>.ready` marker is uploaded after the directory so readers can
    tell a complete upload from an in-progress one.
    """
    # The folder is <git number>
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    src_name = self.mirror_path
    dest_name = '%s/%s' % (self._gs_path, gen_number)
    # check to see if folder already exists in gs
    _, ls_out, ls_err = gsutil.check_call('ls', dest_name)
    _, ls_out_ready, ls_err_ready = (gsutil.check_call(
        'ls', dest_name + '.ready'))
    # only printing out errors because the folder/ready file
    # might not exist yet, so it will error no matter what
    if ls_err:
        print('Failed to check GS:\n%s' % (ls_err))
    if ls_err_ready:
        print('Failed to check GS:\n%s' % (ls_err_ready))
    # BUG FIX: only skip the upload when BOTH the directory and its .ready
    # marker already exist. The original returned when either was present,
    # so an interrupted upload (directory without .ready) was never retried.
    if ls_out != '' and ls_out_ready != '':
        return
    gsutil.call('-m', 'cp', '-r', src_name, dest_name)
    # TODO(karenqian): prune old caches
    # create .ready file and upload
    _, ready_file_name = tempfile.mkstemp(suffix='.ready')
    try:
        gsutil.call('cp', ready_file_name, '%s.ready' % (dest_name))
    finally:
        os.remove(ready_file_name)
def update_bootstrap(self, prune=False):
    """Mirror this cache into GS as <gs_path>/<git number> plus a .ready marker.

    The .ready marker is uploaded only after the directory copy finishes, so
    readers can distinguish complete uploads from in-progress ones. With
    `prune`, every generation except the previous complete one is removed.
    """
    # Each upload lives in a folder named after the git generation number.
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    source_dir = self.mirror_path
    target_prefix = '%s/%s' % (self._gs_path, gen_number)
    # Listing entries look like: gs://blah/blah/123...
    _, listing, _ = gsutil.check_call('ls', self._gs_path)
    existing = set(listing.strip().splitlines())
    # Nothing to do when both the folder and its marker are already uploaded.
    if (target_prefix + '/' in existing
            and target_prefix + '.ready' in existing):
        print('Cache %s already exists.' % target_prefix)
        return
    # Compact the repository into one packfile before uploading.
    self.RunGit(['gc', '--prune=all'])
    gsutil.call('-m', 'cp', '-r', source_dir, target_prefix)
    # Upload an empty .ready marker to signal the upload is complete.
    _, marker = tempfile.mkstemp(suffix='.ready')
    try:
        gsutil.call('cp', marker, '%s.ready' % (target_prefix))
    finally:
        os.remove(marker)
    if not prune:
        return
    # Keep the previous complete generation (still usable for bootstrapping
    # while the new one uploads); delete everything else.
    keep = self._GetMostRecentCacheDirectory(existing)
    if not keep:
        return
    for entry in existing:
        if entry == keep + '/' or entry == keep + '.ready':
            continue
        if entry.endswith('.ready'):
            gsutil.call('rm', entry)
        else:
            gsutil.call('-m', 'rm', '-r', entry)
def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Args:
        directory: local path that is replaced by the downloaded cache.

    Returns:
        True if the cache was populated from GS, False otherwise.
    """
    if not self.bootstrap_bucket:
        return False
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the directory.
    # This is determined from the most recent version of a .ready file.
    # The .ready file is only uploaded when an entire directory has been
    # uploaded to GS.
    _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
    ls_out_set = set(ls_out.strip().splitlines())
    latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)
    if not latest_dir:
        self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                   (self.mirror_path, self.bootstrap_bucket,
                    ' '.join((ls_err or '').splitlines(True))))
        return False
    # BUG FIX: create the temp directory before the try block; the original
    # created it inside, so a mkdtemp failure made the except handler
    # reference an unbound `tempdir` (NameError masking the real error).
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
        self.RunGit(['init', '--bare'], cwd=tempdir)
        self.print('Downloading files in %s/* into %s.' %
                   (latest_dir, tempdir))
        with self.print_duration_of('download'):
            code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*", tempdir)
        if code:
            # BUG FIX: clean up the partially-downloaded directory; the
            # original leaked it on a non-zero gsutil exit code.
            gclient_utils.rmtree(tempdir)
            return False
        # A quick validation that all references are valid.
        self.RunGit(['for-each-ref'], cwd=tempdir)
    except Exception as e:
        self.print('Encountered error: %s' % str(e), file=sys.stderr)
        gclient_utils.rmtree(tempdir)
        return False
    # delete the old directory
    if os.path.exists(directory):
        gclient_utils.rmtree(directory)
    self.Rename(tempdir, directory)
    return True
def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Args:
        directory: local path the bootstrap zip is extracted into.

    Returns:
        True if the cache was populated from GS, False otherwise.
    """
    python_fallback = False
    if (sys.platform.startswith('win') and
            not gclient_utils.FindExecutable('7z')):
        python_fallback = True
    elif sys.platform.startswith('darwin'):
        # The OSX version of unzip doesn't support zip64.
        python_fallback = True
    elif not gclient_utils.FindExecutable('unzip'):
        python_fallback = True

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
        # This repo is not on Google Storage.
        return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    # BUG FIX: mkdtemp moved out of the try block; if it raised inside, the
    # finally clause referenced an unbound `tempdir` (NameError).
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
        self.print('Downloading %s' % latest_checkout)
        code = gsutil.call('cp', latest_checkout, tempdir)
        if code:
            return False
        filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

        # Unpack the file with 7z on Windows, unzip on linux, or fallback.
        if not python_fallback:
            if sys.platform.startswith('win'):
                cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
            else:
                cmd = ['unzip', filename, '-d', directory]
            retcode = subprocess.call(cmd)
        else:
            try:
                with zipfile.ZipFile(filename, 'r') as f:
                    f.printdir()
                    f.extractall(directory)
            except Exception as e:
                self.print('Encountered error: %s' % str(e), file=sys.stderr)
                retcode = 1
            else:
                retcode = 0
    finally:
        # Clean up the downloaded zipfile.
        #
        # This is somehow racy on Windows.
        # Catching OSError because WindowsError isn't portable and
        # pylint complains.
        exponential_backoff_retry(
            lambda: gclient_utils.rm_file_or_tree(tempdir),
            excs=(OSError,),
            name='rmtree [%s]' % (tempdir,),
            printerr=self.print)

    if retcode:
        self.print(
            'Extracting bootstrap zipfile %s failed.\n'
            'Resuming normal operations.' % filename)
        return False
    return True
def update_bootstrap(self, prune=False, gc_aggressive=False, branch='master'):
    """Upload the mirror directory to GS under its <git number> folder.

    A `<folder>.ready` marker is uploaded after the directory so readers can
    tell a complete upload from an in-progress one. With `prune`, every
    generation except the previous complete one is deleted afterwards.
    `gc_aggressive` selects a tuned repack instead of plain `gc`.
    `branch` is the branch whose generation number names the folder.
    """
    # The folder is <git number>
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', branch],
        cwd=self.mirror_path).decode('utf-8', 'ignore').strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    src_name = self.mirror_path
    dest_prefix = '%s/%s' % (self._gs_path, gen_number)

    # ls_out lists contents in the format: gs://blah/blah/123...
    _, ls_out, _ = gsutil.check_call('ls', self._gs_path)

    # Check to see if folder already exists in gs
    ls_out_set = set(ls_out.strip().splitlines())
    if (dest_prefix + '/' in ls_out_set and
            dest_prefix + '.ready' in ls_out_set):
        print('Cache %s already exists.' % dest_prefix)
        return

    # Reduce the number of individual files to download & write on disk.
    self.RunGit(['pack-refs', '--all'])

    # Run Garbage Collect to compress packfile.
    gc_args = ['gc', '--prune=all']
    if gc_aggressive:
        # The default "gc --aggressive" is often too aggressive for some
        # machines, since it attempts to create as many threads as there are
        # CPU cores, while not limiting per-thread memory usage, which puts
        # too much pressure on RAM on high-core machines, causing them to
        # thrash. Using lower-level commands gives more control over those
        # settings.
        # This might not be strictly necessary, but it's fast and is normally
        # run by 'gc --aggressive', so it shouldn't hurt.
        self.RunGit(['reflog', 'expire', '--all'])
        # These are the default repack settings for 'gc --aggressive'.
        gc_args = ['repack', '-d', '-l', '-f', '--depth=50', '--window=250',
                   '-A', '--unpack-unreachable=all']
        # A 1G memory limit seems to provide comparable pack results as the
        # default, even for our largest repos, while preventing runaway memory
        # (at least on current Chromium builders which have about 4G RAM per
        # core).
        gc_args.append('--window-memory=1g')
        # NOTE: It might also be possible to avoid thrashing with a larger
        # window (e.g. "--window-memory=2g") by limiting the number of threads
        # created (e.g. "--threads=[cores/2]"). Some limited testing didn't
        # show much difference in outcomes on our current repos, but it might
        # be worth trying if the repos grow much larger and the packs don't
        # seem to be getting compressed enough.
    self.RunGit(gc_args)

    gsutil.call('-m', 'cp', '-r', src_name, dest_prefix)

    # Create .ready file and upload
    _, ready_file_name = tempfile.mkstemp(suffix='.ready')
    try:
        gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
    finally:
        os.remove(ready_file_name)

    # remove all other directory/.ready files in the same gs_path
    # except for the directory/.ready file previously created
    # which can be used for bootstrapping while the current one is
    # being uploaded
    if not prune:
        return
    prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
    if not prev_dest_prefix:
        return
    for path in ls_out_set:
        if (path == prev_dest_prefix + '/' or
                path == prev_dest_prefix + '.ready'):
            continue
        if path.endswith('.ready'):
            gsutil.call('rm', path)
            continue
        gsutil.call('-m', 'rm', '-r', path)
def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Args:
        directory: local path the bootstrap zip is extracted into.

    Returns:
        True if the cache was populated from GS, False otherwise.
    """
    if not self.bootstrap_bucket:
        return False
    # Fall back to Python's zipfile when no suitable native unzipper exists:
    # no 7z on Windows, no unzip at all, or an unzip without zip64 support.
    # NOTE(review): when unzip is missing, `or` short-circuits before the
    # check_output call, so check_output cannot fail on an absent binary.
    python_fallback = ((sys.platform.startswith('win') and
                        not gclient_utils.FindExecutable('7z')) or
                       (not gclient_utils.FindExecutable('unzip')) or
                       ('ZIP64_SUPPORT' not in subprocess.check_output(
                           ["unzip", "-v"])))
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)

    def compare_filenames(a, b):
        # |a| and |b| look like gs://.../.../9999.zip. They both have the same
        # gs://bootstrap_bucket/basedir/ prefix because they come from the
        # same `gsutil ls`.
        # This function only compares the numeral parts before .zip.
        regex_pattern = r'/(\d+)\.zip$'
        match_a = re.search(regex_pattern, a)
        match_b = re.search(regex_pattern, b)
        if (match_a is not None) and (match_b is not None):
            num_a = int(match_a.group(1))
            num_b = int(match_b.group(1))
            return cmp(num_a, num_b)
        # If it doesn't match the format, fallback to string comparison.
        return cmp(a, b)

    # NOTE(review): the `cmp` builtin and sorted(cmp=...) are Python 2 only;
    # this block cannot run unmodified on Python 3.
    ls_out_sorted = sorted(ls_out.splitlines(), cmp=compare_filenames)
    if not ls_out_sorted:
        # This repo is not on Google Storage.
        self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                   (self.mirror_path, self.bootstrap_bucket,
                    ' '.join((ls_err or '').splitlines(True))))
        return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    try:
        # NOTE(review): if mkdtemp raises here, the finally clause below hits
        # an unbound `tempdir` (NameError) — worth hoisting out of the try.
        tempdir = tempfile.mkdtemp(prefix='_cache_tmp',
                                   dir=self.GetCachePath())
        self.print('Downloading %s' % latest_checkout)
        with self.print_duration_of('download'):
            code = gsutil.call('cp', latest_checkout, tempdir)
        if code:
            return False
        filename = os.path.join(tempdir, latest_checkout.split('/')[-1])
        # Unpack the file with 7z on Windows, unzip on linux, or fallback.
        with self.print_duration_of('unzip'):
            if not python_fallback:
                if sys.platform.startswith('win'):
                    cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
                else:
                    cmd = ['unzip', filename, '-d', directory]
                retcode = subprocess.call(cmd)
            else:
                try:
                    with zipfile.ZipFile(filename, 'r') as f:
                        f.printdir()
                        f.extractall(directory)
                except Exception as e:
                    self.print('Encountered error: %s' % str(e),
                               file=sys.stderr)
                    retcode = 1
                else:
                    retcode = 0
    finally:
        # Clean up the downloaded zipfile.
        #
        # This is somehow racy on Windows.
        # Catching OSError because WindowsError isn't portable and
        # pylint complains.
        exponential_backoff_retry(
            lambda: gclient_utils.rm_file_or_tree(tempdir),
            excs=(OSError, ),
            name='rmtree [%s]' % (tempdir, ),
            printerr=self.print)
    if retcode:
        self.print('Extracting bootstrap zipfile %s failed.\n'
                   'Resuming normal operations.' % filename)
        return False
    return True
def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Args:
        directory: local path the bootstrap zip is extracted into.

    Returns:
        True if the cache was populated from GS, False otherwise.
    """
    if not self.bootstrap_bucket:
        return False
    # Fall back to Python's zipfile when no suitable native unzipper exists:
    # no 7z on Windows, no unzip at all, or an unzip without zip64 support.
    python_fallback = (
        (sys.platform.startswith('win') and
         not gclient_utils.FindExecutable('7z')) or
        (not gclient_utils.FindExecutable('unzip')) or
        ('ZIP64_SUPPORT' not in subprocess.check_output(["unzip", "-v"]))
    )
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
        # This repo is not on Google Storage.
        self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                   (self.mirror_path, self.bootstrap_bucket,
                    ' '.join((ls_err or '').splitlines(True))))
        return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    # BUG FIX: mkdtemp now runs before the try; previously a mkdtemp failure
    # made the finally clause reference an unbound `tempdir` (NameError that
    # masked the real error).
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
        self.print('Downloading %s' % latest_checkout)
        with self.print_duration_of('download'):
            code = gsutil.call('cp', latest_checkout, tempdir)
        if code:
            return False
        filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

        # Unpack the file with 7z on Windows, unzip on linux, or fallback.
        with self.print_duration_of('unzip'):
            if not python_fallback:
                if sys.platform.startswith('win'):
                    cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
                else:
                    cmd = ['unzip', filename, '-d', directory]
                retcode = subprocess.call(cmd)
            else:
                try:
                    with zipfile.ZipFile(filename, 'r') as f:
                        f.printdir()
                        f.extractall(directory)
                except Exception as e:
                    self.print('Encountered error: %s' % str(e),
                               file=sys.stderr)
                    retcode = 1
                else:
                    retcode = 0
    finally:
        # Clean up the downloaded zipfile.
        #
        # This is somehow racy on Windows.
        # Catching OSError because WindowsError isn't portable and
        # pylint complains.
        exponential_backoff_retry(
            lambda: gclient_utils.rm_file_or_tree(tempdir),
            excs=(OSError,),
            name='rmtree [%s]' % (tempdir,),
            printerr=self.print)

    if retcode:
        self.print(
            'Extracting bootstrap zipfile %s failed.\n'
            'Resuming normal operations.' % filename)
        return False
    return True