def stage(self, root):
    """Synchronize the build directory with the staging bucket under the
    namespace [username]/[branch]/[edition]/.

    Uploads changed files, records redirects (from .htaccess on master,
    and from symbolic links), and removes staged keys whose local files
    were deleted.

    :param root: Path to the built edition directory to synchronize.
    :raises NoSuchEdition: If ``root`` is not a directory.
    :raises SyncException: If deleting removed keys from S3 reports errors.
    """
    tasks = []

    redirects = {}
    htaccess_path = os.path.join(root, '.htaccess')
    try:
        # Only master carries the canonical .htaccess redirect table.
        if self.branch == 'master':
            redirects = translate_htaccess(htaccess_path)
    except IOError:
        # Only log an error if deploying; staging doesn't use .htaccess
        level = logging.CRITICAL if self.EXPECT_HTACCESS else logging.INFO
        LOGGER.log(level, "Couldn't open required %s", htaccess_path)
        if self.EXPECT_HTACCESS:
            sys.exit(1)

    # Ensure that the root ends with a trailing slash to make future
    # manipulations more predictable.
    if not root.endswith('/'):
        root += '/'

    if not os.path.isdir(root):
        raise NoSuchEdition(root)

    # If a redirect is masking a file, we can run into an invalid 404
    # when the redirect is deleted but the file isn't republished.
    # For now we only warn; actually dropping the entry would require
    # iterating over a copy of redirects, since the dict may not be
    # mutated while being iterated.
    for src in redirects:
        src_path = os.path.join(root, src)
        if (os.path.isfile(src_path) and
                os.path.basename(src_path) in os.listdir(os.path.dirname(src_path))):
            LOGGER.warning('Would ignore redirect that will mask file: %s', src)

    # Collect files that need to be uploaded
    for entry in self.collector.collect(root, self.s3.list(prefix=self.namespace)):
        src = entry.path.replace(root, '', 1)

        if os.path.islink(entry.path):
            # If redirecting from a directory, make sure we end it with a '/'
            suffix = self.PAGE_SUFFIX if (
                os.path.isdir(entry.path) and not entry.path.endswith('/')) else ''

            resolved = os.path.join(os.path.dirname(entry.path),
                                    os.readlink(entry.path))
            if os.path.islink(resolved):
                LOGGER.warning('Multiple layers of symbolic link: %s', resolved)

            if not os.path.exists(resolved):
                LOGGER.warning('Dead link: %s -> %s', entry.path, resolved)

            if not resolved.startswith(root):
                # NOTE(review): the message says "skipping", but the redirect
                # is still recorded below — confirm whether a `continue` was
                # intended here.
                LOGGER.warning('Skipping symbolic link %s: outside of root %s',
                               resolved, root)

            redirects[str(Path(src + suffix).ensure_prefix(self.namespace))] = \
                resolved.replace(root, '/', 1)
        else:
            # Bind the arguments now; the upload itself runs later in the
            # worker pool.
            tasks.append(functools.partial(
                self.__upload, src, os.path.join(root, src), entry.file_hash))

    # Run our actual staging operations in a thread pool. This would be
    # better with async IO, but this will do for now.
    LOGGER.info('Running %s tasks', len(tasks))
    run_pool(tasks)

    # XXX Right now we only sync redirects on master.
    # Why: Master has the "canonical" .htaccess, and we'd need to attach
    #      metadata to each redirect on S3 to differentiate .htaccess
    #      redirects from symbolic links.
    # Ramifications: Symbolic link redirects for non-master branches
    #                will never be published.
    if self.branch == 'master':
        self.sync_redirects(redirects)

    # Remove from staging any files that our FileCollector thinks have been
    # deleted locally.
    remove_keys = [
        str(Path(p).replace_prefix(root, '').ensure_prefix(self.namespace))
        for p in self.collector.removed_files]
    if remove_keys:
        LOGGER.warning('Removing %s', remove_keys)
        remove_result = self.s3.delete_keys(remove_keys)
        if remove_result.errors:
            raise SyncException(remove_result.errors)

    self.collector.commit()
def stage(self, root):
    """Synchronize the build directory with the staging bucket under the
    namespace [username]/[branch]/[edition]/.

    Uploads changed files, records redirects (from .htaccess on master,
    and from symbolic links), and removes staged keys whose local files
    were deleted.

    :param root: Path to the built edition directory to synchronize.
    :raises NoSuchEdition: If ``root`` is not a directory.
    :raises SyncException: If deleting removed keys from S3 reports errors.
    """
    tasks = []

    redirects = {}
    htaccess_path = os.path.join(root, '.htaccess')
    try:
        # Only master carries the canonical .htaccess redirect table.
        if self.branch == 'master':
            redirects = translate_htaccess(htaccess_path)
    except IOError:
        # Only log an error if deploying; staging doesn't use .htaccess
        level = logging.CRITICAL if self.EXPECT_HTACCESS else logging.INFO
        LOGGER.log(level, "Couldn't open required %s", htaccess_path)
        if self.EXPECT_HTACCESS:
            sys.exit(1)

    # Ensure that the root ends with a trailing slash to make future
    # manipulations more predictable.
    if not root.endswith('/'):
        root += '/'

    if not os.path.isdir(root):
        raise NoSuchEdition(root)

    # If a redirect is masking a file, we can run into an invalid 404
    # when the redirect is deleted but the file isn't republished.
    # For now we only warn; actually dropping the entry would require
    # iterating over a copy of redirects, since the dict may not be
    # mutated while being iterated.
    for src in redirects:
        src_path = os.path.join(root, src)
        if (os.path.isfile(src_path) and
                os.path.basename(src_path) in os.listdir(os.path.dirname(src_path))):
            LOGGER.warning('Would ignore redirect that will mask file: %s', src)

    # Collect files that need to be uploaded
    for entry in self.collector.collect(root, self.s3.list(prefix=self.namespace)):
        src = entry.path.replace(root, '', 1)

        if os.path.islink(entry.path):
            # If redirecting from a directory, make sure we end it with a '/'
            suffix = self.PAGE_SUFFIX if (
                os.path.isdir(entry.path) and not entry.path.endswith('/')) else ''

            resolved = os.path.join(os.path.dirname(entry.path),
                                    os.readlink(entry.path))
            if os.path.islink(resolved):
                LOGGER.warning('Multiple layers of symbolic link: %s', resolved)

            if not os.path.exists(resolved):
                LOGGER.warning('Dead link: %s -> %s', entry.path, resolved)

            if not resolved.startswith(root):
                # NOTE(review): the message says "skipping", but the redirect
                # is still recorded below — confirm whether a `continue` was
                # intended here.
                LOGGER.warning('Skipping symbolic link %s: outside of root %s',
                               resolved, root)

            redirects[str(Path(src + suffix).ensure_prefix(self.namespace))] = \
                resolved.replace(root, '/', 1)
        else:
            # Bind the arguments now; the upload itself runs later in the
            # worker pool.
            tasks.append(functools.partial(
                self.__upload, src, os.path.join(root, src), entry.file_hash))

    # Run our actual staging operations in a thread pool. This would be
    # better with async IO, but this will do for now.
    LOGGER.info('Running %s tasks', len(tasks))
    run_pool(tasks)

    # XXX Right now we only sync redirects on master.
    # Why: Master has the "canonical" .htaccess, and we'd need to attach
    #      metadata to each redirect on S3 to differentiate .htaccess
    #      redirects from symbolic links.
    # Ramifications: Symbolic link redirects for non-master branches
    #                will never be published.
    if self.branch == 'master':
        self.sync_redirects(redirects)

    # Remove from staging any files that our FileCollector thinks have been
    # deleted locally.
    remove_keys = [
        str(Path(p).replace_prefix(root, '').ensure_prefix(self.namespace))
        for p in self.collector.removed_files]
    if remove_keys:
        LOGGER.warning('Removing %s', remove_keys)
        remove_result = self.s3.delete_keys(remove_keys)
        if remove_result.errors:
            raise SyncException(remove_result.errors)

    self.collector.commit()
def stage(self, root):
    """Synchronize the build directory with the staging bucket under the
    namespace [username]/[branch]/[edition]/.

    Uploads changed files, records redirects (from .htaccess and from
    symbolic links), syncs those redirects, and removes staged keys
    whose local files were deleted.

    :param root: Path to the built edition directory to synchronize.
    :raises NoSuchEdition: If ``root`` is not a directory.
    :raises SyncException: If deleting removed keys from S3 reports errors.
    """
    tasks = []

    redirects = {}
    htaccess_path = os.path.join(root, '.htaccess')
    try:
        redirects = translate_htaccess(htaccess_path)
    except IOError:
        # Only log an error if deploying; staging doesn't use .htaccess
        if self.EXPECT_HTACCESS:
            LOGGER.error('No .htaccess found at %s', htaccess_path)

    # Ensure that the root ends with a trailing slash to make future
    # manipulations more predictable.
    if not root.endswith('/'):
        root += '/'

    if not os.path.isdir(root):
        raise NoSuchEdition(root)

    # Collect files that need to be uploaded
    for entry in self.collector.collect(root, self.s3.list(prefix=self.namespace)):
        src = entry.path.replace(root, '', 1)

        if os.path.islink(entry.path):
            # If redirecting from a directory, make sure we end it with a '/'
            suffix = self.PAGE_SUFFIX if (
                os.path.isdir(entry.path) and not entry.path.endswith('/')) else ''

            resolved = os.path.join(os.path.dirname(entry.path),
                                    os.readlink(entry.path))
            if os.path.islink(resolved):
                LOGGER.warning('Multiple layers of symbolic link: %s', resolved)

            if not os.path.exists(resolved):
                LOGGER.warning('Dead link: %s -> %s', entry.path, resolved)

            if not resolved.startswith(root):
                # NOTE(review): the message says "skipping", but the redirect
                # is still recorded below — confirm whether a `continue` was
                # intended here.
                LOGGER.warning('Skipping symbolic link %s: outside of root %s',
                               resolved, root)

            redirects[str(Path(src + suffix).ensure_prefix(self.namespace))] = \
                resolved.replace(root, '/', 1)
        else:
            # Bind the arguments now; the upload itself runs later in the
            # worker pool.
            tasks.append(functools.partial(
                self.__upload, src, os.path.join(root, src), entry.file_hash))

    # Run our actual staging operations in a thread pool. This would be
    # better with async IO, but this will do for now.
    LOGGER.info('Running %s tasks', len(tasks))
    run_pool(tasks)

    self.sync_redirects(redirects)

    # Remove from staging any files that our FileCollector thinks have been
    # deleted locally.
    remove_keys = [
        str(Path(p).replace_prefix(root, '').ensure_prefix(self.namespace))
        for p in self.collector.removed_files]
    if remove_keys:
        LOGGER.info('Removing %s', remove_keys)
        remove_result = self.s3.delete_keys(remove_keys)
        if remove_result.errors:
            raise SyncException(remove_result.errors)

    self.collector.commit()