def test_reportApprovalConflict_sets_error_output_just_once(self): # Repeated occurrence of the same approval conflict will not # result in repeated setting of error_output. series = self.factory.makeProductSeries() domain = self.factory.getUniqueString() templates = [ self.factory.makePOTemplate(productseries=series, translation_domain=domain) for counter in range(3) ] entry = removeSecurityProxy( self.factory.makeTranslationImportQueueEntry()) entry.reportApprovalConflict(domain, len(templates), templates) original_error = entry.error_output transaction.commit() # Try reporting the conflict again, with the templates # reshuffled to see if reportApprovalConflict can be fooled into # thinking it's a different error. Make as sure as we can that # entry.error_output is not modified. slave_entry = ISlaveStore(entry).get(TranslationImportQueueEntry, entry.id) slave_entry.setErrorOutput = FakeMethod() slave_entry.reportApprovalConflict(domain, len(templates), reversed(templates)) self.assertEqual(original_error, slave_entry.error_output) self.assertIn(domain, original_error) self.assertEqual(0, slave_entry.setErrorOutput.call_count)
def fetchProjectsForDisplay(self): """See `ITranslationGroup`.""" # Avoid circular imports. from lp.registry.model.product import ( Product, ProductWithLicenses, ) using = [ Product, LeftJoin(LibraryFileAlias, LibraryFileAlias.id == Product.iconID), LeftJoin( LibraryFileContent, LibraryFileContent.id == LibraryFileAlias.contentID), ] columns = ( Product, ProductWithLicenses.composeLicensesColumn(), LibraryFileAlias, LibraryFileContent, ) product_data = ISlaveStore(Product).using(*using).find( columns, Product.translationgroupID == self.id, Product.active == True) product_data = product_data.order_by(Product.displayname) return [ ProductWithLicenses(product, tuple(licenses)) for product, licenses, icon_alias, icon_content in product_data]
def get_distroseries_pofiles(self, series, date=None, component=None, languagepack=None): """See `IVPOExport`. Selects `POFiles` based on the 'series', last modified 'date', archive 'component', and whether it belongs to a 'languagepack' """ tables = [ POFile, POTemplate, ] conditions = [ POTemplate.distroseries == series, POTemplate.iscurrent == True, POFile.potemplate == POTemplate.id, ] if date is not None: conditions.append( Or(POTemplate.date_last_updated > date, POFile.date_changed > date)) if component is not None: tables.extend([ SourcePackagePublishingHistory, Component, ]) conditions.extend([ SourcePackagePublishingHistory.distroseries == series, SourcePackagePublishingHistory.component == Component.id, POTemplate.sourcepackagename == SourcePackagePublishingHistory.sourcepackagenameID, Component.name == component, SourcePackagePublishingHistory.dateremoved == None, SourcePackagePublishingHistory.archive == series.main_archive, ]) if languagepack: conditions.append(POTemplate.languagepack == True) # Use the slave store. We may want to write to the distroseries # to register a language pack, but not to the translation data # we retrieve for it. query = ISlaveStore(POFile).using(*tables).find( POFile, And(*conditions)) # Order by POTemplate. Caching in the export scripts can be # much more effective when consecutive POFiles belong to the # same POTemplate, e.g. they'll have the same POTMsgSets. sort_list = [POFile.potemplateID, POFile.languageID] return query.order_by(sort_list).config(distinct=True)
def determineCandidates(self): """Find all distinct BugTask targets with their cached names. Returns a list of (target, set_of_cached_names) pairs, where target is a tuple of IDs from the columns in target_columns. """ store = ISlaveStore(BugTask) candidate_set = store.find(target_columns).config(distinct=True) candidates = defaultdict(set) for candidate in candidate_set: candidates[candidate[:-1]].add(candidate[-1]) return list(candidates.iteritems())
def test_slave_reconnect_after_outage(self): '''The slave is again used once it becomes available.''' self.pgbouncer_fixture.stop() master_store = IMasterStore(Person) slave_store = ISlaveStore(Person) self.assertIs(master_store, slave_store) self.pgbouncer_fixture.start() transaction.abort() master_store = IMasterStore(Person) slave_store = ISlaveStore(Person) self.assertIsNot(master_store, slave_store)
def test_can_shutdown_slave_only(self): '''Confirm that this TestCase's test infrastructure works as needed. ''' master_store = IMasterStore(Person) slave_store = ISlaveStore(Person) # Both Stores work when pgbouncer is up. master_store.get(Person, 1) slave_store.get(Person, 1) # Slave Store breaks when pgbouncer is torn down. Master Store # is fine. self.pgbouncer_fixture.stop() master_store.get(Person, 2) self.assertRaises(DisconnectionError, slave_store.get, Person, 2)
def fetch_team_participation_info(log): """Fetch people, teams, memberships and participations.""" slurp = partial(execute_long_query, ISlaveStore(TeamParticipation), log, 10000) people = dict( slurp("SELECT id, name FROM Person" " WHERE teamowner IS NULL" " AND merged IS NULL")) teams = dict( slurp("SELECT id, name FROM Person" " WHERE teamowner IS NOT NULL" " AND merged IS NULL")) team_memberships = defaultdict(set) results = slurp("SELECT team, person FROM TeamMembership" " WHERE status in %s" % quote(ACTIVE_STATES)) for (team, person) in results: team_memberships[team].add(person) team_participations = defaultdict(set) results = slurp("SELECT team, person FROM TeamParticipation") for (team, person) in results: team_participations[team].add(person) # Don't hold any locks. transaction.commit() return people, teams, team_memberships, team_participations
def test_exportToStaleBranch(self): # Attempting to export to a stale branch marks it for scanning. self.useBzrBranches(direct_database=False) exporter = ExportTranslationsToBranch(test_args=[]) exporter.logger = BufferLogger() productseries = self.factory.makeProductSeries() db_branch, tree = self.create_branch_and_tree( product=productseries.product) removeSecurityProxy(productseries).translations_branch = db_branch db_branch.last_mirrored_id = 'stale-id' db_branch.last_scanned_id = db_branch.last_mirrored_id self.becomeDbUser('translationstobranch') self.assertFalse(db_branch.pending_writes) self.assertNotEqual(db_branch.last_mirrored_id, tree.branch.last_revision()) # The export code works on a Branch from the slave store. It # shouldn't stop the scan request. slave_series = ISlaveStore(productseries).get(ProductSeries, productseries.id) exporter._exportToBranch(slave_series) self.assertEqual(db_branch.last_mirrored_id, tree.branch.last_revision()) self.assertTrue(db_branch.pending_writes) matches = MatchesRegex( "(.|\n)*WARNING Skipped .* due to stale DB info, and scheduled a " "new scan.") self.assertThat(exporter.logger.getLogBuffer(), matches)
def _put(log, swift_connection, lfc_id, container, obj_name, fs_path): fs_size = os.path.getsize(fs_path) fs_file = HashStream(open(fs_path, 'rb')) db_md5_hash = ISlaveStore(LibraryFileContent).get(LibraryFileContent, lfc_id).md5 assert hasattr(fs_file, 'tell') and hasattr(fs_file, 'seek'), ''' File not rewindable ''' if fs_size <= MAX_SWIFT_OBJECT_SIZE: swift_md5_hash = swift_connection.put_object(container, obj_name, fs_file, fs_size) disk_md5_hash = fs_file.hash.hexdigest() if not (disk_md5_hash == db_md5_hash == swift_md5_hash): log.error("LibraryFileContent({0}) corrupt. " "disk md5={1}, db md5={2}, swift md5={3}".format( lfc_id, disk_md5_hash, db_md5_hash, swift_md5_hash)) try: swift_connection.delete_object(container, obj_name) except Exception: log.exception('Failed to delete corrupt file from Swift') raise AssertionError('md5 mismatch') else: # Large file upload. Create the segments first, then the # manifest. This order prevents partial downloads, and lets us # detect interrupted uploads and clean up. segment = 0 while fs_file.tell() < fs_size: assert segment <= 9999, 'Insane number of segments' seg_name = '%s/%04d' % (obj_name, segment) seg_size = min(fs_size - fs_file.tell(), MAX_SWIFT_OBJECT_SIZE) md5_stream = HashStream(fs_file, length=seg_size) swift_md5_hash = swift_connection.put_object( container, seg_name, md5_stream, seg_size) segment_md5_hash = md5_stream.hash.hexdigest() assert swift_md5_hash == segment_md5_hash, ( "LibraryFileContent({0}) segment {1} upload corrupted".format( lfc_id, segment)) segment = segment + 1 disk_md5_hash = fs_file.hash.hexdigest() if disk_md5_hash != db_md5_hash: # We don't have to delete the uploaded segments, as Librarian # Garbage Collection handles this for us. log.error("Large LibraryFileContent({0}) corrupt. " "disk md5={1}, db_md5={2}".format( lfc_id, disk_md5_hash, db_md5_hash)) raise AssertionError('md5 mismatch') manifest = '{0}/{1}/'.format(urllib.quote(container), urllib.quote(obj_name)) manifest_headers = {'X-Object-Manifest': manifest} swift_connection.put_object(container, obj_name, '', 0, headers=manifest_headers)
def test_startup_with_no_slave(self): '''An attempt is made for the first time to connect to a slave.''' self.pgbouncer_fixture.stop() master_store = IMasterStore(Person) slave_store = ISlaveStore(Person) # The master and slave Stores are the same object. self.assertIs(master_store, slave_store)
def getProductsWithInfo(num_products=None): """See `IBranchCloud`.""" distinct_revision_author = Func( "distinct", RevisionCache.revision_author_id) commits = Alias(Count(RevisionCache.revision_id)) epoch = datetime.now(pytz.UTC) - timedelta(days=30) # It doesn't matter if this query is even a whole day out of date, so # use the slave store. result = ISlaveStore(RevisionCache).find( (Product.name, commits, Count(distinct_revision_author), Max(RevisionCache.revision_date)), RevisionCache.product == Product.id, Not(RevisionCache.private), RevisionCache.revision_date >= epoch) result = result.group_by(Product.name) result = result.order_by(Desc(commits)) if num_products: result.config(limit=num_products) return result
def search(self, text): """See `ILanguageSet`.""" if text: text = ensure_unicode(text).lower() results = ISlaveStore(Language).find( Language, Or(Language.code.lower().contains_string(text), Language.englishname.lower().contains_string( text))).order_by(Language.englishname) else: results = None return results
def rough_length(self):
    """See `IRangeFactory`."""
    from lp.services.librarian.model import LibraryFileAlias
    # get_select_expr() requires at least one column as a parameter.
    # getOrderBy() already knows about columns that can appear
    # in the result set, so let's use them. Moreover, for SELECT
    # DISTINCT queries, each column used for sorting must appear
    # in the result.
    if self.empty_resultset:
        return 0
    columns = [plain_expression(column) for column in self.getOrderBy()]
    select = removeSecurityProxy(
        self.plain_resultset).get_select_expr(*columns)
    explain = 'EXPLAIN ' + convert_storm_clause_to_string(select)
    result = ISlaveStore(LibraryFileAlias).execute(explain)
    _rows_re = re.compile("rows=(\d+)\swidth=")
    first_line = result.get_one()[0]
    match = _rows_re.search(first_line)
    if match is None:
        raise RuntimeError(
            "Unexpected EXPLAIN output %s" % repr(first_line))
    return int(match.group(1))
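# Illustrative only: rough_length() reads the planner's row estimate from
# PostgreSQL EXPLAIN output instead of running an exact COUNT(*). The plan
# line below is made up; only the "rows=... width=..." shape matters.
import re

_rows_re = re.compile("rows=(\d+)\swidth=")
first_line = (
    'Seq Scan on LibraryFileAlias  (cost=0.00..458.00 rows=15000 width=36)')
# The regex pulls out the estimate; here the reported rough length is 15000.
assert int(_rows_re.search(first_line).group(1)) == 15000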
def test_slave_shutdown_between_transactions(self):
    '''Slave is shut down in between transactions.'''
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIsNot(master_store, slave_store)

    transaction.abort()
    self.pgbouncer_fixture.stop()

    # The process doesn't notice the slave going down, and things
    # will fail the next time the slave is used.
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIsNot(master_store, slave_store)
    self.assertRaises(DisconnectionError, slave_store.get, Person, 1)

    # But now it has been discovered that the socket is no longer
    # connected to anything, so in the next transaction we get a
    # master Store when we ask for a slave.
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIs(master_store, slave_store)
def test_slave_shutdown_during_transaction(self):
    '''Slave is shut down while running, but we can recover.'''
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIsNot(master_store, slave_store)

    self.pgbouncer_fixture.stop()

    # The transaction fails if the slave store is used. Robust
    # processes will handle this and retry (even if that just means
    # exit and wait for the next scheduled invocation).
    self.assertRaises(DisconnectionError, slave_store.get, Person, 1)

    transaction.abort()

    # But in the next transaction, we get the master Store if we ask
    # for the slave Store, so we can continue.
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIs(master_store, slave_store)
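# A minimal sketch (not from the snippets above) of the retry pattern the
# comment describes: on DisconnectionError, abort the transaction and ask
# for the Store again; the next transaction may hand back the master Store
# in place of the unavailable slave. The helper name and retry count are
# hypothetical; Person and ISlaveStore are assumed to be imported as in
# the tests above.
import transaction
from storm.exceptions import DisconnectionError

def get_person_with_retry(person_id, attempts=2):
    for attempt in range(attempts):
        try:
            return ISlaveStore(Person).get(Person, person_id)
        except DisconnectionError:
            # Abort so the next transaction hands out a working Store.
            transaction.abort()
            if attempt == attempts - 1:
                raise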
def test_addFile_uses_master(self): # addFile is a write operation, so it should always use the # master store, even if the slave is the default. Close the # slave store and try to add a file, verifying that the master # is used. client = LibrarianClient() ISlaveStore(LibraryFileAlias).close() with SlaveDatabasePolicy(): alias_id = client.addFile('sample.txt', 6, StringIO('sample'), 'text/plain') transaction.commit() f = client.getFileByAlias(alias_id) self.assertEqual(f.read(), 'sample')
def _getHeadRequest(self): """Return oldest request on the queue.""" # Due to replication lag, it's possible that the slave store # still has copies of requests that have already been completed # and deleted from the master store. So first get the oldest # request that is "live," i.e. still present on the master # store. oldest_live = self._getOldestLiveRequest() if oldest_live is None: return None else: return ISlaveStore(POExportRequest).find( POExportRequest, POExportRequest.id == oldest_live.id).one()
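# _getOldestLiveRequest() is referenced above but not shown. A sketch of
# what it might do, consistent with the comment: look on the master store,
# where completed requests have already been deleted, and take the oldest
# remaining request. The ordering and details here are assumptions, not
# the actual implementation.
def _getOldestLiveRequest(self):
    """Return the oldest live request, as seen by the master store."""
    store = IMasterStore(POExportRequest)
    return store.find(POExportRequest).order_by(POExportRequest.id).first()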
def test_load_with_store(self): # load() can use an alternative store. db_object = self.factory.makeComponent() # Commit so the database object is available in both master # and slave stores. transaction.commit() # Master store. master_store = IMasterStore(db_object) [db_object_from_master] = bulk.load(Component, [db_object.id], store=master_store) self.assertEqual(Store.of(db_object_from_master), master_store) # Slave store. slave_store = ISlaveStore(db_object) [db_object_from_slave] = bulk.load(Component, [db_object.id], store=slave_store) self.assertEqual(Store.of(db_object_from_slave), slave_store)
def check_teamparticipation_circular(log): """Check circular references. There can be no mutual participation between teams. """ query = """ SELECT tp.team, tp2.team FROM TeamParticipation AS tp, TeamParticipation AS tp2 WHERE tp.team = tp2.person AND tp.person = tp2.team AND tp.id != tp2.id; """ circular_references = list(ISlaveStore(TeamParticipation).execute(query)) if len(circular_references) > 0: raise LaunchpadScriptFailure("Circular references found: %s" % circular_references)
def getBuildQueueSizes(self): """See `IBuilderSet`.""" results = ISlaveStore(BuildQueue).find( (Count(), Sum(BuildQueue.estimated_duration), Processor, Coalesce(BuildQueue.virtualized, True)), Processor.id == BuildQueue.processorID, BuildQueue.status == BuildQueueStatus.WAITING).group_by( Processor, Coalesce(BuildQueue.virtualized, True)) result_dict = {'virt': {}, 'nonvirt': {}} for size, duration, processor, virtualized in results: if virtualized is False: virt_str = 'nonvirt' else: virt_str = 'virt' result_dict[virt_str][processor.name] = (size, duration) return result_dict
def test_gen_reload_queries_with_mixed_stores(self): # gen_reload_queries() returns one query for each distinct # store even for the same object type. db_object = self.factory.makeComponent() db_object_type = bulk.get_type(db_object) # Commit so the database object is available in both master # and slave stores. transaction.commit() db_objects = set( (IMasterStore(db_object).get(db_object_type, db_object.id), ISlaveStore(db_object).get(db_object_type, db_object.id))) db_queries = list(bulk.gen_reload_queries(db_objects)) self.failUnlessEqual(2, len(db_queries)) db_objects_loaded = set() for db_query in db_queries: objects = set(db_query) # None of these objects should have been loaded before. self.failUnlessEqual(set(), objects.intersection(db_objects_loaded)) db_objects_loaded.update(objects) self.failUnlessEqual(db_objects, db_objects_loaded)
def fetchDistrosForDisplay(self): """See `ITranslationGroup`.""" # Avoid circular imports. from lp.registry.model.distribution import Distribution using = [ Distribution, LeftJoin( LibraryFileAlias, LibraryFileAlias.id == Distribution.iconID), LeftJoin( LibraryFileContent, LibraryFileContent.id == LibraryFileAlias.contentID), ] tables = ( Distribution, LibraryFileAlias, LibraryFileContent, ) distro_data = ISlaveStore(Distribution).using(*using).find( tables, Distribution.translationgroupID == self.id).order_by( Distribution.display_name) return DecoratedResultSet(distro_data, operator.itemgetter(0))
def getRequest(self): """See `IPOExportRequestSet`.""" # Exports happen off the slave store. To ensure that export # does not happen until requests have been replicated to the # slave, they are read primarily from the slave even though they # are deleted on the master afterwards. head = self._getHeadRequest() if head is None: return None, None, None, None requests = ISlaveStore(POExportRequest).find( POExportRequest, POExportRequest.person == head.person, POExportRequest.format == head.format, POExportRequest.date_created == head.date_created).order_by( POExportRequest.potemplateID) summary = [(request.id, request.pofile or request.potemplate) for request in requests] sources = [source for request_id, source in summary] request_ids = [request_id for request_id, source in summary] return head.person, sources, head.format, request_ids
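# Illustrative caller (all names hypothetical): getRequest() hands back one
# batch of requests made by a single person, in a single format, at a
# single time, or four None values when the queue is empty.
def process_one_batch(request_set, export):
    person, sources, format, request_ids = request_set.getRequest()
    if person is None:
        return None  # Queue is empty.
    for source in sources:
        # Each source is a POFile or a POTemplate requested by `person`.
        export(source, format)
    # The caller would then remove the finished requests from the queue.
    return request_ids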
def check_preconditions(options): """Try to ensure that it's safe to run. This script must not run on a production server, or anything remotely like it. """ store = ISlaveStore(ComponentSelection) # Just a guess, but dev systems aren't likely to have ids this high # in this table. Production data does. real_data = (get_max_id(store, "TranslationMessage") >= 1000000) if real_data and not options.force: raise DoNotRunOnProduction( "Refusing to delete Ubuntu data unless you --force me.") # For some configs it's just absolutely clear this script shouldn't # run. Don't even accept --force there. forbidden_configs = re.compile('(edge|lpnet|production)') current_config = os.getenv('LPCONFIG', 'an unknown config') if forbidden_configs.match(current_config): raise DoNotRunOnProduction( "I won't delete Ubuntu data on %s and you can't --force me." % current_config)
def fetchProjectGroupsForDisplay(self): """See `ITranslationGroup`.""" # Avoid circular imports. from lp.registry.model.projectgroup import ProjectGroup using = [ ProjectGroup, LeftJoin( LibraryFileAlias, LibraryFileAlias.id == ProjectGroup.iconID), LeftJoin( LibraryFileContent, LibraryFileContent.id == LibraryFileAlias.contentID), ] tables = ( ProjectGroup, LibraryFileAlias, LibraryFileContent, ) project_data = ISlaveStore(ProjectGroup).using(*using).find( tables, ProjectGroup.translationgroupID == self.id, ProjectGroup.active == True).order_by(ProjectGroup.display_name) return DecoratedResultSet(project_data, operator.itemgetter(0))
def to_swift(log, start_lfc_id=None, end_lfc_id=None, remove_func=False): '''Copy a range of Librarian files from disk into Swift. start and end identify the range of LibraryFileContent.id to migrate (inclusive). If remove_func is set, it is called for every file after being copied into Swift. ''' swift_connection = connection_pool.get() fs_root = os.path.abspath(config.librarian_server.root) if start_lfc_id is None: start_lfc_id = 1 if end_lfc_id is None: # Maximum id capable of being stored on the filesystem - ffffffff end_lfc_id = 0xffffffff log.info("Walking disk store {0} from {1} to {2}, inclusive".format( fs_root, start_lfc_id, end_lfc_id)) start_fs_path = filesystem_path(start_lfc_id) end_fs_path = filesystem_path(end_lfc_id) # Walk the Librarian on disk file store, searching for matching # files that may need to be copied into Swift. We need to follow # symlinks as they are being used span disk partitions. for dirpath, dirnames, filenames in scandir.walk(fs_root, followlinks=True): # Don't recurse if we know this directory contains no matching # files. if (start_fs_path[:len(dirpath)] > dirpath or end_fs_path[:len(dirpath)] < dirpath): dirnames[:] = [] continue else: # We need to descend in order, making it possible to resume # an aborted job. dirnames.sort() log.debug('Scanning {0} for matching files'.format(dirpath)) _filename_re = re.compile('^[0-9a-f]{2}$') for filename in sorted(filenames): fs_path = os.path.join(dirpath, filename) # Skip any files with names that are not two hex digits. # This is noise in the filesystem database. if _filename_re.match(filename) is None: log.debug('Skipping noise %s' % fs_path) continue if fs_path < start_fs_path: continue if fs_path > end_fs_path: break # Skip files which have been modified recently, as they # may be uploads still in progress. if os.path.getmtime(fs_path) > time.time() - ONE_DAY: log.debug('Skipping recent upload %s' % fs_path) continue # Reverse engineer the LibraryFileContent.id from the # file's path. Warn about and skip bad filenames. rel_fs_path = fs_path[len(fs_root) + 1:] hex_lfc = ''.join(rel_fs_path.split('/')) if len(hex_lfc) != 8: log.warning( 'Filename length fail, skipping {0}'.format(fs_path)) continue try: lfc = int(hex_lfc, 16) except ValueError: log.warning('Invalid hex fail, skipping {0}'.format(fs_path)) continue log.debug('Found {0} ({1})'.format(lfc, filename)) if ISlaveStore(LibraryFileContent).get(LibraryFileContent, lfc) is None: log.info("{0} exists on disk but not in the db".format(lfc)) continue container, obj_name = swift_location(lfc) try: quiet_swiftclient(swift_connection.head_container, container) log.debug2('{0} container already exists'.format(container)) except swiftclient.ClientException as x: if x.http_status != 404: raise log.info('Creating {0} container'.format(container)) swift_connection.put_container(container) try: headers = quiet_swiftclient(swift_connection.head_object, container, obj_name) log.debug("{0} already exists in Swift({1}, {2})".format( lfc, container, obj_name)) if ('X-Object-Manifest' not in headers and int(headers['content-length']) != os.path.getsize(fs_path)): raise AssertionError( '{0} has incorrect size in Swift'.format(lfc)) except swiftclient.ClientException as x: if x.http_status != 404: raise log.info('Putting {0} into Swift ({1}, {2})'.format( lfc, container, obj_name)) _put(log, swift_connection, lfc, container, obj_name, fs_path) if remove_func: remove_func(fs_path)
class ExportTranslationsToBranch(LaunchpadCronScript): """Commit translations to translations_branches where requested.""" commit_message = "Launchpad automatic translations update." # Don't bother looking for a previous translations commit if it's # longer than this ago. previous_commit_cutoff_age = timedelta(days=7) # We can find out when the last translations commit to a branch # completed, and we can find out when the last transaction changing # a POFile started. This is exactly the wrong way around for # figuring out which POFiles need a fresh export, so assume a fudge # factor. fudge_factor = timedelta(hours=6) def add_my_options(self): """See `LaunchpadScript`.""" self.parser.add_option( '-n', '--no-fudge', action='store_true', dest='no_fudge', default=False, help="For testing: no fudge period for POFile changes.") def _checkForObjections(self, source): """Check for reasons why we can't commit to this branch. Raises `ConcurrentUpdateError` if there is such a reason. :param source: the series being exported to its translations_branch. """ if source.translations_branch is None: raise ConcurrentUpdateError( "Translations export for %s was just disabled." % ( source.title)) branch = source.translations_branch jobsource = getUtility(IRosettaUploadJobSource) unfinished_jobs = jobsource.findUnfinishedJobs( branch, since=datetime.now(pytz.UTC) - timedelta(days=1)) if unfinished_jobs.any(): raise ConcurrentUpdateError( "Translations branch for %s has pending translations " "changes. Not committing." % source.title) def _makeDirectBranchCommit(self, db_branch): """Create a `DirectBranchCommit`. :param db_branch: A `Branch` object as defined in Launchpad. :return: A `DirectBranchCommit` for `db_branch`. """ committer_id = 'Launchpad Translations on behalf of %s' % ( db_branch.owner.name) return DirectBranchCommit(db_branch, committer_id=committer_id) def _commit(self, source, committer): """Commit changes to branch. Check for race conditions.""" self._checkForObjections(source) committer.commit(self.commit_message, txn=self.txn) def _isTranslationsCommit(self, revision): """Is `revision` an automatic translations commit?""" return revision.message == self.commit_message def _getRevisionTime(self, revision): """Get timestamp of `revision`.""" # The bzr timestamp is a float representing UTC-based seconds # since the epoch. It stores the timezone as well, but we can # ignore it here. return datetime.fromtimestamp(revision.timestamp, pytz.UTC) def _getLatestTranslationsCommit(self, branch): """Get date of last translations commit to `branch`, if any.""" cutoff_date = datetime.now(pytz.UTC) - self.previous_commit_cutoff_age revno, current_rev = branch.last_revision_info() repository = branch.repository graph = repository.get_graph() for rev_id in graph.iter_lefthand_ancestry( current_rev, (NULL_REVISION, )): revision = repository.get_revision(rev_id) revision_date = self._getRevisionTime(revision) if self._isTranslationsCommit(revision): return revision_date if revision_date < cutoff_date: # Going too far back in history. Give up. return None return None def _findChangedPOFiles(self, source, changed_since): """Return an iterator of POFiles changed since `changed_since`. :param source: a `ProductSeries`. :param changed_since: a datetime object. 
""" subset = getUtility(IPOTemplateSet).getSubset( productseries=source, iscurrent=True) for template in subset: for pofile in template.pofiles: if (changed_since is None or pofile.date_changed > changed_since or template.date_last_updated > changed_since): yield pofile def _exportToBranch(self, source): """Export translations for source into source.translations_branch. :param source: a `ProductSeries`. """ self.logger.info("Exporting %s." % source.title) self._checkForObjections(source) branch = source.translations_branch branch = source.translations_branch try: committer = self._makeDirectBranchCommit(branch) except StaleLastMirrored as e: # Request a rescan of the branch. Do this on the master # store, or we won't be able to modify the branch object. # (The master copy may also be more recent, in which case # the rescan won't be necessary). master_branch = IMasterStore(branch).get(Branch, branch.id) master_branch.branchChanged(**get_db_branch_info(**e.info)) self.logger.warning( "Skipped %s due to stale DB info, and scheduled a new scan.", branch.bzr_identity) if self.txn: self.txn.commit() return self.logger.debug("Created DirectBranchCommit.") if self.txn: self.txn.commit() bzr_branch = committer.bzrbranch last_commit_date = self._getLatestTranslationsCommit(bzr_branch) if last_commit_date is None: self.logger.debug("No previous translations commit found.") changed_since = None else: # Export files that have been touched since the last export. # Subtract a fudge factor because the last-export date marks # the end of the previous export, and the POFiles' # last-touched timestamp marks the beginning of the last # transaction that changed them. self.logger.debug("Last commit was at %s." % last_commit_date) changed_since = last_commit_date - self.fudge_factor change_count = 0 try: for pofile in self._findChangedPOFiles(source, changed_since): base_path = os.path.dirname(pofile.potemplate.path) language_code = pofile.getFullLanguageCode() self.logger.debug("Exporting %s." % language_code) pofile_path = os.path.join( base_path, language_code + '.po') pofile_contents = pofile.export() committer.writeFile(pofile_path, pofile_contents) change_count += 1 # We're not actually writing any changes to the # database, but it's not polite to stay in one # transaction for too long. if self.txn: self.txn.commit() # We're done with this POFile. Don't bother caching # anything about it any longer. pofile.potemplate.clearPOFileCache() if change_count > 0: self.logger.debug("Writing to branch.") self._commit(source, committer) finally: committer.unlock() def _exportToBranches(self, productseries_iter): """Loop over `productseries_iter` and export their translations.""" items_done = 0 items_failed = 0 unpushed_branches = 0 productseries = shortlist(productseries_iter, longest_expected=2000) for source in productseries: try: self._exportToBranch(source) if self.txn: self.txn.commit() except (KeyboardInterrupt, SystemExit): raise except NotBranchError: unpushed_branches += 1 if self.txn: self.txn.abort() self._handleUnpushedBranch(source) if self.txn: self.txn.commit() except Exception as e: items_failed += 1 self.logger.error( "Failure in %s/%s: %s", source.product.name, source.name, repr(e)) if self.txn: self.txn.abort() items_done += 1 self.logger.info( "Processed %d item(s); %d failure(s), %d unpushed branch(es)." % ( items_done, items_failed, unpushed_branches)) def _sendMail(self, sender, recipients, subject, text): """Wrapper for `simple_sendmail`. 
Fakeable for easy testing.""" simple_sendmail(sender, recipients, subject, text) def _handleUnpushedBranch(self, productseries): """Branch has never been scanned. Notify owner. This means that as far as the Launchpad database knows, there is no actual bzr branch behind this `IBranch` yet. """ branch = productseries.translations_branch self.logger.info("Notifying %s of unpushed branch %s." % ( branch.owner.name, branch.bzr_identity)) template = get_email_template('unpushed-branch.txt', 'translations') text = template % { 'productseries': productseries.title, 'branch_url': branch.bzr_identity, } recipients = get_contact_email_addresses(branch.owner) sender = format_address( "Launchpad Translations", config.canonical.noreply_from_address) subject = "Launchpad: translations branch has not been set up." self._sendMail(sender, recipients, subject, text) def main(self): """See `LaunchpadScript`.""" # Avoid circular imports. from lp.registry.model.product import Product from lp.registry.model.productseries import ProductSeries errorlog.globalErrorUtility.configure(self.config_name) if self.options.no_fudge: self.fudge_factor = timedelta(0) self.logger.info("Exporting to translations branches.") self.store = ISlaveStore(Product) product_join = Join( ProductSeries, Product, ProductSeries.product == Product.id) productseries = self.store.using(product_join).find( ProductSeries, And( Product.translations_usage == ServiceUsage.LAUNCHPAD, ProductSeries.translations_branch != None)) # Anything deterministic will do, and even that is only for # testing. productseries = productseries.order_by(ProductSeries.id) bzrserver = get_rw_server() bzrserver.start_server() try: self._exportToBranches(productseries) finally: bzrserver.stop_server()
def test_slave_only_fast_downtime_rollout(self):
    '''You can always access a working slave store during fast downtime.
    '''
    # Everything is running happily.
    store = ISlaveStore(Person)
    original_store = store
    self.assertTrue(self.store_is_working(store))
    self.assertTrue(self.store_is_slave(store))

    # But fast downtime is about to happen.

    # Replication is stopped on the slave, and lag starts
    # increasing.

    # All connections to the master are killed so database schema
    # updates can be applied.
    self.pgbouncer_cur.execute('DISABLE %s' % self.master_dbname)
    self.pgbouncer_cur.execute('KILL %s' % self.master_dbname)

    # Of course, slave connections are unaffected.
    self.assertTrue(self.store_is_working(store))

    # After schema updates have been made to the master, it is
    # reenabled.
    self.pgbouncer_cur.execute('RESUME %s' % self.master_dbname)
    self.pgbouncer_cur.execute('ENABLE %s' % self.master_dbname)

    # And the slaves are taken down, and replication reenabled so the
    # schema updates can replicate.
    self.pgbouncer_cur.execute('DISABLE %s' % self.slave_dbname)
    self.pgbouncer_cur.execute('KILL %s' % self.slave_dbname)

    # The next attempt at accessing the slave store will fail
    # with a DisconnectionError.
    self.assertRaises(DisconnectionError, store.execute, 'SELECT TRUE')

    transaction.abort()

    # But if we handle that and retry, we can continue.
    # Now the failed connection has been detected, the next Store
    # we are handed is a master Store instead of a slave.
    store = ISlaveStore(Person)
    self.assertTrue(self.store_is_master(store))
    self.assertIsNot(ISlaveStore(Person), original_store)

    # But alas, it might not work in the first transaction: if the
    # Store was handed out earlier, its connection was killed by
    # pgbouncer and it hasn't noticed yet.
    self.assertFalse(self.store_is_working(store))

    transaction.abort()

    # Next retry attempt, everything is fine using the master
    # connection, even though our code only asked for a slave.
    store = ISlaveStore(Person)
    self.assertTrue(self.store_is_master(store))
    self.assertTrue(self.store_is_working(store))

    # The original Store is busted though. You cannot reuse Stores
    # across transaction boundaries because you might end up using
    # the wrong Store.
    self.assertFalse(self.store_is_working(original_store))
    transaction.abort()

    # Once replication has caught up, the slave is reenabled.
    self.pgbouncer_cur.execute('RESUME %s' % self.slave_dbname)
    self.pgbouncer_cur.execute('ENABLE %s' % self.slave_dbname)

    # And next transaction, we are back to normal.
    store = ISlaveStore(Person)
    self.assertTrue(self.store_is_working(store))
    self.assertTrue(self.store_is_slave(store))
    self.assertIs(original_store, store)
def test_master_slave_fast_downtime_rollout(self): '''Parts of your app can keep working during a fast downtime update. ''' # Everything is running happily. master_store = IMasterStore(Person) self.assertTrue(self.store_is_master(master_store)) self.assertTrue(self.store_is_working(master_store)) slave_store = ISlaveStore(Person) self.assertTrue(self.store_is_slave(slave_store)) self.assertTrue(self.store_is_working(slave_store)) # But fast downtime is about to happen. # Replication is stopped on the slave, and lag starts # increasing. # All connections to the master are killed so database schema # updates can be applied. self.pgbouncer_cur.execute('DISABLE %s' % self.master_dbname) self.pgbouncer_cur.execute('KILL %s' % self.master_dbname) # Of course, slave connections are unaffected. self.assertTrue(self.store_is_working(slave_store)) # But attempts to use a master store will fail. self.assertFalse(self.store_is_working(master_store)) transaction.abort() # After schema updates have been made to the master, it is # reenabled. self.pgbouncer_cur.execute('RESUME %s' % self.master_dbname) self.pgbouncer_cur.execute('ENABLE %s' % self.master_dbname) # And the slaves taken down, and replication reenabled so the # schema updates can replicate. self.pgbouncer_cur.execute('DISABLE %s' % self.slave_dbname) self.pgbouncer_cur.execute('KILL %s' % self.slave_dbname) # The master store is working again. master_store = IMasterStore(Person) self.assertTrue(self.store_is_master(master_store)) self.assertTrue(self.store_is_working(master_store)) # The next attempt at accessing the slave store will fail # with a DisconnectionError. slave_store = ISlaveStore(Person) self.assertTrue(self.store_is_slave(slave_store)) self.assertRaises(DisconnectionError, slave_store.execute, 'SELECT TRUE') transaction.abort() # But if we handle that and retry, we can continue. # Now the failed connection has been detected, the next Store # we are handed is a master Store instead of a slave. slave_store = ISlaveStore(Person) self.assertTrue(self.store_is_master(slave_store)) self.assertTrue(self.store_is_working(slave_store)) # Once replication has caught up, the slave is reenabled. self.pgbouncer_cur.execute('RESUME %s' % self.slave_dbname) self.pgbouncer_cur.execute('ENABLE %s' % self.slave_dbname) # And next transaction, we are back to normal. transaction.abort() master_store = IMasterStore(Person) self.assertTrue(self.store_is_master(master_store)) self.assertTrue(self.store_is_working(master_store)) slave_store = ISlaveStore(Person) self.assertTrue(self.store_is_slave(slave_store)) self.assertTrue(self.store_is_working(slave_store))
def get_stacked_branches(): """Iterate over all branches that, according to the db, are stacked.""" # Avoiding circular import. from lp.code.model.branch import Branch return ISlaveStore(Branch).find(Branch, Not(Branch.stacked_on == None))