def test__parse_snapshots_names_with_multiple_schemas__multiple_schemas__ambiguous():
    """A name that parses under two naming schemas must raise a descriptive ValueError."""
    names = ["snap-2018-09-06-11-30"]
    schemas = ["snap-%Y-%m-%d-%H-%M", "snap-%Y-%d-%m-%H-%M"]

    with pytest.raises(ValueError) as e:
        parse_snapshots_names_with_multiple_schemas(names, schemas)

    expected_message = (
        "Snapshot name snap-2018-09-06-11-30 was parsed ambiguously: as 2018-09-06 11:30:00, "
        "and, with naming schema snap-%Y-%d-%m-%H-%M, as 2018-06-09 11:30:00"
    )
    assert e.value.args[0] == expected_message
def get_snapshots_to_send_with_naming_schemas(src_snapshots, dst_snapshots, replication_task):
    """Select which source snapshots to replicate and the incremental base to send from.

    Returns a 3-tuple ``(incremental_base, snapshots_to_send, False)`` where
    ``incremental_base`` is the name of the newest snapshot present on both sides
    (or ``None`` for a full send) and ``snapshots_to_send`` is a list of snapshot
    names, oldest first.  The trailing ``False`` flag's meaning is defined by the
    caller elsewhere in the project — not determinable from this block.
    """
    naming_schemas = replication_task_naming_schemas(replication_task)
    parsed_src_snapshots = parse_snapshots_names_with_multiple_schemas(src_snapshots, naming_schemas)
    parsed_dst_snapshots = parse_snapshots_names_with_multiple_schemas(dst_snapshots, naming_schemas)
    try:
        # Newest snapshot common to source and destination becomes the incremental base.
        parsed_incremental_base = sorted(
            set(parsed_src_snapshots) & set(parsed_dst_snapshots),
            key=parsed_snapshot_sort_key,
        )[-1]
        incremental_base = parsed_incremental_base.name
    except IndexError:
        # No common snapshot: a full (non-incremental) send will be needed.
        parsed_incremental_base = None
        incremental_base = None
    snapshots_to_send = [
        parsed_snapshot
        for parsed_snapshot in sorted(parsed_src_snapshots, key=parsed_snapshot_sort_key)
        if ((
            parsed_incremental_base is None or
            # is newer than incremental base
            parsed_snapshot != parsed_incremental_base and sorted(
                [parsed_snapshot, parsed_incremental_base],
                key=parsed_snapshot_sort_key)[0] == parsed_incremental_base) and
            replication_task_should_replicate_parsed_snapshot(replication_task, parsed_snapshot))
    ]
    # Do not send something that will immediately be removed by retention policy
    will_be_removed = replication_task.retention_policy.calculate_delete_snapshots(
        # We don't know what time it is, our best guess is newest snapshot datetime
        max([
            parsed_src_snapshot.datetime
            for parsed_src_snapshot in parsed_src_snapshots
        ] or [datetime.max]),  # datetime.max fallback when nothing parsed: retention sees "far future"
        snapshots_to_send, snapshots_to_send)
    snapshots_to_send = [
        parsed_snapshot.name
        for parsed_snapshot in snapshots_to_send
        if parsed_snapshot not in will_be_removed
    ]
    return incremental_base, snapshots_to_send, False
async def count_eligible_manual_snapshots(self, data):
    """Count snapshots on the given datasets and how many match the requested
    naming schemas or name regex.

    Exactly one of ``data["naming_schema"]`` / ``data["name_regex"]`` must be
    truthy; otherwise a ``CallError`` is raised.
    """
    if data["naming_schema"] and data["name_regex"]:
        raise CallError("`naming_schema` and `name_regex` cannot be used simultaneously", errno.EINVAL)

    async with self._handle_ssh_exceptions():
        async with self._get_zettarepl_shell(data["transport"], data["ssh_credentials"]) as shell:
            queries = [(dataset, False) for dataset in data["datasets"]]
            snapshots = await self.middleware.run_in_thread(multilist_snapshots, shell, queries)

    names = [snapshot.name for snapshot in snapshots]
    if data["naming_schema"]:
        parsed = parse_snapshots_names_with_multiple_schemas(names, data["naming_schema"])
    elif data["name_regex"]:
        try:
            name_pattern = compile_name_regex(data["name_regex"])
        except Exception as e:
            raise CallError(f"Invalid `name_regex`: {e}")
        parsed = [name for name in names if name_pattern.match(name)]
    else:
        raise CallError("Either `naming_schema` or `name_regex` must be specified", errno.EINVAL)

    return {
        "total": len(snapshots),
        "eligible": len(parsed),
    }
def calculate_dataset_snapshots_to_remove(owners: [SnapshotOwner], dataset: str, snapshots: [Snapshot]):
    """Return the names of *snapshots* on *dataset* that no owner wants to retain.

    A snapshot is deleted when at least one owning owner wants deletion and no
    owning owner asks to retain it.  If snapshot names cannot be parsed, log a
    warning and delete nothing.
    """
    all_schemas = set().union(*[set(owner.get_naming_schemas()) for owner in owners])
    try:
        parsed_snapshot_names = parse_snapshots_names_with_multiple_schemas(snapshots, all_schemas)
    except ValueError as e:
        logger.warning("Error parsing snapshot names for dataset %r: %r", dataset, e)
        return []

    result = []
    for parsed in parsed_snapshot_names:
        # Owners whose naming schema produced this snapshot and who claim it.
        claimants = [
            owner for owner in owners
            if parsed.naming_schema in owner.get_naming_schemas() and owner.owns_snapshot(parsed)
        ]
        if not claimants:
            continue
        if not any(owner.wants_to_delete() for owner in claimants):
            continue
        if any(owner.should_retain(dataset, parsed) for owner in claimants):
            continue
        logger.debug("No one of %r retains snapshot %r", claimants, parsed.name)
        result.append(parsed.name)
    return result
def calculate_dataset_snapshots_to_remove(owners: [SnapshotOwner], dataset: str, snapshots: [Snapshot]):
    """Return the names of *snapshots* on *dataset* that no owner wants to retain,
    while always keeping at least one (the newest) snapshot per naming schema.

    A snapshot is deleted when at least one owning owner wants deletion and no
    owning owner asks to retain it.  If snapshot names cannot be parsed, log a
    warning and delete nothing.
    """
    try:
        parsed_snapshot_names = parse_snapshots_names_with_multiple_schemas(
            snapshots,
            set().union(*[set(owner.get_naming_schemas()) for owner in owners]))
    except ValueError as e:
        logger.warning("Error parsing snapshot names for dataset %r: %r", dataset, e)
        return []
    # Track the newest snapshot per naming schema so it can be spared if
    # deletion would otherwise leave that schema with no snapshots at all.
    # NOTE(review): uses `.parsed_datetime` here while other versions of this
    # code use `.datetime` — confirm against the ParsedSnapshotName definition.
    newest_snapshot_for_naming_schema = {}
    for parsed_snapshot_name in parsed_snapshot_names:
        if (parsed_snapshot_name.naming_schema not in newest_snapshot_for_naming_schema or
                (newest_snapshot_for_naming_schema[
                    parsed_snapshot_name.naming_schema].parsed_datetime <
                 parsed_snapshot_name.parsed_datetime)):
            newest_snapshot_for_naming_schema[
                parsed_snapshot_name.naming_schema] = parsed_snapshot_name
    # Keep only the names; the parsed objects are no longer needed here.
    newest_snapshot_for_naming_schema = {
        k: v.name
        for k, v in newest_snapshot_for_naming_schema.items()
    }
    # Names still surviving per schema; deleted names are discarded below.
    snapshots_left_for_naming_schema = defaultdict(set)
    for parsed_snapshot_name in parsed_snapshot_names:
        snapshots_left_for_naming_schema[
            parsed_snapshot_name.naming_schema].add(parsed_snapshot_name.name)
    result = []
    for parsed_snapshot_name in parsed_snapshot_names:
        # Owners whose naming schema produced this snapshot and who claim it.
        snapshot_owners = [
            owner
            for owner in owners
            if (parsed_snapshot_name.naming_schema in owner.get_naming_schemas(
            ) and owner.owns_snapshot(parsed_snapshot_name))
        ]
        # Delete only when someone wants deletion and no one retains it.
        if (snapshot_owners and any(owner.wants_to_delete() for owner in snapshot_owners) and not any(
                owner.should_retain(dataset, parsed_snapshot_name)
                for owner in snapshot_owners)):
            logger.debug("No one of %r retains snapshot %r", snapshot_owners, parsed_snapshot_name.name)
            snapshots_left_for_naming_schema[
                parsed_snapshot_name.naming_schema].discard(
                    parsed_snapshot_name.name)
            result.append(parsed_snapshot_name.name)
    # If a schema would be left with nothing, spare its newest snapshot.
    for naming_schema, snapshots_left in snapshots_left_for_naming_schema.items():
        if not snapshots_left:
            newest_snapshot = newest_snapshot_for_naming_schema[naming_schema]
            logger.info(
                "Not destroying %r as it is the only snapshot left for naming schema %r", newest_snapshot,
                naming_schema)
            result.remove(newest_snapshot)
    return result
async def count_eligible_manual_snapshots(self, datasets, naming_schemas, transport, ssh_credentials=None):
    """Count snapshots on *datasets* and how many parse under *naming_schemas*."""
    async with self._handle_ssh_exceptions():
        async with self._get_zettarepl_shell(transport, ssh_credentials) as shell:
            queries = [(dataset, False) for dataset in datasets]
            snapshots = await self.middleware.run_in_thread(multilist_snapshots, shell, queries)

    snapshot_names = [snapshot.name for snapshot in snapshots]
    parsed = parse_snapshots_names_with_multiple_schemas(snapshot_names, naming_schemas)
    return {
        "total": len(snapshots),
        "eligible": len(parsed),
    }
async def count_eligible_manual_snapshots(self, dataset, naming_schemas, transport, ssh_credentials=None):
    """Count snapshots on *dataset* and how many parse under *naming_schemas*.

    Any failure while talking to the remote shell is surfaced as a ``CallError``.
    """
    try:
        shell = await self._get_zettarepl_shell(transport, ssh_credentials)
        snapshots = await self.middleware.run_in_thread(list_snapshots, shell, dataset, False)
    except Exception as e:
        # Deliberately broad: any transport/listing failure becomes a CallError.
        raise CallError(repr(e))

    snapshot_names = [snapshot.name for snapshot in snapshots]
    eligible = parse_snapshots_names_with_multiple_schemas(snapshot_names, naming_schemas)
    return {
        "total": len(snapshots),
        "eligible": len(eligible),
    }
def test__parse_snapshots_names_with_multiple_schemas__multiple_schemas():
    """Each snapshot name is matched by exactly the schema whose separators fit it."""
    names = ["snap-2018-09-06-11-30", "snap-2018-09-06-11_31"]
    schemas = ["snap-%Y-%m-%d-%H_%M", "snap-%Y-%m-%d-%H-%M"]
    expected = {
        ParsedSnapshotName("snap-%Y-%m-%d-%H-%M", "snap-2018-09-06-11-30", datetime(2018, 9, 6, 11, 30), None),
        ParsedSnapshotName("snap-%Y-%m-%d-%H_%M", "snap-2018-09-06-11_31", datetime(2018, 9, 6, 11, 31), None),
    }
    assert set(parse_snapshots_names_with_multiple_schemas(names, schemas)) == expected
async def count_eligible_manual_snapshots(self, datasets, naming_schemas, transport, ssh_credentials=None):
    """Count snapshots on *datasets* and how many parse under *naming_schemas*.

    SSH transport failures are mapped to a ``CallError`` with ``EACCES``.
    """
    try:
        async with self._get_zettarepl_shell(transport, ssh_credentials) as shell:
            queries = [(dataset, False) for dataset in datasets]
            snapshots = await self.middleware.run_in_thread(multilist_snapshots, shell, queries)
    except SSH_EXCEPTIONS as e:
        # Strip the unhelpful "[Errno None] " prefix paramiko-style errors carry.
        raise CallError(repr(e).replace("[Errno None] ", ""), errno=errno.EACCES)

    snapshot_names = [snapshot.name for snapshot in snapshots]
    parsed = parse_snapshots_names_with_multiple_schemas(snapshot_names, naming_schemas)
    return {
        "total": len(snapshots),
        "eligible": len(parsed),
    }
def test__parse_snapshots_names_with_multiple_schemas__multiple_schemas__inambiguous():
    """Schemas that differ only in a literal suffix are not ambiguous."""
    parsed = parse_snapshots_names_with_multiple_schemas(
        ["snap-2018-09-06-11-30-1w", "snap-2018-09-06-11-30-2m"],
        ["snap-%Y-%m-%d-%H-%M-1w", "snap-%Y-%m-%d-%H-%M-2m"],
    )
    assert set(parsed) == {
        ParsedSnapshotName("snap-%Y-%m-%d-%H-%M-1w", "snap-2018-09-06-11-30-1w", datetime(2018, 9, 6, 11, 30)),
        ParsedSnapshotName("snap-%Y-%m-%d-%H-%M-2m", "snap-2018-09-06-11-30-2m", datetime(2018, 9, 6, 11, 30)),
    }