class AliasPseudonym(Pseudonym):
    """A pseudonym that introduces package aliases based on the supplied configuration."""

    CONFIGURATION_DEFAULT = {"package_name": None, "package_version": None, "index_url": None}
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): str,
            Required("package_version"): SchemaAny(str, None),
            Required("index_url"): SchemaAny(str, None),
            Required("aliases"): Schema(
                [Schema({Required("package_name"): str, Required("package_version"): str, Required("index_url"): str})]
            ),
        }
    )

    @classmethod
    def should_include(cls, _: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Register self, never."""
        yield from ()
        return None

    def run(self, package_version: PackageVersion) -> Generator[Tuple[str, str, str], None, None]:
        """Create alternatives to packages based on the configuration supplied."""
        configured_version = self.configuration["package_version"]
        configured_index = self.configuration["index_url"]

        # A None selector matches anything; otherwise the resolved package must match exactly.
        version_matches = configured_version is None or configured_version == package_version.locked_version
        index_matches = configured_index is None or configured_index == package_version.index.url

        if not (version_matches and index_matches):
            yield from ()
            return

        for alias in self.configuration["aliases"]:
            yield alias["package_name"], alias["package_version"], alias["index_url"]
class SkipPackageStepPrescription(StepPrescription):
    """Skip package prescription step unit implementation."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): SchemaAny(str, None),
            Required("match"): PRESCRIPTION_SKIP_PACKAGE_STEP_MATCH_ENTRY_SCHEMA,
            Required("multi_package_resolution"): bool,
            Required("run"): SchemaAny(PRESCRIPTION_SKIP_PACKAGE_STEP_RUN_SCHEMA, None),
            Required("prescription"): Schema({"run": bool}),
        }
    )

    def run(self, state: State, package_version: PackageVersion) -> None:
        """Run main entry-point for steps to skip packages."""
        # The package must satisfy every configured criterion before the skip fires;
        # the short-circuit chain keeps the original evaluation order.
        matches = (
            self._index_url_check(self._index_url, package_version.index.url)
            and (not self._specifier or package_version.locked_version in self._specifier)
            and (self._develop is None or package_version.develop == self._develop)
            and self._run_state_with_initiator(state, package_version)
        )
        if not matches:
            return None

        try:
            self._run_base()
        finally:
            self._configuration["prescription"]["run"] = True

        raise SkipPackage
class SkipPackageSievePrescription(UnitPrescription):
    """Skip package prescription unit implementation."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): SchemaAny(str, None),
            Required("run"): SchemaAny(PRESCRIPTION_SKIP_PACKAGE_SIEVE_RUN_ENTRY_SCHEMA, None),
            Required("match"): PRESCRIPTION_SKIP_PACKAGE_SIEVE_MATCH_ENTRY_SCHEMA,
        }
    )

    _logged = attr.ib(type=bool, kw_only=True, init=False, default=False)

    @staticmethod
    def is_sieve_unit_type() -> bool:
        """Check if this unit is of type sieve."""
        return True

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Check if the given pipeline unit should be included in the given pipeline configuration."""
        if not cls._should_include_base(builder_context):
            yield from ()
            return None

        prescription: Dict[str, Any] = cls._PRESCRIPTION  # type: ignore
        match = prescription["match"]
        run = prescription.get("run")

        # A prescription may carry a single match entry or a list of them;
        # one unit configuration is yielded per entry either way.
        entries = match if isinstance(match, list) else [match]
        for entry in entries:
            yield {"package_name": entry["package_name"], "run": run, "match": entry}
        return None

    def pre_run(self) -> None:
        """Initialize this unit before each run."""
        self._logged = False
        super().pre_run()

    def run(self, _: Generator[PackageVersion, None, None]) -> Generator[PackageVersion, None, None]:
        """Run main entry-point for sieves to filter and score packages."""
        if self._logged is False:
            # Log and report only on the first invocation per resolver run.
            self._logged = True
            self._run_log()
            self._run_stack_info()
        raise SkipPackage
class SievePrescription(UnitPrescription):
    """Sieve base class implementation."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): SchemaAny(str, None),
            Required("match"): PRESCRIPTION_SIEVE_MATCH_ENTRY_SCHEMA,
            Required("run"): SchemaAny(PRESCRIPTION_SIEVE_RUN_SCHEMA, None),
        }
    )

    _logged = attr.ib(type=bool, kw_only=True, init=False, default=False)
    _specifier = attr.ib(type=Optional[SpecifierSet], kw_only=True, init=False, default=None)
    _index_url = attr.ib(type=Optional[str], kw_only=True, init=False, default=None)

    @staticmethod
    def is_sieve_unit_type() -> bool:
        """Check if this unit is of type sieve."""
        return True

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Check if the given pipeline unit should be included in the given pipeline configuration."""
        if not cls._should_include_base(builder_context):
            yield from ()
            return None

        prescription: Dict[str, Any] = cls._PRESCRIPTION  # type: ignore
        yield from cls._yield_should_include(prescription)
        return None

    def pre_run(self) -> None:
        """Prepare before running this pipeline unit."""
        matched = self.match_prescription.get("package_version", {})
        specifier_str = matched.get("version")
        if specifier_str:
            specifier = SpecifierSet(specifier_str)
            # Pre-releases participate in the match as well.
            specifier.prereleases = True
            self._specifier = specifier

        self._index_url = matched.get("index_url")
        self._logged = False
        super().pre_run()

    def run(self, package_versions: Generator[PackageVersion, None, None]) -> Generator[PackageVersion, None, None]:
        """Run main entry-point for sieves to filter and score packages."""
        for candidate in package_versions:
            version_mismatch = self._specifier and candidate.locked_version not in self._specifier
            index_mismatch = self._index_url and candidate.index.url != self._index_url

            if version_mismatch or index_mismatch:
                # Not matched by the prescription - the candidate passes through.
                yield candidate
                continue

            # Matched candidates are filtered out; log/report only once per run.
            if not self._logged:
                self._logged = True
                self._run_log()
                self._run_stack_info()
class SetScoreStep(Step):
    """A step that is setting score for packages."""

    # Assign probability is used to "assign" a score to the package to simulate knowledge
    # coverage for packages resolved - 0.75 means ~75% of packages will have a score.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("multi_package_resolution"): bool,
            # Fix: was `str` only, which rejected the unit's own CONFIGURATION_DEFAULT
            # (package_name defaults to None, meaning "any package" as in the Step base).
            Required("package_name"): SchemaAny(str, None),
            SchemaOptional("index_url"): SchemaAny(str, None),
            SchemaOptional("package_version"): SchemaAny(str, None),
            SchemaOptional("score"): SchemaAny(float, None),
        }
    )
    CONFIGURATION_DEFAULT: Dict[str, Any] = {
        "index_url": None,
        "multi_package_resolution": False,
        "package_name": None,
        "package_version": None,
        "score": None,
    }

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Optional[Dict[str, Any]]:
        """Register self, never."""
        return None

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run.

        If no score was configured, draw one at random; the drawn value is stored back
        into the configuration, so it stays fixed for subsequent runs of this unit.
        """
        if self.configuration["score"] is None:
            self.configuration["score"] = random.uniform(self.SCORE_MIN, self.SCORE_MAX)
        super().pre_run()

    def run(
        self, _: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Score the given package.

        Returns the configured score (with no justification) when the package matches the
        configured version/index selectors, otherwise None.
        """
        if (
            self.configuration["package_version"] is not None
            and package_version.locked_version != self.configuration["package_version"]
        ) or (
            self.configuration["index_url"] is not None
            and package_version.index.url != self.configuration["index_url"]
        ):
            return None

        return self.configuration["score"], None
class MockScoreStep(Step):
    """A step that is mocking scoring of packages."""

    # Assign probability is used to "assign" a score to the package to simulate knowledge
    # coverage for packages resolved - 0.75 means ~75% of packages will have a score.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            SchemaOptional("package_name"): SchemaAny(str, None),
            SchemaOptional("assign_probability"): float,
            SchemaRequired("multi_package_resolution"): bool,
        }
    )
    CONFIGURATION_DEFAULT: Dict[str, Any] = {
        "package_name": None,
        "assign_probability": 0.75,
        "multi_package_resolution": False,
    }

    _score_history = attr.ib(type=Dict[Tuple[str, str, str], float], factory=dict, init=False)

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Register self, never."""
        yield from ()
        return None

    def pre_run(self) -> None:
        """Initialize self, before each run."""
        self._score_history.clear()
        super().pre_run()

    def post_run(self) -> None:
        """Print the generated history after the run."""
        # Group scored package tuples by package name, then report each group
        # sorted by score, highest first.
        by_package = {}  # type: Dict[Any, Any]
        for package_tuple, package_score in self._score_history.items():
            by_package.setdefault(package_tuple[0], []).append((package_tuple, package_score))

        for package_name in by_package:
            by_package[package_name] = sorted(
                by_package[package_name], key=operator.itemgetter(1), reverse=True
            )  # type: ignore

        print("-" * 10, " Mock score report ", "-" * 10, file=sys.stderr)
        for package_name in sorted(by_package):
            print(package_name, file=sys.stderr)
            for entry in by_package[package_name]:
                print(f"{str((entry[0][1], entry[0][2])):>50} | {entry[1]:+f}", file=sys.stderr)
        print("-" * 40, file=sys.stderr)

    def run(
        self, _: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Score the given package regardless of the state."""
        # Using seed set to process on the adviser run affects this call - so adviser
        # with same seed set shared scores generated across runs.
        # NOTE: the fresh score is always drawn (even when the package was already
        # scored) so the RNG stream is consumed identically across runs.
        if random.random() <= self.configuration["assign_probability"]:
            fresh_score = random.uniform(self.SCORE_MIN, self.SCORE_MAX)
        else:
            fresh_score = 0.0

        score = self._score_history.setdefault(package_version.to_tuple(), fresh_score)
        return score, None
class Step(Unit):
    """Step base class implementation.

    Configuration option `multi_package_resolution` states whether a step should be run if package
    is resolved multiple times for the same stack.
    """

    # package_name of None means the step is applied to any package.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {Required("package_name"): SchemaAny(str, None), Required("multi_package_resolution"): bool}
    )
    CONFIGURATION_DEFAULT: Dict[str, Any] = {"package_name": None, "multi_package_resolution": False}

    # Bounds for scores a step implementation may produce.
    SCORE_MAX = 1.0
    SCORE_MIN = -1.0

    @staticmethod
    def is_step_unit_type() -> bool:
        """Check if this unit is of type step."""
        return True

    @abc.abstractmethod
    def run(
        self, state: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Run main entry-point for steps to filter and score packages.

        Implementations return an optional (score, justification) pair, or None
        when the step has nothing to contribute for the given package.
        """
class Stride1(Stride):
    """A testing stride implementation."""

    CONFIGURATION_DEFAULT = {
        "linus": {
            "residence": "oregon",
            "children": 3,
            "parents": ["nils", "anna"],
        },
        "package_name": None,
    }
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("linus"): object,
            Required("package_name"): SchemaAny(str, None),
        }
    )

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Check if this pipeline unit should be included in the pipeline configuration."""
        # Never registers itself - testing unit only.
        yield from ()
        return None

    def run(self, state: State) -> Optional[Tuple[float, List[Dict[str, str]]]]:
        """Run noop method."""
class Wrap1(Wrap):
    """A testing wrap implementation."""

    CONFIGURATION_DEFAULT = {
        "thoth": [2018, 2019],
        "cities": ["Brno", "Bonn", "Boston", "Milan"],
        "package_name": None,
    }
    # Fix: package_name previously validated as SchemaAny(str) which rejects the
    # unit's own default of None; sibling testing units (Stride1, Stride2) use
    # SchemaAny(str, None) for the same field.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("thoth"): object,
            Required("cities"): object,
            Required("package_name"): SchemaAny(str, None),
        }
    )

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Optional[Dict[str, Any]]:
        """Check if this pipeline unit should be included in the pipeline configuration."""

    def run(self, state: State) -> None:
        """Run noop method."""
class WrapPrescription(UnitPrescription):
    """Wrap base class implementation."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): SchemaAny(str, None),
            Optional("match"): PRESCRIPTION_WRAP_MATCH_ENTRY_SCHEMA,
            Required("run"): PRESCRIPTION_WRAP_RUN_SCHEMA,
            Required("prescription"): Schema({"run": bool}),
        }
    )

    @staticmethod
    def is_wrap_unit_type() -> bool:
        """Check if this unit is of type wrap."""
        return True

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Check if the given pipeline unit should be included in the given pipeline configuration."""
        if not cls._should_include_base(builder_context):
            yield from ()
            return None

        prescription: Dict[str, Any] = cls._PRESCRIPTION  # type: ignore
        yield from cls._yield_should_include_with_state(prescription)
        return None

    def pre_run(self) -> None:
        """Prepare this pipeline unit before run."""
        self._prepare_justification_link(self.run_prescription.get("justification", []))
        super().pre_run()

    def run(self, state: State) -> None:
        """Run main entry-point for wrap units to filter and score packages."""
        if not self._run_state(state):
            return None

        prescription_conf = self._configuration["prescription"]
        first_run = not prescription_conf["run"]
        if first_run:
            # Justification and manifest changes are recorded only once per resolution.
            justification = self.run_prescription.get("justification")
            if justification:
                state.add_justification(justification)

            advised_manifest_changes = self.run_prescription.get("advised_manifest_changes")
            if advised_manifest_changes:
                state.advised_manifest_changes.append(advised_manifest_changes)

        try:
            self._run_base()
        finally:
            prescription_conf["run"] = True
class AdviserTestCase:
    """A base class for implementing adviser's test cases."""

    data_dir = Path(os.path.dirname(os.path.realpath(__file__))) / "data"

    JUSTIFICATION_SAMPLE_1 = [
        {"message": "Justification sample 1", "type": "WARNING", "link": "https://thoth-station.ninja"},
        {"message": "Justification sample 1", "type": "INFO", "link": "https://thoth-station.ninja"},
        {"message": "Justification sample 1", "type": "ERROR", "link": "https://thoth-station.ninja"},
    ]

    JUSTIFICATION_SAMPLE_2 = [
        {
            "message": "Justification sample 2",
            "type": "INFO",
            "link": "https://thoth-station.ninja",
            "advisory": "Bark!",
        },
    ]

    # NOTE(review): the message below says "sample 2" although this is sample 3 -
    # kept byte-for-byte to preserve behavior; confirm whether any test asserts on it.
    JUSTIFICATION_SAMPLE_3 = [
        {
            "message": "Justification sample 2",
            "type": "INFO",
            "link": "https://thoth-station.ninja",
            "package_name": "tensorflow",
            "version_range": "<2.3>=",
        },
    ]

    _JUSTIFICATION_SCHEMA = Schema(
        [
            {
                "message": All(str, Length(min=1)),
                "type": SchemaAny("INFO", "WARNING", "ERROR", "LATEST", "CVE"),
                "link": All(str, Length(min=1)),
                SchemaOptional("advisory"): All(str, Length(min=1)),
                SchemaOptional("cve_id"): All(str, Length(min=1)),
                SchemaOptional("cve_name"): All(str, Length(min=1)),
                SchemaOptional("package_name"): All(str, Length(min=1)),
                SchemaOptional("version_range"): All(str, Length(min=1)),
            }
        ]
    )

    @classmethod
    def verify_justification_schema(cls, justification: Optional[List[Dict[str, Any]]]) -> bool:
        """Verify the justification schema is correct."""
        if justification is None:
            return True

        try:
            cls._JUSTIFICATION_SCHEMA(justification)
        except Invalid as exc:
            raise AdviserJustificationSchemaError(exc.msg) from exc

        return True
class BootPrescription(UnitPrescription):
    """Boot prescription implementation."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): SchemaAny(str, None),
            Required("match"): SchemaAny(PRESCRIPTION_BOOT_MATCH_ENTRY_SCHEMA, None),
            Required("run"): PRESCRIPTION_BOOT_RUN_SCHEMA,
        }
    )

    @staticmethod
    def is_boot_unit_type() -> bool:
        """Check if this unit is of type boot."""
        return True

    @staticmethod
    def _yield_should_include(unit_prescription: Dict[str, Any]) -> Generator[Dict[str, Any], None, None]:
        """Yield for every entry stated in the match field."""
        match = unit_prescription.get("match", {})
        run = unit_prescription.get("run", {})

        if isinstance(match, list):
            for entry in match:
                yield {"package_name": entry.get("package_name"), "match": entry, "run": run}
        else:
            # A falsy (empty/None) match entry carries no package name.
            name = match.get("package_name") if match else None
            yield {"package_name": name, "match": match, "run": run}

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Check if the given pipeline unit should be included in the given pipeline configuration."""
        if not cls._should_include_base(builder_context):
            yield from ()
            return None

        prescription: Dict[str, Any] = cls._PRESCRIPTION  # type: ignore
        yield from cls._yield_should_include(prescription)
        return None

    def run(self) -> None:
        """Run main entry-point for boot units."""
        super()._run_base()
class Stride2(Stride):
    """A testing stride implementation."""

    CONFIGURATION_DEFAULT = {
        "foo": None,
        "package_name": "thamos",
    }
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("foo"): object,
            Required("package_name"): SchemaAny(str, None),
        }
    )

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Optional[Dict[str, Any]]:
        """Check if this pipeline unit should be included in the pipeline configuration."""

    def run(self, state: State) -> Optional[Tuple[float, List[Dict[str, str]]]]:
        """Run noop method."""
class StridePrescription(UnitPrescription):
    """Stride base class implementation."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): SchemaAny(str, None),
            Required("match"): PRESCRIPTION_STRIDE_MATCH_ENTRY_SCHEMA,
            Required("run"): PRESCRIPTION_STRIDE_RUN_SCHEMA,
            Required("prescription"): Schema({"run": bool}),
        }
    )

    @staticmethod
    def is_stride_unit_type() -> bool:
        """Check if this unit is of type stride."""
        return True

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Check if the given pipeline unit should be included in the given pipeline configuration."""
        if not cls._should_include_base(builder_context):
            yield from ()
            return None

        prescription: Dict[str, Any] = cls._PRESCRIPTION  # type: ignore
        yield from cls._yield_should_include_with_state(prescription)
        return None

    def run(self, state: State) -> None:
        """Run main entry-point for strides."""
        if self._run_state(state):
            try:
                self._run_base()
            finally:
                # Mark the prescription as executed regardless of _run_base outcome.
                self._configuration["prescription"]["run"] = True
class SolvedSieve(Sieve):
    """Filter out build time/installation errors of Python packages."""

    CONFIGURATION_DEFAULT = {"package_name": None, "without_error": True}
    CONFIGURATION_SCHEMA: Schema = Schema(
        {Required("package_name"): SchemaAny(str, None), Required("without_error"): bool}
    )

    _JUSTIFICATION_LINK = jl("buildtime_error")

    _messages_logged = attr.ib(type=Set[Tuple[str, str, str]], factory=set, init=False)

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Optional[Dict[str, Any]]:
        """Include solved pipeline sieve for adviser or Dependency Monkey on pipeline creation."""
        if builder_context.is_included(cls):
            return None
        return {}

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run."""
        self._messages_logged.clear()
        super().pre_run()

    def run(
        self, package_versions: Generator[PackageVersion, None, None]
    ) -> Generator[PackageVersion, None, None]:
        """Filter out packages based on build time/installation issues."""
        runtime_environment = self.context.project.runtime_environment
        for package_version in package_versions:
            package_tuple = package_version.to_tuple()
            try:
                has_error = self.context.graph.has_python_solver_error(
                    package_version.name,
                    package_version.locked_version,
                    package_version.index.url,
                    os_name=runtime_environment.operating_system.name,
                    os_version=runtime_environment.operating_system.version,
                    python_version=runtime_environment.python_version,
                )
            except NotFoundError as exc:
                _LOGGER.debug(
                    "Removing package %r as it was not solved: %s",
                    package_tuple,
                    str(exc),
                )
                continue

            if has_error and self.configuration["without_error"]:
                # Drop the package; warn and record stack info only the first time.
                if package_tuple not in self._messages_logged:
                    self._messages_logged.add(package_tuple)
                    message = f"Removing package {package_tuple} due to build time error in the software environment"
                    _LOGGER.warning("%s - see %s", message, self._JUSTIFICATION_LINK)
                    self.context.stack_info.append(
                        {
                            "type": "WARNING",
                            "message": message,
                            "link": self._JUSTIFICATION_LINK,
                        }
                    )
                continue

            yield package_version
class AddPackageStepPrescription(StepPrescription):
    """Add package prescription step unit implementation."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("package_name"): SchemaAny(str, None),
            Required("match"): PRESCRIPTION_ADD_PACKAGE_STEP_MATCH_ENTRY_SCHEMA,
            Required("multi_package_resolution"): bool,
            Required("run"): SchemaAny(PRESCRIPTION_ADD_PACKAGE_STEP_RUN_SCHEMA, None),
            Required("prescription"): Schema({"run": bool}),
        }
    )

    def run(self, state: State, package_version: PackageVersion) -> None:
        """Run main entry-point for the step adding a prescribed package to the resolution.

        When the resolved package matches the prescription criteria, the prescribed
        package is registered and added as an unresolved dependency of the state -
        unless it is already resolved, unsolved in the database, or its index is
        disabled/unknown.
        """
        # Guard clauses: index URL, version specifier, develop flag and state/initiator
        # criteria must all match before the package is added.
        if not self._index_url_check(self._index_url, package_version.index.url):
            return None
        if self._specifier and package_version.locked_version not in self._specifier:
            return None
        if self._develop is not None and package_version.develop != self._develop:
            return None
        if not self._run_state_with_initiator(state, package_version):
            return None

        add_package_version = self.run_prescription["package_version"]
        add_package_version_name = add_package_version["name"]
        # locked_version carries a "==" prefix; strip it to obtain the bare version string.
        add_package_version_version = add_package_version["locked_version"][2:]
        add_package_version_index_url = add_package_version["index_url"]
        add_package_version_develop = add_package_version["develop"]
        add_package_version_tuple = (
            add_package_version_name,
            add_package_version_version,
            add_package_version_index_url,
        )

        # Do not add when this package name is already resolved (same or different version).
        resolved = state.resolved_dependencies.get(add_package_version_name)
        if resolved:
            if resolved == add_package_version_tuple:
                _LOGGER.debug(
                    "%s: Not adding package %r as it is already in the resolved listing",
                    self.get_unit_name(),
                    add_package_version_tuple,
                )
            else:
                _LOGGER.debug(
                    "%s: Not adding package %r as another package %r is already present in the resolved listing",
                    self.get_unit_name(),
                    add_package_version_tuple,
                    resolved,
                )
            return None

        runtime_env = self.context.project.runtime_environment
        py_ver = runtime_env.python_version.replace(".", "")
        # XXX: this could be moved to thoth-common
        solver_name = f"solver-{runtime_env.operating_system.name}-{runtime_env.operating_system.version}-py{py_ver}"
        # The package must have been solved for the target environment.
        if not self.context.graph.python_package_version_exists(
            add_package_version_name,
            add_package_version_version,
            add_package_version_index_url,
            solver_name=solver_name,
        ):
            _LOGGER.debug(
                "%s: Not adding package %r as the given package was not solved by %r",
                self.get_unit_name(),
                add_package_version_tuple,
                solver_name,
            )
            return None

        # The package's index must be known to and enabled in the resolver.
        try:
            if not self.context.graph.is_python_package_index_enabled(add_package_version_index_url):
                _LOGGER.debug(
                    "%s: Not adding package %r as index %r is not enabled",
                    self.get_unit_name(),
                    add_package_version_tuple,
                    add_package_version_index_url,
                )
                return None
        except NotFoundError:
            _LOGGER.debug(
                "%s: Not adding package %r as index %r is not known to the resolver",
                self.get_unit_name(),
                add_package_version_tuple,
                add_package_version_index_url,
            )
            return None

        try:
            self._run_base()
        finally:
            # Mark the prescription as executed even if _run_base raised.
            self._configuration["prescription"]["run"] = True

        pv = PackageVersion(
            name=add_package_version_name,
            version=add_package_version["locked_version"],
            index=Source(add_package_version_index_url),
            develop=add_package_version_develop,
        )
        self.context.register_package_version(pv)
        state.add_unresolved_dependency(add_package_version_tuple)
class GenerateScoreStep(Step):
    """A step that is assigning scores in a deterministic way.

    This unit can be used to measure assigning score in a deterministic way across multiple
    runs without a need to store all the score for packages.
    """

    # Assign probability is used to "assign" a score to the package to simulate knowledge
    # coverage for packages resolved - 0.75 means ~75% of packages will have a score.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            SchemaOptional("assign_probability"): float,
            SchemaOptional("buffer_size"): int,
            SchemaOptional("package_name"): SchemaAny(str, None),
            SchemaOptional("seed"): int,
            SchemaRequired("multi_package_resolution"): bool,
        }
    )
    CONFIGURATION_DEFAULT: Dict[str, Any] = {
        "assign_probability": 0.75,
        "buffer_size": 1024,
        "multi_package_resolution": False,
        "package_name": None,
        "seed": 42,
    }

    # Mapping of package tuple -> score already assigned (reported in post_run).
    _history = attr.ib(type=Dict[Tuple[str, str, str], float], factory=dict, init=False)
    # Pre-computed ring buffer of scores generated once from the configured seed.
    _buffer = attr.ib(type=List[float], factory=list, init=False)
    # Next index into the ring buffer.
    _idx = attr.ib(type=int, default=0, init=False)

    def pre_run(self) -> None:
        """Initialize this pipeline units before each run."""
        self._history.clear()
        self._idx = 0
        if not self._buffer:
            # Save and restore the global RNG state so seeding here does not perturb
            # the random stream used elsewhere in the resolver.
            state = random.getstate()
            random.seed(self.configuration["seed"])
            self._buffer = [0.0] * self.configuration["buffer_size"]
            for i in range(self.configuration["buffer_size"]):
                self._buffer[i] = (
                    random.uniform(self.SCORE_MIN, self.SCORE_MAX)
                    if random.random() <= self.configuration["assign_probability"]
                    else 0.0
                )
            random.setstate(state)
        super().pre_run()

    def post_run(self) -> None:
        """Print the generated scores on finish to stdout."""
        pprint(self._history)

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Register self, never."""
        yield from ()
        return None

    def run(
        self, _: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Score the given package.

        A package seen before gets its previously assigned score; a new package consumes
        the next slot of the pre-computed ring buffer.
        """
        package_tuple = package_version.to_tuple()
        score = self._history.get(package_tuple)
        if score is not None:
            return score, None

        idx = self._idx
        self._idx = (self._idx + 1) % self.configuration["buffer_size"]
        self._history[package_tuple] = self._buffer[idx]
        return self._buffer[idx], None
class Unit(metaclass=abc.ABCMeta): """A base class for implementing pipeline units - strides and steps.""" _CONTEXT: Optional[Context] = None CONFIGURATION_SCHEMA: Schema = Schema( {Required("package_name"): SchemaAny(str, None)}) CONFIGURATION_DEFAULT: Dict[str, Any] = {"package_name": None} unit_run = attr.ib(type=bool, default=False, kw_only=True) _configuration = attr.ib(type=Dict[str, Any], kw_only=True) _RE_CAMEL2SNAKE = re.compile("(?!^)([A-Z]+)") _AICOE_PYTHON_PACKAGE_INDEX_URL = "https://tensorflow.pypi.thoth-station.ninja/index/" _VALIDATE_UNIT_CONFIGURATION_SCHEMA = bool( int(os.getenv("THOTH_ADVISER_VALIDATE_UNIT_CONFIGURATION_SCHEMA", 1))) _DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data") @classmethod def get_unit_name(cls) -> str: """Get name of the unit.""" return cls.__name__ @staticmethod def is_boot_unit_type() -> bool: """Check if the unit is of type boot.""" return False @staticmethod def is_pseudonym_unit_type() -> bool: """Check if the unit is of type pseudonym.""" return False @staticmethod def is_sieve_unit_type() -> bool: """Check if the unit is of type sieve.""" return False @staticmethod def is_step_unit_type() -> bool: """Check if the unit is of type step.""" return False @staticmethod def is_stride_unit_type() -> bool: """Check if the unit is of type step.""" return False @staticmethod def is_wrap_unit_type() -> bool: """Check if the unit is of type wrap.""" return False @classmethod def should_include( cls, builder_context: "PipelineBuilderContext" ) -> Generator[Dict[str, Any], None, None]: """Check if the given pipeline unit should be included in the given pipeline configuration.""" raise NotImplementedError( f"Please implement method to register pipeline unit {cls.get_unit_name()!r} to pipeline configuration" ) @classmethod @contextmanager def assigned_context(cls, context: Context) -> Generator[None, None, None]: """Assign context to all units.""" try: cls._CONTEXT = context yield finally: cls._CONTEXT = 
None def __attrs_post_init__(self) -> None: """Initialize post-init attributes.""" # Initialize unit_run always to False so the pipeline unit JSON report can be reused across # multiple pipeline unit runs. self.unit_run = False @_configuration.default def _initialize_default_configuration(self) -> Dict[str, Any]: """Initialize default unit configuration based on declared class' default configuration.""" return dict(self.CONFIGURATION_DEFAULT) @property def context(self) -> Context: """Get context in which the unit runs in.""" if self._CONTEXT is None: raise ValueError( "Requesting resolver context outside of resolver run") return self._CONTEXT @property def name(self) -> str: """Get name of this pipeline unit.""" return self.__class__.__name__ @property def configuration(self) -> Dict[str, Any]: """Get configuration of instantiated pipeline unit.""" return self._configuration def update_configuration(self, configuration_dict: Dict[str, Any]) -> None: """Set configuration for a pipeline unit. If setting configuration fails due to schema checks, configuration are kept in an invalid state. 
""" self.configuration.update(configuration_dict) if self._VALIDATE_UNIT_CONFIGURATION_SCHEMA and self.CONFIGURATION_SCHEMA: _LOGGER.debug("Validating configuration for pipeline unit %r", self.name) try: self.CONFIGURATION_SCHEMA(self.configuration) except Exception as exc: _LOGGER.exception( "Failed to validate schema for pipeline unit %r: %s", self.name, str(exc), ) raise PipelineUnitConfigurationSchemaError(str(exc)) def to_dict(self) -> Dict[str, Any]: """Turn this pipeline unit into its dictionary representation.""" return { "name": self.name, "configuration": self.configuration, "unit_run": self.unit_run } @classmethod def is_aicoe_release(cls, package_version: PackageVersion) -> bool: """Check if the given package-version is AICoE release.""" return bool( package_version.index.url.startswith( cls._AICOE_PYTHON_PACKAGE_INDEX_URL)) @classmethod def get_aicoe_configuration( cls, package_version: PackageVersion) -> Optional[Dict[str, Any]]: """Get AICoE specific configuration encoded in the AICoE index URL.""" if not package_version.index.url.startswith( cls._AICOE_PYTHON_PACKAGE_INDEX_URL): return None index_url = package_version.index.url[ len(cls._AICOE_PYTHON_PACKAGE_INDEX_URL):] conf_parts = index_url.strip("/").split( "/") # the last is always "simple" if len(conf_parts) == 3: # No OS specific release - e.g. manylinux compliant release. if not conf_parts[0].startswith("manylinux"): _LOGGER.error( "Failed to parse a platform tag, unknown AICoE Index URL: %r", package_version.index.url) return None return { "os_name": None, "os_version": None, "configuration": conf_parts[1], "platform_tag": conf_parts[0], } elif len(conf_parts) == 5: # TODO: We have dropped OS-specific builds, so this can go away in future releases... 
if conf_parts[0] != "os": _LOGGER.error( "Failed to parse operating system specific URL of AICoE index: %r", package_version.index.url) return None return { "os_name": conf_parts[1], "os_version": conf_parts[2], "configuration": conf_parts[3], "platform_tag": None, } _LOGGER.warning( "Failed to parse AICoE specific package source index configuration: %r", package_version.index.url, ) return None @staticmethod def get_base_image( base_image: str, *, raise_on_error: bool = False) -> Optional[Tuple[str, str]]: """Return information about base image used.""" parts = base_image.split(":", maxsplit=1) if len(parts) != 2: if raise_on_error: raise ParseBaseImageError( f"Cannot determine Thoth s2i version information from {base_image}, " "recommendations specific for ABI used will not be taken into account" ) return None thoth_s2i_image_name, thoth_s2i_image_version = parts if thoth_s2i_image_version.startswith("v"): # Not nice as we always prefix with "v" but do not store it with "v" in the database # (based on env var exported and detected in Thoth's s2i). thoth_s2i_image_version = thoth_s2i_image_version[1:] return thoth_s2i_image_name, thoth_s2i_image_version def pre_run(self) -> None: # noqa: D401 """Called before running any pipeline unit with context already assigned. This method should not raise any exception. """ self.unit_run = False def post_run(self) -> None: # noqa: D401 """Called after the resolution is finished. This method should not raise any exception. """ def post_run_report(self, report): # type:('Unit', Union[Report, DependencyMonkeyReport]) -> None """Post-run method run after the resolving has finished - this method is called only if resolving with a report.
class TensorFlowAPISieve(Sieve):
    """A sieve that makes sure the right TensorFlow release is used based on user's API usage."""

    CONFIGURATION_SCHEMA: Schema = Schema(
        {Required("package_name"): SchemaAny(str)})
    CONFIGURATION_DEFAULT = {"package_name": "tensorflow"}
    _LINK_API = jl("tf_api")
    _LINK_NO_API = jl("tf_no_api")
    # One unit instance is registered per affected package name.
    _PACKAGES_AFFECTED = ("tensorflow", "tensorflow-gpu", "intel-tensorflow", "tensorflow-cpu")

    # TensorFlow minor versions already reported to the user (avoids duplicate warnings).
    _messages_logged = attr.ib(type=Set[str], factory=set, init=False)
    # Guards the one-time "no matching API" warning.
    _no_api_logged = attr.ib(type=bool, default=False, init=False)
    # Lazily computed set of acceptable TensorFlow minor versions; None until computed.
    _acceptable_releases = attr.ib(type=Optional[Set[str]], default=None, init=False)

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run."""
        self._messages_logged.clear()
        self._acceptable_releases = None
        self._no_api_logged = False
        super().pre_run()

    def _pre_compute_releases(self) -> None:
        """Pre-compute releases that match library usage supplied by the user."""
        # Be explicit about the encoding instead of relying on the platform default;
        # the bundled api.json data file is UTF-8 encoded JSON.
        with open(os.path.join(self._DATA_DIR, "tensorflow", "api.json"), "r", encoding="utf-8") as api_file:
            known_api = json.load(api_file)

        self._acceptable_releases = set()
        tf_api_used = set(
            (self.context.library_usage.get("report") or {}).get("tensorflow") or []  # type: ignore
        )
        # A release is acceptable only if it provides all the symbols the user uses.
        for tf_version, tf_api in known_api.items():
            if tf_api_used.issubset(tf_api):
                self._acceptable_releases.add(tf_version)

    @classmethod
    def should_include(
            cls, builder_context: "PipelineBuilderContext"
    ) -> Optional[Dict[str, Any]]:
        """Register this pipeline unit if adviser library usage is provided."""
        if not builder_context.is_adviser_pipeline(
        ) or not builder_context.library_usage:
            return None

        if builder_context.recommendation_type in (RecommendationType.LATEST,
                                                   RecommendationType.TESTING):
            # Use any TensorFlow for testing purposes or when resolving latest stack.
            return None

        if "tensorflow" not in (builder_context.library_usage.get("report") or {}):
            return None

        # Include this pipeline unit in configurations for various builds -
        # one instance per affected package name, registered incrementally.
        included_units = builder_context.get_included_sieves(cls)
        if len(included_units) >= len(cls._PACKAGES_AFFECTED):
            return None

        return {
            "package_name": cls._PACKAGES_AFFECTED[len(included_units)],
        }

    def run(
        self, package_versions: Generator[PackageVersion, None, None]
    ) -> Generator[PackageVersion, None, None]:
        """Use specific TensorFlow release based on library usage as supplied by the user.

        Yields only package versions whose major.minor release exposes all the
        TensorFlow API symbols used; versions lacking symbols are filtered out
        with a one-time warning per minor release.
        """
        if self._acceptable_releases is None:
            self._pre_compute_releases()

        if not self._acceptable_releases:
            # No release matches the user's API usage - do not filter anything,
            # just warn once and pass packages through.
            if not self._no_api_logged:
                self._no_api_logged = True
                msg = "No TensorFlow symbols API found in the database that would match TensorFlow symbols used"
                _LOGGER.warning("%s - see %s", msg, self._LINK_NO_API)
                self.context.stack_info.append({
                    "type": "WARNING",
                    "message": msg,
                    "link": self._LINK_NO_API,
                })
            yield from package_versions
            return

        for package_version in package_versions:
            # Compare on the major.minor part of the release only.
            version = ".".join(
                map(str, package_version.semantic_version.release[:2]))
            if version in self._acceptable_releases:
                yield package_version
            elif version not in self._messages_logged:
                self._messages_logged.add(version)
                msg = (
                    f"Removing TensorFlow {package_version.to_tuple()!r} as it does not provide required symbols "
                    f"in the exposed API")
                _LOGGER.warning("%s - see %s", msg, self._LINK_API)
                self.context.stack_info.append({
                    "type": "WARNING",
                    "message": msg,
                    "link": self._LINK_API
                })
class StepPrescription(UnitPrescription):
    """Step base class implementation.

    Configuration option `multi_package_resolution` states whether a step should
    be run if package is resolved multiple times for the same stack.
    """

    CONFIGURATION_SCHEMA: Schema = Schema({
        Required("package_name"): SchemaAny(str, None),
        Required("match"): PRESCRIPTION_STEP_MATCH_ENTRY_SCHEMA,
        Required("multi_package_resolution"): bool,
        Required("run"): PRESCRIPTION_STEP_RUN_SCHEMA,
        Required("prescription"): Schema({"run": bool}),
    })

    SCORE_MAX = 1.0
    SCORE_MIN = -1.0

    _specifier = attr.ib(type=Optional[SpecifierSet], kw_only=True, init=False, default=None)
    _index_url = attr.ib(type=Optional[str], kw_only=True, init=False, default=None)
    _develop = attr.ib(type=Optional[bool], kw_only=True, init=False, default=None)

    @staticmethod
    def is_step_unit_type() -> bool:
        """Check if this unit is of type step."""
        return True

    @staticmethod
    def _yield_should_include(
        unit_prescription: Dict[str, Any]
    ) -> Generator[Dict[str, Any], None, None]:
        """Yield for every entry stated in the match field."""
        match = unit_prescription["match"]
        run = unit_prescription["run"]
        # Shared mutable flag: all yielded configurations reference the same
        # dict so the justification is reported only once per prescription.
        prescription_conf = {"run": False}
        # Normalize a single match entry to a one-element list.
        entries = match if isinstance(match, list) else [match]
        for entry in entries:
            yield {
                "package_name": entry["package_version"].get("name"),
                "multi_package_resolution": run.get("multi_package_resolution", False),
                "match": entry,
                "run": run,
                "prescription": prescription_conf,
            }

    @classmethod
    def should_include(
        cls, builder_context: "PipelineBuilderContext"
    ) -> Generator[Dict[str, Any], None, None]:
        """Check if the given pipeline unit should be included in the given pipeline configuration."""
        if not cls._should_include_base(builder_context):
            yield from ()
            return None

        prescription: Dict[str, Any] = cls._PRESCRIPTION  # type: ignore
        yield from cls._yield_should_include(prescription)
        return None

    def pre_run(self) -> None:
        """Prepare before running this pipeline unit."""
        pv_conf = self.match_prescription.get("package_version", {})

        version_specifier = pv_conf.get("version")
        if version_specifier:
            self._specifier = SpecifierSet(version_specifier)

        self._index_url = pv_conf.get("index_url")
        self._develop = pv_conf.get("develop")
        self._prepare_justification_link(
            self.run_prescription.get("justification", []))
        super().pre_run()

    def run(
        self, state: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Run main entry-point for steps to filter and score packages."""
        # Guard clauses: only act when index URL, version specifier, develop
        # flag and state initiator all match the prescription.
        if (
            not self._index_url_check(self._index_url, package_version.index.url)
            or (self._specifier and package_version.locked_version not in self._specifier)
            or (self._develop is not None and package_version.develop != self._develop)
            or not self._run_state_with_initiator(state, package_version)
        ):
            return None

        prescription_conf = self._configuration["prescription"]
        try:
            self._run_base()
            score = self.run_prescription.get("score")
            if prescription_conf["run"]:
                # Provide justification just once per prescription.
                return score, None
            return score, self.run_prescription.get("justification")
        finally:
            prescription_conf["run"] = True
class SolvedSieve(Sieve):
    """Filter out build time/installation errors of Python packages."""

    CONFIGURATION_DEFAULT = {"package_name": None, "without_error": True}
    CONFIGURATION_SCHEMA: Schema = Schema({
        Required("package_name"): SchemaAny(str, None),
        Required("without_error"): bool
    })

    _JUSTIFICATION_LINK = jl("install_error")

    # Package tuples already reported, to keep warnings unique.
    _messages_logged = attr.ib(type=Set[Tuple[str, str, str]], factory=set, init=False)
    # Mapping package name -> index URL -> filtered versions, for the post-run summary.
    _packages_filtered = attr.ib(
        factory=_default_dict_factory,
        init=False,
    )

    @classmethod
    def should_include(
        cls, builder_context: "PipelineBuilderContext"
    ) -> Generator[Dict[str, Any], None, None]:
        """Include solved pipeline sieve for adviser or Dependency Monkey on pipeline creation."""
        if builder_context.is_included(cls):
            # Already registered - nothing more to yield.
            yield from ()
            return None

        yield {}
        return None

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run."""
        self._messages_logged.clear()
        self._packages_filtered.clear()
        super().pre_run()

    def post_run(self) -> None:
        """Post-run method for wrapping up the work."""
        # Deterministic output: iterate package names and index URLs sorted.
        for package_name, package_items in sorted(
                self._packages_filtered.items(), key=operator.itemgetter(0)):
            for index_url, version in sorted(package_items.items(),
                                             key=operator.itemgetter(0)):
                self.context.stack_info.append({
                    "type": "WARNING",
                    "message": f"The following versions of {package_name!r} from {index_url!r} were "
                    "removed due to installation issues in the target environment: " + ", ".join(version),
                    "link": self._JUSTIFICATION_LINK,
                })

    def run(
        self, package_versions: Generator[PackageVersion, None, None]
    ) -> Generator[PackageVersion, None, None]:
        """Filter out packages based on build time/installation issues."""
        runtime_environment = self.context.project.runtime_environment
        for package_version in package_versions:
            package_tuple = package_version.to_tuple()
            try:
                has_error = self.context.graph.has_python_solver_error(
                    package_version.name,
                    package_version.locked_version,
                    package_version.index.url,
                    os_name=runtime_environment.operating_system.name,
                    os_version=runtime_environment.operating_system.version,
                    python_version=runtime_environment.python_version,
                )
            except NotFoundError as exc:
                _LOGGER.debug(
                    "Removing package %r as it was not solved: %s",
                    package_tuple,
                    exc,
                )
                continue

            if not (has_error and self.configuration["without_error"]):
                yield package_version
                continue

            # Package has a solver error and errored packages are filtered out.
            if package_tuple not in self._messages_logged:
                self._messages_logged.add(package_tuple)
                message = (
                    f"Removing package {package_tuple} due to installation time error in the software environment"
                )
                _LOGGER.warning("%s - see %s", message,
                                self._JUSTIFICATION_LINK)
                self._packages_filtered[package_tuple[0]][
                    package_tuple[2]].append(package_tuple[1])
class SolverRulesSieve(Sieve):
    """A sieve for filtering out Python packages that have rules assigned."""

    CONFIGURATION_DEFAULT = {"package_name": None}
    CONFIGURATION_SCHEMA: Schema = Schema(
        {Required("package_name"): SchemaAny(str, None)})

    _JUSTIFICATION_LINK = jl("rules")

    # Package tuples already reported, to keep warnings unique per package.
    _messages_logged = attr.ib(type=Set[Tuple[str, str, str]], factory=set, init=False)

    @classmethod
    def should_include(
        cls, builder_context: "PipelineBuilderContext"
    ) -> Generator[Dict[str, Any], None, None]:
        """Include pipeline sieve."""
        if not builder_context.is_included(cls):
            yield {}
            return None

        yield from ()
        return None

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run."""
        self._messages_logged.clear()
        super().pre_run()

    def run(
        self, package_versions: Generator[PackageVersion, None, None]
    ) -> Generator[PackageVersion, None, None]:
        """Filter out packages that have rules assigned.

        Package versions with at least one solver rule (index-specific or
        index-agnostic) are removed; a warning is logged and recorded in the
        stack info once per package tuple.
        """
        for package_version in package_versions:
            package_tuple = package_version.to_tuple()

            solver_rules = (
                # Rules specific to index.
                self.context.graph.get_python_package_version_solver_rules_all(
                    package_version.name,
                    package_version.locked_version,
                    package_version.index.url,
                ) +
                # Rules agnostic to index.
                self.context.graph.get_python_package_version_solver_rules_all(
                    package_version.name,
                    package_version.locked_version,
                ))

            if solver_rules:
                if package_tuple not in self._messages_logged:
                    # Mark the tuple once instead of re-adding it on every inner
                    # iteration (the repeated set.add was redundant).
                    self._messages_logged.add(package_tuple)
                    for solver_rule in solver_rules:
                        message = f"Removing package {package_tuple} based on solver rule configured: {solver_rule}"
                        _LOGGER.warning("%s - see %s", message,
                                        self._JUSTIFICATION_LINK)
                        self.context.stack_info.append({
                            "type": "WARNING",
                            "message": message,
                            "link": self._JUSTIFICATION_LINK,
                        })
                continue

            yield package_version
class SolverRulesSieve(Sieve):
    """A sieve for filtering out Python packages that have rules assigned."""

    CONFIGURATION_DEFAULT = {"package_name": None}
    CONFIGURATION_SCHEMA: Schema = Schema({Required("package_name"): SchemaAny(str, None)})

    _JUSTIFICATION_LINK = jl("rules")

    # Rule ids already summarized in stack info (one stack-info entry per rule).
    _rules_logged = attr.ib(type=Set[int], factory=set, init=False)
    # (rule_id, name, version, index) combinations already logged as warnings.
    _messages_logged = attr.ib(type=Set[Tuple[int, str, str, str]], factory=set, init=False)

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Include pipeline sieve."""
        if builder_context.is_included(cls):
            # Already part of the pipeline - register nothing.
            yield from ()
            return None

        yield {}
        return None

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run."""
        self._rules_logged.clear()
        self._messages_logged.clear()
        super().pre_run()

    def run(self, package_versions: Generator[PackageVersion, None, None]) -> Generator[PackageVersion, None, None]:
        """Filter out packages that have rules assigned."""
        graph = self.context.graph
        for package_version in package_versions:
            package_tuple = package_version.to_tuple()

            # Index-specific rules followed by index-agnostic rules.
            solver_rules = graph.get_python_package_version_solver_rules_all(
                package_version.name,
                package_version.locked_version,
                package_version.index.url,
            ) + graph.get_python_package_version_solver_rules_all(
                package_version.name,
                package_version.locked_version,
            )

            if not solver_rules:
                yield package_version
                continue

            # Package is filtered out (not yielded); log each rule that applies.
            for rule_id, version_range, index_url, description in solver_rules:
                message_logged_entry: Tuple[int, str, str, str] = (rule_id, *package_tuple)
                if message_logged_entry not in self._messages_logged:
                    self._messages_logged.add(message_logged_entry)
                    _LOGGER.warning(
                        "Removing package %r based on solver rule configured: %s",
                        package_tuple,
                        description,
                    )

                if rule_id in self._rules_logged:
                    continue

                self._rules_logged.add(rule_id)
                pieces = [f"Removing package {package_tuple[0]!r}"]
                pieces.append(f" in versions {version_range!r}" if version_range else " in all versions")
                pieces.append(f" from index {index_url!r}" if index_url else " from all registered indexes")
                pieces.append(f" based on rule: {description}")
                self.context.stack_info.append(
                    {
                        "type": "WARNING",
                        "message": "".join(pieces),
                        "link": self._JUSTIFICATION_LINK,
                    }
                )