def check_data(cls, data):
    """Validate the top-level layout of a Polyaxonfile payload.

    Delegates to ``cls.check_version`` and ``cls.check_kind`` first, then
    checks that the declared kind equals ``cls._SPEC_KIND``, that every key
    of ``data`` is listed in ``cls.SECTIONS`` and that every key of
    ``cls.REQUIRED_SECTIONS`` is present. The version key is not required
    when the payload is flagged as a preset.

    Raises:
        PolyaxonfileError: on the first violation found.
    """
    cls.check_version(data)
    cls.check_kind(data)

    declared_kind = data[cls.KIND]
    if declared_kind != cls._SPEC_KIND:
        raise PolyaxonfileError(
            "The specification used `{}` is incompatible with the kind `{}`.".format(
                cls.__name__, declared_kind
            )
        )

    for key in set(data.keys()) - set(cls.SECTIONS):
        # Describe where the stray section lives; the preset label takes
        # precedence over the versioned label.
        if data.get(cls.IS_PRESET):
            in_specification = "Polyaxonfile preset"
        elif data.get(cls.VERSION):
            in_specification = "Polyaxonfile version `{}`".format(
                data.get(cls.VERSION)
            )
        else:
            in_specification = "Polyaxonfile"
        raise PolyaxonfileError(
            "Unexpected section `{}` in {}. "
            "Please check the Polyaxonfile specification "
            "for this version.".format(key, in_specification)
        )

    for key in cls.REQUIRED_SECTIONS:
        version_is_optional = key == cls.VERSION and data.get(cls.IS_PRESET)
        if version_is_optional or key in data:
            continue
        raise PolyaxonfileError(
            "{} is a required section for a valid Polyaxonfile".format(key)
        )
def collect_references(config: V1Operation, path_context: str = None):
    """Resolve the component referenced by an operation and attach it.

    An operation that already carries a component reference is returned
    untouched. Otherwise the component is read from the hub, url or path
    reference, parsed with ``get_specification`` and stored on
    ``config.component``. When the component is a DAG run,
    ``component.run.collect_components()`` is invoked as well.

    Args:
        config: the operation whose reference should be resolved.
        path_context: optional path; when given, a path reference is joined
            to the directory containing this path.

    Returns:
        The same ``config`` object.

    Raises:
        PolyaxonfileError: if the operation has no reference at all, or if
            the referenced specification is not of kind ``component``.
    """
    if config.has_component_reference:
        return config
    elif config.has_hub_reference:
        component = ConfigSpec.get_from(config.hub_ref, "hub").read()
    elif config.has_url_reference:
        component = ConfigSpec.get_from(config.url_ref, "url").read()
    elif config.has_path_reference:
        path_ref = config.path_ref
        if path_context:
            path_ref = os.path.join(
                os.path.dirname(os.path.abspath(path_context)), path_ref
            )
        component = ConfigSpec.get_from(path_ref).read()
    else:
        raise PolyaxonfileError("Operation found without component")

    component = get_specification(data=component)
    if component.kind != kinds.COMPONENT:
        # Report the reference that was actually used. Hub references used to
        # be reported as "Path ref" with an empty ref.
        if config.has_hub_reference:
            ref_type = "Hub ref"
            ref = config.hub_ref
        elif config.has_url_reference:
            ref_type = "Url ref"
            ref = config.url_ref
        else:
            ref_type = "Path ref"
            ref = config.path_ref
        # The reference type goes in the parentheses, the reference value in
        # the backticks (the two arguments used to be swapped).
        raise PolyaxonfileError(
            "the reference ({}) `{}` is of kind `{}`, it should be a `{}`".format(
                ref_type, ref, component.kind, kinds.COMPONENT
            )
        )

    config.component = component
    if component.is_dag_run:
        component.run.collect_components()
    return config
def parse_params(params, is_cli: bool = True):
    """Convert ``name=value`` strings into a ``{name: {"value": value}}`` dict.

    A mapping is returned as-is. Otherwise each entry is split on its first
    ``=``. An entry without ``=`` or with a repeated name is reported through
    ``Printer.print_error(..., sys_exit=True)`` when ``is_cli`` is true, and
    raised as ``PolyaxonfileError`` otherwise.
    """
    if isinstance(params, Mapping):
        return params

    def _report(message):
        # Single place deciding between CLI reporting and raising.
        if is_cli:
            Printer.print_error(message, sys_exit=True)
        else:
            raise PolyaxonfileError(message)

    parsed_params = {}
    for entry in params:
        split_at = entry.find("=")
        if split_at == -1:
            _report(
                "Invalid format for -P parameter: '%s'. Use -P name=value." % entry
            )
        name, value = entry[:split_at], entry[split_at + 1:]
        if name in parsed_params:
            _report("Repeated parameter: '%s'" % name)
        parsed_params[name] = {"value": value}
    return parsed_params
def check_kind(cls, data):
    """Ensure ``data`` declares a kind and that the kind is in ``kinds.KINDS``.

    Raises:
        PolyaxonfileError: if the kind key is missing or its value is not a
            supported kind.
    """
    kind_key = cls.KIND
    if kind_key not in data:
        raise PolyaxonfileError("The Polyaxonfile `kind` must be specified.")

    if data[kind_key] in kinds.KINDS:
        return

    raise PolyaxonfileError(
        "The Polyaxonfile with kind `{}` is not a supported value.".format(
            data[kind_key]
        )
    )
def check_version(cls, data):
    """Ensure ``data`` declares a version within the supported range.

    The version must satisfy ``cls.MIN_VERSION <= version <= cls.MAX_VERSION``.

    Raises:
        PolyaxonfileError: if the version key is missing, if the value cannot
            be ordered against the bounds, or if it falls outside the range.
    """
    if cls.VERSION not in data:
        raise PolyaxonfileError("The Polyaxonfile `version` must be specified.")

    try:
        supported = cls.MIN_VERSION <= data[cls.VERSION] <= cls.MAX_VERSION
    except TypeError:
        # A value whose type cannot be ordered against the bounds is an
        # unsupported version, not an internal error.
        supported = False

    if not supported:
        # Each fragment ends with a space so the concatenated sentences do
        # not run together.
        raise PolyaxonfileError(
            "The Polyaxonfile's version specified is not supported by your current CLI. "
            "Your CLI supports Polyaxonfile versions between: {} <= v <= {}. "
            "You can run `polyaxon upgrade` and "
            "check documentation for the specification.".format(
                cls.MIN_VERSION, cls.MAX_VERSION
            )
        )
def get_op_specification(self, params=None, profile=None, queue=None, nocache=None):
    """Build an op specification around ``self.specification``.

    The optional ``params``, ``profile``, ``queue`` and ``nocache`` values
    are added to the op payload when provided. If the wrapped specification
    is already an op, the payload is merged on top of it; otherwise the
    wrapped config is embedded under ``component``. The generated run data
    is then validated against ``params`` before the specification is
    returned.

    Raises:
        PolyaxonfileError: if ``params`` is truthy but not a mapping.
    """
    job_data = {"version": self.specification.version, "kind": kinds.OP}
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    for key, value in (("profile", profile), ("queue", queue)):
        if value:
            job_data[key] = value
    if nocache is not None:
        job_data["nocache"] = nocache

    if self.specification.is_op:
        sources = [self.specification.config.to_dict(), job_data]
    else:
        job_data["component"] = self.specification.config.to_dict()
        sources = [job_data]
    specification = get_specification(data=sources)

    # Sanity check if params were passed
    run_spec = get_specification(specification.generate_run_data())
    run_spec.validate_params(params=params, is_template=False)
    if run_spec.has_dag_run:
        run_spec.apply_context()
    return specification
def get_op_specification(
    self, params=None, profile=None, queue=None, nocache=None
) -> V1Operation:
    """Build a ``V1Operation`` around ``self.config``.

    The optional values are added to the operation payload when provided
    (``nocache`` is stored as ``{"cache": {"disable": nocache}}``). If
    ``self.config`` is already an operation the payload is merged on top of
    it; otherwise the config is embedded under ``component``. The compiled
    operation is validated against a copy of the resulting params before the
    operation is returned.

    Raises:
        PolyaxonfileError: if ``params`` is truthy but not a mapping.
    """
    job_data = {"version": self.config.version, "kind": kinds.OPERATION}
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    for key, value in (("profile", profile), ("queue", queue)):
        if value:
            job_data[key] = value
    if nocache is not None:
        job_data["cache"] = {"disable": nocache}

    if self.config.kind == kinds.OPERATION:
        sources = [self.config.to_dict(), job_data]
    else:
        job_data["component"] = self.config.to_dict()
        sources = [job_data]
    config = get_specification(data=sources)

    params = copy.deepcopy(config.params)
    # Sanity check if params were passed
    run_config = OperationSpecification.compile_operation(config)
    run_config.validate_params(params=params, is_template=False)
    if run_config.is_dag_run:
        CompiledOperationSpecification.apply_context(run_config)
    return config
def __init__(self, filepaths):
    """Read one or more Polyaxonfile paths into ``self.specification``.

    Every path must point to an existing file. The base names are kept in
    ``self._filenames``.

    Raises:
        PolyaxonfileError: if any path is not an existing file.
    """
    paths = to_list(filepaths)
    for candidate in paths:
        if os.path.isfile(candidate):
            continue
        raise PolyaxonfileError("`{}` must be a valid file".format(candidate))

    self._filenames = list(map(os.path.basename, paths))
    self.specification = get_specification(data=reader.read(paths))
def validate_keys(section, config, section_data):
    """Reject keys of ``section_data`` that the config's schema does not declare.

    Args:
        section: section name, used only in the error message.
        config: object exposing a ``SCHEMA`` callable whose result has a
            ``fields`` container.
        section_data: mapping whose keys are checked.

    Raises:
        PolyaxonfileError: listing every key missing from the schema fields.
    """
    # Instantiate the schema once rather than once per key.
    known_fields = config.SCHEMA().fields
    extra_args = [key for key in section_data.keys() if key not in known_fields]
    if extra_args:
        raise PolyaxonfileError(
            "Extra arguments passed for `{}`: {}".format(section, extra_args)
        )
def check_data(self, data=None):
    """Validate the top-level layout of a Polyaxonfile payload.

    Falls back to ``self._data`` when ``data`` is falsy. Runs the version
    and kind checks, then verifies the kind equals ``self._SPEC_KIND``, that
    no key falls outside ``self.SECTIONS`` and that every key in
    ``self.REQUIRED_SECTIONS`` is present.

    Raises:
        PolyaxonfileError: on the first violation found.
    """
    payload = data or self._data
    self.check_version(payload)
    self.check_kind(payload)

    declared_kind = payload[self.KIND]
    if declared_kind != self._SPEC_KIND:
        raise PolyaxonfileError(
            "The specification used `{}` is incompatible with the kind `{}`."
            .format(self.__class__.__name__, declared_kind))

    unexpected = set(payload.keys()) - set(self.SECTIONS)
    for key in unexpected:
        raise PolyaxonfileError(
            "Unexpected section `{}` in Polyaxonfile version `{}`. "
            "Please check the Polyaxonfile specification "
            "for this version.".format(key, payload[self.VERSION]))

    for key in self.REQUIRED_SECTIONS:
        if key in payload:
            continue
        raise PolyaxonfileError(
            "{} is a required section for a valid Polyaxonfile".format(key))
def _apply_run_context(self):
    """Validate the run params and parse the specification with them.

    The params returned by ``self.validate_params`` must not carry an
    ``entity_ref``. They are then indexed by name and handed to
    ``self._parse``.

    Returns:
        Whatever ``self._parse`` returns for the name-indexed params.

    Raises:
        PolyaxonfileError: if any validated param still has an ``entity_ref``.
    """
    params = self.validate_params(is_template=False, check_runs=True)

    for param in params:
        if param.entity_ref:
            # Message spelling fixed ("received").
            raise PolyaxonfileError(
                "apply_context received a non-resolved "
                "ref param `{}` with value `{}`".format(param.name, param.value)
            )

    params = {param.name: param for param in params}
    return self._parse(params)
def collect_references(config: V1Operation):
    """Resolve a url or path component reference and attach it to ``config``.

    Operations with a component or hub reference are returned untouched.
    For url and path references the target is read, parsed with
    ``get_specification`` and stored on ``config.component``.

    Returns:
        The same ``config`` object.

    Raises:
        PolyaxonfileError: if the operation has no reference of any kind.
    """
    if config.has_component_reference or config.has_hub_reference:
        return config

    if config.has_url_reference:
        raw_component = ConfigSpec.get_from(config.url_ref, "url").read()
    elif config.has_path_reference:
        raw_component = ConfigSpec.get_from(config.path_ref).read()
    else:
        raise PolyaxonfileError("Operation found without component")

    config.component = get_specification(data=raw_component)
    return config
def get_op_specification(
    config: Union[V1Component, V1Operation] = None,
    hub: str = None,
    params: Dict = None,
    profile: str = None,
    queue: str = None,
    nocache: bool = None,
    path_context: str = None,
    validate_params: bool = True,
) -> V1Operation:
    """Build a ``V1Operation`` from a component, an operation or a hub name.

    The optional values are added to the operation payload when provided;
    a queue is first passed to ``get_queue_info``. A component config is
    embedded under ``component``, an operation config is merged with the
    payload, and a bare hub name is stored as ``hubRef``. Unless validation
    is disabled or the operation points to a non-public hub component, the
    compiled operation is validated against a copy of the resulting params.

    Raises:
        PolyaxonfileError: if ``params`` is truthy but not a mapping.
    """
    schema_version = config.version if config else pkg.SCHEMA_VERSION
    job_data = {"version": schema_version, "kind": kinds.OPERATION}
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    if profile:
        job_data["profile"] = profile
    if queue:
        # Check only
        get_queue_info(queue)
        job_data["queue"] = queue
    if nocache is not None:
        job_data["cache"] = {"disable": nocache}

    sources = None
    if config and config.kind == kinds.COMPONENT:
        job_data["component"] = config.to_dict()
        sources = [job_data]
    elif config and config.kind == kinds.OPERATION:
        sources = [config.to_dict(), job_data]
    elif hub:
        job_data["hubRef"] = hub
        sources = [job_data]
    if sources is not None:
        config = get_specification(data=sources)

    if hub and config.hub_ref is None:
        config.hub_ref = hub
    hub = config.hub_ref
    public_hub = config.has_public_hub_reference
    params = copy.deepcopy(config.params)

    # Sanity check if params were passed and we are not dealing with a hub component
    if not validate_params or (hub and not public_hub):
        return config

    run_config = OperationSpecification.compile_operation(config)
    run_config.validate_params(params=params, is_template=False)
    if run_config.is_dag_run:
        run_config.run.set_path_context(path_context)
        CompiledOperationSpecification.apply_operation_contexts(run_config)
    return config
def validate_params(
    self, params=None, context=None, is_template=True, check_runs=False
):
    """Validate ``params`` against this config's inputs and outputs.

    Thin wrapper around ``ops_params.validate_params`` that forwards
    ``self.config.inputs`` and ``self.config.outputs`` together with the
    given arguments.

    Returns:
        Whatever ``ops_params.validate_params`` returns.

    Raises:
        PolyaxonfileError: if the underlying validation raises
            ``ValidationError``; the original error is kept as the cause.
    """
    try:
        return ops_params.validate_params(
            inputs=self.config.inputs,
            outputs=self.config.outputs,
            params=params,
            context=context,
            is_template=is_template,
            check_runs=check_runs,
        )
    except ValidationError as e:
        # Chain explicitly so the validation error stays attached as __cause__.
        raise PolyaxonfileError("Params validation error: `{}`".format(e)) from e
def __init__(self, values):
    """Read ``values`` into a validated config for this specification kind.

    A header carrying this class's kind and ``SCHEMA_VERSION`` is placed in
    front of the given values before they are read. The result is loaded
    into ``self.CONFIG``, then ``check_data`` and ``_extra_validation`` run.

    Raises:
        PolyaxonfileError: if loading the config raises ``ValidationError``
            or ``TypeError``.
    """
    self._values = to_list(values)
    header = {"kind": self._SPEC_KIND, "version": SCHEMA_VERSION}
    self._data = reader.read([header] + self._values)

    try:
        # Deep copy so loading the config cannot alter the stored data.
        snapshot = copy.deepcopy(self.data)
        self._config = self.CONFIG.from_dict(snapshot)
    except (ValidationError, TypeError) as e:
        raise PolyaxonfileError(
            "Received a non valid config `{}`: `{}`".format(self._SPEC_KIND, e)
        )

    self.check_data()
    self._extra_validation()
def _apply_run_context(cls, config: V1CompiledOperation) -> V1CompiledOperation:
    """Validate the run params of ``config`` and parse it with them.

    Every param spec returned by ``config.validate_params`` must hold a
    literal param. The specs are then indexed by name and passed to
    ``cls._parse`` together with ``config``.

    Raises:
        PolyaxonfileError: if a param spec holds a non-literal param.
    """
    validated = config.validate_params(is_template=False, check_runs=True)

    unresolved = next(
        (spec for spec in validated if not spec.param.is_literal), None
    )
    if unresolved is not None:
        raise PolyaxonfileError(
            "apply_context received a non-resolved "
            "ref param `{}` with value `{}`".format(
                unresolved.name, unresolved.param.to_dict()
            )
        )

    by_name = {spec.name: spec for spec in validated}
    return cls._parse(config, by_name)
def calculate_context_spec(
    cls,
    config: V1CompiledOperation,
    contexts: Dict = None,
    should_be_resolved: bool = False,
) -> Dict[str, ParamSpec]:
    """Return the validated param specs of ``config`` merged with ``contexts``.

    The param specs are indexed by name, then updated with the result of
    ``cls.dict_to_param_spec(contexts=contexts)``, so context entries
    override params of the same name.

    Raises:
        PolyaxonfileError: if ``should_be_resolved`` is true and a param
            spec holds a non-literal param.
    """
    validated = config.validate_params(is_template=False, check_runs=True)

    if should_be_resolved:
        unresolved = next(
            (spec for spec in validated if not spec.param.is_literal), None
        )
        if unresolved is not None:
            raise PolyaxonfileError(
                "calculate_context_spec received a non-resolved "
                "ref param `{}` with value `{}`".format(
                    unresolved.name, unresolved.param.to_dict()
                )
            )

    merged = {spec.name: spec for spec in validated}
    merged.update(cls.dict_to_param_spec(contexts=contexts))
    return merged
def get_op_specification(
    config: Union[V1Component, V1Operation] = None,
    hub=None,
    params=None,
    profile=None,
    queue=None,
    nocache=None,
) -> V1Operation:
    """Build a ``V1Operation`` from a hub name, an operation or a component.

    A hub name takes precedence and is stored as ``hubRef``. Otherwise an
    operation config is merged with the payload and any other config is
    embedded under ``component``. Non-hub operations are compiled and
    validated against a copy of the resulting params.

    Raises:
        PolyaxonfileError: if ``params`` is truthy but not a mapping.
    """
    schema_version = config.version if config else pkg.SCHEMA_VERSION
    job_data = {"version": schema_version, "kind": kinds.OPERATION}
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params))
        job_data["params"] = params
    for key, value in (("profile", profile), ("queue", queue)):
        if value:
            job_data[key] = value
    if nocache is not None:
        job_data["cache"] = {"disable": nocache}

    if hub:
        job_data["hubRef"] = hub
        sources = [job_data]
    elif config.kind == kinds.OPERATION:
        sources = [config.to_dict(), job_data]
    else:
        job_data["component"] = config.to_dict()
        sources = [job_data]
    config = get_specification(data=sources)

    params = copy.deepcopy(config.params)
    # Sanity check if params were passed and we are not dealing with a hub component
    if hub:
        return config

    run_config = OperationSpecification.compile_operation(config)
    run_config.validate_params(params=params, is_template=False)
    if run_config.is_dag_run:
        CompiledOperationSpecification.apply_context(run_config)
    return config
def check_polyaxonfile(
    polyaxonfile: str = None,
    python_module: str = None,
    url: str = None,
    hub: str = None,
    params=None,
    profile=None,
    queue=None,
    nocache=None,
    log=True,
    is_cli: bool = True,
    to_op: bool = True,
):
    """Load and validate a Polyaxonfile from a path, python module, url or hub.

    Exactly one source may be given; with none, the default path lookup in
    the current directory is used. Unless a hub component is requested, the
    source is read and parsed, and operations have their references
    collected. With ``to_op`` (or a hub name) the result is wrapped into an
    operation specification carrying the parsed params, profile, queue and
    nocache values.

    Usage errors are reported through ``Printer.print_error(...,
    sys_exit=True)`` when ``is_cli`` is true and raised as
    ``PolyaxonfileError`` otherwise. Any exception raised while reading or
    building the specification is passed to ``handle_cli_error`` in CLI
    mode, or re-raised as ``PolyaxonfileError`` chained to the original.

    Returns:
        The parsed specification or operation.
    """

    def _fail(message):
        # One place deciding between CLI reporting and raising.
        if is_cli:
            Printer.print_error(message, sys_exit=True)
        else:
            raise PolyaxonfileError(message)

    if sum([1 for i in [polyaxonfile, python_module, url, hub] if i]) > 1:
        # Typo fixed ("ro" -> "to"); the no-op `.format(hub)` was dropped
        # because the message has no placeholder.
        _fail(
            "You can only use one and only one option: "
            "hub, url, module, or path to polyaxonfile."
        )
    if not any([polyaxonfile, python_module, url, hub]):
        polyaxonfile = check_default_path(path=".")
    if not any([polyaxonfile, python_module, url, hub]):
        polyaxonfile = ""
    if hub and not to_op:
        _fail(
            "Something went wrong, calling hub component `{}` without operation.".format(
                hub
            )
        )

    polyaxonfile = to_list(polyaxonfile, check_none=True)

    parsed_params = None
    if params:
        parsed_params = parse_params(params, is_cli=is_cli)

    if not any([os.path.isfile(f) for f in polyaxonfile]) and not any(
        [python_module, url, hub]
    ):
        _fail(
            "Please pass a valid polyaxonfile, a python module, url, or component name"
        )

    try:
        plx_file = None
        if not hub:
            if python_module:
                plx_file = ConfigSpec.get_from(python_module, config_type=".py").read()
            elif url:
                plx_file = ConfigSpec.get_from(url, "url").read()
            else:
                plx_file = ConfigSpec.read_from(polyaxonfile)

            plx_file = get_specification(data=plx_file)
            if plx_file.kind == kinds.OPERATION:
                plx_file = collect_references(plx_file)

        if to_op or hub:
            plx_file = get_op_specification(
                hub=hub,
                config=plx_file,
                params=parsed_params,
                profile=profile,
                queue=queue,
                nocache=nocache,
            )
        # NOTE(review): the success message is printed only when NOT running
        # as CLI - confirm this is intended.
        if log and not is_cli:
            Printer.print_success("Polyaxonfile valid")
        return plx_file
    except Exception as e:
        message = "Polyaxonfile is not valid."
        if is_cli:
            handle_cli_error(e, message=message, sys_exit=True)
        else:
            raise PolyaxonfileError(message) from e
def check_polyaxonfile(
    polyaxonfile: str = None,
    python_module: str = None,
    url: str = None,
    hub: str = None,
    params: Dict = None,
    presets: List[str] = None,
    queue: str = None,
    nocache: bool = None,
    cache: bool = None,
    verbose: bool = True,
    is_cli: bool = True,
    to_op: bool = True,
    validate_params: bool = True,
    eager: bool = False,
    git_init: V1Init = None,
    ignore_template: bool = False,
):
    """Load and validate a Polyaxonfile from a path, python module, url or hub.

    At most one of ``python_module``, ``url`` and ``hub`` may be given. When
    none of them is used, the first entry of ``polyaxonfile`` is the main
    file and the remaining entries are forwarded as preset files; otherwise
    every ``polyaxonfile`` entry is forwarded as a preset file. Operations
    have their references collected and DAG components have theirs
    collected too. With ``to_op`` (or a hub name) the result is wrapped
    into an operation specification.

    Usage errors are reported through ``Printer.print_error(...,
    sys_exit=True)`` when ``is_cli`` is true and raised as
    ``PolyaxonfileError`` otherwise. Any exception raised while reading or
    building the specification is passed to ``handle_cli_error`` in CLI
    mode, or re-raised as ``PolyaxonfileError`` chained to the original.

    Returns:
        The parsed specification or operation.
    """

    def _fail(message):
        # One place deciding between CLI reporting and raising.
        if is_cli:
            Printer.print_error(message, sys_exit=True)
        else:
            raise PolyaxonfileError(message)

    if sum([1 for i in [python_module, url, hub] if i]) > 1:
        # The no-op `.format(hub)` was dropped: the message has no placeholder.
        _fail(
            "You can only use one and only one option: "
            "hub, url, or a python module."
        )
    if not any([polyaxonfile, python_module, url, hub]):
        polyaxonfile = check_default_path(path=".")
    if not any([polyaxonfile, python_module, url, hub]):
        _fail(
            "Something went wrong, `check_polyaxonfile` was called without a polyaxonfile, "
            "a hub component reference, a url or a python module."
        )
    if hub and not to_op:
        _fail(
            "Something went wrong, calling hub component `{}` without operation.".format(
                hub
            )
        )

    polyaxonfile = to_list(polyaxonfile, check_none=True)

    parsed_params = None
    if params:
        parsed_params = parse_params(params, is_cli=is_cli)

    if not any([os.path.isfile(f) for f in polyaxonfile]) and not any(
        [python_module, url, hub]
    ):
        _fail(
            "Please pass a valid polyaxonfile, a python module, a url, or a component name"
        )

    try:
        path_context = None
        preset_files = polyaxonfile

        if python_module:
            path_context = python_module
            plx_file = (
                ConfigSpec.get_from(python_module, config_type=".py")
                .read()
                .to_dict(include_kind=True, include_version=True)
            )
        elif url:
            plx_file = ConfigSpec.get_from(url, "url").read()
        elif hub:
            plx_file = ConfigSpec.get_from(hub, "hub").read()
        else:
            # Split by slicing instead of `pop(0)`: if `to_list` hands back
            # the caller's own list (not visible here), popping would remove
            # the main file from the caller's argument.
            path_context = polyaxonfile[0]
            preset_files = polyaxonfile[1:]
            plx_file = ConfigSpec.read_from(path_context)

        plx_file = get_specification(data=plx_file)
        if plx_file.kind == kinds.OPERATION:
            plx_file = collect_references(plx_file, path_context)
            plx_component = plx_file.component
        else:
            plx_component = plx_file
        if plx_component.is_dag_run:
            collect_dag_components(plx_component.run, path_context)

        if to_op or hub:
            plx_file = get_op_specification(
                hub=hub,
                config=plx_file,
                params=parsed_params,
                presets=presets,
                queue=queue,
                nocache=nocache,
                cache=cache,
                validate_params=validate_params,
                preset_files=preset_files,
                git_init=git_init,
            )
        if verbose and is_cli:
            Printer.print_success("Polyaxonfile valid")
        if ignore_template:
            plx_file.disable_template()
        if plx_file.is_template():
            template_message = "This polyaxonfile was marked as template by the owner:"
            if plx_file.template.description:
                template_message += "\ntemplate description: {}".format(
                    plx_file.template.description
                )
            if plx_file.template.fields:
                template_message += "\ntemplate fields that need changes: {}".format(
                    plx_file.template.fields
                )
            Printer.print_warning(template_message)
        if eager:
            is_supported_in_eager_mode(spec=plx_file)
        return plx_file
    except Exception as e:
        message = "Polyaxonfile is not valid."
        if is_cli:
            handle_cli_error(e, message=message, sys_exit=True)
        else:
            raise PolyaxonfileError(message) from e
def check_polyaxonfile(
    polyaxonfile: str = None,
    python_module: str = None,
    url: str = None,
    hub: str = None,
    params: Dict = None,
    profile: str = None,
    queue: str = None,
    nocache: bool = None,
    verbose: bool = True,
    eager_hub: bool = True,
    is_cli: bool = True,
    to_op: bool = True,
    validate_params: bool = True,
    eager: bool = False,
):
    """Load and validate a Polyaxonfile from a path, python module, url or hub.

    Exactly one source may be given; with none, the default path lookup in
    the current directory is used. The source is read eagerly unless it is
    a hub component; a hub name without ``/`` is read eagerly only when
    ``eager_hub`` is set. Operations have their references collected, and
    with ``to_op`` (or a hub name) the result is wrapped into an operation
    specification.

    Usage errors are reported through ``Printer.print_error(...,
    sys_exit=True)`` when ``is_cli`` is true and raised as
    ``PolyaxonfileError`` otherwise. Any exception raised while reading or
    building the specification is passed to ``handle_cli_error`` in CLI
    mode, or re-raised as ``PolyaxonfileError`` chained to the original.

    Returns:
        The parsed specification or operation.
    """

    def _fail(message):
        # One place deciding between CLI reporting and raising.
        if is_cli:
            Printer.print_error(message, sys_exit=True)
        else:
            raise PolyaxonfileError(message)

    sources_given = sum(
        1 for option in (polyaxonfile, python_module, url, hub) if option
    )
    if sources_given > 1:
        _fail(
            "You can only use one and only one option: "
            "hub, url, module, or path or polyaxonfile."
        )
    if not any([polyaxonfile, python_module, url, hub]):
        polyaxonfile = check_default_path(path=".")
    if not any([polyaxonfile, python_module, url, hub]):
        polyaxonfile = ""
    if hub and not to_op:
        _fail(
            "Something went wrong, calling hub component `{}` without operation.".format(
                hub
            )
        )

    polyaxonfile = to_list(polyaxonfile, check_none=True)
    parsed_params = parse_params(params, is_cli=is_cli) if params else None

    has_existing_file = any([os.path.isfile(f) for f in polyaxonfile])
    if not has_existing_file and not any([python_module, url, hub]):
        _fail(
            "Please pass a valid polyaxonfile, a python module, url, or component name"
        )

    try:
        plx_file = None
        path_context = None
        public_hub = hub and "/" not in hub

        if not hub or (public_hub and eager_hub):
            if python_module:
                path_context = python_module
                module_config = ConfigSpec.get_from(
                    python_module, config_type=".py"
                ).read()
                plx_file = module_config.to_dict(
                    include_kind=True, include_version=True
                )
            elif url:
                plx_file = ConfigSpec.get_from(url, "url").read()
            elif hub:
                plx_file = ConfigSpec.get_from(hub, "hub").read()
            else:
                path_context = polyaxonfile[0]
                plx_file = ConfigSpec.read_from(polyaxonfile)

            plx_file = get_specification(data=plx_file)
            if plx_file.kind == kinds.OPERATION:
                plx_file = collect_references(plx_file, path_context)

        if to_op or hub:
            plx_file = get_op_specification(
                hub=hub,
                config=plx_file,
                params=parsed_params,
                profile=profile,
                queue=queue,
                nocache=nocache,
                path_context=path_context,
                validate_params=validate_params,
            )
        if verbose and is_cli:
            Printer.print_success("Polyaxonfile valid")
        if eager:
            is_supported_in_eager_mode(spec=plx_file)
        return plx_file
    except Exception as e:
        message = "Polyaxonfile is not valid."
        if is_cli:
            handle_cli_error(e, message=message, sys_exit=True)
        else:
            raise PolyaxonfileError(message) from e
def get_op_specification(
    config: Union[V1Component, V1Operation] = None,
    hub: str = None,
    params: Dict = None,
    presets: List[str] = None,
    queue: str = None,
    nocache: bool = None,
    cache: bool = None,
    validate_params: bool = True,
    preset_files: List[str] = None,
    git_init: V1Init = None,
) -> V1Operation:
    """Build a ``V1Operation`` from a component, an operation or a hub name.

    The optional values are added to the operation payload when provided;
    a queue is first passed to ``get_queue_info``. A component config is
    embedded under ``component``, an operation config is merged with the
    payload, and a bare hub name is stored as ``hubRef``. Each preset file
    is then read and patched onto the operation using the preset's own
    patch strategy, and ``git_init`` is applied as a pre-merge preset. With
    ``validate_params`` a fresh copy of the operation is compiled and
    validated against a copy of the resulting params.

    Args:
        preset_files: optional preset file paths; ``None`` is treated as no
            presets.

    Raises:
        PolyaxonfileError: if both ``cache`` and ``nocache`` are truthy, or
            if ``params`` is truthy but not a mapping.
    """
    if cache and nocache:
        raise PolyaxonfileError("Received both cache and nocache")

    job_data = {
        "version": config.version if config else pkg.SCHEMA_VERSION,
        "kind": kinds.OPERATION,
    }
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    if presets:
        job_data["presets"] = presets
    if queue:
        # Check only
        get_queue_info(queue)
        job_data["queue"] = queue
    if cache:
        job_data["cache"] = {"disable": False}
    if nocache:
        job_data["cache"] = {"disable": True}

    if config and config.kind == kinds.COMPONENT:
        job_data["component"] = config.to_dict()
        config = get_specification(data=[job_data])
    elif config and config.kind == kinds.OPERATION:
        config = get_specification(data=[config.to_dict(), job_data])
    elif hub:
        job_data["hubRef"] = hub
        config = get_specification(data=[job_data])

    if hub and config.hub_ref is None:
        config.hub_ref = hub

    # Check if there's presets. `preset_files` defaults to None, so fall
    # back to an empty list rather than iterating None.
    for preset_plx_file in preset_files or []:
        preset_plx_file = OperationSpecification.read(preset_plx_file, is_preset=True)
        config = config.patch(preset_plx_file, strategy=preset_plx_file.patch_strategy)

    # Turn git_init to a pre_merge preset
    if git_init:
        git_preset = V1Operation(
            run_patch={"init": [git_init.to_dict()]}, is_preset=True
        )
        config = config.patch(git_preset, strategy=V1PatchStrategy.PRE_MERGE)

    # Sanity check if params were passed and we are not dealing with a hub component
    params = copy.deepcopy(config.params)
    if validate_params:
        # Avoid in-place patch
        run_config = get_specification(config.to_dict())
        run_config = OperationSpecification.compile_operation(run_config)
        run_config.validate_params(params=params, is_template=False)
        if run_config.is_dag_run:
            CompiledOperationSpecification.apply_operation_contexts(run_config)
    return config
def _parse_graph(cls, spec, graph, params):  # noqa, too-many-branches
    """Parse a graph definition into input, layer and output lists.

    Each expression in ``graph["layers"]`` is evaluated with
    ``cls.parse_expression``. Every resulting layer receives a unique name
    (generated as ``<type>_<counter>`` when missing), has its tags recorded,
    and is wired to the previous layer, or to the single input layer for the
    first layer, when it declares no ``inbound_nodes``. The last layer is
    always treated as an output.

    Returns:
        A dict with the keys ``input_layers``, ``layers`` and
        ``output_layers``.

    Raises:
        PolyaxonfileError: if ``graph["layers"]`` is not a list, a layer name
            is reused, the first layer does not say which of several inputs
            to use, a layer references an unknown inbound node, or some
            layer is never consumed.
    """
    input_layers = to_list(graph["input_layers"])
    layer_names = set(input_layers)
    tags = {}
    layers = []
    outputs = []
    layers_counters = defaultdict(int)
    # Names that nothing has consumed yet; the inputs start out unused.
    unused_layers = set(input_layers)

    if not isinstance(graph["layers"], list):
        raise PolyaxonfileError(
            "Graph definition expects a list of layer definitions."
        )

    def add_tag(tag, layer_value):
        # A tag maps to a single name until it is used a second time, at
        # which point the value is turned into a list of names.
        if tag in tags:
            tags[tag] = to_list(tags[tag])
            tags[tag].append(layer_value["name"])
        else:
            tags[tag] = layer_value["name"]

    def get_layer_name(layer_value, layer_type):
        # Unnamed layers get "<type>_<n>", with n counted per layer type.
        if "name" not in layer_value:
            layers_counters[layer_type] += 1
            return "{}_{}".format(layer_type, layers_counters[layer_type])
        return layer_value["name"]

    # Copy the params so that adding "tags" below does not alter the
    # caller's mapping.
    layers_params = {}
    layers_params.update(params)

    last_layer = None
    first_layer = True
    for layer_expression in graph["layers"]:
        parsed_layer = cls.parse_expression(
            spec, layer_expression, layers_params, True
        )
        # Gather all tags from the layers
        parsed_layer = to_list(parsed_layer)
        for layer in parsed_layer:
            if not layer:
                continue

            # Only the first (type, value) pair of the layer dict is used.
            layer_type, layer_value = list(six.iteritems(layer))[0]

            if layer_value is None:
                layer_value = {}
            # Check that the layer has a name otherwise generate one
            name = get_layer_name(layer_value, layer_type)
            if name not in layer_names:
                layer_names.add(name)
                layer_value["name"] = name
            else:
                raise PolyaxonfileError(
                    "The name `{}` is used 2 times in the graph. "
                    "All layer names should be unique. "
                    "If you need to reference a layer in a for loop "
                    "think about using `tags`".format(name)
                )

            for tag in to_list(layer_value.get("tags", [])):
                add_tag(tag, layer_value)

            # Check if the layer is an output
            if layer_value.get("is_output", False) is True:
                outputs.append(layer_value["name"])
            else:
                # Add the layer to unused
                unused_layers.add(layer_value["name"])

            # Check the layers inputs
            if not layer_value.get("inbound_nodes"):
                # Default wiring: chain to the previous layer.
                if last_layer is not None:
                    layer_value["inbound_nodes"] = [
                        last_layer["name"]  # noqa, unsubscriptable-object
                    ]
                # The very first layer connects to the only input layer.
                if first_layer and len(input_layers) == 1:
                    layer_value["inbound_nodes"] = input_layers
                if first_layer and len(input_layers) > 1:
                    raise PolyaxonfileError(
                        "The first layer must indicate which input to use,"
                        "You have {} layers: {}".format(
                            len(input_layers), input_layers
                        )
                    )

            first_layer = False
            for input_layer in layer_value.get("inbound_nodes", []):
                if input_layer not in layer_names:
                    raise PolyaxonfileError(
                        "The layer `{}` has a non existing "
                        "inbound node `{}`".format(layer_value["name"], input_layer)
                    )
                # Being referenced as an inbound node marks a layer as used.
                if input_layer in unused_layers:
                    unused_layers.remove(input_layer)

            # Add layer
            layers.append({layer_type: layer_value})

            # Update layers_params
            layers_params["tags"] = tags

            # Update last_layer
            last_layer = layer_value

    # Add last layer as output
    if last_layer:
        if last_layer["name"] not in outputs:
            outputs.append(last_layer["name"])

        # Remove last layer from unused layers
        if last_layer["name"] in unused_layers:
            unused_layers.remove(last_layer["name"])

    # Check if some layers are unused
    if unused_layers:
        raise PolyaxonfileError(
            "These layers `{}` were declared but are not used.".format(
                unused_layers
            )
        )

    return {
        "input_layers": to_list(graph["input_layers"]),
        "layers": layers,
        "output_layers": outputs,
    }