def _assert_valid_input(model, requirements):
    '''
    Validates the user-supplied `model` and `requirements` objects

    Raises AcumosError when `model` is not a `Model` instance, or when
    `requirements` is provided (i.e. not None) but is not a `Requirements`
    instance. Returns None when both inputs are acceptable.
    '''
    if not isinstance(model, Model):
        expected = get_qualname(Model)
        raise AcumosError("Input `model` must be of type {}".format(expected))

    # `requirements` is optional, so None is always accepted
    if requirements is None or isinstance(requirements, Requirements):
        return

    expected = get_qualname(Requirements)
    raise AcumosError("Input `requirements` must be of type {}".format(expected))
def compile_protostr(proto_str, package_name, module_name, out_dir):
    '''
    Compiles a Python module from a protobuf definition str and returns the module path

    The definition is written to `<tmp>/<package_name>/<module_name>.proto`,
    compiled with `protoc`, and the generated `<module_name>_pb2.py` is copied
    into `out_dir`. The returned path is `out_dir` joined with the generated
    file name.

    Raises AcumosError if `protoc` exits with a non-zero status or if the
    expected generated module is missing afterwards.
    '''
    _assert_protoc()

    with TemporaryDirectory() as tdir:
        protopath = path_join(tdir, package_name, "{}.proto".format(module_name))
        makedirs(dirname(protopath))

        with open(protopath, 'w') as f:
            f.write(proto_str)

        # Build the argv list directly. Formatting a single string and splitting it
        # on whitespace corrupts the command whenever a path contains a space
        cmd = ['protoc', '--python_out', tdir, '--proto_path', tdir, protopath]

        p = Popen(cmd, stderr=PIPE)
        _, err = p.communicate()
        if p.returncode != 0:
            raise AcumosError(
                "A failure occurred while generating source code from protobuf: {}".format(err))

        gen_module_name = "{}_pb2.py".format(module_name)
        gen_module_path = path_join(tdir, package_name, gen_module_name)
        if not isfile(gen_module_path):
            raise AcumosError(
                "An unknown failure occurred while generating Python module {}".format(gen_module_path))

        # copy must happen before the temporary directory is cleaned up
        out_module_path = path_join(out_dir, gen_module_name)
        shutil.copy(gen_module_path, out_module_path)
        return out_module_path
def _gather_scripts(context, reqs): '''Yields absolute paths of Python script dependencies''' for script in context.scripts: # script can be a python built-in module with no associated file if script.__spec__.origin != 'built-in': yield script.__file__ for script_path in reqs.scripts: script_abspath = abspath(expanduser(script_path)) if not exists(script_abspath): raise AcumosError( "Provided script requirement {} does not exist".format( script_path)) if isdir(script_abspath): globbed_scripts = glob(path_join(script_abspath, _PYGLOB)) if not globbed_scripts: raise AcumosError( "Provided script requirement directory {} does not contain Python scripts" .format(script_path)) else: yield from globbed_scripts elif isfile(script_abspath) and script_abspath.endswith(_PYEXT): yield script_abspath else: raise AcumosError( "Provided script requirement {} is invalid. See acumos.metadata.Requirements for documentation" .format(script_path))
def _wrap_function(f, name=None):
    '''
    Returns a function that has its arguments and return wrapped in NamedTuple types

    Parameters
    ----------
    f : callable
        Function whose positional arguments and return value are all annotated
    name : str, optional
        Name used in error messages and to derive the generated type names.
        Defaults to ``f.__name__``

    Returns
    -------
    (wrapped, input_type, output_type) : tuple
        The wrapped function together with the types used for its input and output

    Raises
    ------
    AcumosError
        If an annotation is missing, if a raw-typed function has more than one
        argument, or if any involved type fails validation
    '''
    spec = getfullargspec(f)
    anno = spec.annotations

    if 'return' not in anno:
        raise AcumosError("Function {} must have a return annotation".format(f))

    for a in spec.args:
        if a not in anno:
            raise AcumosError("Function argument {} does not have an annotation".format(a))

    if name is None:
        name = f.__name__

    # e.g. 'my_func' -> 'MyFunc'; used as the prefix of generated type names
    title = ''.join(name.title().split('_'))
    field_types = [(a, anno[a]) for a in spec.args]
    ret_type = anno['return']

    args_are_raw = any(is_raw_type(field_type) for _, field_type in field_types)
    ret_is_raw = is_raw_type(ret_type)

    if args_are_raw and len(field_types) > 1:
        raise AcumosError("Cannot process a function with more than 1 argument when using raw types as input")

    # raw types bypass the structured-type validation entirely
    if not args_are_raw:
        for field_name, field_type in field_types:
            with reraise('Function {} argument {} is invalid', (name, field_name)):
                _assert_valid_type(field_type)

    if not ret_is_raw and ret_type not in (None, NoReturn):
        with reraise('Function {} return type {} is invalid', (name, ret_type)):
            _assert_valid_type(ret_type)

    wrap_input = True
    wrap_output = True

    if args_are_raw or _already_wrapped(field_types):
        # the single argument already is the input type; pass it through unchanged
        input_type = field_types[0][1]
        wrap_input = False
    else:
        input_type = _create_input_type(title, field_types)
        with reraise('Function {} wrapped input type is invalid', (name,)):
            _assert_valid_type(input_type)

    if ret_is_raw or _is_namedtuple(ret_type):
        output_type = ret_type
        wrap_output = False
    else:
        output_type = _create_ret_type(title, ret_type)
        with reraise('Function {} wrapped output type is invalid', (name,)):
            _assert_valid_type(output_type)

    wrapper = _get_wrapper(wrap_input, wrap_output)
    return wrapper(f, input_type, output_type), input_type, output_type
def _wrap_function(f, name=None):
    '''
    Wraps `f` so that it consumes and produces NamedTuple types

    Every positional argument and the return value of `f` must be annotated.
    Returns a `(wrapped_f, input_type, output_type)` tuple. Raises AcumosError
    when an annotation is missing or a type fails validation.
    '''
    spec = getfullargspec(f)
    annotations = spec.annotations

    if 'return' not in annotations:
        raise AcumosError(
            "Function {} must have a return annotation".format(f))

    for arg_name in spec.args:
        if arg_name not in annotations:
            raise AcumosError(
                "Function argument {} does not have an annotation".format(arg_name))

    if name is None:
        name = f.__name__

    # e.g. 'my_func' -> 'MyFunc'; prefix for the generated type names
    title = ''.join(name.title().split('_'))
    field_types = [(arg_name, annotations[arg_name]) for arg_name in spec.args]
    ret_type = annotations['return']

    for arg_name, arg_type in field_types:
        with reraise('Function {} argument {} is invalid', (name, arg_name)):
            _assert_valid_type(arg_type)

    if ret_type not in (None, NoReturn):
        with reraise('Function {} return type {} is invalid', (name, ret_type)):
            _assert_valid_type(ret_type)

    # decide independently whether the input side / output side need wrapping
    input_is_wrapped = _already_wrapped(field_types)
    if input_is_wrapped:
        input_type = field_types[0][1]
    else:
        input_type = _create_input_type(title, field_types)

    output_is_wrapped = _is_namedtuple(ret_type)
    if output_is_wrapped:
        output_type = ret_type
    else:
        output_type = _create_ret_type(title, ret_type)

    if input_is_wrapped and output_is_wrapped:
        wrapped_f = f
    elif input_is_wrapped:
        wrapped_f = _create_wrapper_ret(f, input_type, output_type)
    elif output_is_wrapped:
        wrapped_f = _create_wrapper_args(f, input_type, ret_type)
    else:
        wrapped_f = _create_wrapper_both(f, input_type, output_type)

    with reraise('Function {} wrapped input type is invalid', (name, )):
        _assert_valid_type(input_type)

    with reraise('Function {} wrapped output type is invalid', (name, )):
        _assert_valid_type(output_type)

    return wrapped_f, input_type, output_type
def add_module(self, module):
    '''
    Adds a module to the context module set

    `module` may be a module object or the importable name of one; a name is
    imported first. Raises AcumosError when the name cannot be imported or
    when `module` is neither a str nor a module object.
    '''
    if not isinstance(module, (str, ModuleType)):
        raise AcumosError(
            "Module must be of type str or types.ModuleType, not {}".format(type(module)))

    if isinstance(module, str):
        module_name = module
        try:
            module = import_module(module_name)
        except ImportError:
            raise AcumosError(
                "Module '{}' was identified as a dependency, but cannot be imported. Ensure that it is installed and available".format(module_name))

    self._modules.add(module)
def create_dataframe(name, df):
    '''
    Returns a NamedTuple type corresponding to a pandas DataFrame instance

    Each DataFrame column becomes a field of the returned type, annotated as a
    `List` of the primitive that the column dtype maps to.

    Raises AcumosError if `df` is not a `pandas.DataFrame` or if a column has a
    dtype without a known primitive mapping.
    '''
    import pandas as pd

    if not isinstance(df, pd.DataFrame):
        raise AcumosError('Input `df` must be a pandas.DataFrame')

    # `Series.iteritems()` was removed in pandas 2.0; `items()` yields the same pairs
    dtypes = list(df.dtypes.items())
    for field_name, dtype in dtypes:
        if dtype not in _dtype2prim:
            raise AcumosError("DataFrame column '{}' has an unsupported type '{}'. Supported types are: {}".format(field_name, dtype, _NUMPY_PRIMITIVES))

    field_types = [(n, List[_dtype2prim[dt]]) for n, dt in dtypes]
    df_type = NamedTuple(name, field_types)
    return df_type
def _authenticate(auth_api):
    '''
    Obtains and returns the jwt string

    When both the username and password environment variables are set, the
    credentials are posted to `auth_api` and the token is read from the JSON
    response; a non-200 response raises AcumosError. Otherwise the user is
    prompted for an onboarding token.
    '''
    username = environ.get(_USERNAME_VAR)
    password = environ.get(_PASSWORD_VAR)

    # user/pass is only used when explicitly provided; otherwise prompt for a token
    if not (username and password):
        return gettoken('Enter onboarding token: ')

    credentials = {'username': username, 'password': password}
    request_body = {'request_body': credentials}
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}

    response = requests.post(auth_api, json=request_body, headers=headers)
    if response.status_code != 200:
        raise AcumosError("Authentication failure: {}".format(response.text))

    return response.json()['jwtToken']
def reraise(prefix, prefix_args):
    '''
    Reraises an AcumosError with a more informative prefix

    Generator with a single yield point, intended for use in a `with` statement
    (presumably wrapped by `contextlib.contextmanager` outside this block).
    An AcumosError raised at the yield point is replaced by a new AcumosError
    whose message is `prefix.format(*prefix_args)` followed by the original
    message, keeping the original traceback.
    '''
    try:
        yield
    except AcumosError as err:
        context = prefix.format(*prefix_args)
        message = "{}: {}".format(context, err)
        raise AcumosError(message).with_traceback(err.__traceback__)
def _post_model(files, push_api, auth_api, tries, max_tries, extra_headers, options) -> Optional[str]:
    '''
    Attempts to post the model to Acumos, returns the docker image URI

    Parameters
    ----------
    files : dict
        Multipart payload passed to ``requests.post``; ``files['model'][1]`` is
        expected to be an open file object located in the model dump directory
    push_api, auth_api : str
        Upload endpoint and authentication endpoint
    tries, max_tries : int
        Current attempt counter and the value at which retries stop
    extra_headers : dict or None
        Additional headers merged over the default ones
    options
        Object providing ``create_microservice`` and ``deploy`` flags

    Returns
    -------
    str or None
        The docker image URI reported by the server when a microservice was
        requested and the response contains it, otherwise None

    Raises
    ------
    AcumosError
        If the push fails and no retry applies
    '''
    headers = {
        'Authorization': get_jwt(auth_api),
        'isCreateMicroservice': 'true' if options.create_microservice else 'false',
        'deploy': 'true' if options.deploy else 'false'
    }
    if extra_headers is not None:
        headers.update(extra_headers)

    resp = requests.post(push_api, files=files, headers=headers)

    if resp.ok:
        logger.info("Model pushed successfully to {}".format(push_api))
        if options.create_microservice:
            try:
                docker_image_uri = resp.json()["dockerImageUri"]
            except KeyError:
                logger.warning(
                    "Docker image URI could not be found in server response, "
                    "on-boarding server is probably running a version prior to Demeter."
                )
            else:
                logger.info(
                    f"Acumos model docker image successfully created: {docker_image_uri}"
                )
                return docker_image_uri
        return None

    # any failure invalidates the cached token before deciding how to proceed
    clear_jwt()

    if resp.status_code == 401 and tries != max_tries:
        logger.warning(
            'Model push failed due to an authorization failure. Clearing credentials and trying again'
        )
        # propagate the retry's result; otherwise the image URI of a successful retry is lost
        return _post_model(files, push_api, auth_api, tries + 1, max_tries,
                           extra_headers, options)

    if resp.status_code == 500 and tries != max_tries:
        from os.path import dirname

        with ExitStack() as stack:
            print(
                "\x1b[31m Warning : Status code 500 received, Trying with the 0.4.0 Clio schema\x1b[39m \n"
            )
            # the dump directory is where the already-opened model archive lives;
            # read it from the file object instead of parsing its repr()
            dump_dir = dirname(files['model'][1].name)
            meta_clio = stack.enter_context(
                open(path_join(dump_dir, 'metadata_clio.json')))
            model = stack.enter_context(
                open(path_join(dump_dir, 'model.zip'), 'rb'))
            proto = stack.enter_context(
                open(path_join(dump_dir, 'model.proto')))

            files_Clio = {
                'model': ('model.zip', model, 'application/zip'),
                'metadata': ('metadata.json', meta_clio, 'application/json'),
                'schema': ('model.proto', proto, 'text/plain')
            }
            return _post_model(files_Clio, push_api, auth_api, tries + 1,
                               max_tries, extra_headers, options)

    raise AcumosError("Model push failed: {}".format(
        _ServerResponse(resp.status_code, resp.reason, resp.text)))
def get_context(name=_DEFAULT):
    '''
    Looks up and returns the AcumosContext registered under `name`

    Raises AcumosError if no context with that name has been created.
    '''
    if name not in _contexts:
        raise AcumosError(
            "AcumosContext '{}' has not been created".format(name))
    return _contexts[name]
def _get_distribution(req_name):
    '''
    Returns the [name, version] pair of an installed requirement

    The pair is obtained by splitting the distribution's requirement string on
    '=='. Raises AcumosError if no installed distribution matches `req_name`.
    '''
    try:
        requirement = get_distribution(req_name).as_requirement()
        pinned = str(requirement)
        return pinned.split('==')
    except DistributionNotFound:
        raise AcumosError(
            "Module {} was detected as a dependency, but not found as a pip installed package. Use acumos.session.Requirements to declare custom packages or map module names to pip-installable names (e.g. Requirements(req_map=dict(PIL='pillow')) )".format(req_name))
def _assert_valid_apis(**apis): '''Raises AcumosError if api are invalid''' for param, api in apis.items(): if api is None: raise AcumosError("AcumosSession.push requires that the API for `{}` be provided".format(param)) if not api.startswith('https'): logger.warning("Provided `{}` API {} does not begin with 'https'. Your password and token are visible in plaintext!".format(param, api))
def __init__(self, **kwargs):
    '''
    Builds the model from keyword arguments mapping method names to functions

    Values that are not already `Function` instances are converted with
    `_create_function`. Every resulting method is stored in `self._methods`
    and also exposed as an attribute of the same name. Raises AcumosError
    when no functions are given.
    '''
    if not kwargs:
        raise AcumosError('No functions were provided to Model')

    methods = {}
    for name, func in kwargs.items():
        if isinstance(func, Function):
            methods[name] = func
        else:
            methods[name] = _create_function(func, name)
    self._methods = methods

    for name, method in methods.items():
        setattr(self, name, method)
def _type2proto(t):
    '''
    Returns a string corresponding to the protobuf type

    Types present in the lookup table map to their registered protobuf name;
    NamedTuple and Enum classes map to their own class name.

    Raises AcumosError for any other input.
    '''
    if t in _type_lookup:
        return _type_lookup[t]

    # issubclass() raises TypeError when `t` is not a class (e.g. a typing construct),
    # so guard it to make unknown inputs reach the AcumosError below
    if _is_namedtuple(t) or (isinstance(t, type) and issubclass(t, Enum)):
        return t.__name__

    raise AcumosError("Unknown protobuf mapping for type {}".format(t))
def _validate_options(options):
    '''
    Returns a usable `Options` object

    None is replaced by a default-constructed `Options`; an existing `Options`
    instance is returned as is. Anything else raises AcumosError.
    '''
    if options is None:
        return Options()

    if isinstance(options, Options):
        return options

    raise AcumosError(
        'The `options` parameter must be of type `acumos.metadata.Options`'
    )
def _field2proto(name, type_, index, type_names, rjust=None):
    '''
    Builds the protobuf schema field str for one NamedTuple field

    `name`, `type_` and `index` describe the field; `type_names` holds the names
    of the custom types that may be referenced. The result is right-justified
    to `rjust` characters, or to its own length plus two when `rjust` is None.

    Raises AcumosError for unknown custom types and unsupported types, and
    NestedTypeError for containers nested inside a List or Dict.
    '''
    info = inspect_type(type_)
    field = None

    if type_ in _type_lookup:
        field = "{} {} = {};".format(_type2proto(type_), name, index)

    elif _is_namedtuple(type_) or issubclass(info.origin, Enum):
        custom_name = type_.__name__
        if custom_name not in type_names:
            raise AcumosError(
                "Could not build protobuf field using unknown custom type {}".format(custom_name))
        field = "{} {} = {};".format(custom_name, name, index)

    elif issubclass(info.origin, List):
        element_type = info.args[0]
        if _is_container(element_type):
            raise NestedTypeError(
                "Nested container {} is not yet supported; try using NamedTuple instead".format(type_))
        # the element field is rendered without padding, then prefixed
        element_field = _field2proto(name, element_type, index, type_names, 0)
        field = "repeated {}".format(element_field)

    elif issubclass(info.origin, Dict):
        key_type, value_type = info.args
        if _is_container(key_type) or _is_container(value_type):
            raise NestedTypeError(
                "Nested container {} is not yet supported; try using NamedTuple instead".format(type_))
        field = "map<{}, {}> {} = {};".format(
            _type2proto(key_type), _type2proto(value_type), name, index)

    if field is None:
        raise AcumosError(
            "Could not build protobuf field due to unsupported type {}".format(type_))

    width = len(field) + 2 if rjust is None else rjust
    return field.rjust(width, ' ')
def _assert_valid_type(t, container=None):
    '''
    Raises AcumosError if the input type contains an invalid type

    `container` is the enclosing container type (List or Dict) when validating
    the content of a container, and None at the top level. It is used to reject
    containers nested directly inside other containers.
    '''
    if t in _VALID_PRIMITIVES:
        pass

    elif _is_namedtuple(t):
        if t.__name__ in _RESERVED_NAMES and t not in _RESERVED_TYPES:
            raise AcumosError(
                "NamedTuple {} cannot use a reserved name: {}".format(t, _RESERVED_NAMES))

        # `_field_types` was removed from NamedTuple classes in Python 3.9;
        # fall back to the class annotations, which carry the same mapping
        field_types = getattr(t, '_field_types', None)
        if field_types is None:
            field_types = getattr(t, '__annotations__', {})

        for tt in field_types.values():
            _assert_valid_type(tt)

    elif _is_subclass(t, List):
        if container is not None:
            raise AcumosError(
                "List types cannot be nested within {} types. Use NamedTuple instead".format(container.__name__))

        _assert_valid_type(t.__args__[0], container=List)

    elif _is_subclass(t, Dict):
        if container is not None:
            raise AcumosError(
                "Dict types cannot be nested within {} types. Use NamedTuple instead".format(container.__name__))

        key_type, value_type = t.__args__

        if key_type is not str:
            raise AcumosError('Dict keys must be str type')

        _assert_valid_type(value_type, container=Dict)

    elif _is_subclass(t, Enum):
        pass

    else:
        raise AcumosError(
            "Type {} is not one of the supported types: {}".format(t, _VALID_TYPES))
def _get_requirement_name(req): '''Returns the str name of a requirement''' if isinstance(req, ModuleType): name = req.__name__ elif isinstance(req, str): name = req else: raise AcumosError( "Requirement {} is invalid; must be ModuleType or string".format( req)) return name
def _DirManager(dir_=None): '''Wrapper that passes dir_ through or creates a temporary directory''' if dir_ is not None: if not isdir(dir_): raise AcumosError( "Provided AcumosContext rootdir {} does not exist".format( dir_)) yield dir_ else: with tempfile.TemporaryDirectory() as tdir: yield tdir
def load_module(fullname, path):
    '''
    Imports the module `fullname` from `path` and returns it

    Dispatches on the running interpreter version: 3.5 and later use the py35
    loader, 3.3 and 3.4 use the py33 loader. Older versions raise AcumosError.
    '''
    ver_info = sys.version_info

    if ver_info >= (3, 5):
        return _load_module_py35(fullname, path)

    if ver_info >= (3, 3):
        return _load_module_py33(fullname, path)

    raise AcumosError(
        "Attempted to import Python module from path, but Python {} is not supported".format(ver_info))
def _gather_package_scripts(packages): '''Yields (relpath, abspath) tuples of Python scripts from a sequence of packages''' for path in packages: path = expanduser(path) if not isdir(path): raise AcumosError("Path {} is not a directory".format(path)) for root, dirnames, filenames in walk(path): for filename in fnmatch.filter(filenames, '*.py'): script_abspath = path_join(root, filename) script_relpath = path_join(basename(path), relpath(script_abspath, path)) yield script_relpath, script_abspath
def _require_unique(types): '''Returns a list of unique types. Raises AcumosError if named types are not uniquely defined''' dd = defaultdict(list) for t in types: dd[t.__name__].append(t) for n, l in dd.items(): if len(l) > 1 and not all(_types_equal(l[0], t) for t in l[1:]): raise AcumosError( "Multiple definitions found for type {}: {}".format(n, l)) return [l[0] for l in dd.values()]
def __init__(self, root_dir):
    '''
    Initializes the context rooted at the existing directory `root_dir`

    Sets up an empty module set, records the root directory and the path of
    `context.json` inside it, loads the parameters, and registers every
    default module. Raises AcumosError if `root_dir` is not a directory.
    '''
    root_exists = isdir(root_dir)
    if not root_exists:
        raise AcumosError(
            "AcumosContext root directory {} does not exist".format(root_dir))

    self._root_dir = root_dir
    self._modules = set()
    self._params_path = path_join(root_dir, 'context.json')
    self.parameters = self._load_params()

    for default_module in _DEFAULT_MODULES:
        self.add_module(default_module)
def _copy_dir(src_dir, outdir, name): '''Copies a directory to a new location''' dst_path = path_join(outdir, name) # Remove metadata_clio.json file if exist clioFile = path_join(src_dir, "metadata__clio.json") if os.path.isfile(clioFile): os.remove(clioFile) if isdir(dst_path): raise AcumosError("Model {} has already been dumped".format(dst_path)) shutil.copytree(src_dir, dst_path)
def _require_unique(types): '''Returns a list of unique types. Raises AcumosError if named types are not uniquely defined''' types_by_name = defaultdict(list) for _type in types: types_by_name[_type.__name__].append(_type) for name, types in types_by_name.items(): if len(types) > 1 and not all( _types_equal(types[0], t) for t in types[1:]): raise AcumosError( "Multiple definitions found for type {}: {}".format( name, types)) return [types[0] for types in types_by_name.values()]
def dump_zip(self, model: Model, name: str, outfile: Union[Path, str], requirements: Optional[Requirements] = None, replace: bool = False):
    '''
    Creates a zipped package located at ``outfile``

    Parameters
    ----------
    model : ``acumos.modeling.Model``
        An Acumos model instance
    name : str
        The name of your model
    outfile : str or Path
        The name or path to the output .zip
    requirements : ``acumos.metadata.Requirements``, optional
        Additional Python dependencies that you can optionally specify
    replace : bool
        If the model zip already exists, acumos will fail unless replace is set to True

    Raises
    ------
    AcumosError
        If the inputs are invalid, if ``outfile`` does not end in ``.zip``, or if
        ``outfile`` already exists and ``replace`` is False
    '''
    _assert_valid_input(model, requirements)

    outfile = Path(outfile)
    if not outfile.name.lower().endswith(".zip"):
        raise AcumosError("outfile must have a zip extension.")

    with _dump_model(model, name, requirements) as dump_dir:
        import zipfile

        if outfile.exists() and not replace:
            raise AcumosError(
                "Model {} has already been dumped, set replace to True to overwrite.".format(outfile))

        with zipfile.ZipFile(outfile, 'w') as model_zip:
            for root, _, files in os.walk(dump_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    # store entries relative to the dump directory: top-level files keep
                    # their bare name, nested files keep their sub-path instead of colliding
                    model_zip.write(filename=file_path,
                                    arcname=os.path.relpath(file_path, dump_dir))
def _infer_model_dir(path): '''Returns an absolute path to the model dir. Unzips the model archive if `path` contains it''' model_zip_path = path_join(path, 'model.zip') if isfile(model_zip_path): model_dir = path_join(path, 'model') zip_file = ZipFile(model_zip_path) zip_file.extractall(model_dir) else: model_dir = path pkl_path = path_join(model_dir, 'model.pkl') if not isfile(pkl_path): raise AcumosError("Provided path {} does not contain an Acumos model".format(path)) return model_dir
def AcumosContextManager(rootdir=None, name=_DEFAULT):
    '''
    Context manager that provides a AcumosContext object

    Generator with a single yield point (presumably wrapped by
    `contextlib.contextmanager` outside this block). A new context is created
    in `rootdir`, or in a directory supplied by `_DirManager` when `rootdir` is
    None, registered under `name` and yielded. When the caller's block finishes
    normally the context parameters are saved; the registration is removed in
    all cases.

    Raises AcumosError if a context named `name` is already registered.
    '''
    with _patch_dill():
        if name in _contexts:
            raise AcumosError(
                "AcumosContext '{}' has already been created. Use `get_context` to access it.".format(name))
        try:
            with _DirManager(rootdir) as rootdir:
                context = AcumosContext(rootdir)
                _contexts[name] = context
                yield context
                context.save_params()
        finally:
            # setup may fail before the context is registered; a plain `del` would then
            # raise KeyError and hide the original error
            _contexts.pop(name, None)
def _post_model(files, push_api, auth_api, tries, max_tries, extra_headers):
    '''
    Attempts to post the model to Acumos

    Sends `files` to `push_api` with a jwt obtained for `auth_api`, merged with
    `extra_headers` when given. A 201 response counts as success. On any other
    response the cached jwt is cleared; a 401 is retried while `tries` differs
    from `max_tries`, everything else raises AcumosError.
    '''
    headers = {'Authorization': get_jwt(auth_api)}
    if extra_headers is not None:
        headers.update(extra_headers)

    response = requests.post(push_api, files=files, headers=headers)

    if response.status_code == 201:
        logger.info("Model pushed successfully to {}".format(push_api))
        return

    # the token is dropped on every failure, not only on authorization errors
    clear_jwt()

    retry_allowed = tries != max_tries
    if response.status_code == 401 and retry_allowed:
        logger.warning('Model push failed due to an authorization failure. Clearing credentials and trying again')
        _post_model(files, push_api, auth_api, tries + 1, max_tries, extra_headers)
        return

    failure = _ServerResponse(response.status_code, response.reason, response.text)
    raise AcumosError("Model push failed: {}".format(failure))