def confidentialAuditParam(self):
    import yaml

    original_method = self.param.getVal('model')

    if self.param.getVal('quantitative'):
        if original_method != 'PLSR':
            self.param.setVal('model', 'PLSR')
            LOG.info(f'CONFIDENTIALITY AUDIT: the model was set to PLSR, '
                     f'the original method {original_method} was not suitable to build confidential models')
    else:
        if original_method != 'PLSDA':
            self.param.setVal('model', 'PLSDA')
            LOG.info(f'CONFIDENTIALITY AUDIT: the model was set to PLSDA, '
                     f'the original method {original_method} was not suitable to build confidential models')

    # TODO: conformal support
    if self.param.getVal('conformal'):
        self.param.setVal('conformal', False)
        LOG.info('CONFIDENTIALITY AUDIT: conformal was set to False. '
                 'Conformal models are not supported for now in confidential models')

    parameters_file_path = utils.model_path(self.model, 0)
    parameters_file_name = os.path.join(parameters_file_path, 'parameters.yaml')
    with open(parameters_file_name, 'w') as pfile:
        yaml.dump(self.param.p, pfile)

def action_documentation(model, version=None, doc_file=None, oformat='YAML'):
    '''
    Returns an object with the whole results info for a given model and version
    '''
    if model is None:
        return False, 'Empty model label'

    from flame.documentation import Documentation

    # get the model repo path
    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')):
        return False, 'Info file not found'

    doc = Documentation(model, version)

    # INPUT, the doc_file will be applied to doc as a delta
    if doc_file is not None:
        success, message = doc.delta(model, version, doc_file, iformat=oformat)
        return success, message

    # CONSOLE PRINTING
    if oformat == 'YAML':
        yaml = doc.dumpYAML()
        for line in yaml:
            line = line.encode("ascii", "ignore")
            line = line.decode("ascii", "ignore")
            print(line)
        return True, 'parameters listed'

    # OUTPUT, the doc will be returned as a documentation object
    return True, doc

def verify(endpoint, version=None):
    result = {}

    success, result['documentation'] = verify_documentation(endpoint, version)
    # success, result['model'] = verify_model(endpoint, version)
    if not success:
        return False, result

    success, result['data'] = verify_data(endpoint, version)
    if not success:
        return False, result

    success, result['prediction'] = verify_prediction(endpoint, version)
    if not success:
        return False, result

    # save the verification.pkl file in the model folder
    meta_path = utils.model_path(endpoint, version)
    verification_file = os.path.join(meta_path, 'verification.pkl')

    with open(verification_file, 'wb') as file:
        pickle.dump(result, file)

    LOG.info('Saved verification.pkl file')

    show_result(result)
    return True, result

def action_model_template(model, version=None, doc_file=None):
    '''
    Returns a TSV model reporting template
    '''
    from flame.documentation import Documentation

    documentation = Documentation(model, version, context='model')

    if not model:
        return False, 'Empty model label'

    # get the model repo path
    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        # compatibility method, use info.pkl
        if not os.path.isfile(os.path.join(rdir, 'info.pkl')):
            return False, 'Info file not found'
    else:
        # new method, use results.pkl
        if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
            return False, 'Info file not found'

    if doc_file is not None:
        # use the doc_file to update the existing documentation at the model
        # directory and save changes to make them persistent
        success, message = documentation.delta(model, 0, doc_file, iformat='YAML')
        print(success, message)

    documentation.get_upf_template2()

    return True, 'Model documentation template created'

def delta(self, model, version, param_file, iformat='YAML'):
    ''' load a set of parameters from the configuration file present
        at the model directory

        also, inserts the keys present in the param_file provided,
        assuming that it contains a YAML-compatible format, like the
        one generated by manage

        adds some parameters identifying the model and the hash of
        the configuration file
    '''
    if not self.loadYaml(model, version):
        return False, 'file not found'

    # parse parameter file assuming it will be in
    # a YAML-compatible format
    try:
        with open(param_file, 'r') as pfile:
            if iformat == 'YAML':
                newp = yaml.safe_load(pfile)
            elif iformat == 'JSON':
                newp = json.load(pfile)
    except Exception as e:
        return False, e

    # update internal dict with keys in the input file (delta)
    black_list = ['param_format', 'version', 'model_path', 'endpoint', 'md5']
    for key in newp:
        if key not in black_list:
            val = newp[key]

            # YAML defines null values as 'None', which are interpreted
            # as strings
            if val == 'None':
                val = None

            if isinstance(val, dict):
                for inner_key in val:
                    inner_val = val[inner_key]
                    self.setInnerVal(key, inner_key, inner_val)
                    # print ('@delta: adding', key, inner_key, inner_val)
            else:
                self.setVal(key, val)
                # print ('@delta: adding', key, val, type(val))

    # dump internal dict to the parameters file
    parameters_file_path = utils.model_path(model, version)
    parameters_file_name = os.path.join(parameters_file_path, 'parameters.yaml')
    try:
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(self.p, pfile)
    except Exception as e:
        return False, 'unable to write parameters'

    return True, 'OK'

def verify_prediction(endpoint, version=None):
    meta_path = utils.model_path(endpoint, version)
    training_file = os.path.join(meta_path, 'training_series')

    if not os.path.isfile(training_file):
        return True, {'status': 'Failed', 'comments': '', 'Information': []}

    return True, {'status': 'Passed', 'comments': '', 'Information': []}

def run(self, input_source):
    ''' Executes a default prediction workflow '''

    # path to endpoint
    endpoint = utils.model_path(self.model, self.version)
    if not os.path.isdir(endpoint):
        self.conveyor.setError(f'Unable to find model {self.model}, version {self.version}')
        # LOG.error(f'Unable to find model {self.model}')

    if not self.conveyor.getError():
        # uses the child classes within the 'model' folder,
        # to allow customization of
        # the processing applied to each model
        modpath = utils.module_path(self.model, self.version)

        idata_child = importlib.import_module(modpath+".idata_child")
        apply_child = importlib.import_module(modpath+".apply_child")
        odata_child = importlib.import_module(modpath+".odata_child")

        # run idata object, in charge of generating model data from the input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except:
            LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

    if not self.conveyor.getError():
        # make sure there is X data
        if not self.conveyor.isKey('xmatrix'):
            LOG.debug('Failed to compute MDs')
            self.conveyor.setError('Failed to compute MDs')

    if not self.conveyor.getError():
        # run apply object, in charge of generating a prediction from idata
        try:
            apply = apply_child.ApplyChild(self.param, self.conveyor)
        except:
            LOG.warning('Apply child architecture mismatch, defaulting to Apply parent')
            apply = Apply(self.param, self.conveyor)

        apply.run()
        LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

    # run odata object, in charge of formatting the prediction results
    # note that if any of the above steps failed, an error has been inserted in the
    # conveyor and odata will take care of showing an error message
    try:
        odata = odata_child.OdataChild(self.param, self.conveyor)
    except:
        LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
        odata = Odata(self.param, self.conveyor)

    return odata.run()

def saveJSON(self, model, version, input_JSON):
    p = json.load(input_JSON)

    parameters_file_path = utils.model_path(model, version)
    parameters_file_name = os.path.join(parameters_file_path, 'parameters.yaml')
    try:
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(p, pfile)
    except Exception as e:
        return False

    return True

def loadYaml(self, model, version, isSpace=False):
    ''' load a set of parameters from the configuration file present
        at the model directory

        adds some parameters identifying the model and the hash of
        the configuration file
    '''
    # obtain the path and the default name of the model parameters
    if isSpace:
        parameters_file_path = utils.space_path(model, version)
    else:
        parameters_file_path = utils.model_path(model, version)

    if not os.path.isdir(parameters_file_path):
        return False, f'Model "{model}", version "{version}" not found'

    parameters_file_name = os.path.join(parameters_file_path, 'parameters.yaml')

    # load the main class dictionary (p) from this yaml file
    if not os.path.isfile(parameters_file_name):
        return False, 'Parameters file not found'

    try:
        with open(parameters_file_name, 'r') as pfile:
            self.p = yaml.safe_load(pfile)
    except Exception as e:
        return False, e

    # check version of the parameter file
    # no 'param_format' key means version < 2.0
    if 'param_format' in self.p:
        self.extended = True
    else:
        self.extended = False
        self.param_format = 1.0

    # # correct CV to kfold for conformal models
    # if self.getVal('conformal') is True:
    #     self.setVal('ModelValidationCV', 'kfold')

    if self.getVal('model') == 'majority':
        self.setVal('conformal', False)

    # add keys for the model and a MD5 hash
    self.setVal('endpoint', model)
    self.setVal('version', version)
    self.setVal('model_path', parameters_file_path)
    # self.setVal('md5', utils.md5sum(parameters_file_name))
    self.setVal('md5', self.idataHash())

    return True, 'OK'

def run(self, input_source):
    ''' Executes a default building workflow '''

    results = {}

    # path to endpoint
    epd = utils.model_path(self.model, 0)
    if not os.path.isdir(epd):
        LOG.error(f'Unable to find model {self.model}')
        results['error'] = 'unable to find model: '+self.model

    if 'error' not in results:
        # uses the child classes within the 'model' folder,
        # to allow customization of the processing applied to each model
        modpath = utils.module_path(self.model, 0)

        idata_child = importlib.import_module(modpath+".idata_child")
        learn_child = importlib.import_module(modpath+".learn_child")
        odata_child = importlib.import_module(modpath+".odata_child")

        LOG.debug('child modules imported: '
                  f' {idata_child.__name__},'
                  f' {learn_child.__name__},'
                  f' {odata_child.__name__}')

        # run idata object, in charge of generating the model data from the input
        idata = idata_child.IdataChild(self.parameters, input_source)
        results = idata.run()
        LOG.debug(f'idata child {idata_child.__name__} completed `run()`')

    if 'error' not in results:
        if 'xmatrix' not in results:
            LOG.error('Failed to compute MDs')
            results['error'] = 'Failed to compute MDs'

        if 'ymatrix' not in results:
            LOG.error('No activity data (Y) found in training series')
            results['error'] = 'No activity data (Y) found in training series'

    if 'error' not in results:
        # run learn object, in charge of building a model from idata
        learn = learn_child.LearnChild(self.parameters, results)
        results = learn.run()
        LOG.debug(f'learn child {learn_child.__name__} completed `run()`')

    # run odata object, in charge of formatting the results
    # note that if any of the above steps failed, an error has been inserted in the
    # results and odata will take care of showing an error message
    odata = odata_child.OdataChild(self.parameters, results)
    LOG.info('Building completed')

    return odata.run()

def verify_documentation(endpoint, version=None):
    '''
    Check that the required fields are completed
    '''
    blacklist = ['Species', 'Limits_applicability', 'Experimental_protocol',
                 'location', 'description', 'endpoint_positive',
                 'endpoint_negative', 'raw_data_url', 'test_set_size',
                 'training_set_url', 'test_set_url', 'bootstrap', 'ccp_alpha',
                 'criterion', 'max_depth', 'max_features', 'max_leaf_nodes',
                 'max_samples', 'min_impurity_decrease', 'min_impurity_split',
                 'min_samples_leaf', 'min_samples_split',
                 'min_weight_fraction_leaf', 'n_estimators', 'n_jobs',
                 'oob_score', 'random_state', 'verbose', 'warm_start',
                 'confidence', 'ACP_sampler', 'KNN_NN', 'aggregated',
                 'aggregation_function', 'conformal_predictors',
                 'normalizing_model', 'Conformal_mean_interval',
                 'Conformal_accuracy', 'Q2', 'SDEP', 'Comments',
                 'Other_related_models', 'Date_of_QMRF',
                 'Date_of_QMRF_updates', 'QMRF_updates', 'References',
                 'QMRF_same_models', 'Mechanistic_basis',
                 'Mechanistic_references', 'Supporting_information',
                 'Comment_on_the_endpoint',
                 'Endpoint_data_quality_and_variability',
                 'Descriptor_selection', 'Internal_validation_2',
                 'External_validation']

    if endpoint is None:
        return False, 'Empty model label'

    # get the model repo path
    rdir = utils.model_path(endpoint, version)
    if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')):
        return False, 'Info file not found'

    doc = Documentation(endpoint, version)

    fields = [field for field in doc.empty_fields() if field not in blacklist]

    if fields:
        result = {'status': 'Failed',
                  'comments': 'fields not completed',
                  'Information': fields}
    else:
        result = {'status': 'Passed',
                  'comments': 'All required fields are completed',
                  'Information': []}

    return True, result

def build_cmd(model, output_format=None):
    '''
    Instantiates a Build object to build a model using the given
    input file and model.

    This method must be self-contained and suitable for being called in
    cascade, by models which use the output of other models as input
    '''
    # safety check if model exists
    repo_path = pathlib.Path(utils.model_repository_path())
    model_list = os.listdir(repo_path)

    if model['endpoint'] not in model_list:
        LOG.error('endpoint name not found in model repository.')
        raise ValueError('Wrong endpoint name. '
                         f"{model['endpoint']} does not exist")

    build = Build(model['endpoint'], output_format)

    ext_input, model_set = build.get_model_set()

    if ext_input:
        success, model_res = get_external_input(build, model_set, model['infile'])
        if not success:
            return False, model_res

        # now run the model using the data from the external sources
        success, results = build.run(model_res)

    else:
        ifile = model['infile']
        if not os.path.isfile(ifile):
            return False, 'wrong training series file'

        epd = utils.model_path(model['endpoint'], 0)
        lfile = os.path.join(epd, os.path.basename(ifile))

        try:
            shutil.copy(ifile, lfile)
        except shutil.SameFileError:
            LOG.warning('Building model with the input SDF'
                        f' present in model folder {lfile}')

        # run the model with the input file
        success, results = build.run(lfile)

    return success, results

def run(self, input_source):
    ''' Executes a default prediction workflow '''

    results = {}

    # path to endpoint
    endpoint = utils.model_path(self.model, self.version)
    if not os.path.isdir(endpoint):
        LOG.debug('Unable to find model'
                  ' {} version {}'.format(self.model, self.version))
        results['error'] = ('unable to find model: ' + self.model +
                            ' version: ' + str(self.version))

    if 'error' not in results:
        # uses the child classes within the 'model' folder,
        # to allow customization of
        # the processing applied to each model
        modpath = utils.module_path(self.model, self.version)

        idata_child = importlib.import_module(modpath + ".idata_child")
        apply_child = importlib.import_module(modpath + ".apply_child")
        odata_child = importlib.import_module(modpath + ".odata_child")

        LOG.debug('child modules imported: '
                  f' {idata_child.__name__},'
                  f' {apply_child.__name__},'
                  f' {odata_child.__name__}')

        # run idata object, in charge of generating the model data from the input
        idata = idata_child.IdataChild(self.parameters, input_source)
        results = idata.run()
        LOG.debug(f'idata child {idata_child.__name__} completed `run()`')

    if 'error' not in results:
        if 'xmatrix' not in results:
            LOG.debug('Failed to compute MDs')
            results['error'] = 'Failed to compute MDs'

    if 'error' not in results:
        # run apply object, in charge of generating a prediction from idata
        apply = apply_child.ApplyChild(self.parameters, results)
        results = apply.run()
        LOG.debug(f'apply child {apply_child.__name__} completed `run()`')

    # run odata object, in charge of formatting the prediction results or any error
    odata = odata_child.OdataChild(self.parameters, results)
    LOG.info('Prediction completed')

    return odata.run()

def action_label(model, version=None, labels=None, oformat='text'):
    ''' Returns / sets the model labels '''

    if model is None:
        return False, 'Empty model label'

    # get the model repo path
    rdir = utils.model_path(model, version)
    if not os.path.isdir(rdir):
        return False, f'Model {model} not found'

    if labels is not None:
        if oformat == 'JSONS':
            try:
                p = json.loads(labels)
            except Exception as e:
                return False, str(e)
        else:
            try:
                with open(labels, 'r') as fi:
                    p = yaml.safe_load(fi)
            except Exception as e:
                return False, e

        for ikey in p:
            if len(p[ikey]) > 15:
                return False, f'labels should be shorter than 15 chars. Label "{ikey} : {p[ikey]}" is {len(p[ikey])} chars long'
            elif len(p[ikey]) < 2:
                return False, f'labels should be longer than 2 chars. Label "{ikey} : {p[ikey]}" is {len(p[ikey])} chars long'

        try:
            with open(os.path.join(rdir, 'model-labels.pkl'), 'wb') as fo:
                pickle.dump(p, fo)
        except Exception as e:
            return False, e

    # open labels
    try:
        with open(os.path.join(rdir, 'model-labels.pkl'), 'rb') as fi:
            p = pickle.load(fi)
    except Exception as e:
        return False, e

    if oformat == 'text':
        for ikey in p:
            print(f'{ikey}\t{p[ikey]}')
        return True, 'success'

    return True, p

def get_verification(endpoint, version):
    '''
    Retrieves the model verification if it exists
    '''
    verification = False
    meta_path = utils.model_path(endpoint, version)
    verification_file = os.path.join(meta_path, 'verification.pkl')

    if os.path.isfile(verification_file):
        with open(verification_file, 'rb') as file:
            verification = pickle.load(file)
        return True, verification

    # return a (status, value) tuple, consistent with the success branch
    return False, verification

def action_series(model, version):
    ''' Returns the training series used for building a certain model/version '''

    if model is None:
        return False, 'Empty model label'

    meta_path = utils.model_path(model, version)
    training_file = os.path.join(meta_path, 'training_series')

    if not os.path.isfile(training_file):
        return False, 'training series file not found'

    shutil.copy(training_file, './training_series.sdf')

    LOG.info(f'Training series for model {model}, version {version}, saved as "training_series.sdf"')
    return True, 'OK'

def action_results(model, version=None, ouput_variables=False):
    ''' Returns a JSON with the whole results info for a given model and version '''

    if model is None:
        return False, 'Empty model label'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        return False, 'results not found'

    from flame.conveyor import Conveyor

    conveyor = Conveyor()
    with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
        conveyor.load(handle)

    return True, conveyor.getJSON()

def update_file(self, model, version=0):
    '''Saves the current parameter values, modified at the object level
       (i.e. from an interactive Python shell)
    '''
    p = self.p
    if not p:
        return False, 'No loaded parameters'

    parameters_file_path = utils.model_path(model, version)
    parameters_file_name = os.path.join(parameters_file_path, 'parameters.yaml')
    try:
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(p, pfile)
    except Exception as e:
        return False, e

    return True

def action_results(model, version=None, ouput_variables=False):
    ''' Returns an object with the whole results info for a given model and version '''

    if model is None:
        return False, {'code': 1, 'message': 'Empty model label'}

    results_path = utils.model_path(model, version)
    results_file = os.path.join(results_path, 'model-results.pkl')

    if not os.path.isfile(results_file):
        return False, {'code': 0, 'message': 'Results file not found'}

    conveyor = Conveyor()
    with open(results_file, 'rb') as handle:
        conveyor.load(handle)

    return True, conveyor

def load_results(self):
    '''
    Load results pickle with model information
    '''
    # obtain the path and the default name of the results file
    results_file_path = utils.model_path(self.model, self.version)
    results_file_name = os.path.join(results_file_path, 'results.pkl')

    self.conveyor = Conveyor()

    # load the conveyor contents from this pickle file
    if not os.path.isfile(results_file_name):
        raise Exception('Results file not found')

    try:
        with open(results_file_name, "rb") as input_file:
            self.conveyor.load(input_file)
    except Exception as e:
        # LOG.error(f'No valid results pickle found at: {results_file_name}')
        raise e

def action_remove(model, version):
    '''
    Remove the version indicated as argument from the model tree indicated
    as argument
    '''
    if not model:
        return False, 'Empty model label'

    if version == 0:
        return False, 'Development version cannot be removed, provide a version number'

    rdir = utils.model_path(model, version)
    if not os.path.isdir(rdir):
        return False, f'Version {version} not found'

    shutil.rmtree(rdir, ignore_errors=True)

    LOG.info(f'Version {version} of model {model} has been removed')
    return True, f'Version {version} of model {model} has been removed'

def __init__(self, model, version=0, context='model'):
    ''' Load the fields from the documentation file '''

    self.model = model
    self.version = version
    self.fields = None
    self.parameters = Parameters()
    self.conveyor = None

    # obtain the path and the default name of the model documentation
    documentation_file_path = utils.model_path(self.model, self.version)
    documentation_file_name = os.path.join(documentation_file_path,
                                           'documentation.yaml')

    # load the fields dictionary from this yaml file
    if not os.path.isfile(documentation_file_name):
        raise Exception('Documentation file not found')

    try:
        with open(documentation_file_name, 'r') as documentation_file:
            self.fields = yaml.safe_load(documentation_file)
    except Exception as e:
        # LOG.error(f'Error loading documentation file with exception: {e}')
        raise e

    success, message = self.parameters.loadYaml(model, version)
    if not success:
        print(f'Parameters could not be loaded. {message}. '
              'Please make sure endpoint and version are correct')
        return

    # Remove this after acc
    # self.load_parameters()
    if context == 'model':
        self.load_results()
        self.assign_parameters()
        self.assign_results()
        self.autocomplete_documentation()
        self.setVal('md5', self.idataHash())

def action_remove(model, version):
    '''
    Remove the version indicated as argument from the model tree indicated
    as argument
    '''
    if not model:
        LOG.error('empty model label')
        return False, 'empty model label'

    if version == 0:
        LOG.error('development version cannot be removed')
        return False, 'development version cannot be removed'

    rdir = utils.model_path(model, version)
    if not os.path.isdir(rdir):
        LOG.error(f'version {version} not found')
        return False, 'version not found'

    shutil.rmtree(rdir, ignore_errors=True)

    LOG.info(f'version {version} of model {model} has been removed')
    return True, 'version '+str(version)+' of model '+model+' removed'

def loadYaml(self, model, version):
    ''' load a set of parameters from the configuration file present
        at the model directory

        adds some parameters identifying the model and the hash of
        the configuration file
    '''
    # obtain the path and the default name of the model parameters
    parameters_file_path = utils.model_path(model, version)
    parameters_file_name = os.path.join(parameters_file_path,
                                        'parameters.yaml')

    # load the main class dictionary (p) from this yaml file
    if not os.path.isfile(parameters_file_name):
        return False, 'file not found'

    try:
        with open(parameters_file_name, 'r') as pfile:
            self.p = yaml.safe_load(pfile)
    except Exception as e:
        return False, e

    # check version of the parameter file
    # no 'param_format' key means version < 2.0
    if 'param_format' in self.p:
        self.extended = True
    else:
        self.extended = False
        self.param_format = 1.0

    # add keys for the model and a MD5 hash
    self.setVal('endpoint', model)
    self.setVal('version', version)
    self.setVal('model_path', parameters_file_path)
    self.setVal('md5', utils.md5sum(parameters_file_name))

    return True, 'OK'

def __init__(self, model, version):
    parameters_file_path = utils.model_path(model, version)
    parameters_file_name = os.path.join(parameters_file_path,
                                        'parameters.yaml')

    if not os.path.isfile(parameters_file_name):
        LOG.critical('Unable to load model parameters. Aborting...')
        sys.exit()

    try:
        with open(parameters_file_name, 'r') as pfile:
            self.p = yaml.safe_load(pfile)
    except Exception as e:
        LOG.critical('Unable to load model parameters. Aborting...')
        sys.exit()

    self.setVal('endpoint', model)
    self.setVal('version', version)
    self.setVal('model_path', parameters_file_path)
    self.setVal('md5', utils.md5sum(parameters_file_name))

    return

def build_cmd(arguments, output_format=None):
    '''
    Instantiates a Build object to build a model using the given
    input file and model.

    This method must be self-contained and suitable for being called in
    cascade, by models which use the output of other models as input
    '''
    from flame.build import Build

    # safety check if model exists
    endpoint_dir = utils.model_path(arguments['endpoint'], 0)
    if not os.path.isdir(endpoint_dir):
        return False, 'Endpoint name not found in model repository.'

    # remove pre-existing results file
    results_file = os.path.join(endpoint_dir, 'model-results.pkl')
    if os.path.isfile(results_file):
        os.remove(results_file)
    meta_file = os.path.join(endpoint_dir, 'model-meta.pkl')
    if os.path.isfile(meta_file):
        os.remove(meta_file)

    # input file provided in the command
    ifile = arguments['infile']
    if ifile is not None and not os.path.isfile(ifile):
        return False, f'Wrong training series file {ifile}'

    # lfile is the "training_series" copied internally to the endpoint folder
    endpoint_path = utils.model_path(arguments['endpoint'], 0)
    lfile = os.path.join(endpoint_path, 'training_series')

    if 'param_file' in arguments:
        build = Build(arguments['endpoint'],
                      param_file=arguments['param_file'],
                      output_format=output_format)
    elif 'param_string' in arguments:
        build = Build(arguments['endpoint'],
                      param_string=arguments['param_string'],
                      output_format=output_format)
    else:
        build = Build(arguments['endpoint'], output_format=output_format)

    if utils.isSingleThread():
        build.set_single_CPU()

    ensemble = build.get_ensemble()

    # ensemble[0] Boolean with True for ensemble models and False otherwise
    # ensemble[1] List of ensemble model names
    # ensemble[2] List of ensemble model versions
    if ensemble[0]:

        emodels = ensemble[1]
        evers = ensemble[2]

        if ifile is None:
            if not os.path.isfile(lfile):
                return False, 'no training series detected'
        else:
            try:
                safe_copy(ifile, lfile)
                # shutil.copy(ifile, lfile)
            except:
                return False, 'Unable to copy input file to model directory'

        success, model_res = get_ensemble_input(build, emodels, evers, lfile)
        if not success:
            return False, model_res

        for i in range(len(emodels)):
            success, iID = utils.getModelID(emodels[i], evers[i], 'model')
            if success:
                build.extend_modelID(iID)

        LOG.debug(f'New modelID is: {build.conveyor.getMeta("modelID")}')

        # now run the model using the data from the external sources
        success, results = build.run(model_res)

    else:
        # when a new training series is provided in the command line
        # try to copy it to the model directory
        if ifile is not None:

            # in case of incremental training, add the input file at the end of the existing file
            if arguments['incremental'] and os.path.isfile(lfile):
                LOG.info(f'Merging file {ifile} with existing training series')

                new_training = os.path.join(endpoint_path, 'temp_training')

                with open(new_training, 'w') as outfile:
                    # handling the extra newline of SDFiles is problematic. We delay the
                    # output of the newline by stripping newlines and adding a universal
                    # newline at the start of the next line for the first block
                    first = True
                    with codecs.open(lfile, 'r', encoding='utf-8', errors='ignore') as infile:
                        for line in infile:
                            if first:
                                outfile.write(f'{line.rstrip()}')
                                first = False
                            else:
                                outfile.write(f'\n{line.rstrip()}')

                    # for the second block we add the preceding newline in all lines
                    with codecs.open(ifile, 'r', encoding='utf-8', errors='ignore') as infile:
                        for line in infile:
                            outfile.write(f'\n{line.rstrip()}')

                shutil.move(new_training, lfile)

            else:
                try:
                    safe_copy(ifile, lfile)
                    # shutil.copy(ifile, lfile)
                except:
                    return False, 'Unable to copy input file to model directory'

        # check that the local copy of the input file exists
        if not os.path.isfile(lfile):
            return False, 'No training series found'

        # run the model with the input file
        success, results = build.run(lfile)

    return success, results

def predict_cmd(arguments, output_format=None):
    '''
    Instantiates a Predict object to run a prediction using the given input
    file and model.

    This method must be self-contained and suitable for being called in
    cascade, by models which use the output of other models as input.
    '''
    from flame.predict import Predict

    # safety check if model exists
    endpoint_dir = utils.model_path(arguments['endpoint'], 0)
    if not os.path.isdir(endpoint_dir):
        return False, 'Endpoint name not found in model repository.'

    # ** DEPRECATE **
    # this is a back-compatibility trick for older versions of APIs
    # not supporting the label argument
    if 'label' not in arguments:
        arguments['label'] = 'temp'

    if 'output_format' in arguments:
        output_format = arguments['output_format']

    predict = Predict(arguments['endpoint'],
                      version=arguments['version'],
                      output_format=output_format,
                      label=arguments['label'])

    if utils.isSingleThread():
        predict.set_single_CPU()

    ensemble = predict.get_ensemble()

    # ensemble[0] Boolean with True for ensemble models and False otherwise
    # ensemble[1] List of ensemble model names
    # ensemble[2] List of ensemble model versions
    if ensemble[0]:

        if arguments['infile'] is None:
            return False, 'ensemble models always require an input file'

        emodels = ensemble[1]
        evers = ensemble[2]

        success, model_res = get_ensemble_input(predict, emodels, evers,
                                                arguments['infile'])

        if not success:
            predict.conveyor.setError(model_res)
            LOG.error(model_res)
            # return False, model_res
            # TO-DO, comment this line and run prediction to allow odata to generate error info

        # check the presence of changes in the inner models
        modelID = predict.conveyor.getMeta('modelID')
        for i in range(len(emodels)):
            success, iID = utils.getModelID(emodels[i], evers[i], 'model')
            if success:
                if iID not in modelID:
                    predict.conveyor.setWarning(f'Inner model {emodels[i]}.{evers[i]} has been updated. Rebuilding of ensemble model is recommended')
                    LOG.warning(f'Inner model {emodels[i]}.{evers[i]} has been updated. Rebuilding of ensemble model is recommended')

        # now run the model using the data from the external sources
        success, results = predict.run(model_res)

    else:
        # run the model with the input file
        success, results = predict.run(arguments['infile'])

    LOG.info('Prediction completed...')

    return success, results

def action_results(model, version=None, ouput_variables=False):
    ''' Returns a JSON with the whole results info for a given model and version '''

    if model is None:
        return False, 'empty model label'

    if version is None:
        return False, 'no version provided'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        return False, 'results not found'

    # retrieve a pickle file containing the keys 'model_build'
    # and 'model_validate' of results
    with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
        results = pickle.load(handle)

    # this code serializes the results in a list and then converts it
    # to a JSON
    json_results = {}

    # creates a list with the keys which should NOT be included
    black_list = []
    for k in results['manifest']:
        ###
        # By default do not include 'var' arrays, only 'obj' arrays,
        # to avoid including the X matrix and save space
        #
        # this black list can be easily tuned to include everything
        # or to remove other keys
        ###
        if not ouput_variables:
            if k['dimension'] in ['vars']:
                black_list.append(k['key'])

    # iterate keys and, for those not in the black list,
    # format the information to JSON
    for key in results:
        if key in black_list:
            continue

        value = results[key]

        # np.arrays cannot be serialized to JSON and must be transformed
        if isinstance(value, np.ndarray):

            # do not process bi-dimensional arrays
            if len(np.shape(value)) > 1:
                continue

            # boolean must be transformed to 'True' or 'False' strings
            if 'bool_' in str(type(value[0])):
                json_results[key] = ['True' if x else 'False' for x in value]

            # we assume that np.array must contain np.floats
            else:
                # this removes NaN and creates
                # a plain list from ndarrays
                json_results[key] = [x if not np.isnan(x) else None for x in value]

        else:
            json_results[key] = value

    try:
        output = json.dumps(json_results)
    except:
        return False, 'unable to serialize to JSON the results'

    return True, output

def action_info(model, version=None, output='JSON'):
    ''' Returns a text or JSON with results info for a given model and version '''

    if model is None:
        return False, 'empty model label'

    if version is None:
        return False, 'no version provided'

    rdir = utils.model_path(model, version)

    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):

        # compatibility method, use info.pkl
        if not os.path.isfile(os.path.join(rdir, 'info.pkl')):
            return False, 'info not found'

        with open(os.path.join(rdir, 'info.pkl'), 'rb') as handle:
            # retrieve a pickle file containing the keys 'model_build'
            # and 'model_validate' of results
            info = pickle.load(handle)
            info += pickle.load(handle)
        # end of compatibility method

    else:
        # new method, use results.pkl
        with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
            results = pickle.load(handle)

        info = None

        if 'model_build_info' in results:
            info = results['model_build_info']

        if info is None:
            return False, 'info not found'

        if 'model_valid_info' in results:
            info += results['model_valid_info']

        if info is None:
            return False, 'info not found'

    # when this function is called from the console, output is 'text'
    # write and exit
    if output == 'text':
        for val in info:
            if len(val) < 3:
                print(val)
            else:
                print(val[0], ' (', val[1], ') : ', val[2])
        return True, 'model informed OK'

    # this is only reached when this function is called from a web service
    # asking for a JSON
    #
    # this code serializes the results in a list and then converts it
    # to a JSON
    json_results = []
    for i in info:
        # results must be checked to avoid numpy elements not JSON serializable
        if 'numpy.int64' in str(type(i[2])):
            try:
                v = int(i[2])
            except Exception as e:
                LOG.error(e)
                v = None
            json_results.append((i[0], i[1], v))

        elif 'numpy.float64' in str(type(i[2])):
            try:
                v = float(i[2])
            except Exception as e:
                LOG.error(e)
                v = None
            json_results.append((i[0], i[1], v))

        elif isinstance(i[2], np.ndarray):
            if 'bool_' in str(type(i[2][0])):
                temp_results = ['True' if x else 'False' for x in i[2]]
            else:
                # this removes NaN and creates
                # a plain list of formatted floats from ndarrays
                temp_results = [float("{0:.4f}".format(x)) if not np.isnan(x) else None for x in i[2]]
            json_results.append((i[0], i[1], temp_results))

        else:
            json_results.append(i)

    return True, json.dumps(json_results)

def delta(self, model, version, param, iformat='YAML', isSpace=False):
    ''' load a set of parameters from the configuration file present
        at the model directory

        also, inserts the keys present in the param provided,
        assuming that it contains a YAML-compatible format, like the
        one generated by manage

        adds some parameters identifying the model and the hash of
        the configuration file
    '''
    if not self.loadYaml(model, version, isSpace):
        return False, 'file not found'

    # parse parameter file assuming it will be in
    # a YAML-compatible format
    if iformat == 'JSONS':
        try:
            newp = json.loads(param)
        except Exception as e:
            return False, e
    else:
        try:
            with open(param, 'r') as pfile:
                if iformat == 'YAML':
                    newp = yaml.safe_load(pfile)
                elif iformat == 'JSON':
                    newp = json.load(pfile)
        except Exception as e:
            return False, e

    self.applyDelta(newp)

    # # update internal dict with keys in the input file (delta)
    # black_list = ['param_format', 'version', 'model_path', 'endpoint', 'md5']
    # for key in newp:
    #     if key not in black_list:
    #         val = newp[key]
    #         # YAML defines null values as 'None', which are interpreted
    #         # as strings
    #         if val == 'None':
    #             val = None
    #         if isinstance(val, dict):
    #             for inner_key in val:
    #                 inner_val = val[inner_key]
    #                 if inner_val == 'None':
    #                     inner_val = None
    #                 self.setInnerVal(key, inner_key, inner_val)
    #                 # print ('@delta: adding', key, inner_key, inner_val)
    #         else:
    #             self.setVal(key, val)
    #             # print ('@delta: adding', key, val, type(val))

    # dump internal dict to the parameters file
    if isSpace:
        parameters_file_path = utils.space_path(model, version)
    else:
        parameters_file_path = utils.model_path(model, version)

    parameters_file_name = os.path.join(parameters_file_path,
                                        'parameters.yaml')
    try:
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(self.p, pfile)
    except Exception as e:
        return False, 'unable to write parameters'

    # # correct CV to kfold for conformal models
    # if self.getVal('conformal') is True:
    #     self.setVal('ModelValidationCV', 'kfold')

    if self.getVal('model') == 'majority':
        self.setVal('conformal', False)

    # self.setVal('md5', utils.md5sum(parameters_file_name))
    self.setVal('md5', self.idataHash())

    return True, 'OK'