# module-level names assumed by this method: `utils` (flame utilities) and
# `date` from the standard library (`from datetime import date`)
def autocomplete_documentation(self):
    """ Autocomplete fields in the model documentation """

    # ID, model identifier
    self.fields['ID']['value'] = utils.getModelID(self.model,
                                                  self.version,
                                                  'model')[1]

    # Version
    self.fields['Version']['value'] = str(self.version)

    # Date, date of model development and date of QMRF
    today = date.today().strftime("%B %d, %Y")
    self.fields['Date']['value'] = today
    self.fields['Date_of_QMRF']['value'] = today

    # Format, input format used (SDF or TSV)
    if self.parameters.getVal('input_type') == 'data':
        self.fields['Data_info']['value']['format']['value'] = 'TSV'
    else:
        self.fields['Data_info']['value']['format']['value'] = 'SDF'

    # Algorithm, type: QSAR
    self.fields['Algorithm']['value']['type']['value'] = 'QSAR'

    # Software, main modelling program, version, description and license
    software = "Flame, 1.0rc3"
    fieldsapplysoftware = ['model', 'descriptors', 'applicability_domain']
    for field in fieldsapplysoftware:
        if field == 'applicability_domain':
            # the applicability domain software is only documented for
            # conformal models
            if self.parameters.getVal('conformal'):
                self.fields['Software']['value'][field]['value'] = software
        else:
            self.fields['Software']['value'][field]['value'] = software
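# Minimal usage sketch for the method above. The `Documentation` class name,
# its import path and constructor signature are assumptions for illustration;
# only the nested field keys are taken from the assignments above.
from flame.documentation import Documentation  # assumed location

doc = Documentation('MYMODEL', version=1)      # placeholder model name
doc.autocomplete_documentation()
print(doc.fields['Date']['value'])             # e.g. "May 05, 2024"
print(doc.fields['Algorithm']['value']['type']['value'])  # 'QSAR'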
# module-level names assumed by this constructor: LOG, sys, utils,
# Parameters and Conveyor, imported at the top of the module
def __init__(self, model, version=0, output_format=None, label=None):
    LOG.debug('Starting predict...')
    self.model = model
    self.version = version
    self.param = Parameters()
    self.conveyor = Conveyor()

    # identify the workflow type
    self.conveyor.setOrigin('apply')

    # load modelID
    success, result = utils.getModelID(model, version, 'model')
    if not success:
        LOG.critical(f'{result}. Aborting...')
        sys.exit()

    self.conveyor.addMeta('modelID', result)
    LOG.debug(f'Loaded model with modelID: {result}')

    # assign prediction label
    self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                         'method', 'single',
                         'Label used to identify the prediction')

    success, results = self.param.loadYaml(model, version)
    if not success:
        LOG.critical(f'Unable to load model parameters. {results}. Aborting...')
        sys.exit()

    # add additional output formats included in the constructor;
    # this is required to add JSON as output format when the object is
    # instantiated from a web service call requiring this output
    if output_format is not None:
        if output_format not in self.param.getVal('output_format'):
            self.param.appVal('output_format', output_format)

        if 'ghost' in output_format:
            self.param.setVal('output_similar', False)

    return
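# Minimal sketch of a direct instantiation of the constructor above. The
# endpoint name, version and input file are placeholders; in normal use the
# object is created through predict_cmd (shown further below), and run() is
# assumed to be the prediction entry point of the Predict class.
from flame.predict import Predict  # as imported inside predict_cmd below

predict = Predict('MYMODEL', version=1, output_format='JSON', label='test_run')
success, results = predict.run('compounds.sdf')  # placeholder input file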
# standard library modules used below: os, shutil, codecs; flame-internal
# names (LOG, utils, safe_copy, get_ensemble_input) are assumed to be
# imported at module level
def build_cmd(arguments, output_format=None):
    '''
    Instantiates a Build object to build a model using the given
    input file and model.

    This method must be self-contained and suitable for being called in
    cascade, by models which use the output of other models as input.
    '''
    from flame.build import Build

    # safety check if model exists
    endpoint_dir = utils.model_path(arguments['endpoint'], 0)
    if not os.path.isdir(endpoint_dir):
        return False, 'Endpoint name not found in model repository.'

    # remove pre-existing results files
    results_file = os.path.join(endpoint_dir, 'model-results.pkl')
    if os.path.isfile(results_file):
        os.remove(results_file)

    meta_file = os.path.join(endpoint_dir, 'model-meta.pkl')
    if os.path.isfile(meta_file):
        os.remove(meta_file)

    # input file provided in the command
    ifile = arguments['infile']
    if ifile is not None and not os.path.isfile(ifile):
        return False, f'Wrong training series file {ifile}'

    # lfile is the "training_series" copied internally to the endpoint folder
    endpoint_path = utils.model_path(arguments['endpoint'], 0)
    lfile = os.path.join(endpoint_path, 'training_series')

    if 'param_file' in arguments:
        build = Build(arguments['endpoint'],
                      param_file=arguments['param_file'],
                      output_format=output_format)
    elif 'param_string' in arguments:
        build = Build(arguments['endpoint'],
                      param_string=arguments['param_string'],
                      output_format=output_format)
    else:
        build = Build(arguments['endpoint'], output_format=output_format)

    if utils.isSingleThread():
        build.set_single_CPU()

    ensemble = build.get_ensemble()

    # ensemble[0] boolean, True for ensemble models and False otherwise
    # ensemble[1] list of ensemble model names
    # ensemble[2] list of ensemble model versions
    if ensemble[0]:
        emodels = ensemble[1]
        evers = ensemble[2]

        if ifile is None:
            if not os.path.isfile(lfile):
                return False, 'no training series detected'
        else:
            try:
                safe_copy(ifile, lfile)
                # shutil.copy(ifile, lfile)
            except Exception:
                return False, 'Unable to copy input file to model directory'

        success, model_res = get_ensemble_input(build, emodels, evers, lfile)
        if not success:
            return False, model_res

        for i in range(len(emodels)):
            success, iID = utils.getModelID(emodels[i], evers[i], 'model')
            if success:
                build.extend_modelID(iID)

        LOG.debug(f'New modelID is: {build.conveyor.getMeta("modelID")}')

        # now run the model using the data from the external sources
        success, results = build.run(model_res)

    else:
        # when a new training series is provided in the command line,
        # try to copy it to the model directory
        if ifile is not None:

            # in case of incremental training, append the input file to the
            # end of the existing training series
            if arguments['incremental'] and os.path.isfile(lfile):
                LOG.info(f'Merging file {ifile} with existing training series')
                new_training = os.path.join(endpoint_path, 'temp_training')

                with open(new_training, 'w') as outfile:
                    # handling the extra newline of SDFiles is problematic;
                    # we delay the output of the newline by stripping newlines
                    # and prepending a universal newline to every line but the
                    # first of the first block
                    first = True
                    with codecs.open(lfile, 'r', encoding='utf-8',
                                     errors='ignore') as infile:
                        for line in infile:
                            if first:
                                outfile.write(f'{line.rstrip()}')
                                first = False
                            else:
                                outfile.write(f'\n{line.rstrip()}')

                    # for the second block we prepend the newline to all lines
                    with codecs.open(ifile, 'r', encoding='utf-8',
                                     errors='ignore') as infile:
                        for line in infile:
                            outfile.write(f'\n{line.rstrip()}')

                shutil.move(new_training, lfile)
            else:
                try:
                    safe_copy(ifile, lfile)
                    # shutil.copy(ifile, lfile)
                except Exception:
                    return False, 'Unable to copy input file to model directory'

        # check that the local copy of the input file exists
        if not os.path.isfile(lfile):
            return False, 'No training series found'

        # run the model with the input file
        success, results = build.run(lfile)

    return success, results
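# Minimal sketch of a direct call to build_cmd. The argument keys mirror the
# ones read inside the function; the endpoint name and file name are
# placeholders, and the endpoint is assumed to exist in the model repository.
arguments = {'endpoint': 'MYMODEL',
             'infile': 'training_series.sdf',
             'incremental': False}
success, results = build_cmd(arguments, output_format='JSON')
if not success:
    print(results)   # on failure, results carries the error message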
def predict_cmd(arguments, output_format=None):
    '''
    Instantiates a Predict object to run a prediction using the given
    input file and model.

    This method must be self-contained and suitable for being called in
    cascade, by models which use the output of other models as input.
    '''
    from flame.predict import Predict

    # safety check if model exists
    endpoint_dir = utils.model_path(arguments['endpoint'], 0)
    if not os.path.isdir(endpoint_dir):
        return False, 'Endpoint name not found in model repository.'

    # ** DEPRECATE **
    # this is a back-compatibility trick for older versions of APIs
    # not supporting the label argument
    if 'label' not in arguments:
        arguments['label'] = 'temp'

    if 'output_format' in arguments:
        output_format = arguments['output_format']

    predict = Predict(arguments['endpoint'],
                      version=arguments['version'],
                      output_format=output_format,
                      label=arguments['label'])

    if utils.isSingleThread():
        predict.set_single_CPU()

    ensemble = predict.get_ensemble()

    # ensemble[0] boolean, True for ensemble models and False otherwise
    # ensemble[1] list of ensemble model names
    # ensemble[2] list of ensemble model versions
    if ensemble[0]:

        if arguments['infile'] is None:
            return False, 'ensemble models always require an input file'

        emodels = ensemble[1]
        evers = ensemble[2]

        success, model_res = get_ensemble_input(predict, emodels, evers,
                                                arguments['infile'])
        if not success:
            predict.conveyor.setError(model_res)
            LOG.error(model_res)
            # return False, model_res
            # ** TO-DO ** the line above is kept commented so the prediction
            # still runs and odata can generate the error info

        # check for changes in the inner models
        modelID = predict.conveyor.getMeta('modelID')
        for i in range(len(emodels)):
            success, iID = utils.getModelID(emodels[i], evers[i], 'model')
            if success and iID not in modelID:
                message = (f'Inner model {emodels[i]}.{evers[i]} has been '
                           'updated. Rebuilding the ensemble model is '
                           'recommended')
                predict.conveyor.setWarning(message)
                LOG.warning(message)

        # now run the model using the data from the external sources
        success, results = predict.run(model_res)

    else:
        # run the model with the input file
        success, results = predict.run(arguments['infile'])

    LOG.info('Prediction completed...')

    return success, results
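# Minimal sketch of a direct call to predict_cmd, mirroring the argument keys
# accessed inside the function; the endpoint name, version and input file are
# placeholders.
arguments = {'endpoint': 'MYMODEL',
             'version': 1,
             'infile': 'query_compounds.sdf',
             'label': 'my_prediction'}
success, results = predict_cmd(arguments)
if success:
    print('prediction finished:', results)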