def sbuild_cmd(arguments, output_format=None):
    '''
    Instantiates a Sbuild object to build a chemical space using the given
    input file and model.

    Arguments:
        arguments: mapping read with the keys 'space' and 'infile' and,
            optionally, 'param_string' or 'param_file' (checked in that
            order, only the first one present is used).
        output_format: forwarded unchanged to the Sbuild constructor.

    Returns:
        A (success, results) tuple. On an early failure success is False and
        results is an error message; otherwise the tuple produced by
        Sbuild.run() is passed through.
    '''
    from flame.sbuild import Sbuild

    # safety check: the development version (0) of the space must exist
    space_dir = utils.space_path(arguments['space'], 0)
    if not os.path.isdir(space_dir):
        return False, 'Endpoint name not found in space repository.'

    # remove pre-existing results and metadata so stale files are never
    # mistaken for the outcome of this build
    for stale_name in ('space-results.pkl', 'space-meta.pkl'):
        stale_file = os.path.join(space_dir, stale_name)
        if os.path.isfile(stale_file):
            os.remove(stale_file)

    if 'param_string' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_string=arguments['param_string'],
                        output_format=output_format)
    elif 'param_file' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_file=arguments['param_file'],
                        output_format=output_format)
    else:
        sbuild = Sbuild(arguments['space'], output_format=output_format)

    if utils.isSingleThread():
        sbuild.set_single_CPU()

    ifile = arguments['infile']
    lfile = os.path.join(space_dir, 'training_series')

    # when a new training series is provided in the command line
    # try to copy it to the space directory
    if ifile is not None:
        if not os.path.isfile(ifile):
            return False, f'Wrong compound database file {ifile}'
        try:
            safe_copy(ifile, lfile)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit are not
            # reported as a copy failure
            return False, 'Unable to copy input file to space directory'

    # check that the local copy of the input file exists
    if not os.path.isfile(lfile):
        return False, 'No compound database found'

    # run the space building with the input file
    success, results = sbuild.run(lfile)

    return success, results
def search_cmd(command, output_format=None):
    '''
    Builds a Search object for the space named in command and runs it.

    command is read with the keys 'space' and 'version' and is handed over
    as-is to Search.run(). A 'label' key is added to it (value 'temp') when
    the caller did not provide one.

    Returns a (success, results) tuple: (False, message) when the space
    directory does not exist, otherwise whatever Search.run() returned.
    '''
    from flame.search import Search

    # ** DEPRECATE **
    # back-compatibility trick for older versions of the APIs which do
    # not send the label argument
    command.setdefault('label', 'temp')

    space_name = command['space']

    # safety check: the development version (0) of the space must exist
    if not os.path.isdir(utils.space_path(space_name, 0)):
        return False, 'Endpoint name not found in space repository.'

    searcher = Search(space_name,
                      version=command['version'],
                      output_format=output_format,
                      label=command['label'])

    if utils.isSingleThread():
        searcher.set_single_CPU()

    success, results = searcher.run(command)
    LOG.info('Search completed...')

    return success, results
def action_info(space, version):
    '''
    Returns a JSON string with the results info stored for a given space
    and version.

    The info is whatever object was pickled into 'results.pkl' inside the
    space/version directory. It is serialised with json.dumps, printed to
    stdout and returned.

    Returns a (success, payload) tuple: (True, JSON string) on success or
    (False, error message) when the label is None, the file is missing or
    the pickled object is None.
    '''
    if space is None:
        return False, 'Empty space label'

    results_file = os.path.join(utils.space_path(space, version),
                                'results.pkl')
    if not os.path.isfile(results_file):
        return False, 'Info file not found'

    # NOTE(review): pickle.load executes arbitrary code embedded in the
    # file; this is only safe while the space repository is trusted
    with open(results_file, 'rb') as handle:
        info = pickle.load(handle)

    # identity test: '== None' misbehaves for objects overriding __eq__
    if info is None:
        return False, 'No relevant information found'

    # serialise once and reuse it for both the console and the caller
    serialized = json.dumps(info)
    print(serialized)

    return True, serialized
def action_info(space, version, output='text'):
    '''
    Returns a text or JSON with results info for a given space and version.

    The information is read from 'space-meta.pkl' in the space/version
    directory, which is expected to hold four consecutive pickles: the
    model ID, an error message, a warning message and the space info list.

    Arguments:
        space: space label; None is rejected.
        version: version forwarded to utils.space_path().
        output: 'text' logs every info item and returns a fixed message,
            'JSON' returns error dictionaries with 'code' and 'message'
            keys. Any value other than 'text' returns the info list itself
            on success.

    Returns a (success, payload) tuple.
    '''
    if space is None:
        if output == 'JSON':
            return False, {'code': 1, 'message': 'Empty space label'}
        return False, 'Empty space label'

    meta_file = os.path.join(utils.space_path(space, version),
                             'space-meta.pkl')
    if not os.path.isfile(meta_file):
        if output == 'JSON':
            return False, {'code': 0, 'message': 'Info file not found'}
        return False, 'Info file not found'

    # NOTE(review): pickle.load executes arbitrary code embedded in the
    # file; this is only safe while the space repository is trusted
    with open(meta_file, 'rb') as handle:
        modelID = pickle.load(handle)
        errorMessage = pickle.load(handle)
        warningMessage = pickle.load(handle)
        space_info = pickle.load(handle)

    if errorMessage is not None:
        if output == 'JSON':
            return False, {'code': 1, 'message': errorMessage}
        # report the stored error, as the JSON branch does, instead of
        # hiding it behind a generic message
        return False, errorMessage

    # info is the space info followed by the optional runtime warning and
    # the model ID; it can never be empty because the model ID entry is
    # always appended
    info = list(space_info) if space_info is not None else []
    if warningMessage is not None:
        info.append(('warning', 'runtime warning', warningMessage))
    info.append(('modelID', 'unique model ID', modelID))

    if output == 'text':
        LOG.info(f'informing space {space} version {version}')
        for val in info:
            # items shorter than (key, description, value) are logged raw
            if len(val) < 3:
                LOG.info(val)
            else:
                LOG.info(f'{val[0]} ({val[1]}) : {val[2]}')
        return True, 'space informed OK'

    return True, info
def post(self, request, spacename, format=None):
    '''
    Builds the space 'spacename' from an uploaded SDF file or, when no file
    is uploaded, from the training series already stored in the space.

    The request is read for the uploaded file 'SDF' (request.FILES) and the
    form field 'parameters' (request.POST), which is passed to Sbuild as
    param_string. An uploaded file is saved in a temporary directory, used
    for the build and then copied over the 'training_series' file of the
    space; the temporary directory is removed afterwards.

    Returns a JsonResponse (200) describing the build on success, a
    Response (400) with the exception text when the build raises and a
    Response (404) with the decoded build results when the build reports
    failure.
    '''
    # get the uploaded file; False marks "nothing uploaded"
    try:
        file_obj = request.FILES['SDF']
    except MultiValueDictKeyError:
        file_obj = False

    params = request.POST.get('parameters')
    epd = utils.space_path(spacename, 0)
    lfile = os.path.join(epd, 'training_series')

    # TODO: implement correctly flame build
    builder = sbuild.Sbuild(spacename, param_string=params,
                            output_format="JSON")

    try:
        if isinstance(file_obj, bool):
            flame_status = builder.run(lfile)
        else:
            # set the temp filesystem storage
            temp_dir = tempfile.mkdtemp(prefix="train_data_", dir=None)
            try:
                fs = FileSystemStorage(location=temp_dir)
                # save the file to the new filesystem
                path_SDF = fs.save(file_obj.name,
                                   ContentFile(file_obj.read()))
                training_data = os.path.join(temp_dir, path_SDF)
                flame_status = builder.run(training_data)
                # keep the uploaded file as the space training series
                shutil.copy(training_data, lfile)
            finally:
                # mkdtemp never cleans up after itself: without this every
                # upload leaves a directory behind
                shutil.rmtree(temp_dir, ignore_errors=True)
    except Exception as e:
        return Response(str(e), status=status.HTTP_400_BAD_REQUEST)

    if flame_status[0]:
        if isinstance(file_obj, bool):
            filename = "internal training set"
        else:
            filename = file_obj.name
        response = {
            "buildStatus": "Space builded succesfully",
            "fileName": filename,
            "spacename": spacename,
            "version": 0
        }
        return JsonResponse(response, status=status.HTTP_200_OK)

    return Response(json.loads(flame_status[1]),
                    status=status.HTTP_404_NOT_FOUND)
def sbuild_cmd(arguments, output_format=None):
    '''
    Instantiates a Sbuild object to build a chemical space using the given
    input file and model.

    Arguments:
        arguments: mapping read with the keys 'space' and 'infile' and,
            optionally, 'param_string' or 'param_file' (checked in that
            order, only the first one present is used).
        output_format: forwarded unchanged to the Sbuild constructor.

    Returns:
        A (success, results) tuple. Early failures are logged and returned
        as (False, message); otherwise the tuple produced by Sbuild.run()
        is passed through.
    '''
    from flame.sbuild import Sbuild

    # safety check: the space must be listed in the space repository
    repo_path = pathlib.Path(utils.space_repository_path())
    space_list = os.listdir(repo_path)

    if arguments['space'] not in space_list:
        message = 'Endpoint name not found in space repository.'
        LOG.error(message)
        return False, message

    if 'param_string' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_string=arguments['param_string'],
                        output_format=output_format)
    elif 'param_file' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_file=arguments['param_file'],
                        output_format=output_format)
    else:
        sbuild = Sbuild(arguments['space'], output_format=output_format)

    ifile = arguments['infile']
    epd = utils.space_path(arguments['space'], 0)
    lfile = os.path.join(epd, 'training_series')

    # when a new training series is provided in the command line
    # try to copy it to the space directory
    if ifile is not None:
        if not os.path.isfile(ifile):
            message = f'Wrong compound database file {ifile}'
            LOG.error(message)
            return False, message
        try:
            shutil.copy(ifile, lfile)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit are not
            # reported as a copy failure
            message = 'Unable to copy input file to space directory'
            LOG.error(message)
            return False, message

    # check that the local copy of the input file exists
    if not os.path.isfile(lfile):
        message = 'No compound database found'
        LOG.error(message)
        return False, message

    # run the space building with the input file
    success, results = sbuild.run(lfile)

    return success, results
def loadYaml(self, model, version, isSpace=False):
    '''
    Load a set of parameters from the 'parameters.yaml' file present at the
    model (or space) directory into self.p.

    Also adds the keys 'endpoint', 'version', 'model_path' and 'md5'
    identifying the model, the last one obtained from self.idataHash().

    Arguments:
        model: model or space name.
        version: version forwarded to the path helper.
        isSpace: when True the directory is resolved with
            utils.space_path(), otherwise with utils.model_path().

    Returns:
        (True, 'OK') on success or (False, message) when the directory or
        the file is missing, the file cannot be read/parsed or it does not
        contain a YAML mapping.
    '''
    # obtain the path and the default name of the model parameters
    if isSpace:
        parameters_file_path = utils.space_path(model, version)
    else:
        parameters_file_path = utils.model_path(model, version)

    if not os.path.isdir(parameters_file_path):
        return False, f'Model "{model}", version "{version}" not found'

    parameters_file_name = os.path.join(parameters_file_path,
                                        'parameters.yaml')

    # load the main class dictionary (p) from this yaml file
    if not os.path.isfile(parameters_file_name):
        return False, 'Parameters file not found'

    try:
        with open(parameters_file_name, 'r') as pfile:
            loaded = yaml.safe_load(pfile)
    except Exception as e:
        # return the text, not the exception object, so the message can be
        # logged or serialised like every other error of this method
        return False, str(e)

    # an empty file yields None and a malformed one may yield a list or a
    # scalar; none of them can be used as the parameter dictionary
    if not isinstance(loaded, dict):
        return False, 'Parameters file is empty or not a YAML mapping'

    self.p = loaded

    # check the format of the parameter file: files without a
    # 'param_format' key are treated as non-extended and tagged as 1.0
    if 'param_format' in self.p:
        self.extended = True
    else:
        self.extended = False
        self.param_format = 1.0

    # conformal prediction is switched off for 'majority' models
    if self.getVal('model') == 'majority':
        self.setVal('conformal', False)

    # add keys for the model and a hash
    self.setVal('endpoint', model)
    self.setVal('version', version)
    self.setVal('model_path', parameters_file_path)
    self.setVal('md5', self.idataHash())

    return True, 'OK'
def action_remove(space, version):
    '''
    Remove the version indicated as argument from the space tree indicated
    as argument.

    Version 0 (the development version) is never removed.

    Returns a (success, message) tuple; success is False when the label is
    empty, the version is 0, the version directory does not exist or the
    directory is still present after trying to remove it.
    '''
    if not space:
        return False, 'Empty space label'

    if version == 0:
        return False, 'Development version cannot be removed, provide a version number'

    rdir = utils.space_path(space, version)
    if not os.path.isdir(rdir):
        return False, f'Version {version} not found'

    shutil.rmtree(rdir, ignore_errors=True)

    # rmtree was told to ignore errors, so confirm the directory is really
    # gone before reporting success
    if os.path.isdir(rdir):
        message = f'Unable to remove version {version} of space {space}'
        LOG.error(message)
        return False, message

    message = f'Version {version} of space {space} has been removed'
    LOG.info(message)
    return True, message
def __init__(self, space, version, output_format=None, label=None):
    '''
    Prepares a search on the given space and version.

    Creates the Parameters and Conveyor objects, reads the model ID from
    the first pickle of 'space-meta.pkl' and loads the space parameters
    with Parameters.loadYaml(..., isSpace=True).

    Arguments:
        space: space name.
        version: space version.
        output_format: extra output format appended to the 'output_format'
            parameter when not already present (e.g. JSON for web calls).
        label: label stored in the conveyor as 'prediction_label'.

    NOTE: the process is terminated with sys.exit() when the model ID or
    the parameters cannot be loaded.
    '''
    LOG.debug('Starting search...')
    self.space = space
    self.version = version
    self.label = label
    self.param = Parameters()
    self.conveyor = Conveyor()

    # identify the workflow type
    self.conveyor.setOrigin('sapply')

    # load modelID
    path = utils.space_path(space, version)
    meta = os.path.join(path, 'space-meta.pkl')
    try:
        # NOTE(review): pickle.load executes arbitrary code embedded in
        # the file; only safe while the space repository is trusted
        with open(meta, 'rb') as handle:
            modelID = pickle.load(handle)
    except Exception:
        # Exception (not a bare except) so Ctrl-C is not reported as a
        # corrupt metadata file
        LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
        sys.exit()

    self.conveyor.addMeta('modelID', modelID)
    LOG.debug(f'Loaded space with modelID: {modelID}')

    # assign prediction (search) label
    self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                         'method', 'single',
                         'Label used to identify the prediction')

    success, results = self.param.loadYaml(space, version, isSpace=True)
    if not success:
        LOG.critical(
            f'Unable to load space parameters. {results}. Aborting...')
        sys.exit()

    # add additional output formats included in the constructor
    # this is required to add JSON format as output when the object is
    # instantiated from a web service call, requiring this output
    if output_format is not None:
        if output_format not in self.param.getVal('output_format'):
            self.param.appVal('output_format', output_format)
def run(self, param_dict):
    '''
    Executes a default similarity search workflow.

    Arguments:
        param_dict: mapping read (through self.getVal) for 'smarts' or
            'infile' as the input source and for the similarity settings,
            taken either from the YAML file named by 'runtime_param' (keys
            'similarity_metric', 'similarity_cutoff_num' and
            'similarity_cutoff_distance') or, when the 'runtime_param' key
            is absent, from the keys 'metric', 'numsel' and 'cutoff'.

    Every failure is registered in the conveyor with setError(); the
    remaining steps are skipped and the odata object is always run, so it
    can report either the results or the error.

    Returns whatever the odata object's run() returns.
    '''
    metric = None
    numsel = None
    cutoff = None

    # defined up-front so the name always exists; it is only consumed
    # when no error has been registered
    input_source = None

    # same reason: the child modules are only imported when there is no
    # error, but odata is instantiated unconditionally at the end
    odata_child = None

    # path to endpoint
    epd = utils.space_path(self.space, self.version)
    if not os.path.isdir(epd):
        LOG.error(f'Unable to find space {self.space}')
        self.conveyor.setError(f'Unable to find space {self.space}, version {self.version}')

    if self.getVal(param_dict, 'smarts') is not None:
        input_source = param_dict['smarts']
        self.param.setVal('input_type', 'smarts')
    elif self.getVal(param_dict, 'infile') is not None:
        input_source = param_dict['infile']
    else:
        # report the actual problem; this branch used to register the
        # unrelated 'wrong format in the runtime similarity parameters'
        LOG.error('Unable to find input_file')
        self.conveyor.setError('Unable to find input_file')

    # NOTE(review): the 'else' below is paired with the 'runtime_param'
    # key test (direct values are only read when the key is absent);
    # confirm against the original layout
    if 'runtime_param' in param_dict:
        runtime_param = self.getVal(param_dict, 'runtime_param')
        if runtime_param is not None:
            LOG.info(f'runtime parameters: {str(runtime_param)}')
            try:
                with open(runtime_param, 'r') as pfile:
                    rtparam = yaml.safe_load(pfile)
                    try:
                        metric = rtparam['similarity_metric']
                        numsel = rtparam['similarity_cutoff_num']
                        cutoff = rtparam['similarity_cutoff_distance']
                    except Exception:
                        LOG.error('wrong format in the runtime similarity parameters')
                        self.conveyor.setError('wrong format in the runtime similarity parameters')
            except Exception:
                LOG.error('runtime similarity parameter file not found')
                self.conveyor.setError('runtime similarity parameter file not found')
    else:
        try:
            metric = param_dict['metric']
            numsel = param_dict['numsel']
            cutoff = param_dict['cutoff']
        except Exception:
            LOG.error('wrong format in the runtime similarity parameters')
            self.conveyor.setError('wrong format in the runtime similarity parameters')

    md = self.param.getVal('computeMD_method')
    if utils.isFingerprint(md) and len(md) > 1:
        LOG.warning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
        self.conveyor.setWarning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
        self.param.setVal('computeMD_method', [md[0]])

    if not self.conveyor.getError():
        # uses the child classes within the 'space' folder,
        # to allow customization of
        # the processing applied to each space
        modpath = utils.smodule_path(self.space, self.version)

        idata_child = importlib.import_module(modpath + ".idata_child")
        sapply_child = importlib.import_module(modpath + ".sapply_child")
        odata_child = importlib.import_module(modpath + ".odata_child")

        # run idata object, in charge of generate space data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except Exception:
            LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

    if not self.conveyor.getError():
        # make sure there is X data (or a SMARTS pattern to search with)
        if not self.conveyor.isKey('xmatrix'):
            if not self.conveyor.isKey('SMARTS'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

    if not self.conveyor.getError():
        # run apply object, in charge of generate a prediction from idata
        try:
            sapply = sapply_child.SapplyChild(self.param, self.conveyor)
        except Exception:
            LOG.warning('Sapply child architecture mismatch, defaulting to Sapply parent')
            sapply = Sapply(self.param, self.conveyor)

        sapply.run(cutoff, numsel, metric)
        LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

    # run odata object, in charge of formatting the prediction results
    # note that if any of the above steps failed, an error has been
    # inserted in the conveyor and odata will take care of showing an
    # error message; when the child modules were never imported the
    # attribute access below fails and the parent class is used
    try:
        odata = odata_child.OdataChild(self.param, self.conveyor)
    except Exception:
        LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
        odata = Odata(self.param, self.conveyor)

    return odata.run()
def delta(self, model, version, param, iformat='YAML', isSpace=False):
    '''
    Load a set of parameters from the configuration file present at the
    model directory and update them with the keys present in param.

    The merged dictionary (self.p) is written back to 'parameters.yaml' in
    the model (or space) directory and the 'md5' key is refreshed with
    self.idataHash().

    Arguments:
        model: model or space name.
        version: version forwarded to the path helper.
        param: a JSON string when iformat is 'JSONS', otherwise the path
            of a YAML ('YAML') or JSON ('JSON') file.
        iformat: one of 'YAML', 'JSON' or 'JSONS'.
        isSpace: when True the space directory is used instead of the
            model directory.

    Returns:
        (True, 'OK') on success or (False, message) when the format is not
        supported, the current parameters cannot be loaded, param cannot be
        parsed or the parameters file cannot be written.
    '''
    # an unsupported format used to surface later as a NameError
    if iformat not in ('YAML', 'JSON', 'JSONS'):
        return False, 'input format not recognized'

    # loadYaml returns a (success, message) tuple; testing the tuple
    # itself is always true, so the flag must be unpacked
    success, message = self.loadYaml(model, version, isSpace)
    if not success:
        return False, str(message)

    # parse the delta assuming a YAML-compatible format
    try:
        if iformat == 'JSONS':
            newp = json.loads(param)
        else:
            with open(param, 'r') as pfile:
                if iformat == 'YAML':
                    newp = yaml.safe_load(pfile)
                else:
                    newp = json.load(pfile)
    except Exception as e:
        return False, str(e)

    # update the internal dict with the keys of the delta
    self.applyDelta(newp)

    # dump internal dict to the parameters file
    if isSpace:
        parameters_file_path = utils.space_path(model, version)
    else:
        parameters_file_path = utils.model_path(model, version)

    parameters_file_name = os.path.join(parameters_file_path,
                                        'parameters.yaml')
    try:
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(self.p, pfile)
    except Exception:
        return False, 'unable to write parameters'

    # conformal prediction is switched off for 'majority' models; this is
    # applied after the dump, so only the in-memory value changes
    if self.getVal('model') == 'majority':
        self.setVal('conformal', False)

    self.setVal('md5', self.idataHash())

    return True, 'OK'
def delta(self, model, version, doc, iformat='YAML', isSpace=False):
    '''
    Update the internal fields with the keys present in doc and write the
    result to 'documentation.yaml' in the model (or space) directory.

    Arguments:
        model: model or space name.
        version: version forwarded to the path helper.
        doc: a string holding the document when iformat is 'JSONS' or
            'YAMLS', otherwise the path of a 'YAML' or 'JSON' file.
        iformat: one of 'JSON', 'JSONS', 'YAML' or 'YAMLS'.
        isSpace: when True the space directory is used instead of the
            model directory.

    Values equal to the string 'None' are stored as None. Dictionary
    values are merged key by key with setInnerVal(); any other value is
    stored with setVal().

    Returns:
        (True, 'OK') on success or (False, message) when the format is not
        recognized, doc cannot be parsed, doc is not a key/value document
        or the file cannot be written.
    '''
    if iformat not in ('JSON', 'JSONS', 'YAML', 'YAMLS'):
        return False, 'input format not recognized'

    try:
        if iformat == 'JSONS':
            # input is a string in JSON format
            newp = json.loads(doc)
        elif iformat == 'YAMLS':
            # input is a string in YAML format; safe_load never builds
            # arbitrary Python objects and, unlike yaml.load, does not
            # need an explicit Loader
            newp = yaml.safe_load(doc)
        else:
            # input is a file, either in YAML or JSON format
            with open(doc, 'r') as pfile:
                if iformat == 'YAML':
                    newp = yaml.safe_load(pfile)
                else:
                    newp = json.load(pfile)
    except Exception as e:
        return False, str(e)

    # empty documents parse as None and malformed ones as lists/scalars;
    # neither can be merged key by key
    if not isinstance(newp, dict):
        return False, 'input is not a key/value document'

    # update internal dict with keys in the input (delta)
    for key, val in newp.items():
        # YAML null values written as 'None' are read as strings
        if val == 'None':
            val = None

        if isinstance(val, dict):
            for inner_key, inner_val in val.items():
                if inner_val == 'None':
                    inner_val = None
                self.setInnerVal(key, inner_key, inner_val)
        else:
            self.setVal(key, val)

    # dump internal dict to the documentation file
    if isSpace:
        parameters_file_path = utils.space_path(model, version)
    else:
        parameters_file_path = utils.model_path(model, version)

    parameters_file_name = os.path.join(parameters_file_path,
                                        'documentation.yaml')
    try:
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(self.fields, pfile)
    except Exception:
        return False, 'unable to write parameters'

    self.setVal('md5', self.idataHash())

    return True, 'OK'
def run(self, input_source):
    '''
    Executes a default chemical space building workflow.

    Arguments:
        input_source: passed to the idata object as the input to process.

    The steps are: run idata, call its preprocess_create(), check that an
    'xmatrix' key is present in the conveyor and run slearn. Every failure
    is registered in the conveyor with setError(); the remaining steps are
    skipped and the odata object is always run, so it can report either
    the results or the error.

    Returns whatever the odata object's run() returns.
    '''
    # the child modules are only imported when there is no error, but
    # odata is instantiated unconditionally at the end
    odata_child = None

    # path to endpoint
    epd = utils.space_path(self.space, 0)
    if not os.path.isdir(epd):
        self.conveyor.setError(f'Unable to find space {self.space}')

    # import ichild classes
    if not self.conveyor.getError():
        # uses the child classes within the 'space' folder,
        # to allow customization of the processing applied to each space
        modpath = utils.smodule_path(self.space, 0)

        idata_child = importlib.import_module(modpath + ".idata_child")
        slearn_child = importlib.import_module(modpath + ".slearn_child")
        odata_child = importlib.import_module(modpath + ".odata_child")

        # run idata object, in charge of generate space data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor,
                                           input_source)
        except Exception:
            LOG.warning(
                'Idata child architecture mismatch, defaulting to Idata parent'
            )
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

    if not self.conveyor.getError():
        success, results = idata.preprocess_create()
        if not success:
            self.conveyor.setError(results)

    if not self.conveyor.getError():
        # check there is a suitable X
        if not self.conveyor.isKey('xmatrix'):
            self.conveyor.setError('Failed to compute MDs')

    if not self.conveyor.getError():
        # instantiate learn (build a space from idata) and run it
        try:
            slearn = slearn_child.SlearnChild(self.param, self.conveyor)
        except Exception:
            LOG.warning(
                'Slearn child architecture mismatch, defaulting to Learn parent'
            )
            slearn = Slearn(self.param, self.conveyor)

        slearn.run()
        LOG.debug(
            f'slearn child {type(slearn).__name__} completed `run()`')

    # run odata object, in charge of formatting the prediction results
    # note that if any of the above steps failed, an error has been
    # inserted in the conveyor and odata will take care of showing an
    # error message; when the child modules were never imported the
    # attribute access below fails and the parent class is used
    try:
        odata = odata_child.OdataChild(self.param, self.conveyor)
    except Exception:
        LOG.warning(
            'Odata child architecture mismatch, defaulting to Odata parent'
        )
        odata = Odata(self.param, self.conveyor)

    return odata.run()