Exemplo n.º 1
0
def sbuild_cmd(arguments, output_format=None):
    '''
    Instantiates a Sbuild object to build a chemical space using the given
    input file and space.

    arguments is a dictionary which must contain the key 'space' and can
    contain 'infile' (path to a new training series) and 'param_string' or
    'param_file' (forwarded to Sbuild). output_format is forwarded to Sbuild.

    Returns a (success, results) tuple: (False, error message) when one of
    the checks fails, otherwise the values returned by Sbuild.run.
    '''

    from flame.sbuild import Sbuild

    # safety check if the space exists
    space_dir = utils.space_path(arguments['space'], 0)
    if not os.path.isdir(space_dir):
        return False, 'Endpoint name not found in space repository.'

    # remove pre-existing results and meta files
    for stale_name in ('space-results.pkl', 'space-meta.pkl'):
        stale_file = os.path.join(space_dir, stale_name)
        if os.path.isfile(stale_file):
            os.remove(stale_file)

    if 'param_string' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_string=arguments['param_string'],
                        output_format=output_format)
    elif 'param_file' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_file=arguments['param_file'],
                        output_format=output_format)
    else:
        sbuild = Sbuild(arguments['space'], output_format=output_format)

    if utils.isSingleThread():
        sbuild.set_single_CPU()

    # a missing 'infile' key is handled like an explicit None
    ifile = arguments.get('infile')
    lfile = os.path.join(space_dir, 'training_series')

    # when a new training series is provided in the command line
    # try to copy it to the space directory
    if ifile is not None:
        if not os.path.isfile(ifile):
            return False, f'Wrong compound database file {ifile}'
        try:
            safe_copy(ifile, lfile)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not reported as a copy failure
            return False, 'Unable to copy input file to space directory'

    # check that the local copy of the input file exists
    if not os.path.isfile(lfile):
        return False, 'No compound database found'

    # run the space building with the input file
    success, results = sbuild.run(lfile)

    return success, results
Exemplo n.º 2
0
def search_cmd(command, output_format=None):
    '''
    Creates a Search object for the space named in the command dictionary
    and runs the search described by that same dictionary.

    Returns (False, error message) when the space directory does not exist,
    otherwise the (success, results) values returned by Search.run.
    '''
    from flame.search import Search

    # ** DEPRECATE **
    # back-compatibility: older versions of the APIs do not send a label,
    # so a default one is inserted in the command
    command.setdefault('label', 'temp')

    # safety check if the space exists
    if not os.path.isdir(utils.space_path(command['space'], 0)):
        return False, 'Endpoint name not found in space repository.'

    searcher = Search(command['space'],
                      version=command['version'],
                      output_format=output_format,
                      label=command['label'])

    if utils.isSingleThread():
        searcher.set_single_CPU()

    success, results = searcher.run(command)
    LOG.info('Search completed...')

    return success, results
Exemplo n.º 3
0
def action_info(space, version):
    '''
    Returns a JSON string with the results info for a given space and version

    The info is unpickled from the file 'results.pkl' found in the space
    directory. On success the JSON is also printed to the standard output
    and (True, JSON string) is returned; otherwise (False, error message)
    '''

    if space is None:
        return False, 'Empty space label'

    results_file = os.path.join(utils.space_path(space, version),
                                'results.pkl')

    if not os.path.isfile(results_file):
        return False, 'Info file not found'

    # NOTE: pickle is only safe for files written by the application itself
    with open(results_file, 'rb') as handle:
        info = pickle.load(handle)

    # identity test: '==' could be overloaded by the unpickled object
    if info is None:
        return False, 'No relevant information found'

    # serialize only once, the same string is printed and returned
    json_info = json.dumps(info)
    print(json_info)
    return True, json_info
Exemplo n.º 4
0
def action_info(space, version, output='text'):
    '''
    Returns a text or JSON with results info for a given space and version

    The info is read from the file 'space-meta.pkl' of the space directory,
    which is expected to contain four consecutive pickled objects: the
    model ID, an error message, a warning message and the space info.

    When output is 'text' the info items are logged and
    (True, 'space informed OK') is returned. For any other value of output
    (True, info) is returned, info being a list of items.

    On failure returns (False, message), where message is a dictionary with
    the keys 'code' and 'message' when output is 'JSON' and a string
    otherwise.
    '''

    if space is None:
        if output == 'JSON':
            return False, {'code':1, 'message': 'Empty space label'}
        return False, 'Empty space label'

    meta_path = utils.space_path(space, version)
    meta_file = os.path.join(meta_path, 'space-meta.pkl')

    if not os.path.isfile(meta_file):
        if output == 'JSON':
            return False, {'code':0, 'message': 'Info file not found'}
        return False, 'Info file not found'

    # NOTE: pickle is only safe for files written by the application itself
    with open(meta_file, 'rb') as handle:
        modelID = pickle.load(handle)
        errorMessage = pickle.load(handle)
        warningMessage = pickle.load(handle)
        space_info = pickle.load(handle)

    if errorMessage is not None:
        if output == 'JSON':
            return False, {'code':1, 'message': errorMessage}
        # NOTE(review): the text output does not include errorMessage,
        # confirm that this is intended
        return False, 'No relevant information found'

    # collect the items in a new list (the unpickled object is not altered):
    # space info first, then the optional warning and finally the model ID
    info = []
    if space_info is not None:
        info.extend(space_info)
    if warningMessage is not None:
        info.append(('warning', 'runtime warning', warningMessage))
    info.append(('modelID', 'unique model ID', modelID))

    if output == 'text':

        LOG.info(f'informing space {space} version {version}')

        for val in info:
            if len(val) < 3:
                LOG.info(val)
            else:
                LOG.info(f'{val[0]} ({val[1]}) : {val[2]}')
        return True, 'space informed OK'

    return True, info
Exemplo n.º 5
0
    def post(self, request, spacename, format=None):
        ''' Builds the space spacename using the uploaded 'SDF' file or,
            when no file was uploaded, the training series already present
            in the space directory

            The build parameters are taken from the POST field 'parameters'.
            An uploaded file is stored in a temporary directory, used for
            the build, copied to the space directory as 'training_series'
            and the temporary directory is then removed.

            Returns a 200 JSON response when the build succeeds, a 404
            response with the build error otherwise and a 400 response with
            the exception text when an exception is raised.
        '''

        # get the uploaded file
        try:
            file_obj = request.FILES['SDF']
        except MultiValueDictKeyError:
            # False flags that the internal training series must be used
            file_obj = False

        params = request.POST.get('parameters')
        epd = utils.space_path(spacename, 0)
        print(epd)
        lfile = os.path.join(epd, 'training_series')
        # TODO: implement correctly flame build
        builder = sbuild.Sbuild(spacename,
                                param_string=params,
                                output_format="JSON")

        # only assigned when an uploaded file needs temporary storage
        temp_dir = None
        try:
            if isinstance(file_obj, bool):

                flame_status = builder.run(lfile)

            else:

                # Set the temp filesystem storage
                temp_dir = tempfile.mkdtemp(prefix="train_data_", dir=None)

                fs = FileSystemStorage(location=temp_dir)
                # save the file to the new filesystem
                path_SDF = fs.save(file_obj.name, ContentFile(file_obj.read()))

                training_data = os.path.join(temp_dir, path_SDF)
                print(training_data)
                flame_status = builder.run(training_data)
                # Copy file
                shutil.copy(training_data, lfile)

        except Exception as e:
            return Response(str(e), status=status.HTTP_400_BAD_REQUEST)

        finally:
            # mkdtemp does not clean up after itself, remove the directory
            # in every case to avoid leaving one behind for each upload
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)

        if (flame_status[0]):
            if isinstance(file_obj, bool):
                filename = "internal training set"
            else:
                filename = file_obj.name
            response = {
                "buildStatus": "Space builded succesfully",
                "fileName": filename,
                "spacename": spacename,
                "version": 0
            }
            return JsonResponse(response, status=status.HTTP_200_OK)
        else:
            return Response(json.loads(flame_status[1]),
                            status=status.HTTP_404_NOT_FOUND)
Exemplo n.º 6
0
def sbuild_cmd(arguments, output_format=None):
    '''
    Instantiates a Sbuild object to build a chemical space using the given
    input file and space.

    arguments is a dictionary which must contain the key 'space' and can
    contain 'infile' (path to a new training series) and 'param_string' or
    'param_file' (forwarded to Sbuild). output_format is forwarded to Sbuild.

    Returns a (success, results) tuple: (False, error message) when one of
    the checks fails (the message is logged as well), otherwise the values
    returned by Sbuild.run.
    '''

    from flame.sbuild import Sbuild

    # safety check if the space exists
    repo_path = pathlib.Path(utils.space_repository_path())
    space_list = os.listdir(repo_path)

    if arguments['space'] not in space_list:
        LOG.error('Endpoint name not found in space repository.')
        return False, 'Endpoint name not found in space repository.'

    if 'param_string' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_string=arguments['param_string'],
                        output_format=output_format)
    elif 'param_file' in arguments:
        sbuild = Sbuild(arguments['space'],
                        param_file=arguments['param_file'],
                        output_format=output_format)
    else:
        sbuild = Sbuild(arguments['space'], output_format=output_format)

    # a missing 'infile' key is handled like an explicit None
    ifile = arguments.get('infile')
    epd = utils.space_path(arguments['space'], 0)
    lfile = os.path.join(epd, 'training_series')

    # when a new training series is provided in the command line
    # try to copy it to the space directory
    if ifile is not None:
        if not os.path.isfile(ifile):
            LOG.error(f'Wrong compound database file {ifile}')
            return False, f'Wrong compound database file {ifile}'
        try:
            shutil.copy(ifile, lfile)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not reported as a copy failure
            LOG.error('Unable to copy input file to space directory')
            return False, 'Unable to copy input file to space directory'

    # check that the local copy of the input file exists
    if not os.path.isfile(lfile):
        LOG.error('No compound database found')
        return False, 'No compound database found'

    # run the space building with the input file
    success, results = sbuild.run(lfile)

    return success, results
Exemplo n.º 7
0
    def loadYaml(self, model, version, isSpace=False):
        ''' load a set of parameters from the file 'parameters.yaml' present
            at the model directory (or at the space directory when isSpace
            is True)

            adds some parameters identifying the model and the hash
            returned by idataHash()

            returns (True, 'OK') or (False, error message)
        '''

        # obtain the path and the default name of the model parameters
        if isSpace:
            parameters_file_path = utils.space_path(model, version)
        else:
            parameters_file_path = utils.model_path(model, version)

        if not os.path.isdir(parameters_file_path):
            return False, f'Model "{model}", version "{version}" not found'

        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')

        # load the main class dictionary (p) from this yaml file
        if not os.path.isfile(parameters_file_name):
            return False, 'Parameters file not found'

        try:
            with open(parameters_file_name, 'r') as pfile:
                loaded = yaml.safe_load(pfile)
        except Exception as e:
            # return a message, like the rest of error paths
            return False, str(e)

        # an empty file is loaded as None and any other non-dictionary
        # content cannot be used as parameters; the previous content of
        # self.p is preserved in this case
        if not isinstance(loaded, dict):
            return False, 'Parameters file is empty or malformed'

        self.p = loaded

        # check version of the parameter file
        # no 'param_format' key means version < 2.0
        self.extended = 'param_format' in self.p
        if not self.extended:
            self.param_format = 1.0

        # # correct CV to kfold for conformal models
        # if self.getVal('conformal') is True:
        #     self.setVal('ModelValidationCV','kfold')
        if self.getVal('model') == 'majority':
            self.setVal('conformal', False)

        # add keys for the model and a hash
        self.setVal('endpoint', model)
        self.setVal('version', version)
        self.setVal('model_path', parameters_file_path)
        self.setVal('md5', self.idataHash())

        return True, 'OK'
Exemplo n.º 8
0
def action_remove(space, version):
    '''
    Remove the version indicated as argument from the space tree indicated
    as argument

    Version 0 (the development version) is never removed.

    Returns (True, message) when the version directory has been removed
    and (False, error message) otherwise
    '''

    if not space:
        return False, 'Empty space label'

    if version == 0:
        return False, 'Development version cannot be removed, provide a version number'

    rdir = utils.space_path(space, version)
    if not os.path.isdir(rdir):
        return False, f'Version {version} not found'

    # best-effort removal: errors are ignored here and the result is
    # verified afterwards, so a failed removal is not reported as a success
    shutil.rmtree(rdir, ignore_errors=True)

    if os.path.isdir(rdir):
        LOG.error(f'Unable to remove version {version} of space {space}')
        return False, f'Unable to remove version {version} of space {space}'

    LOG.info(f'Version {version} of space {space} has been removed')
    return True, f'Version {version} of space {space} has been removed'
Exemplo n.º 9
0
    def __init__(self, space, version, output_format=None, label=None):
        ''' Prepares a search on the given space and version

            Creates the Parameters and Conveyor objects, reads the model ID
            from the file 'space-meta.pkl' of the space directory, stores
            the label in the conveyor and loads the space parameters.

            output_format, when provided, is appended to the output formats
            defined in the parameters.

            The process is terminated (exit status 1) when either the model
            ID or the space parameters cannot be loaded.
        '''
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('sapply')

        # load modelID
        path = utils.space_path(space, version)
        meta = os.path.join(path, 'space-meta.pkl')
        try:
            with open(meta, 'rb') as handle:
                modelID = pickle.load(handle)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not reported as a loading failure
            LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
            # non-zero status: sys.exit() without argument reports success
            sys.exit(1)

        self.conveyor.addMeta('modelID', modelID)
        LOG.debug(f'Loaded space with modelID: {modelID}')

        # assign prediction (search) label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(
                f'Unable to load space parameters. {results}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)
Exemplo n.º 10
0
    def run(self, param_dict):
        ''' Executes a default similarity search workflow

            param_dict provides the query, either as a 'smarts' pattern or
            as an 'infile', and the similarity settings. When it contains
            the key 'runtime_param' the settings are read from that YAML
            file (keys 'similarity_metric', 'similarity_cutoff_num' and
            'similarity_cutoff_distance'); otherwise they are read from the
            keys 'metric', 'numsel' and 'cutoff' of param_dict.

            Any failure is registered in the conveyor and the remaining
            steps are skipped, except for the odata object, which always
            runs. Returns the value returned by odata.run()
        '''

        metric = None
        numsel = None
        cutoff = None
        input_source = None

        # remains None until the space-specific modules are imported; in
        # this case the Odata parent is used to report the error
        odata_child = None

        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            LOG.error(f'Unable to find space {self.space}')
            self.conveyor.setError(f'Unable to find space {self.space}, version {self.version}')

        if self.getVal(param_dict, 'smarts') is not None:
            input_source = param_dict['smarts']
            self.param.setVal('input_type', 'smarts')

        elif self.getVal(param_dict, 'infile') is not None:
            input_source = param_dict['infile']

        else:
            # report the actual problem, the same message that is logged,
            # instead of the unrelated similarity parameters error
            LOG.error('Unable to find input_file')
            self.conveyor.setError('Unable to find input_file')

        if 'runtime_param' in param_dict:
            runtime_param = self.getVal(param_dict, 'runtime_param')
            if runtime_param is not None:
                LOG.info(f'runtime parameters: {runtime_param}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                        try:
                            metric = rtparam['similarity_metric']
                            numsel = rtparam['similarity_cutoff_num']
                            cutoff = rtparam['similarity_cutoff_distance']
                        except Exception:
                            LOG.error('wrong format in the runtime similarity parameters')
                            self.conveyor.setError('wrong format in the runtime similarity parameters')
                except Exception:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError('runtime similarity parameter file not found')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except Exception:
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError('wrong format in the runtime similarity parameters')

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.conveyor.setWarning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.param.setVal('computeMD_method', [md[0]])

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath+".idata_child")
            sapply_child = importlib.import_module(modpath+".sapply_child")
            odata_child = importlib.import_module(modpath+".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except Exception:
                LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():

            # make sure there is X data (not required for SMARTS queries)
            if not self.conveyor.isKey('xmatrix'):
                if not self.conveyor.isKey('SMARTS'):
                    LOG.debug('Failed to compute MDs')
                    self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Sapply child architecture mismatch, defaulting to Sapply parent')
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        if odata_child is None:
            odata = Odata(self.param, self.conveyor)
        else:
            try:
                odata = odata_child.OdataChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
                odata = Odata(self.param, self.conveyor)

        return odata.run()
Exemplo n.º 11
0
    def delta(self, model, version, param, iformat='YAML', isSpace=False):
        ''' load a set of parameters from the configuration file present 
            at the model directory (or at the space directory when isSpace
            is True)

            also, applies the keys present in param, which is the name of
            a file in YAML format (iformat 'YAML') or JSON format
            (iformat 'JSON') or a string in JSON format (iformat 'JSONS'),
            and writes the result to the parameters file

            adds the hash returned by idataHash()

            returns (True, 'OK') or (False, error message)
        '''

        # reject unsupported formats before doing anything else; otherwise
        # the new parameters would remain undefined
        if iformat not in ('YAML', 'JSON', 'JSONS'):
            return False, 'input format not recognized'

        # loadYaml returns a tuple, which is always true; the first
        # element must be tested to detect a failure
        success, message = self.loadYaml(model, version, isSpace)
        if not success:
            return False, str(message)

        # parse the new parameters, either from a string or from a file
        try:
            if iformat == 'JSONS':
                newp = json.loads(param)
            else:
                with open(param, 'r') as pfile:
                    if iformat == 'YAML':
                        newp = yaml.safe_load(pfile)
                    else:
                        newp = json.load(pfile)
        except Exception as e:
            return False, str(e)

        self.applyDelta(newp)

        # dump internal dict to the parameters file
        if isSpace:
            parameters_file_path = utils.space_path(model, version)
        else:
            parameters_file_path = utils.model_path(model, version)

        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(self.p, pfile)
        except Exception:
            return False, 'unable to write parameters'

        # # correct CV to kfold for conformal models
        # if self.getVal('conformal') is True:
        #     self.setVal('ModelValidationCV','kfold')
        if self.getVal('model') == 'majority':
            self.setVal('conformal', False)

        self.setVal('md5', self.idataHash())

        return True, 'OK'
Exemplo n.º 12
0
    def delta(self, model, version, doc, iformat='YAML', isSpace=False):
        ''' update the fields with the keys present in doc and write
            the result to the file 'documentation.yaml' present at the
            model directory (or at the space directory when isSpace is True)

            doc is either a string in JSON format (iformat 'JSONS') or YAML
            format (iformat 'YAMLS') or the name of a file in YAML format
            (iformat 'YAML') or JSON format (iformat 'JSON')

            values equal to the string 'None' are stored as None; for values
            which are dictionaries every inner key is set individually

            adds the hash returned by idataHash()

            returns (True, 'OK') or (False, error message)
        '''

        if iformat not in ['JSON', 'JSONS', 'YAML', 'YAMLS']:
            return False, 'input format not recognized'

        try:
            # input is a string, either in JSON or YAML format
            if iformat == 'JSONS':
                newp = json.loads(doc)
            elif iformat == 'YAMLS':
                # safe_load: yaml.load without Loader is unsafe for
                # untrusted input and is rejected by recent PyYAML versions
                newp = yaml.safe_load(doc)

            # input is a file, either in YAML or JSON format
            else:
                with open(doc, 'r') as pfile:
                    if iformat == 'YAML':
                        newp = yaml.safe_load(pfile)
                    else:
                        newp = json.load(pfile)
        except Exception as e:
            return False, str(e)

        # an empty document is parsed as None and lists or scalars have
        # no keys to apply
        if not isinstance(newp, dict):
            return False, 'input does not contain a dictionary of fields'

        # update internal dict with keys in the input (delta)
        for key, val in newp.items():
            # YAML null values can arrive as the string 'None'
            if val == 'None':
                val = None

            if isinstance(val, dict):
                for inner_key, inner_val in val.items():
                    if inner_val == 'None':
                        inner_val = None
                    self.setInnerVal(key, inner_key, inner_val)
            else:
                self.setVal(key, val)

        # dump internal dict to the documentation file
        if isSpace:
            parameters_file_path = utils.space_path(model, version)
        else:
            parameters_file_path = utils.model_path(model, version)

        parameters_file_name = os.path.join(parameters_file_path,
                                            'documentation.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(self.fields, pfile)
        except Exception:
            return False, 'unable to write parameters'

        self.setVal('md5', self.idataHash())

        return True, 'OK'
Exemplo n.º 13
0
    def run(self, input_source):
        ''' Executes a default chemical space building workflow

            input_source is handed to the idata object, which generates the
            data used by slearn to build the space. The development version
            (version 0) of the space is always used.

            Any failure is registered in the conveyor and the remaining
            steps are skipped, except for the odata object, which always
            runs. Returns the value returned by odata.run()
        '''

        # remains None until the space-specific modules are imported; in
        # this case the Odata parent is used to report the error
        odata_child = None

        # path to endpoint
        epd = utils.space_path(self.space, 0)
        if not os.path.isdir(epd):
            self.conveyor.setError(f'Unable to find space {self.space}')

        # import ichild classes
        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of  the processing applied to each space
            modpath = utils.smodule_path(self.space, 0)

            idata_child = importlib.import_module(modpath + ".idata_child")
            slearn_child = importlib.import_module(modpath + ".slearn_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except Exception:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # instantiate learn (build a space from idata) and run it
            try:
                slearn = slearn_child.SlearnChild(self.param, self.conveyor)
            except Exception:
                LOG.warning(
                    'Slearn child architecture mismatch, defaulting to Learn parent'
                )
                slearn = Slearn(self.param, self.conveyor)

            slearn.run()
            LOG.debug(
                f'slearn child {type(slearn).__name__} completed `run()`')

        # run odata object, in charge of formatting the results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        if odata_child is None:
            odata = Odata(self.param, self.conveyor)
        else:
            try:
                odata = odata_child.OdataChild(self.param, self.conveyor)
            except Exception:
                LOG.warning(
                    'Odata child architecture mismatch, defaulting to Odata parent'
                )
                odata = Odata(self.param, self.conveyor)

        return odata.run()