Example #1
    def __init__(self, model, param_file=None, output_format=None):
        LOG.debug('Starting build...')
        self.model = model
        self.param = Parameters()
        self.conveyor = Conveyor()

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the model
            # directory and save changes to make them persistent
            success, message = self.param.delta(model, 0, param_file)
        else:
            # load parameter file at the model directory
            success, message = self.param.loadYaml(model, 0)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(
                f'Unable to load model parameters. "{message}" Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)
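For context, a hypothetical instantiation of this constructor; the import path and model name below are assumptions, not taken from the snippet.

# usage sketch, assuming flame is installed and a model 'MyModel'
# has already been created in the model repository
from flame.build import Build  # assumed import path

builder = Build('MyModel', output_format='JSON')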
Example #2
def action_searches_result(label):
    '''
    try to retrieve the search results with the label used as argument
    returns 
        - (False, error message or None) if there is no directory or the 
          search pickle file cannot be found 

        - (True, JSON) with the results otherwise
    '''
    opath = tempfile.gettempdir()
    if not os.path.isdir(opath):
        return False, f'directory {opath} not found'

    # default in case label was not provided
    if label is None:
        label = 'temp'

    iconveyor = Conveyor()

    search_pkl_path = os.path.join(opath, 'similars-' + label + '.pkl')
    if not os.path.isfile(search_pkl_path):
        return False, f'file {search_pkl_path} not found'

    with open(search_pkl_path, 'rb') as handle:
        success, message = iconveyor.load(handle)

    if not success:
        print(f'error reading search results with message {message}')
        return False, None

    if not iconveyor.isKey('search_results'):
        return False, 'search results not found'

    results = iconveyor.getVal('search_results')
    names = iconveyor.getVal('obj_nam')
    if iconveyor.isKey('SMILES'):
        smiles = iconveyor.getVal('SMILES')
    if len(results) != len(names):
        return False, 'results length does not match names'

    for i in range(len(results)):
        if iconveyor.isKey('SMILES'):
            print(f'similars to {names[i]} [{smiles[i]}]')
        else:
            print(f'similars to {names[i]}')

        iresult = results[i]
        for j in range(len(iresult['distances'])):
            dist = iresult['distances'][j]
            name = iresult['names'][j]
            smil = iresult['SMILES'][j]
            print(f'   {dist:.3f} : {name} [{smil}]')

    # return a JSON generated by iconveyor
    return True, iconveyor.getJSON()
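A hedged usage sketch for the function above; 'mysearch' is an assumed label, and the sketch assumes a previous search left similars-mysearch.pkl in the temp directory.

# usage sketch; 'mysearch' is a hypothetical label
success, payload = action_searches_result('mysearch')
if success:
    print(payload)  # JSON string built by the Conveyor
else:
    print(f'search results not available: {payload}')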
Example #3
    def __init__(self,
                 space,
                 param_file=None,
                 param_string=None,
                 output_format=None):
        LOG.debug('Starting sbuild...')
        self.space = space
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('slearn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID', utils.id_generator())
        LOG.debug(
            f'Generated new space with modelID: {self.conveyor.getMeta("modelID")}'
        )

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the space
            # directory and save changes to make them persistent
            success, message = self.param.delta(space,
                                                0,
                                                param_file,
                                                iformat='YAML',
                                                isSpace=True)

        elif param_string is not None:
            success, message = self.param.delta(space,
                                                0,
                                                param_string,
                                                iformat='JSONS',
                                                isSpace=True)

        else:
            # load parameter file at the space directory
            success, message = self.param.loadYaml(space, 0, isSpace=True)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(
                f'Unable to load space parameters. {message}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)
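For context, a sketch of how the param_string branch above might be exercised; the import path, class name and parameter key are assumptions.

# usage sketch, assuming a chemical space 'MySpace' already exists
import json
from flame.sbuild import Sbuild  # assumed import path

delta = json.dumps({'similarity_metric': 'euclidean'})  # hypothetical key
sbuilder = Sbuild('MySpace', param_string=delta)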
Example #4
def action_predictions_result(label, output='text'):
    '''
    try to retrieve the prediction results with the label used as argument
    returns 
        - (False, error dict or None) if there is no directory or the 
          prediction pickle file cannot be found 

        - (True, object) with the results otherwise
    '''
    # get the model repo path
    predictions_path = pathlib.Path(utils.predictions_repository_path())

    label_path = predictions_path.joinpath(label)

    if not os.path.isdir(label_path):
        if output != 'text':
            return False, {'code': 0, 'message': f'directory {label_path} not found'}
        print(f'directory {label_path} not found')
        return False, None

    result_path = label_path.joinpath('prediction-results.pkl')
    if not result_path.is_file():
        if output != 'text':
            return False, {'code': 0, 'message': f'predictions not found for {label} directory'}
        print(f'predictions not found for {label} directory')
        return False, None

    iconveyor = Conveyor()

    with open(result_path, 'rb') as handle:
        success, message = iconveyor.load(handle)

    if not success:
        if output != 'text':
            return False, {'code': 1, 'message': f'error reading prediction results with message {message}'}
        print(f'error reading prediction results with message {message}')
        return False, None

    # console output
    print_prediction_result(('obj_num', 'number of objects', iconveyor.getVal('obj_num')))

    if iconveyor.isKey('external-validation'):
        for val in iconveyor.getVal('external-validation'):
            print_prediction_result(val)

    if iconveyor.isKey('values'):
        for i in range(iconveyor.getVal('obj_num')):
            print(iconveyor.getVal('obj_nam')[i], '\t', float("{0:.4f}".format(iconveyor.getVal('values')[i])))

    # return iconveyor
    return True, iconveyor
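A hedged call sketch; 'run1' is an assumed prediction label. With any output other than 'text', errors come back as dicts carrying 'code' and 'message' keys instead of being printed.

# usage sketch; 'run1' is a hypothetical label
success, result = action_predictions_result('run1', output='JSON')
if not success and result is not None:
    print(result['code'], result['message'])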
Example #5
def action_results(model, version=None, output_variables=False):
    ''' Returns a JSON with whole results info for a given model and version '''

    if model is None:
        return False, 'Empty model label'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        return False, 'results not found'

    from flame.conveyor import Conveyor

    conveyor = Conveyor()
    with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
        conveyor.load(handle)

    return True, conveyor.getJSON()
Example #6
def action_results(model, version=None, output_variables=False):
    ''' Returns an object with whole results info for a given model and version '''

    if model is None:
        return False, {'code': 1, 'message': 'Empty model label'}

    results_path = utils.model_path(model, version)
    results_file = os.path.join(results_path, 'model-results.pkl')

    if not os.path.isfile(results_file):
        return False, {'code': 0, 'message': 'Results file not found'}

    conveyor = Conveyor()

    with open(results_file, 'rb') as handle:
        conveyor.load(handle)

    return True, conveyor
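The two variants above differ only in their error convention (plain message vs dict with 'code' and 'message') and in the results file name they expect. A hedged call against the second variant:

# usage sketch; 'MyModel' is an assumed model name
success, result = action_results('MyModel', version=1)
if not success:
    print(result['message'])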
Example #7
    def load_results(self):
        '''
            Load results pickle with model information
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path, 'results.pkl')
        self.conveyor = Conveyor()
        # load the conveyor from this pickle file
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')

        try:
            with open(results_file_name, "rb") as input_file:
                self.conveyor.load(input_file)
        except Exception as e:
            # LOG.error(f'No valid results pickle found at:
            # {results_file_name}')
            raise e
Example #8
    def __init__(self, space, version, output_format=None, label=None):
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('sapply')

        # load modelID
        path = utils.space_path(space, version)
        meta = os.path.join(path, 'space-meta.pkl')
        try:
            with open(meta, 'rb') as handle:
                modelID = pickle.load(handle)
        except Exception:
            LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', modelID)
        LOG.debug(f'Loaded space with modelID: {modelID}')

        # assign prediction (search) label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(
                f'Unable to load space parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

        return
Example #9
    def __init__(self, model, version, output_format=None):
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        # loadYaml returns a (success, message) tuple in the sibling snippets,
        # so testing the tuple's truthiness would always pass; unpack it
        success, message = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(f'Unable to load model parameters. {message}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)
 
        return
Example #10
    def __init__(self, model, version=0, output_format=None, label=None):
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('apply')

        # load modelID
        success, result = utils.getModelID(model, version, 'model')
        if not success:
            LOG.critical(f'{result}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', result)
        LOG.debug(f'Loaded model with modelID: {result}')

        # assign prediction label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(
                f'Unable to load model parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

            if 'ghost' in output_format:
                self.param.setVal('output_similar', False)

        return
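A hedged instantiation sketch; the class name (taken from Example #13 below), model name and label are assumptions. Note that passing an output format containing 'ghost' also disables the similarity search.

# usage sketch, assuming a trained model 'MyModel', version 1
predictor = Predict('MyModel', version=1, output_format='ghost', label='run1')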
Example #11
    def __init__(self, space, version, output_format=None, label=None):
        LOG.debug('Starting predict...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        # loadYaml returns a (success, message) tuple in the sibling snippets,
        # so testing the tuple's truthiness would always pass; unpack it
        success, message = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(f'Unable to load space parameters. {message}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

        return
Example #12
class Documentation:
    ''' Class storing the information needed to document models
        Fields are loaded from a YAML file (documentation.yaml)

        ...

        Attributes
        ----------

        fields : dict
            fields in the documentation
        version : int
            documentation version


        Methods
        -------

        load_parameters()
            Accesses the parameter file to retrieve all
            information needed to document the model.
        load_results()
            Accesses the build results to retrieve all
            information needed to document the model.
        assign_parameters()
            Fill documentation values corresponding to
            model parameter values
        assign_results()
            Assign result values to documentation fields
        get_upf_template()
            Creates a QMRF-like spreadsheet
        get_prediction_template()
            Creates a reporting document for predictions
    
        '''
    def __init__(self, model, version=0, context='model'):
        ''' Load the fields from the documentation file'''

        self.model = model
        self.version = version
        self.fields = None
        self.parameters = Parameters()
        self.conveyor = None

        # obtain the path and the default name of the model documents
        documentation_file_path = utils.model_path(self.model, self.version)
        documentation_file_name = os.path.join(documentation_file_path,
                                               'documentation.yaml')

        # load the main class dictionary (p) from this yaml file
        if not os.path.isfile(documentation_file_name):
            raise Exception('Documentation file not found')

        try:
            with open(documentation_file_name, 'r') as documentation_file:
                self.fields = yaml.safe_load(documentation_file)
        except Exception as e:
            # LOG.error(f'Error loading documentation file with exception: {e}')
            raise e

        success, message = self.parameters.loadYaml(model, 0)

        if not success:
            print(
                'Parameters could not be loaded. Please make sure the endpoint is correct'
            )
            return

        # Remove this after acc
        #self.load_parameters()
        if context == 'model':
            self.load_results()
            self.assign_parameters()
            self.assign_results()
            self.setVal('md5', self.idataHash())

    def delta(self, model, version, doc, iformat='YAML', isSpace=False):
        ''' Loads a set of parameters from the configuration file present 
            at the model directory

            Also inserts the keys present in the doc provided, 
            assuming that it contains a YAML-compatible format, like the one
            generated by manage

            Adds some parameters identifying the model and the 
            hash of the configuration file 
        '''

        # if not self.loadYaml(model, version, isSpace):
        #     return False, 'file not found'

        # parse the parameter file assuming it will be in
        # a YAML-compatible format
        if iformat == 'JSONS':
            try:
                newp = json.loads(doc)
            except Exception as e:
                return False, e
        else:
            try:
                with open(doc, 'r') as pfile:
                    if iformat == 'YAML':
                        newp = yaml.safe_load(pfile)
                    elif iformat == 'JSON':
                        newp = json.load(pfile)
            except Exception as e:
                return False, e

        # update the internal dict with the keys in the input file (delta)
        black_list = []
        for key in newp:
            if key not in black_list:
                val = newp[key]
                # YAML represents null values as the string 'None', which
                # must be converted back to None
                if val == 'None':
                    val = None
                if isinstance(val, dict):
                    for inner_key in val:
                        inner_val = val[inner_key]
                        if inner_val == 'None':
                            inner_val = None
                        self.setInnerVal(key, inner_key, inner_val)
                        #print ('@delta: adding',key, inner_key, inner_val)
                else:
                    self.setVal(key, val)
                    #print ('@delta: adding',key,val,type(val))

        # dump internal dict to the parameters file
        if isSpace:
            parameters_file_path = utils.space_path(model, version)
        else:
            parameters_file_path = utils.model_path(model, version)

        parameters_file_name = os.path.join(parameters_file_path,
                                            'documentation.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(self.fields, pfile)
        except Exception:
            return False, 'unable to write parameters'

        self.setVal('md5', self.idataHash())

        return True, 'OK'

    def load_results(self):
        '''
            Load results pickle with model information
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path, 'results.pkl')
        self.conveyor = Conveyor()
        # load the conveyor from this pickle file
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')

        try:
            with open(results_file_name, "rb") as input_file:
                self.conveyor.load(input_file)
        except Exception as e:
            # LOG.error(f'No valid results pickle found at:
            # {results_file_name}')
            raise e

    def getVal(self, key):
        ''' Return the value of the key parameter or None if it is
            not found in the parameters dictionary
        '''
        if not key in self.fields:
            return None

        if 'value' in self.fields[key]:
            return self.fields[key]['value']
        return None

    def getDict(self, key):
        ''' Return the value of the key parameter as a dictionary, or an
            empty dictionary if it is not found
        '''
        d = {}
        if not key in self.fields:
            return d

        element = self.fields[key]['value']
        if isinstance(element, dict):
            # iterate keys and copy to the temp dictionary
            # the key and the content of 'value'
            for k, v in element.items():
                if 'value' in v:
                    d[k] = v['value']
        return d

    def setVal(self, key, value):
        ''' Sets the parameter defined by key to the given value
        '''
        # for existing keys, replace the contents of 'value'
        if key in self.fields:
            if "value" in self.fields[key]:
                if not isinstance(self.fields[key]['value'], dict):
                    self.fields[key]["value"] = value
                else:
                    # print(key)
                    for k in value.keys():
                        self.fields[key][k] = value[k]

        # for new keys, create a new element with 'value' key
        else:
            self.fields[key] = {'value': value}

    def setInnerVal(self, okey, ikey, value):
        ''' Sets a parameter within an inner dictionary. The entry is defined
            by a key of the outer dictionary (okey) and a second key in the
            inner dictionary (ikey). The parameter will be set to the given
            value

            This function tests the existence of all the keys and dictionaries
            to prevent crashes, and returns without setting the value if any
            error is found
        '''
        if not okey in self.fields:
            return

        if not "value" in self.fields[okey]:
            return

        odict = self.fields[okey]['value']

        if not isinstance(odict, dict):
            return

        if not ikey in odict:
            return
        if not isinstance(odict[ikey], dict):
            odict['value'] = value
            return
        if "value" in odict[ikey]:
            odict[ikey]["value"] = value
        else:
            odict[ikey] = {'value': value}

    def appVal(self, key, value):
        ''' Appends value to the end of existing key list 
        '''

        if not key in self.fields:
            return

        if "value" in self.fields[key]:
            vt = self.fields[key]['value']

            # if the key is already a list, append the new value at the end
            if isinstance(vt, list):
                self.fields[key]['value'].append(value)
            # ... otherwise, create a list with the previous content and the
            # new value
            else:
                self.fields[key]['value'] = [vt, value]

    def dumpJSON(self):
        return json.dumps(self.fields)

    def assign_parameters(self):
        '''
            Fill documentation values corresponding to model parameter values
        '''

        if not self.parameters:
            # raising a plain string is a TypeError in Python 3
            raise Exception('Parameters were not loaded')

        # self.fields['Algorithm']['subfields']['algorithm']['value'] = \
        #     self.parameters.getVal('model')
        self.setInnerVal('Algorithm', 'algorithm',
                         self.parameters.getVal('model'))
        self.setInnerVal('Algorithm', 'descriptors',
                         self.parameters.getVal('computeMD_method'))
        if self.parameters.getVal('conformal'):
            self.setInnerVal('AD_method', 'name', 'conformal prediction')
            self.setVal(
                'AD_parameters', f'Conformal Significance '
                f'{self.parameters.getVal("conformalSignificance")}')

    def assign_results(self):
        '''
            Assign result values to documentation fields
        '''
        # Accepted validation keys
        allowed = [
            'Conformal_accuracy', 'Conformal_mean_interval', 'Sensitivity',
            'Specificity', 'MCC', 'Conformal_coverage',
            'Q2', 'SDEP', 'SensitivityPed', 'SpecificityPred',
            'MCCpred', 'scoringR', 'R2', 'SDEC'
        ]
        model_info = self.conveyor.getVal('model_build_info')
        validation = self.conveyor.getVal('model_valid_info')

        # The commented-out code below would filter the hyperparameters
        # to be reported.

        # Get parameter keys for the used estimator
        #param_key = self.parameters.getVal('model') + '_parameters'
        # Get parameter dictionary
        #estimator_params = self.parameters.getDict(param_key)

        self.fields['Algorithm_settings']['value'] = \
            (self.conveyor.getVal('estimator_parameters'))

        # Horrendous patch to solve a backwards-compatibility problem
        if 'subfields' in self.fields['Data_info']:
            sub_label = 'subfields'
        else:
            sub_label = 'value'

        self.fields['Data_info']\
            [sub_label]['training_set_size']['value'] = \
            model_info[0][2]

        self.fields['Descriptors']\
            [sub_label]['final_number']['value'] = \
            model_info[1][2]
        self.fields['Descriptors']\
            [sub_label]['ratio']['value'] = \
            '{:0.2f}'.format(model_info[1][2]/model_info[0][2])

        internal_val = dict()
        for stat in validation:
            if stat[0] in allowed:
                internal_val[stat[0]] = float("{0:.2f}".format(stat[2]))
        if internal_val:
            self.fields['Internal_validation_1']\
                ['value'] = internal_val

    def get_string(self, dictionary):
        '''
        Convert a dictionary (from documentation.yaml)
        to string format for the model template
        '''
        text = ''
        for key, val in dictionary.items():
            text += f'{key} : {val["value"]}\n'
        return text

    def get_string2(self, dictionary):
        '''
        Convert a dictionary (from parameter file) to 
        string format for the model template
        '''
        text = ''
        for key, val in dictionary.items():
            try:
                text += f'{key} : {val}\n'
            except Exception:
                continue

        return text

    def get_upf_template(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type
        '''

        template = pd.DataFrame()
        template['ID'] = ['']
        template['Version'] = ['']
        template['Description'] = ['']
        template['Contact'] = ['']
        template['Institution'] = ['']
        template['Date'] = ['']
        template['Endpoint'] = ['']
        template['Endpoint_units'] = ['']
        template['Dependent_variable'] = ['']
        template['Species'] = ['']
        template['Limits_applicability'] = ['']
        template['Experimental_protocol'] = ['']
        template['Data_info'] = [
            self.get_string(self.fields['Data_info']['subfields'])
        ]
        template['Model_availability'] = [\
            self.get_string(self.fields['Model_availability']
                            ['subfields'])]
        template['Algorithm'] = [
            self.get_string(self.fields['Algorithm']['subfields'])
        ]
        template['Software'] = [
            self.get_string(self.fields['Software']['subfields'])
        ]
        template['Descriptors'] = [
            self.get_string(self.fields['Descriptors']['subfields'])
        ]
        template['Algorithm_settings'] = [
            self.get_string(self.fields['Algorithm_settings']['subfields'])
        ]
        template['AD_method'] = [
            self.get_string(self.fields['AD_method']['subfields'])
        ]
        template['AD_parameters'] = [self.fields['AD_parameters']['value']]

        template['Goodness_of_fit_statistics'] = [self.fields\
                                ['Goodness_of_fit_statistics']['value']]
        template['Internal_validation_1'] = [
            self.fields['Internal_validation_1']['value']
        ]
        template.to_csv('QMRF_template.tsv', sep='\t')

    def get_upf_template2(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type
        '''
        fields = ['ID', 'Version', 'Contact', 'Institution',\
            'Date', 'Endpoint', 'Endpoint_units', 'Dependent_variable', 'Species',\
                'Limits_applicability', 'Experimental_protocol', 'Data_info',\
                    'Model_availability', 'Algorithm', 'Software', 'Descriptors',\
                        'Algorithm_settings', 'AD_method', 'AD_parameters',\
                            'Goodness_of_fit_statistics', 'Internal_validation_1' ]
        template = pd.DataFrame(
            columns=['Field', 'Parameter name', 'Parameter value'])
        for field in fields:
            try:
                subfields = self.fields[field]['subfields']
            except Exception:
                subfields = self.fields[field]['value']
            if subfields is not None:
                for index, subfield in enumerate(subfields):
                    # print the field name only on its first row
                    field2 = field if index == 0 else ''
                    value = str(subfields[subfield]['value'])
                    # None values are read back from YAML as the string 'None'
                    if value == "None":
                        value = ""
                    row = dict(zip(['Field', 'Parameter name', 'Parameter value'],\
                        [field2, subfield, value]))
                    # DataFrame.append was removed in pandas 2.x; concat instead
                    template = pd.concat([template, pd.DataFrame([row])],
                                         ignore_index=True)
            else:
                value = str(self.fields[field]['value'])
                if value == 'None':
                    value = ""
                row = dict(zip(['Field', 'Parameter name', 'Parameter value'],\
                    [field, "", value]))
                template = pd.concat([template, pd.DataFrame([row])],
                                     ignore_index=True)
        template.to_csv('QMRF_template3.tsv', sep='\t', index=False)

    def get_prediction_template(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path,
                                         'prediction-results.pkl')
        conveyor = Conveyor()
        # load the conveyor from this pickle file
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')
        try:
            with open(results_file_name, "rb") as input_file:
                conveyor.load(input_file)
        except Exception as e:
            # LOG.error(f'No valid results pickle found at: {results_file_name}')
            raise e

        # First get the name, InChI and InChIKey

        names = conveyor.getVal('obj_nam')
        smiles = conveyor.getVal('SMILES')
        inchi = [AllChem.MolToInchi(AllChem.MolFromSmiles(m)) for m in smiles]
        inchikeys = [
            AllChem.InchiToInchiKey(
                AllChem.MolToInchi(AllChem.MolFromSmiles(m))) for m in smiles
        ]
        predictions = []
        applicability = []
        if self.parameters['quantitative']['value']:
            raise NotImplementedError('Prediction template for quantitative '
                                      'endpoints not implemented yet')
        if not self.parameters['conformal']['value']:
            predictions = conveyor.getVal('values')
        else:
            c0 = np.asarray(conveyor.getVal('c0'))
            c1 = np.asarray(conveyor.getVal('c1'))

            predictions = []
            for i, j in zip(c0, c1):
                prediction = ''
                if i == j:
                    prediction = 'out of AD'
                    applicability.append('out')
                else:
                    if i:
                        prediction = 'Inactive'
                    else:
                        prediction = 'Active'
                    applicability.append('in')

                predictions.append(prediction)

        # Now create the spreadsheets for the prediction

        # First write summary
        summary = ("Study name\n" + "Endpoint\n" + "QMRF-ID\n" +
                   "(Target)Compounds\n" +
                   "Compounds[compounds]\tName\tInChiKey\n")

        for name, inch in zip(names, inchikeys):
            summary += f'\t{name}\t{inch}\n'

        summary += ("\nFile\n" + "Author name\n" + "E-mail\n" + "Role\n" +
                    "Affiliation\n" + "Date\n")

        with open('summary_document.tsv', 'w') as out:
            out.write(summary)

        # Now prediction details
        # Pandas is used to ease the table creation.

        reporting = pd.DataFrame()

        reporting['InChI'] = inchi
        reporting['CAS-RN'] = '-'
        reporting['SMILES'] = smiles
        reporting['prediction'] = predictions
        reporting['Applicability_domain'] = applicability
        reporting['reliability'] = '-'
        reporting['Structural_analogue_1_CAS'] = '-'
        reporting['Structural_analogue_1_smiles'] = '-'
        reporting['Structural_analogue_1_source'] = '-'
        reporting['Structural_analogue_1_experimental_value'] = '-'
        reporting['Structural_analogue_2_CAS'] = '-'
        reporting['Structural_analogue_2_smiles'] = '-'
        reporting['Structural_analogue_2_source'] = '-'
        reporting['Structural_analogue_2_experimental_value'] = '-'
        reporting['Structural_analogue_3_CAS'] = '-'
        reporting['Structural_analogue_3_smiles'] = '-'
        reporting['Structural_analogue_3_source'] = '-'
        reporting['Structural_analogue_3_experimental_value'] = '-'

        reporting.to_csv('prediction_report.tsv', sep='\t', index=False)

    def idataHash(self):
        ''' Create a md5 hash for a number of keys describing parameters
            relevant for idata

            This hash is compared between runs to check whether idata must
            recompute the MDs or not
        '''

        # update with any new idata relevant parameter
        keylist = [
            'SDFile_name', 'SDFile_activity', 'SDFile_experimental',
            'normalize_method', 'ionize_method', 'convert3D_method',
            'computeMD_method', 'TSV_varnames', 'TSV_objnames', 'TSV_activity',
            'input_type'
        ]

        idata_params = []
        for i in keylist:
            idata_params.append(self.getVal(i))

        # MD_settings is a dictionary; obtain and sort the keys+values
        md_params = self.getDict('MD_settings')
        md_list = []
        for key in md_params:
            # combine key + value in a single string
            md_list.append(key + str(md_params[key]))

        # list.sort() returns None, so append the sorted copy instead
        idata_params.append(sorted(md_list))

        # pickle the list to obtain the bytes buffer necessary to generate
        # the hexdigest
        p = pickle.dumps(idata_params)
        return hashlib.md5(p).hexdigest()
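A minimal standalone sketch of the hashing idea used in idataHash(), runnable on its own; the parameter names are illustrative, not flame's actual key list.

# standalone sketch; not flame code
import hashlib
import pickle

def params_hash(params: dict) -> str:
    # combine key + value into strings and sort them so the digest
    # does not depend on dictionary ordering
    items = sorted(f'{k}{v}' for k, v in params.items())
    # pickle to obtain the bytes buffer hashlib requires
    return hashlib.md5(pickle.dumps(items)).hexdigest()

print(params_hash({'normalize_method': 'standardize', 'input_type': 'molecule'}))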
Example #13
class Predict:

    def __init__(self, model, version=0, output_format=None, label=None):
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('apply')

        # load modelID
        success, result = utils.getModelID(model, version, 'model')
        if not success:
            LOG.critical(f'{result}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', result)
        LOG.debug (f'Loaded model with modelID: {result}')

        # assign prediction label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                    'method', 'single',
                    'Label used to identify the prediction')

        success, results = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(f'Unable to load model parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)
 
            if 'ghost' in output_format:
                self.param.setVal('output_similar', False)

        return

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs',1)

    def run(self, input_source):
        ''' Executes a default prediction workflow '''

        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)
        
        # if not os.path.isdir(endpoint):
        #     self.conveyor.setError(f'Unable to find model {self.model}, version {self.version}')
        #     #LOG.error(f'Unable to find model {self.model}')

        # if not self.conveyor.getError():
        # uses the child classes within the 'model' folder,
        # to allow customization of
        # the processing applied to each model
        modpath = utils.module_path(self.model, self.version)

        idata_child = importlib.import_module(modpath+".idata_child")
        apply_child = importlib.import_module(modpath+".apply_child")
        odata_child = importlib.import_module(modpath+".odata_child")

        # run the idata object, in charge of generating model data from the input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except Exception:
            LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_apply()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

        # for secret models, avoid searching for similar compounds
        space_pkl = os.path.join(endpoint, 'space.pkl')
        if not os.path.isfile(space_pkl):
            self.param.setVal('output_similar', False)

        if not self.conveyor.getError():
            if self.param.getVal('output_similar') is True:

                from flame.sapply import Sapply

                metric = self.param.getVal('similarity_metric')
                numsel = self.param.getVal('similarity_cutoff_num')
                cutoff = self.param.getVal('similarity_cutoff_distance')
                
                # sapply = Sapply(self.param, self.conveyor)

                sapply_child = importlib.import_module(modpath+".sapply_child")

                # run the sapply object, in charge of searching for similar compounds
                try:
                    sapply = sapply_child.SapplyChild(self.param, self.conveyor)
                except Exception:
                    LOG.warning('Sapply child architecture mismatch, defaulting to Sapply parent')
                    sapply = Sapply(self.param, self.conveyor)

                sapply.run(cutoff, numsel, metric)
                LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        if not self.conveyor.getError():
            # run the apply object, in charge of generating a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Apply child architecture mismatch, defaulting to Apply parent')
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run the odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted
        # in the conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except Exception:
            LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
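The run() method above repeats one pattern per stage: import the per-model child module, try to instantiate its customized class, and fall back to the generic parent on any mismatch. A standalone sketch of that pattern; all names here are hypothetical.

# standalone sketch of the child-with-parent-fallback pattern
import importlib

def instantiate(modpath, child_module, child_class, parent_cls, *args):
    # try the customized child class shipped with the model...
    try:
        mod = importlib.import_module(modpath + '.' + child_module)
        return getattr(mod, child_class)(*args)
    # ...and default to the generic parent if anything goes wrong
    except Exception:
        return parent_cls(*args)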
Example #14
class Build:
    def __init__(self,
                 model,
                 param_file=None,
                 param_string=None,
                 output_format=None):
        LOG.debug('Starting build...')
        self.model = model
        self.param = Parameters()
        self.conveyor = Conveyor()

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the model
            # directory and save changes to make them persistent
            success, message = self.param.delta(model,
                                                0,
                                                param_file,
                                                iformat='YAML')

        elif param_string is not None:
            success, message = self.param.delta(model,
                                                0,
                                                param_string,
                                                iformat='JSONS')

        else:
            # load parameter file at the model directory
            success, message = self.param.loadYaml(model, 0)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(
                f'Unable to load model parameters. "{message}" Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON as an output format when the object is
        # instantiated from a web service call that requires this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default build workflow '''

        # path to endpoint
        epd = utils.model_path(self.model, 0)
        if not os.path.isdir(epd):
            self.conveyor.setError(f'Unable to find model {self.model}')
            #LOG.error(f'Unable to find model {self.model}')

        # import ichild classes
        if not self.conveyor.getError():
            # uses the child classes within the 'model' folder,
            # to allow customization of  the processing applied to each model
            modpath = utils.module_path(self.model, 0)

            idata_child = importlib.import_module(modpath + ".idata_child")
            learn_child = importlib.import_module(modpath + ".learn_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run the idata object, in charge of generating model data from the input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except Exception:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)
            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError('Failed to compute MDs')

            if not self.conveyor.isKey('ymatrix'):
                self.conveyor.setError(
                    'No activity data (Y) found in training series')

        if not self.conveyor.getError():
            # instantiate learn (build a model from idata) and run it;
            # the original ran LearnChild before the try/except, so the
            # fallback never executed and a child mismatch would crash
            try:
                learn = learn_child.LearnChild(self.param, self.conveyor)
            except Exception:
                LOG.warning(
                    'Learn child architecture mismatch, defaulting to Learn parent'
                )
                learn = Learn(self.param, self.conveyor)

            learn.run()
            LOG.debug(f'learn child {type(learn).__name__} completed `run()`')

        # run the odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted
        # in the conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except Exception:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor)

        return odata.run()
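A hedged end-to-end sketch for the Build class; the import path, model name and input file are assumptions, and the return shape follows the (success, results) convention used throughout these snippets.

# usage sketch, assuming flame is installed, a model 'MyModel' exists
# and training.sdf is a valid training series
from flame.build import Build  # assumed import path

builder = Build('MyModel')
success, results = builder.run('training.sdf')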
Example #15
    def get_prediction_template(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path,
                                         'prediction-results.pkl')
        conveyor = Conveyor()
        # load the conveyor from this pickle file
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')
        try:
            with open(results_file_name, "rb") as input_file:
                conveyor.load(input_file)
        except Exception as e:
            # LOG.error(f'No valid results pickle found at: {results_file_name}')
            raise e

        # First get the name, InChI and InChIKey

        names = conveyor.getVal('obj_nam')
        smiles = conveyor.getVal('SMILES')
        inchi = [AllChem.MolToInchi(AllChem.MolFromSmiles(m)) for m in smiles]
        inchikeys = [
            AllChem.InchiToInchiKey(
                AllChem.MolToInchi(AllChem.MolFromSmiles(m))) for m in smiles
        ]
        predictions = []
        applicability = []
        if self.parameters['quantitative']['value']:
            raise NotImplementedError('Prediction template for quantitative '
                                      'endpoints not implemented yet')
        if not self.parameters['conformal']['value']:
            predictions = conveyor.getVal('values')
        else:
            c0 = np.asarray(conveyor.getVal('c0'))
            c1 = np.asarray(conveyor.getVal('c1'))

            predictions = []
            for i, j in zip(c0, c1):
                prediction = ''
                if i == j:
                    prediction = 'out of AD'
                    applicability.append('out')
                else:
                    if i:
                        prediction = 'Inactive'
                    else:
                        prediction = 'Active'
                    applicability.append('in')

                predictions.append(prediction)

        # Now create the spreadsheets for the prediction

        # First write summary
        summary = ("Study name\n" + "Endpoint\n" + "QMRF-ID\n" +
                   "(Target)Compounds\n" +
                   "Compounds[compounds]\tName\tInChiKey\n")

        for name, inch in zip(names, inchikeys):
            summary += f'\t{name}\t{inch}\n'

        summary += ("\nFile\n" + "Author name\n" + "E-mail\n" + "Role\n" +
                    "Affiliation\n" + "Date\n")

        with open('summary_document.tsv', 'w') as out:
            out.write(summary)

        # Now prediction details
        # Pandas is used to ease the table creation.

        reporting = pd.DataFrame()

        reporting['InChI'] = inchi
        reporting['CAS-RN'] = '-'
        reporting['SMILES'] = smiles
        reporting['prediction'] = predictions
        reporting['Applicability_domain'] = applicability
        reporting['reliability'] = '-'
        reporting['Structural_analogue_1_CAS'] = '-'
        reporting['Structural_analogue_1_smiles'] = '-'
        reporting['Structural_analogue_1_source'] = '-'
        reporting['Structural_analogue_1_experimental_value'] = '-'
        reporting['Structural_analogue_2_CAS'] = '-'
        reporting['Structural_analogue_2_smiles'] = '-'
        reporting['Structural_analogue_2_source'] = '-'
        reporting['Structural_analogue_2_experimental_value'] = '-'
        reporting['Structural_analogue_3_CAS'] = '-'
        reporting['Structural_analogue_3_smiles'] = '-'
        reporting['Structural_analogue_3_source'] = '-'
        reporting['Structural_analogue_3_experimental_value'] = '-'

        reporting.to_csv('prediction_report.tsv', sep='\t', index=False)
Example #16
class Documentation:
    ''' Class storing the information needed to document models
        Fields are loaded from a YAML file (documentation.yaml)

        ...

        Attributes
        ----------

        fields : dict
            fields in the documentation
        version : int
            documentation version


        Methods
        -------

        load_parameters()
            Accesses the parameter file to retrieve all
            information needed to document the model.
        load_results()
            Accesses the build results to retrieve all
            information needed to document the model.
        assign_parameters()
            Fill documentation values corresponding to
            model parameter values
        assign_results()
            Assign result values to documentation fields
        get_upf_template()
            Creates a QMRF-like spreadsheet
        get_prediction_template()
            Creates a reporting document for predictions
    
        '''
    def __init__(self, model, version=0, context='model'):
        ''' Load the fields from the documentation file'''

        self.model = model
        self.version = version
        self.fields = None
        self.parameters = Parameters()
        self.conveyor = None

        # obtain the path and the default name of the model documents
        documentation_file_path = utils.model_path(self.model, self.version)
        documentation_file_name = os.path.join(documentation_file_path,
                                               'documentation.yaml')

        # load the main class dictionary (p) from this yaml file
        if not os.path.isfile(documentation_file_name):
            raise Exception('Documentation file not found')

        try:
            with open(documentation_file_name, 'r') as documentation_file:
                self.fields = yaml.safe_load(documentation_file)
        except Exception as e:
            # LOG.error(f'Error loading documentation file with exception: {e}')
            raise e

        success, message = self.parameters.loadYaml(model, version)

        if not success:
            print(
                f'Parameters could not be loaded. {message}. Please make sure endpoint and version are correct'
            )
            return

        # Remove this after acc
        #self.load_parameters()
        if context == 'model':
            self.load_results()
            self.assign_parameters()
            self.assign_results()
            self.autocomplete_documentation()
            self.setVal('md5', self.idataHash())

    def safe_copy(self, inputfile):
        ''' this function makes sure that the input file contains only
            printable chars
        '''

    def delta(self, model, version, doc, iformat='YAML', isSpace=False):
        ''' Loads a set of parameters from the configuration file present 
            at the model directory

            Also inserts the keys present in the doc provided, 
            assuming that it contains a YAML-compatible format, like the one
            generated by manage

            Adds some parameters identifying the model and the 
            hash of the configuration file 
        '''

        # input is a string, either in JSON or YAML format
        # this is the typical input sent by a web service call

        if iformat not in ['JSON', 'JSONS', 'YAML', 'YAMLS']:
            return False, 'input format not recognized'

        if iformat == 'JSONS':
            try:
                newp = json.loads(doc)
            except Exception as e:
                return False, str(e)
        elif iformat == 'YAMLS':
            try:
                # use safe_load; yaml.load without a Loader is unsafe
                newp = yaml.safe_load(doc)
            except Exception as e:
                return False, str(e)

        # input is a file, either in YAML or JSON format
        else:
            try:
                with open(doc, 'r') as pfile:
                    if iformat == 'YAML':
                        newp = yaml.safe_load(pfile)
                    elif iformat == 'JSON':
                        newp = json.load(pfile)
            except Exception as e:
                return False, str(e)

        # update the internal dict with the keys in the input file (delta)
        black_list = []
        for key in newp:
            if key not in black_list:
                val = newp[key]
                # YAML represents null values as the string 'None', which
                # must be converted back to None
                if val == 'None':
                    val = None
                if isinstance(val, dict):
                    for inner_key in val:
                        inner_val = val[inner_key]
                        if inner_val == 'None':
                            inner_val = None
                        self.setInnerVal(key, inner_key, inner_val)
                        #print ('@delta: adding',key, inner_key, inner_val)
                else:
                    self.setVal(key, val)
                    #print ('@delta: adding',key,val,type(val))

        # dump internal dict to the parameters file
        if isSpace:
            parameters_file_path = utils.space_path(model, version)
        else:
            parameters_file_path = utils.model_path(model, version)

        parameters_file_name = os.path.join(parameters_file_path,
                                            'documentation.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(self.fields, pfile)
        except Exception:
            return False, 'unable to write parameters'

        self.setVal('md5', self.idataHash())

        return True, 'OK'

    def load_results(self):
        '''
            Load results pickle with model information
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path,
                                         'model-results.pkl')
        self.conveyor = Conveyor()
        # load the conveyor from this pickle file
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')

        try:
            with open(results_file_name, "rb") as input_file:
                self.conveyor.load(input_file)
        except Exception as e:
            # LOG.error(f'No valid results pickle found at:
            # {results_file_name}')
            raise e

    def getVal(self, key):
        ''' Return the value of the key parameter or None if it is
            not found in the parameters dictionary
        '''
        if key not in self.fields:
            return None

        if 'value' in self.fields[key]:
            return self.fields[key]['value']
        return None

    def getDict(self, key):
        ''' Return the value of the key parameter as a dictionary, or an
            empty dictionary if it is not found in the parameters dictionary
        '''
        d = {}
        if key not in self.fields:
            return d

        element = self.fields[key]['value']
        if isinstance(element, dict):
            # iterate keys and copy to the temp dictionary
            # the key and the content of 'value'
            for k, v in element.items():
                if 'value' in v:
                    d[k] = v['value']
        return d
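
    # Sketch of the self.fields layout that getVal()/getDict() assume
    # (hypothetical values):
    #
    #   self.fields = {
    #       'model': {'value': 'RF', 'description': 'estimator'},
    #       'RF_parameters': {'value': {'n_estimators': {'value': 100}}}}
    #
    #   getVal('model')           ->  'RF'
    #   getDict('RF_parameters')  ->  {'n_estimators': 100}
    #   getVal('missing_key')     ->  None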

    def setVal(self, key, value):
        ''' Sets the parameter defined by key to the given value
        '''
        # for existing keys, replace the contents of 'value'
        if key in self.fields:
            if "value" in self.fields[key]:
                if not isinstance(self.fields[key]['value'], dict):
                    self.fields[key]["value"] = value
                # this should never happen, since value is never a dictionary
                # else:
                #     for k in value.keys():
                #         self.fields[key][k] = value[k]

        # this behaviour is deprecated, do not add new keys
        # # for new keys, create a new element with 'value' key
        # else:
        #     self.fields[key] = {'value': value}

    def setInnerVal(self, okey, ikey, value):
        ''' Sets a parameter within an internal dictionary. The entry is defined
            by a key of the outer dictionary (okey) and a second key in the inner
            dictionary (ikey). The parameter will be set to the given value.

            This function tests the existence of all the keys and dictionaries to
            prevent crashes, and returns without setting the value if any error
            is found
        '''
        if okey not in self.fields:
            return

        if "value" not in self.fields[okey]:
            return

        odict = self.fields[okey]['value']

        if not isinstance(odict, dict):
            return

        # now we are sure that odict is the right inner dictionary
        if ikey not in odict:
            return

        # algorithm parameters not present in the template are stored as
        # plain values
        if not isinstance(odict[ikey], dict):
            odict[ikey] = value
            return

        # keys present in the template
        if "value" in odict[ikey]:
            odict[ikey]["value"] = value

    def appVal(self, key, value):
        ''' Appends value to the end of existing key list 
        '''

        if key not in self.fields:
            return

        if "value" in self.fields[key]:
            vt = self.fields[key]['value']

            # if the key is already a list, append the new value at the end
            if isinstance(vt, list):
                self.fields[key]['value'].append(value)
            # ... otherwise, create a list with the previous content and the
            # new value
            else:
                self.fields[key]['value'] = [vt, value]
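
    # Behaviour sketch for setVal()/appVal() on existing keys
    # (hypothetical values):
    #
    #   self.fields['output_format'] = {'value': 'TSV'}
    #   appVal('output_format', 'JSON')   # value becomes ['TSV', 'JSON']
    #   setVal('numCPUs', 1)              # replaces an existing scalar value
    #   setVal('new_key', 1)              # ignored: new keys are not created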

    def dumpJSON(self):
        return json.dumps(self.fields, allow_nan=True)

    def dumpYAML(self):
        yaml_out = []

        order = [
            'ID', 'Version', 'Model_title', 'Model_description', 'Keywords',
            'Contact', 'Institution', 'Date', 'Endpoint', 'Endpoint_units',
            'Interpretation', 'Dependent_variable', 'Species',
            'Limits_applicability', 'Experimental_protocol',
            'Model_availability', 'Data_info', 'Algorithm', 'Software',
            'Descriptors', 'Algorithm_settings', 'AD_method', 'AD_parameters',
            'Goodness_of_fit_statistics', 'Internal_validation_1',
            'Internal_validation_2', 'External_validation', 'Comments',
            'Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates',
            'QMRF_updates', 'References', 'QMRF_same_models',
            'Mechanistic_basis', 'Mechanistic_references',
            'Supporting_information', 'Comment_on_the_endpoint',
            'Endpoint_data_quality_and_variability', 'Descriptor_selection'
        ]

        for ik in order:
            if ik in self.fields:
                k = ik
                v = self.fields[k]

                ivalue = ''
                idescr = ''
                ioptio = ''

                ## newest parameter formats are extended and contain
                ## rich metainformation for each entry
                if 'value' in v:
                    if not isinstance(v['value'], dict):
                        ivalue = v['value']
                    else:
                        # print header of dictionary
                        yaml_out.append(f'{k} :')

                        # iterate keys assuming existence of value and description
                        for intk in v['value']:
                            intv = v['value'][intk]
                            if not isinstance(intv, dict):
                                yaml_out.append(
                                    f'   {intk:27} : {str(intv):30}')

                            else:
                                #print(intk)
                                intv = v['value'][intk]

                                iivalue = ''
                                if "value" in intv:
                                    iivalue = intv["value"]
                                # else:
                                #     iivalue = intv

                                iidescr = ''
                                if "description" in intv and intv[
                                        "description"] is not None:
                                    iidescr = intv["description"]

                                iioptio = ''
                                if 'options' in intv:
                                    toptio = intv['options']

                                    if isinstance(toptio, list):
                                        if toptio != [None]:
                                            iioptio = f' {toptio}'

                                if isinstance(iivalue, float):
                                    iivalue = f'{iivalue:f}'
                                elif iivalue is None:
                                    iivalue = ''

                                yaml_out.append(
                                    f'   {intk:27} : {str(iivalue):30} #{iioptio} {iidescr}'
                                )

                        continue

                    if 'description' in v:
                        idescr = v['description']

                    if 'options' in v:
                        toptio = v['options']

                        if isinstance(toptio, list):
                            ioptio = f' {toptio}'

                yaml_out.append(
                    f'{k:30} : {str(ivalue):30} #{ioptio} {idescr}')

        return yaml_out
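
    # dumpYAML() returns a list of formatted lines rather than a single
    # string; a caller would typically print it line by line, e.g.:
    #
    #   for line in doc.dumpYAML():
    #       print(line)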

    def dumpExcel(self, oname):

        # openpyxl should be installed in the environment
        # pip install openpyxl

        from openpyxl import Workbook
        from openpyxl.styles import Font, NamedStyle, Alignment
        # from openpyxl.comments import Comment

        wb = Workbook()
        ws = wb.active
        ws.title = f"Model {self.model} documentation"
        alignment_style = Alignment(vertical='top', wrapText=True)

        # Label Style
        Label = NamedStyle(name="Label")
        Label.font = Font(name='Calibri', size=11, bold=True)
        Label.alignment = alignment_style

        ws.column_dimensions['A'].width = 25.10
        ws.column_dimensions['B'].width = 28.00
        ws.column_dimensions['C'].width = 60.00
        ws.column_dimensions['D'].width = 60.00

        # sections of the document, specifying the document keys which will be listed
        sections = [
            ('General model information', [
                'ID', 'Version', 'Model_title', 'Model_description',
                'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint',
                'Endpoint_units', 'Interpretation', 'Dependent_variable',
                'Species', 'Limits_applicability', 'Experimental_protocol',
                'Model_availability', 'Data_info'
            ]),
            ('Algorithm and software', [
                'Algorithm', 'Software', 'Descriptors', 'Algorithm_settings',
                'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics',
                'Internal_validation_1', 'Internal_validation_2',
                'External_validation', 'Comments'
            ]),
            ('Other information', [
                'Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates',
                'QMRF_updates', 'References', 'QMRF_same_models',
                'Mechanistic_basis', 'Mechanistic_references',
                'Supporting_information', 'Comment_on_the_endpoint',
                'Endpoint_data_quality_and_variability', 'Descriptor_selection'
            ])
        ]

        # Save the position and name of the label for the first and last sections
        position = []
        name = [sections[0][1][0], 'Other Comments']

        count = 1
        for isection in sections:

            for ik in isection[1]:

                label_k = ik.replace('_', ' ')

                if label_k == 'Internal validation 2' or label_k == 'External validation':
                    ws[f"A{count}"] = label_k
                    ws[f'A{count}'].style = Label
                else:
                    ws[f"B{count}"] = label_k
                    ws[f"B{count}"].style = Label

                if ik in self.fields:
                    # set defaults for value
                    ivalue = ''
                    #v is the selected entry in the documentation dictionary
                    v = self.fields[ik]
                    ## newest parameter formats are extended and contain
                    ## rich metainformation for each entry
                    if 'value' in v:
                        ivalue = v['value']

                        if isinstance(ivalue, dict):

                            ws[f"A{count}"] = label_k
                            ws[f"A{count}"].style = Label

                            end = (count) + (len(ivalue) - 1)

                            for intk in ivalue:
                                label_ik = intk.replace('_', ' ')
                                # label_ik = intk.replace('_f', '').replace('_', ' ')
                                ws[f'B{count}'] = label_ik
                                ws[f'B{count}'].style = Label

                                intv = ivalue[intk]
                                if not isinstance(intv, dict):

                                    iivalue = intv
                                    if iivalue is None:
                                        iivalue = " "
                                else:
                                    intv = ivalue[intk]
                                    iivalue = ''
                                    if 'value' in intv:
                                        iivalue = intv["value"]
                                    if iivalue is None:
                                        iivalue = ''

                                    ws[f'D{count}'] = intv['description']
                                    ws[f'D{count}'].alignment = alignment_style

                                ws[f'C{count}'] = f'{str(iivalue)}'
                                ws[f'C{count}'].font = Font(name='Calibri',
                                                            size=11,
                                                            color='3465a4')
                                ws[f'C{count}'].alignment = alignment_style

                                ws.merge_cells(f'A{count}:A{end}')

                                count += 1

                        else:

                            ws[f'D{count}'] = v['description']
                            ws[f'D{count}'].alignment = alignment_style

                            if label_k == 'Experimental protocol' or label_k == 'Comments':
                                position.append(count)

                            if ivalue is None:
                                ivalue = ''

                            ws[f'C{count}'] = f'{str(ivalue)}'
                            ws[f'C{count}'].font = Font(name='Calibri',
                                                        size=11,
                                                        color='3465a4')
                            ws[f'C{count}'].alignment = alignment_style

                            count += 1

        itr = 0
        for i in position:
            if itr == 0:
                ws[f'A{1}'] = name[itr]
                ws[f"A{1}"].style = Label
                ws.merge_cells(f'A{1}:A{i}')
            else:
                ws[f'A{i}'] = name[itr]
                ws[f"A{i}"].style = Label
                ws.merge_cells(f'A{i}:A{count-1}')

            itr += 1

        try:
            wb.save(oname)
        except:
            return False, f'error saving document as {oname}'

        return True, 'OK'
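
    # Usage sketch for dumpExcel() (the output file name is arbitrary):
    #
    #   success, msg = doc.dumpExcel('model_documentation.xlsx')
    #   if not success:
    #       print(msg)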

    def dumpWORD(self, oname):

        # python-docx should be installed in the environment
        # pip install python-docx

        from docx import Document
        from docx.shared import Pt
        from docx.shared import RGBColor

        # most of the formatting is included in this template, where we
        # redefined default styles for Normal, 'heading 1' and 'Table Grid'
        #
        # note that this template can be easily customized with a company
        # or project logo
        path = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(path, 'children')
        path = os.path.join(path, 'documentation_template.docx')
        document = Document(path)

        # define style for normal and heading 1
        # normal_style = document.styles['Normal']
        # normal_font = normal_style.font
        # normal_font.name = 'Calibri'
        # normal_font.size = Pt(10)

        # heading_style = document.styles['heading 1']
        # heading_font = heading_style.font
        # heading_font.name = 'Calibri'
        # heading_font.color.rgb = RGBColor(0x00, 0x00, 0x00)
        # heading_font.size = Pt(12)

        # width of columns 1 and 2
        wcol1 = 1400000
        wcol2 = 4200000

        # width of internal columns 1 and 2
        wicol1 = 1200000
        wicol2 = 2900000

        # sections of the document, specifying the document keys which will be listed
        sections = [
            ('General model information', [
                'ID', 'Version', 'Model_title', 'Model_description',
                'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint',
                'Endpoint_units', 'Interpretation', 'Dependent_variable',
                'Species', 'Limits_applicability', 'Experimental_protocol',
                'Model_availability', 'Data_info'
            ]),
            ('Algorithm and software', [
                'Algorithm', 'Software', 'Descriptors', 'Algorithm_settings',
                'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics',
                'Internal_validation_1', 'Internal_validation_2',
                'External_validation', 'Comments'
            ]),
            ('Other information', [
                'Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates',
                'QMRF_updates', 'References', 'QMRF_same_models',
                'Mechanistic_basis', 'Mechanistic_references',
                'Supporting_information', 'Comment_on_the_endpoint',
                'Endpoint_data_quality_and_variability', 'Descriptor_selection'
            ])
        ]

        for isection in sections:
            # heading with the section name
            document.add_heading(isection[0], level=1)

            # table with one row per key
            table = document.add_table(rows=len(isection[1]), cols=2)
            table.style = 'Table Grid'
            table.autofit = False

            count = 0
            for ik in isection[1]:
                # add a row and format two columns
                row = table.rows[count]
                row.cells[0].width = wcol1
                row.cells[1].width = wcol2

                label_k = ik.replace('_', ' ')
                row.cells[0].text = f'{label_k}'
                count = count + 1

                # define value
                if ik in self.fields:

                    # set defaults for value
                    ivalue = ''

                    # v is the selected entry in the documentation dictionary
                    v = self.fields[ik]

                    ## newest parameter formats are extended and contain
                    ## rich metainformation for each entry
                    if 'value' in v:
                        ivalue = v['value']

                        # if ivalue is a dictionary create a nested table and iterate
                        # to represent the keys within
                        if isinstance(ivalue, dict):

                            row.cells[0].text = f'{label_k}'
                            itable = row.cells[1].add_table(rows=len(ivalue),
                                                            cols=2)
                            itable.style = 'Table Grid'
                            itable.autofit = False

                            icount = 0
                            # iterate keys assuming existence of value and description
                            for intk in ivalue:

                                label_ik = intk.replace('_', ' ')
                                # label_ik = intk.replace('_f', '').replace('_', ' ')

                                irow = itable.rows[icount]
                                irow.cells[0].width = wicol1
                                irow.cells[1].width = wicol2
                                icount = icount + 1

                                intv = ivalue[intk]
                                if not isinstance(intv, dict):
                                    iivalue = intv

                                else:
                                    intv = ivalue[intk]

                                    iivalue = ''
                                    if "value" in intv:
                                        iivalue = intv["value"]
                                    if isinstance(iivalue, float):
                                        iivalue = f'{iivalue:f}'
                                    elif iivalue is None:
                                        iivalue = ''

                                irow.cells[0].text = f'{label_ik}'
                                irow.cells[1].text = f'{str(iivalue)}'

                        # if the key is not a dictionary just insert the value inside
                        else:
                            if ivalue is None:
                                ivalue = ''

                            row.cells[1].text = f'{str(ivalue)}'

        try:
            document.save(oname)
        except:
            return False, f'error saving document as {oname}'

        return True, 'OK'

    def assign_parameters(self):
        '''
            Fill documentation values corresponding to model parameter values
        '''

        if not self.parameters:
            raise Exception('Parameters were not loaded')

        # self.fields['Algorithm']['subfields']['algorithm']['value'] = \
        #     self.parameters.getVal('model')
        self.setInnerVal('Algorithm', 'algorithm',
                         self.parameters.getVal('model'))

        if self.parameters.getVal('input_type') == 'molecule':
            self.setInnerVal('Algorithm', 'descriptors',
                             self.parameters.getVal('computeMD_method'))
            cv_method = f'{self.parameters.getVal("ModelValidationCV")} ({str(self.parameters.getVal("ModelValidationN"))})'
            self.setInnerVal('Algorithm', 'cross-validation', cv_method)
            features = self.parameters.getVal("feature_selection")
            if features is not None:
                features += f' ({self.parameters.getVal("feature_number")})'
            self.setInnerVal('Descriptors', 'descriptors',
                             self.parameters.getVal('computeMD_method'))
            self.setInnerVal('Descriptors', 'scaling',
                             self.parameters.getVal('modelAutoscaling'))
            self.setInnerVal('Descriptors', 'selection_method', features)

        elif self.parameters.getVal('input_type') == 'model_ensemble':
            self.setInnerVal('Descriptors', 'descriptors', 'ensemble models')

        if self.parameters.getVal('conformal'):
            self.setInnerVal('AD_method', 'name', 'conformal prediction')
            # self.setInnerVal('AD_parameters', 'confidence',  f'{self.parameters.getVal("conformalConfidence")}')
            conformal_settings_dict = {}
            conformal_settings_dict['confidence'] = self.parameters.getVal(
                "conformalConfidence")
            conformal_settings = self.parameters.getVal('conformal_settings')
            if conformal_settings is not None:
                for key in conformal_settings:
                    conformal_settings_dict[key] = conformal_settings[key][
                        "value"]
            self.fields['AD_parameters']['value'] = conformal_settings_dict

    def assign_results(self):
        '''
            Assign result values to documentation fields
        '''
        # Accepted validation keys
        # allowed = ['Conformal_accuracy', 'Conformal_mean_interval',
        #            'Conformal_coverage', 'Conformal_accuracy',
        #            'Q2', 'SDEP',
        #            'SensitivityPred', 'SpecificityPred', 'MCCpred']
        # gof_allowed = ['R2', 'SDEC', 'scoringR'
        #                'Sensitivity', 'Specificity', 'MCC']

        allowed = [
            'Conformal_accuracy', 'Conformal_mean_interval',
            'Conformal_coverage', 'Q2', 'SDEP', 'scoringP', 'Sensitivity',
            'Specificity', 'MCC'
        ]

        gof_allowed = [
            'Conformal_accuracy_f', 'Conformal_mean_interval_f',
            'Conformal_coverage_f', 'R2', 'SDEC', 'scoringR', 'Sensitivity_f',
            'Specificity_f', 'MCC_f'
        ]

        model_info = self.conveyor.getVal('model_build_info')
        validation = self.conveyor.getVal('model_valid_info')
        # print(model_info)

        # The commented-out code below was used to filter the
        # hyperparameters to be reported.

        # Get parameter keys for the used estimator
        #param_key = self.parameters.getVal('model') + '_parameters'
        # Get parameter dictionary
        #estimator_params = self.parameters.getDict(param_key)

        self.fields['Algorithm_settings']['value'] = \
            (self.conveyor.getVal('estimator_parameters'))

        # print (self.conveyor.getVal('estimator_parameters'))

        # Horrendous patch to solve a backwards-compatibility problem
        if 'subfields' in self.fields['Data_info']:
            sub_label = 'subfields'
        else:
            sub_label = 'value'

        self.fields['Data_info'][sub_label]['training_set_size']['value'] = \
            model_info[0][2]

        self.fields['Descriptors'][sub_label]['final_number']['value'] = \
            model_info[1][2]

        self.fields['Descriptors'][sub_label]['ratio']['value'] = \
            '{:0.2f}'.format(model_info[1][2]/model_info[0][2])

        internal_val = dict()
        for stat in validation:
            if stat[0] in allowed:
                internal_val[stat[0]] = float("{0:.2f}".format(stat[2]))
        if internal_val:
            self.fields['Internal_validation_1']\
                ['value'] = internal_val

        gof = dict()
        for stat in validation:
            if stat[0] in gof_allowed:
                gof[stat[0]] = float("{0:.2f}".format(stat[2]))
        if gof:
            self.fields['Goodness_of_fit_statistics']\
                ['value'] = gof

    def get_string(self, dictionary):
        '''
        Convert a dictionary (from documentation.yaml)
        to string format for the model template
        '''
        text = ''
        for key, val in dictionary.items():
            text += f'{key} : {val["value"]}\n'
        return text

    def get_string2(self, dictionary):
        '''
        Convert a dictionary (from parameter file) to 
        string format for the model template
        '''
        text = ''
        for key, val in dictionary.items():
            try:
                # str(val) may fail for exotic values; skip those entries
                text += f'{key} : {val}\n'
            except Exception:
                continue

        return text
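
    # Example of the flattening performed by get_string()/get_string2()
    # (hypothetical dictionaries):
    #
    #   get_string({'format': {'value': 'SDF'}})  ->  'format : SDF\n'
    #   get_string2({'format': 'SDF'})            ->  'format : SDF\n'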

    def get_upf_template(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type
        '''

        template = pd.DataFrame()
        template['ID'] = ['']
        template['Version'] = ['']
        template['Description'] = ['']
        template['Contact'] = ['']
        template['Institution'] = ['']
        template['Date'] = ['']
        template['Endpoint'] = ['']
        template['Endpoint_units'] = ['']
        template['Dependent_variable'] = ['']
        template['Species'] = ['']
        template['Limits_applicability'] = ['']
        template['Experimental_protocol'] = ['']
        template['Data_info'] = [
            self.get_string(self.fields['Data_info']['subfields'])
        ]
        template['Model_availability'] = [\
            self.get_string(self.fields['Model_availability']
                            ['subfields'])]
        template['Algorithm'] = [
            self.get_string(self.fields['Algorithm']['subfields'])
        ]
        template['Software'] = [
            self.get_string(self.fields['Software']['subfields'])
        ]
        template['Descriptors'] = [
            self.get_string(self.fields['Descriptors']['subfields'])
        ]
        template['Algorithm_settings'] = [
            self.get_string(self.fields['Algorithm_settings']['subfields'])
        ]
        template['AD_method'] = [
            self.get_string(self.fields['AD_method']['subfields'])
        ]
        template['AD_parameters'] = [self.fields['AD_parameters']['value']]

        template['Goodness_of_fit_statistics'] = [self.fields\
                                ['Goodness_of_fit_statistics']['value']]
        template['Internal_validation_1'] = [
            self.fields['Internal_validation_1']['value']
        ]
        template.to_csv('QMRF_template.tsv', sep='\t')

    def get_upf_template2(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type
        '''
        fields = ['ID', 'Version', 'Contact', 'Institution',\
            'Date', 'Endpoint', 'Endpoint_units', 'Dependent_variable', 'Species',\
                'Limits_applicability', 'Experimental_protocol', 'Data_info',\
                    'Model_availability', 'Algorithm', 'Software', 'Descriptors',\
                        'Algorithm_settings', 'AD_method', 'AD_parameters',\
                            'Goodness_of_fit_statistics', 'Internal_validation_1' ]
        template_columns = ['Field', 'Parameter name', 'Parameter value']
        rows = []
        for field in fields:
            try:
                subfields = self.fields[field]['subfields']
            except KeyError:
                subfields = self.fields[field]['value']
            if subfields is not None:
                for index, subfield in enumerate(subfields):
                    # show the field name only in the first row of each block
                    field2 = field if index == 0 else ''
                    value = str(subfields[subfield]['value'])
                    # None values are read back from YAML as the string 'None'
                    if value == 'None':
                        value = ''
                    rows.append(dict(zip(template_columns,
                                         [field2, subfield, value])))
            else:
                value = str(self.fields[field]['value'])
                if value == 'None':
                    value = ''
                rows.append(dict(zip(template_columns, [field, '', value])))

        # DataFrame.append was removed in pandas 2.0; building the frame
        # from a list of rows works in both old and new versions
        template = pd.DataFrame(rows, columns=template_columns)
        template.to_csv('QMRF_template3.tsv', sep='\t', index=False)

    def get_prediction_template(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path,
                                         'prediction-results.pkl')
        conveyor = Conveyor()
        # load the conveyor contents from this pickle file
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')
        try:
            with open(results_file_name, "rb") as input_file:
                conveyor.load(input_file)
        except Exception as e:
            # LOG.error(f'No valid results pickle found at: {results_file_name}')
            raise e

        # First get name, InChI and InChIKey

        names = conveyor.getVal('obj_nam')
        smiles = conveyor.getVal('SMILES')
        inchi = [AllChem.MolToInchi(AllChem.MolFromSmiles(m)) for m in smiles]
        inchikeys = [
            AllChem.InchiToInchiKey(
                AllChem.MolToInchi(AllChem.MolFromSmiles(m))) for m in smiles
        ]
        predictions = []
        applicability = []
        if self.parameters['quantitative']['value']:
            raise NotImplementedError('Prediction template for quantitative'
                                      ' endpoints not implemented yet')
        if not self.parameters['conformal']['value']:
            predictions = conveyor.getVal('values')
        else:
            c0 = np.asarray(conveyor.getVal('c0'))
            c1 = np.asarray(conveyor.getVal('c1'))

            predictions = []
            for i, j in zip(c0, c1):
                if i == j:
                    prediction = 'out of AD'
                    applicability.append('out')
                else:
                    prediction = 'Inactive' if i else 'Active'
                    applicability.append('in')

                predictions.append(prediction)

        # Now create the spreadsheets for the prediction

        # First write summary
        summary = ("Study name\n" + "Endpoint\n" + "QMRF-ID\n" +
                   "(Target)Compounds\n" +
                   "Compounds[compounds]\tName\tInChiKey\n")

        for name, inch in zip(names, inchikeys):
            summary += f'\t{name}\t{inch}\n'

        summary += ("\nFile\n" + "Author name\n" + "E-mail\n" + "Role\n" +
                    "Affiliation\n" + "Date\n")

        with open('summary_document.tsv', 'w') as out:
            out.write(summary)

        # Now prediction details
        # Pandas is used to ease the table creation.

        reporting = pd.DataFrame()

        reporting['InChI'] = inchi
        reporting['CAS-RN'] = '-'
        reporting['SMILES'] = smiles
        reporting['prediction'] = predictions
        reporting['Applicability_domain'] = applicability
        reporting['reliability'] = '-'
        reporting['Structural_analogue_1_CAS'] = '-'
        reporting['Structural_analogue_1_smiles'] = '-'
        reporting['Structural_analogue_1_source'] = '-'
        reporting['Structural_analogue_1_experimental_value'] = '-'
        reporting['Structural_analogue_2_CAS'] = '-'
        reporting['Structural_analogue_2_smiles'] = '-'
        reporting['Structural_analogue_2_source'] = '-'
        reporting['Structural_analogue_2_experimental_value'] = '-'
        reporting['Structural_analogue_3_CAS'] = '-'
        reporting['Structural_analogue_3_smiles'] = '-'
        reporting['Structural_analogue_3_source'] = '-'
        reporting['Structural_analogue_3_experimental_value'] = '-'

        reporting.to_csv('prediction_report.tsv', sep='\t', index=False)

    def idataHash(self):
        ''' Create an md5 hash for a number of keys describing parameters
            relevant for idata

            This hash is compared between runs, to check whether idata must
            recompute the MDs or not
        '''

        # update with any new idata relevant parameter
        keylist = [
            'SDFile_name', 'SDFile_activity', 'SDFile_experimental',
            'normalize_method', 'ionize_method', 'convert3D_method',
            'computeMD_method', 'TSV_objnames', 'TSV_activity', 'input_type'
        ]

        idata_params = []
        for i in keylist:
            idata_params.append(self.getVal(i))

        # MD_settings is a dictionary; obtain and sort the keys+values
        md_params = self.getDict('MD_settings')
        md_list = []
        for key in md_params:
            # combine key + value in a single string
            md_list.append(key + str(md_params[key]))

        # list.sort() sorts in place and returns None; sort first, then append
        md_list.sort()
        idata_params.append(md_list)

        # use pickle to obtain a bytes buffer, necessary to generate the hexdigest
        p = pickle.dumps(idata_params)
        return hashlib.md5(p).hexdigest()
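
    # The hashing pattern above in isolation: pickle provides a stable bytes
    # buffer for the md5 digest (the parameter values are hypothetical):
    #
    #   import hashlib, pickle
    #   params = ['input.sdf', 'activity', None, ['normalize_methodstandard']]
    #   digest = hashlib.md5(pickle.dumps(params)).hexdigest()
    #   # identical parameter lists yield identical digests across runs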

    def empty_fields(self):
        '''
        This function checks which fields do not contain values 
        '''
        emptyfields = []
        for ik in self.fields:
            v = self.fields[ik]
            if 'value' in v:
                ivalue = v['value']
                if isinstance(ivalue, dict):
                    for intk in ivalue:
                        intv = ivalue[intk]
                        if not isinstance(intv, dict):
                            iivalue = intv
                            if iivalue is None or len(str(iivalue)) == 0:
                                emptyfields.append(intk)

                        else:
                            intv = ivalue[intk]
                            iivalue = ''
                            if intv["value"] is None or len(str(
                                    intv["value"])) == 0:
                                emptyfields.append(intk)

                else:
                    if ivalue is None or len(str(ivalue)) == 0:
                        emptyfields.append(ik)

        return emptyfields

    def get_mols(self):

        return dict(
            zip(self.conveyor.getVal("obj_nam"),
                self.conveyor.getVal("SMILES")))

    def autocomplete_documentation(self):
        """
        Auto complete fields in model documentation
        """
        #ID, Model identifier.
        self.fields['ID']['value'] = utils.getModelID(self.model, self.version,
                                                      'model')[1]
        #Version
        self.fields['Version']['value'] = str(self.version)
        #Date, Date of model development and Date of QMRF.
        today = date.today().strftime("%B %d, %Y")

        self.fields['Date']['value'] = today
        self.fields['Date_of_QMRF']['value'] = today

        # format, format used (SDF, TSV)
        if self.parameters.getVal('input_type') == 'data':
            self.fields['Data_info']['value']['format']['value'] = 'TSV'
        else:
            self.fields['Data_info']['value']['format']['value'] = 'SDF'
        #Algorithm, type: QSAR.
        self.fields['Algorithm']['value']['type']['value'] = 'QSAR'
        #Model, Main modelling program, version, description and license.
        software = "Flame, 1.0rc3"
        fieldsapplysoftware = ['model', 'descriptors', 'applicability_domain']

        for field in fieldsapplysoftware:
            if field == 'applicability_domain':
                if self.parameters.getVal('conformal'):
                    self.fields['Software']['value'][field]['value'] = software
            else:
                self.fields['Software']['value'][field]['value'] = software
Пример #17
0
class Sbuild:
    def __init__(self,
                 space,
                 param_file=None,
                 param_string=None,
                 output_format=None):
        LOG.debug('Starting sbuild...')
        self.space = space
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('slearn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID', utils.id_generator())
        LOG.debug(
            f'Generated new space with modelID: {self.conveyor.getMeta("modelID")}'
        )

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the space
            # directory and save changes to make them persistent
            success, message = self.param.delta(space,
                                                0,
                                                param_file,
                                                iformat='YAML',
                                                isSpace=True)

        elif param_string is not None:
            success, message = self.param.delta(space,
                                                0,
                                                param_string,
                                                iformat='JSONS',
                                                isSpace=True)

        else:
            # load parameter file at the space directory
            success, message = self.param.loadYaml(space, 0, isSpace=True)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(
                f'Unable to load space parameters. {message}. Aborting...')
            sys.exit(1)

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(
                f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}'
            )
            self.conveyor.setWarning(
                f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}'
            )
            self.param.setVal('computeMD_method', [md[0]])

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)
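
    # run() below loads per-space child classes dynamically and falls back
    # to the parent class when the child cannot be instantiated; the pattern
    # in isolation (names are illustrative):
    #
    #   import importlib
    #   mod = importlib.import_module(modpath + '.idata_child')
    #   try:
    #       obj = mod.IdataChild(param, conveyor, input_source)
    #   except Exception:
    #       obj = Idata(param, conveyor, input_source)   # parent fallback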

    def run(self, input_source):
        ''' Executes a default chemical space building workflow '''

        # path to endpoint
        epd = utils.space_path(self.space, 0)
        if not os.path.isdir(epd):
            self.conveyor.setError(f'Unable to find space {self.space}')
            #LOG.error(f'Unable to find space {self.space}')

        # import ichild classes
        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of the processing applied to each space
            modpath = utils.smodule_path(self.space, 0)

            idata_child = importlib.import_module(modpath + ".idata_child")
            slearn_child = importlib.import_module(modpath + ".slearn_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generating space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # instantiate learn (build a space from idata) and run it
            try:
                slearn = slearn_child.SlearnChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Slearn child architecture mismatch, defaulting to Slearn parent'
                )
                slearn = Slearn(self.param, self.conveyor)

            slearn.run()
            LOG.debug(
                f'slearn child {type(slearn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Пример #18
0
def action_info(model, version, output='text'):
    '''
    Returns a text or JSON with results info for a given model and version
    '''

    if model is None:
        return False, 'Empty model label'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):

        # compatibility method: use info.pkl
        if not os.path.isfile(os.path.join(rdir, 'info.pkl')):
            return False, 'Info file not found'

        with open(os.path.join(rdir, 'info.pkl'), 'rb') as handle:
            #retrieve a pickle file containing the keys 'model_build'
            #and 'model_validate' of results
            info = pickle.load(handle)
            info += pickle.load(handle)
        # end of compatibility method

    else:
        # new method, use results.pkl (the file is known to exist at this point)

        from flame.conveyor import Conveyor

        conveyor = Conveyor()
        with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
            conveyor.load(handle)

        info = conveyor.getVal('model_build_info')
        if info is None:
            return False, 'Info not found'

        valid_info = conveyor.getVal('model_valid_info')
        if valid_info is not None:
            info += valid_info

    # when this function is called from the console, output is 'text'
    # write and exit
    if output == 'text':

        LOG.info(f'informing model {model} version {version}')

        for val in info:
            if len(val) < 3:
                LOG.info(val)
            else:
                LOG.info(f'{val[0]} ({val[1]}) : {val[2]}')
        return True, 'model informed OK'

    # this is only reached when this function is called from a web service
    # asking for a JSON

    # this code serializes the results in a list and then converts it
    # to a JSON
    json_results = []
    for i in info:
        json_results.append(conveyor.modelInfoJSON(i))

    #print (json.dumps(json_results))
    return True, json.dumps(json_results)
Пример #19
0
class Predict:
    def __init__(self, model, version=0, output_format=None, label=None):
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, message = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(f'Unable to load model parameters. {message}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default prediction workflow '''

        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)
        if not os.path.isdir(endpoint):
            self.conveyor.setError(
                f'Unable to find model {self.model}, version {self.version}')
            #LOG.error(f'Unable to find model {self.model}')

        if not self.conveyor.getError():
            # uses the child classes within the 'model' folder, to allow
            # customization of the processing applied to each model
            modpath = utils.module_path(self.model, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            apply_child = importlib.import_module(modpath + ".apply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generating model data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Apply child architecture mismatch, defaulting to Apply parent'
                )
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Пример #20
0
class Build:

    def __init__(self, model, param_file=None, param_string=None, output_format=None):
        LOG.debug('Starting build...')
        self.model = model
        self.param = Parameters()
        
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('learn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID',utils.id_generator())
        LOG.debug(f'Generated new model with modelID: {self.conveyor.getMeta("modelID")}')

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the model
            # directory and save changes to make them persistent
            success, message = self.param.delta(model, 0, param_file, iformat='YAML')

        elif param_string is not None:
            success, message = self.param.delta(model, 0, param_string, iformat='JSONS')

        else:
            # load parameter file at the model directory
            success, message = self.param.loadYaml(model, 0)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(f'Unable to load model parameters. {message}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

        if self.param.getVal('confidential'):
            self.confidentialAuditParam()
 
    def confidentialAuditParam(self):
        import yaml

        original_method = self.param.getVal('model')
        if self.param.getVal('quantitative'):
            if original_method != 'PLSR':
                self.param.setVal('model', 'PLSR')
                LOG.info(f'CONFIDENTIALITY AUDIT: the model was set to PLSR, '
                         f'the original method {original_method} was not '
                         'suitable to build confidential models')
        else:
            if original_method != 'PLSDA':
                self.param.setVal('model', 'PLSDA')
                LOG.info(f'CONFIDENTIALITY AUDIT: the model was set to PLSDA, '
                         f'the original method {original_method} was not '
                         'suitable to build confidential models')

        # TODO: conformal support
        if self.param.getVal('conformal'):
            self.param.setVal('conformal', False)
            LOG.info('CONFIDENTIALITY AUDIT: conformal was set to False. '
                     'Conformal models are not supported for now in '
                     'confidential models')

        parameters_file_path = utils.model_path(self.model, 0)
        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(self.param.p, pfile)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def extend_modelID(self, ensembleID):
        modelID = self.conveyor.getMeta('modelID')
        modelID = f'{modelID}-{ensembleID}'
        self.conveyor.addMeta('modelID', modelID)
        LOG.debug(f'modelID re-defined as {self.conveyor.getMeta("modelID")}')

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default model building workflow '''

        # path to endpoint
        epd = utils.model_path(self.model, 0)
        # if not os.path.isdir(epd):
        #     self.conveyor.setError(f'Unable to find model {self.model}')
        #     #LOG.error(f'Unable to find model {self.model}')

        # import ichild classes
        # if not self.conveyor.getError():
        # uses the child classes within the 'model' folder,
        # to allow customization of  the processing applied to each model
        modpath = utils.module_path(self.model, 0)

        idata_child = importlib.import_module(modpath+".idata_child")
        learn_child = importlib.import_module(modpath+".learn_child")
        odata_child = importlib.import_module(modpath+".odata_child")

        # run idata object, in charge of generating model data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except:
            LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)
        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError('Failed to compute MDs')

            if not self.conveyor.isKey('ymatrix'):
                self.conveyor.setError('No activity data (Y) found in training series')
    
            # run optional chemical space building for supporting "closest" training series object
            # if self.param.getVal('buildSimilarity'):
            if self.param.getVal('output_similar') is True:

                from flame.slearn import Slearn

                slearn_child = importlib.import_module(modpath+".slearn_child")
                
                if not self.conveyor.getError():
                    # instantiate learn (build a space from idata) and run it
                    try:
                        slearn = slearn_child.SlearnChild(self.param, self.conveyor)
                    except:
                        LOG.warning('Slearn child architecture mismatch, defaulting to Slearn parent')
                        slearn = Slearn(self.param, self.conveyor)

                    slearn.run()
                    LOG.debug(f'slearn child {type(slearn).__name__} completed `run()`')

        if not self.conveyor.getError():

            # instantiate learn (build a model from idata) and run it
            try:
                learn = learn_child.LearnChild(self.param, self.conveyor)
            except:
                LOG.warning('Learn child architecture mismatch, defaulting to Learn parent')
                learn = Learn(self.param, self.conveyor)
            learn.run()

            LOG.debug(f'learn child {type(learn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Пример #21
0
def action_import(model):
    '''
    Creates a new model tree from a tarball file with the name "model.tgz"
    '''
    import re

    if not model:
        return False, 'Empty model label'

    # convert model to endpoint string
    base_model = os.path.basename(model)
    endpoint = os.path.splitext(base_model)[0]

    # find version in case of single version exports
    version = None
    if re.match("_v[0-9]{6}", endpoint[-8:]):
        version = int(endpoint[-6:])
        endpoint = endpoint[:-8]
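    # e.g. 'mymodel_v000002.tgz' yields endpoint 'mymodel' and version 2;
    # names without the '_vNNNNNN' suffix keep version = None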

    ext = os.path.splitext(base_model)[1]
    base_path = utils.model_tree_path(endpoint)

    # safety checks
    if os.path.isdir(base_path):
        return False, f'Endpoint {endpoint} already exists'

    if ext != '.tgz':
        importfile = os.path.abspath(model + '.tgz')
    else:
        importfile = model

    LOG.info(f'Importing {importfile} ...')

    if not os.path.isfile(importfile):
        LOG.info(f'Import package {importfile} not found')
        return False, f'Import package {importfile} not found'

    confidential = False

    # create directory
    try:
        os.mkdir(base_path)
    except Exception as e:
        return False, f'Error creating directory {base_path}: {e}'

    # unpack tar.gz. This is done for any kind of export file
    with tarfile.open(importfile, 'r:gz') as tar:
        tar.extractall(base_path)

    # when importing a single version we need to clone the last folder to 'dev'
    inner_dirs = os.listdir(base_path)
    if 'dev' not in inner_dirs and version is not None:

        # assign single version using file name
        version_dir = f'ver{version:06d}'

        # as a fallback assign the last internal folder
        if not os.path.isdir(os.path.join(base_path, version_dir)):
            version_dir = inner_dirs[-1]

        version_path = os.path.join(base_path, version_dir)
        confidential_model = os.path.join(version_path,
                                          'confidential_model.yaml')

        # check if it is a confidential model
        if os.path.isfile(confidential_model):

            confidential = True

            flame_source = os.path.dirname(os.path.abspath(__file__))
            children_source = os.path.join(flame_source, 'children')
            children_names = [
                'apply', 'idata', 'odata', 'learn', 'slearn', 'sapply'
            ]

            for cname in children_names:
                cpath = os.path.join(children_source, cname + '_child.py')
                shutil.copy(cpath, version_path)
                LOG.info(f'Adding local children: {cpath} ...')

            # open confidential_model.yaml
            with open(confidential_model, 'r') as fc:
                cmodel = yaml.safe_load(fc)

            # create model-results.pkl
            model_building_info = [('nobj', '', cmodel['nobj'])]
            model_building_info += [('nvarx', '', cmodel['nvarx'])]
            model_building_info += [('model', '', cmodel['model'])]

            model_type_info = [('quantitative', '', cmodel['quantitative'])]
            model_type_info += [('conformal', '', cmodel['conformal'])]
            model_type_info += [('conformal_confidence', '',
                                 cmodel['conformal_confidence'])]
            model_type_info += [('ensemble', '', False)]
            model_type_info += [('ensemble_names', '', [])]
            model_type_info += [('ensemble_versions', '', [])]
            model_type_info += [('confidential', '', True)]
            model_type_info += [('secret', '', True)]

            if cmodel['quantitative']:
                model_validation_info = [('R2', '', cmodel['R2'])]
                model_validation_info += [('Q2', '', cmodel['Q2'])]
                model_validation_info += [('SDEC', '', cmodel['SDEC'])]
                model_validation_info += [('SDEP', '', cmodel['SDEP'])]
                model_validation_info += [('scoringP', '', cmodel['scoringP'])]
                model_validation_info += [('scoringR', '', cmodel['scoringR'])]
            else:
                model_validation_info = [('MCC_f', '', cmodel['MCC_f'])]
                model_validation_info += [('MCC', '', cmodel['MCC'])]
                model_validation_info += [('Sensitivity_f', '',
                                           cmodel['Sensitivity_f'])]
                model_validation_info += [('Sensitivity', '',
                                           cmodel['Sensitivity'])]
                model_validation_info += [('Specificity_f', '',
                                           cmodel['Specificity_f'])]
                model_validation_info += [('Specificity', '',
                                           cmodel['Specificity'])]
                model_validation_info += [('FP_f', '', cmodel['FP_f'])]
                model_validation_info += [('FP', '', cmodel['FP'])]
                model_validation_info += [('FN_f', '', cmodel['FN_f'])]
                model_validation_info += [('FN', '', cmodel['FN'])]
                model_validation_info += [('TP_f', '', cmodel['TP_f'])]
                model_validation_info += [('TP', '', cmodel['TP'])]
                model_validation_info += [('TN_f', '', cmodel['TN_f'])]
                model_validation_info += [('TN', '', cmodel['TN'])]

            conveyor = Conveyor()
            conveyor.addMeta('modelID', cmodel['modelID'])
            conveyor.addMeta('endpoint', endpoint)
            conveyor.addMeta('version', version)
            conveyor.addMeta('quantitative', cmodel['quantitative'])
            conveyor.addMeta('secret', True)

            conveyor.addVal(model_building_info, 'model_build_info',
                            'model building information', 'method', 'single',
                            'Information about the model building')
            conveyor.addVal(model_validation_info, 'model_valid_info',
                            'model validation information', 'method', 'single',
                            'Information about the model validation')
            conveyor.addVal(model_type_info, 'model_type_info',
                            'model type information', 'method', 'single',
                            'Information about the model type')

            results_file_name = os.path.join(version_path, 'model-results.pkl')
            with open(results_file_name, 'wb') as handle:
                conveyor.save(handle)

            meta_file_name = os.path.join(version_path, 'model-meta.pkl')
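            # model-meta.pkl layout, mirroring the dumps below: modelID first,
            # then two placeholder entries (left as None; their purpose is not
            # shown in this snippet), then the build/validation/type info blocks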
            with open(meta_file_name, 'wb') as handle:
                pickle.dump(cmodel['modelID'], handle)
                pickle.dump(None, handle)
                pickle.dump(None, handle)
                pickle.dump(model_building_info, handle)
                pickle.dump(model_validation_info, handle)
                pickle.dump(model_type_info, handle)

        # clone the version in dev
        shutil.copytree(version_path, os.path.join(base_path, 'dev'))
        LOG.info(f'Cloning version {version} to version 0 ...')

    if confidential:
        LOG.info(
            f'Import of CONFIDENTIAL model {model} version {version} was successful'
        )
        return True, 'OK'

    # get libraries
    message = f'Endpoint {endpoint} imported OK'
    for x in os.listdir(base_path):
        model_path = os.path.join(base_path, x)
        model_pkl = os.path.join(model_path, 'estimator.pkl')
        dict_estimator = {}
        if os.path.isfile(model_pkl):
            with open(model_pkl, "rb") as input_file:
                try:
                    dict_estimator = pickle.load(input_file)
                except Exception as e:
                    return False, f'Incompatible libraries found. Import aborted with message "{str(e)}"'

        # check if the libraries used to build this model are similar to current libraries
        if 'libraries' in dict_estimator:
            # print (dict_estimator['libraries'])
            success, results = utils.compatible_modules(
                dict_estimator['libraries'])
            if not success:
                message = f"WARNING: Incompatible libraries detected, {results}. Use at your own risk"
                return False, message

    LOG.info(
        'Libraries used to generate the imported model are compatible with local libraries'
    )
    LOG.info(message)
    return True, message
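A hedged usage sketch for action_import; the file names are illustrative, and the "_v000001" suffix matches the single-version pattern detected above:

# full model tree export
success, message = action_import('myEndpoint.tgz')

# single-version export: version 1 is recovered from the file name
success, message = action_import('myEndpoint_v000001.tgz')
print(message)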
Example #22
class Search:

    def __init__(self, space, version, output_format=None, label=None):
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('sapply')

        # load modelID
        path = utils.space_path(space, version)
        meta = os.path.join(path,'space-meta.pkl')
        try:
            with open(meta, 'rb') as handle:
                modelID = pickle.load(handle)
        except:
            LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', modelID)
        LOG.debug (f'Loaded space with modelID: {modelID}')

        # assign prediction (search) label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
            'method', 'single',
            'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(f'Unable to load space parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)
 
        return

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs',1)

    def getVal(self, idict, ikey):
        if ikey not in idict:
            return None
        return idict[ikey]

    # def run(self, input_source, runtime_param=None, metric=None, numsel=None, cutoff=None):
    def run(self, param_dict):
        ''' Executes a default search workflow '''

        metric = None
        numsel = None
        cutoff = None
        
        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            LOG.error(f'Unable to find space {self.space}')
            self.conveyor.setError(f'Unable to find space {self.space}, version {self.version}')

        if self.getVal(param_dict,'smarts') is not None:
            input_source = param_dict['smarts']
            self.param.setVal('input_type', 'smarts')

        elif self.getVal(param_dict,'infile') is not None:
            input_source = param_dict['infile']

        else:
            LOG.error('Unable to find the input file')
            self.conveyor.setError('wrong format in the runtime similarity parameters')

        if 'runtime_param' in param_dict:
            runtime_param = self.getVal(param_dict, 'runtime_param')
            if runtime_param is not None:
                LOG.info (f'runtime parameters: {str(runtime_param)}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                        try:
                            metric = rtparam['similarity_metric']
                            numsel = rtparam['similarity_cutoff_num']
                            cutoff = rtparam['similarity_cutoff_distance']
                        except:
                            LOG.error('wrong format in the runtime similarity parameters')
                            self.conveyor.setError('wrong format in the runtime similarity parameters')
                except:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError('runtime similarity parameter file not found')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except:
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError('wrong format in the runtime similarity parameters')

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.conveyor.setWarning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.param.setVal('computeMD_method',[md[0]])

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath+".idata_child")
            sapply_child = importlib.import_module(modpath+".sapply_child")
            odata_child = importlib.import_module(modpath+".odata_child")

            # run idata object, in charge of generating space data from the input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except:
                LOG.warning ('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():

            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                if not self.conveyor.isKey ('SMARTS'):
                    LOG.debug('Failed to compute MDs')
                    self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except:
                LOG.warning ('Sapply child architecture mismatch, defaulting to Sapply parent')
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message

        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning ('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
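The runtime_param file that run() loads with yaml.safe_load must carry the three keys read above. A minimal sketch that writes such a file (the metric name and cutoff values are illustrative assumptions, not values confirmed by this code):

import yaml

# illustrative runtime similarity parameters; the key names come from run()
rtparam = {
    'similarity_metric': 'euclidean',      # assumed metric name
    'similarity_cutoff_num': 5,            # keep the 5 closest objects
    'similarity_cutoff_distance': 0.3,     # maximum accepted distance
}
with open('similarity.yaml', 'w') as pfile:
    yaml.safe_dump(rtparam, pfile)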
Example #23
class Search:
    def __init__(self, space, version, output_format=None, label=None):
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(f'Unable to load space parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

        return

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    # def run(self, input_source, runtime_param=None, metric=None, numsel=None, cutoff=None):
    def run(self, param_dict):
        ''' Executes a default search workflow '''

        LOG.debug(f'run parameters: {param_dict}')

        metric = None
        numsel = None
        cutoff = None

        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            self.conveyor.setError(
                f'Unable to find space {self.space}, version {self.version}')
            #LOG.error(f'Unable to find space {self.space}')

        if 'infile' in param_dict:
            input_source = param_dict['infile']
        else:
            LOG.error('Unable to find the input file')
            self.conveyor.setError(
                'wrong format in the runtime similarity parameters')

        if 'runtime_param' in param_dict:
            runtime_param = param_dict['runtime_param']
            if runtime_param is not None:
                LOG.info(f'runtime parameters: {str(runtime_param)}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                        try:
                            metric = rtparam['similarity_metric']
                            numsel = rtparam['similarity_cutoff_num']
                            cutoff = rtparam['similarity_cutoff_distance']
                        except:
                            LOG.error(
                                'wrong format in the runtime similarity parameters'
                            )
                            self.conveyor.setError(
                                'wrong format in the runtime similarity parameters'
                            )
                except:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError(
                        'runtime similarity parameter file not found')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except:
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError(
                    'wrong format in the runtime similarity parameters')

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            sapply_child = importlib.import_module(modpath + ".sapply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generating space data from the input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Sapply child architecture mismatch, defaulting to Sapply parent'
                )
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(
                f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor,
                                           self.label)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor, self.label)

        return odata.run()
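Both Search.run() variants take a param_dict; the sketch below shows the direct form (space name, label, file and metric names are illustrative assumptions). Note that when the 'runtime_param' key is present, the metric/numsel/cutoff entries are never read, so that key is omitted here:

# hypothetical search invocation; all values are assumptions
search = Search('mySpace', 1, output_format='JSON', label='demo')
param_dict = {
    'infile': 'query.sdf',       # illustrative input file
    'metric': 'euclidean',       # assumed metric name
    'numsel': 5,
    'cutoff': 0.3,
}
success, results = search.run(param_dict)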
Example #24
def action_searches_result(label, output='text'):
    '''
    try to retrieve the search results with the label used as argument
    returns 
        - (False, None) if there is no directory or the search 
          pickle file cannot be found 
        
        - (True, conveyor) with the results otherwise
    '''

    opath = tempfile.gettempdir()
    if not os.path.isdir(opath):
        if output == 'JSON':
            return False, {'code':1, 'message': f'directory {opath} not found'}
        print(f'directory {opath} not found')
        return False, None

    # default in case label was not provided
    if label is None:
        label = 'temp'

    iconveyor = Conveyor()

    search_pkl_path = os.path.join(opath, 'similars-' + label + '.pkl')
    if not os.path.isfile(search_pkl_path):

        if output == 'JSON':
            return False, {'code': 0, 'message': f'predictions not found for label {label}'}
        print(f'predictions not found for label {label}')
        return False, f'file {search_pkl_path} not found'

    with open(search_pkl_path, 'rb') as handle:
        success, message = iconveyor.load(handle)

    if not success:
        if output == 'JSON':
            return False, {'code': 1, 'message': f'error reading search results with message {message}'}
        print(f'error reading search results with message {message}')
        return False, None

    if not iconveyor.isKey('search_results'):
        if output == 'JSON':
            return False, {'code': 1, 'message': 'search results not found'}
        return False, 'search results not found'

    results = iconveyor.getVal('search_results')
    names = iconveyor.getVal('obj_nam')
    if iconveyor.isKey('SMILES'):
        smiles = iconveyor.getVal('SMILES')
    if len(results) != len(names):
        if output == 'JSON':
            return False, {'code': 1, 'message': 'results length does not match names'}
        return False, 'results length does not match names'

    for i in range(len(results)):
        if iconveyor.isKey('SMILES'):
            print(f'similars to {names[i]} [{smiles[i]}]')
        else:
            print(f'similars to {names[i]}')

        iresult = results[i]
        for j in range(len(iresult['distances'])):
            dist = iresult['distances'][j]

            if 'obj_nam' in iresult:
                name = iresult['obj_nam'][j]
            else:
                name = '-'
            if 'SMILES' in iresult:
                smil = iresult['SMILES'][j]
            else:
                smil = '-'
            
            if 'obj_id' in iresult:
                idv = iresult['obj_id'][j]
            else:
                idv = '-'

            if 'ymatrix' in iresult:
                act = iresult['ymatrix'][j]
            else:
                act = '-'

            print(f'   {dist:.3f} : {name} {idv} {act} [{smil}]')

    # return the conveyor object holding the results
    return True, iconveyor
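A short usage sketch (the label is illustrative): in text mode the matches are printed to the console and the conveyor object is returned on success, while the JSON-oriented mode returns a dict describing the failure when the results cannot be loaded:

# console use; prints each query with its closest neighbours
success, result = action_searches_result('demo')

# web-service oriented use
success, result = action_searches_result('demo', output='JSON')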
Example #25
def action_info(model, version, output='text'):
    '''
    Returns a text or JSON with results info for a given model and version
    '''

    if model is None:
        return False, 'Empty model label'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        return False, 'Info file not found'

    from flame.conveyor import Conveyor

    conveyor = Conveyor()
    with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
        conveyor.load(handle)

    # if there is an error, return the error message
    if conveyor.getError():
        error = conveyor.getErrorMessage()
        return False, error

    # collect warnings
    warning = conveyor.getWarningMessage()

    # collect build and validation info
    build_info = conveyor.getVal('model_build_info')
    valid_info = conveyor.getVal('model_valid_info')

    # merge everything
    info = None
    for iinfo in (warning, build_info, valid_info):
        if info is None:
            info = iinfo
        elif iinfo is not None:
            info += iinfo

    if info is None:
        return False, 'No relevant information found'

    # when this function is called from the console, output is 'text'
    # write and exit
    if output == 'text':

        LOG.info(f'informing model {model} version {version}')

        for val in info:
            if len(val) < 3:
                LOG.info(val)
            else:
                LOG.info(f'{val[0]} ({val[1]}) : {val[2]}')
        return True, 'model informed OK'

    # this is only reached when this function is called from a web service
    # asking for JSON

    # this code serializes the results in a list and then converts them
    # to JSON
    json_results = []
    for i in info:
        json_results.append(conveyor.modelInfoJSON(i))

    #print (json.dumps(json_results))
    return True, json.dumps(json_results)
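Finally, a usage sketch for action_info (the model name and version are illustrative):

# console use: entries are written to the log, a status message is returned
success, message = action_info('myEndpoint', 1)

# web service use: a JSON string with the serialized entries is returned
success, info_json = action_info('myEndpoint', 1, output='JSON')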