Пример #1
0
    def autocomplete_documentation(self):
        """
        Fill in the documentation fields whose content can be obtained
        without user input.

        The following entries of self.fields are written: 'ID', 'Version',
        'Date', 'Date_of_QMRF', the 'format' of 'Data_info', the 'type' of
        'Algorithm' and the 'model', 'descriptors' and (only when the
        'conformal' parameter is set) 'applicability_domain' entries of
        'Software'.
        """
        # model identifier: getModelID returns a pair and only its second
        # item is stored in the documentation
        id_result = utils.getModelID(self.model, self.version, 'model')
        self.fields['ID']['value'] = id_result[1]

        # model version, stored as text
        self.fields['Version']['value'] = str(self.version)

        # the current date is used both as the model development date and
        # as the date of the QMRF
        current_date = date.today().strftime("%B %d, %Y")
        for date_field in ('Date', 'Date_of_QMRF'):
            self.fields[date_field]['value'] = current_date

        # format of the input: TSV for an 'input_type' of 'data', SDF for
        # anything else
        is_tabular = self.parameters.getVal('input_type') == 'data'
        series_format = 'TSV' if is_tabular else 'SDF'
        self.fields['Data_info']['value']['format']['value'] = series_format

        # type of algorithm
        self.fields['Algorithm']['value']['type']['value'] = 'QSAR'

        # main modelling program and version
        flame_release = "Flame, 1.0rc3"
        software_entries = self.fields['Software']['value']

        for entry in ('model', 'descriptors'):
            software_entries[entry]['value'] = flame_release

        # the applicability domain entry is only filled in for conformal
        # models
        if self.parameters.getVal('conformal'):
            software_entries['applicability_domain']['value'] = flame_release
Пример #2
0
    def __init__(self, model, version=0, output_format=None, label=None):
        """
        Prepare a prediction for the given model and version.

        Creates the Parameters and Conveyor objects, marks the conveyor
        origin as 'apply', stores the modelID as conveyor metadata, stores
        the prediction label as a conveyor value and loads the model
        parameters.

        Arguments:
            model: model name, passed to utils.getModelID and to
                Parameters.loadYaml
            version: model version (default 0)
            output_format: optional additional output format; it is
                appended to the 'output_format' parameter when it is not
                already there
            label: label stored in the conveyor under 'prediction_label'

        The interpreter is terminated with sys.exit() when the modelID
        or the model parameters cannot be loaded.
        """
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('apply')

        # load modelID
        success, result = utils.getModelID(model, version, 'model')
        if not success:
            LOG.critical(f'{result}. Aborting...')
            # NOTE(review): sys.exit() without argument ends the process
            # with exit status 0, even though this is a failure path
            sys.exit()

        self.conveyor.addMeta('modelID', result)
        LOG.debug(f'Loaded model with modelID: {result}')

        # assign prediction label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(
                f'Unable to load model parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        # (identity test: None is a singleton and must not be compared
        # with the != operator)
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

            # a 'ghost' output format switches off the 'output_similar'
            # parameter
            if 'ghost' in output_format:
                self.param.setVal('output_similar', False)
Пример #3
0
def _merge_training_series(existing_file, new_file, merged_file):
    '''
    Writes in merged_file the lines of existing_file followed by the lines
    of new_file.

    Both input files are decoded as UTF-8, ignoring undecodable bytes, and
    the output is written as UTF-8 as well. Trailing whitespace is removed
    from every line.
    '''
    with open(merged_file, 'w', encoding='utf-8') as outfile:

        # handling the extra newline of SDFiles is problematic. The output
        # of the newline is delayed by stripping the line ends and writing
        # a newline before every line except the very first one
        first = True
        with codecs.open(existing_file, 'r', encoding='utf-8', errors='ignore') as infile:
            for line in infile:
                if first:
                    outfile.write(line.rstrip())
                    first = False
                else:
                    outfile.write(f'\n{line.rstrip()}')

        # for the second block the preceding newline is added in all lines
        with codecs.open(new_file, 'r', encoding='utf-8', errors='ignore') as infile:
            for line in infile:
                outfile.write(f'\n{line.rstrip()}')


def build_cmd(arguments, output_format=None):
    '''
    Instantiates a Build object to build a model using the given
    input file and model.

    This method must be self-contained and suitable for being called in
    cascade, by models which use the output of other models as input

    Arguments:
        arguments: mapping with the keys 'endpoint' and 'infile' and,
            optionally, 'param_file', 'param_string' and 'incremental'.
            'infile' can be None, in which case the training series
            already present in the endpoint folder is used
        output_format: passed to the Build constructor

    Returns a (success, results) tuple. On error, success is False and
    results is a message; otherwise the tuple returned by Build.run
    is passed through.

    When 'incremental' is set and the endpoint already contains a training
    series, the input file is appended to it. When there is no previous
    training series the input file is simply copied, like in a
    non-incremental build.
    '''

    from flame.build import Build

    # safety check if model exists
    endpoint_dir = utils.model_path(arguments['endpoint'], 0)
    if not os.path.isdir(endpoint_dir):
        return False, 'Endpoint name not found in model repository.'

    # remove pre-existing results and meta files
    for stale_name in ('model-results.pkl', 'model-meta.pkl'):
        stale_file = os.path.join(endpoint_dir, stale_name)
        if os.path.isfile(stale_file):
            os.remove(stale_file)

    # input file provided in the command
    ifile = arguments['infile']
    if ifile is not None and not os.path.isfile(ifile):
        return False, f'Wrong training series file {ifile}'

    # lfile is the "training_series" copied internally to the endpoint folder
    lfile = os.path.join(endpoint_dir, 'training_series')

    if 'param_file' in arguments:
        build = Build(arguments['endpoint'], param_file=arguments['param_file'], output_format=output_format)
    elif 'param_string' in arguments:
        build = Build(arguments['endpoint'], param_string=arguments['param_string'], output_format=output_format)
    else:
        build = Build(arguments['endpoint'], output_format=output_format)

    if utils.isSingleThread():
        build.set_single_CPU()

    ensemble = build.get_ensemble()

    # ensemble[0]     Boolean with True for ensemble models and False otherwise
    # ensemble[1]     List of ensemble model model_names
    # ensemble[2]     List of ensemble model versions

    if ensemble[0]:

        emodels = ensemble[1]
        evers = ensemble[2]

        if ifile is None:
            if not os.path.isfile(lfile):
                return False, 'no training series detected'
        else:
            try:
                safe_copy(ifile, lfile)
            except Exception:
                # Exception (and not a bare except) so that KeyboardInterrupt
                # and SystemExit are not reported as a copy problem
                return False, 'Unable to copy input file to model directory'

        success, model_res = get_ensemble_input(build, emodels, evers, lfile)

        if not success:
            return False, model_res

        # the modelID of the ensemble includes the modelID of every inner
        # model that can be identified
        for emodel, ever in zip(emodels, evers):
            success, iID = utils.getModelID(emodel, ever, 'model')
            if success:
                build.extend_modelID(iID)

        LOG.debug(f'New modelID is: {build.conveyor.getMeta("modelID")}')

        # now run the model using the data from the external sources
        success, results = build.run(model_res)

    else:

        # when a new training series is provided in the command line
        # try to copy it to the model directory
        if ifile is not None:

            # in case of incremental training, add the input file at the end
            # of the existing file. If there is no existing file there is
            # nothing to merge with and the input file is copied as usual
            if arguments.get('incremental') and os.path.isfile(lfile):
                LOG.info(f'Merging file {ifile} with existing training series')
                new_training = os.path.join(endpoint_dir, 'temp_training')
                _merge_training_series(lfile, ifile, new_training)
                shutil.move(new_training, lfile)
            else:
                try:
                    safe_copy(ifile, lfile)
                except Exception:
                    return False, 'Unable to copy input file to model directory'

        # check that the local copy of the input file exists
        if not os.path.isfile(lfile):
            return False, 'No training series found'

        # run the model with the input file
        success, results = build.run(lfile)

    return success, results
Пример #4
0
def predict_cmd(arguments, output_format=None):
    '''
    Runs a prediction with a Predict object created for the endpoint,
    version and label found in arguments and returns the (success, results)
    tuple produced by its run method.

    An 'output_format' entry in arguments takes precedence over the
    output_format argument. For ensemble models the input of the run is
    obtained with get_ensemble_input from the input file, which is then
    mandatory.

    This method must be self-contained and suitable for being called in
    cascade, by models which use the output of other models as input.
    '''
    from flame.predict import Predict

    # nothing can be predicted for an endpoint without a folder in the
    # model repository
    if not os.path.isdir(utils.model_path(arguments['endpoint'], 0)):
        return False, 'Endpoint name not found in model repository.'

    # ** DEPRECATE **
    # back-compatibility trick: older versions of the APIs do not send the
    # label argument, so a default one is stored in arguments
    arguments.setdefault('label', 'temp')

    output_format = arguments.get('output_format', output_format)

    predict = Predict(arguments['endpoint'],
                      version=arguments['version'],
                      output_format=output_format,
                      label=arguments['label'])

    if utils.isSingleThread():
        predict.set_single_CPU()

    # ensemble_info[0]     True for ensemble models, False otherwise
    # ensemble_info[1]     list with the names of the inner models
    # ensemble_info[2]     list with the versions of the inner models
    ensemble_info = predict.get_ensemble()

    if not ensemble_info[0]:

        # plain model: the input file is used directly
        success, results = predict.run(arguments['infile'])

    else:

        if arguments['infile'] is None:
            return False, 'ensemble models require allways an input file'

        inner_names = ensemble_info[1]
        inner_versions = ensemble_info[2]

        success, model_res = get_ensemble_input(predict, inner_names, inner_versions, arguments['infile'])

        # a failure is recorded but the prediction is run anyway, so the
        # error information reaches the output
        if not success:
            predict.conveyor.setError(model_res)
            LOG.error(model_res)

        # warn about inner models whose modelID is not part of the
        # modelID of the ensemble
        ensemble_id = predict.conveyor.getMeta('modelID')
        for idx, inner_name in enumerate(inner_names):
            found, inner_id = utils.getModelID(inner_name, inner_versions[idx], 'model')
            if found and inner_id not in ensemble_id:
                outdated = f'Inner model {inner_name}.{inner_versions[idx]} has been updated. Rebuilding of ensemble model is recommended'
                predict.conveyor.setWarning(outdated)
                LOG.warning(outdated)

        # now run the model using the data from the external sources
        success, results = predict.run(model_res)

    LOG.info('Prediction completed...')

    return success, results