def _ETKDG(ifile) -> (bool, str):
    """Assign 3D structures to the molecules of an SDFile using ETKDG.

    Every molecule read from ``ifile`` gets explicit hydrogens
    (``Chem.AddHs``) and is embedded with ``AllChem.EmbedMolecule`` using
    ``AllChem.ETKDG()`` parameters. Molecules embedded successfully are
    written to a new SDFile named like the input, with a ``_3d`` suffix
    inserted before the extension.

    Args:
        ifile: path of the input SDFile.

    Returns:
        A ``(success_list, ofile)`` tuple. ``success_list`` holds one boolean
        per molecule counted in ``ifile`` (False when the molecule could not
        be read or embedded) and ``ofile`` is the path of the output file.
        The signature annotation is kept as it was for compatibility, even
        though the first element is a list and not a single bool.

    Raises:
        Exception: whatever ``Chem.SDMolSupplier`` raised, re-raised after
            being logged as critical.
    """
    success_list = [True] * sdfu.count_mols(ifile)

    LOG.info('Converting to ETKDG 3D structures')
    try:
        suppl = Chem.SDMolSupplier(ifile)
    except Exception:
        LOG.critical('Unable to create supplier')
        raise  # bare raise keeps the original traceback

    filename, fileext = os.path.splitext(ifile)
    ofile = filename + '_3d' + fileext
    LOG.debug(f'3D stucture ouput file is: {ofile}')

    with open(ofile, 'w') as fo:
        for mcount, mol in enumerate(suppl):
            if mol is None:
                LOG.debug('Supplier failed to read'
                          f' molecule #{mcount+1} in {ifile}')
                # never hand None to AddHs; flag the slot so the indexes of
                # the following molecules stay aligned with success_list
                success_list[mcount] = False
                continue

            try:
                mol3 = Chem.AddHs(mol)
                # EmbedMolecule reports failure by returning -1 instead of
                # raising, so the returned conformer id must be checked
                embedded = AllChem.EmbedMolecule(mol3, AllChem.ETKDG()) >= 0
            except Exception:
                embedded = False

            if not embedded:
                LOG.error('Failed to generate 3D structures using'
                          f'ETKDG method for molecule #{mcount+1} in {ifile}')
                success_list[mcount] = False
                continue

            fo.write(Chem.MolToMolBlock(mol3))
            fo.write('\n$$$$\n')  # end of mol

    return success_list, ofile
    def convert3D(self, ifile, method):
        """Assign 3D structures to the molecular structures provided as input.

        Args:
            ifile: path of the input SDFile.
            method: name of the 3D conversion method. A falsy value disables
                the conversion; only methods containing 'ETKDG' are handled.

        Returns:
            A ``(success_list, ofile)`` tuple with one boolean per molecule
            and the path of the file to be used by the next step. When no
            conversion is applied the input path is returned unchanged.
        """
        if method and 'ETKDG' in method:
            # The name `convert3D` is looked up outside the class here (class
            # attributes are not in scope inside a method body), so it must
            # refer to something exposing `_ETKDG` -- presumably an imported
            # module; verify against the file imports.
            return convert3D._ETKDG(ifile)

        if method:
            # Previously this path hit an unbound `ofile`; pass the input
            # through instead of crashing.
            LOG.warning(f'3D conversion method {method} is not supported,'
                        ' structures were left unchanged')

        success_list = [True] * sdfu.count_mols(ifile)
        return success_list, ifile
# File: idata.py  Project: eTRANSAFE/flame
    def ionize(self, ifile, method):
        """Adjust the ionization status of the molecular structure,
        using a given pH.

        No ionization method is implemented so far, so the input file is
        always handed back untouched and every molecule is reported as
        successful.

        Args:
            ifile: path of the input SDFile.
            method: name of the ionization method; a falsy value means the
                step is disabled.

        Returns:
            A ``(success_list, ifile)`` tuple with one True per molecule
            counted in ``ifile`` and the unchanged input path.
        """
        success_list = [True] * sdfutils.count_mols(ifile)

        if method:
            LOG.debug('ionize called, but no method implemented so far')
            # methods here

        return success_list, ifile
    def workflow_series(self, input_file):
        """Execute in sequence the methods required to generate MD,
        starting from a single molecular file.

        The file goes through normalize, ionize and convert3D (each one
        consuming the file produced by the previous step) and the MD are
        then computed on the result. After every step the per-molecule
        success flags are merged into a running index with
        ``self.updateMolIndex``.

        Args:
            input_file: a molecular file in SDFile format.

        Returns:
            ``(False, message)`` as soon as any step fails, where message is
            a string (or whatever ``computeMD`` returned as error).
            Otherwise ``(success, results)`` where results contains:
                results[0] a numpy bidimensional array containing MD
                results[1] a list of strings containing the names of the
                           MD vars
                results[2] a list of booleans indicating for which objects
                           the MD computations succeeded
        """
        mol_index = [True] * sdfu.count_mols(input_file)

        # (label used in the error message, step, key of its method setting)
        steps = (
            ('normalize', self.normalize, 'normalize_method'),   # 1.
            ('ionize', self.ionize, 'ionize_method'),            # 2.
            ('convert 3D', self.convert3D, 'convert3D_method'),  # 3.
        )

        current_file = input_file
        for label, step, method_key in steps:
            success_list, current_file = step(
                current_file, self.parameters[method_key])
            success, mol_index = self.updateMolIndex(mol_index, success_list)

            if not success:
                return False, f'failed to {label} ' + input_file

        # 4. compute MD
        # NOTE(review): the second argument was truncated in the original
        # call; 'computeMD_method' follows the naming of the settings used by
        # the three steps above -- confirm against the parameter definitions.
        success, results = self.computeMD(
            current_file, self.parameters['computeMD_method'])

        if not success:
            return False, results

        x, xnames, success_list = results[0], results[1], results[2]

        success, mol_index = self.updateMolIndex(mol_index, success_list)

        return success, (x, xnames, mol_index)
    def workflow_objects(self, input_file):
        """Execute in sequence the methods required to generate MD,
        processing the molecules of the input file one at a time.

        The input SDFile is split in single-molecule pieces and
        ``workflow_series`` is run on each piece. The MD of the molecules
        processed correctly are stacked with ``np.vstack``.

        Args:
            input_file: a molecular file in SDFile format.

        Returns:
            ``(False, results)`` as returned by ``sdfu.split_SDFile`` when
            the split fails. Otherwise ``(True, (md_results, va_results,
            success_list))`` where md_results holds the stacked MD,
            va_results is the second element returned by ``workflow_series``
            for the first molecule processed correctly, and success_list has
            one boolean per piece.
        """
        # split in single molecule pieces
        num_mol = sdfu.count_mols(input_file)
        success, results = sdfu.split_SDFile(input_file, num_mol)

        if not success:
            return success, results

        file_list, file_size = results[0], results[1]

        # check if any of the molecules is empty (size other than 1)
        success_list = [fsize == 1 for fsize in file_size]

        md_results = []
        va_results = []
        num_var = None  # MD length of the first molecule processed correctly

        for i, ifile in enumerate(file_list):

            if not success_list[i]:  # molecule was empty, do not process
                LOG.error(f'Molecule {i+1} in {ifile} is empty, skiping...')
                continue

            success, results = self.workflow_series(ifile)

            # since the workflow was run for a single molecule, results[2]
            # is ignored, because it must match the value in success
            success_list[i] = success

            if not success:  # failed in the workflow
                LOG.error(f'Workflow failed for molecule #{str(i+1)}'
                          f' in file {input_file}')
                continue

            if num_var is None:  # first molecule
                md_results = results[0]
                va_results = results[1]
                num_var = len(md_results)
                continue

            # rows of a different length cannot be stacked: flag and skip
            if len(results[0]) != num_var:
                LOG.warning(f'MD length for molecule #{str(i+1)} in file'
                            f' {input_file} does not match the MD length'
                            'of the first molecule')
                success_list[i] = False
                continue

            md_results = np.vstack((md_results, results[0]))

        return True, (md_results, va_results, success_list)
    def normalize(self, ifile, method):
        # Purpose: reads the molecules of SDFile `ifile`, optionally runs the
        # `standardise` protocol on each one (when `method` contains
        # 'standardize') and targets an output file named like the input with
        # a '_std' suffix before the extension.
        # Returns (success_list, ofile) at the end of the method, or
        # (False, <error message>) when the mol supplier cannot be created.
        #
        # NOTE(review): this block reached review with lines missing (lost in
        # extraction). The docstring text below has no opening/closing triple
        # quotes, and several statements are truncated; each gap is flagged
        # in place. The code is left untouched until the missing lines are
        # recovered from the project history.
        Generates a simplified SDFile with MolBlock and an internal ID for
        further processing

        Note that this method is applied to every molecule and that it removes
        mol blocks in the input SDFile not able to generate a valid mol

        Also, when defined in control, applies chemical standardization
        protocols, like the one provided by Francis Atkinson (EBI),
        accessible from:
        # NOTE(review): the URL announced by "accessible from:" is missing.


        Returns a tuple containing the result of the method and (if True)
        the name of the output molecule and an error message otherwyse


        # one success flag per molecule counted in the input file
        success_list = [True for i in range(sdfu.count_mols(ifile))]

        # a falsy method is replaced by '' so the `in method` test below works
        if not method:
            method = ''

        LOG.info('Starting normalization...')
        # NOTE(review): the `try:` paired with the `except` below is missing.
            suppl = Chem.SDMolSupplier(ifile)
            LOG.debug(f'mol supplier created from {ifile}')
        except Exception as e:
            # NOTE(review): this LOG.error call is never closed; its last
            # argument (presumably the exception text) is missing.
            LOG.error('Unable to create mol supplier with the exception: '
            return False, 'Error at processing input file for standardizing structures'

        # output file: <input name>_std<input extension>
        filename, fileext = os.path.splitext(ifile)
        ofile = filename + '_std' + fileext
        LOG.debug(f'writing standarized molecules to {ofile}')
        with open(ofile, 'w') as fo:
            mcount = 0
            # merror = 0
            for m in suppl:

                # molecule not recognised by RDKit
                if m is None:
                    LOG.error('Unable to process molecule'
                              f' #{mcount+1} in {ifile}')
                    # NOTE(review): nothing visible skips this molecule, yet
                    # `m` is used right below; the statements handling the
                    # failure appear to be missing.

                # NOTE(review): the getName call is truncated, its remaining
                # arguments and closing parenthesis are missing.
                name = sdfu.getName(m,

                parent = None

                if 'standardize' in method:

                    # NOTE(review): the `try:` paired with the `except`
                    # clauses below is missing.
                        parent = standardise.run(Chem.MolToMolBlock(m))

                    except standardise.StandardiseException as e:

                        if e.name == "no_non_salt":
                            # very common warning, use parent mol and proceed
                            # NOTE(review): the opening of the log call that
                            # takes the two f-strings below is missing.
                                f'"No non salt error" found. Skiped standardize for mol'
                                f' #{mcount} {name}')
                            parent = Chem.MolToMolBlock(m)
                            # NOTE(review): an `else:` introducing the
                            # branches below appears to be missing.
                            # serious issue, no parent was generated, use original mol
                            if (parent is None):
                                # NOTE(review): log call opener/closer missing
                                    f'Critical standardize exception: {e}'
                                    f' when processing mol #{mcount} {name}. Skipping normalization'
                                parent = Chem.MolToMolBlock(m)
                            # minor issue, parent was generated, show a warning and proceed
                            # NOTE(review): `else:` and log call opener/closer
                            # missing
                                    f'Standardize exception: {e}'
                                    f' when processing mol #{mcount} {name}. Normalization applied'
                        #return False, e.name

                    except Exception as e:
                        # this error means an execution error running standardizer
                        # the molecule is discarded and therefore the list of molecules must be updated
                        # NOTE(review): log call opener/closer missing
                            f'Critical standardize execution exception {e}'
                            f' when processing mol #{mcount} {name}. Discarding molecule'
                        success_list[mcount] = False

                    # NOTE(review): the `else:` of the 'standardize' test,
                    # which should own the two lines below, is missing.
                    LOG.info(f'Skipping normalization.')
                    parent = Chem.MolToMolBlock(m)

                # in any case, write parent plus internal ID (flameID)
                # NOTE(review): no write to `fo` is visible in this loop; the
                # statements writing `parent` are missing.

                # *** discarded method to control errors ****
                # flameID = 'fl%0.10d' % mcount
                # fo.write('>  <flameID>\n'+flameID+'\n\n')

                mcount += 1

                # terminator
                # NOTE(review): the statement writing the terminator is
                # missing.

        return success_list, ofile