def get_fingerprint_from_mol(mol: oechem.OEMol) -> Tuple[float, ...]:
    """Utility that retrieves a molecule's fingerprint and returns it as a tuple.

    Refer to :meth:`~assign_fingerprint` for how the fingerprint is stored in
    the molecule.

    Parameters
    ----------
    mol : oechem.OEMol
        The molecule from which to retrieve the fingerprint.

    Returns
    -------
    Tuple[float, ...]
        A tuple containing the fingerprint values in index order. Its length
        is the integer stored on the molecule under
        ``DancePipeline.FINGERPRINT_LENGTH_NAME``.

    Raises
    ------
    ValueError
        If the molecule does not carry the fingerprint length, or if the
        value for any index below that length is missing.
    """
    if not mol.HasData(DancePipeline.FINGERPRINT_LENGTH_NAME):
        raise ValueError("Could not retrieve fingerprint length for molecule.")
    length = mol.GetIntData(DancePipeline.FINGERPRINT_LENGTH_NAME)

    def get_fingerprint_index(i):
        # Each value is stored under its own tag: "<FINGERPRINT_VALUE_NAME>_<i>".
        name = f"{DancePipeline.FINGERPRINT_VALUE_NAME}_{i}"
        if not mol.HasData(name):
            raise ValueError(
                f"Unable to retrieve fingerprint value at index {i}")
        return mol.GetDoubleData(name)

    return tuple(get_fingerprint_index(i) for i in range(length))
def write_mol_to_fingerprint_file(
        mol: oechem.OEMol,
        properties: [danceprops.DanceProperties],
        select_output_dir: str,
        select_bin_size: float,
        wiberg_precision: float,
):
    """Writes a molecule to its appropriate SMILES fingerprint file.

    The target file is named ``{bond_order},{fingerprint}.smi`` inside
    ``select_output_dir``, where ``bond_order`` is the molecule's
    ``tri_n_bond_order`` rounded down to a multiple of ``select_bin_size`` and
    ``fingerprint`` is the string form of a ``danceprops.DanceFingerprint``.
    One line of the form ``<SMILES> <title>`` is appended to that file.

    Molecules without a charged copy attached (under
    ``danceprops.DANCE_CHARGED_COPY_KEY``) are skipped with a debug log
    message and nothing is written.

    Parameters
    ----------
    mol : oechem.OEMol
        The molecule to write.
    properties : [danceprops.DanceProperties]
        Passed to ``danceprops.get_dance_property`` to look up the properties
        of ``mol``.
    select_output_dir : str
        Directory in which the fingerprint file is created or appended to.
    select_bin_size : float
        Width of the bins into which the bond order is rounded down.
    wiberg_precision : float
        Passed through to ``danceprops.DanceFingerprint``.

    Raises
    ------
    ValueError
        If the charged copy has no atom matching
        ``oechem.OEIsInvertibleNitrogen``.
    """
    # Some of the molecules coming in may be invalid. DanceGenerator may find
    # there was an error in charge calculations, in which case the charged
    # copy was not assigned to the molecule.
    if not mol.HasData(danceprops.DANCE_CHARGED_COPY_KEY):
        logging.debug(f"Ignored molecule {mol.GetTitle()}")
        return

    charged_copy = mol.GetData(danceprops.DANCE_CHARGED_COPY_KEY)

    # Take the first invertible nitrogen, if any. Without the explicit check a
    # molecule lacking one would fail later with an opaque UnboundLocalError.
    tri_n = next(
        iter(charged_copy.GetAtoms(oechem.OEIsInvertibleNitrogen())), None)
    if tri_n is None:
        raise ValueError(
            f"Could not find a trivalent nitrogen in molecule {mol.GetTitle()}"
        )
    fingerprint = danceprops.DanceFingerprint(tri_n, wiberg_precision)

    # Retrieve the total bond order around the trivalent nitrogen
    bond_order = danceprops.get_dance_property(mol,
                                               properties).tri_n_bond_order

    # Round the total bond order down to a multiple of select_bin_size. For
    # instance, if select_bin_size is 0.02 and the bond order is 2.028, the
    # result is about 2.02: dividing by the bin size gives the number of bins,
    # floor() drops the fractional bin, and multiplying back by the bin size
    # gives the lower edge of that bin.
    # NOTE: the product is not rounded, so the value embedded in the filename
    # may carry floating-point noise.
    bond_order = math.floor(bond_order / select_bin_size) * select_bin_size

    filename = f"{select_output_dir}/{bond_order},{fingerprint}.smi"
    # Append so that molecules sharing a bin and fingerprint accumulate in the
    # same file.
    with open(filename, "a") as f:
        f.write(f"{oechem.OEMolToSmiles(mol)} {mol.GetTitle()}\n")
    logging.debug(f"Wrote {mol.GetTitle()} to {filename}")