def eg_extract_one_bond_break(molecule_file, reaction_file="reactions.pkl"):
    """Example driver: run the one-bond-break extraction and save the result.

    Args:
        molecule_file: Path passed to ``pickle_load`` to obtain the molecules.
        reaction_file: Path passed to the extractor's ``to_file`` method.
    """
    mol_set = pickle_load(molecule_file)
    print("number of moles:", len(mol_set))

    rxn_extractor = ReactionExtractorFromMolSet(mol_set)
    # find_one=False is forwarded as-is; presumably it asks for every
    # match rather than the first one -- confirm against the extractor.
    rxn_extractor.extract_one_bond_break(find_one=False)
    rxn_extractor.to_file(reaction_file)
def write_dataset_edge_label():
    """Load the n200 molecule pickle and write the bond-annotation dataset.

    All input and output locations are hard-coded; the three output paths
    (structure, label, feature) are handed to
    ``write_edge_label_based_on_bond`` together with the loaded molecules.
    """
    # Output locations.
    sdf_path = "~/Applications/db_access/mol_builder/struct_mols_bond_annotation.sdf"
    label_path = "~/Applications/db_access/mol_builder/label_mols_bond_annotation.yaml"
    feature_path = "~/Applications/db_access/mol_builder/feature_mols_bond_annotation.yaml"

    # Input molecules.
    molecules = pickle_load("~/Applications/db_access/mol_builder/molecules_n200.pkl")

    write_edge_label_based_on_bond(molecules, sdf_path, label_path, feature_path)
def eg_buckets(molecule_file):
    """Example driver: pretty-print molecule buckets for two key sets.

    Args:
        molecule_file: Path passed to ``pickle_load`` to obtain the molecules.
    """
    mol_set = pickle_load(molecule_file)
    print("number of moles:", len(mol_set))

    rxn_extractor = ReactionExtractorFromMolSet(mol_set)

    # First the fine-grained grouping, then the formula-only grouping.
    key_sets = (
        ["formula", "charge", "spin_multiplicity"],
        ["formula"],
    )
    for bucket_keys in key_sets:
        pprint(rxn_extractor.bucket_molecules(keys=bucket_keys))
def get_single_atom_energy():
    """Print formula, free energy and charge of the listed single-atom species.

    Molecules are read from a hard-coded pickle file; only those whose
    ``formula`` appears in the list below are printed.
    """
    # Alternative inputs used previously:
    #   "~/Applications/db_access/mol_builder/molecules.pkl"
    #   "~/Applications/db_access/mol_builder/molecules_n200.pkl"
    source = "~/Applications/db_access/mol_builder/molecules_unfiltered.pkl"
    molecules = pickle_load(source)

    wanted_formulas = ["H1", "Li1", "C1", "O1", "F1", "P1"]

    print("# formula free energy charge")
    for mol in molecules:
        if mol.formula not in wanted_formulas:
            continue
        print(mol.formula, mol.free_energy, mol.charge)
def number_of_bonds():
    """Print the mean and median of ``len(m.bonds)`` over the molecule pickle."""
    molecules = pickle_load("~/Applications/db_access/mol_builder/molecules.pkl")

    bond_counts = [len(mol.bonds) for mol in molecules]

    # Compute both statistics before printing either one.
    avg_count, mid_count = np.mean(bond_counts), np.median(bond_counts)

    print("### number of bonds mean:", avg_count)
    print("### number of bonds median:", mid_count)
def write_dataset():
    """Load the n200 molecule pickle and write the sdf/csv/yaml dataset.

    All input and output locations are hard-coded; the three output paths
    (structure, label, feature) are handed to ``write_sdf_csv_dataset``
    together with the loaded molecules.
    """
    # Output locations.
    sdf_path = "~/Applications/db_access/mol_builder/struct_mols_n200.sdf"
    csv_path = "~/Applications/db_access/mol_builder/label_mols_n200.csv"
    yaml_path = "~/Applications/db_access/mol_builder/feature_mols_n200.yaml"

    # Alternative input: "~/Applications/db_access/mol_builder/molecules.pkl"
    molecules = pickle_load("~/Applications/db_access/mol_builder/molecules_n200.pkl")

    # Disabled experiments, kept for reference:
    #   * take a thin slice of the data:
    #       molecules = molecules[len(molecules) * 739 // 2048 : len(molecules) * 740 // 2048]
    #   * filter by charge (the original note said "charge 0", but the
    #     condition it used was ``m.charge == 1``):
    #       molecules = [m for m in molecules if m.charge == 1]

    write_sdf_csv_dataset(molecules, sdf_path, csv_path, yaml_path)
def from_file(cls, filename):
    """Build an instance of ``cls`` from a pickled collection of molecules.

    Args:
        filename: Location of the pickle; it is passed through ``to_path``
            before being handed to ``pickle_load``.

    Returns:
        ``cls(molecules)`` where ``molecules`` is the unpickled object.
    """
    resolved = to_path(filename)
    loaded = pickle_load(resolved)
    # The log message reports the caller-supplied name, not the resolved path.
    logger.info(f"{len(loaded)} molecules loaded from file: {filename}")
    return cls(loaded)