def test_number_of_features(self):
    """Check the feature count reported for a non-flattened, L2-sorted matrix.

    With ``n_atoms_max=5`` the descriptor is expected to report
    5 * 5 = 25 features.
    """
    descriptor = CoulombMatrix(
        n_atoms_max=5,
        permutation="sorted_l2",
        flatten=False,
    )
    self.assertEqual(descriptor.get_number_of_features(), 25)
def setupDescs(structs, indexs, level, descname, chemsyms_uniques, n_atoms, steve, v):
    """Set up the requested descriptor and evaluate it for ASE structures.

    Args:
        structs: Structures handed to ``desc.create``. Its ``shape[0]`` is
            read for the verbose message, so it is presumably a pandas or
            NumPy container of ASE structures -- confirm against the caller.
        indexs: Index used for the rows of the returned DataFrame.
        level: Label that only appears in the verbose message.
        descname: Which descriptor to build: "CM", "MBTR" or "SOAP".
        chemsyms_uniques: Species passed to MBTR and SOAP (unused for CM).
        n_atoms: ``n_atoms_max`` for the Coulomb matrix (unused otherwise).
        steve: Number of parallel jobs (``n_jobs``) for ``desc.create``.
        v: If truthy, print a one-line summary after creation.

    Returns:
        Tuple ``(descs_df, n_feat)``: a DataFrame built from the descriptor
        output, indexed by ``indexs``, and the number of features reported by
        the descriptor.

    Raises:
        ValueError: If ``descname`` is not one of the supported names.
    """
    # Choose the descriptor. MBTR and SOAP hyper-parameters (mk1..mk3, srcut,
    # snmax, slmax) are module-level settings defined elsewhere in this file.
    if descname == "CM":
        # permutation is left at the library default ('sorted_l2').
        desc = CoulombMatrix(n_atoms_max=n_atoms, flatten=True)
    elif descname == "MBTR":
        desc = MBTR(
            species=chemsyms_uniques,
            k1=mk1,
            k2=mk2,
            k3=mk3,
            periodic=False,
            normalization="l2_each",
            flatten=True,
        )
    elif descname == "SOAP":
        # average=True requests the averaged (global) output.
        desc = SOAP(
            species=chemsyms_uniques,
            periodic=False,
            rcut=srcut,
            nmax=snmax,
            lmax=slmax,
            average=True,
        )
    else:
        # Previously an unknown name fell through every branch and crashed
        # later with an UnboundLocalError on `desc`.
        raise ValueError(
            "Unknown descriptor name {!r}; expected 'CM', 'MBTR' or 'SOAP'.".format(descname)
        )

    n_feat = desc.get_number_of_features()

    # Create descriptors in parallel.
    descs = desc.create(structs, n_jobs=steve)

    # Wrap the returned descriptor output in a DataFrame, one row per structure.
    descs_df = pd.DataFrame(descs, index=indexs)

    if v:
        print("""🔘 Created {}-descriptors for all {} {}-structures. Number of features in {}: {}""".format(descname, structs.shape[0], level, descname, n_feat))

    return descs_df, n_feat
def test_parallel_sparse(self):
    """Check that batched sparse creation matches per-sample creation.

    The batched output (serial and with two jobs) is compared against an
    array assembled from individual ``create`` calls, first for flattened
    output and then for non-flattened output.
    """
    samples = [molecule("CO"), molecule("N2O")]

    # Flattened output: run the same comparison serially and in parallel.
    flat_desc = CoulombMatrix(n_atoms_max=5, permutation="none", flatten=True, sparse=True)
    n_features = flat_desc.get_number_of_features()
    for n_jobs in (1, 2):
        output = flat_desc.create(system=samples, n_jobs=n_jobs).toarray()
        assumed = np.empty((2, n_features))
        for row, sample in enumerate(samples):
            assumed[row, :] = flat_desc.create(sample).toarray()
        self.assertTrue(np.allclose(output, assumed))

    # Non-flattened output: each batched item is converted to dense separately.
    matrix_desc = CoulombMatrix(n_atoms_max=5, permutation="none", flatten=False, sparse=True)
    batched = matrix_desc.create(system=samples, n_jobs=2)
    output = [item.toarray() for item in batched]
    assumed = np.empty((2, 5, 5))
    for row, sample in enumerate(samples):
        assumed[row] = matrix_desc.create(sample).toarray()
    self.assertTrue(np.allclose(np.array(output), assumed))
def test_parallel_dense(self):
    """Check that batched dense creation matches per-sample creation.

    Also exercises ``n_jobs=-1`` with both settings of
    ``only_physical_cores``; those calls are only required to run.
    """
    samples = [molecule("CO"), molecule("N2O")]
    flat_desc = CoulombMatrix(n_atoms_max=5, permutation="none", flatten=True, sparse=False)
    n_features = flat_desc.get_number_of_features()

    # Job count derived from the number of CPUs.
    for physical_only in (False, True):
        flat_desc.create(system=samples, n_jobs=-1, only_physical_cores=physical_only)

    # Flattened output: run the same comparison serially and in parallel.
    for n_jobs in (1, 2):
        output = flat_desc.create(system=samples, n_jobs=n_jobs)
        assumed = np.empty((2, n_features))
        for row, sample in enumerate(samples):
            assumed[row, :] = flat_desc.create(sample)
        self.assertTrue(np.allclose(output, assumed))

    # Non-flattened output.
    matrix_desc = CoulombMatrix(n_atoms_max=5, permutation="none", flatten=False, sparse=False)
    output = matrix_desc.create(system=samples, n_jobs=2)
    assumed = np.empty((2, 5, 5))
    for row, sample in enumerate(samples):
        assumed[row] = matrix_desc.create(sample)
    self.assertTrue(np.allclose(np.array(output), assumed))
def plotDescs(structs, indexs, level, descname, chemsyms, n_atoms, steve, v, path_output, save=True):
    """Plot the chosen descriptor and optionally save the figure(s) as PNG.

    Args:
        structs: Structure(s) handed to ``desc.create``.
        indexs: Label used in the plot titles. Its last four characters are
            stripped to build the output file name (presumably a file
            extension such as ".xyz" -- confirm against the caller).
        level: Unused by this function.
        descname: Which descriptor to plot: "CM", "MBTR" or "SOAP". Any other
            value plots nothing.
        chemsyms: Chemical symbols. Used as heatmap tick labels for CM and
            de-duplicated into the species list for MBTR and SOAP.
        n_atoms: ``n_atoms_max`` for the Coulomb matrix (unused otherwise).
        steve: Number of parallel jobs (``n_jobs``) for ``desc.create``.
        v: If truthy, print a completion message.
        path_output: Directory the PNG files are written to.
        save: If truthy, save each plot under ``path_output``.

    Returns:
        None.
    """

    def _save(suffix):
        # Write the current figure as "<path_output>/<indexs minus 4 chars>_<suffix>.png".
        if save:
            plt.savefig("{}/{}_{}.png".format(path_output, indexs[:-4], suffix))

    if descname == "CM":
        # permutation='none' overrides the library default ('sorted_l2'),
        # presumably so rows/columns stay in the order of `chemsyms`.
        desc = CoulombMatrix(n_atoms_max=n_atoms, flatten=False, permutation='none')
        descs = desc.create(structs, n_jobs=steve)

        # NOTE(review): no new figure is opened here, so the heatmap is drawn
        # on whatever matplotlib figure is current.
        sns.heatmap(descs, cmap='Spectral', robust=True, xticklabels=chemsyms, yticklabels=chemsyms)
        plt.title("CM of {}".format(indexs))
        _save("CM")

    elif descname == "MBTR":
        # mk1/mk2/mk3 are module-level MBTR settings defined elsewhere.
        desc = MBTR(
            species=list(set(chemsyms)),
            k1=mk1,
            k2=mk2,
            k3=mk3,
            periodic=False,
            normalization="l2_each",
            flatten=False,
        )
        descs = desc.create(structs, n_jobs=steve)

        # Map each output index to its chemical symbol via the atomic number.
        n_elements = len(desc.species)
        smap = {
            index: ase.data.chemical_symbols[number]
            for index, number in desc.index_to_atomic_number.items()
        }

        # k=1: one curve per element.
        x1 = desc.get_k1_axis()
        _, ax = plt.subplots()
        for i in range(n_elements):
            plt.plot(x1, descs["k1"][i, :], label="{}".format(smap[i]))
        ax.set_xlabel("Atomic number")
        ax.set_ylabel("k1 values (arbitrary units)")
        plt.legend()
        plt.title("MBTR k1 of {}".format(indexs))
        _save("MBTR_k1")

        # k=2: one curve per unordered element pair (i <= j).
        x2 = desc.get_k2_axis()
        _, ax = plt.subplots()
        for i, j in combinations_with_replacement(range(n_elements), 2):
            plt.plot(x2, descs["k2"][i, j, :], label="{}-{}".format(smap[i], smap[j]))
        ax.set_xlabel("Inverse distance (1/angstrom)")
        ax.set_ylabel("k2 values (arbitrary units)")
        plt.legend()
        plt.title("MBTR k2 of {}".format(indexs))
        _save("MBTR_k2")

        # k=3: unordered triples (i <= j <= k), restricted to those whose
        # last element is sulfur ("S").
        x3 = desc.get_k3_axis()
        _, ax = plt.subplots()
        for i, j, k in combinations_with_replacement(range(n_elements), 3):
            if smap[k] == "S":
                plt.plot(
                    x3,
                    descs["k3"][i, j, k, :],
                    label="{}-{}-{}".format(smap[i], smap[j], smap[k]),
                )
        ax.set_xlabel("cos(angle)")
        ax.set_ylabel("k3 values (arbitrary units)")
        plt.legend()
        plt.title("MBTR k3 of {}".format(indexs))
        _save("MBTR_k3")

    elif descname == "SOAP":
        # srcut/snmax/slmax are module-level SOAP settings defined elsewhere.
        # average=False: the output is not averaged; only its first row is
        # plotted below.
        desc = SOAP(
            species=list(set(chemsyms)),
            periodic=False,
            rcut=srcut,
            nmax=snmax,
            lmax=slmax,
            average=False,
        )
        descs = desc.create(structs, n_jobs=steve)

        # One curve per unordered species pair; get_location returns the
        # slice of the SOAP output that belongs to that pair.
        # NOTE(review): no new figure is opened here, so the curves are drawn
        # on whatever matplotlib figure is current.
        for combo in combinations_with_replacement(desc.species, 2):
            pairloc = desc.get_location(combo)
            plt.plot(descs[0, pairloc], label="{}-{}".format(combo[0], combo[1]))
        plt.legend()
        plt.xlabel("N of features for an atom pair")
        plt.ylabel("Output value of SOAPs")
        plt.title("SOAPs of {}".format(indexs))
        _save("SOAP")

    if v:
        print("🔘 Plotting {} done.".format(descname))
def test_number_of_features(self):
    """Check the feature count reported with random permutation.

    With ``n_atoms_max=5`` the descriptor is expected to report
    5 * 5 = 25 features.
    """
    descriptor = CoulombMatrix(
        n_atoms_max=5,
        permutation="random",
        sigma=100,
    )
    self.assertEqual(descriptor.get_number_of_features(), 25)
def test_number_of_features(self):
    """Check the feature count reported for the eigenspectrum variant.

    With ``n_atoms_max=5`` the descriptor is expected to report 5 features.
    """
    descriptor = CoulombMatrix(
        n_atoms_max=5,
        permutation="eigenspectrum",
    )
    self.assertEqual(descriptor.get_number_of_features(), 5)