def test_op_site_fingerprint(self):
    """
    Check the feature labels of a default OPSiteFingerprint and a few
    order-parameter values for the ``self.sc`` and ``self.cscl`` fixture
    structures (defined outside this block; presumably simple cubic and
    CsCl -- confirm against the test setUp).
    """
    opsf = OPSiteFingerprint()
    labels = opsf.feature_labels()
    expected = [
        "sgl_bd CN_1", "bent180 CN_2", "bent45 CN_2", "bent90 CN_2",
        "bent135 CN_2", "tri_plan CN_3", "tet CN_3", "T CN_3",
        "sq_plan CN_4", "sq CN_4", "tet CN_4", "see_saw CN_4",
        "tri_pyr CN_4", "pent_plan CN_5", "sq_pyr CN_5",
        "tri_bipyr CN_5", "oct CN_6", "pent_pyr CN_6", "hex_pyr CN_7",
        "pent_bipyr CN_7", "bcc CN_8", "hex_bipyr CN_8",
        "q2 CN_9", "q4 CN_9", "q6 CN_9",
        "q2 CN_10", "q4 CN_10", "q6 CN_10",
        "q2 CN_11", "q4 CN_11", "q6 CN_11",
        "cuboct CN_12", "q2 CN_12", "q4 CN_12", "q6 CN_12",
    ]
    # Compare the lengths first: iterating only over the returned labels
    # would let a truncated label list pass unnoticed.
    self.assertEqual(len(labels), len(expected))
    for actual, target in zip(labels, expected):
        self.assertEqual(actual, target)

    ops = opsf.featurize(self.sc, 0)
    self.assertEqual(len(ops), 35)
    # Values are compared as integers in units of 1e-3; this first check
    # truncates, the two below round (+ 0.5).
    self.assertAlmostEqual(
        int(1000 * ops[labels.index('oct CN_6')]), 999)

    ops = opsf.featurize(self.cscl, 0)
    self.assertAlmostEqual(
        int(1000 * ops[labels.index('bcc CN_8')] + 0.5), 895)

    # Same site again with dist_exp=0.
    opsf = OPSiteFingerprint(dist_exp=0)
    ops = opsf.featurize(self.cscl, 0)
    self.assertAlmostEqual(
        int(1000 * ops[opsf.feature_labels().index('bcc CN_8')] + 0.5),
        955)
def get_tet_bcc_motif(structure, idx):
    """
    Classify the coordination environment of one site as bcc-like or
    tetrahedral.

    Convenience helper from Nils Zimmermann, used to distinguish
    coordination environments in half-Heuslers.

    Args:
        structure (pymatgen Structure): the target structure to evaluate
        idx (index): the site index in the structure

    Returns:
        (str) describing the site coordination environment:
            'bcc' if the 'bcc CN_8' fingerprint entry exceeds 0.5,
            'tet' if (otherwise) the 'tet CN_4' entry exceeds 0.5,
            'unrecognized' in all other cases.
    """
    fingerprinter = OPSiteFingerprint()
    site_fp = fingerprinter.featurize(structure, idx)
    names = fingerprinter.feature_labels()

    # NOTE(review): list.index raises ValueError if a label is absent, so
    # these strings must match what feature_labels() emits -- confirm
    # against the OPSiteFingerprint version in use.
    tet_pos = names.index('tet CN_4')
    bcc_pos = names.index('bcc CN_8')

    # bcc is tested first, so it wins when both entries exceed 0.5.
    if site_fp[bcc_pos] > 0.5:
        return 'bcc'
    if site_fp[tet_pos] > 0.5:
        return 'tet'
    return 'unrecognized'
def test_op_site_fingerprint(self):
    """
    Check the feature labels of a default OPSiteFingerprint, a few
    order-parameter values for the ``self.sc`` and ``self.cscl`` fixture
    structures (defined outside this block), and that no label of a
    default OPSiteFingerprint starts with 'wt' after the 'ops' preset of
    CrystalNNFingerprint has been instantiated.
    """
    opsf = OPSiteFingerprint()
    labels = opsf.feature_labels()
    expected = [
        'sgl_bd CN_1', 'L-shaped CN_2', 'water-like CN_2',
        'bent 120 degrees CN_2', 'bent 150 degrees CN_2',
        'linear CN_2', 'trigonal planar CN_3',
        'trigonal non-coplanar CN_3', 'T-shaped CN_3',
        'square co-planar CN_4', 'tetrahedral CN_4',
        'rectangular see-saw-like CN_4', 'see-saw-like CN_4',
        'trigonal pyramidal CN_4', 'pentagonal planar CN_5',
        'square pyramidal CN_5', 'trigonal bipyramidal CN_5',
        'hexagonal planar CN_6', 'octahedral CN_6',
        'pentagonal pyramidal CN_6', 'hexagonal pyramidal CN_7',
        'pentagonal bipyramidal CN_7', 'body-centered cubic CN_8',
        'hexagonal bipyramidal CN_8', 'q2 CN_9', 'q4 CN_9', 'q6 CN_9',
        'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
        'q2 CN_11', 'q4 CN_11', 'q6 CN_11',
        'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
    ]
    # Compare the lengths first: iterating only over the returned labels
    # would let a truncated label list pass unnoticed.
    self.assertEqual(len(labels), len(expected))
    for actual, target in zip(labels, expected):
        self.assertEqual(actual, target)

    ops = opsf.featurize(self.sc, 0)
    self.assertEqual(len(ops), 37)
    self.assertAlmostEqual(
        ops[labels.index('octahedral CN_6')], 0.9995, places=7)

    ops = opsf.featurize(self.cscl, 0)
    self.assertAlmostEqual(
        ops[labels.index('body-centered cubic CN_8')], 0.8955, places=7)

    # Same site again with dist_exp=0.
    opsf = OPSiteFingerprint(dist_exp=0)
    ops = opsf.featurize(self.cscl, 0)
    self.assertAlmostEqual(
        ops[opsf.feature_labels().index('body-centered cubic CN_8')],
        0.9555, places=7)

    # The following test aims at ensuring the copying of the OP
    # dictionaries work. The CrystalNN fingerprint is only constructed,
    # never used: the assertion inspects the OPSiteFingerprint created
    # just before it.
    opsfp = OPSiteFingerprint()
    CrystalNNFingerprint.from_preset('ops')
    wt_labels = [label for label in opsfp.feature_labels()
                 if label.split()[0] == 'wt']
    self.assertEqual(len(wt_labels), 0)
def test_op_site_fingerprint(self):
    """
    Check the feature labels of a default OPSiteFingerprint and a few
    order-parameter values for the ``self.sc`` and ``self.cscl`` fixture
    structures (defined outside this block).
    """
    opsf = OPSiteFingerprint()
    labels = opsf.feature_labels()
    expected = [
        'sgl_bd CN_1', 'L-shaped CN_2', 'water-like CN_2',
        'bent 120 degrees CN_2', 'bent 150 degrees CN_2',
        'linear CN_2', 'trigonal planar CN_3',
        'trigonal non-coplanar CN_3', 'T-shaped CN_3',
        'square co-planar CN_4', 'tetrahedral CN_4',
        'rectangular see-saw-like CN_4', 'see-saw-like CN_4',
        'trigonal pyramidal CN_4', 'pentagonal planar CN_5',
        'square pyramidal CN_5', 'trigonal bipyramidal CN_5',
        'hexagonal planar CN_6', 'octahedral CN_6',
        'pentagonal pyramidal CN_6', 'hexagonal pyramidal CN_7',
        'pentagonal bipyramidal CN_7', 'body-centered cubic CN_8',
        'hexagonal bipyramidal CN_8', 'q2 CN_9', 'q4 CN_9', 'q6 CN_9',
        'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
        'q2 CN_11', 'q4 CN_11', 'q6 CN_11',
        'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
    ]
    # Compare the lengths first: iterating only over the returned labels
    # would let a truncated label list pass unnoticed.
    self.assertEqual(len(labels), len(expected))
    for actual, target in zip(labels, expected):
        self.assertEqual(actual, target)

    ops = opsf.featurize(self.sc, 0)
    self.assertEqual(len(ops), 37)
    self.assertAlmostEqual(
        ops[labels.index('octahedral CN_6')], 0.9995, places=7)

    ops = opsf.featurize(self.cscl, 0)
    self.assertAlmostEqual(
        ops[labels.index('body-centered cubic CN_8')], 0.8955, places=7)

    # Same site again with dist_exp=0.
    opsf = OPSiteFingerprint(dist_exp=0)
    ops = opsf.featurize(self.cscl, 0)
    self.assertAlmostEqual(
        ops[opsf.feature_labels().index('body-centered cubic CN_8')],
        0.9555, places=7)
def test_op_site_fingerprint(self):
    """
    Check the feature labels of a default OPSiteFingerprint, a few
    order-parameter values for the ``self.sc`` and ``self.cscl`` fixture
    structures (defined outside this block), and that no label of a
    default OPSiteFingerprint starts with 'wt' after the 'ops' preset of
    CrystalNNFingerprint has been instantiated.
    """
    opsf = OPSiteFingerprint()
    labels = opsf.feature_labels()
    expected = [
        'sgl_bd CN_1', 'L-shaped CN_2', 'water-like CN_2',
        'bent 120 degrees CN_2', 'bent 150 degrees CN_2',
        'linear CN_2', 'trigonal planar CN_3',
        'trigonal non-coplanar CN_3', 'T-shaped CN_3',
        'square co-planar CN_4', 'tetrahedral CN_4',
        'rectangular see-saw-like CN_4', 'see-saw-like CN_4',
        'trigonal pyramidal CN_4', 'pentagonal planar CN_5',
        'square pyramidal CN_5', 'trigonal bipyramidal CN_5',
        'hexagonal planar CN_6', 'octahedral CN_6',
        'pentagonal pyramidal CN_6', 'hexagonal pyramidal CN_7',
        'pentagonal bipyramidal CN_7', 'body-centered cubic CN_8',
        'hexagonal bipyramidal CN_8', 'q2 CN_9', 'q4 CN_9', 'q6 CN_9',
        'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
        'q2 CN_11', 'q4 CN_11', 'q6 CN_11',
        'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
    ]
    # A length mismatch must fail the test; looping over the returned
    # labels alone would silently accept a shortened list.
    self.assertEqual(len(labels), len(expected))
    for actual, target in zip(labels, expected):
        self.assertEqual(actual, target)

    ops = opsf.featurize(self.sc, 0)
    self.assertEqual(len(ops), 37)
    octahedral_pos = labels.index('octahedral CN_6')
    self.assertAlmostEqual(ops[octahedral_pos], 0.9995, places=7)

    ops = opsf.featurize(self.cscl, 0)
    bcc_pos = labels.index('body-centered cubic CN_8')
    self.assertAlmostEqual(ops[bcc_pos], 0.8955, places=7)

    # Same site again with dist_exp=0; labels are looked up on the new
    # featurizer instance.
    opsf = OPSiteFingerprint(dist_exp=0)
    ops = opsf.featurize(self.cscl, 0)
    bcc_pos = opsf.feature_labels().index('body-centered cubic CN_8')
    self.assertAlmostEqual(ops[bcc_pos], 0.9555, places=7)

    # The following test aims at ensuring the copying of the OP
    # dictionaries work. The CrystalNN fingerprint is only constructed,
    # never used: the assertion inspects the OPSiteFingerprint created
    # just before it.
    opsfp = OPSiteFingerprint()
    CrystalNNFingerprint.from_preset('ops')
    wt_labels = [label for label in opsfp.feature_labels()
                 if label.split()[0] == 'wt']
    self.assertEqual(len(wt_labels), 0)
class OPStructureFingerprint(BaseFeaturizer):
    """
    Calculates all order parameters (OPs) for all sites in a crystal
    structure.

    Args:
        op_site_fp (OPSiteFingerprint): defines the types of order
            parameters to be calculated. If None, a default
            OPSiteFingerprint() is created.
        stats ([str]): list of weighted statistics to compute for each
            feature. If stats is None, for each order parameter, a list
            is returned that contains the calculated parameter for each
            site in the structure. A single string is accepted as well
            and is treated as a one-element sequence.
            *Note for nth mode, stat must be 'n*_mode'; e.g.
            stat='2nd_mode'. Only the first character of the stat name
            is read as n.
        min_oxi (int): minimum site oxidation state for inclusion (e.g.,
            zero means metals/cations only)
        max_oxi (int): maximum site oxidation state for inclusion
    """

    def __init__(self, op_site_fp=None,
                 stats=('mean', 'std_dev', 'minimum', 'maximum'),
                 min_oxi=None, max_oxi=None):
        self.op_site_fp = OPSiteFingerprint() if op_site_fp is None \
            else op_site_fp
        self._labels = self.op_site_fp.feature_labels()
        self.stats = (stats,) if isinstance(stats, str) else stats
        # Highest mode rank requested through an 'n*_mode' stat; stays 0
        # when no mode statistic is requested.
        self.nmodes = 0
        if self.stats:
            for stat in self.stats:
                if '_mode' in stat:
                    self.nmodes = max(self.nmodes, int(stat[0]))
        self.min_oxi = min_oxi
        self.max_oxi = max_oxi

    def featurize(self, s):
        """
        Calculate all sites' local structure order parameters (LSOPs).

        Only sites whose oxidation state lies within [min_oxi, max_oxi]
        are evaluated; a bound that is None is not applied. Order
        parameters that the site fingerprint returns as None are stored
        as 0.0.

        Args:
            s: Pymatgen Structure object.

        Returns:
            If stats is set: ([float]) flat list holding, for each order
                parameter in turn, one value per requested statistic
                (same order as feature_labels()).
            Otherwise: ([[float]]) one list per order parameter (1st
                dimension) with the values of all included sites (2nd
                dimension).
        """
        opvals = [[] for _ in self._labels]
        for i, site in enumerate(s.sites):
            # The oxidation state is only read when a bound is given.
            if (self.min_oxi is None
                    or site.specie.oxi_state >= self.min_oxi) \
                    and (self.max_oxi is None
                         or site.specie.oxi_state <= self.max_oxi):
                opvalstmp = self.op_site_fp.featurize(s, i)
                for j, opval in enumerate(opvalstmp):
                    opvals[j].append(0.0 if opval is None else opval)

        if not self.stats:
            return opvals

        want_modes = any('_mode' in stat for stat in self.stats)
        opstats = []
        for op in opvals:
            # Modes are computed once per order parameter (bin size
            # 0.01) and shared by all 'n*_mode' stats.
            modes = self.n_numerical_modes(op, self.nmodes, 0.01) \
                if want_modes else None
            for stat in self.stats:
                if '_mode' in stat:
                    opstats.append(modes[int(stat[0]) - 1])
                else:
                    opstats.append(PropertyStats().calc_stat(op, stat))
        return opstats

    def feature_labels(self):
        """
        Returns:
            ([str]) '<stat> <label>' for every site-fingerprint label and
                every statistic (labels outermost) if stats is set,
                otherwise the labels of the site fingerprint unchanged.
        """
        if not self.stats:
            return self._labels
        return ['%s %s' % (stat, attr)
                for attr in self._labels for stat in self.stats]

    def citations(self):
        return ('@article{zimmermann_jain_2017, title={Applications of order'
                ' parameter feature vectors}, journal={in progress}, author={'
                'Zimmermann, N. E. R. and Jain, A.}, year={2017}}')

    def implementors(self):
        return (['Nils E. R. Zimmermann', 'Alireza Faghaninia',
                 'Anubhav Jain'])

    @staticmethod
    def n_numerical_modes(data_lst, n=2, dl=0.1):
        """
        Returns the n first modes of a data set that are obtained with
        a finite bin size for the underlying frequency distribution.

        Args:
            data_lst ([float]): data values.
            n (integer): number of most frequent elements to be
                determined.
            dl (float): bin size of underlying (coarsened) distribution.

        Returns:
            ([float]): first n most frequent entries (or nan if not
                found). For binned data these are the lower edges of the
                most populated bins, most frequent first; if all values
                are identical, that value is the only mode.
        """
        # No data: nothing can be a mode (min()/max() below would raise).
        if len(data_lst) == 0:
            return [float('NaN') for _ in range(n)]
        if len(set(data_lst)) == 1:
            return [data_lst[0]] + [float('NaN') for _ in range(n-1)]
        # NOTE(review): np.arange excludes its stop value, so the last
        # bin edge can lie below max(data_lst) -- confirm this is
        # intended.
        hist, bins = np.histogram(data_lst, bins=np.arange(
            min(data_lst), max(data_lst), dl), density=False)
        # argsort is ascending: take the n fullest bins and reverse them.
        modes = list(bins[np.argsort(hist)[-n:]][::-1])
        return modes + [float('NaN') for _ in range(n-len(modes))]