def _run_test(self, statistic, sample_1, sample_1_weighted, sample_2,
              sample_2_weighted):
    """
    Check one statistic against both sample datasets, with and without
    weights

    :param statistic: name of statistic
    :param sample_1: float, expected value for statistic of sample 1 without weights
    :param sample_1_weighted: float, expected value for statistic of sample 1 with weights
    :param sample_2: float, expected value for statistic of sample 2 without weights
    :param sample_2_weighted: float, expected value for statistic of sample 2 with weights
    """

    def check(expected, *calc_args):
        # Compare the expected value with what PropertyStats computes for
        # the given (data, statistic[, weights]) arguments.
        self.assertAlmostEqual(expected, PropertyStats.calc_stat(*calc_args))

    check(sample_1, self.sample_1, statistic)
    check(sample_1_weighted, self.sample_1, statistic, self.sample_1_weights)
    check(sample_2, self.sample_2, statistic)
    check(sample_2_weighted, self.sample_2, statistic, self.sample_2_weights)
def featurize(self, comp):
    """
    Compute statistics of elemental properties for a composition.

    For every property name in self.features, the per-element data is
    fetched from self.data_source and every statistic in self.stats is
    evaluated on it.

    Args:
        comp: Pymatgen composition object

    Returns:
        all_attributes: Specified property statistics of features, ordered
            by feature first and by statistic second
    """
    stat_values = []
    for prop_name in self.features:
        prop_data = self.data_source.get_property(comp, prop_name)
        stat_values.extend(
            PropertyStats().calc_stat(prop_data, stat_name)
            for stat_name in self.stats)
    return stat_values
def featurize(self, s):
    """
    Calculate all sites' local structure order parameters (LSOPs).

    Only sites whose oxidation state lies within [self.min_oxi,
    self.max_oxi] are considered; a bound set to None is not applied.

    Args:
        s: Pymatgen Structure object.

    Returns:
        opvals: (2D array of floats) LSOP values of all sites'
            (1st dimension) order parameters (2nd dimension). 46 order
            parameters are computed per site: q_cn (coordination
            number), q_lin, 35 x q_bent (starting with a target angle
            of 5 degrees and, increasing by 5 degrees, until 175 degrees),
            q_tet, q_oct, q_bcc, q_2, q_4, q_6, q_reg_tri, q_sq, q_sq_pyr.
            If self.stats is set, a flat list of statistics is returned
            instead, ordered by order parameter first and by statistic
            second.
    """
    # One list of per-site values for every order-parameter label.
    opvals = [[] for _ in self._labels]
    for i, site in enumerate(s.sites):
        # The oxidation state is only read when a bound is actually set, so
        # species without an oxidation state work when no bounds are given.
        above_min = (self.min_oxi is None
                     or site.specie.oxi_state >= self.min_oxi)
        below_max = (self.max_oxi is None
                     or site.specie.oxi_state <= self.max_oxi)
        if not (above_min and below_max):
            continue
        for j, opval in enumerate(self.op_site_fp.featurize(s, i)):
            # Undefined order parameters are recorded as 0.0.
            opvals[j].append(0.0 if opval is None else opval)

    if not self.stats:
        return opvals

    # The numerical modes are only needed if a "*_mode" statistic is requested.
    need_modes = any('_mode' in stat for stat in self.stats)
    opstats = []
    for op in opvals:
        if need_modes:
            modes = self.n_numerical_modes(op, self.nmodes, 0.01)
        for stat in self.stats:
            if '_mode' in stat:
                # The first character of the statistic name is the 1-based
                # rank of the requested mode.
                opstats.append(modes[int(stat[0]) - 1])
            else:
                opstats.append(PropertyStats().calc_stat(op, stat))
    return opstats
def test_mode(self):
    """Check the "mode" statistic on the shared samples and on equally
    weighted values."""
    self._run_test("mode", 1, 1, 0, 0.5)

    # Additional tests
    values, weights = [0, 1, 2], [1, 1, 1]
    self.assertAlmostEqual(0, PropertyStats.mode(values, weights))
def featurize(self, struct, idx):
    """
    Get OP fingerprint of site with given index in input structure.

    The neighbor distances are discretized with several bin widths
    (self.dr + i * self.ddr for i in -self.ndr..self.ndr). For each bin
    width the order parameters are evaluated on successively larger
    neighbor shells, and for every order parameter the most frequent value
    across all bin widths (histogram peak with bin width self.dop) is
    reported.

    Args:
        struct (Structure): Pymatgen Structure object.
        idx (int): index of target site in structure struct.

    Returns:
        opvals (numpy array): order parameters of target site.
    """
    idop = 1.0 / self.dop
    dr_indices = range(-self.ndr, self.ndr + 1)
    s = struct.sites[idx]

    # Grow the search radius in steps of 1.0 (starting at 7.0) until at
    # least 12 neighbors are returned.
    neigh_dist = []
    r = 6
    while len(neigh_dist) < 12:
        r += 1.0
        neigh_dist = struct.get_neighbors(s, r)

    # Smoothen distance, but use relative distances.
    dmin = min(d for n, d in neigh_dist)
    neigh_dist = [[n, d / dmin] for n, d in neigh_dist]

    opvals = {}
    neigh_dist_alldrs = {}
    d_sorted_alldrs = {}
    for i in dr_indices:
        opvals[i] = []
        this_dr = self.dr + float(i) * self.ddr
        this_idr = 1.0 / this_dr
        # Discretize every relative distance on a grid of spacing this_dr.
        neigh_dist_alldrs[i] = [
            [n, (float(int(d * this_idr + 0.5)) + 0.5) * this_dr]
            for n, d in neigh_dist]
        # Distinct discretized distances in ascending order.
        d_sorted_alldrs[i] = sorted({d for n, d in neigh_dist_alldrs[i]})

    # Do q_sgl_bd separately.
    if self.optypes[1][0] == "sgl_bd":
        for i in dr_indices:
            site_list = [s] + [n for n, dn in neigh_dist_alldrs[i]]
            opval = self.ops[1][0].get_order_parameters(
                site_list, 0,
                indices_neighs=list(range(1, len(site_list))))
            opvals[i].append(opval[0])

    for i in dr_indices:
        prev_cn = 0
        for d in d_sorted_alldrs[i]:
            # Collect all neighbors up to (and including) distance d and
            # average their inverse relative distances.
            this_cn = 0
            site_list = [s]
            this_av_inv_drel = 0.0
            for j, (n, dn) in enumerate(neigh_dist_alldrs[i]):
                if dn <= d:
                    this_cn += 1
                    site_list.append(n)
                    this_av_inv_drel += 1.0 / neigh_dist[j][1]
            this_av_inv_drel = this_av_inv_drel / float(this_cn)
            d_fac = this_av_inv_drel ** self.dist_exp

            # Handle every coordination number newly reached by this shell
            # (at least 2, at most 12).
            for cn in range(max(2, prev_cn + 1), min(this_cn + 1, 13)):
                # Set all OPs of non-CN-complying neighbor environments
                # to zero if applicable.
                if self.zero_ops and cn != this_cn:
                    opvals[i].extend([0] * len(self.optypes[cn]))
                    continue

                # Set all (remaining) OPs.
                for it in range(len(self.optypes[cn])):
                    opval = self.ops[cn][it].get_order_parameters(
                        site_list, 0,
                        indices_neighs=list(range(1, len(site_list))))
                    val = opval[0]
                    if val is None:
                        val = 0
                    else:
                        val = d_fac * val
                        if self.optypes[cn][it] == 'bcc':
                            # NOTE(review): 0.976 presumably rescales q_bcc
                            # so an ideal environment gives 1 -- confirm.
                            val = val / 0.976
                    opvals[i].append(val)
            prev_cn = this_cn
            if prev_cn >= 12:
                break

    opvals_out = []
    for j in range(len(opvals[0])):
        # Compute histogram, determine peak, and location
        # of peak value.
        op_tmp = [opvals[i][j] for i in dr_indices]

        # Bracket the values by roughly 1.5 bin widths on each side and
        # clamp the histogram range to [0, 1].
        minval = float(int(min(op_tmp) * idop - 1.5)) * self.dop
        minval = min(max(minval, 0.0), 1.0)
        maxval = float(int(max(op_tmp) * idop + 1.5)) * self.dop
        maxval = min(max(maxval, 0.0), 1.0)
        if minval == maxval:
            minval = minval - self.dop
            maxval = maxval + self.dop
        nbins = int((maxval - minval) * idop)

        # The deprecated ``normed`` keyword is not passed: it was removed
        # from np.histogram and raises TypeError on current NumPy.
        hist, bin_edges = np.histogram(
            op_tmp, bins=nbins, range=(minval, maxval),
            weights=None, density=False)
        max_hist = max(hist)
        op_peaks = [[k, 0.5 * (bin_edges[k] + bin_edges[k + 1])]
                    for k, h in enumerate(hist) if h == max_hist]

        # Address problem that 2 OP values can be close to a bin edge:
        # two adjacent peak bins are merged into one peak located at the
        # mean of their centers, with their counts added.
        hist2 = []
        op_peaks2 = []
        k = 0
        while k < len(op_peaks):
            if (k < len(op_peaks) - 1
                    and op_peaks[k + 1][0] - op_peaks[k][0] == 1):
                op_peaks2.append(
                    0.5 * (op_peaks[k][1] + op_peaks[k + 1][1]))
                hist2.append(
                    hist[op_peaks[k][0]] + hist[op_peaks[k + 1][0]])
                k += 1
            else:
                op_peaks2.append(op_peaks[k][1])
                hist2.append(hist[op_peaks[k][0]])
            k += 1
        opvals_out.append(op_peaks2[list(hist2).index(max(hist2))])
    return np.array(opvals_out)
""" prod = 1.0 for el, amt in comp.get_el_amt_dict().items(): prod = prod * (Element(el).X**amt) return [-prod**(1 / sum(comp.get_el_amt_dict().values()))] def feature_labels(self): return ["Band Center"] def implementors(self): return ["Anubhav Jain"] if __name__ == '__main__': print(PropertyStats.holder_mean([1, 2, 3, 4])) training_set = pd.DataFrame({ "composition": [ Composition("Fe2O3"), Composition("Ga1Na6P3"), Composition("O4Si1Zn2") ] }) print("WARD NPJ ATTRIBUTES") print("Stoichiometric attributes") p_list = [0, 2, 3, 5, 7, 9] print(Stoichiometry().featurize_dataframe(training_set, col_id="composition")) print("Elemental property attributes") print(ElementProperty().featurize_dataframe(training_set,