def parse_composition(structure_type, s, ctype):
    """
    Parse a site-occupancy string into a validated ``Composition``.

    Parameters
    ----------
    structure_type: str
        Structure family. 'garnet' and 'perovskite' enable the mixing-ratio
        checks below; the value is also the first key into ``ELS``.
    s: str
        Whitespace-separated tokens. A single token is read as one species
        with amount 1 (anything after ":" is ignored). With several tokens,
        each must have the form ``species:amount``.
    ctype: str
        Site label. Selects the ratio rule, the second key into ``ELS``,
        and is interpolated into error messages.

    Returns
    -------
    c: Composition
        The parsed composition. For multi-token input the amounts are
        rescaled so that they sum to 1.

    Raises
    ------
    ValueError
        If multi-token input does not resolve to exactly two species, if
        the mixing ratio is not allowed for the site, if a species is not
        listed in ``ELS[structure_type][ctype]``, or if a species has no
        ``oxi_state`` attribute.

    Notes
    -----
    A token without ":amount" in multi-token input surfaces as an
    ``IndexError`` from the split, not as a ``ValueError``.
    """
    tokens = s.strip().split()

    if len(tokens) == 1:
        # Lone species: it fills the site on its own, no ratio rules apply.
        comp = Composition({tokens[0].split(":")[0]: 1})
    else:
        amounts = {}
        for token in tokens:
            fields = token.split(":")
            amounts[fields[0]] = float(fields[1])

        # Rescale so the amounts sum to one.
        raw = Composition(amounts)
        total = sum(raw.values())
        comp = Composition({sp: amt / total for sp, amt in raw.items()})

        if len(comp) != 2:
            raise ValueError("Bad composition on %s." % ctype)

        fractions = [comp.get_atomic_fraction(sp) for sp in comp]

        is_garnet = structure_type == 'garnet'
        wants_even_split = (
            structure_type == 'perovskite' or (is_garnet and ctype == "A")
        )
        wants_two_to_one = is_garnet and ctype in ["C", "D"]

        if wants_even_split and abs(fractions[0] - 0.5) > 0.01:
            raise ValueError("Bad composition on %s. "
                             "Only 1:1 mixing allowed!" % ctype)

        if wants_two_to_one:
            # 2:1 mixing means one of the two species sits at one third.
            near_third = [abs(frac - 1.0 / 3) < 0.01 for frac in fractions]
            if not any(near_third):
                raise ValueError("Bad composition on %s. "
                                 "Only 2:1 mixing allowed!" % ctype)

    try:
        for species in comp:
            # Attribute probe only: keys without an oxidation state raise
            # AttributeError here, which is translated below.
            species.oxi_state
            if species not in ELS[structure_type][ctype]:
                raise ValueError("%s is not a valid species for %s site."
                                 % (species, ctype))
    except AttributeError:
        raise ValueError("Oxidation states must be specified for all species!")

    return comp
def __getitem__(self, idx):
    """
    Build the model inputs and target for the material at position ``idx``.

    The row ``self.df.iloc[idx]`` must unpack into exactly three fields:
    ``cry_id``, ``composition`` and ``target``.

    Parameters
    ----------
    idx: int
        Positional row index into ``self.df``.

    Returns
    -------
    inputs: tuple
        ``(atom_weights, atom_fea, self_fea_idx, nbr_fea_idx)`` where, for a
        material with M distinct elements:

        atom_weights: torch.Tensor shape (M, 1)
            fractional amount of each element (sums to 1)
        atom_fea: torch.Tensor shape (M, n_fea)
            stacked per-element features from ``self.elem_features.get_fea``
            (n_fea is whatever that lookup returns)
        self_fea_idx: torch.LongTensor shape (M*(M-1),)
            index of the central element for every ordered pair i != j
        nbr_fea_idx: torch.LongTensor shape (M*(M-1),)
            index of the neighbouring element for every ordered pair i != j
    targets: torch.Tensor shape (1,)
        float tensor for "regression", long tensor for "classification"
    composition:
        the composition value exactly as read from the row
    cry_id:
        the material id exactly as read from the row

    Raises
    ------
    AssertionError
        If the composition has a single element (pure system), or if
        ``get_fea`` raises ``AssertionError`` for one of the elements.
    ValueError
        If the element features cannot be stacked, or if ``self.task`` is
        neither "regression" nor "classification".
    """
    cry_id, composition, target = self.df.iloc[idx]

    comp_dict = Composition(composition).get_el_amt_dict()
    elements = list(comp_dict.keys())
    weights = list(comp_dict.values())
    # Column vector of fractional amounts.
    weights = np.atleast_2d(weights).T / np.sum(weights)

    # Raised explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is unchanged.
    if len(elements) == 1:
        raise AssertionError(
            f"cry-id {cry_id} [{composition}] is a pure system"
        )

    try:
        atom_fea = np.vstack(
            [self.elem_features.get_fea(element) for element in elements]
        )
    except AssertionError as err:
        raise AssertionError(
            f"cry-id {cry_id} [{composition}] contains element types not in embedding"
        ) from err
    except ValueError as err:
        raise ValueError(
            f"cry-id {cry_id} [{composition}] composition cannot be parsed into elements"
        ) from err

    # Every ordered pair (i, j) with i != j: i is the "self" element and j
    # its neighbour, grouped by i with j ascending.
    n_elem = len(elements)
    self_fea_idx = [i for i in range(n_elem) for _ in range(n_elem - 1)]
    nbr_fea_idx = [j for i in range(n_elem) for j in range(n_elem) if j != i]

    # convert all data to tensors
    atom_weights = torch.Tensor(weights)
    atom_fea = torch.Tensor(atom_fea)
    self_fea_idx = torch.LongTensor(self_fea_idx)
    nbr_fea_idx = torch.LongTensor(nbr_fea_idx)

    if self.task == "regression":
        targets = torch.Tensor([float(target)])
    elif self.task == "classification":
        targets = torch.LongTensor([target])
    else:
        # Previously fell through and failed at the return with an
        # UnboundLocalError on `targets`.
        raise ValueError(
            f"unsupported task {self.task!r}; "
            "expected 'regression' or 'classification'"
        )

    return (
        (atom_weights, atom_fea, self_fea_idx, nbr_fea_idx),
        targets,
        composition,
        cry_id,
    )