def test_print_available_datasets(self):
    """Verify the ordering returned by get_available_datasets per sort method.

    With 'alphabetical' the result must equal the sorted dataset names; with
    'num_entries' it must equal the names ordered by descending
    'num_entries' as recorded in self.dataset_dict.
    """
    def entry_count(name):
        # Sort key: number of entries recorded for this dataset.
        return self.dataset_dict[name]['num_entries']

    for method in ['alphabetical', 'num_entries']:
        returned = get_available_datasets(sort_method=method)
        if method != 'alphabetical':
            expected = sorted(self.dataset_names, key=entry_count,
                              reverse=True)
        else:
            expected = sorted(self.dataset_names)
        self.assertEqual(returned, expected)
def test_get_available_datasets(self):
    """Check get_available_datasets ordering for every argument combination.

    The function is called positionally with two booleans and a sort method
    (every combination of True/False x True/False x sort method). Only the
    sort method determines the expected ordering checked here.
    """
    booleans = [True, False]
    sort_methods = ['alphabetical', 'num_entries']

    for first_flag, second_flag, sort_method in product(
            booleans, booleans, sort_methods):
        returned = get_available_datasets(first_flag, second_flag,
                                          sort_method)

        if sort_method == 'alphabetical':
            self.assertEqual(returned, sorted(self.dataset_names))
            continue

        # Any other sort method: largest datasets first.
        by_size = sorted(
            self.dataset_names,
            key=lambda name: self.dataset_dict[name]['num_entries'],
            reverse=True)
        self.assertEqual(returned, by_size)
def test_get_available_datasets(self):
    """Exercise get_available_datasets over all parameter combinations.

    Each combination is (bool, bool, sort method), passed positionally. The
    returned list must match self.dataset_names sorted alphabetically, or by
    descending 'num_entries' from self.dataset_dict, depending on the third
    element of the combination.
    """
    combinations = product([True, False], [True, False],
                           ['alphabetical', 'num_entries'])
    for combo in combinations:
        result = get_available_datasets(*combo)
        ordering = combo[2]

        # Build the reference ordering as an explicit list, sorted in place.
        reference = list(self.dataset_names)
        if ordering == 'alphabetical':
            reference.sort()
        else:
            reference.sort(
                key=lambda name: self.dataset_dict[name]['num_entries'],
                reverse=True,
            )

        self.assertEqual(result, reference)
def get_available_datasets(self):
    """Return whatever the module-level ``get_available_datasets()`` returns.

    The previous version stored the result in an unused local and then hit a
    bare ``return``, so callers always received ``None``.

    NOTE(review): this assumes the block is a method inside a class, so the
    name ``get_available_datasets`` in the body resolves to the module-level
    function of the same name rather than to this method - confirm against
    the enclosing file.

    Returns:
        The value produced by calling ``get_available_datasets()`` with no
        arguments.
    """
    return get_available_datasets()
from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets, get_all_dataset_info

# For every available dataset whose name contains "matbench_", load it and
# print three lines: the dataset name, its first column that is neither
# "structure" nor "composition", and its number of rows. The output layout
# looks like reST list-table row syntax.
for name in get_available_datasets(print_format=None):
    if "matbench_" not in name:
        continue

    frame = load_dataset(name)
    # Columns other than the two input columns; the first one is reported.
    other_columns = [
        column for column in frame.columns
        if column not in ["structure", "composition"]
    ]
    target_col = other_columns[0]
    row_count = frame.shape[0]
    print(f" * - :code:`{name}`\n - :code:`{target_col}`\n - {row_count}")
# print(get_all_dataset_info("matbench_steels"))
2. composition features are actually desired. (deduced from whether composition featurizers are present in self.featurizers). Args: df (pandas.DataFrame): May or may not contain composition column. Returns: df (pandas.DataFrame): Contains composition column if desired """ if "structure" in df.columns and "composition" not in df.columns: if self.auto_featurizer or (set(_composition_aliases) & set(self.featurizers.keys())): df = self._tidy_column(df, "structure") struct2comp = StructureToComposition( target_col_id="composition", overwrite_data=False) df = struct2comp.featurize_dataframe(df, "structure") self.logger.debug("Adding compositions from structures.") return df if __name__ == "__main__": from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets print(get_available_datasets()) # df = load_dataset("steel_strength").rename(columns={"formula": "composition"})[["yield strength", "composition"]] # af = AutoFeaturizer() # print(df) # df = af.fit_transform(df, "yield strength") from pymatgen import Structure # s = Structure() # s.