示例#1
0
 def test_print_available_datasets(self):
     """Check the ordering returned for each supported ``sort_method``.

     ``get_available_datasets`` is called once per sort method and its
     result is compared with ``self.dataset_names`` sorted the same way.
     """
     def entry_count(name):
         # Value stored under 'num_entries' for this dataset name.
         return self.dataset_dict[name]['num_entries']

     for sort_method in ('alphabetical', 'num_entries'):
         returned = get_available_datasets(sort_method=sort_method)
         if sort_method != 'alphabetical':
             # Sorting by entry count is descending: largest first.
             expected = sorted(self.dataset_names, key=entry_count,
                               reverse=True)
         else:
             expected = sorted(self.dataset_names)
         self.assertEqual(returned, expected)
示例#2
0
 def test_get_available_datasets(self):
     """Check the returned ordering for every argument combination.

     The first two positional arguments are each tried as ``True`` and
     ``False`` and the third as 'alphabetical' and 'num_entries'. Only the
     third argument decides which ordering is expected.
     """
     bool_options = [True, False]
     sort_options = ['alphabetical', 'num_entries']
     for first_flag, second_flag, sort_method in product(
             bool_options, bool_options, sort_options):
         returned = get_available_datasets(first_flag, second_flag,
                                           sort_method)
         if sort_method != 'alphabetical':
             # Sorting by entry count is descending: largest first.
             expected = sorted(
                 self.dataset_names,
                 key=lambda name: self.dataset_dict[name]['num_entries'],
                 reverse=True)
         else:
             expected = sorted(self.dataset_names)
         self.assertEqual(returned, expected)
 def test_get_available_datasets(self):
     """Verify dataset ordering across all positional-argument combinations.

     Every combination of two booleans and a sort method is passed to
     ``get_available_datasets``; the result must equal
     ``self.dataset_names`` ordered according to the sort method.
     """
     def expected_order(sort_method):
         # Alphabetical is a plain sort; otherwise order by descending
         # 'num_entries' taken from self.dataset_dict.
         if sort_method == 'alphabetical':
             return sorted(self.dataset_names)
         return sorted(
             self.dataset_names,
             key=lambda name: self.dataset_dict[name]['num_entries'],
             reverse=True)

     combos = product([True, False], [True, False],
                      ['alphabetical', 'num_entries'])
     for combo in combos:
         # The sort method is the last element of each combination.
         self.assertEqual(get_available_datasets(*combo),
                          expected_order(combo[-1]))
示例#4
0
 def get_available_datasets(self):
     """Call ``get_available_datasets()`` with no arguments; return ``None``.

     NOTE(review): the value produced by the call is discarded and nothing
     is returned to the caller. Confirm whether this method was meant to
     return the dataset list.
     """
     # Assuming this is a method defined in a class body, the bare name
     # below is looked up in the enclosing module scope, not on ``self``.
     get_available_datasets()
     return None
示例#5
0
from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets, get_all_dataset_info
# For every dataset whose name contains "matbench_", print a three-line bullet
# entry: the dataset name, the first column that is neither "structure" nor
# "composition", and the number of rows. (The layout looks like
# reStructuredText list-table rows; confirm against wherever it is pasted.)
datasets = get_available_datasets(print_format=None)

for dataset in datasets:
    if "matbench_" not in dataset:
        continue

    df = load_dataset(dataset)

    # The first remaining column is taken as the target. An IndexError is
    # raised if the frame holds only structure/composition columns.
    candidate_cols = [
        col for col in df.columns if col not in ("structure", "composition")
    ]
    target_col = candidate_cols[0]
    print(f"   * - :code:`{dataset}`\n     - :code:`{target_col}`\n     - {df.shape[0]}")


# print(get_all_dataset_info("matbench_steels"))
示例#6
0
            2. composition features are actually desired. (deduced from whether
                composition featurizers are present in self.featurizers).
        Args:
            df (pandas.DataFrame): May or may not contain composition column.

        Returns:
            df (pandas.DataFrame): Contains composition column if desired
        """
        if "structure" in df.columns and "composition" not in df.columns:
            if self.auto_featurizer or (set(_composition_aliases)
                                        & set(self.featurizers.keys())):
                df = self._tidy_column(df, "structure")
                struct2comp = StructureToComposition(
                    target_col_id="composition", overwrite_data=False)
                df = struct2comp.featurize_dataframe(df, "structure")
                self.logger.debug("Adding compositions from structures.")
        return df


if __name__ == "__main__":
    # Ad-hoc manual check, run only when this file is executed as a script:
    # print whatever get_available_datasets() returns.
    from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets

    print(get_available_datasets())
    # Disabled example of featurizing a dataset (load_dataset is imported
    # above for this snippet):
    # df = load_dataset("steel_strength").rename(columns={"formula": "composition"})[["yield strength", "composition"]]
    # af = AutoFeaturizer()
    # print(df)
    # df = af.fit_transform(df, "yield strength")

    # Import Structure from its defining module. The root-level shortcut
    # ``from pymatgen import Structure`` is no longer available in newer
    # pymatgen releases (2022.0.0 onward, per its change log), whereas
    # ``pymatgen.core.structure`` works on both old and new versions.
    from pymatgen.core.structure import Structure
    # s = Structure()