Пример #1
0
def load_dataset(name, data_home=None, download_if_missing=True):
    """
    Return the dataset registered under ``name`` as a dataframe.

    The dataset file is stored in / read from the folder resolved by
    ``_get_data_home``: ``data_home`` if specified, otherwise the
    MATMINER_DATA environment variable if set, or matminer/datasets by
    default.

    Args:
        name (str): keyword specifying which dataset to load, run
            matminer.datasets.get_available_datasets() for options

        data_home (str): path to the folder to look for the dataset file

        download_if_missing (bool): whether to download the dataset if it
            is not found on disk

    Returns: (pd.DataFrame) the dataset read from its file on disk

    Raises:
        ValueError: if ``name`` is not a key of the dataset metadata
            registry; the message also lists registered names that contain
            ``name`` (compared case-insensitively), if there are any
    """
    global _dataset_dict

    # The metadata registry is loaded lazily and cached at module level.
    if _dataset_dict is None:
        _dataset_dict = _load_dataset_dict()

    if name not in _dataset_dict:
        message = (
            "Unrecognized dataset name: {}. \n"
            "Use matminer.datasets.get_available_datasets() "
            "to see a list of currently available "
            "datasets"
        ).format(name)

        # Cheap "did you mean" hint: every registered name that contains the
        # requested keyword, ignoring case.
        suggestions = []
        for known_name in _dataset_dict:
            if name.lower() in known_name.lower():
                suggestions.append(known_name)

        if suggestions:
            message += (
                "\nCould you have been looking for these similar "
                "matches?:\n{}"
            ).format(suggestions)

        raise ValueError(message)

    metadata = _dataset_dict[name]

    # File name is "<name>.<file_type>" inside the resolved data folder.
    storage_dir = _get_data_home(data_home)
    file_name = name + "." + metadata['file_type']
    data_path = os.path.join(storage_dir, file_name)

    # NOTE(review): presumably checks the file against the recorded hash and
    # downloads it from the url when allowed -- confirm in _validate_dataset.
    _validate_dataset(data_path, metadata['url'], metadata['hash'],
                      download_if_missing)

    return load_dataframe_from_json(data_path)
Пример #2
0
def load_dataset(name, data_home=None, download_if_missing=True):
    """
    Load the dataframe for the dataset identified by ``name``.

    The dataset file is stored/loaded from data_home if specified, otherwise
    at the MATMINER_DATA environment variable if set or at matminer/datasets
    by default (the folder is resolved by ``_get_data_home``).

    Args:
        name (str): keyword specifying what dataset to load, run
            matminer.datasets.get_available_datasets() for options

        data_home (str): path to folder to look for the dataset file

        download_if_missing (bool): whether to download the dataset if it
            is not found on disk

    Returns: (pd.DataFrame) the contents of the dataset file

    Raises:
        ValueError: if ``name`` is not present in the dataset metadata
            registry; registered names containing ``name`` (ignoring case)
            are appended to the message as suggestions
    """
    global _dataset_dict

    # First call populates the module-level registry; later calls reuse it.
    if _dataset_dict is None:
        _dataset_dict = _load_dataset_dict()

    if name not in _dataset_dict:
        complaint = (
            "Unrecognized dataset name: {}. \n"
            "Use matminer.datasets.get_available_datasets() "
            "to see a list of currently available "
            "datasets"
        ).format(name)

        # Give immediate feedback by listing registered names that contain
        # the unrecognized keyword (case-insensitive substring match).
        close_names = list(filter(
            lambda candidate: name.lower() in candidate.lower(),
            _dataset_dict,
        ))

        if close_names:
            hint = (
                "\nCould you have been looking for these similar "
                "matches?:\n{}"
            ).format(close_names)
            complaint += hint

        raise ValueError(complaint)

    entry = _dataset_dict[name]

    # The on-disk file is named after the dataset plus its file-type suffix.
    data_path = os.path.join(
        _get_data_home(data_home),
        name + "." + entry['file_type'],
    )

    # NOTE(review): _validate_dataset is defined elsewhere; it receives the
    # url and hash, so it presumably downloads/verifies the file -- confirm.
    _validate_dataset(
        data_path, entry['url'], entry['hash'], download_if_missing
    )

    return load_dataframe_from_json(data_path)
Пример #3
0
def load_dataset(name,
                 data_home=None,
                 download_if_missing=True,
                 include_metadata=False):
    """
    Return the dataset registered under ``name`` as a dataframe.

    The dataset file is stored/loaded from data_home if specified, otherwise
    at the MATMINER_DATA environment variable if set or at matminer/datasets
    by default (the folder is resolved by ``_get_data_home``).

    Args:
        name (str): keyword specifying what dataset to load, run
            matminer.datasets.available_datasets() for options

        data_home (str): path to folder to look for the dataset file

        download_if_missing (bool): whether to download the dataset if it
            is not found on disk

        include_metadata (bool): when False (the default), the metadata
            columns of some datasets are dropped before returning:
            'cif', 'kpoint_density' and 'poscar' for "elastic_tensor_2015";
            'cif', 'meta' and 'poscar' for "piezoelectric_tensor" and
            "dielectric_constant". Other datasets are returned unchanged.

    Returns: (pd.DataFrame)

    Raises:
        ValueError: if ``name`` is not a key of the dataset metadata
            registry; registered names containing ``name`` (ignoring case)
            are appended to the message as suggestions
    """
    registry = _load_dataset_dict()

    if name not in registry:
        message = (
            "Unrecognized dataset name: {}. \n"
            "Use matminer.datasets.available_datasets() "
            "to see a list of currently available "
            "datasets"
        ).format(name)

        # Cheap "did you mean" hint: registered names that contain the
        # requested keyword, compared case-insensitively.
        suggestions = []
        for known_name in registry:
            if name.lower() in known_name.lower():
                suggestions.append(known_name)

        if suggestions:
            message += (
                "\nCould you have been looking for these similar "
                "matches?:\n{}"
            ).format(suggestions)

        raise ValueError(message)

    entry = registry[name]

    # File name is "<name>.<file_type>" inside the resolved data folder.
    storage_dir = _get_data_home(data_home)
    data_path = os.path.join(storage_dir, name + "." + entry['file_type'])

    # NOTE(review): presumably verifies the file against the hash and
    # downloads it from the url when allowed -- confirm in _validate_dataset.
    _validate_dataset(data_path, entry['url'], entry['hash'],
                      download_if_missing)

    df = load_dataframe_from_json(data_path)

    if include_metadata:
        return df

    # Metadata columns stripped per dataset when the caller did not ask
    # for them; datasets not listed here are returned as loaded.
    metadata_columns = {
        "elastic_tensor_2015": ['cif', 'kpoint_density', 'poscar'],
        "piezoelectric_tensor": ['cif', 'meta', 'poscar'],
        "dielectric_constant": ['cif', 'meta', 'poscar'],
    }
    if name in metadata_columns:
        df = df.drop(metadata_columns[name], axis=1)

    return df
Пример #4
0
 def test_get_data_home(self):
     """Check the folder returned by _get_data_home with and without a path."""
     # Without an argument the result must match self.dataset_dir
     # (set up outside this method).
     self.assertEqual(_get_data_home(), self.dataset_dir)

     # An explicitly supplied path must be returned unchanged.
     self.assertEqual(_get_data_home('/some/specified/path'),
                      '/some/specified/path')
Пример #5
0
 def test_get_data_home(self):
     """Verify _get_data_home for the no-argument and explicit-path cases."""
     # No argument: expected to equal the fixture's dataset_dir attribute
     # (assigned elsewhere in the test class).
     default_dir = _get_data_home()
     self.assertEqual(default_dir, self.dataset_dir)

     # Explicit path: the same path should come straight back.
     requested_dir = '/some/specified/path'
     resolved_dir = _get_data_home(requested_dir)
     self.assertEqual(resolved_dir, requested_dir)