Example No. 1
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://qmml.org/datasets.html" + "#gdb7-13",
            "acl": ["public"],
            "mdf_source_name":
            "gdb7-13",
            "mdf-publish.publication.collection":
            "gdb7-13",
            "mdf_data_class":
            "xyz",
            "cite_as": [
                "Gr\'egoire Montavon, Matthias Rupp, Vivekanand Gobre, Alvaro Vazquez-Mayagoitia, Katja Hansen, Alexandre Tkatchenko, Klaus-Robert M\"uller, O. Anatole von Lilienfeld: Machine learning of molecular electronic properties in chemical compound space, New Journal of Physics, 15(9): 095003, IOP Publishing, 2013.DOI: 10.1088/1367-2630/15/9/095003"
            ],
            "license":
            "https://creativecommons.org/licenses/by/3.0/",
            "mdf_version":
            "0.1.0",
            "dc.title":
            "Machine learning of molecular electronic properties in chemical compound space",
            "dc.creator":
            "Argonne National Laboratory, Einstein Foundation, National Research Foundation of Korea",
            "dc.identifier":
            "http://qmml.org/datasets.html" + "#gdb7-13",
            "dc.contributor.author": [
                "Grégoire Montavon", "Matthias Rupp", "Vivekanand Gobre",
                "Alvaro Vazquez-Mayagoitia", "Katja Hansen",
                "Alexandre Tkatchenko", "Klaus-Robert Müller",
                "O. Anatole von Lilienfeld"
            ],
            #            "dc.subject": ,
            "dc.description":
            "7k small organic molecules, in their ground state, 14 combinations of properties and theory levels. 7,211 small organic molecules composed of H, C, N, O, S, Cl, saturated with H, and up to 7 non-H atoms. Molecules relaxed using DFT with PBE functional. Properties are atomization energy (DFT/PBE0), averaged polarizability (DFT/PBE0, SCS), H**O and LUMO eigenvalues (GW, DFT/PBE0, ZINDO), and, ionization potential, electron affinity, first excitation energy, frequency of maximal absorption (all ZINDO).",  # RCM string: Description of dataset contents
            "dc.relatedidentifier":
            ["https://doi.org/10.1088/1367-2630/15/9/095003"],
            "dc.year":
            2013
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "xyz")
        uri = "https://data.materialsdatafacility.org/collections/" + "gdb7-13/gdb7-13_data/" + data_file[
            "filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            "mdf-base.material_composition": record["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title": "gdb7-13 " + data_file["filename"],
            #            "dc.creator": ,
            #            "dc.identifier": ,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "xyz": uri
                },
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    # if result["success"] is not True:
    #     print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "https://publish.globus.org/jspui/handle/ITEM/164",
        "acl": ["public"],
        "mdf_source_name":
        "ab_initio_solute_database",
        "mdf-publish.publication.collection":
        "High-Throughput ab-initio Dilute Solute Diffusion Database",
        "mdf_data_class":
        "vasp",
        "cite_as": [
            'Wu, Henry; Mayeshiba, Tam; Morgan, Dane, "Dataset for High-throughput Ab-initio Dilute Solute Diffusion Database," 2016, http://dx.doi.org/doi:10.18126/M2X59R'
        ],
        "dc.title":
        "High-throughput Ab-initio Dilute Solute Diffusion Database",
        "dc.creator":
        "Materials Data Facility",
        "dc.identifier":
        "http://dx.doi.org/doi:10.18126/M2X59R",
        "dc.contributor.author":
        ["Wu, Henry", "Mayeshiba, Tam", "Morgan, Dane"],
        "dc.subject": ["dilute", "solute", "DFT", "diffusion"],
        "dc.description":
        "We demonstrate automated generation of diffusion databases from high-throughput density functional theory (DFT) calculations. A total of more than 230 dilute solute diffusion systems in Mg, Al, Cu, Ni, Pd, and Pt host lattices have been determined using multi-frequency diffusion models. We apply a correction method for solute diffusion in alloys using experimental and simulated values of host self-diffusivity.",
        "dc.relatedidentifier": [
            "http://dx.doi.org/10.1038/sdata.2016.54",
            "http://dx.doi.org/10.6084/m9.figshare.1546772"
        ],
        "dc.year":
        2016
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(root=input_path,
                                    file_pattern="^OUTCAR$",
                                    verbose=verbose),
                         desc="Processing data files",
                         disable=not verbose):
        file_data = parse_ase(file_path=os.path.join(dir_data["path"],
                                                     dir_data["filename"]),
                              data_format="vasp",
                              verbose=False)
        if file_data:
            uri = "https://data.materialsdatafacility.org/published/publication_164/data/" + dir_data[
                "no_root_path"] + "/" + dir_data["filename"]
            record_metadata = {
                "globus_subject":
                uri,
                "acl": ["public"],
                "mdf-publish.publication.collection":
                "High-Throughput ab-initio Dilute Solute Diffusion Database",
                "mdf-base.material_composition":
                file_data["frames"][0]["chemical_formula"],
                "dc.title":
                "High-throughput Ab-initio Dilute Solute Diffusion Database - "
                + file_data["frames"][0]["chemical_formula"],
                #"dc.creator": ,
                "dc.identifier":
                uri,
                #"dc.contributor.author": ,
                #"dc.subject": ,
                #"dc.description": ,
                #"dc.relatedidentifier": ,
                #"dc.year": ,
                "data": {
                    #                   "raw": str(file_data),
                    "files": {
                        "outcar": uri
                    }
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":",
                      result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 3
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://solarfuelshub.org/xps-spectral-database",
            "acl": ["public"],
            "mdf_source_name": "jcap_xps_spectral_db",
            "mdf-publish.publication.collection": "JCAP XPS Spectral DB",
#            "mdf_data_class": ,

            "cite_as": ["http://solarfuelshub.org/xps-spectral-database"],
#            "license": ,

            "dc.title": "JCAP XPS Spectral Database",
            "dc.creator": "JCAP",
            "dc.identifier": "http://solarfuelshub.org/xps-spectral-database",
#            "dc.contributor.author": ,
#            "dc.subject": ,
            "dc.description": "The JCAP High Throughput Experimentation research team uses combinatorial methods to quickly identify promising light absorbers and catalysts for solar-fuel devices. Pure-phase materials — including metal oxides, nitrides, sulfides, oxinitrides, and other single- and mixed-metal materials — are prepared using multiple deposition techniques (e.g., physical vapor deposition, inkjet printing, and micro-fabrication) on various substrates. High-resolution X-ray photoelectron spectroscopy (XPS) spectra for materials that have been characterized to date are made available here as part of JCAP's Materials Characterization Standards (MatChS) database.",
#            "dc.relatedidentifier": ,
#            "dc.year": 
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, ".json"), desc="Processing files", disable= not verbose):
        with open(os.path.join(data_file["path"], data_file["filename"])) as in_file:
            data = json.load(in_file)
        link = data.pop("link")
        record_metadata = {
            "globus_subject": link,
            "acl": ["public"],
#            "mdf-publish.publication.collection": ,
#            "mdf_data_class": ,
            "mdf-base.material_composition": data["material"],

#            "cite_as": ,
#            "license": ,

            "dc.title": "JCAP Spectra - " + data["xps_region"],
#            "dc.creator": ,
            "dc.identifier": link,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
            "dc.year": data.pop("year"),

            "data": {
#                "raw": json.dumps(list(parse_tab(data.pop("data")))),
                "files": {"csv": link}
                }
            }
        data.pop("data")
        record_metadata["data"].update(data)

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://hdl.handle.net/11256/701",
            "acl": ["public"],
            "mdf_source_name": "strain_effects_oxygen",
            "mdf-publish.publication.collection": "Strain Effects on Oxygen Migration",
            "mdf_data_class": "vasp",

            "cite_as": ["Mayeshiba, T. & Morgan, D. Strain effects on oxygen migration in perovskites. Physical chemistry chemical physics : PCCP 17, 2715-2721, doi:10.1039/c4cp05554c (2015).", "Mayeshiba, T. & Morgan, D. Correction: Strain effects on oxygen migration in perovskites. Physical chemistry chemical physics : PCCP, doi:10.1039/c6cp90050j (2016)."],
#            "license": ,

            "dc.title": "Strain effects on oxygen migration in perovskites: La[Sc, Ti, V, Cr, Mn, Fe, Co, Ni, Ga]O3",
            "dc.creator": "University of Wisconsin-Madison",
            "dc.identifier": "http://hdl.handle.net/11256/701",
            "dc.contributor.author": ["Mayeshiba, Tam", "Morgan, Dane"],
#            "dc.subject": ,
#            "dc.description": ,
            "dc.relatedidentifier": ["https://dx.doi.org/10.1039/c4cp05554c", "https://dx.doi.org/10.1039/c6cp90050j"],
            "dc.year": 2016
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "^OUTCAR$"), desc="Processing files", disable= not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]
        try:
            record_metadata = {
                "globus_subject": uri,
                "acl": ["public"],
    #            "mdf-publish.publication.collection": ,
                "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

    #            "cite_as": ,
    #            "license": ,

                "dc.title": "Oxygen Migration - " + data["frames"][0]["chemical_formula"],
    #            "dc.creator": ,
                "dc.identifier": uri,
    #            "dc.contributor.author": ,
    #            "dc.subject": ,
    #            "dc.description": ,
    #            "dc.relatedidentifier": ,
    #            "dc.year": ,

                "data": {
    #                "raw": ,
                    "files": {"outcar": uri}
                    }
                }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        except Exception:
            if verbose:
                print("Error on:", os.path.join(data_file["path"], data_file["filename"]))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://hdl.handle.net/11256/85",
            "acl": ["public"],
            "mdf_source_name":
            "doak_strain_energies",
            "mdf-publish.publication.collection":
            "Doak Strain Energies",
            "mdf_data_class":
            "vasp",
            "cite_as": [
                "Doak JW, Wolverton C (2012) Coherent and incoherent phase stabilities of thermoelectric rocksalt IV-VI semiconductor alloys. Phys. Rev. B 86: 144202 http://dx.doi.org/10.1103/PhysRevB.86.144202"
            ],
            "license":
            "http://creativecommons.org/licenses/by-sa/3.0/us/",
            "dc.title":
            "GeTe-PbTe PbS-PbTe PbSe-PbS PbTe-PbSe PbTe-SnTe SnTe-GeTe mixing and coherency strain energies",
            "dc.creator":
            "Northwestern University",
            "dc.identifier":
            "http://hdl.handle.net/11256/85",
            "dc.contributor.author": ["Doak, JW", "Wolverton, C"],
            #            "dc.subject": ,
            #            "dc.description": ,
            "dc.relatedidentifier":
            ["http://dx.doi.org/10.1103/PhysRevB.86.144202"],
            "dc.year":
            2012
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        try:
            record_metadata = {
                "globus_subject":
                uri,
                "acl": ["public"],
                "mdf-publish.publication.collection":
                "Doak Strain Energies",
                "mdf-base.material_composition":
                data["frames"][0]["chemical_formula"],

                #            "cite_as": ,
                #            "license": ,
                "dc.title":
                "Strain Energy - " + data["frames"][0]["chemical_formula"],
                #            "dc.creator": ,
                "dc.identifier":
                uri,
                #            "dc.contributor.author": ,
                #            "dc.subject": ,
                #            "dc.description": ,
                #            "dc.relatedidentifier": ,
                #            "dc.year": ,
                "data": {
                    #                    "raw": ,
                    "files": {
                        "outcar": uri
                    }
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":",
                      result.get("invalid_metadata", ""))
        except Exception:
            print("Error on:", data_file["path"])

    if verbose:
        print("Finished converting")
Example No. 6
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "http://quantum-machine.org/datasets/#qm9",
        "acl": ["public"],
        "mdf_source_name":
        "qm9",
        "mdf-publish.publication.collection":
        "Quantum Machine",
        "mdf_data_class":
        "xyz",
        "cite_as": [
            "L. Ruddigkeit, R. van Deursen, L. C. Blum, J.-L. Reymond, Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17, J. Chem. Inf. Model. 52, 2864–2875, 2012.",
            "R. Ramakrishnan, P. O. Dral, M. Rupp, O. A. von Lilienfeld, Quantum chemistry structures and properties of 134 kilo molecules, Scientific Data 1, 140022, 2014."
        ],
        "dc.title":
        "Quantum Machine - QM9",
        "dc.creator":
        "Quantum Machine",
        "dc.identifier":
        "http://quantum-machine.org/datasets/#qm9",
        "dc.contributor.author": [
            "L. Ruddigkeit", "R. van Deursen", "L. C. Blum", "J.-L. Reymond",
            "R. Ramakrishnan", "P. O. Dral", "M. Rupp", "O. A. von Lilienfeld"
        ],
        "dc.subject": ["gdb-17"],
        "dc.description":
        ("Computational de novo design of new drugs and materials requires rigorous and unbiased exploration of chemical compound space. "
         "However, large uncharted territories persist due to its size scaling combinatorially with molecular size. We report computed geometric, "
         "energetic, electronic, and thermodynamic properties for 134k stable small organic molecules made up of CHONF. These molecules correspond "
         "to the subset of all 133,885 species with up to nine heavy atoms (CONF) out of the GDB-17 chemical universe of 166 billion organic "
         "molecules. We report geometries minimal in energy, corresponding harmonic frequencies, dipole moments, polarizabilities, along with "
         "energies, enthalpies, and free energies of atomization. All properties were calculated at the B3LYP/6-31G(2df,p) level of quantum "
         "chemistry. Furthermore, for the predominant stoichiometry, C7H10O2, there are 6,095 constitutional isomers among the 134k molecules. We "
         "report energies, enthalpies, and free energies of atomization at the more accurate G4MP2 level of theory for all of them. As such, this "
         "data set provides quantum chemical properties for a relevant, consistent, and comprehensive chemical space of small organic molecules. "
         "This database may serve the benchmarking of existing methods, development of new methods, such as hybrid quantum mechanics/machine "
         "learning, and systematic identification of structure-property relationships."
         ),
        "dc.relatedidentifier": ["https://doi.org/10.6084/m9.figshare.978904"],
        "dc.year":
        2014
    }

    # Make a Validator to help write the feedstock
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for file_data in tqdm(find_files(input_path, "xyz"),
                          desc="Processing QM9",
                          disable=not verbose):
        file_path = os.path.join(file_data["path"], file_data["filename"])
        record = parse_ase(file_path, "xyz")
        record_metadata = {
            "globus_subject":
            "https://data.materialsdatafacility.org/collections/test/qm9/" +
            file_data["no_root_path"] + "/" + file_data["filename"],
            "acl": ["public"],
            "mdf-publish.publication.collection":
            "Quantum Machine",
            "mdf-base.material_composition":
            record.get("chemical_formula", ""),
            "dc.title":
            "QM9 - " + record.get("chemical_formula", "") + " - " +
            file_data["filename"],
            "dc.creator":
            "Quantum Machine",
            "dc.identifier":
            "http://quantum-machine.org/datasets/#qm9",
            #"dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
            #"dc.subject": ,                          # OPT list of strings: Keywords about record
            #"dc.description": ,                      # OPT string: Description of record
            #"dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
            #"dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)
            "data": {
                #"raw": ,                             # RCM string: Original data record text, if feasible
                "files": {
                    "xyz":
                    "https://data.materialsdatafacility.org/collections/test/qm9/"
                    + file_data["no_root_path"] + "/" + file_data["filename"]
                },
                "quantum chemistry level": {"B3LYP/6-31G(2df,p)"}
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_data", ""))

    if verbose:
        print("Finished converting")
Example No. 7
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "https://materialscommons.org/mcpub/",
            "acl": ["public"],
            "mdf_source_name":
            "materials_commons",
            "mdf-publish.publication.collection":
            "Materials Commons",
            #            "mdf_data_class": ,
            "cite_as": [
                "Puchala, B., Tarcea, G., Marquis, E.A. et al. JOM (2016) 68: 2035. doi:10.1007/s11837-016-1998-7"
            ],
            #            "license": ,
            "dc.title":
            "Materials Commons Data",
            "dc.creator":
            "University of Michigan",
            "dc.identifier":
            "https://materialscommons.org/mcpub/",
            "dc.contributor.author": [
                "B Puchala", "G Tarcea", "EA Marquis", "M Hedstrom",
                "HV Hagadish", "JE Allison"
            ],
            #            "dc.subject": ,
            "dc.description":
            "A platform for sharing research data.",
            "dc.relatedidentifier":
            ["https://dx.doi.org/10.1007/s11837-016-1998-7"],
            "dc.year":
            2016
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(input_path,
                                    file_pattern="json",
                                    verbose=verbose),
                         desc="Processing metadata",
                         disable=not verbose):
        with open(os.path.join(dir_data["path"],
                               dir_data["filename"])) as file_data:
            mc_data = json.load(file_data)
        record_metadata = {
            "globus_subject":
            "https://materialscommons.org/mcpub/#/details/" + mc_data["id"],
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            #            "mdf-base.material_composition": ,

            #            "cite_as": ,
            #            "license": ,
            "dc.title":
            mc_data["title"],
            #            "dc.creator": ,
            "dc.identifier":
            "https://materialscommons.org/mcpub/#/details/" + mc_data["id"],
            "dc.contributor.author": [
                author["firstname"] + " " + author["lastname"]
                for author in mc_data["authors"]
            ],
            #            "dc.subject": mc_data["keywords"],
            "dc.description":
            mc_data["description"],
            #            "dc.relatedidentifier": mc_data["doi"],
            "dc.year":
            int(mc_data.get("published_date", "0000")[:4])

            #            "data": {
            #                "raw": ,
            #                "files": ,
            #                }
        }
        if mc_data["license"]["link"]:
            record_metadata["license"] = mc_data["license"]["link"]
        if mc_data["keywords"]:
            record_metadata["dc.subject"] = mc_data["keywords"]
        if mc_data["doi"]:
            record_metadata["dc.relatedidentifier"] = [mc_data["doi"]]

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "https://doi.org/10.17863/CAM.705",
            "acl": ["public"],
            "mdf_source_name": "quinary_alloys",
            "mdf-publish.publication.collection": "Ni-Co-Al-Ti-Cr Quinary Alloys",
#            "mdf_data_class": ,

            "cite_as": ['Christofidou, K. A., Jones, N. G., Pickering, E. J., Flacau, R., Hardy, M. C., & Stone, H. J. Research Data Supporting "The microstructure and hardness of Ni-Co-Al-Ti-Cr quinary alloys" [Dataset]. https://doi.org/10.17863/CAM.705'],
            "license": "http://creativecommons.org/licenses/by/4.0/",
            "mdf_version": "0.1.0",

            "dc.title": 'Research Data Supporting "The microstructure and hardness of Ni-Co-Al-Ti-Cr quinary alloys"',
            "dc.creator": "University of Cambridge",
            "dc.identifier": "https://doi.org/10.17863/CAM.705",
            "dc.contributor.author": ["Christofidou, K. A.", "Jones, N. G.", "Pickering, E. J.", "Flacau, R.", "Hardy, M. C.", "Stone, H. J."],
            "dc.subject": ["DSC", "SEM", "TEM", "neutron diffraction", "thermodynamics", "hardness"],
            "dc.description": "DSC files, neutron diffraction data, hardness measurements, SEM and TEM images and thermodynamic simulations are provided for all alloy compositions studied and presented in this manuscript. The naming convention is provided in the manuscript along with the composition of each alloy.",
            "dc.relatedidentifier": ["https://doi.org/10.1016/j.jallcom.2016.07.159"],
            "dc.year": 2016
            }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)


    # Get the data
    # Each record also needs its own metadata
    with open(os.path.join(input_path, "alloy_data.csv"), 'r') as adata:
        raw_data = adata.read()
    for record in tqdm(parse_tab(raw_data), desc="Processing records", disable=not verbose):
        links = {}
        mdf_base = "https://data.materialsdatafacility.org/collections/quinary_alloys/"
        for ln in find_files(input_path, record["Alloy"]):
            key = "_".join(ln["no_root_path"].split("/")).replace(" ", "_")
            links[key] = mdf_base + os.path.join(ln["no_root_path"], ln["filename"])
        record_metadata = {
            "globus_subject": mdf_base + "alloy_data.csv#" + record["Alloy"],
            "acl": ["public"],
#            "mdf-publish.publication.collection": ,
#            "mdf_data_class": ,
            "mdf-base.material_composition": "NiCoAlTiCr",

#            "cite_as": ,
#            "license": ,

            "dc.title": "Ni-Co-Al-Ti-Cr Quinary Alloys " + record["Alloy"],
#            "dc.creator": ,
            "dc.identifier": mdf_base + "alloy_data.csv",
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
                "raw": json.dumps(record),
                "files": links,
                "atomic_composition_percent": {
                    "Ni": record["Ni"],
                    "Co": record["Co"],
                    "Al": record["Al"],
                    "Ti": record["Ti"],
                    "Cr": record["Cr"]
                    }
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    if verbose:
        print("Finished converting")
Example No. 9
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject": "http://khazana.uconn.edu/module_search/search.php?m=2",
        "acl": ["public"],
        "mdf_source_name": "khazana_vasp",
        "mdf-publish.publication.collection": "Khazana",
        "mdf_data_class": "vasp",

        "cite_as": ["http://khazana.uconn.edu/module_search/search.php?m=2"],
        "dc.title": "Khazana (VASP)",
        "dc.creator": "University of Connecticut",
        "dc.identifier": "http://khazana.uconn.edu",
#        "dc.contributor.author": ,
        "dc.subject": ["DFT", "VASP"]
#        "dc.description": ,
#        "dc.relatedidentifier": ,
#        "dc.year": 
        }


    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(root=input_path, file_pattern="^OUTCAR"), desc="Processing data files", disable=not verbose):
        file_data = parse_ase(file_path=os.path.join(dir_data["path"], dir_data["filename"]), data_format="vasp", verbose=False)

        uri = "https://data.materialsdatafacility.org/collections/khazana/OUTCARS/" + dir_data["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            "mdf-publish.publication.collection": "Khazana",
            "mdf-base.material_composition": file_data["frames"][0]["chemical_formula"],

            "dc.title": "Khazana VASP - " + file_data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
#                "raw": str(file_data),
                "files": {"outcar": uri}
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata"))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # TODO: Make sure the metadata is present in some form.
    # Fields can be:
    #    REQ (Required, must be present)
    #    RCM (Recommended, should be present if possible)
    #    OPT (Optional, can be present if useful)
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "https://figshare.com/articles/Synthesis_Characterization_and_Some_Properties_of_Cp_W_NO_H_sup_3_sup_allyl_Complexes/2158483",  # REQ string: Unique value (should be URI if possible)
            "acl": [
                "public"
            ],  # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            "mdf_source_name":
            "cp_complexes",  # REQ string: Unique name for dataset
            "mdf-publish.publication.collection":
            "Cp*W(NO)(H)(η3‑allyl) Complexes",  # RCM string: Collection the dataset belongs to
            "mdf_data_class":
            "CIF",  # RCM string: Type of data in all records in the dataset (do not provide for multi-type datasets)
            "cite_as": [
                "Baillie, Rhett A.; Holmes, Aaron S.; Lefèvre, Guillaume P.; Patrick, Brian O.; Shree, Monica V.; Wakeham, Russell J.; Legzdins, Peter; Rosenfeld, Devon C. (2015): Synthesis, Characterization, and Some Properties of Cp*W(NO)(H)(η3‑allyl) Complexes. ACS Publications. https://doi.org/10.1021/acs.inorgchem.5b00747.s002"
            ],  # REQ list of strings: Complete citation(s) for this dataset.
            "license":
            "https://creativecommons.org/licenses/by-nc/4.0/",  # RCM string: License to use the dataset (preferrably a link to the actual license).
            "mdf_version":
            "0.1.0",  # REQ string: The metadata version in use (see VERSION above).
            "dc.title":
            "Synthesis, Characterization, and Some Properties of Cp*W(NO)(H)(η3‑allyl) Complexes",  # REQ string: Title of dataset
            "dc.creator":
            "The University of British Columbia, The Dow Chemical Company",  # REQ string: Owner of dataset
            "dc.identifier":
            "https://figshare.com/articles/Synthesis_Characterization_and_Some_Properties_of_Cp_W_NO_H_sup_3_sup_allyl_Complexes/2158483",  # REQ string: Link to dataset (dataset DOI if available)
            "dc.contributor.author": [
                "Baillie, Rhett A.", "Holmes, Aaron S.",
                "Lefèvre, Guillaume P.", "Patrick, Brian O.",
                "Shree, Monica V.", "Wakeham, Russell J.", "Legzdins, Peter",
                "Rosenfeld, Devon C."
            ],  # RCM list of strings: Author(s) of dataset
            "dc.subject": [
                "THF", "DFT", "18 e PMe 3 adducts", "complex",
                "coordination isomers", "magnesium allyl reagent"
            ],  # RCM list of strings: Keywords about dataset
            "dc.description":
            "Sequential treatment at low temperatures of Cp*W­(NO)­Cl2 in THF with 1 equiv of a binary magnesium allyl reagent, followed by an excess of LiBH4, affords three new Cp*W­(NO)­(H)­(η3-allyl) complexes, namely, Cp*W­(NO)­(H)­(η3-CH2CHCMe2) (1), Cp*W­(NO)­(H)­(η3-CH2CHCHPh) (2), and Cp*W­(NO)­(H)­(η3-CH2CHCHMe) (3).",  # RCM string: Description of dataset contents
            "dc.relatedidentifier": [
                "https://doi.org/10.1021/acs.inorgchem.5b00747"
            ],  # RCM list of strings: Link(s) to related materials (such as an article)
            "dc.year":
            2015  # RCM integer: Year of dataset creation
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    # TODO: Write the code to convert your dataset's records into JSON-serializable Python dictionaries
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype
    for file_data in find_files(input_path, ".cif"):
        record = parse_ase(os.path.join(file_data["path"],
                                        file_data["filename"]),
                           data_format="cif")

        # TODO: Fill in these dictionary fields for each record
        # Fields can be:
        #    REQ (Required, must be present)
        #    RCM (Recommended, should be present if possible)
        #    OPT (Optional, can be present if useful)
        record_metadata = {
            "globus_subject":
            "https://figshare.com/articles/Synthesis_Characterization_and_Some_Properties_of_Cp_W_NO_H_sup_3_sup_allyl_Complexes/2158483#"
            + record[
                "chemical_formula"],  # REQ string: Unique value (should be URI to record if possible)
            "acl": [
                "public"
            ],  # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            #            "mdf-publish.publication.collection": ,  # OPT string: Collection the record belongs to (if different from dataset)
            #            "mdf_data_class": ,                      # OPT string: Type of data in record (if not set in dataset metadata)
            "mdf-base.material_composition":
            record[
                "chemical_formula"],  # RCM string: Chemical composition of material in record

            #            "cite_as": ,                             # OPT list of strings: Complete citation(s) for this record (if different from dataset)
            #            "license": ,                             # OPT string: License to use the record (if different from dataset) (preferrably a link to the actual license).
            "dc.title":
            "Cp Complexes - " +
            record["chemical_formula"],  # REQ string: Title of record
            #            "dc.creator": ,                          # OPT string: Owner of record (if different from dataset)
            #            "dc.identifier": ,                       # RCM string: Link to record (record webpage, if available)
            #            "dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
            #            "dc.subject": ,                          # OPT list of strings: Keywords about record
            #            "dc.description": ,                      # OPT string: Description of record
            #            "dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
            #            "dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)

            #            "data": {                                # RCM dictionary: Other record data (described below)
            #                "raw": json.dumps(record),                             # RCM string: Original data record text, if feasible
            #                "files": ,                           # RCM dictionary: {file_type : uri_to_file} pairs, data files (Example: {"cif" : "https://example.org/cifs/data_file.cif"})

            # other                              # RCM any JSON-valid type: Any other data fields you would like to include go in the "data" dictionary. Keys will be prepended with 'mdf_source_name:'
            #                }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    # if result["success"] is not True:
    #     print("Error:", result["message"])

    # TODO: Save your converter as [mdf_source_name]_converter.py
    # You're done!
    if verbose:
        print("Finished converting")
Example No. 11
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "https://matin.gatech.edu/",
            "acl": ["public"],
            "mdf_source_name":
            "matin",
            "mdf-publish.publication.collection":
            "MATIN",
            "mdf_data_class":
            "oai_pmh",
            "cite_as": ["https://matin.gatech.edu/"],
            #            "license": ,
            "dc.title":
            "MATerials Innovation Network",
            "dc.creator":
            "Georgia Institute of Technology",
            "dc.identifier":
            "https://matin.gatech.edu/",
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            "dc.description":
            "An e-collaboration platform for accelerating materials innovation"
            #            "dc.relatedidentifier": ,
            #            "dc.year":
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(input_path,
                                    file_pattern="json",
                                    verbose=verbose),
                         desc="Processing metadata",
                         disable=not verbose):
        with open(os.path.join(dir_data["path"],
                               dir_data["filename"])) as file_data:
            full_record = json.load(file_data)
        matin_data = full_record["metadata"]["oai_dc:dc"]
        uri = matin_data.get("dc.identifier",
                             full_record["header"]["identifier"])
        record_metadata = {
            "globus_subject":
            uri,
            "acl": ["public"],
            "mdf-publish.publication.collection":
            "MATIN",
            #            "mdf_data_class": ,
            #            "mdf-base.material_composition": ,

            #            "cite_as": ,
            #            "license": ,
            "dc.title":
            matin_data.get("dc.title", "MATIN Entry " +
                           dir_data["filename"].split("_")[0]),
            #            "dc.creator": ,
            "dc.identifier":
            uri,
            #            "dc.contributor.author": [matin_data["dc:creator"]] if type(matin_data.get("dc:creator", None)) is str else matin_data.get("dc:creator", None),
            #            "dc.subject": [matin_data["dc:subject"]] if type(matin_data.get("dc:subject", None)) is str else matin_data.get("dc:subject", None),
            #            "dc.description": matin_data.get("dc:description", None),
            #            "dc.relatedidentifier": [matin_data["dc:relation"]] if type(matin_data.get("dc:relation", None)) is str else matin_data.get("dc:relation", None),
            "dc.year":
            int(matin_data["dc:date"][:4])
            if matin_data.get("dc:date", None) else None,
            "data": {
                "raw": json.dumps(full_record),
                #                "files": ,
            }
        }
        if matin_data.get("dc:creator", None):
            if type(matin_data["dc:creator"]) is not list:
                record_metadata["dc.contributor.author"] = [
                    matin_data["dc:creator"]
                ]
            else:
                record_metadata["dc.contributor.author"] = matin_data[
                    "dc:creator"]
        if matin_data.get("dc:subject", None):
            if type(matin_data["dc:subject"]) is not list:
                record_metadata["dc.subject"] = [matin_data["dc:subject"]]
            else:
                record_metadata["dc.subject"] = matin_data["dc:subject"]
        if matin_data.get("dc:description", None):
            record_metadata["dc.description"] = matin_data["dc:description"]
        if matin_data.get("dc:relation", None):
            if type(matin_data["dc:relation"]) is not list:
                record_metadata["dc.relatedidentifier"] = [
                    matin_data["dc:relation"]
                ]
            else:
                record_metadata["dc.relatedidentifier"] = matin_data[
                    "dc:relation"]

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    if verbose:
        print("Finished converting")
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("ARGS:")
        print(
            "--cmd: If this is the first argument, the rest of the arguments will be parsed as the metadata."
        )
        print("--all: If this is the first argument, all .json files in " +
              os.path.join(paths.datasets, "metadata_only") +
              " will be converted.")
        print(
            "Otherwise, the first argument must be the name of the JSON file to read."
        )
    elif sys.argv[1] == "--cmd":
        metadata = {}
        key = ""
        for arg in sys.argv[2:]:
            if not key:
                key = arg.replace("--", "")
            else:
                metadata[key] = arg
                key = ""
        convert(metadata, verbose=True)
    elif sys.argv[1] == "--all":
        for md_file in tqdm(find_files(
                os.path.join(paths.datasets, "metadata_only"), ".json"),
                            desc="Processing metadata"):
            convert(os.path.join(md_file["path"], md_file["filename"]))
    else:
        convert(sys.argv[1], verbose=True)
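For reference, the three invocation modes the __main__ block above accepts (the script name and flag values here are hypothetical):

# python matin_converter.py --cmd --dc.title Demo --dc.year 2017
# python matin_converter.py --all
# python matin_converter.py my_metadata.json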
Example No. 13
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://qmml.org/datasets.html#bfcc-13",
            "acl": ["public"],
            "mdf_source_name":
            "bfcc13",
            "mdf-publish.publication.collection":
            "bfcc13",
            "mdf_data_class":
            "vasp",
            "cite_as": [
                "Lance J. Nelson, Vidvuds Ozoliņš, C. Shane Reese, Fei Zhou, Gus L.W. Hart: Cluster expansion made easy with Bayesian compressive sensing, Physical Review B 88(15): 155105, 2013."
            ],
            #            "license": ,
            "mdf_version":
            "0.1.0",
            "dc.title":
            "Cluster expansion made easy with Bayesian compressive sensing",
            "dc.creator":
            "Brigham Young University, University of California Los Angeles, Lawrence Livermore National Laboratory",
            "dc.identifier":
            "http://qmml.org/datasets.html",
            "dc.contributor.author": [
                "Lance J. Nelson", "Vidvuds Ozoliņš", "C. Shane Reese",
                "Fei Zhou", "Gus L.W. Hart"
            ],
            #            "dc.subject": ,
            "dc.description":
            "4k DFT calculations for solid AgPd, CuPt and AgPt FCC superstructures. DFT/PBE energy, forces and stresses for cell sizes 1-16 across all compositions including primitive cells.",
            "dc.relatedidentifier": [
                "https://journals.aps.org/prb/abstract/10.1103/PhysRevB.88.155105"
            ],
            "dc.year":
            2013
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    #dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        if data:
            uri = "https://data.materialsdatafacility.org/collections/" + "bfcc-13/bfcc-13/" + data_file[
                "no_root_path"] + "/" + data_file["filename"]
            record_metadata = {
                "globus_subject":
                uri,
                "acl": ["public"],
                #            "mdf-publish.publication.collection": ,
                #            "mdf_data_class": ,
                "mdf-base.material_composition":
                data["frames"][0]["chemical_formula"],

                #            "cite_as": ,
                #            "license": ,
                "dc.title":
                "bfcc13 - " + data["frames"][0]["chemical_formula"],
                #            "dc.creator": ,
                "dc.identifier":
                uri,
                #            "dc.contributor.author": ,
                #            "dc.subject": ,
                #            "dc.description": ,
                #            "dc.relatedidentifier": ,
                #            "dc.year": ,
                "data": {
                    #                "raw": ,
                    "files": {
                        "outcar": uri
                    },
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":",
                      result.get("invalid_metadata", ""))
            # The Validator may return warnings if strict=False, which should be noted
            if result.get("warnings", None):
                print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    #if result["success"] is not True:
    #print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example No. 14
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "https://doi.org/10.5281/zenodo.6951",
            "acl": ["public"],
            "mdf_source_name":
            "mpi_mainz",
            "mdf-publish.publication.collection":
            "UV/VIS Spectral Atlas",
            "mdf_data_class":
            "UV/VIS",
            "cite_as": [
                "Keller-Rudek, H. M.-P. I. for C. M. G., Moortgat, G. K. M.-P. I. for C. M. G., Sander, R. M.-P. I. for C. M. G., & Sörensen, R. M.-P. I. for C. M. G. (2013). The MPI-Mainz UV/VIS Spectral Atlas of Gaseous Molecules [Data set]. Zenodo. http://doi.org/10.5281/zenodo.6951,"
            ],  # REQ list of strings: Complete citation(s) for this dataset.
            "license":
            "https://creativecommons.org/licenses/by/4.0/",
            "mdf_version":
            "0.1.0",
            "dc.title":
            "The MPI-Mainz UV/VIS Spectral Atlas of Gaseous Molecules",
            "dc.creator":
            "Max-Planck Institute for Chemistry, Mainz, Germany",
            "dc.identifier":
            "https://doi.org/10.5281/zenodo.6951",
            "dc.contributor.author":
            ["Keller-Rudek", "Moortgat, Geert K.", "Sander", "Sörensen"],
            "dc.subject": ["cross sections", "quantum yields"],
            "dc.description":
            "This archive contains a frozen snapshot of all cross section and quantum yield data files from the MPI-Mainz UV/VIS Spectral Atlas of Gaseous Molecules.",  # RCM string: Description of dataset contents
            #            "dc.relatedidentifier": ,
            "dc.year":
            2013
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    #dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, ".txt"),
                          desc="Processing files",
                          disable=not verbose):
        with open(os.path.join(data_file["path"], data_file["filename"]),
                  'r',
                  errors='ignore') as raw_in:
            record = raw_in.read()
        # Get the composition
        in1 = data_file["filename"].find("_")
        comp = data_file["filename"][:in1]
        # Get the temperature
        later = data_file["filename"][in1 + 1:]
        second = later.find("_")
        last = later[second + 1:]
        third = last.find("_")
        temp = last[:third - 1]
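        # e.g., for a hypothetical filename "C2H6_JPL_295K_2011.txt" (assumed
        # layout composition_source_temperatureK_rest): comp == "C2H6",
        # later == "JPL_295K_2011.txt", last == "295K_2011.txt", and
        # temp == "295" -- the [:third - 1] slice drops the trailing "K".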
        uri = "https://data.materialsdatafacility.org/collections/" + "mpi_mainz/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            "mdf-base.material_composition": comp,

            #            "cite_as": ,
            #            "license": ,
            "dc.title": "mpi_mainz - " + data_file["filename"],
            #            "dc.creator": ,
            #            "dc.identifier": ,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": record,
                #                "files": ,
                "temperature": {
                    "value": temp,
                    "unit": "K"
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    #if result["success"] is not True:
    #print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example No. 15
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "http://khazana.uconn.edu/polymer_genome/index.php",
        "acl": ["public"],
        "mdf_source_name":
        "khazana_polymer",
        "mdf-publish.publication.collection":
        "Khazana",
        "mdf_data_class":
        "cif",
        "cite_as": [
            "T. D. Huan, A. Mannodi-Kanakkithodi, C. Kim, V. Sharma, G. Pilania, R. Ramprasad\nA polymer dataset for accelerated property prediction and design Sci. Data, 3, 160012 (2016).",
            "A. Mannodi-Kanakkithodi, G. M. Treich, T. D. Huan, R. Ma, M. Tefferi, Y. Cao, G A. Sotzing, R. Ramprasad\nRational Co-Design of Polymer Dielectrics for Energy Storage Adv. Mater., 28, 6277 (2016).",
            "T. D. Huan, A. Mannodi-Kanakkithodi, R. Ramprasad\nAccelerated materials property predictions and design using motif-based fingerprints Phys. Rev. B, 92, 014106 (2015).",
            "A. Mannodi-Kanakkithodi, G. Pilania, T. D. Huan, T. Lookman, R. Ramprasad\nMachine learning strategy for accelerated design of polymer dielectrics Sci. Rep., 6, 20952 (2016)."
        ],
        "dc.title":
        "Khazana (Polymer)",
        "dc.creator":
        "University of Connecticut",
        "dc.identifier":
        "http://khazana.uconn.edu",
        #        "dc.contributor.author": ,
        "dc.subject": ["polymer"]
        #        "dc.description": ,
        #        "dc.relatedidentifier": ,
        #        "dc.year":
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(input_path, r"\.cif$"),
                         desc="Processing data files",
                         disable=not verbose):
        file_data = parse_ase(file_path=os.path.join(dir_data["path"],
                                                     dir_data["filename"]),
                              data_format="cif",
                              verbose=False)

        uri = "http://khazana.uconn.edu/module_search/material_detail.php?id=" + dir_data[
            "filename"].replace(".cif", "")
        record_metadata = {
            "globus_subject": uri,
            "acl": ["pubilc"],
            "mdf-publish.publication.collection": "Khazana",
            "mdf-base.material_composition": file_data["chemical_formula"],
            "dc.title": "Khazana Polymer - " + file_data["chemical_formula"],
            #            "dc.creator": ,
            "dc.identifier": uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": str(file_data),
                "files": {
                    "cif": uri
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 16
def convert(input_path, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "http://kinetics.nist.gov/janaf/",
        "acl": ["public"],
        "mdf_source_name":
        "nist_janaf",
        "mdf-publish.publication.collection":
        "NIST-JANAF",
        "cite_as": [
            "M. W. Chase, Jr., JANAF Thermochemical Tables Third Edition, J. Phys. Chem. Ref. Data, Vol. 14, Suppl. 1, 1985."
        ],
        "dc.title":
        "NIST-JANAF Thermochemical Tables",
        "dc.creator":
        "NIST",
        "dc.identifier":
        "http://kinetics.nist.gov/janaf/",
        "dc.contributor.author": [
            "M.W. Chase, Jr.", "C.A. Davies", "J.R. Downey, Jr.",
            "D.J. Frurip", "R.A. McDonald", "A.N. Syverud"
        ],
        #        "dc.subject": ,
        "dc.description":
        "DISCLAIMER: NIST uses its best efforts to deliver a high quality copy of the Database and to verify that the data contained therein have been selected on the basis of sound scientific judgement. However, NIST makes no warranties to that effect, and NIST shall not be liable for any damage that may result from errors or omissions in the Database.",
        #        "dc.relatedidentifier": ,
        "dc.year":
        1985
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for entry in tqdm(find_files(input_path, r".*[0-9]\.json$"),
                      desc="Processing data",
                      disable=not verbose):
        with open(os.path.join(entry["path"], entry["filename"])) as in_file:
            data = load(in_file)

        record_metadata = {
            "globus_subject":
            "http://kinetics.nist.gov/janaf/" + entry["filename"],
            "acl": ["public"],
            "mdf-publish.publication.collection":
            "NIST-JANAF",
            #            "mdf_data_class": ,
            "mdf-base.material_composition":
            data['identifiers']['molecular formula'],

            #            "cite_as": ,
            "dc.title":
            "NIST-JANAF - " + data['identifiers']['chemical formula'] + " " +
            data['identifiers']['state'],
            #            "dc.creator": ,
            #            "dc.identifier": ,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                #                "files":
                'state':
                ", ".join(data["state definitions"][st]
                          for st in data['identifiers']['state'].split(","))
                #                'cas': data['identifiers']['cas registry number']
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 17
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            #            "globus_subject": ,                      # REQ string: Unique value (should be URI if possible)
            #            "acl": ,                                 # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            #            "mdf_source_name": ,                     # REQ string: Unique name for dataset
            #            "mdf-publish.publication.collection": ,  # RCM string: Collection the dataset belongs to

            #            "cite_as": ,                             # REQ list of strings: Complete citation(s) for this dataset.
            #            "license": ,                             # RCM string: License to use the dataset (preferrably a link to the actual license).

            #            "dc.title": ,                            # REQ string: Title of dataset
            #            "dc.creator": ,                          # REQ string: Owner of dataset
            #            "dc.identifier": ,                       # REQ string: Link to dataset (dataset DOI if available)
            #            "dc.contributor.author": ,               # RCM list of strings: Author(s) of dataset
            #            "dc.subject": ,                          # RCM list of strings: Keywords about dataset
            #            "dc.description": ,                      # RCM string: Description of dataset contents
            #            "dc.relatedidentifier": ,                # RCM list of strings: Link(s) to related materials (such as an article)
            #            "dc.year":                               # RCM integer: Year of dataset creation
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for file_data in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(file_data["path"], file_data["filename"]), "vasp")
            if not record:
                raise ValueError("No data returned")
        except Exception:
            # Skip files that fail to parse
            continue
        record_metadata = {
            "globus_subject":
            "https://data.materialsdatafacility.org/collections/" +
            file_data["no_root_path"] + "/" + file_data[
                "filename"],  # REQ string: Unique value (should be URI to record if possible)
            "acl": [
                "public"
            ],  # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            "mdf-publish.publication.collection":
            "Ti-O MEAM Model",  # RCM string: Collection the record belongs to
            #            "mdf_data_class": "vasp",                      # RCM string: Type of data in record
            "mdf-base.material_composition":
            record["frames"][0]
            ["chemical_formula"],  # RCM string: Chemical composition of material in record

            #            "cite_as": ,                             # OPT list of strings: Complete citation(s) for this record (if different from dataset)
            #            "license": ,                             # OPT string: License to use the record (if different from dataset) (preferrably a link to the actual license).
            "dc.title":
            "Ti-O MEAM Model - " + record["frames"][0]
            ["chemical_formula"],  # REQ string: Title of record
            #            "dc.creator": ,                          # OPT string: Owner of record (if different from dataset)
            "dc.identifier":
            "https://data.materialsdatafacility.org/collections/" +
            file_data["no_root_path"] + "/" + file_data[
                "filename"],  # RCM string: Link to record (record webpage, if available)
            #            "dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
            #            "dc.subject": ,                          # OPT list of strings: Keywords about record
            #            "dc.description": ,                      # OPT string: Description of record
            #            "dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
            #            "dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)
            "data": {  # REQ dictionary: Other record data (described below)
                #                "raw": ,                             # RCM string: Original data record text, if feasible
                "files": {
                    "outcar":
                    "https://data.materialsdatafacility.org/collections/" +
                    file_data["no_root_path"] + "/" + file_data["filename"]
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 18
def convert(input_path, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    dataset_metadata = {
        "globus_subject": "http://hdl.handle.net/11256/782",                      # REQ string: Unique value (should be URI if possible)
        "acl": ["public"],                                 # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
        "mdf_source_name": "ti_o_fitting_db",                     # REQ string: Unique name for dataset
        "mdf-publish.publication.collection": "Ti-O Fitting Database",  # RCM string: Collection the dataset belongs to
        "mdf_data_class": "vasp",                      # RCM string: Type of data in record

        "cite_as": ["Trinkle, Dallas R.; Zhang, Pinchao Fitting database entries for a modified embedded atom method potential for interstitial oxygen in titanium (2016-07-25) http://hdl.handle.net/11256/782"],
        "license": "http://creativecommons.org/licenses/by/3.0/us/",                             # RCM string: License to use the dataset (preferrably a link to the actual license).

        "dc.title": "Fitting database entries for a modified embedded atom method potential for interstitial oxygen in titanium",                            # REQ string: Title of dataset
        "dc.creator": "University of Illinois, Urbana-Champaign",                          # REQ string: Owner of dataset
        "dc.identifier": "http://hdl.handle.net/11256/782",                       # REQ string: Link to dataset (dataset DOI if available)
        "dc.contributor.author": ["Trinkle, Dallas R", "Zhang, Pinchao"],               # RCM list of strings: Author(s) of dataset
#        "dc.subject": ,                          # RCM list of strings: Keywords about dataset
#        "dc.description": ,                      # RCM string: Description of dataset contents
#        "dc.relatedidentifier": ,                # RCM list of strings: Link(s) to related materials (such as an article)
        "dc.year": 2016                              # RCM integer: Year of dataset creation
        }


    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]

        try:
            record_metadata = {
                "globus_subject": uri,                      # REQ string: Unique value (should be URI to record if possible)
                "acl": ["public"],                                 # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
                "mdf-publish.publication.collection": "Ti-O Fitting Database",  # RCM string: Collection the record belongs to
                "mdf-base.material_composition": data["frames"][0]["chemical_formula"],       # RCM string: Chemical composition of material in record

    #            "cite_as": ,                             # OPT list of strings: Complete citation(s) for this record (if different from dataset)
    #            "license": ,                             # OPT string: License to use the record (if different from dataset) (preferrably a link to the actual license).

                "dc.title": "Ti-O Fitting Database - " + data["frames"][0]["chemical_formula"],                            # REQ string: Title of record
    #            "dc.creator": ,                          # OPT string: Owner of record (if different from dataset)
                 "dc.identifier": uri,                       # RCM string: Link to record (record webpage, if available)
    #            "dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
    #            "dc.subject": ,                          # OPT list of strings: Keywords about record
    #            "dc.description": ,                      # OPT string: Description of record
    #            "dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
    #            "dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)

                "data": {                                # REQ dictionary: Other record data (described below)
    #                "raw": ,                             # RCM string: Original data record text, if feasible
                    "files": {"outcar": uri}                            # RCM dictionary: {file_type : uri_to_file} pairs, data files (Example: {"cif" : "https://example.org/cifs/data_file.cif"})
                    }
                }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        except Exception:
            print("Error on:", data_file["path"] + "/" + data_file["filename"])


    # TODO: Save your converter as [mdf_source_name]_converter.py
    # You're done!
    if verbose:
        print("Finished converting")
Example No. 19
def convert(input_path, metadata, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "^OUTCAR$"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject":
            uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            "mdf-base.material_composition":
            data["frames"][0]["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title":
            dataset_metadata["dc.title"] + " - " +
            data["frames"][0]["chemical_formula"],
            #            "dc.creator": ,
            "dc.identifier":
            uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "outcar": uri
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 20
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://hdl.handle.net/11256/272",
            "acl": ["public"],
            "mdf_source_name": "oxygen_interstitials_deformation",
            "mdf-publish.publication.collection": "Oxygen Interstitials and Deformation Twins in alpha-Titanium",
            "mdf_data_class": "vasp",

            "cite_as": ["Interaction Between Oxygen Interstitials and Deformation Twins in alpha-Titanium, Acta Materialia v. 105 (2016), pp. 44 - 51 http://dx.doi.org/10.1016/j.actamat.2015.12.019"],
            "license": "http://creativecommons.org/licenses/by/3.0/us/",

            "dc.title": "Interaction Between Oxygen Interstitials and Deformation Twins in alpha-Titanium",
            "dc.creator": "University of Maryland",
            "dc.identifier": "http://hdl.handle.net/11256/272",
            "dc.contributor.author": ["Joost, William J.", "Ankem, Sreeramamurthy", "Kuklja, Maija M."],
#            "dc.subject": ,
#            "dc.description": ,
            "dc.relatedidentifier": ["http://dx.doi.org/10.1016/j.actamat.2015.12.019"],
            "dc.year": 2016
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            "mdf-publish.publication.collection": "Oxygen Interstitials and Deformation Twins in alpha-Titanium",
            "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#            "cite_as": ,
#            "license": ,

            "dc.title": "Oxygen Interstitials and Deformation Twins - " + data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
#                "raw": ,
                "files": {"outcar": uri}
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 21
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://hdl.handle.net/11256/671",
            "acl": ["public"],
            "mdf_source_name":
            "trinkle_elastic_fe_bcc",
            "mdf-publish.publication.collection":
            "Elastic Fe BCC",
            "mdf_data_class":
            "vasp",
            "cite_as": [
                "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Comp. Mat. Sci. 126, 503 (2017).M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Data in Brief 10, 147 (2017)."
            ],
            "license":
            "http://creativecommons.org/publicdomain/zero/1.0/",
            "dc.title":
            "Ab initio calculations of the lattice parameter and elastic stiffness coefficients of bcc Fe with solutes",
            "dc.creator":
            "University of Illinois, General Motors",
            "dc.identifier":
            "http://hdl.handle.net/11256/671",
            "dc.contributor.author":
            ["M. R. Fellinger", "L. G. Hector Jr.", "D. R. Trinkle"],
            #            "dc.subject": ,
            #            "dc.description": ,
            "dc.relatedidentifier": [
                "http://dx.doi.org/10.1016/j.commatsci.2016.09.040",
                "http://dx.doi.org/10.1016/j.dib.2016.11.092"
            ],
            "dc.year":
            2017
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            "mdf-publish.publication.collection": "Elastic Fe BCC",
            "mdf-base.material_composition":
            data["frames"][0]["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title":
            "Elastic BCC - " + data["frames"][0]["chemical_formula"],
            #            "dc.creator": ,
            "dc.identifier": uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "outcar": uri
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 22
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://hdl.handle.net/11256/836",
            "acl": ["public"],
            "mdf_source_name":
            "fe_cr_al_oxidation",
            "mdf-publish.publication.collection":
            "Fe-Cr-Al Oxidation Studies",
            #            "mdf_data_class": ,
            "cite_as": [
                "Bunn, Jonathan K.; Fang, Randy L.; Albing, Mark R.; Mehta, Apurva; Kramer, Matt J.; Besser, Matt F.; Hattrick-Simpers, Jason R High-throughput Diffraction and Spectroscopic Data for Fe-Cr-Al Oxidation Studies (2015-06-28)"
            ],
            "license":
            "http://creativecommons.org/licenses/by-sa/3.0/us/",
            "dc.title":
            "High-throughput Diffraction and Spectroscopic Data for Fe-Cr-Al Oxidation Studies",
            "dc.creator":
            "University of South Carolina, SLAC National Accelerator Laboratory, Iowa State University",
            "dc.identifier":
            "http://hdl.handle.net/11256/836",
            "dc.contributor.author": [
                "Bunn, Jonathan K.", "Fang, Randy L.", "Albing, Mark R.",
                "Mehta, Apurva", "Kramer, Matt J.", "Besser, Matt F.",
                "Hattrick-Simpers, Jason R"
            ],
            #            "dc.subject": ,
            "dc.description":
            "The data set was used to evaluate a Fe-Cr-Al thin film samples in a narrow composition region centered on known bulk compositions. The data are composed of two individual studies. The first set of data is a low temperature oxidation study on composition spread sampled performed at SLAC Beamline 1-5. Only the integrated and background subtracted 1-D spectra are included, the 2-D data and calibrations are available upon request. The second set of data was taken during high temperature oxidation of selected samples. These data are exclusively Raman data with values taken as a function of total oxidation time.",
            "dc.relatedidentifier": [
                "http://iopscience.iop.org/article/10.1088/0957-4484/26/27/274003/meta",
                "http://dx.doi.org/10.1088/0957-4484/26/27/274003"
            ],
            "dc.year":
            2015
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    #dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    with open(
            os.path.join(
                input_path, "Fe_Cr_Al_data",
                "Point Number to Composition.csv")) as composition_file:
        composition_list = list(parse_tab(composition_file.read()))
        compositions = {}
        for comp in composition_list:
            compositions[int(comp.pop("Sample Number"))] = comp
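    # compositions now maps each integer sample number to the rest of its row,
    # e.g. {7: {"Fe at. %": ..., "Cr at. %": ..., "Al at. %": ...}} (column
    # names taken from the lookups below)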
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, ".txt"),
                          desc="Processing files",
                          disable=not verbose):
        link = "https://data.materialsdatafacility.org/collections/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        temp_k = data_file["filename"].split(" ")[0]
        point_num = int(data_file["filename"].replace(
            "_", " ").split(" ")[-1].split(".")[0])
        record_metadata = {
            "globus_subject":
            link,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            "mdf-base.material_composition":
            "FeCrAl",

            #            "cite_as": ,
            #            "license": ,
            "dc.title":
            "Fe-Cr-Al Oxidation - " + data_file["filename"].split(".")[0],
            #            "dc.creator": ,
            "dc.identifier":
            link,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "csv": link
                },
                "temperature_k": temp_k,
                "atomic_composition_percent": {
                    "Fe": compositions[point_num]["Fe at. %"],
                    "Cr": compositions[point_num]["Cr at. %"],
                    "Al": compositions[point_num]["Al at. %"]
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    if verbose:
        print("Finished converting")
Example No. 23
def convert(input_path, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "https://www.ctcms.nist.gov/potentials/",
        "acl": ["public"],
        "mdf_source_name":
        "nist_ip",
        "mdf-publish.publication.collection":
        "NIST Interatomic Potentials",
        "cite_as": [
            'C.A. Becker, et al., "Considerations for choosing and using force fields and interatomic potentials in materials science and engineering," Current Opinion in Solid State and Materials Science, 17, 277-283 (2013). https://www.ctcms.nist.gov/potentials'
        ],
        #        "license": ,
        "dc.title":
        "NIST Interatomic Potentials Repository Project",
        "dc.creator":
        "National Institute of Standards and Technology",
        "dc.identifier":
        "https://www.ctcms.nist.gov/potentials/",
        "dc.contributor.author": ["C.A. Becker, et al."],
        "dc.subject": ["interatomic potential", "forcefield"],
        "dc.description":
        "This repository provides a source for interatomic potentials (force fields), related files, and evaluation tools to help researchers obtain interatomic models and judge their quality and applicability.",
        #        "dc.relatedidentifier": ,
        "dc.year":
        2013
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for file_data in tqdm(find_files(input_path, r"\.json$"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            with open(os.path.join(file_data["path"], file_data["filename"]),
                      'r') as ip_file:
                ip_data = json.load(ip_file)["interatomic-potential"]
            if not ip_data:
                raise ValueError("No data in file")
        except Exception as e:
            if verbose:
                print("Error reading '" +
                      os.path.join(file_data["path"], file_data["filename"]) +
                      "'")
            continue
        url_list = []
        link_texts = []
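        # Assumed JSON shape, inferred from the lookups below:
        # ip_data["implementation"] is a list whose entries each hold an
        # "artifact" list; an artifact may carry a "web-link" dict with
        # "URL" and "link-text" fields.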
        for artifact in ip_data["implementation"]:
            for web_link in artifact["artifact"]:
                url = web_link.get("web-link", {}).get("URL", None)
                if url:
                    url_list.append(url)
                link_text = web_link.get("web-link", {}).get("link-text", None)
                if link_text:
                    link_texts.append(link_text)

        record_metadata = {
            "globus_subject": ip_data["id"],
            "acl": ["public"],
            "mdf-publish.publication.collection":
            "NIST Interatomic Potentials",
            #            "mdf_data_class": ,
            #            "mdf-base.material_composition": "".join(ip_data["element"]),

            #            "cite_as": ,
            #            "license": ,
            "dc.title":
            "NIST Interatomic Potential - " + ", ".join(link_texts),
            #            "dc.creator": ,
            "dc.identifier": ip_data["id"],
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": "; ".join(ip_data["description"]["notes"]),
            #            "dc.relatedidentifier": url_list,
            #            "dc.year": ,
            "data": {
                "raw": json.dumps(ip_data),
                #                "files": ,
            }
        }
        if ip_data["element"]:
            record_metadata["mdf-base.material_composition"] = "".join(
                ip_data["element"])
        if url_list:
            record_metadata["dc.relatedidentifier"] = url_list

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 24
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "http://blogs.brown.edu/qhong/?page_id=102",
        "acl": ["public"],
        "mdf_source_name":
        "sluschi",
        "mdf-publish.publication.collection":
        "SLUSCHI",
        "mdf_data_class":
        "vasp",
        "cite_as": [
            "Qi-Jun Hong, Axel van de Walle, A user guide for SLUSCHI: Solid and Liquid in Ultra Small Coexistence with Hovering Interfaces, Calphad, Volume 52, March 2016, Pages 88-97, ISSN 0364-5916, http://doi.org/10.1016/j.calphad.2015.12.003."
        ],
        "dc.title":
        "Solid and Liquid in Ultra Small Coexistence with Hovering Interfaces",
        "dc.creator":
        "Brown University",
        "dc.identifier":
        "http://doi.org/10.1016/j.calphad.2015.12.003",
        "dc.contributor.author": ["Qi-Jun Hong", "Axel van de Walle"],
        "dc.subject": [
            "Melting temperature calculation", "Density functional theory",
            "Automated code"
        ],
        "dc.description":
        "Although various approaches for melting point calculations from first principles have been proposed and employed for years, their practical implementation has hitherto remained a complex and time-consuming process. The SLUSCHI code (Solid and Liquid in Ultra Small Coexistence with Hovering Interfaces) drastically simplifies this procedure into an automated package, by implementing the recently-developed small-size coexistence method and putting together a series of steps that lead to final melting point evaluation. Based on density functional theory, SLUSCHI employs Born–Oppenheimer molecular dynamics techniques under the isobaric–isothermal (NPT) ensemble, with interface to the first-principles code VASP.",
        "dc.relatedidentifier": ["http://blogs.brown.edu/qhong/?page_id=102"],
        "dc.year":
        2015
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(root=input_path, file_pattern="^OUTCAR$"),
                         desc="Processing data files",
                         disable=not verbose):
        file_data = parse_ase(file_path=os.path.join(dir_data["path"],
                                                     dir_data["filename"]),
                              data_format="vasp",
                              verbose=False)

        # If no data, skip record
        if not file_data or not file_data["frames"]:
            continue

        uri = "globus:sluschi/" + dir_data["no_root_path"] + "/" + dir_data[
            "filename"]
        record_metadata = {
            "globus_subject":
            uri,
            "acl": ["public"],
            "mdf-publish.publication.collection":
            "SLUSCHI",
            "mdf-base.material_composition":
            file_data["frames"][0]["chemical_formula"],
            "dc.title":
            "SLUSCHI - " + file_data["frames"][0]["chemical_formula"],
            #            "dc.creator": ,
            #            "dc.identifier": ,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": str(file_data),
                "files": {
                    "outcar": uri
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 25
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://materialsdata.nist.gov/",
            "acl": ["public"],
            "mdf_source_name": "nist_mml",
            "mdf-publish.publication.collection":
            "NIST Material Measurement Laboratory",
            #            "mdf_data_class": ,
            "cite_as": ["http://materialsdata.nist.gov/"],
            #            "license": ,
            "dc.title": "NIST Material Measurement Laboratory Data Repository",
            "dc.creator": "National Institute of Standards and Technology",
            "dc.identifier": "http://materialsdata.nist.gov/",
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            "dc.description":
            "The laboratory supports the NIST mission by serving as the national reference laboratory for measurements in the chemical, biological and material sciences.",
            #            "dc.relatedidentifier": ,
            "dc.year": 2013
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    for dir_data in tqdm(find_files(input_path,
                                    file_pattern="json",
                                    verbose=verbose),
                         desc="Processing metadata",
                         disable=not verbose):
        with open(os.path.join(dir_data["path"],
                               dir_data["filename"])) as file_data:
            full_record = json.load(file_data)
        nist_data = {}
        # Collapse XML-style metadata into JSON and collect duplicates in lists
        for meta_dict in full_record:
            key = meta_dict["key"]
            if not nist_data.get(key):  # No previous value; copy the data over
                nist_data[key] = meta_dict["value"]
            else:  # Key already has a value
                if type(nist_data[key]) is not list:
                    # Previous value is a single item; start a list with it
                    new_list = [nist_data[key]]
                else:
                    # Previous value is already a list; copy it
                    new_list = list(nist_data[key])
                # Append the new element and save
                new_list.append(meta_dict["value"])
                nist_data[key] = new_list
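        # For illustration (hypothetical input), the collapse above turns
        #   [{"key": "dc.subject", "value": "XPS"},
        #    {"key": "dc.subject", "value": "spectroscopy"}]
        # into nist_data == {"dc.subject": ["XPS", "spectroscopy"]}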

        raw_uri = nist_data.get("dc.identifier.uri", None)
        uri = raw_uri[0] if type(raw_uri) is list else raw_uri
        record_metadata = {
            "globus_subject":
            uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            #            "mdf-base.material_composition": ,

            #            "cite_as": ,
            #            "license": ,
            "dc.title":
            nist_data["dc.title"][0]
            if type(nist_data.get("dc.title", None)) is list else
            nist_data.get("dc.title"),
            #            "dc.creator": ,
            "dc.identifier":
            uri,
            #            "dc.contributor.author": [nist_data["dc.contributor.author"]] if type(nist_data.get("dc.contributor.author", None)) is str else nist_data.get("dc.contributor.author", None),
            #            "dc.subject": [nist_data["dc.subject"]] if type(nist_data.get("dc.subject", None)) is str else nist_data.get("dc.subject", None),
            #            "dc.description": str(nist_data.get("dc.description.abstract", None)) if nist_data.get("dc.description.abstract", None) else None,
            #            "dc.relatedidentifier": [nist_data["dc.relation.uri"]] if type(nist_data.get("dc.relation.uri", None)) is str else nist_data.get("dc.relation.uri", None)
            #            "dc.year": int(nist_data["dc.date.issued"][:4])

            #            "data": {
            #                "raw": ,
            #                "files": ,
            #                }
        }
        if nist_data.get("dc.contributor.author", None):
            if type(nist_data["dc.contributor.author"]) is not list:
                record_metadata["dc.contributor.author"] = [
                    nist_data["dc.contributor.author"]
                ]
            else:
                record_metadata["dc.contributor.author"] = nist_data[
                    "dc.contributor.author"]
        if nist_data.get("dc.subject", None):
            if type(nist_data["dc.subject"]) is not list:
                record_metadata["dc.subject"] = [nist_data["dc.subject"]]
            else:
                record_metadata["dc.subject"] = nist_data["dc.subject"]
        if nist_data.get("dc.description.abstract", None):
            record_metadata["dc.description"] = str(
                nist_data["dc.description.abstract"])
        if nist_data.get("dc.relation.uri", None):
            if type(nist_data["dc.relation.uri"]) is not list:
                record_metadata["dc.relatedidentifier"] = [
                    nist_data["dc.relation.uri"]
                ]
            else:
                record_metadata["dc.relatedidentifier"] = nist_data[
                    "dc.relation.uri"]
        if nist_data.get("dc.date.issued", None):
            record_metadata["dc.year"] = int(nist_data["dc.date.issued"][:4])

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    if verbose:
        print("Finished converting")
Example no. 26
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://hdl.handle.net/11256/88",
            "acl": ["public"],
            "mdf_source_name": "ta_melting",
            "mdf-publish.publication.collection": "Ta Melting",
            "mdf_data_class": "vasp",

            "cite_as": ["Qi-Jun Hong and Axel van de Walle, Solid-liquid coexistence in small systems: A statistical method to calculate melting temperatures, Journal of chemical physics, 139, 094114 (2013). http://dx.doi.org/10.1063/1.4819792"],
            "license": "http://creativecommons.org/licenses/by/3.0/us/",

            "dc.title": "Ta Melting Point Calculation by Small-cell Coexistence Method",
            "dc.creator": "Brown University, Caltech",
            "dc.identifier": "http://hdl.handle.net/11256/88",
            "dc.contributor.author": ["Qi-Jun Hong", "Axel van de Walle"],
#            "dc.subject": ,
            "dc.description": "We calculate the melting temperature of Tantalum, by employing the small-size coexistence solid-liquid coexistence method.",
            "dc.relatedidentifier": ["http://dx.doi.org/10.1063/1.4819792"],
            "dc.year": 2013
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable= not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]
        try:
            record_metadata = {
                "globus_subject": uri,
                "acl": ["public"],
                "mdf-publish.publication.collection": "Ta Melting",
                "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#                "cite_as": ,
#                "license": ,

                "dc.title": "Ta Melting - " + data["frames"][0]["chemical_formula"],
#                "dc.creator": ,
                "dc.identifier": uri,
#                "dc.contributor.author": ,
#                "dc.subject": ,
#                "dc.description": ,
#                "dc.relatedidentifier": ,
#                "dc.year": ,

                "data": {
#                    "raw": ,
                    "files": {"outcar": uri}
                    }
                }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        except Exception:
            print("Error on:", data_file["path"])

    if verbose:
        print("Finished converting")
Example no. 27
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://dx.doi.org/doi:10.18126/M24S3J",
            "acl": ["public"],
            "mdf_source_name": "monoborides_dft",
            "mdf-publish.publication.collection": "Monoborides DFT",
            "mdf_data_class": "vasp",

            "cite_as": ["Kim, Hyojung; Trinkle, Dallas R., \"Mechanical Properties and Phase Stability of Monoborides using Density Functional Theory Calculations,\" 2017, http://dx.doi.org/doi:10.18126/M24S3J"],
#            "license": "",

            "dc.title": "Mechanical Properties and Phase Stability of Monoborides using Density Functional Theory Calculations",
            "dc.creator": "University of Illinois at Urbana-Champaign",
            "dc.identifier": "http://dx.doi.org/doi:10.18126/M24S3J",
            "dc.contributor.author": ["Kim, Hyojung", "Trinkle, Dallas R."],
            "dc.subject": ["ab-initio", "special quasirandom structure", "DFT", "polycrystalline mechanical properties", "stacking fault energy", "solubility limit", "monoboride", "B27 structure", "Bf structure", "Vegard's law"],
            "dc.description": "This data demonstrates the Ti-monoborides with improved polycrystalline elastic properties such as Young's modulus and Pugh's ratio, and stacking fault energies. The lattice parameters, total energies and elastic constants of monoborides are computed using density functional theory",
#            "dc.relatedidentifier": [],
            "dc.year": 2017
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/published/publication_232/" + data_file["no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
#            "mdf-publish.publication.collection": ,
            "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#            "cite_as": ,
#            "license": ,

            "dc.title": "Monoborides DFT - " + data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
#                "raw": ,
                "files": {"outcar": uri}
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example no. 28
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "https://srdata.nist.gov/xps/Default.aspx",
            "acl": ["public"],
            "mdf_source_name": "nist_xps_db",
            "mdf-publish.publication.collection": "NIST XPS DB",
#            "mdf_data_class": ,

            "cite_as": ["©2012 copyright by the U.S. Secretary of Commerce on behalf of the United States of America. All rights reserved."],
#            "license": ,

            "dc.title": "NIST X-ray Photoelectron Spectroscopy Database",
            "dc.creator": "NIST",
            "dc.identifier": "https://srdata.nist.gov/xps/Default.aspx",
            "dc.contributor.author": ["Alexander V. Naumkin", "Anna Kraut-Vass", "Stephen W. Gaarenstroom", "Cedric J. Powell"],
#            "dc.subject": ,
            "dc.description": "NIST Standard Reference Database 20",
#            "dc.relatedidentifier": ,
            "dc.year": 2000
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
#    dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)


    # Get the data
    # Each record also needs its own metadata
    for file_data in tqdm(find_files(input_path, ".json"), desc="Processing files", disable= not verbose):
        with open(os.path.join(file_data["path"], file_data["filename"]), "r") as in_file:
            record = json.load(in_file)
        id_num = file_data["filename"].rsplit("_", 1)[1].split(".", 1)[0]
        link = "https://srdata.nist.gov/xps/XPSDetailPage.aspx?AllDataNo=" + id_num
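        # e.g. a file named "xps_12345.json" (hypothetical) yields
        # id_num == "12345" and a detail-page link ending in AllDataNo=12345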
        record_metadata = {
            "globus_subject": link,
            "acl": ["public"],
#            "mdf-publish.publication.collection": ,
#            "mdf_data_class": ,
            "mdf-base.material_composition": record["Formula"],

#            "cite_as": ,
#            "license": ,

            "dc.title": "NIST XPS DB - " + record["Name"],
#            "dc.creator": ,
            "dc.identifier": link,
#            "dc.contributor.author": record["Author Name(s)"].split(","),
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
                "raw": json.dumps(record)
#                "cas_number": record["CAS Registry No"]
#                "files": 
                }
            }
        if record["Citation"]:
            record_metadata["cite_as"] = record["Citation"]
        if record["Author Name(s)"]:
            record_metadata["dc.contributor.author"] = record["Author Name(s)"].split(",")
        if record["Notes"]:
            record_metadata["dc.description"] = record["Notes"]

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    if verbose:
        print("Finished converting")
Example no. 29
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://qmml.org/datasets.html#h2o-13",
            "acl": ["public"],
            "mdf_source_name":
            "h2o-13",
            "mdf-publish.publication.collection":
            "h2o-13",
            "mdf_data_class":
            "xyz",
            "cite_as": [
                "Albert P. Bartók, Michael J. Gillan, Frederick R. Manby, Gábor Csányi: Machine-learning approach for one- and two-body corrections to density functional theory: Applications to molecular and condensed water, Physical Review B 88(5): 054104, 2013. http://dx.doi.org/10.1103/PhysRevB.88.054104"
            ],
            #            "license": ,
            "mdf_version":
            "0.1.0",
            "dc.title":
            "Machine-learning approach for one- and two-body corrections to density functional theory: Applications to molecular and condensed water",
            "dc.creator":
            "University of Cambridge, University College London, University of Bristol",
            "dc.identifier":
            "http://qmml.org/datasets.html#h2o-13",
            "dc.contributor.author": [
                "Albert P. Bartók", "Michael J. Gillan", "Frederick R. Manby",
                "Gábor Csányi"
            ],
            #            "dc.subject": ,
            "dc.description":
            "Water monomer and dimer geometries, with calculations at DFT, MP2 and CCSD(T) level of theory. 7k water monomer geometries corresponding to a grid, with energies and forces at DFT / BLYP, PBE, PBE0 with AV5Z basis set",
            "dc.relatedidentifier":
            ["https://doi.org/10.1103/PhysRevB.88.054104"],
            "dc.year":
            2013
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    #dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]))
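        # Note: with no data_format argument, parse_ase is applied to xyz files
        # here, and the parsed fields (e.g. chemical_formula) appear at the top
        # level of record rather than under record["frames"] as in the VASP examples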
        uri = "https://data.materialsdatafacility.org/collections/" + "h2o-13/split_xyz_files/" + data_file[
            "no_root_path"] + '/' + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            "mdf-base.material_composition": record["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title": "H2o-13 - " + data_file["filename"],
            #            "dc.creator": ,
            "dc.identifier": uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "xyz": uri
                },
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    # if result["success"] is not True:
    #     print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example no. 30
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "https://data.materialsdatafacility.org/published/#trinkle_mg_x_diffusion",
            "acl": ["public"],
            "mdf_source_name":
            "trinkle_mg_x_diffusion",
            "mdf-publish.publication.collection":
            "Mg-X Diffusion Dataset",
            "mdf_data_class":
            "vasp",
            "cite_as": [
                "Citation for dataset Mg-X-Diffusion with author(s): Dallas Trinkle, Ravi Agarwal"
            ],
            #            "license": "",
            "dc.title":
            "Mg-X-Diffusion",
            "dc.creator":
            "University of Illinois at Urbana-Champaign",
            "dc.identifier":
            "https://data.materialsdatafacility.org/published/#trinkle_mg_x_diffusion",
            "dc.contributor.author": ["Trinkle, Dallas", "Agarwal, Ravi"],
            #"dc.subject": [],
            #"dc.description": "",
            #            "dc.relatedidentifier": [],
            "dc.year":
            2017
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + "mg-x/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf-base.material_composition": record["frames"][0]["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title": "Mg-X Diffusions - ",
            #            "dc.creator": ,
            "dc.identifier": uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "outcar": uri
                }
            }
        }
        try:
            record_metadata["mdf-base.material_composition"] = record[
                "frames"][0]["chemical_formula"]
            record_metadata["dc.title"] += record["frames"][0][
                "chemical_formula"]
        except (KeyError, IndexError, TypeError):
            # parse_ase was unable to read the composition of record 1386:
            # https://data.materialsdatafacility.org/collections/mg-x/Elements/Eu/Mg-X_Eu/OUTCAR
            # Fall back to the known composition for that record
            record_metadata["mdf-base.material_composition"] = "EuMg149"
            record_metadata["dc.title"] += "EuMg149"

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")