# NOTE: Shared imports for all converter examples in this file. The module
# paths below are assumptions based on the MDF refinery converter layout;
# adjust them to your project if they differ.
import json
import os
import sys

from tqdm import tqdm

from mdf_forge.toolbox import find_files
from mdf_refinery.parsers.ase_parser import parse_ase
from mdf_refinery.validator import Validator


# Example 1
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
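    # A minimal sketch of such a sibling block (the extra field names here are
    # purely illustrative, not part of the MDF schema):
    #   dataset_metadata = {
    #       "mdf": {...},
    #       "exchange_correlation_forces": {"custom_field": "value"},
    #   }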
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Non-self-consistent Density-Functional Theory Exchange-Correlation Forces for GGA Functionals",
                "acl": ["public"],
                "source_name":
                "exchange_correlation_forces",
                "data_contact": {
                    "given_name": "Antonio S.",
                    "family_name": "Torralba",
                    "email": "*****@*****.**",
                    "institution": "London Centre for Nanotechnology",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Torralba, Antonio S.; Bowler, David R.; Miyazaki, Tsuyoshi; Gillan, Michael J. (2009): Non-self-consistent Density-Functional Theory Exchange-Correlation Forces for GGA Functionals. ACS Publications. https://doi.org/10.1021/ct8005425.s001"
                ],
                "author": [{
                    "given_name": "Michael J.",
                    "family_name": "Gillan",
                    "institution": "London Centre for Nanotechnology",
                }, {
                    "given_name":
                    "Tsuyoshi",
                    "family_name":
                    "Miyazaki",
                    "institution":
                    "National Institute for Materials Science",
                }, {
                    "given_name": "David R.",
                    "family_name": "Bowler",
                    "institution": "London Centre for Nanotechnology",
                }, {
                    "given_name": "Antonio S.",
                    "family_name": "Torralba",
                    "email": "*****@*****.**",
                    "institution": "London Centre for Nanotechnology",
                }],
                "license":
                "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection":
                "DFT Exchange-Correlation Forces",
                "tags": [
                    "DFT", "relaxation", "DFT code Conquest",
                    "Example calculations", "alanine peptides",
                    "computer effort", "method", "gradient approximation",
                    "GGA FunctionalsWhen", "expression", "GGA NSC force"
                ],
                "description":
                "When using density functional theory (DFT), generalized gradient approximation (GGA) functionals are often necessary for accurate modeling of important properties of biomolecules, including hydrogen-bond strengths and relative energies of conformers. We consider the calculations of forces using non-self-consistent (NSC) methods based on the Harris−Foulkes expression for energy. We derive an expression for the GGA NSC force on atoms, valid for a hierarchy of methods based on local orbitals, and discuss its implementation in the linear scaling DFT code Conquest, using a standard (White−Bird) approach. We investigate the use of NSC structural relaxations before full self-consistent relaxations as a method for improving convergence.",
                "year":
                2009,
                "links": {
                    "landing_page":
                    "https://figshare.com/articles/Non_self_consistent_Density_Functional_Theory_Exchange_Correlation_Forces_for_GGA_Functionals/2851375",
                    "publication":
                    ["http://pubs.acs.org/doi/abs/10.1021/ct8005425"],
                    #"data_doi": "",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://ndownloader.figshare.com",
                        "path": "/files/4549114",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "xyz")
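        # parse_ase is assumed to return a dict of parsed properties; only
        # "chemical_formula" is used below.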
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Exchange Correlation Forces - " + record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/exchange_correlation_forces/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")


# Example 2
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption",
                "acl": ["public"],
                "source_name":
                "ru_pt_complexes",
                "data_contact": {
                    "given_name": "Pengfei",
                    "family_name": "Shi",
                    "email": "*****@*****.**",
                    "institution": "Huaihai Institute of Technology",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Shi, Pengfei; Coe, Benjamin J.; Sánchez, Sergio; Wang, Daqi; Tian, Yupeng; Nyk, Marcin; Samoc, Marek (2015): Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption. ACS Publications. https://doi.org/10.1021/acs.inorgchem.5b02089 Retrieved: 15:54, Jul 27, 2017 (GMT)"
                ],
                "author": [{
                    "given_name": "Pengfei",
                    "family_name": "Shi",
                    "email": "*****@*****.**",
                    "institution": "Huaihai Institute of Technology",
                }, {
                    "given_name": "Benjamin J.",
                    "family_name": "Coe",
                    "email": "*****@*****.**",
                    "institution": "The University of Manchester",
                }, {
                    "given_name": "Sergio",
                    "family_name": "Sánchez",
                    "institution": "The University of Manchester",
                }, {
                    "given_name": "Daqi",
                    "family_name": "Wang",
                    "institution": "Liaocheng University",
                }, {
                    "given_name": "Yupeng",
                    "family_name": "Tian",
                    "institution": "Anhui University",
                }, {
                    "given_name": "Marcin",
                    "family_name": "Nyk",
                    "institution": "Wrocław University of Technology",
                }, {
                    "given_name": "Marek",
                    "family_name": "Samoc",
                    "institution": "Wrocław University of Technology",
                }],
                "license":
                "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection":
                "Ru Pt Heteropolymetallic Complexes",
                "tags": [
                    "Heteropolymetallic Complexes", "850 nm", "834 nm",
                    "polymetallic species", "Pt coordination",
                    "spectra change", "moietie", "qpy", "MLCT",
                    "2 PA activities", "complex", "301 GM", "PtII", "RuII",
                    "523 GM", "heptanuclear RuPt 6", "absorption bands"
                ],
                "description":
                "New trinuclear RuPt2 and heptanuclear RuPt6 complex salts are prepared by attaching PtII 2,2′:6′,2″-terpyridine (tpy) moieties to RuII 4,4′:2′,2″:4″,4‴-quaterpyridine (qpy) complexes. Characterization includes single crystal X-ray structures for both polymetallic species. The visible absorption bands are primarily due to RuII → qpy metal-to-ligand charge-transfer (MLCT) transitions, according to time-dependent density functional theory (TD-DFT) calculations. These spectra change only slightly on Pt coordination, while the orange-red emission from the complexes shows corresponding small red-shifts, accompanied by decreases in intensity. Cubic molecular nonlinear optical behavior has been assessed by using Z-scan measurements. These reveal relatively high two-photon absorption (2PA) cross sections σ2, with maximal values of 301 GM at 834 nm (RuPt2) and 523 GM at 850 nm (RuPt6) when dissolved in methanol or acetone, respectively. Attaching PtII(tpy) moieties triples or quadruples the 2PA activities when compared with the RuII-based cores.",
                "year":
                2015,
                "links": {
                    "landing_page":
                    "https://figshare.com/collections/Uniting_Ruthenium_II_and_Platinum_II_Polypyridine_Centers_in_Heteropolymetallic_Complexes_Giving_Strong_Two_Photon_Absorption/2204182",
                    "publication":
                    ["https://doi.org/10.1021/acs.inorgchem.5b02089"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "(xyz|cif)"),
                          desc="Processing files",
                          disable=not verbose):
        dtype = data_file["filename"].split(".")[-1]
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), dtype)
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Ru Pt Heteropolymetallic Complexes - " +
                record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    dtype: {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ru_pt_complexes/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Halogen-Substituted Ionic Liquids",
                "acl": ["public"],
                "source_name": "halogen_ionic_liquids",

                "data_contact": {
                    
                    "given_name": "Vitaly V.",
                    "family_name": "Chaban",
                    "email": "*****@*****.**",
                    "institution": "University of Rochester",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Chaban, Vitaly V. (2016). Halogen-Substituted Ionic Liquids [Data set]. Zenodo. http://doi.org/10.5281/zenodo.165493"],

                "author": [{

                    "given_name": "Vitaly V.",
                    "family_name": "Chaban",
                    "email": "*****@*****.**",
                    "institution": "Universidade Federal de São Paulo",

                }],

                "license": "https://creativecommons.org/licenses/by/4.0/",
                "collection": "Halogen Substituted Ionic Liquids",
                #"tags": [""],
                "description": "Pre-equilibrated systems for different size for AIMD for Halogen-Substituted Ionic Liquids.",
                "year": 2016,

                "links": {

                    "landing_page": "https://doi.org/10.5281/zenodo.165493",
                    #"publication": [""],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"), desc="Processing files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Halogen Ionic Liquids - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "xyz": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/halogen_ionic_liquids/" + data_file["filename"],
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")


# Example 4
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Prediction of Compounds in Different Local SAR Environments using ECP",
                "acl": ["public"],
                "source_name":
                "ecp_sar_environments",
                "data_contact": {
                    "given_name":
                    "Jürgen",
                    "family_name":
                    "Bajorath",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Namasivayam, V., Gupta-Ostermann, D., Balfer, J., Heikamp, K., & Bajorath, J. (2014). Prediction of Compounds in Different Local SAR Environments using ECP [Data set]. Zenodo. http://doi.org/10.5281/zenodo.8626"
                ],
                "author": [{
                    "given_name":
                    "Vigneshwaran",
                    "family_name":
                    "Namasivayam",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Disha",
                    "family_name":
                    "Gupta-Ostermann",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Jenny",
                    "family_name":
                    "Balfer",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Kathrin",
                    "family_name":
                    "Heikamp",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Jürgen",
                    "family_name":
                    "Bajorath",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "SAR Environments using ECP",
                #"tags": [""],
                "description":
                "Active compounds can participate in different local structure–activity relationship (SAR) environments and introduce different degrees of local SAR discontinuity, depending on their structural and potency relationships in data sets. Such SAR features have thus far mostly been analyzed using descriptive approaches, in particular, on the basis of activity landscape modeling. However, compounds in different local SAR environments have not yet been predicted. Herein, we adapt the emerging chemical patterns (ECP) method, a machine learning approach for compound classification, to systematically predict compounds with different local SAR characteristics. ECP analysis is shown to accurately assign many compounds to different local SAR environments across a variety of activity classes covering the entire range of observed local SARs.",
                "year":
                2014,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.8626",
                    "publication":
                    ["http://pubs.acs.org/doi/abs/10.1021/ci500147b"],
                    #"data_doi": "",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",
                        "path": "/record/8626/files/Data_sets.zip",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "sdf"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), "sdf")
        except Exception:
            errors += 1
            continue  # skip unparseable files; otherwise `record` is stale or undefined
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "SAR Environments using ECP - " + record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "sdf": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ecp_sar_environments/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")


# Example 5
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Uncertainty quantification for quantum chemical models of complex reaction networks	",
                "acl": ["public"],
                "source_name": "reiher_quantum_chemical_models",

                "data_contact": {
                    
                    "given_name": "Markus",
                    "family_name": "Reiher",
                    "email": "*****@*****.**",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["(2016). Uncertainty quantification for quantum chemical models of complex reaction networks. , 195, 497-520. 10.1039/C6FD00144K"],

                "author": [{

                    "given_name": "Jonny",
                    "family_name": "Proppe",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },
                {

                    "given_name": "Tamara",
                    "family_name": "Husch",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },
                {

                    "given_name": "Gregor N.",
                    "family_name": "Simma",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },
                {

                    "given_name": "Markus",
                    "family_name": "Reiher",
                    "email": "*****@*****.**",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                }],

                "license": "http://creativecommons.org/licenses/by/3.0/",
                "collection": "Reiher Quantum Chemical Models",
                #"tags": [""],
                "description": "For the quantitative understanding of complex chemical reaction mechanisms, it is, in general, necessary to accurately determine the corresponding free energy surface and to solve the resulting continuous-time reaction rate equations for a continuous state space. For a general (complex) reaction network, it is computationally hard to fulfill these two requirements. However, it is possible to approximately address these challenges in a physically consistent way. On the one hand, it may be sufficient to consider approximate free energies if a reliable uncertainty measure can be provided. On the other hand, a highly resolved time evolution may not be necessary to still determine quantitative fluxes in a reaction network if one is interested in specific time scales. In this paper, we present discrete-time kinetic simulations in discrete state space taking free energy uncertainties into account.",
                "year": 2016,

                "links": {

                    "landing_page": "http://pubs.rsc.org/en/content/articlelanding/fd/2016/c6fd00144k#!divAbstract",
                    "publication": ["http://pubs.rsc.org/doi/c6fd90075e"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz$"), desc="Processing files", disable=not verbose):
        if "PaxHeaders" in data_file["path"]:
            continue
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Reiher Quantum Chemical Models - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "xyz": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/reiher_quantum_chemical_models/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC",
                "acl": ["public"],
                "source_name":
                "ohmic_si_c_contacts",
                "data_contact": {
                    "given_name":
                    "Per",
                    "family_name":
                    "Eklund",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Fashandi, Hossein, Dahlqvist, Martin, Lu, Jun, Palisaitis, Justinas, Simak, Sergei I, Abrikosov, Igor A, … Eklund, Per. (2017). Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC [Data set]. Zenodo. http://doi.org/10.5281/zenodo.376969"
                ],
                "author": [{
                    "given_name":
                    "Hossein",
                    "family_name":
                    "Fashandi",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Martin",
                    "family_name":
                    "Dahlqvist",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Jun",
                    "family_name":
                    "Lu",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Justinas",
                    "family_name":
                    "Palisaitis",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Sergei I",
                    "family_name":
                    "Simak",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Igor A",
                    "family_name":
                    "Abrikosov",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Johanna",
                    "family_name":
                    "Rosen",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Lars",
                    "family_name":
                    "Hultman",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Mike",
                    "family_name":
                    "Andersson",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Anita Lloyd",
                    "family_name":
                    "Spetz",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Per",
                    "family_name":
                    "Eklund",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Ohmic Contact to SiC",
                "tags": [
                    "electronic structure calculations", "MAX phase", "XRD",
                    "I/V measurement", "spin-orbit coupling",
                    "density of states", "Nanoscale materials",
                    "Structure of solids and liquids",
                    "Surfaces, interfaces and thin films",
                    "Two-dimensional materials"
                ],
                "description":
                "The large class of layered ceramics encompasses both van der Waals (vdW) and non-vdW solids. While intercalation of noble metals in vdW solids is known, formation of compounds by incorporation of noble-metal layers in non-vdW layered solids is largely unexplored. Here, we show formation of Ti3AuC2 and Ti3Au2C2 phases with up to 31% lattice swelling by a substitutional solid-state reaction of Au into Ti3SiC2 single-crystal thin films with simultaneous out-diffusion of Si. Ti3IrC2 is subsequently produced by a substitution reaction of Ir for Au in Ti3Au2C2. These phases form Ohmic electrical contacts to SiC and remain stable after 1,000 h of ageing at 600 °C in air. The present results, by combined analytical electron microscopy and ab initio calculations, open avenues for processing of noble-metal-containing layered ceramics that have not been synthesized from elemental sources, along with tunable properties such as stable electrical contacts for high-temperature power electronics or gas sensors.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.376969",
                    "publication": [
                        "http://www.nature.com/nmat/journal/v16/n8/full/nmat4896.html"
                    ],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "(OUTCAR|cif$)"),
                          desc="Processing files",
                          disable=not verbose):
        dtype = data_file["filename"].split(".")[-1]
        if dtype == "cif":
            ftype = "cif"
        else:
            ftype = "vasp-out"
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), ftype)
        except Exception:
            errors += 1
            continue  # skip unparseable files; otherwise `record` is stale or undefined
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Ohmic Contact to SiC - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    dtype: {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ohmic_si_c_contacts/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")


# Example 7
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit",
                "acl": ["public"],
                "source_name":
                "cnmultifit_groel",
                "data_contact": {
                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California San Francisco",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Lasker, K., Velázquez-Muriel, J. A., Webb, B. M., Yang, Z., Ferrin, T. E., & Sali, A. (2012). Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit [Data set]. Methods Mol Biol. Zenodo. http://doi.org/10.5281/zenodo.46596"
                ],
                "author": [{
                    "given_name":
                    "Keren",
                    "family_name":
                    "Lasker",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Javier A.",
                    "family_name":
                    "Velázquez-Muriel",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Benjamin M.",
                    "family_name":
                    "Webb",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Zheng",
                    "family_name":
                    "Yang",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Thomas E.",
                    "family_name":
                    "Ferrin",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Andrej",
                    "family_name":
                    "Sali",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of California San Francisco",
                }],
                "license":
                "http://www.opensource.org/licenses/LGPL-2.1",
                "collection":
                "GroEL cnmultifit",
                "tags": [
                    "Integrative Modeling Platform (IMP)",
                    "Electron microscopy density map", "MODELLER", "MultiFit"
                ],
                "description":
                "These scripts demonstrate the use of IMP, MODELLER and Chimera in the modeling of the bacterial molecular chaperone GroEL. First, MODELLER is used to generate structures for the individual components in the GroEL complex. Then, IMP is used to fit these components together into the electron microscopy density map of the entire complex.",
                "year":
                2012,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.46596",
                    "publication": [
                        "https://doi.org/10.1007/978-1-61779-588-6_15",
                        "https://github.com/integrativemodeling/multifit_groel/tree/v1.0"
                    ],
                    #"data_doi": "",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",
                        "path": "/record/46596/files/multifit_groel-v1.0.zip",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb$"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "GroEL cnmultifit - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "pdb": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/cnmultifit_groel/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #8
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Benzonitrile on Si(001). XPS, NEXAFS, and STM data. Accepted for publication in PCCP Sept. 2016",
                "acl": ["public"],
                "source_name":
                "benzonitrile_si",
                "data_contact": {
                    "given_name": "Steven",
                    "family_name": "Schofield",
                    "email": "*****@*****.**",
                    "institution": "University College London",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "O'Donnell, Kane, Hedgeland, Holly, Moore, Gareth (2016) Benzonitrile on Si(001). XPS, NEXAFS, and STM data. Accepted for publication in PCCP Sept. 2016."
                ],
                "author": [{
                    "given_name": "Kane",
                    "family_name": "O'Donnell",
                    "institution": "Curtin University",
                }, {
                    "given_name": "Holly",
                    "family_name": "Hedgeland",
                    "institution": "The Open University",
                }, {
                    "given_name": "Gareth",
                    "family_name": "Moore",
                    "institution": "University College London",
                }, {
                    "given_name": "Asif",
                    "family_name": "Suleman",
                    "institution": "University College London",
                }, {
                    "given_name": "Manuel",
                    "family_name": "Siegl",
                    "institution": "University College London",
                }, {
                    "given_name": "Lars",
                    "family_name": "Thomsen",
                    "institution": "The Australian Synchrotron",
                }, {
                    "given_name": "Oliver",
                    "family_name": "Warschkow",
                    "institution": "The University of Sydney",
                }, {
                    "given_name": "Steven",
                    "family_name": "Schofield",
                    "email": "*****@*****.**",
                    "institution": "University College London",
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Benzonitrile on Si",
                "tags":
                ["benzonitrile", "Si(001)", "adsorption", "XPS", "NEXAFS"],
                "description":
                "This data set contains original XPS and NEXAFS data collected at the Australian Synchrotron.  The data are the results of experiments investigating benzonitrile adsorption to the Si(001) surface.  The results were written up and have been accepted for publication in Physical Chemistry Chemical Physics in Sept. 2016. The publication date is not yet known.",
                "year":
                2016,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.154112",
                    "publication": [
                        "http://pubs.rsc.org/en/content/articlepdf/2016/CP/C6CP04328C"
                    ],
                    #"data_doi": "",
                    #"related_id": "",

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": "",

                    #"path": "",

                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "(pdb$|qe$)"),
                          desc="Processing Files",
                          disable=not verbose):
        dtype = data_file["filename"].split(".")[-1]
        if dtype == "pdb":
            ftype = "proteindatabank"
        else:
            ftype = "espresso-in"
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), ftype)
        except Exception:
            # Skip files ASE cannot parse; without this continue, a stale
            # or undefined record would be reused below
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Benzonitrile on Si - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,
                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,
                    dtype: {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/benzonitrile_si/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #"citation": ,

                #"data_contact": {

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #},

                #"author": [{

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #}],

                #"year": ,
            },

            #"dc": {

            #},
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")
Example #9
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "The ‘SAR Matrix’ method and its extensions for applications in medicinal chemistry and chemogenomics",
                "acl": ["public"],
                "source_name": "sar_chemogenomics",

                "data_contact": {
                    
                    "given_name": "Jürgen",
                    "family_name": "Bajorath",
                    "email": "*****@*****.**",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Gupta-Ostermann, D., & Bajorath, J. (2014). The ‘SAR Matrix’ method and its extensions for applications in medicinal chemistry and chemogenomics [Data set]. F1000Research. Zenodo. http://doi.org/10.5281/zenodo.10457"],

                "author": [{

                    "given_name": "Disha",
                    "family_name": "Gupta-Ostermann",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität",

                },
                {

                    "given_name": "Jürgen",
                    "family_name": "Bajorath",
                    "email": "*****@*****.**",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität",

                }],

                "license": "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "SAR Chemogenomics",
                #"tags": [""],
                "description": "We describe the ‘Structure-Activity Relationship (SAR) Matrix’ (SARM) methodology that is based upon a special two-step application of the matched molecular pair (MMP) formalism. The SARM method has originally been designed for the extraction, organization, and visualization of compound series and associated SAR information from compound data sets.",
                "year": 2014,

                "links": {

                    "landing_page": "https://doi.org/10.5281/zenodo.10457",
                    "publication": ["https://f1000research.com/articles/3-113/v2"],
                    #"data_doi": "",
                    #"related_id": ,

                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",

                        "path": "/record/10457/files/Cpd_data_sets.zip",
                        },
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "sdf"), desc="Processing files", disable=not verbose):
        try:
            record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "sdf")
        except Exception:
            # Skip files ASE cannot parse; without this continue, a stale
            # or undefined record would be reused below
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "SAR Chemogenomics - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "sdf": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/sar_chemogenoomics/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")
Example #10
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "ESOL: Estimating Aqueous Solubility Directly from Molecular Structure",
                "acl": ["public"],
                "source_name":
                "delaney_esol",
                "data_contact": {
                    "given_name":
                    "John S.",
                    "family_name":
                    "Delaney",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Syngenta, Jealott's Hill International Research Centre",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Delaney, John S. (2004/05/01). ESOL:  Estimating Aqueous Solubility Directly from Molecular Structure. Journal of Chemical Information and Computer Sciences, 44, 1000-1005. doi: 10.1021/ci034243x"
                ],
                "author": [{
                    "given_name":
                    "John S.",
                    "family_name":
                    "Delaney",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Syngenta, Jealott's Hill International Research Centre",
                }],

                #"license": "",
                "collection":
                "ESOL",
                #"tags": [""],
                "description":
                "This paper describes a simple method for estimating the aqueous solubility (ESOL − Estimated SOLubility) of a compound directly from its structure. The model was derived from a set of 2874 measured solubilities using linear regression against nine molecular properties. The most significant parameter was calculated logPoctanol, followed by molecular weight, proportion of heavy atoms in aromatic systems, and number of rotatable bonds. The model performed consistently well across three validation sets, predicting solubilities within a factor of 5−8 of their measured values, and was competitive with the well-established “General Solubility Equation” for medicinal/agrochemical sized molecules.",
                "year":
                2004,
                "links": {
                    "landing_page":
                    "http://pubs.acs.org/doi/abs/10.1021/ci034243x#ci034243xAF1",
                    "publication": [
                        "http://pubs.acs.org/doi/full/10.1021/ci034243x#ci034243xAF1"
                    ],
                    #"data_doi": "",
                    #"related_id": "",

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": "",

                    #"path": "",

                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata

    with open(os.path.join(input_path, "delaney_esol.txt"), 'r') as raw_in:
        headers = raw_in.readline().strip("\n").split(",")
        data = raw_in.readlines()

    for line in data:
        line_data = line.strip("\n").split(",")
        record = {}
        # The first field may itself contain commas (e.g. compound names
        # such as 1,1,1-trichloroethane), so rebuild it from everything
        # except the last three columns, then map the remaining headers
        # one-to-one onto those final three columns
        indx = -3
        record[headers[0]] = ",".join(line_data[:indx])

        for head in headers[1:]:
            record[head] = line_data[indx]
            indx += 1
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "ESOL - " + record["SMILES"],
                "acl": ["public"],
                "composition": record["SMILES"],

                #"tags": ,
                #"description": ,
                "raw": json.dumps(record),
                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,
                    "txt": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/delaney_esol/delaney_esol.txt",
                    },
                },

                #"citation": ,

                #"data_contact": {

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #},

                #"author": [{

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #}],

                #"year": ,
            },

            #"dc": {

            #},
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #11
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Comparative Molecular Moment Analysis (CoMMA):  3D-QSAR without Molecular Superposition",
                "acl": ["public"],
                "source_name":
                "silverman_qsar_comma",
                "data_contact": {
                    "given_name": "Daniel. E.",
                    "family_name": "Platt",
                    "email": "*****@*****.**",
                    "institution": "Thomas J. Watson Research Center",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Platt, Daniel. E. (1996/01/01). Comparative Molecular Moment Analysis (CoMMA):  3D-QSAR without Molecular Superposition. Journal of Medicinal Chemistry, 39, 2129-2140. doi: 10.1021/jm950589q"
                ],
                "author": [{
                    "given_name": "B. D.",
                    "family_name": "Silverman",
                    "institution": "Thomas J. Watson Research Center",
                }, {
                    "given_name": "Daniel. E.",
                    "family_name": "Platt",
                    "email": "*****@*****.**",
                    "institution": "Thomas J. Watson Research Center",
                }],

                #"license": "",
                "collection":
                "Silverman QSAR CoMMA",
                #"tags": [""],
                "description":
                "3d-QSAR procedures utilize descriptors that characterize molecular shape and charge distributions responsible for the steric and electrostatic nonbonding interactions intimately involved in ligand−receptor binding. Comparative molecular moment analysis (CoMMA) utilizes moments of the molecular mass and charge distributions up to and including second order in the development of molecular similarity descriptors. As a consequence, two Cartesian reference frames are then defined with respect to each molecular structure.",
                "year":
                1996,
                "links": {
                    "landing_page":
                    "ftp://ftp.ics.uci.edu/pub/baldig/learning/Silverman/",
                    "publication":
                    ["http://pubs.acs.org/doi/full/10.1021/jm950589q"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "sdf"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "sdf")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Silverman QSAR CoMMA - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                # "raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "sdf": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/silverman_qsar_comma/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                    "txt": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/silverman_qsar_comma/activity.txt"
                    }
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #12
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "BaCrO3-x JSSC 2015 (High-pressure BaCrO3 polytypes and the 5H–BaCrO2.8 phase)",
                "acl": ["public"],
                "source_name":
                "high_pressure_ba_cro3",
                "data_contact": {
                    "given_name": "Attfield J.",
                    "family_name": "Paul",
                    "email": "*****@*****.**",
                    "institution":
                    "University of Edinburgh School of Chemistry",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Attfield, J. Paul. (2015). BaCrO3-x JSSC 2015, 2014-2015 [dataset]. University of Edinburgh School of Chemistry. http://dx.doi.org/10.7488/ds/305."
                ],
                "author": [{
                    "given_name":
                    "Attfield J.",
                    "family_name":
                    "Paul",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of Edinburgh School of Chemistry",
                }],
                "license":
                "http://creativecommons.org/licenses/by/4.0/legalcode",
                "collection":
                "High Pressure Ba-CrO3",
                "tags": [
                    "Reduced oxides", "Perovskites", "High pressure synthesis",
                    "Vacancyordering", "Magnetic structure"
                ],
                "description":
                "Polytypism of BaCrO3 perovskites has been investigated at 900–1100 °C and pressures up to 22 GPa. Hexagonal 5H, 4H, and 6H perovskites are observed with increasing pressure, and the cubic 3C perovskite (a=3.99503(1) Å) is observed in bulk form for the first time at 19–22 GPa. An oxygen-deficient material with limiting composition 5H–BaCrO2.8 is synthesised at 1200 °C under ambient pressure. This contains double tetrahedral Cr4+ layers and orders antiferromagnetically below 260 K with a (0 0 1/2) magnetic structure.",
                "year":
                2015,
                "links": {
                    "landing_page":
                    "http://www.research.ed.ac.uk/portal/en/datasets/bacro3x-jssc-2015-highpressure-bacro3-polytypes-and-the-5hbacro28-phase(17dcd792-2bb9-43d9-b244-a1d3a3ea7c15).html",
                    "publication":
                    ["http://dx.doi.org/10.1016/j.jssc.2015.09.029"],
                    "data_doi": "http://dx.doi.org/10.7488/ds/305",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host":
                        "http://datashare.is.ed.ac.uk",
                        "path":
                        "/bitstream/handle/10283/862/BaCrO3Data.zip?sequence=1&isAllowed=y",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "cif"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "cif")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "High Pressure Ba-CrO3 - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "cif": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/high_pressure_ba_cro3/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata
        if "X-ray" in data_file["path"]:
            if "oxidised" in data_file["filename"]:
                ext = ".XY"
            else:
                ext = ".xye"
            name = data_file["filename"].split(".")[0] + ext
            record_metadata["mdf"]["links"][ext[1:]] = {
                "globus_endpoint":
                "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                "http_host":
                "https://data.materialsdatafacility.org",
                "path":
                "/collections/high_pressure_ba_cro3/" +
                data_file["no_root_path"] + "/" + name,
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #13
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "A Counterion-Directed Approach to the Diels-Alder Paradigm: Cascade Synthesis of Tricyclic Fused Cyclopropanes",
                "acl": ["public"],
                "source_name":
                "tricyclic_fused_cyclopropanes",
                "data_contact": {
                    "given_name": "Robert",
                    "family_name": "Paton",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Paton, R. (2016). A Counterion-Directed Approach to the Diels-Alder Paradigm: Cascade Synthesis of Tricyclic Fused Cyclopropanes [Data set]. Zenodo. http://doi.org/10.5281/zenodo.60147"
                ],
                "author": [{
                    "given_name": "Robert",
                    "family_name": "Paton",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                }],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "Tricyclic Fused Propanes",
                "tags": ["DFT", "computational chemistry"],
                "description":
                "An approach to the intramolecular Diels–Alder reaction has led to a cascade synthesis of complex carbocycles composed of three fused rings and up to five stereocenters with complete stereocontrol. Computational analysis reveals that the reaction proceeds by a Michael/Michael/cyclopropanation/epimerization cascade in which size and coordination of the counterion is key.",
                "year":
                2016,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.60147",
                    "publication": [
                        "http://onlinelibrary.wiley.com/doi/10.1002/anie.201608534/full"
                    ],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "log"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "gaussian-out")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Tricyclic Fused Cyclopropanes - " +
                record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                # "raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "log": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/tricyclic_fused_cyclopropanes/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #14
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Supplementary information for a study of DFT+U in the context of BiFeO3",
                "acl": ["public"],
                "source_name":
                "bi_fe_o3_dft",
                "data_contact": {
                    "given_name": "J. Kane",
                    "family_name": "Shenton",
                    "email": "*****@*****.**",
                    "institution": "University College London",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Shenton, J. Kane, Cheah, Wei Li, & Bowler, David R. (2017). Supplementary information for a study of DFT+U in the context of BiFeO3 [Data set]. Zenodo. http://doi.org/10.5281/zenodo.581302"
                ],
                "author": [{
                    "given_name":
                    "Wei Li",
                    "family_name":
                    "Cheah",
                    "institution":
                    "Institute of High Performance Computing, A-STAR, Singapore",
                }, {
                    "given_name":
                    "David R.",
                    "family_name":
                    "Bowler",
                    "institution":
                    "London Centre for Nanotechnology, International Centre for Materials Nanoarchitectonics (MANA)",
                }, {
                    "given_name": "J. Kane",
                    "family_name": "Shenton",
                    "email": "*****@*****.**",
                    "institution": "University College London",
                }],

                #"license": "",
                "collection":
                "BiFeO3 DFT+U",
                #"tags": [""],
                "description":
                "Jupyter notebooks containing much of the analysis featured in an upcoming publication. The .xml files contain all of the structure information and input parameters necessary to completely reproduce the work. Examples of how to work with the .xml files using pymatgen, in order to extract and analyse the relevant results are given.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://zenodo.org/record/581302#.WWWmB8aZPFQ",
                    "publication": [
                        "https://github.com/jks-science/BFO_dftu_SI/blob/v1.0/dft_u_BFO_SI.ipynb"
                    ],
                    #"data_doi": "",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host":
                        "https://zenodo.org",
                        "path":
                        "/record/581302/files/jks-science/BFO_dftu_SI-v1.0.zip",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "(outcar|vasp$)"),
                          desc="Processing files",
                          disable=not verbose):
        # OUTCAR files need ASE's "vasp-out" reader; everything else matching
        # the pattern is read as a POSCAR-style "vasp" file
        dtype = data_file["filename"].split(".")[-1]
        if "outcar" in data_file["filename"]:
            dtype = "outcar"
            ftype = "vasp-out"
        else:
            ftype = "vasp"
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), ftype)
        ## Metadata:record
        # Drop the first path component so the title shows the dataset-relative path
        title_path = "/".join(os.path.join(
            data_file["no_root_path"], data_file["filename"]).split("/")[1:])
        record_metadata = {
            "mdf": {
                "title": "BiFeO3 DFT+U - " + title_path,
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    dtype: {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/bi_fe_o3_dft/" +
                        data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Platinum pyridine cations: the DFT optimized geometries",
                "acl": ["public"],
                "source_name":
                "pt_pyridine_cations",
                "data_contact": {
                    "given_name":
                    "Alexander",
                    "family_name":
                    "Markov",
                    "email":
                    "sasha-markov.net",
                    "institution":
                    "Kurnakov Institute of General and Inorganic Chemistry of RAS",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Markov, A. (2015). Platinum pyridine cations: the DFT optimized geometries [Data set]. Zenodo. http://doi.org/10.5281/zenodo.31335"
                ],
                "author": [{
                    "given_name":
                    "Alexander",
                    "family_name":
                    "Markov",
                    "email":
                    "sasha-markov.net",
                    "institution":
                    "Kurnakov Institute of General and Inorganic Chemistry of RAS",
                }],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "Platinum Pyridine Cations",
                "tags": ["platinum", "dft", "computational chemistry"],
                "description":
                "The geometries were optimized with the hybrid M06 functional, the mDZP all-electron basis set for platinum atom, and the def2-TZVP basis set for light atoms.",
                "year":
                2015,
                "links": {
                    "landing_page": "http://doi.org/10.5281/zenodo.31335",
                    #"publication": [""],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Platinum Pyridine Cations - " + record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                # "raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/pt_pyridine_cations/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Cys-Scanning Disulfide Crosslinking and Bayesian Modeling Probe the Transmembrane Signaling Mechanism of the Histidine Kinase, PhoQ",
                "acl": ["public"],
                "source_name":
                "cys_scanning_phoq",
                "data_contact": {
                    "given_name": "William F",
                    "family_name": "DeGrado",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Molnar, K. S., Bonomi, M., Pellarin, R., Clinthorne, G. D., Gonzalez, G., Goldberg, S. D., … DeGrado, W. F. (2014). Multi-state modeling of the PhoQ two-component system [Data set]. Structure. Zenodo. http://doi.org/10.5281/zenodo.46600"
                ],
                "author": [{
                    "given_name": "Kathleen S",
                    "family_name": "Molnar",
                    "institution": "University of Pennsylvania",
                }, {
                    "given_name":
                    "Massimiliano",
                    "family_name":
                    "Bonomi",
                    "institution":
                    "University of California, San Francisco",
                }, {
                    "given_name":
                    "Riccardo",
                    "family_name":
                    "Pellarin",
                    "institution":
                    "University of California, San Francisco",
                }, {
                    "given_name": "Graham D",
                    "family_name": "Clinthorne",
                    "institution": "University of Pennsylvania",
                }, {
                    "given_name": "Gabriel",
                    "family_name": "Gonzalez",
                    "institution": "University of Pennsylvania",
                }, {
                    "given_name": "Shalom D",
                    "family_name": "Goldberg",
                    "institution": "University of Pennsylvania",
                }, {
                    "given_name": "Mark",
                    "family_name": "Goulian",
                    "institution": "University of Pennsylvania",
                }, {
                    "given_name":
                    "Andrej",
                    "family_name":
                    "Sali",
                    "institution":
                    "University of California, San Francisco",
                }, {
                    "given_name":
                    "William F",
                    "family_name":
                    "DeGrado",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of California, San Francisco",
                }],
                "license":
                "http://www.opensource.org/licenses/LGPL-2.1",
                "collection":
                "Cys-Scanning PhoQ",
                "tags": [
                    "Integrative Modeling Platform (IMP)",
                    "Cysteine crosslinks", "Multi-state"
                ],
                "description":
                "Bacteria transduce signals across the membrane using two-component systems (TCSs), consisting of a membrane-spanning sensor histidine kinase and a cytoplasmic response regulator. In gram-negative bacteria, the PhoPQ TCS senses cations and antimicrobial peptides, yet little is known about the structural changes involved in transmembrane signaling. We construct a model of PhoQ signal transduction using Bayesian inference, based on disulfide crosslinking data and homologous crystal structures.",
                "year":
                2014,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.46600",
                    "publication":
                    ["https://doi.org/10.1016/j.str.2014.04.019"],
                    #"data_doi": "",
                    #"related_id": "",
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",
                        "path": "/record/46600/files/phoq-v1.0.zip",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb"),
                          desc="Processing Files",
                          disable=not verbose):
        if "data" not in data_file[
                "no_root_path"]:  #frame files are under pqr format which currently we do not have a file reader
            continue
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Cys-Scanning PhoQ - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,
                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,
                    "pdb": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/cys_scanning_phoq/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #"citation": ,

                #"data_contact": {

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #},

                #"author": [{

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #}],

                #"year": ,
            },

            #"dc": {

            #},
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation",
                "acl": ["public"],
                "source_name":
                "cyclopropenes",
                "data_contact": {
                    "given_name": "Robert S.",
                    "family_name": "Paton",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Paton, R., & Jackson, K. (2016). Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation [Data set]. Zenodo. http://doi.org/10.5281/zenodo.53736"
                ],
                "author": [{
                    "given_name": "Tomislav",
                    "family_name": "Rovis",
                    "institution": "Colorado State University",
                }, {
                    "given_name": "Robert S.",
                    "family_name": "Paton",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                }, {
                    "given_name": "Kelvin E.",
                    "family_name": "Jackson",
                    "institution": "University of Oxford",
                }, {
                    "given_name": "Natthawat",
                    "family_name": "Semakul",
                    "institution": "Colorado State University",
                }],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "Diastereoselective Benzamidation of Cyclopropenes",
                "tags":
                ["DFT", "Gaussian", "Transition State", "Stereoselectivity"],
                "description":
                "The diastereoselective coupling of O-substituted arylhydroxamates and cyclopropenes mediated by Rh(III) catalysis was successfully developed. Through ligand development, the diastereoselectivity of this reaction was improved using a heptamethylindenyl (Ind*) ligand, which has been rationalized using quantum chemical calculations. In addition, the nature of the O-substituted ester of benzhydroxamic acid proved important for high diastereoselectivity. This transformation tolerates a variety of benzamides and cyclopropenes that furnish cyclopropa[c]dihydroisoquinolones with high diastereocontrol, which could then be easily transformed into synthetically useful building blocks for pharmaceuticals and bio-active molecules.",
                "year":
                2016,
                "links": {
                    "landing_page":
                    "https://zenodo.org/record/53736#.WWWmjMaZPFQ",
                    "publication": [
                        "http://pubs.rsc.org/en/content/articlelanding/2016/sc/c6sc02587k#!divAbstract"
                    ],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, ".out$"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "gaussian-out")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Diastereoselective Benzamidation of Cyclopropenes - " +
                record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "out": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/cyclopropenes/" + data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "PanDDA analysis of JMJD2D screened against Zenobia Fragment Library",
                "acl": ["public"],
                "source_name":
                "pandda_zenobia_fragment",
                "data_contact": {
                    "given_name": "Frank",
                    "family_name": "von Delft",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Pearce, N., Bradley, A., Marsden, B. D., & von Delft, F. (2016). PanDDA analysis of JMJD2D screened against Zenobia Fragment Library [Data set]. Zenodo. http://doi.org/10.5281/zenodo.48770"
                ],
                "author": [{
                    "given_name": "Nicholas",
                    "family_name": "Pearce",
                    "institution": "University of Oxford",
                }, {
                    "given_name": "Anthony",
                    "family_name": "Bradley",
                    "institution": "University of Oxford",
                }, {
                    "given_name": "Brian D",
                    "family_name": "Marsden",
                    "institution": "University of Oxford",
                }, {
                    "given_name": "Frank",
                    "family_name": "von Delft",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                }],
                "license":
                "https://creativecommons.org/licenses/by-sa/4.0/",
                "collection":
                "PanDDA Zenobia Fragment",
                "tags": [
                    "PANDDA", "Fragment Screening by X-ray Crystallography",
                    "Structural Genomics Consortium (SGC)",
                    "Diamond Light Source I04-1"
                ],
                "description":
                "De-methylase JMJD2D screened against the Zenobia Fragment Library by X-ray Crystallography.",
                "year":
                2016,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.48770",
                    "publication":
                    ["https://www.nature.com/articles/ncomms15123#ref33"],
                    #"data_doi": "",
                    #"related_id": "",

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": "",

                    #"path": "",

                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb"),
                          desc="Processing Files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "PanDDA Zenobia Fragment Library - " +
                record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,
                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,
                    "pdb": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/pandda_zenobia_fragment/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #"citation": ,

                #"data_contact": {

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #},

                #"author": [{

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #}],

                #"year": ,
            },

            #"dc": {

            #},
        }
        ## End metadata
        if "ligand" in data_file["filename"]:
            record_metadata["mdf"]["links"]["cif"] = {
                "globus_endpoint":
                "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                "http_host":
                "https://data.materialsdatafacility.org",
                "path":
                "/collections/pandda_zenobia_fragment/" +
                data_file["no_root_path"] + "/ligand.cif",
            }
        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Molecular architecture of the yeast Mediator complex",
                "acl": ["public"],
                "source_name": "yeast_mediator_complex",

                "data_contact": {

                    "given_name": "Benjamin",
                    "family_name": "Webb",
                    "email": "*****@*****.**",
                    "institution": "University of California San Francisco",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Robinson, Philip J, Trnka, Michael J, Pellarin, Riccardo, Greenberg, Charles H, Bushnell, David A, Davis, Ralph, … Kornberg, Roger D. (2015). Molecular architecture of the yeast Mediator complex [Data set]. eLife. Zenodo. http://doi.org/10.5281/zenodo.802915"],

                "author": [{

                    "given_name": "Philip J",
                    "family_name": "Robinson",
                    "institution": "Stanford University",

                },
                {

                    "given_name": "Michael J",
                    "family_name": "Trnka",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Riccardo",
                    "family_name": "Pellarin",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Charles H",
                    "family_name": "Greenberg",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "David A",
                    "family_name": "Bushnell",
                    "institution": "Stanford University",

                },
                {

                    "given_name": "Ralph",
                    "family_name": "Davis",
                    "institution": "Stanford University",

                },
                {

                    "given_name": "Alma L",
                    "family_name": "Burlingame",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Roger D",
                    "family_name": "Kornberg",
                    "institution": "Stanford University",

                }],

                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "Yeast Mediator Complex",
                "tags": ["Integrative Modeling Platform (IMP)", "Chemical crosslinks", "PMI", "X-ray crystallography"],
                "description": "The 21-subunit Mediator complex transduces regulatory information from enhancers to promoters, and performs an essential role in the initiation of transcription in all eukaryotes. This repository contains files used in the 3-D modeling of the entire Mediator complex, using an integrative modeling approach that combines information from chemical cross-linking and mass spectrometry; X-ray crystallography; homology modeling; and cryo-electron microscopy.",
                "year": 2015,

                "links": {

                    "landing_page": "https://zenodo.org/record/802915",
                    "publication": ["https://doi.org/10.7554/eLife.08719", "https://github.com/integrativemodeling/mediator/tree/v1.0.3"],
                    "data_doi": "https://doi.org/10.5281/zenodo.802915",
                    #"related_id": "",

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": "",

                        #"path": "",

                    #},

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb"), desc="Processing Files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Yeast Mediator Complex - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,

                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,

                    "pdb": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/yeast_mediator_complex/" + data_file["no_root_path"] + "/" + data_file["filename"],

                    },

                },

                #"citation": ,

                #"data_contact": {

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #},

                #"author": [{

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #}],

                #"year": ,

            },

            #"dc": {

            #},


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Data for the article \"Performance of SCAN density functional method for a set of ionic liquids\"",
                "acl": ["public"],
                "source_name": "scan_ionic_liquids",

                "data_contact": {
                    
                    "given_name": "Vladislav",
                    "family_name": "Ivaništšev",
                    "email": "*****@*****.**",
                    "institution": "University of Tartu",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Karu, Karl, Ers, Heigo, Mišin, Maksim, Sun, Jianwei, & Ivaništšev, Vladislav. (2017). Data for the article \"Performance of SCAN density functional method for a set of ionic liquids\" [Data set]. Zenodo. http://doi.org/10.5281/zenodo.495089"],

                "author": [{

                    "given_name": "Karl",
                    "family_name": "Karu",
                    "institution": "University of Tartu",

                },
                {

                    "given_name": "Heigo",
                    "family_name": "Ers",
                    "institution": "University of Tartu",

                },
                {

                    "given_name": "Maksim",
                    "family_name": "Mišin",
                    "institution": "University of Tartu",

                },
                {

                    "given_name": "Jianwei",
                    "family_name": "Sun",
                    "institution": "The University of Texas at El Paso",

                },
                {

                    "given_name": "Vladislav",
                    "family_name": "Ivaništšev",
                    "email": "*****@*****.**",
                    "institution": "University of Tartu",

                }],

                #"license": "",
                "collection": "SCAN of Ionic Liquids",
                #"tags": [""],
                "description": "The repository (https://github.com/vilab-tartu/SCAN) contains the database, geometries and an illustrative ipython notebook supporting the article \"Performance of SCAN density functional method for a set of ionic liquids\". ",
                "year": 2017,

                "links": {

                    "landing_page": "https://doi.org/10.5281/zenodo.495089",
                    "publication": ["https://github.com/vilab-tartu/SCAN/tree/v.05"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"), desc="Processing files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "SCAN of Ionic Liquids - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
               # "raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "xyz": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/scan_ionic_liquids/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    
                    "json": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/scan_ionic_liquids/database.json",
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Dataset for \"Canopy uptake dominates nighttime carbonyl sulfide fluxes in a boreal forest\"",
                "acl": ["public"],
                "source_name":
                "carbonyl_sulfide_fluxes",
                "data_contact": {
                    "given_name":
                    "Huilin",
                    "family_name":
                    "Chen",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of Groningen, University of Colorado"
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Linda M.J. Kooijmans, Kadmiel Maseyk, Ulli Seibt, Wu Sun, Timo Vesala, Ivan Mammarella, … Huilin Chen. (2017). Dataset for \"Canopy uptake dominates nighttime carbonyl sulfide fluxes in a boreal forest\" [Data set]. Zenodo. http://doi.org/10.5281/zenodo.580303"
                ],
                "author": [{
                    "given_name": "Linda M.J.",
                    "family_name": "Kooijmans",
                    "institution": "University of Groningen",
                }, {
                    "given_name": "Kadmiel",
                    "family_name": "Maseyk",
                    "institution": "The Open University",
                }, {
                    "given_name": "Ulli",
                    "family_name": "Seibt",
                    "institution": "University of California",
                }, {
                    "given_name": "Wu",
                    "family_name": "Sun",
                    "institution": "University of California",
                }, {
                    "given_name": "Timo",
                    "family_name": "Vesala",
                    "institution": "University of Helsinki",
                }, {
                    "given_name": "Ivan",
                    "family_name": "Mammarella",
                    "institution": "University of Helsinki",
                }, {
                    "given_name": "Pasi",
                    "family_name": "Kolari",
                    "institution": "University of Helsinki",
                }, {
                    "given_name": "Juho",
                    "family_name": "Aalto",
                    "institution": "University of Helsinki",
                }, {
                    "given_name":
                    "Alessandro",
                    "family_name":
                    "Franchin",
                    "institution":
                    "University of Helsinki, University of Colorado",
                }, {
                    "given_name": "Roberta",
                    "family_name": "Vecchi",
                    "institution": "University of Milan",
                }, {
                    "given_name": "Gianluigi",
                    "family_name": "Valli",
                    "institution": "University of Milan",
                }, {
                    "given_name":
                    "Huilin",
                    "family_name":
                    "Chen",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of Groningen, University of Colorado",
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Carbonyl Sulfide Fluxes",
                #"tags": [""],
                "description":
                "Nighttime averaged ecosystem fluxes of COS and CO2 obtained through the radon-tracer and eddy-covariance method as presented in \"Canopy uptake dominates nighttime carbonyl sulfide fluxes in a boreal forest\" submitted to Atmospheric Chemistry and Physics.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.580303",
                    "publication":
                    ["https://www.atmos-chem-phys-discuss.net/acp-2017-407/"],
                    #"data_doi": "",
                    #"related_id": "",
                    "txt": {

                        #"globus_endpoint": ,
                        "http_host":
                        "https://zenodo.org",
                        "path":
                        "/record/580303/files/Kooijmans_et_al_2017_ACPD_20170516.txt",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    with open(
            os.path.join(input_path, "Kooijmans_et_al_2017_ACPD_20170516.txt"),
            "r") as raw_in:
        data = raw_in.read()
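    # Layout assumptions for the raw .txt file: the second blank-line-
    # delimited paragraph (index 1) serves as the per-record description,
    # and the data table follows the last delimiter row of '#' characters,
    # parsed below as comma-separated columns.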
    description = "".join(data.split("\n\n")[1:2])
    start = "##########################################\n"
    for line in tqdm(parse_tab(data.split(start)[-1], sep=","),
                     desc="Processing Data",
                     disable=not verbose):
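        # Each parsed line becomes one record; "doy" is taken to be the
        # day-of-year column of the table (an assumption based on its name).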
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Carbonyl Sulfide Fluxes doy: " + line["doy"],
                "acl": ["public"],
                #"composition": ,

                #"tags": ,
                "description": description,
                "raw": json.dumps(line),
                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,
                    "txt": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/carbonyl_sulfide_fluxes/Kooijmans_et_al_2017_ACPD_20170516.txt",
                    },
                },

                #"citation": ,

                #"data_contact": {

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #},

                #"author": [{

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #}],

                #"year": ,
            },

            #"dc": {

            #},
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Cyclometalated Platinum(II) Cyanometallates: Luminescent Blocks for Coordination Self-Assembly",
                "acl": ["public"],
                "source_name": "pt_cyanometallates",

                "data_contact": {

                    "given_name": "Igor O.",
                    "family_name": "Koshevoy",
                    "email": "*****@*****.**",
                    "institution": "University of Eastern Finland",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Schneider, Leon; Sivchik, Vasily; Chung, Kun-you; Chen, Yi-Ting; Karttunen, Antti J.; Chou, Pi-Tai; Koshevoy, Igor O. (2017): Cyclometalated Platinum(II) Cyanometallates: Luminescent Blocks for Coordination Self-Assembly. ACS Publications. https://doi.org/10.1021/acs.inorgchem.7b00006"],

                "author": [{

                    "given_name": "Leon",
                    "family_name": "Schneider",
                    "institution": "Julius-Maximilians-Universität",

                },
                {

                    "given_name": "Vasily",
                    "family_name": "Sivchik",
                    "institution": "University of Eastern Finland",

                },
                {

                    "given_name": "Kun-you",
                    "family_name": "Chung",
                    "institution": "National Taiwan University",

                },
                {

                    "given_name": "Yi-Ting",
                    "family_name": "Chen",
                    "institution": "National Taiwan University",

                },
                {

                    "given_name": "Antti J.",
                    "family_name": "Karttunen",
                    "email": "*****@*****.**",
                    "institution": "Aalto University",

                },
                {

                    "given_name": "Pi-Tai",
                    "family_name": "Chou",
                    "email": "*****@*****.**",
                    "institution": "National Taiwan University",
                    "orcid": "orcid.org/0000-0002-8925-7747",

                },
                {

                    "given_name": "Igor O.",
                    "family_name": "Koshevoy",
                    "email": "*****@*****.**",
                    "institution": "University of Eastern Finland",
                    "orcid": "orcid.org/0000-0003-4380-1302",

                }],

                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Cyclometalated Platinum(II) Cyanometallates",
                "tags": ["coordination geometries", "compound", "luminescence studies", "Coordination Self-Assembly", "Luminescent Blocks", "emission performance", "cyclometalated fragment", "chromophoric cycloplatinated metalloligands", "frontier orbitals", "complexes exhibit", "tetranuclear complexes", "time-dependent density", "η 1", "photophysical behavior", "cyanide-bridged heterometallic aggregates", "squarelike arrangement", "F 2 ppy", "phosphine motifs", "Cu", "M Pt LCT contribution", "alternative cluster topology", "metal ions", "tolpy", "10 fragments", "Ag", "room-temperature phosphorescence", "HF 2 ppy"],
                "description": "A family of cyanide-bridged heterometallic aggregates has been constructed of the chromophoric cycloplatinated metalloligands and coordinatively unsaturated d10 fragments {M(PPh3)n}. The tetranuclear complexes of general composition [Pt(C^N)(CN)2M(PPh3)2]2 [C^N = ppy, M = Cu (1), Ag (2); C^N = tolpy (Htolpy = 2-(4-tolyl)-pyridine), M = Cu (4), Ag (5); C^N = F2ppy (HF2ppy = 2-(4, 6-difluorophenyl)-pyridine), M = Cu (7), Ag (8)] demonstrate a squarelike arrangement of the molecular frameworks, which is achieved due to favorable coordination geometries of the bridging ligands and the metal ions. Variation of the amount of the ancillary phosphine (for M = Ag) afforded compounds [Pt(C^N)(CN)2Ag(PPh3)]2 (C^N = ppy, 3; C^N = tolpy, 6); for the latter one an alternative cluster topology, stabilized by the Pt–Ag metallophilic and η1-Cipso(C^N)–Ag bonding, was observed.",
                "year": 2017,

                "links": {

                    "landing_page": "https://figshare.com/collections/Cyclometalated_Platinum_II_Cyanometallates_Luminescent_Blocks_for_Coordination_Self-Assembly/3730237",
                    "publication": ["https://doi.org/10.1021/acs.inorgchem.7b00006"],
                    #"data_doi": "",
                    #"related_id": "",

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": "",

                        #"path": "",

                    #},

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "(xyz|cif)"), desc="Processing Files", disable=not verbose):
        dtype = data_file["filename"].split(".")[-1]
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), dtype)
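        # parse_ase returns a dictionary of structure data read via ase; only
        # the "chemical_formula" key is used below (other keys depend on the
        # file type and parser version).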
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Cyclometalated Platinum(II) Cyanometallates - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,

                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,

                    dtype: {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/pt_cyanometallates/" + data_file["no_root_path"] + "/" + data_file["filename"],

                    },

                },

                #"citation": ,

                #"data_contact": {

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #},

                #"author": [{

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #}],

                #"year": ,

            },

            #"dc": {

            #},


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes",
                "acl": ["public"],
                "source_name": "cytotoxic_pt_complexes",

                "data_contact": {
                    
                    "given_name": "Markus",
                    "family_name": "Galanski",
                    "email": "*****@*****.**",
                    "institution": "University of Vienna",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Keppler, Bernhard K. (2013/01/10). Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes. Journal of Medicinal Chemistry, 56, 330-344. doi: 10.1021/jm3016427"],

                "author": [{

                    "given_name": "Hristo P.",
                    "family_name": "Varbanov",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Michael A.",
                    "family_name": "Jakupec",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Alexander",
                    "family_name": "Roller",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Frank",
                    "family_name": "Jensen",
                    "email": "*****@*****.**",
                    "institution": "University of Aarhus",

                },
                {

                    "given_name": "Markus",
                    "family_name": "Galanski",
                    "email": "*****@*****.**",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Bernhard K.",
                    "family_name": "Keppler",
                    "institution": "University of Vienna",

                }],

                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Cytotoxic Platinum Complexes",
                "tags": ["structure geometry", "series", "resistance", "Herein", "laboratory", "tetraki", "tris", "Relationship", "wb 97x", "mechanism", "cisplatin", "complex", "SW", "Cytotoxic", "calculation", "relationship", "Density Functional Theory", "DFT", "Reliable", "ComplexesOctahedral", "bi", "compound", "Quantitative", "Model", "QSAR investigations", "cytotoxicity", "candidate", "cell line CH 1", "descriptor", "optimization", "QSAR models", "toxicity", "Theoretical Investigations"],
                "description": "Octahedral platinum(IV) complexes are promising candidates in the fight against cancer. In order to rationalize the further development of this class of compounds, detailed studies on their mechanisms of action, toxicity, and resistance must be provided and structure–activity relationships must be drawn. Herein, we report on theoretical and QSAR investigations of a series of 53 novel bis-, tris-, and tetrakis(carboxylato)platinum(IV) complexes, synthesized and tested for cytotoxicity in our laboratories. ",
                "year": 2012,

                "links": {

                    "landing_page": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3557934/",
                    "publication": ["https://dx.doi.org/10.1021%2Fjm3016427"],
                    #"data_doi": "",
                    #"related_id": ,

                    "cif": {

                        #"globus_endpoint": ,
                        "http_host": "https://ndownloader.figshare.com",

                        "path": "/files/3593325",
                        },
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "temp_file.cif"), desc="Processing files", disable=not verbose):
        # temp_file.cif is the same as the real file, but with the author and
        # address entries deleted so that ase can read the composition.
        # It should only be used for conversion purposes.
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "cif")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Cytotoxic Platinum Complexes - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "cif": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/cytotoxic_pt_complexes/" + "jm3016427_si_002.cif",
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Repository processing
    # NOTE: this block was copied from the dataset-metadata template, so the
    # comments below describe dataset fields rather than repository fields.
    repo_metadata = {
        # REQ dictionary: MDF-format dataset metadata
        "mdf": {

            # REQ string: The title of the dataset
            "title":
            "Thermodynamics Research Center Alloy Data",

            # REQ list of strings: The UUIDs allowed to view this metadata, or 'public'
            "acl": ["public"],

            # REQ string: A short version of the dataset name, for quick reference. Spaces and dashes will be replaced with underscores, and other non-alphanumeric characters will be removed.
            "source_name":
            "nist_trc",

            # REQ dictionary: The contact person/steward/custodian for the dataset
            "data_contact": {

                # REQ string: The person's given (or first) name
                "given_name": "Scott",

                # REQ string: The person's family (or last) name
                "family_name": "Townsend",

                # REQ string: The person's email address
                "email": "*****@*****.**",

                # RCM string: The primary affiliation for the person
                "institution":
                "National Institute of Standards and Technology",
            },

            # REQ list of dictionaries: The person/people contributing the tools (harvester, this converter) to ingest the dataset
            "data_contributor": [{

                # REQ string: The person's given (or first) name
                "given_name": "Jonathon",

                # REQ string: The person's family (or last) name
                "family_name": "Gaff",

                # REQ string: The person's email address
                "email": "*****@*****.**",

                # RCM string: The primary affiliation for the person
                "institution": "The University of Chicago",

                # RCM string: The person's GitHub username
                "github": "jgaff",
            }],

            # RCM list of strings: The full bibliographic citation(s) for the dataset
            "citation": [
                "E. A. Pfeif and K. Kroenlein, Perspective: Data infrastructure for high throughput materials discovery, APL Materials 4, 053203, 2016. doi: 10.1063/1.4942634",
                "B. Wilthan, E.A. Pfeif, V.V. Diky, R.D. Chirico, U.R. Kattner, K. Kroenlein, Data resources for thermophysical properties of metals and alloys, Part 1: Structured data capture from the archival literature, Calphad 56, pp 126-138, 2017. doi: 10.1016/j.calphad.2016.12.004"
            ],

            # RCM list of dictionaries: A list of the authors of this dataset
            "author": [
                {

                    # REQ string: The person's given (or first) name
                    "given_name":
                    "Boris",

                    # REQ string: The person's family (or last) name
                    "family_name":
                    "Wilthan",

                    # RCM string: The person's email address
                    "email":
                    "*****@*****.**",

                    # RCM string: The primary affiliation for the person
                    "institution":
                    "National Institute of Standards and Technology",
                },
                {

                    # REQ string: The person's given (or first) name
                    "given_name":
                    "Erik",

                    # REQ string: The person's family (or last) name
                    "family_name":
                    "Pfeif",

                    # RCM string: The person's email address
                    #                    "email": "",

                    # RCM string: The primary affiliation for the person
                    "institution":
                    "National Institute of Standards and Technology",
                },
                {

                    # REQ string: The person's given (or first) name
                    "given_name":
                    "Vladimir",

                    # REQ string: The person's family (or last) name
                    "family_name":
                    "Diky",

                    # RCM string: The person's email address
                    #                    "email": "",

                    # RCM string: The primary affiliation for the person
                    "institution":
                    "National Institute of Standards and Technology",
                },
                {

                    # REQ string: The person's given (or first) name
                    "given_name":
                    "Robert",

                    # REQ string: The person's family (or last) name
                    "family_name":
                    "Chirico",

                    # RCM string: The person's email address
                    #                    "email": "",

                    # RCM string: The primary affiliation for the person
                    "institution":
                    "National Institute of Standards and Technology",
                },
                {

                    # REQ string: The person's given (or first) name
                    "given_name":
                    "Ursula",

                    # REQ string: The person's family (or last) name
                    "family_name":
                    "Kattner",

                    # RCM string: The person's email address
                    #                    "email": "",

                    # RCM string: The primary affiliation for the person
                    "institution":
                    "National Institute of Standards and Technology",
                },
                {

                    # REQ string: The person's given (or first) name
                    "given_name":
                    "Kenneth",

                    # REQ string: The person's family (or last) name
                    "family_name":
                    "Kroenlein",

                    # RCM string: The person's email address
                    #                    "email": "",

                    # RCM string: The primary affiliation for the person
                    "institution":
                    "National Institute of Standards and Technology",
                }
            ],

            # RCM string: A link to the license for distribution of the dataset
            "license":
            "©2017 copyright by the US Secretary of Commerce on behalf of the United States of America. All rights reserved.",

            # RCM string: The repository (that should already be in MDF) holding the dataset
            #                "repository": ,

            # RCM string: The collection for the dataset, commonly a portion of the title
            "collection":
            "NIST",

            # RCM list of strings: Tags, keywords, or other general descriptors for the dataset
            "tags":
            "alloy",

            # RCM string: A description of the dataset
            "description":
            "The NIST Alloy Data web application provides access to thermophysical property data with a focus on unary, binary, and ternary metal systems. All data is from original experimental publications including full provenance and uncertainty.",

            # RCM integer: The year of dataset creation
            "year":
            2016,

            # REQ dictionary: Links relating to the dataset
            "links": {

                # REQ string: The human-friendly landing page for the dataset
                "landing_page":
                "http://trc.nist.gov/applications/metals_data/metals_data.php",

                # RCM list of strings: The DOI(s) (in link form, ex. 'https://dx.doi.org/10.12345') for publications connected to the dataset
                "publication": [
                    "http://dx.doi.org/10.1063/1.4942634",
                    "http://dx.doi.org/10.1016/j.calphad.2016.12.004"
                ],

                # RCM string: The DOI of the dataset itself (in link form)
                #                    "data_doi": ,

                # OPT list of strings: The mdf-id(s) of related entries, not including records from this dataset
                #                    "related_id": ,

                # RCM dictionary: Links to raw data files from the dataset (multiple allowed, field name should be data type)
                #                    "data_link": {

                # RCM string: The ID of the Globus Endpoint hosting the file
                #                        "globus_endpoint": ,

                # RCM string: The fully-qualified HTTP hostname, including protocol, but without the path (for example, 'https://data.materialsdatafacility.org')
                #                        "http_host": ,

                # REQ string: The full path to the data file on the host
                #                        "path": ,

                #                    },
            },
        },

        # OPT dictionary: DataCite-format metadata
        "dc": {},
    }

    Validator(repo_metadata, resource_type="repository")

    with open(os.path.join(input_path, "nist_trc.json")) as infile:
        all_data = json.load(infile)

    refs = all_data["refs"]
    systems = all_data["systems"]
    specimens = all_data["specimen"]
    comps = all_data["comps"]

    for ref_id, ref_data in tqdm(refs.items(),
                                 desc="Processing references",
                                 disable=not verbose):
        ## Metadata:dataset
        dataset_metadata = {
            # REQ dictionary: MDF-format dataset metadata
            "mdf": {

                # REQ string: The title of the dataset
                "title": ref_data["title"],

                # REQ list of strings: The UUIDs allowed to view this metadata, or 'public'
                "acl": ["public"],

                # REQ string: A short version of the dataset name, for quick reference. Spaces and dashes will be replaced with underscores, and other non-alphanumeric characters will be removed.
                "source_name": "nist_trc_" + str(ref_id),

                # REQ dictionary: The contact person/steward/custodian for the dataset
                "data_contact": repo_metadata["mdf"]["data_contact"],

                # REQ list of dictionaries: The person/people contributing the tools (harvester, this converter) to ingest the dataset
                "data_contributor": repo_metadata["mdf"]["data_contributor"],

                # RCM list of strings: The full bibliographic citation(s) for the dataset
                "citation": ref_data["refstring"],

                # RCM list of dictionaries: A list of the authors of this dataset
                #                "author": [{

                # REQ string: The person's given (or first) name
                #                    "given_name": ,

                # REQ string: The person's family (or last) name
                #                    "family_name": ,

                # RCM string: The person's email address
                #                    "email": ,

                # RCM string: The primary affiliation for the person
                #                    "institution": ,

                #                }],

                # RCM string: A link to the license for distribution of the dataset
                #                "license": ,

                # RCM string: The repository (that should already be in MDF) holding the dataset
                "repository": "nist_trc",

                # RCM string: The collection for the dataset, commonly a portion of the title
                "collection": "NIST",

                # RCM list of strings: Tags, keywords, or other general descriptors for the dataset
                "tags": ["alloy"] + ref_data.get("keywords", "").split(" "),

                # RCM string: A description of the dataset
                #                "description": ,

                # RCM integer: The year of dataset creation
                "year": int(ref_data["year"]),

                # REQ dictionary: Links relating to the dataset
                "links": {

                    # REQ string: The human-friendly landing page for the dataset
                    "landing_page":
                    "http://trc.nist.gov/applications/metals_data/metals_data.php#"
                    + str(ref_id),

                    # RCM list of strings: The DOI(s) (in link form, ex. 'https://dx.doi.org/10.12345') for publications connected to the dataset
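                    # "doi" may be stored as a single string or as a list of
                    # strings; the conditional below normalizes it to a list
                    # before building the links.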
                    "publication": [
                        "https://dx.doi.org/" + doi
                        for doi in (ref_data.get("doi", [])
                                    if type(ref_data.get("doi", [])) is list
                                    else [ref_data.get("doi", [])])
                    ],

                    # RCM string: The DOI of the dataset itself (in link form)
                    #                    "data_doi": ,

                    # OPT list of strings: The mdf-id(s) of related entries, not including records from this dataset
                    #                    "related_id": ,

                    # RCM dictionary: Links to raw data files from the dataset (multiple allowed, field name should be data type)
                    #                    "data_link": {

                    # RCM string: The ID of the Globus Endpoint hosting the file
                    #                        "globus_endpoint": ,

                    # RCM string: The fully-qualified HTTP hostname, including protocol, but without the path (for example, 'https://data.materialsdatafacility.org')
                    #                        "http_host": ,

                    # REQ string: The full path to the data file on the host
                    #                        "path": ,

                    #                    },
                },
            },

            # OPT dictionary: DataCite-format metadata
            "dc": {},
        }

        # Make validator
        dataset_validator = Validator(dataset_metadata)

        # Parse out specimens for this reference
        # tqdm disabled due to speed of processing specimens
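        # NOTE: this comprehension rescans every specimen for each reference.
        # A one-pass alternative (a sketch, not part of the original code):
        #
        # from collections import defaultdict
        # specimens_by_ref = defaultdict(list)
        # for s_id, s_data in specimens.items():
        #     specimens_by_ref[s_data["refid"]].append((s_id, s_data))
        #
        # ...then iterate over specimens_by_ref[ref_id] here instead.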
        for spec_id, spec_data in tqdm([(s_id, s_data)
                                        for s_id, s_data in specimens.items()
                                        if s_data["refid"] == ref_id],
                                       desc="Processing specimens",
                                       disable=True):
            ## Metadata:record
            try:
                record_metadata = {
                    # REQ dictionary: MDF-format record metadata
                    "mdf": {

                        # REQ string: The title of the record
                        "title": "TRC Specimen " + str(spec_id),

                        # RCM list of strings: The UUIDs allowed to view this metadata, or 'public' (defaults to the dataset ACL)
                        "acl": ["public"],

                        # RCM string: Subject material composition, expressed in a chemical formula (ex. Bi2S3)
                        "composition": comps[spec_data["cmpid"]]["formula"],

                        # RCM list of strings: Tags, keywords, or other general descriptors for the record
                        #                    "tags": ,

                        # RCM string: A description of the record
                        #                    "description": spec_data["descr"],

                        # RCM string: The record as a JSON string (see json.dumps())
                        "raw": json.dumps(spec_data),

                        # REQ dictionary: Links relating to the record
                        "links": {

                            # RCM string: The human-friendly landing page for the record (defaults to the dataset landing page)
                            #                        "landing_page": ,

                            # RCM list of strings: The DOI(s) (in link form, ex. 'https://dx.doi.org/10.12345') for publications specific to this record
                            #                        "publication": ,

                            # RCM string: The DOI of the record itself (in link form)
                            #                        "data_doi": ,

                            # OPT list of strings: The mdf-id(s) of related entries, not including the dataset entry
                            #                        "related_id": ,

                            # RCM dictionary: Links to raw data files from the dataset (multiple allowed, field name should be data type)
                            #                        "data_link": {

                            # RCM string: The ID of the Globus Endpoint hosting the file
                            #                            "globus_endpoint": ,

                            # RCM string: The fully-qualified HTTP hostname, including protocol, but without the path (for example, 'https://data.materialsdatafacility.org')
                            #                            "http_host": ,

                            # REQ string: The full path to the data file on the host
                            #                            "path": ,

                            #                        },
                        },

                        # OPT list of strings: The full bibliographic citation(s) for the record, if different from the dataset
                        #                    "citation": ,

                        # OPT dictionary: The contact person/steward/custodian for the record, if different from the dataset
                        #                    "data_contact": {

                        # REQ string: The person's given (or first) name
                        #                        "given_name": ,

                        # REQ string: The person's family (or last) name
                        #                        "family_name": ,

                        # REQ string: The person's email address
                        #                        "email": ,

                        # RCM string: The primary affiliation for the person
                        #                        "institution": ,

                        #                    },

                        # OPT list of dictionaries: A list of the authors of this record, if different from the dataset
                        #                    "author": [{

                        # REQ string: The person's given (or first) name
                        #                       "given_name": ,

                        # REQ string: The person's family (or last) name
                        #                        "family_name": ,

                        # RCM string: The person's email address
                        #                        "email": ,

                        # RCM string: The primary affiliation for the person
                        #                        "institution": ,

                        #                    }],

                        # OPT integer: The year of dataset creation, if different from the dataset
                        #                    "year": ,
                    },

                    # OPT dictionary: DataCite-format metadata
                    "dc": {},
                }
                ## End metadata
            # Ignore records without composition
            except KeyError:
                continue

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and stop processing if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if not result["success"]:
                if not dataset_validator.cancel_validation()["success"]:
                    print(
                        "Error cancelling validation. The partial feedstock may not be removed."
                    )
                raise ValueError(result["message"] + "\n" +
                                 result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "KLH Dataset I",
                "acl": ["public"],
                "source_name": "klh_1",

                "data_contact": {

                    "given_name": "Clinton S",
                    "family_name": "Potter",
                    "email": "*****@*****.**",
                    "institution": "The Scripps Research Institute",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                #"citation": [""],

                "author": [{

                    "given_name": "Yuanxin",
                    "family_name": "Zhu",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Bridget",
                    "family_name": "Carragher",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Robert M",
                    "family_name": "Glaeser",
                    "institution": "University of California, Berkeley",

                },
                {

                    "given_name": "Denis",
                    "family_name": "Fellmann",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Chandrajit",
                    "family_name": "Bajaj",
                    "institution": "University of Texas at Austin,",

                },
                {

                    "given_name": "Marshall",
                    "family_name": "Bern",
                    "institution": "Palo Alto Research Center",

                },
                {

                    "given_name": "Fabrice",
                    "family_name": "Mouche",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Felix",
                    "family_name": "de Haas",
                    "institution": "FEI Company, Eindhoven",

                },
                {

                    "given_name": "Richard J",
                    "family_name": "Hall",
                    "institution": "Imperial College London",

                },
                {

                    "given_name": "David J",
                    "family_name": "Kriegman",
                    "institution": "University of California, San Diego",

                },
                {

                    "given_name": "Steven J",
                    "family_name": "Ludtke",
                    "institution": "Baylor College of Medicine",

                },
                {

                    "given_name": "Satya P",
                    "family_name": "Mallick",
                    "institution": "University of California, San Diego",

                },
                {

                    "given_name": "Pawel A",
                    "family_name": "Penczek",
                    "institution": "University of Texas-Houston Medical School",

                },
                {

                    "given_name": "Alan M",
                    "family_name": "Roseman",
                    "institution": "MRC Laboratory of Molecular Biology",

                },
                {

                    "given_name": "Fred J",
                    "family_name": "Sigworth",
                    "institution": "Yale University School of Medicine",

                },
                {

                    "given_name": "Niels",
                    "family_name": "Volkmann",
                    "institution": "The Burnham Institute",

                },
                {

                    "given_name": "Clinton S",
                    "family_name": "Potter",
                    "email": "*****@*****.**",
                    "institution": "The Scripps Research Institute",

                }],

                #"license": "",
                "collection": "Keyhole Limpet Hemocyanin",
                "tags": ["Electron microscopy", "Single-particle reconstruction", "Automatic particle selection", "Image processing", "Pattern recognition"],
                "description": "Manual selection of single particles in images acquired using cryo-electron microscopy (cryoEM) will become a significant bottleneck when datasets of a hundred thousand or even a million particles are required for structure determination at near atomic resolution. Algorithm development of fully automated particle selection is thus an important research objective in the cryoEM field. A number of research groups are making promising new advances in this area. Evaluation of algorithms using a standard set of cryoEM images is an essential aspect of this algorithm development. With this goal in mind, a particle selection \"bakeoff\" was included in the program of the Multidisciplinary Workshop on Automatic Particle Selection for cryoEM. Twelve groups participated by submitting the results of testing their own algorithms on a common dataset. The dataset consisted of 82 defocus pairs of high-magnification micrographs, containing keyhole limpet hemocyanin particles, acquired using cryoEM.",
                "year": 2004,

                "links": {

                    "landing_page": "http://emg.nysbc.org/redmine/projects/public-datasets/wiki/KLH_dataset_I",
                    "publication": ["http://www.sciencedirect.com/science/article/pii/S1047847703002004#!"],
                    #"data_doi": "",
                    #"related_id": "",

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": "",

                        #"path": "",

                    #},

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "map$"), desc="Processing Files", disable=not verbose):
        with open(os.path.join(data_file["path"], data_file["filename"]), 'r') as raw_in:
            map_data = raw_in.read()
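        # Each .map file is a space-separated table pairing an exposure-2
        # micrograph with its particle-coordinate file; the matching
        # exposure-1 file name is derived below by swapping the ".002"
        # suffix for ".001".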
        headers = ["index", "image", "coordinate"]
        for line in parse_tab(map_data, headers=headers, sep=" "):
            ifile_1 = line["image"].replace(".002", ".001")
            ifile_2 = line["image"]
            cfile = line["coordinate"]
            df = pd.read_csv(os.path.join(data_file["path"], cfile), delim_whitespace=True)
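            # NOTE: the coordinate table is loaded here but never referenced
            # below; the record links point at the raw files instead. (In
            # newer pandas, delim_whitespace= is deprecated in favor of
            # sep=r"\s+".)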
            ## Metadata:record
            record_metadata = {
                "mdf": {
    
                    "title": "Keyhole Limpet Hemocyanin 1 - " + cfile,
                    "acl": ["public"],
                    #"composition": ,
    
                    #"tags": ,
                    "description": "Images under exposure1 are near-to-focus (NTF). Images under exposure2 are far-from-focus (FFF).",
                    #"raw": ,
    
                    "links": {
    
                        #"landing_page": ,
                        #"publication": ,
                        #"data_doi": ,
                        #"related_id": ,
    
                        "klh": {
    
                            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                            "http_host": "https://data.materialsdatafacility.org",
    
                            "path": "/collections/klh_1/" + data_file["no_root_path"] + "/" + cfile,
    
                            },
    
                        "jpg": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure1_jpeg/" + ifile_1.replace(".mrc", ".jpg"),
        
                            },
    
    
                        "mrc": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure1_mrc/" + ifile_1,
        
                            },
                        
                        "jpg2": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure2_jpeg/" + ifile_2.replace(".mrc", ".jpg"),
        
                            },
    
    
                        "mrc2": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure2_mrc/" + ifile_2,
        
                            },
                    },
    
                    #"citation": ,
    
                    #"data_contact": {
    
                        #"given_name": ,
                        #"family_name": ,
                        #"email": ,
                        #"institution": ,
    
                    #},
    
                    #"author": [{
    
                        #"given_name": ,
                        #"family_name": ,
                        #"email": ,
                        #"institution": ,
    
                    #}],
    
                    #"year": ,
    
                },
    
                #"dc": {
    
                #},
    
    
            }
            ## End metadata
    
            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)
    
            # Check if the Validator accepted the record, and stop processing if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if not result["success"]:
                if not dataset_validator.cancel_validation()["success"]:
                    print("Error cancelling validation. The partial feedstock may not be removed.")
                raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "High-throughput Diffraction and Spectroscopic Data for Fe-Cr-Al Oxidation Studies",
                "acl": ["public"],
                "source_name":
                "fe_cr_al_oxidation",
                "citation": [
                    "Bunn, Jonathan K.; Fang, Randy L.; Albing, Mark R.; Mehta, Apurva; Kramer, Matt J.; Besser, Matt F.; Hattrick-Simpers, Jason R High-throughput Diffraction and Spectroscopic Data for Fe-Cr-Al Oxidation Studies (2015-06-28)"
                ],
                "data_contact": {
                    "given_name": "Jason",
                    "family_name": "Hattrick-Simpers",
                    "email": "*****@*****.**",
                    "institution": "University of South Carolina Columbia",
                },

                #            "author": ,

                #            "license": ,
                "collection":
                "Fe-Cr-Al Oxidation Studies",
                #            "tags": ,
                "description":
                "The data set was used to evaluate a Fe-Cr-Al thin film samples in a narrow composition region centered on known bulk compositions. The data are composed of two individual studies. The first set of data is a low temperature oxidation study on composition spread sampled performed at SLAC Beamline 1-5. Only the integrated and background subtracted 1-D spectra are included, the 2-D data and calibrations are available upon request. The second set of data was taken during high temperature oxidation of selected samples. These data are exclusively Raman data with values taken as a function of total oxidation time.",
                "year":
                2015,
                "links": {
                    "landing_page":
                    "https://materialsdata.nist.gov/dspace/xmlui/handle/11256/836",
                    "publication":
                    "http://dx.doi.org/10.1088/0957-4484/26/27/274003",
                    "data_doi": "http://hdl.handle.net/11256/836",

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff"
                }
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    with open(
            os.path.join(
                input_path, "Fe_Cr_Al_data",
                "Point Number to Composition.csv")) as composition_file:
        composition_list = list(parse_tab(composition_file.read()))
        compositions = {}
        for comp in composition_list:
            compositions[int(comp.pop("Sample Number"))] = comp
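        # "compositions" now maps each integer sample number to the rest of its
        # CSV row, e.g. compositions[7] -> {"Fe at. %": ..., "Cr at. %": ..., "Al at. %": ...}
        # (assuming parse_tab yields one dict per row, keyed by column header).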
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, ".txt"),
                          desc="Processing files",
                          disable=not verbose):
        temp_k = data_file["filename"].split(" ")[0]
        point_num = int(data_file["filename"].replace(
            "_", " ").split(" ")[-1].split(".")[0])
        record_metadata = {
            "mdf": {
                "title":
                "Fe-Cr-Al Oxidation - " + data_file["filename"].split(".")[0],
                "acl": ["public"],

                #            "tags": ,
                #            "description": ,
                "composition":
                "FeCrAl",
                #            "raw": ,
                "links": {
                    #                "landing_page": ,

                    #                "publication": ,
                    #                "dataset_doi": ,

                    #                "related_id": ,
                    "csv": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/" + data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                # IDs
                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "data_format": ,
                #            "data_type": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            },
            "fe_cr_al_oxidation": {
                "temperature_k":
                float(temp_k) if temp_k != "Room" else 293.15,  # Avg room temp
                "atomic_composition_percent": {
                    "Fe": float(compositions[point_num]["Fe at. %"]),
                    "Cr": float(compositions[point_num]["Cr at. %"]),
                    "Al": float(compositions[point_num]["Al at. %"])
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Elemental vacancy diffusion database from high-throughput first-principles calculations for fcc and hcp structures",
                "acl": ["public"],
                "source_name": "nist_fcc_hcp_structures",

                "data_contact": {
                    
                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Angsten, Thomas; Mayeshiba, Tam; Wu, Henry; Morgan, Dane Elemental vacancy diffusion for fcc and hcp structures (2014-08-08) http://hdl.handle.net/11256/76"],

                "author": [{

                    "given_name": "Thomas",
                    "family_name": "Angsten",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Tam",
                    "family_name": "Mayeshiba",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Henry",
                    "family_name": "Wu",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                }],

                #"license": "",  NO LICENSE ON SITE... CONTACT AUTHOR
                "collection": "NIST fcc hcp Structures",
                #"tags": [""],
                "description": "This work demonstrates how databases of diffusion-related properties can be developed from high-throughput ab initio calculations. The formation and migration energies for vacancies of all adequately stable pure elements in both the face-centered cubic (fcc) and hexagonal close packing (hcp) crystal structures were determined using ab initio calculations. For hcp migration, both the basal plane and z-direction nearest-neighbor vacancy hops were considered. Energy barriers were successfully calculated for 49 elements in the fcc structure and 44 elements in the hcp structure.",
                "year": 2014,

                "links": {

                    "landing_page": "http://hdl.handle.net/11256/76",
                    "publication": ["http://dx.doi.org/10.1088/1367-2630/16/1/015018"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                },
            },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        try:
            record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp-out")
        except Exception:
            # Count the failure and skip this file; without the "continue",
            # "record" below would be undefined (or stale from a previous
            # iteration) whenever parsing fails.
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "NIST fcc hcp structures - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": ,

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "outcar": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/nist_fcc_hcp_structures/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Three-Dimensional Quantitative Structure−Activity Relationship (QSAR) and Receptor Mapping of Cytochrome P-45014αDM Inhibiting Azole Antifungal Agents",
                "acl": ["public"],
                "source_name":
                "cytochrome_qsar",
                "data_contact": {
                    "given_name": "Tanaji T.",
                    "family_name": "Talele",
                    "email": "*****@*****.**",
                    "institution": "University of Mumbai",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Kulkarni, Vithal M. (1999/03/22). Three-Dimensional Quantitative Structure−Activity Relationship (QSAR) and Receptor Mapping of Cytochrome P-45014αDM Inhibiting Azole Antifungal Agents. Journal of Chemical Information and Computer Sciences, 39, 204-210. doi: 10.1021/ci9800413"
                ],
                "author": [{
                    "given_name": "Tanaji T.",
                    "family_name": "Talele",
                    "email": "*****@*****.**",
                    "institution": "University of Mumbai",
                }, {
                    "given_name": "Vithal M.",
                    "family_name": "Kulkarni",
                    "institution": "University of Mumbai",
                }],

                # "license": "",
                "collection":
                "Cytochrome QSAR",
                #"tags": [""],
                "description":
                "Molecular modeling was performed by a combined use of conformational analysis and 3D-QSAR methods to distinguish structural attributes common to a series of azole antifungal agents. Apex-3D program was used to recognize the common biophoric structural patterns of 13 diverse sets of azole antifungal compounds demonstrating different magnitudes of biological activity. Apex-3D identified three common biophoric features significant for activity:  N1 atom of azole ring, the aromatic ring centroid 1, and aromatic ring centroid 2. A common biophore model proposed from the Apex-3D analysis can be useful for the design of novel cytochrome P-45014αDM inhibiting antifungal agents.",
                "year":
                1999,
                "links": {
                    "landing_page":
                    "ftp://ftp.ics.uci.edu/pub/baldig/learning/Cytochrome/",
                    "publication":
                    ["http://pubs.acs.org/doi/full/10.1021/ci9800413"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "sdf"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "sdf")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Cytochrome QSAR - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "sdf": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/cytochrome_qsar/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Benchmark of the FRETR Bayesian restraint",
                "acl": ["public"],
                "source_name": "fretr_bayesian_restraint",

                "data_contact": {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Bonomi, M., Pellarin, R., Kim, S. J., Russel, D., Sundin, B. A., Riffle, M., … Sali, A. (2014). Benchmark of the FRETR Bayesian restraint [Data set]. Mol Cell Proteomics. Zenodo. http://doi.org/10.5281/zenodo.46558"],

                "author": [{

                    "given_name": "Massimiliano",
                    "family_name": "Bonomi",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco, University of Cambridge",

                },
                {

                    "given_name": "Riccardo",
                    "family_name": "Pellarin",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Seung Joong",
                    "family_name": "Kim",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Daniel",
                    "family_name": "Russel",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Bryan A.",
                    "family_name": "Sundin",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Michael",
                    "family_name": "Riffle",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Daniel",
                    "family_name": "Jaschob",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Richard",
                    "family_name": "Ramsden",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Trisha N.",
                    "family_name": "Davis",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Eric G. D.",
                    "family_name": "Muller",
                    "email": "*****@*****.**",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco",

                }],

                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "FRETR Bayesian Restraint",
                "tags": ["Integrative Modeling Platform (IMP)", "Benchmark", "Förster resonance energy transfer (FRET)"],
                "description": "The use of in vivo Förster resonance energy transfer (FRET) data to determine the molecular architecture of a protein complex in living cells is challenging due to data sparseness, sample heterogeneity, signal contributions from multiple donors and acceptors, unequal fluorophore brightness, photobleaching, flexibility of the linker connecting the fluorophore to the tagged protein, and spectral cross-talk. We addressed these challenges by using a Bayesian approach that produces the posterior probability of a model, given the input data. The posterior probability is defined as a function of the dependence of our FRET metric FRETR on a structure (forward model), a model of noise in the data, as well as prior information about the structure, relative populations of distinct states in the sample, forward model parameters, and data noise.",
                "year": 2014,

                "links": {

                    "landing_page": "https://zenodo.org/record/46558",
                    "publication": ["https://doi.org/10.1074/mcp.M114.040824", "https://github.com/integrativemodeling/fret_benchmark/tree/v1.0"],
                    "data_doi": "https://doi.org/10.5281/zenodo.46558",
                    #"related_id": "",

                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",

                        "path": "/record/46558/files/fret_benchmark-v1.0.zip",

                    },

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb$"), desc="Processing Files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "FRETR Bayesian Restraint - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,

                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,

                    "pdb": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/fretr_bayesian_restraint/" + data_file["no_root_path"] + "/" + data_file["filename"],

                    },

                },

                #"citation": ,

                #"data_contact": {

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #},

                #"author": [{

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #}],

                #"year": ,

            },

            #"dc": {

            #},


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Polymorphism in magic-sized Au144(SR)60 clusters",
                "acl": ["public"],
                "source_name":
                "au_sr_polymorphism",
                "data_contact": {
                    "given_name": "Simon J. L.",
                    "family_name": "Billinge",
                    "email": "*****@*****.**",
                    "institution": "Columbia University",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Juhas, P., Billinge, S., Jensen, K., Tofanelli, M., Ackerson, C., & Vaughn, G. (2016). ncomm-goldnp-2016: Release 3.0 [Data set]. Zenodo. http://doi.org/10.5281/zenodo.56682"
                ],
                "author": [{
                    "given_name": "Kirsten M.Ø.",
                    "family_name": "Jensen",
                    "institution": "Columbia University",
                }, {
                    "given_name": "Pavol",
                    "family_name": "Juhas",
                    "institution": "Brookhaven National Laboratory",
                }, {
                    "given_name": "Marcus A.",
                    "family_name": "Tofanelli",
                    "institution": "Colorado State University",
                }, {
                    "given_name": "Christine L.",
                    "family_name": "Heinecke",
                    "institution": "Colorado State University",
                }, {
                    "given_name": "Gavin",
                    "family_name": "Vaughan",
                    "institution": "European Synchrotron Radiation Facility",
                }, {
                    "given_name": "Christopher J.",
                    "family_name": "Ackerson",
                    "email": "*****@*****.**",
                    "institution": "Colorado State University",
                }, {
                    "given_name": "Simon J. L.",
                    "family_name": "Billinge",
                    "email": "*****@*****.**",
                    "institution": "Columbia University",
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Polymorphism in Au144(SR)60 Clusters",
                "tags": [
                    "Inorganic chemistry", "Nanoparticles",
                    "Physical chemistry", "pair distribution function",
                    "gold cluster", "nanoparticle", "x-ray diffraction",
                    "polymorphism"
                ],
                "description":
                "Ultra-small, magic-sized metal nanoclusters represent an important new class of materials with properties between molecules and particles. However, their small size challenges the conventional methods for structure characterization. Here we present the structure of ultra-stable Au144(SR)60 magic-sized nanoclusters obtained from atomic pair distribution function analysis of X-ray powder diffraction data.",
                "year":
                2016,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.56682",
                    "publication": ["http://dx.doi.org/10.1038/ncomms11859"],
                    #"data_doi": "",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",
                        "path":
                        "/record/56682/files/ncomm-goldnp-2016-v3.0.zip",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Au(SR) Polymorphism - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                # "raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/au_sr_polymorphism/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")