Exemplo n.º 1
0
def call_ingester_repo(repos, globus_index, batch_size=100, verbose=VERBOSE):
    """Ingest every feedstock source belonging to one or more repos.

    For each repo, the feedstock directory (PATH_FEEDSTOCK) is searched with
    find_files() for names matching ``repo + ".*json$"``; the "_all.json"
    suffix is stripped from each filename to obtain the source names, which
    are then handed to call_ingester().

    Args:
        repos: A single repo name or a list of repo names.
        globus_index: Forwarded unchanged to call_ingester().
        batch_size: Forwarded unchanged to call_ingester().
        verbose: When true, print progress messages. This value is also
            forwarded to call_ingester().
    """
    # Accept a bare repo name as shorthand for a one-element list.
    if not isinstance(repos, list):
        repos = [repos]
    if verbose:
        print("INGESTING THE FOLLOWING REPOS:", repos)
    for repo in repos:
        sources = [
            found["filename"].replace("_all.json", "")
            for found in find_files(PATH_FEEDSTOCK, repo + ".*json$")
        ]
        # Pass the caller's verbosity through; the original forwarded the
        # module-level VERBOSE constant, silently ignoring the argument.
        call_ingester(sources, globus_index=globus_index, batch_size=batch_size, verbose=verbose)
        # Printed once per repo, after that repo's sources have been ingested.
        if verbose:
            print("\nREPO INGESTING COMPLETE")
Exemplo n.º 2
0
def test_find_files():
    """Check toolbox.find_files() against the bundled "testing_files" tree.

    Three calls are exercised: no pattern, the pattern "toolbox", and the
    pattern "txt$". The first also verifies the "path"/"no_root_path" fields
    of every returned entry.
    """
    root = os.path.join(os.path.dirname(__file__), "testing_files")
    every_fixture = [
        "2_toolbox.txt", "3_toolbox_3.txt", "4toolbox4.txt",
        "6_toolbox.dat", "toolbox_1.txt", "toolbox_5.csv",
        "txttoolbox.csv", "toolbox_compressed.tar",
    ]
    txt_fixtures = [
        "2_toolbox.txt", "3_toolbox_3.txt", "4toolbox4.txt", "toolbox_1.txt",
    ]

    # No pattern: every fixture must be present (extra files are tolerated).
    unfiltered = list(toolbox.find_files(root))
    unfiltered_names = [entry["filename"] for entry in unfiltered]
    assert all(fixture in unfiltered_names for fixture in every_fixture)

    # "path" must equal root joined with "no_root_path", and
    # path/filename must point at a real file.
    for entry in unfiltered:
        assert entry["path"] == os.path.join(root, entry["no_root_path"])
        assert os.path.isfile(os.path.join(entry["path"], entry["filename"]))

    # Pattern "toolbox": exactly the full fixture set, in any order.
    by_name = [entry["filename"] for entry in toolbox.find_files(root, "toolbox")]
    assert sorted(by_name) == sorted(every_fixture)

    # Pattern "txt$": exactly the .txt fixtures, in any order.
    txt_only = [entry["filename"] for entry in toolbox.find_files(root, "txt$")]
    assert sorted(txt_only) == sorted(txt_fixtures)
def convert(input_path, metadata=None, verbose=False):
    """Convert the Halogen-Substituted Ionic Liquids dataset into feedstock.

    Args:
        input_path: Root directory passed to find_files() (pattern "xyz") to
            locate the data files.
        metadata: Dataset-level metadata. A falsy value selects the built-in
            default; a str is tried first as a JSON document and then as the
            path of a JSON file; a dict is used as-is.
        verbose: When true, print progress messages and enable the tqdm bar.

    Raises:
        SystemExit: If a str metadata can be read neither as JSON nor as a
            JSON file, or if metadata has any other unsupported type.
        ValueError: If the Validator reports that a record was not written.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    if not metadata:
        contact = {
            "given_name": "Vitaly V.",
            "family_name": "Chaban",
            "email": "*****@*****.**",
            "institution": "University of Rochester",
        }
        contributor = {
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }
        author = {
            "given_name": "Vitaly V.",
            "family_name": "Chaban",
            "email": "*****@*****.**",
            "institution": "Universidade Federal de São Paulo",
        }
        dataset_metadata = {
            "mdf": {
                "title": "Halogen-Substituted Ionic Liquids",
                "acl": ["public"],
                "source_name": "halogen_ionic_liquids",
                "data_contact": contact,
                "data_contributor": [contributor],
                "citation": ["Chaban, Vitaly V. (2016). Halogen-Substituted Ionic Liquids [Data set]. Zenodo. http://doi.org/10.5281/zenodo.165493"],
                "author": [author],
                "license": "https://creativecommons.org/licenses/by/4.0/",
                "collection": "Halogen Substituted Ionic Liquids",
                "description": "Pre-equilibrated systems for different size for AIMD for Halogen-Substituted Ionic Liquids.",
                "year": 2016,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.165493",
                },
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        # A string is either inline JSON or, failing that, a JSON file path.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it receives the dataset metadata up front
    # and then each record individually.
    dataset_validator = Validator(dataset_metadata)

    xyz_files = find_files(input_path, "xyz")
    for entry in tqdm(xyz_files, desc="Processing files", disable=not verbose):
        source_path = os.path.join(entry["path"], entry["filename"])
        parsed = parse_ase(source_path, "xyz")
        formula = parsed["chemical_formula"]

        # Record-level metadata: one record per parsed xyz file.
        record_metadata = {
            "mdf": {
                "title": "Halogen Ionic Liquids - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "xyz": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/halogen_ionic_liquids/" + entry["filename"],
                    },
                },
            },
        }

        outcome = dataset_validator.write_record(record_metadata)
        if outcome["success"]:
            continue

        # A rejected record aborts the whole conversion: try to cancel the
        # validation, then surface the Validator's message to the caller.
        cancelled = dataset_validator.cancel_validation()
        if not cancelled["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    if verbose:
        print("Finished converting")
Exemplo n.º 4
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the NIST fcc/hcp vacancy-diffusion dataset into feedstock.

    Args:
        input_path: Root directory passed to find_files() (pattern "OUTCAR")
            to locate the data files.
        metadata: Dataset-level metadata. A falsy value selects the built-in
            default; a str is tried first as a JSON document and then as the
            path of a JSON file; a dict is used as-is.
        verbose: When true, print progress messages, the number of files
            that could not be parsed, and enable the tqdm bar.

    Raises:
        SystemExit: If a str metadata can be read neither as JSON nor as a
            JSON file, or if metadata has any other unsupported type.
        ValueError: If the Validator reports that a record was not written.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Elemental vacancy diffusion database from high-throughput first-principles calculations for fcc and hcp structures",
                "acl": ["public"],
                "source_name": "nist_fcc_hcp_structures",

                "data_contact": {
                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",
                },

                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],

                "citation": ["Angsten, Thomas; Mayeshiba, Tam; Wu, Henry; Morgan, Dane Elemental vacancy diffusion for fcc and hcp structures (2014-08-08) http://hdl.handle.net/11256/76"],

                "author": [{
                    "given_name": "Thomas",
                    "family_name": "Angsten",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",
                },
                {
                    "given_name": "Tam",
                    "family_name": "Mayeshiba",
                    "institution": "University of Wisconsin-Madison",
                },
                {
                    "given_name": "Henry",
                    "family_name": "Wu",
                    "institution": "University of Wisconsin-Madison",
                },
                {
                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",
                }],

                # No "license" entry: none is stated on the site (contact the author).
                "collection": "NIST fcc hcp Structures",
                "description": "This work demonstrates how databases of diffusion-related properties can be developed from high-throughput ab initio calculations. The formation and migration energies for vacancies of all adequately stable pure elements in both the face-centered cubic (fcc) and hexagonal close packing (hcp) crystal structures were determined using ab initio calculations. For hcp migration, both the basal plane and z-direction nearest-neighbor vacancy hops were considered. Energy barriers were successfully calculated for 49 elements in the fcc structure and 44 elements in the hcp structure.",
                "year": 2014,

                "links": {
                    "landing_page": "http://hdl.handle.net/11256/76",
                    "publication": ["http://dx.doi.org/10.1088/1367-2630/16/1/015018"],
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is either inline JSON or, failing that, a JSON file path.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        try:
            record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp-out")
        except Exception:
            # Count the unparseable file and skip it. Without the skip, the
            # code below would either hit an undefined `record` (first file)
            # or silently reuse the previous file's parse result.
            errors += 1
            continue

        formula = record["chemical_formula"]

        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "NIST fcc hcp structures - " + formula,
                "acl": ["public"],
                "composition": formula,

                "links": {
                    "outcar": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/nist_fcc_hcp_structures/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")
Exemplo n.º 5
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the gdb9_14 (134k molecules) dataset into feedstock.

    Args:
        input_path: Root directory passed to find_files() (pattern "xyz") to
            locate the data files.
        metadata: Dataset-level metadata. A falsy value selects the built-in
            default; a str is tried first as a JSON document and then as the
            path of a JSON file; a dict is used as-is.
        verbose: When true, print progress messages, the number of files
            that could not be parsed, and enable the tqdm bar.

    Raises:
        SystemExit: If a str metadata can be read neither as JSON nor as a
            JSON file, or if metadata has any other unsupported type.

    Records rejected by the Validator are reported with a printed message;
    processing then continues with the next file.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Quantum Chemistry Structures and Properties of 134 kilo Molecules",
                "acl": ['public'],
                "source_name": "gdb9_14",
                "citation": [
                    "Raghunathan Ramakrishnan, Pavlo Dral, Matthias Rupp, O. Anatole von Lilienfeld: Quantum Chemistry Structures and Properties of 134 kilo Molecules, Scientific Data 1: 140022, 2014."
                ],
                "data_contact": {
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                },
                # The "institution" key was misspelled "instituition" on three
                # of these entries; it now matches the key used everywhere else.
                "author": [{
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                }, {
                    "given_name": "Raghunathan",
                    "family_name": "Ramakrishnan",
                    "institution": "University of Basel",
                }, {
                    "given_name": "Pavlo O.",
                    "family_name": "Dral",
                    "institution": "Max-Planck-Institut für Kohlenforschung, University of Erlangen-Nuremberg",
                }, {
                    "given_name": "Matthias",
                    "family_name": "Rupp",
                    "institution": "University of Basel",
                }],
                "license": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
                "collection": "gdb9_14",
                "tags": [
                    "Computational chemistry", "Density functional theory",
                    "Quantum chemistry"
                ],
                "description": "133,885 small organic molecules with up to 9 C, O, N, F atoms, saturated with H. Geometries, harmonic frequencies, dipole moments, polarizabilities, energies, enthalpies, and free energies of atomization at the DFT/B3LYP/6-31G(2df,p) level of theory. For a subset of 6,095 constitutional isomers of C7H10O2, energies, enthalpies, and free energies of atomization are provided at the G4MP2 level of theory.",
                "year": 2014,
                "links": {
                    "landing_page": "http://qmml.org/datasets.html#gdb9-14",
                    "publication": ["http://dx.doi.org/10.1038/sdata.2014.22"],
                    "zip": {
                        "http_host": "http://qmml.org",
                        "path": "/Datasets/gdb9-14.zip",
                    }
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }
    elif isinstance(metadata, str):
        # A string is either inline JSON or, failing that, a JSON file path.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), "xyz")
        except Exception:
            # Some files cannot be parsed (string-to-float failures on values
            # written in a scientific form such as 6.2198*^-6). Count the file
            # and skip it; without the skip, `record` would be undefined or
            # left over from the previous file.
            errors += 1
            continue

        comp = record["chemical_formula"]

        # Only files whose no_root_path is "dsgdb9nsd.xyz" carry an index;
        # it sits between the underscore and the ".xyz" suffix of the name.
        index = ""
        if data_file["no_root_path"] == "dsgdb9nsd.xyz":
            start = data_file["filename"].find('_')
            index = int(data_file["filename"][start + 1:-4])

        record_metadata = {
            "mdf": {
                "title": "gdb9_14 - " + comp,
                "acl": ['public'],
                "composition": comp,
                "links": {
                    "xyz": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/gdb9_14/" + data_file["no_root_path"] +
                        "/" + data_file["filename"],
                    },
                },
            },
            # Dataset-specific block, named after the source_name.
            "gdb9_14": {
                "index": index,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("TOTAL ERRORS: " + str(errors))
        print("Finished converting")
Exemplo n.º 6
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the KLH Dataset I (keyhole limpet hemocyanin) into feedstock.

    Every file found under input_path with the pattern "map$" is read and
    parsed with parse_tab() into rows of (index, image, coordinate); one
    record is written per row.

    Args:
        input_path: Root directory passed to find_files().
        metadata: Dataset-level metadata. A falsy value selects the built-in
            default; a str is tried first as a JSON document and then as the
            path of a JSON file; a dict is used as-is.
        verbose: When true, print progress messages and enable the tqdm bar.

    Raises:
        SystemExit: If a str metadata can be read neither as JSON nor as a
            JSON file, or if metadata has any other unsupported type.
        ValueError: If the Validator reports that a record was not written.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    if not metadata:
        # Authors without an e-mail address, in publication order.
        plain_authors = [
            ("Yuanxin", "Zhu", "The Scripps Research Institute"),
            ("Bridget", "Carragher", "The Scripps Research Institute"),
            ("Robert M", "Glaeser", "University of California, Berkeley"),
            ("Denis", "Fellmann", "The Scripps Research Institute"),
            ("Chandrajit", "Bajaj", "University of Texas at Austin,"),
            ("Marshall", "Bern", "Palo Alto Research Center"),
            ("Fabrice", "Mouche", "The Scripps Research Institute"),
            ("Felix", "de Haas", "FEI Company, Eindhoven"),
            ("Richard J", "Hall", "Imperial College London"),
            ("David J", "Kriegman", "University of California, San Diego"),
            ("Steven J", "Ludtke", "Baylor College of Medicine"),
            ("Satya P", "Mallick", "University of California, San Diego"),
            ("Pawel A", "Penczek", "University of Texas-Houston Medical School"),
            ("Alan M", "Roseman", "MRC Laboratory of Molecular Biology"),
            ("Fred J", "Sigworth", "Yale University School of Medicine"),
            ("Niels", "Volkmann", "The Burnham Institute"),
        ]
        authors = [
            {"given_name": given, "family_name": family, "institution": institution}
            for given, family, institution in plain_authors
        ]
        # The final author is also the data contact and carries an e-mail.
        authors.append({
            "given_name": "Clinton S",
            "family_name": "Potter",
            "email": "*****@*****.**",
            "institution": "The Scripps Research Institute",
        })

        dataset_metadata = {
            "mdf": {
                "title": "KLH Dataset I",
                "acl": ["public"],
                "source_name": "klh_1",
                "data_contact": {
                    "given_name": "Clinton S",
                    "family_name": "Potter",
                    "email": "*****@*****.**",
                    "institution": "The Scripps Research Institute",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "author": authors,
                "collection": "Keyhole Limpet Hemocyanin",
                "tags": ["Electron microscopy", "Single-particle reconstruction", "Automatic particle selection", "Image processing", "Pattern recognition"],
                "description": "Manual selection of single particles in images acquired using cryo-electron microscopy (cryoEM) will become a significant bottleneck when datasets of a hundred thousand or even a million particles are required for structure determination at near atomic resolution. Algorithm development of fully automated particle selection is thus an important research objective in the cryoEM field. A number of research groups are making promising new advances in this area. Evaluation of algorithms using a standard set of cryoEM images is an essential aspect of this algorithm development. With this goal in mind, a particle selection \"bakeoff\" was included in the program of the Multidisciplinary Workshop on Automatic Particle Selection for cryoEM. Twelve groups participated by submitting the results of testing their own algorithms on a common dataset. The dataset consisted of 82 defocus pairs of high-magnification micrographs, containing keyhole limpet hemocyanin particles, acquired using cryoEM.",
                "year": 2004,
                "links": {
                    "landing_page": "http://emg.nysbc.org/redmine/projects/public-datasets/wiki/KLH_dataset_I",
                    "publication": ["http://www.sciencedirect.com/science/article/pii/S1047847703002004#!"],
                },
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        # A string is either inline JSON or, failing that, a JSON file path.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it receives the dataset metadata up front
    # and then each record individually.
    dataset_validator = Validator(dataset_metadata)

    def _link(path):
        # Every link of a record shares the same endpoint and host.
        return {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": path,
        }

    map_files = find_files(input_path, "map$")
    for map_entry in tqdm(map_files, desc="Processing Files", disable=not verbose):
        with open(os.path.join(map_entry["path"], map_entry["filename"]), 'r') as raw_in:
            map_text = raw_in.read()
        headers = ["index", "image", "coordinate"]

        for row in parse_tab(map_text, headers=headers, sep=" "):
            # The map lists the second-exposure image; the first-exposure
            # name is derived from it by swapping ".002" for ".001".
            second_image = row["image"]
            first_image = second_image.replace(".002", ".001")
            coord_file = row["coordinate"]

            # The parsed table is not used afterwards; the call is retained
            # because it still raises if the coordinate file is missing or
            # unreadable. NOTE(review): confirm whether this is intentional.
            pd.read_csv(os.path.join(map_entry["path"], coord_file), delim_whitespace=True)

            record_metadata = {
                "mdf": {
                    "title": "Keyhole Limpet Hemocyanin 1 - " + coord_file,
                    "acl": ["public"],
                    "description": "Images under exposure1 are near-to-focus (NTF). Images under exposure2 are far-from-focus (FFF).",
                    "links": {
                        "klh": _link("/collections/klh_1/" + map_entry["no_root_path"] + "/" + coord_file),
                        "jpg": _link("/collections/klh_1/exposure1_jpeg/" + first_image.replace(".mrc", ".jpg")),
                        "mrc": _link("/collections/klh_1/exposure1_mrc/" + first_image),
                        "jpg2": _link("/collections/klh_1/exposure2_jpeg/" + second_image.replace(".mrc", ".jpg")),
                        "mrc2": _link("/collections/klh_1/exposure2_mrc/" + second_image),
                    },
                },
            }

            outcome = dataset_validator.write_record(record_metadata)
            if outcome["success"]:
                continue

            # A rejected record aborts the whole conversion: try to cancel
            # the validation, then surface the Validator's message.
            cancelled = dataset_validator.cancel_validation()
            if not cancelled["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for the cytotoxic platinum(IV) complexes dataset.

    Args:
        input_path: Root directory passed to ``find_files``; each file
            returned for the pattern ``"temp_file.cif"`` yields one record.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults, a ``dict`` is used as given, and a ``str`` is tried
            first as JSON text and then as the path of a JSON file.
        verbose: When true, print start/finish messages and enable the
            progress bar.

    Raises:
        SystemExit: If ``metadata`` has an unsupported type or cannot be
            read as JSON.
        ValueError: If the Validator rejects a record.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    # NOTE: People blocks may carry extra IDs (ex. "github": "jgaff").
    if not metadata:
        vienna = "University of Vienna"

        contact = {
            "given_name": "Markus",
            "family_name": "Galanski",
            "email": "*****@*****.**",
            "institution": vienna,
        }
        contributors = [{
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }]
        authors = [
            {"given_name": "Hristo P.", "family_name": "Varbanov", "institution": vienna},
            {"given_name": "Michael A.", "family_name": "Jakupec", "institution": vienna},
            {"given_name": "Alexander", "family_name": "Roller", "institution": vienna},
            {
                "given_name": "Frank",
                "family_name": "Jensen",
                "email": "*****@*****.**",
                "institution": "University of Aarhus",
            },
            {
                "given_name": "Markus",
                "family_name": "Galanski",
                "email": "*****@*****.**",
                "institution": vienna,
            },
            {"given_name": "Bernhard K.", "family_name": "Keppler", "institution": vienna},
        ]
        keywords = [
            "structure geometry", "series", "resistance", "Herein",
            "laboratory", "tetraki", "tris", "Relationship", "wb 97x",
            "mechanism", "cisplatin", "complex", "SW", "Cytotoxic",
            "calculation", "relationship", "Density Functional Theory",
            "DFT", "Reliable", "ComplexesOctahedral", "bi", "compound",
            "Quantitative", "Model", "QSAR investigations", "cytotoxicity",
            "candidate", "cell line CH 1", "descriptor", "optimization",
            "QSAR models", "toxicity", "Theoretical Investigations",
        ]
        dataset_links = {
            "landing_page": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3557934/",
            "publication": ["https://dx.doi.org/10.1021%2Fjm3016427"],
            "cif": {
                "http_host": "https://ndownloader.figshare.com",
                "path": "/files/3593325",
            },
        }

        dataset_metadata = {
            "mdf": {
                "title": "Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes",
                "acl": ["public"],
                "source_name": "cytotoxic_pt_complexes",
                "data_contact": contact,
                "data_contributor": contributors,
                "citation": ["Keppler, Bernhard K. (2013/01/10). Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes. Journal of Medicinal Chemistry, 56, 330-344. doi: 10.1021/jm3016427"],
                "author": authors,
                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Cytotoxic Platinum Complexes",
                "tags": keywords,
                "description": "Octahedral platinum(IV) complexes are promising candidates in the fight against cancer. In order to rationalize the further development of this class of compounds, detailed studies on their mechanisms of action, toxicity, and resistance must be provided and structure–activity relationships must be drawn. Herein, we report on theoretical and QSAR investigations of a series of 53 novel bis-, tris-, and tetrakis(carboxylato)platinum(IV) complexes, synthesized and tested for cytotoxicity in our laboratories. ",
                "year": 2012,
                "links": dataset_links,
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        # Try the string as JSON text first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; its constructor receives the dataset metadata.
    validator = Validator(dataset_metadata)

    # The temp file is the same as the real file, but with authors and
    # addresses deleted so that ase can read the composition. It should only
    # be used for converting purposes.
    cif_files = find_files(input_path, "temp_file.cif")
    for entry in tqdm(cif_files, desc="Processing files", disable=not verbose):
        cif_path = os.path.join(entry["path"], entry["filename"])
        record = parse_ase(cif_path, "cif")
        formula = record["chemical_formula"]

        # Records are written one at a time, each with its own metadata.
        outcome = validator.write_record({
            "mdf": {
                "title": "Cytotoxic Platinum Complexes - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "cif": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/cytotoxic_pt_complexes/jm3016427_si_002.cif",
                    },
                },
            },
        })
        if outcome["success"]:
            continue

        # The record was rejected: cancel the validation and stop processing.
        cancelled = validator.cancel_validation()
        if not cancelled["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 8
0
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for the "SAR Environments using ECP" dataset.

    Args:
        input_path: Root directory passed to ``find_files``; each file
            returned for the pattern ``"sdf"`` is parsed into one record.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults, a ``dict`` is used as given, and a ``str`` is tried
            first as JSON text and then as the path of a JSON file.
        verbose: When true, print start/finish messages, the number of
            files that could not be parsed, and enable the progress bar.

    Raises:
        SystemExit: If ``metadata`` has an unsupported type or cannot be
            read as JSON.
        ValueError: If the Validator rejects a record.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Prediction of Compounds in Different Local SAR Environments using ECP",
                "acl": ["public"],
                "source_name":
                "ecp_sar_environments",
                "data_contact": {
                    "given_name":
                    "Jürgen",
                    "family_name":
                    "Bajorath",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Namasivayam, V., Gupta-Ostermann, D., Balfer, J., Heikamp, K., & Bajorath, J. (2014). Prediction of Compounds in Different Local SAR Environments using ECP [Data set]. Zenodo. http://doi.org/10.5281/zenodo.8626"
                ],
                "author": [{
                    "given_name":
                    "Vigneshwaran",
                    "family_name":
                    "Namasivayam",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Disha",
                    "family_name":
                    "Gupta-Ostermann",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Jenny",
                    "family_name":
                    "Balfer",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Kathrin",
                    "family_name":
                    "Heikamp",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Jürgen",
                    "family_name":
                    "Bajorath",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "SAR Environments using ECP",
                "description":
                "Active compounds can participate in different local structure–activity relationship (SAR) environments and introduce different degrees of local SAR discontinuity, depending on their structural and potency relationships in data sets. Such SAR features have thus far mostly been analyzed using descriptive approaches, in particular, on the basis of activity landscape modeling. However, compounds in different local SAR environments have not yet been predicted. Herein, we adapt the emerging chemical patterns (ECP) method, a machine learning approach for compound classification, to systematically predict compounds with different local SAR characteristics. ECP analysis is shown to accurately assign many compounds to different local SAR environments across a variety of activity classes covering the entire range of observed local SARs.",
                "year":
                2014,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.8626",
                    "publication":
                    ["http://pubs.acs.org/doi/abs/10.1021/ci500147b"],
                    "zip": {
                        "http_host": "https://zenodo.org",
                        "path": "/record/8626/files/Data_sets.zip",
                    },
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # Try the string as JSON text first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record is written through the Validator one at a time,
    #    together with its own metadata.
    errors = 0
    for data_file in tqdm(find_files(input_path, "sdf"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), "sdf")
        except Exception:
            # Count the unparseable file and skip it. Falling through would
            # either hit an unbound ``record`` (first file) or re-write the
            # previous file's record under this file's path.
            errors += 1
            continue

        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "SAR Environments using ECP - " + record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],
                "links": {
                    "sdf": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ecp_sar_environments/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")
Exemplo n.º 9
0
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for the cyclopropene benzamidation dataset.

    Args:
        input_path: Root directory passed to ``find_files``; each file
            returned for the pattern ``".out$"`` is parsed as Gaussian
            output and yields one record.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults, a ``dict`` is used as given, and a ``str`` is tried
            first as JSON text and then as the path of a JSON file.
        verbose: When true, print start/finish messages and enable the
            progress bar.

    Raises:
        SystemExit: If ``metadata`` has an unsupported type or cannot be
            read as JSON.
        ValueError: If the Validator rejects a record.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    # NOTE: People blocks may carry extra IDs (ex. "github": "jgaff").
    if not metadata:
        oxford = "University of Oxford"
        colorado = "Colorado State University"

        contact = {
            "given_name": "Robert S.",
            "family_name": "Paton",
            "email": "*****@*****.**",
            "institution": oxford,
        }
        contributors = [{
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }]
        authors = [
            {"given_name": "Tomislav", "family_name": "Rovis", "institution": colorado},
            {
                "given_name": "Robert S.",
                "family_name": "Paton",
                "email": "*****@*****.**",
                "institution": oxford,
            },
            {"given_name": "Kelvin E.", "family_name": "Jackson", "institution": oxford},
            {"given_name": "Natthawat", "family_name": "Semakul", "institution": colorado},
        ]
        dataset_links = {
            "landing_page": "https://zenodo.org/record/53736#.WWWmjMaZPFQ",
            "publication": [
                "http://pubs.rsc.org/en/content/articlelanding/2016/sc/c6sc02587k#!divAbstract",
            ],
        }

        dataset_metadata = {
            "mdf": {
                "title": "Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation",
                "acl": ["public"],
                "source_name": "cyclopropenes",
                "data_contact": contact,
                "data_contributor": contributors,
                "citation": [
                    "Paton, R., & Jackson, K. (2016). Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation [Data set]. Zenodo. http://doi.org/10.5281/zenodo.53736",
                ],
                "author": authors,
                "license": "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "Diastereoselective Benzamidation of Cyclopropenes",
                "tags": ["DFT", "Gaussian", "Transition State", "Stereoselectivity"],
                "description": "The diastereoselective coupling of O-substituted arylhydroxamates and cyclopropenes mediated by Rh(III) catalysis was successfully developed. Through ligand development, the diastereoselectivity of this reaction was improved using a heptamethylindenyl (Ind*) ligand, which has been rationalized using quantum chemical calculations. In addition, the nature of the O-substituted ester of benzhydroxamic acid proved important for high diastereoselectivity. This transformation tolerates a variety of benzamides and cyclopropenes that furnish cyclopropa[c]dihydroisoquinolones with high diastereocontrol, which could then be easily transformed into synthetically useful building blocks for pharmaceuticals and bio-active molecules.",
                "year": 2016,
                "links": dataset_links,
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        # Try the string as JSON text first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; its constructor receives the dataset metadata.
    validator = Validator(dataset_metadata)

    out_files = find_files(input_path, ".out$")
    for entry in tqdm(out_files, desc="Processing files", disable=not verbose):
        out_path = os.path.join(entry["path"], entry["filename"])
        record = parse_ase(out_path, "gaussian-out")
        formula = record["chemical_formula"]

        # Records are written one at a time, each with its own metadata.
        outcome = validator.write_record({
            "mdf": {
                "title": "Diastereoselective Benzamidation of Cyclopropenes - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "out": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/cyclopropenes/" + entry["filename"],
                    },
                },
            },
        })
        if outcome["success"]:
            continue

        # The record was rejected: cancel the validation and stop processing.
        cancelled = validator.cancel_validation()
        if not cancelled["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for the NaFe(PO3)3 metaphosphate structure dataset.

    Args:
        input_path: Root directory passed to ``find_files``; each file
            returned for the pattern ``"cif"`` is parsed into one record.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults, a ``dict`` is used as given, and a ``str`` is tried
            first as JSON text and then as the path of a JSON file.
        verbose: When true, print start/finish messages and enable the
            progress bar.

    Raises:
        SystemExit: If ``metadata`` has an unsupported type or cannot be
            read as JSON.

    A record rejected by the Validator is reported with ``print`` and
    processing continues with the next file.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Synthesis and structural data of a Fe-base sodium metaphosphate compound, NaFe(PO3)3",
                "acl": ['public'],
                "source_name":
                "fe_na_metaphosphate_structure",
                "citation": [
                    "Lin, Xinghao et al. “Synthesis and Structural Data of a Fe-Base Sodium Metaphosphate Compound, NaFe(PO3)3.” Data in Brief 4 (2015): 217–221. PMC. Web. 30 June 2017."
                ],
                # The person blocks below use the key "institution", matching
                # the "data_contributor" entry of this same dictionary.
                "data_contact": {
                    "given_name": "Yanming",
                    "family_name": "Zhao",
                    # NOTE(review): this address looks character-reversed
                    # (possibly scraped obfuscation) -- confirm.
                    "email": "nc.ude.tucs@myoahz",
                    "institution": "South China University of Technology"
                },
                "author": [{
                    "given_name":
                    "Xinghao",
                    "family_name":
                    "Lin",
                    "institution":
                    "South China University of Technology"
                }, {
                    "given_name":
                    "Yanming",
                    "family_name":
                    "Zhao",
                    "email":
                    "nc.ude.tucs@myoahz",
                    "institution":
                    "South China University of Technology"
                }, {
                    "given_name":
                    "Youzhong",
                    "family_name":
                    "Dong",
                    "institution":
                    "South China University of Technology"
                }, {
                    "given_name":
                    "Quan",
                    "family_name":
                    "Kuang",
                    "institution":
                    "South China University of Technology"
                }],
                "license":
                "http://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Fe-base sodium metaphosphate Synthesis and Structure",
                "description":
                "In this data article, the synthesized process of this metaphosphate compound and the morphology of the obtained sample will be provided. The high-power XRD Rietveld refinement is applied to determine the crystal structure of this metaphosphate compound and the refinement result including the main refinement parameters, atomic coordinate and some important lattace parameters are stored in the cif file.",
                "year":
                2015,
                "links": {
                    "landing_page":
                    "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4510540/",
                    "zip": {
                        "http_host": "https://www.ncbi.nlm.nih.gov",
                        "path": "/pmc/articles/PMC4510540/bin/mmc2.zip",
                    }
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif isinstance(metadata, str):
        # Try the string as JSON text first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record is written through the Validator one at a time,
    #    together with its own metadata.
    for data_file in tqdm(find_files(input_path, "cif"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "cif")
        record_metadata = {
            "mdf": {
                "title":
                "Synthesis and Structure of - " + record["chemical_formula"],
                "acl": ['public'],
                "composition":
                record["chemical_formula"],
                "links": {
                    "cif": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/fe_na_metaphosphate_structure/" +
                        data_file["filename"],
                    },
                },
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 11
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "Platinum pyridine cations" dataset into feedstock records.

    Every ``.xyz`` file that ``find_files`` reports under ``input_path`` is
    parsed with ``parse_ase`` and written, one record at a time, through a
    ``Validator`` built from the dataset metadata.

    Args:
        input_path: Directory that is searched for ``.xyz`` files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is tried first as JSON text and then as
            the path of a JSON file; a ``dict`` is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
        ValueError: If the Validator rejects a record. Validation is cancelled
            before the error is raised.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Platinum pyridine cations: the DFT optimized geometries",
                "acl": ["public"],
                "source_name": "pt_pyridine_cations",
                "data_contact": {
                    "given_name": "Alexander",
                    "family_name": "Markov",
                    "email": "sasha-markov.net",
                    "institution": "Kurnakov Institute of General and Inorganic Chemistry of RAS",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Markov, A. (2015). Platinum pyridine cations: the DFT optimized geometries [Data set]. Zenodo. http://doi.org/10.5281/zenodo.31335"
                ],
                "author": [{
                    "given_name": "Alexander",
                    "family_name": "Markov",
                    "email": "sasha-markov.net",
                    "institution": "Kurnakov Institute of General and Inorganic Chemistry of RAS",
                }],
                "license": "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "Platinum Pyridine Cations",
                "tags": ["platinum", "dft", "computational chemistry"],
                "description": "The geometries were optimized with the hybrid M06 functional, the mDZP all-electron basis set for platinum atom, and the def2-TZVP basis set for light atoms.",
                "year": 2015,
                "links": {
                    "landing_page": "http://doi.org/10.5281/zenodo.31335",
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except ValueError:
            # Not JSON text (json.JSONDecodeError is a ValueError), so treat
            # the string as the path of a JSON file instead.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # isinstance (rather than an exact type check) also accepts dict
        # subclasses such as OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    # The pattern is a regular expression: escape the dot and anchor it so
    # only names that really end in ".xyz" are handed to the xyz parser.
    for data_file in tqdm(find_files(input_path, r"\.xyz$"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Platinum Pyridine Cations - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "xyz": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/pt_pyridine_cations/" + data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 12
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "Cytochrome QSAR" dataset into feedstock records.

    Every ``.sdf`` file that ``find_files`` reports under ``input_path`` is
    parsed with ``parse_ase`` and written, one record at a time, through a
    ``Validator`` built from the dataset metadata.

    Args:
        input_path: Directory that is searched for ``.sdf`` files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is tried first as JSON text and then as
            the path of a JSON file; a ``dict`` is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
        ValueError: If the Validator rejects a record. Validation is cancelled
            before the error is raised.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Three-Dimensional Quantitative Structure−Activity Relationship (QSAR) and Receptor Mapping of Cytochrome P-45014αDM Inhibiting Azole Antifungal Agents",
                "acl": ["public"],
                "source_name": "cytochrome_qsar",
                "data_contact": {
                    "given_name": "Tanaji T.",
                    "family_name": "Talele",
                    "email": "*****@*****.**",
                    "institution": "University of Mumbai",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Kulkarni, Vithal M. (1999/03/22). Three-Dimensional Quantitative Structure−Activity Relationship (QSAR) and Receptor Mapping of Cytochrome P-45014αDM Inhibiting Azole Antifungal Agents. Journal of Chemical Information and Computer Sciences, 39, 204-210. doi: 10.1021/ci9800413"
                ],
                "author": [{
                    "given_name": "Tanaji T.",
                    "family_name": "Talele",
                    "email": "*****@*****.**",
                    "institution": "University of Mumbai",
                }, {
                    "given_name": "Vithal M.",
                    "family_name": "Kulkarni",
                    "institution": "University of Mumbai",
                }],
                "collection": "Cytochrome QSAR",
                "description": "Molecular modeling was performed by a combined use of conformational analysis and 3D-QSAR methods to distinguish structural attributes common to a series of azole antifungal agents. Apex-3D program was used to recognize the common biophoric structural patterns of 13 diverse sets of azole antifungal compounds demonstrating different magnitudes of biological activity. Apex-3D identified three common biophoric features significant for activity:  N1 atom of azole ring, the aromatic ring centroid 1, and aromatic ring centroid 2. A common biophore model proposed from the Apex-3D analysis can be useful for the design of novel cytochrome P-45014αDM inhibiting antifungal agents.",
                "year": 1999,
                "links": {
                    "landing_page": "ftp://ftp.ics.uci.edu/pub/baldig/learning/Cytochrome/",
                    "publication": ["http://pubs.acs.org/doi/full/10.1021/ci9800413"],
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except ValueError:
            # Not JSON text (json.JSONDecodeError is a ValueError), so treat
            # the string as the path of a JSON file instead.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # isinstance (rather than an exact type check) also accepts dict
        # subclasses such as OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    # The pattern is a regular expression: escape the dot and anchor it so
    # only names that really end in ".sdf" are handed to the sdf parser.
    for data_file in tqdm(find_files(input_path, r"\.sdf$"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "sdf")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Cytochrome QSAR - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "sdf": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/cytochrome_qsar/" +
                                data_file["no_root_path"] + "/" +
                                data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 13
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the Fe-Cr-Al oxidation dataset into feedstock records.

    Reads ``Fe_Cr_Al_data/Point Number to Composition.csv`` under
    ``input_path`` to map sample numbers to compositions, then writes one
    record per ``.txt`` file that ``find_files`` reports under ``input_path``.
    The temperature and the sample number of each record are taken from the
    file name.

    Args:
        input_path: Root directory of the dataset.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is tried first as JSON text and then as
            the path of a JSON file; a ``dict`` is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
        ValueError: If the Validator rejects a record. Validation is cancelled
            before the error is raised.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "High-throughput Diffraction and Spectroscopic Data for Fe-Cr-Al Oxidation Studies",
                "acl": ["public"],
                "source_name": "fe_cr_al_oxidation",
                "citation": [
                    "Bunn, Jonathan K.; Fang, Randy L.; Albing, Mark R.; Mehta, Apurva; Kramer, Matt J.; Besser, Matt F.; Hattrick-Simpers, Jason R High-throughput Diffraction and Spectroscopic Data for Fe-Cr-Al Oxidation Studies (2015-06-28)"
                ],
                "data_contact": {
                    "given_name": "Jason",
                    "family_name": "Hattrick-Simpers",
                    "email": "*****@*****.**",
                    "institution": "University of South Carolina Columbia",
                },
                "collection": "Fe-Cr-Al Oxidation Studies",
                "description": "The data set was used to evaluate a Fe-Cr-Al thin film samples in a narrow composition region centered on known bulk compositions. The data are composed of two individual studies. The first set of data is a low temperature oxidation study on composition spread sampled performed at SLAC Beamline 1-5. Only the integrated and background subtracted 1-D spectra are included, the 2-D data and calibrations are available upon request. The second set of data was taken during high temperature oxidation of selected samples. These data are exclusively Raman data with values taken as a function of total oxidation time.",
                "year": 2015,
                "links": {
                    "landing_page": "https://materialsdata.nist.gov/dspace/xmlui/handle/11256/836",
                    "publication": "http://dx.doi.org/10.1088/0957-4484/26/27/274003",
                    "data_doi": "http://hdl.handle.net/11256/836",
                },
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff"
                }
            }
        }
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except ValueError:
            # Not JSON text (json.JSONDecodeError is a ValueError), so treat
            # the string as the path of a JSON file instead.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # isinstance (rather than an exact type check) also accepts dict
        # subclasses such as OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Build a lookup from sample number to the remaining columns of that row
    composition_path = os.path.join(input_path, "Fe_Cr_Al_data",
                                    "Point Number to Composition.csv")
    with open(composition_path) as composition_file:
        compositions = {
            int(comp.pop("Sample Number")): comp
            for comp in parse_tab(composition_file.read())
        }

    # Each record also needs its own metadata
    # The pattern is a regular expression: escape the dot (otherwise it
    # matches any character) and anchor it to the end of the name.
    for data_file in tqdm(find_files(input_path, r"\.txt$"),
                          desc="Processing files",
                          disable=not verbose):
        filename = data_file["filename"]
        # The first space-separated token is the temperature (or "Room")
        temp_k = filename.split(" ")[0]
        # The last token, with "_" treated as a separator and the extension
        # dropped, is the sample number
        point_num = int(filename.replace("_", " ").split(" ")[-1].split(".")[0])
        composition = compositions[point_num]
        record_metadata = {
            "mdf": {
                "title": "Fe-Cr-Al Oxidation - " + filename.split(".")[0],
                "acl": ["public"],
                "composition": "FeCrAl",
                "links": {
                    "csv": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/" + data_file["no_root_path"] + "/" + filename,
                    },
                },
            },
            "fe_cr_al_oxidation": {
                "temperature_k":
                float(temp_k) if temp_k != "Room" else 293.15,  # Avg room temp
                "atomic_composition_percent": {
                    "Fe": float(composition["Fe at. %"]),
                    "Cr": float(composition["Cr at. %"]),
                    "Al": float(composition["Al at. %"])
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            dataset_validator.cancel_validation()
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    if verbose:
        print("Finished converting")
Exemplo n.º 14
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "Cyclometalated Platinum(II) Cyanometallates" dataset into feedstock records.

    Every ``.xyz`` or ``.cif`` file that ``find_files`` reports under
    ``input_path`` is parsed with ``parse_ase`` (using the file extension as
    the format) and written, one record at a time, through a ``Validator``
    built from the dataset metadata.

    Args:
        input_path: Directory that is searched for ``.xyz`` and ``.cif`` files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is tried first as JSON text and then as
            the path of a JSON file; a ``dict`` is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
        ValueError: If the Validator rejects a record. Validation is cancelled
            before the error is raised.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Cyclometalated Platinum(II) Cyanometallates: Luminescent Blocks for Coordination Self-Assembly",
                "acl": ["public"],
                "source_name": "pt_cyanometallates",
                "data_contact": {
                    "given_name": "Igor O.",
                    "family_name": "Koshevoy",
                    "email": "*****@*****.**",
                    "institution": "University of Eastern Finland",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": ["Schneider, Leon; Sivchik, Vasily; Chung, Kun-you; Chen, Yi-Ting; Karttunen, Antti J.; Chou, Pi-Tai; Koshevoy, Igor O. (2017): Cyclometalated Platinum(II) Cyanometallates: Luminescent Blocks for Coordination Self-Assembly. ACS Publications. https://doi.org/10.1021/acs.inorgchem.7b00006"],
                "author": [{
                    "given_name": "Leon",
                    "family_name": "Schneider",
                    "institution": "Julius-Maximilians-Universität",
                },
                {
                    "given_name": "Vasily",
                    "family_name": "Sivchik",
                    "institution": "University of Eastern Finland",
                },
                {
                    "given_name": "Kun-you",
                    "family_name": "Chung",
                    "institution": "National Taiwan University",
                },
                {
                    "given_name": "Yi-Ting",
                    "family_name": "Chen",
                    "institution": "National Taiwan University",
                },
                {
                    "given_name": "Antti J.",
                    "family_name": "Karttunen",
                    "email": "*****@*****.**",
                    "institution": "Aalto University",
                },
                {
                    "given_name": "Pi-Tai",
                    "family_name": "Chou",
                    "email": "*****@*****.**",
                    "institution": "National Taiwan University",
                    "orcid": "orcid.org/0000-0002-8925-7747",
                },
                {
                    "given_name": "Igor O.",
                    "family_name": "Koshevoy",
                    "email": "*****@*****.**",
                    "institution": "University of Eastern Finland",
                    "orcid": "orcid.org/0000-0003-4380-1302",
                }],
                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Cyclometalated Platinum(II) Cyanometallates",
                "tags": ["coordination geometries", "compound", "luminescence studies", "Coordination Self-Assembly", "Luminescent Blocks", "emission performance", "cyclometalated fragment", "chromophoric cycloplatinated metalloligands", "frontier orbitals", "complexes exhibit", "tetranuclear complexes", "time-dependent density", "η 1", "photophysical behavior", "cyanide-bridged heterometallic aggregates", "squarelike arrangement", "F 2 ppy", "phosphine motifs", "Cu", "M Pt LCT contribution", "alternative cluster topology", "metal ions", "tolpy", "10 fragments", "Ag", "room-temperature phosphorescence", "HF 2 ppy"],
                "description": "A family of cyanide-bridged heterometallic aggregates has been constructed of the chromophoric cycloplatinated metalloligands and coordinatively unsaturated d10 fragments {M(PPh3)n}. The tetranuclear complexes of general composition [Pt(C^N)(CN)2M(PPh3)2]2 [C^N = ppy, M = Cu (1), Ag (2); C^N = tolpy (Htolpy = 2-(4-tolyl)-pyridine), M = Cu (4), Ag (5); C^N = F2ppy (HF2ppy = 2-(4, 6-difluorophenyl)-pyridine), M = Cu (7), Ag (8)] demonstrate a squarelike arrangement of the molecular frameworks, which is achieved due to favorable coordination geometries of the bridging ligands and the metal ions. Variation of the amount of the ancillary phosphine (for M = Ag) afforded compounds [Pt(C^N)(CN)2Ag(PPh3)]2 (C^N = ppy, 3; C^N = tolpy, 6); for the latter one an alternative cluster topology, stabilized by the Pt–Ag metallophilic and η1-Cipso(C^N)–Ag bonding, was observed.",
                "year": 2017,
                "links": {
                    "landing_page": "https://figshare.com/collections/Cyclometalated_Platinum_II_Cyanometallates_Luminescent_Blocks_for_Coordination_Self-Assembly/3730237",
                    "publication": ["https://doi.org/10.1021/acs.inorgchem.7b00006"],
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except ValueError:
            # Not JSON text (json.JSONDecodeError is a ValueError), so treat
            # the string as the path of a JSON file instead.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # isinstance (rather than an exact type check) also accepts dict
        # subclasses such as OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    # The pattern is a regular expression: escape the dot and anchor it so the
    # extension extracted below can only ever be "xyz" or "cif".
    for data_file in tqdm(find_files(input_path, r"\.(xyz|cif)$"), desc="Processing Files", disable=not verbose):
        # The extension doubles as the parser format and as the links key
        dtype = data_file["filename"].split(".")[-1]
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), dtype)
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Cyclometalated Platinum(II) Cyanometallates - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    dtype: {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/pt_cyanometallates/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 15
0
def convert(input_path, metadata=None, verbose=False):
    """Write one feedstock record per CXIDB JSON metadata file.

    Arguments:
    input_path -- root passed to find_files, which is asked for files
                  matching the pattern "json"
    metadata -- dataset metadata given as a dict, a JSON string, or the path
                of a JSON file; any falsy value selects the built-in CXIDB
                metadata
    verbose -- when True, print start/finish messages and enable the tqdm
               progress bar

    Calls sys.exit when the metadata string can be neither parsed nor opened,
    or when metadata is of an unsupported type. Records rejected by the
    Validator are reported with print and processing continues.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata
    if not metadata:
        # Nothing supplied: use the built-in description of the dataset
        dataset_metadata = {
            "mdf": {
                "title": "The Coherent X-ray Imaging Data Bank",
                "acl": ["public"],
                "source_name": "cxidb",
                "citation": [
                    "Maia, F. R. N. C. The Coherent X-ray Imaging Data Bank. Nat. Methods 9, 854–855 (2012)."
                ],
                "data_contact": {
                    "given_name": "Filipe",
                    "family_name": "Maia",
                    "email": "*****@*****.**",
                    "institution": "Lawrence Berkeley National Laboratory",
                },
                "author": {
                    "given_name": "Filipe",
                    "family_name": "Maia",
                    "institution": "Lawrence Berkeley National Laboratory",
                },
                "collection": "CXIDB",
                "tags": ["x-ray", "coherent"],
                "description": "A new database which offers scientists from all over the world a unique opportunity to access data from Coherent X-ray Imaging (CXI) experiments.",
                "year": 2012,
                "links": {
                    "landing_page": "http://www.cxidb.org/",
                },
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff",
                },
            }
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        # A string is first tried as inline JSON, then as a path to a JSON file
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Walk every matching file and turn it into one record
    matches = find_files(input_path, file_pattern="json", verbose=verbose)
    for entry in tqdm(matches, desc="Processing metadata",
                      disable=not verbose):
        json_path = os.path.join(entry["path"], entry["filename"])
        with open(json_path) as handle:
            cxidb_data = json.load(handle)

        mdf_block = {
            "title": cxidb_data["citation_title"],
            "acl": ["public"],
            # The whole source document is kept as a JSON string
            "raw": json.dumps(cxidb_data),
            "links": {
                "landing_page": cxidb_data["url"],
                # Either DOI may be absent, in which case None is stored
                "publication": [
                    cxidb_data.get("citation_DOI"),
                    cxidb_data.get("entry_DOI"),
                ],
            },
        }

        # Hand the record to the Validator; a rejection is reported, not fatal
        result = dataset_validator.write_record({"mdf": mdf_block})
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write one feedstock record per Mg-X diffusion OUTCAR file.

    Arguments:
    input_path -- root passed to find_files, which is asked for files
                  matching the pattern "OUTCAR"
    metadata -- dataset metadata given as a dict, a JSON string, or the path
                of a JSON file; any falsy value selects the built-in
                Mg-X-Diffusion metadata
    verbose -- when True, print start/finish messages, the parse-error count,
               and enable the tqdm progress bar

    Calls sys.exit when the metadata string can be neither parsed nor opened,
    or when metadata is of an unsupported type. Records rejected by the
    Validator are reported with print and processing continues.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Mg-X-Diffusion",
                "acl": ["public"],
                "source_name": "trinkle_mg_x_diffusion",
                "citation": [
                    "Citation for dataset Mg-X-Diffusion with author(s): Dallas Trinkle, Ravi Agarwal"
                ],
                "data_contact": {
                    "given_name": "Dallas",
                    "family_name": "Trinkle",
                    "email": "*****@*****.**",
                    "institution": "University of Illinois at Urbana-Champaign",
                },
                "author": [{
                    "given_name": "Dallas",
                    "family_name": "Trinkle",
                    "email": "*****@*****.**",
                    # Key was misspelled "instituition"; every other person
                    # entry in this metadata uses "institution"
                    "institution": "University of Illinois at Urbana-Champaign",
                }, {
                    "given_name": "Ravi",
                    "family_name": "Agarwal",
                    "institution": "University of Illinois at Urbana-Champaign",
                }],
                "collection": "Mg-X Diffusion Dataset",
                "year": 2017,
                "links": {
                    "landing_page": "https://data.materialsdatafacility.org/published/#trinkle_mg_x_diffusion",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            }
        }
    elif isinstance(metadata, str):
        # A string is first tried as inline JSON, then as a path to a JSON file
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    total_errors = 0
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        # Reset for every file: without this, a parse failure would leave the
        # previous file's record in place and its formula would be written
        # against this file's path.
        record = None
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]),
                "vasp-out")
        except Exception:
            total_errors += 1

        try:
            formula = record["mdf"]["chemical_formula"]
            title = "Mg-X Diffusion - " + formula
        except (KeyError, TypeError):
            # parse_ase unable to read composition of record 1386: https://data.materialsdatafacility.org/collections/mg-x/Elements/Eu/Mg-X_Eu/OUTCAR
            # Placing in the correct material composition
            # NOTE(review): this fallback is applied to every file whose
            # formula is unavailable, not only the Eu file - confirm that is
            # acceptable for other failures.
            formula = "EuMg149"
            title = "Mg-X Diffusion - " + formula

        record_metadata = {
            "mdf": {
                "title": title,
                "acl": ["public"],
                "links": {
                    "outcar": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/mg-x/" + data_file["no_root_path"] +
                                "/" + data_file["filename"],
                    },
                },
                "composition": formula,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Total errors: " + str(total_errors))
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write one feedstock record per JCAP XPS spectrum JSON file.

    Arguments:
    input_path -- root passed to find_files, which is asked for files
                  matching the pattern ".json"
    metadata -- dataset metadata given as a dict, a JSON string, or the path
                of a JSON file; any falsy value selects the built-in JCAP XPS
                metadata
    verbose -- when True, print start/finish messages and enable the tqdm
               progress bar

    Calls sys.exit when the metadata string can be neither parsed nor opened,
    or when metadata is of an unsupported type. Records rejected by the
    Validator are reported with print and processing continues.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata
    if not metadata:
        # Nothing supplied: use the built-in description of the dataset
        dataset_metadata = {
            "mdf": {
                "title": "JCAP XPS Spectral Database",
                "acl": ["public"],
                "source_name": "jcap_xps_spectral_db",
                "citation": ["http://solarfuelshub.org/xps-spectral-database"],
                "data_contact": {
                    "given_name": "Harry",
                    "family_name": "Atwater",
                    "email": "*****@*****.**",
                    "institution": "Joint Center for Artificial Photosynthesis",
                },
                "collection": "JCAP XPS Spectral DB",
                "tags": ["xps", "spectra"],
                "description": "The JCAP High Throughput Experimentation research team uses combinatorial methods to quickly identify promising light absorbers and catalysts for solar-fuel devices. Pure-phase materials — including metal oxides, nitrides, sulfides, oxinitrides, and other single- and mixed-metal materials — are prepared using multiple deposition techniques (e.g., physical vapor deposition, inkjet printing, and micro-fabrication) on various substrates. High-resolution X-ray photoelectron spectroscopy (XPS) spectra for materials that have been characterized to date are made available here as part of JCAP's Materials Characterization Standards (MatChS) database.",
                "links": {
                    "landing_page": "http://solarfuelshub.org/xps-spectral-database",
                },
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff",
                },
            }
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        # A string is first tried as inline JSON, then as a path to a JSON file
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Walk every matching file and turn it into one record
    matches = find_files(input_path, ".json")
    for entry in tqdm(matches, desc="Processing files", disable=not verbose):
        spectrum_path = os.path.join(entry["path"], entry["filename"])
        with open(spectrum_path) as handle:
            spectrum = json.load(handle)

        # Pull the fields that belong in the "mdf" block; the popped keys are
        # removed from the spectrum so they are not repeated further down.
        title = "JCAP Spectra - " + spectrum["xps_region"]
        composition = spectrum.pop("material")
        landing_page = spectrum.pop("link")
        year = spectrum.pop("year")
        # The "data" entry is discarded and does not go into the record
        spectrum.pop("data")

        record_metadata = {
            "mdf": {
                "title": title,
                "acl": ["public"],
                "composition": composition,
                "links": {
                    "landing_page": landing_page,
                },
                "year": year,
            },
            # Whatever is left of the source document goes in the
            # dataset-specific block
            "jcap_xps_spectral_db": spectrum,
        }

        # Hand the record to the Validator; a rejection is reported, not fatal
        result = dataset_validator.write_record(record_metadata)
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Exemplo n.º 18
0
def convert(input_path, metadata=None, verbose=False):
    """Write one feedstock record per OUTCAR/CIF file of the Ohmic SiC dataset.

    Arguments:
    input_path -- root passed to find_files, which is asked for files
                  matching the pattern "(OUTCAR|cif$)"
    metadata -- dataset metadata given as a dict, a JSON string, or the path
                of a JSON file; any falsy value selects the built-in metadata
    verbose -- when True, print start/finish messages, the parse-error count,
               and enable the tqdm progress bar

    Files that parse_ase cannot read are counted and skipped.

    Calls sys.exit when the metadata string can be neither parsed nor opened,
    or when metadata is of an unsupported type.
    Raises ValueError when the Validator rejects a record; validation is
    cancelled first.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        institution = "Department of Physics, Chemistry, and Biology (IFM), Linköping University"
        # All authors share one institution; only the last one lists an email
        coauthor_names = [
            ("Hossein", "Fashandi"),
            ("Martin", "Dahlqvist"),
            ("Jun", "Lu"),
            ("Justinas", "Palisaitis"),
            ("Sergei I", "Simak"),
            ("Igor A", "Abrikosov"),
            ("Johanna", "Rosen"),
            ("Lars", "Hultman"),
            ("Mike", "Andersson"),
            ("Anita Lloyd", "Spetz"),
        ]
        authors = [{
            "given_name": given,
            "family_name": family,
            "institution": institution,
        } for given, family in coauthor_names]
        authors.append({
            "given_name": "Per",
            "family_name": "Eklund",
            "email": "*****@*****.**",
            "institution": institution,
        })

        dataset_metadata = {
            "mdf": {
                "title": "Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC",
                "acl": ["public"],
                "source_name": "ohmic_si_c_contacts",
                "data_contact": {
                    "given_name": "Per",
                    "family_name": "Eklund",
                    "email": "*****@*****.**",
                    "institution": institution,
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Fashandi, Hossein, Dahlqvist, Martin, Lu, Jun, Palisaitis, Justinas, Simak, Sergei I, Abrikosov, Igor A, … Eklund, Per. (2017). Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC [Data set]. Zenodo. http://doi.org/10.5281/zenodo.376969"
                ],
                "author": authors,
                "license": "https://creativecommons.org/licenses/by/4.0/",
                "collection": "Ohmic Contact to SiC",
                "tags": [
                    "electronic structure calculations", "MAX phase", "XRD",
                    "I/V measurement", "spin-orbit coupling",
                    "density of states", "Nanoscale materials",
                    "Structure of solids and liquids",
                    "Surfaces, interfaces and thin films",
                    "Two-dimensional materials"
                ],
                "description": "The large class of layered ceramics encompasses both van der Waals (vdW) and non-vdW solids. While intercalation of noble metals in vdW solids is known, formation of compounds by incorporation of noble-metal layers in non-vdW layered solids is largely unexplored. Here, we show formation of Ti3AuC2 and Ti3Au2C2 phases with up to 31% lattice swelling by a substitutional solid-state reaction of Au into Ti3SiC2 single-crystal thin films with simultaneous out-diffusion of Si. Ti3IrC2 is subsequently produced by a substitution reaction of Ir for Au in Ti3Au2C2. These phases form Ohmic electrical contacts to SiC and remain stable after 1,000 h of ageing at 600 °C in air. The present results, by combined analytical electron microscopy and ab initio calculations, open avenues for processing of noble-metal-containing layered ceramics that have not been synthesized from elemental sources, along with tunable properties such as stable electrical contacts for high-temperature power electronics or gas sensors.",
                "year": 2017,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.376969",
                    "publication": [
                        "http://www.nature.com/nmat/journal/v16/n8/full/nmat4896.html"
                    ],
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is first tried as inline JSON, then as a path to a JSON file
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "(OUTCAR|cif$)"),
                          desc="Processing files",
                          disable=not verbose):
        filename = data_file["filename"]
        # The last dot-separated piece of the filename doubles as the key of
        # the data link below ("cif", or the bare name for OUTCAR files)
        dtype = filename.split(".")[-1]
        ftype = "cif" if dtype == "cif" else "vasp-out"
        try:
            record = parse_ase(os.path.join(data_file["path"], filename),
                               ftype)
        except Exception:
            # Skip unparseable files. Falling through would either hit an
            # undefined `record` (first file) or reuse the previous file's
            # record with this file's path.
            errors += 1
            continue

        formula = record["chemical_formula"]

        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Ohmic Contact to SiC - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    dtype: {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/ohmic_si_c_contacts/" +
                                data_file["no_root_path"] + "/" + filename,
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")
Exemplo n.º 19
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "porous_mof" CIF dataset into MDF feedstock.

    Arguments:
    input_path (str): Root directory handed to find_files; every file whose
        name matches the pattern "cif" is parsed with parse_ase.
    metadata (str or dict): Dataset-level metadata. May be a dict, a JSON
        string, or the path of a JSON file. When falsy, the built-in
        metadata defined below is used.
    verbose (bool): When True, print status messages and enable the tqdm
        progress bar.

    Returns None. Records rejected by the Validator are reported with a
    printed "Error:" line and processing continues with the next file.

    Exits through sys.exit when the metadata string can be read neither as
    JSON nor as a JSON file, or when metadata has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    if not metadata:
        # Every Edinburgh / St Andrews person entry previously used the
        # misspelled key "instituition"; the key is "institution", as in the
        # data_contributor entry of this same dict.
        dataset_metadata = {
            "mdf": {
                "title":
                "Pore Shape Modification of a Microporous Metal–Organic Framework Using High Pressure: Accessing a New Phase with Oversized Guest Molecules",
                "acl": ["public"],
                "source_name": "porous_mof",
                "citation": [
                    "The University of Edinburgh School of Chemistry. (2016). Pore Shape Modification of a Microporous Metal-Organic Frame-work Using High Pressure: Accessing a New Phase with Oversized Guest Molecules, [dataset]. http://dx.doi.org/10.7488/ds/371."
                ],
                "data_contact": {
                    "given_name": "Stephen A.",
                    "family_name": "Moggach",
                    "email": "*****@*****.**",
                    "institution": "University of Edinburgh",
                },
                "author": [
                    {
                        "given_name": "Stephen A.",
                        "family_name": "Moggach",
                        "email": "*****@*****.**",
                        "institution": "University of Edinburgh",
                    },
                    {
                        "given_name": "Scott C.",
                        "family_name": "McKellar",
                        "institution": "University of Edinburgh",
                    },
                    {
                        "given_name": "Jorge",
                        "family_name": "Sotelo",
                        "institution": "University of Edinburgh",
                    },
                    {
                        "given_name": "Alex",
                        "family_name": "Greenaway",
                        "institution": "University of St Andrews",
                    },
                    {
                        "given_name": "John P. S.",
                        "family_name": "Mowat",
                        "institution": "University of St Andrews",
                    },
                    {
                        "given_name": "Odin",
                        "family_name": "Kvam",
                        "institution": "University of Edinburgh",
                    },
                    {
                        "given_name": "Carole A.",
                        "family_name": "Morrison",
                        "institution": "University of Edinburgh",
                    },
                    {
                        "given_name": "Paul A.",
                        "family_name": "Wright",
                        "institution": "University of St Andrews",
                    },
                ],
                "license":
                "http://creativecommons.org/licenses/by/4.0/legalcode",
                "collection": "Porous Metal-Organic-Framework",
                "description":
                "Pressures up to 0.8 GPa have been used to squeeze a range of sterically “oversized” C5–C8 alkane guest molecules into the cavities of a small-pore Sc-based metal–organic framework. Guest inclusion causes a pronounced reorientation of the aromatic rings of one-third of the terephthalate linkers, which act as “torsion springs”, resulting in a fully reversible change in the local pore structure. The study demonstrates how pressure-induced guest uptake can be used to investigate framework flexibility relevant to “breathing” behavior and to understand the uptake of guest molecules in MOFs relevant to hydrocarbon separation.",
                "year": 2015,
                "links": {
                    "landing_page":
                    "http://datashare.is.ed.ac.uk/handle/10283/942",
                    "publication":
                    ["http://dx.doi.org/10.1021/acs.chemmater.5b02891"],
                    # Single slash after the host, matching the DOI URL quoted
                    # in the citation above (was "dx.doi.org//10.7488/...").
                    "data_doi": "http://dx.doi.org/10.7488/ds/371",
                    "zip": {
                        "http_host": "http://datashare.is.ed.ac.uk",
                        "path":
                        "/download/10283/942/Pore_Shape_Modification_of_a_Microporous_Metal-Organic_Frame-work_Using_High_Pressure:_Accessing_a_New_Phase_with_Oversized_Guest_Molecules.zip",
                    },
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            }
        }
    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock.
    # The metadata must be passed to the constructor, and each Validator
    # instance can only be used for a single dataset.
    dataset_validator = Validator(dataset_metadata)

    # Write one record per CIF file; the Validator takes records one at a time.
    for data_file in tqdm(find_files(input_path, "cif"),
                          desc="Processing files",
                          disable=not verbose):
        cif_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(cif_path, "cif")
        formula = record["chemical_formula"]

        record_metadata = {
            "mdf": {
                "title": "Metal-Organic-Frame-Work - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "cif": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/porous_mof/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },
            }
        }

        # Pass each individual record to the Validator.
        result = dataset_validator.write_record(record_metadata)

        # "success" is True only when the record was written; anything else
        # is reported and the loop moves on to the next file.
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 20
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "gdb7_13" XYZ dataset into MDF feedstock.

    Arguments:
    input_path (str): Root directory handed to find_files; every file whose
        name matches the pattern "xyz" is parsed with parse_ase.
    metadata (str or dict): Dataset-level metadata. May be a dict, a JSON
        string, or the path of a JSON file. When falsy, the built-in
        metadata defined below is used.
    verbose (bool): When True, print status messages and enable the tqdm
        progress bar.

    Returns None. Records rejected by the Validator are reported with a
    printed "Error:" line and processing continues with the next file.

    Exits through sys.exit when the metadata string can be read neither as
    JSON nor as a JSON file, or when metadata has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    if not metadata:
        # Fixes relative to the earlier revision of this dict:
        #   * person entries use the key "institution" throughout (most
        #     authors had the misspelling "instituition");
        #   * the citation spells "Grégoire" and "Müller" as in the author
        #     list instead of the LaTeX residue "Gr\'egoire" / "M\"uller";
        #   * the description reads "HOMO" where the text was masked.
        dataset_metadata = {
            "mdf": {
                "title":
                "Machine learning of molecular electronic properties in chemical compound space",
                "acl": ["public"],
                "source_name": "gdb7_13",
                "citation": [
                    "Grégoire Montavon, Matthias Rupp, Vivekanand Gobre, Alvaro Vazquez-Mayagoitia, Katja Hansen, Alexandre Tkatchenko, Klaus-Robert Müller, O. Anatole von Lilienfeld: Machine learning of molecular electronic properties in chemical compound space, New Journal of Physics, 15(9): 095003, IOP Publishing, 2013.DOI: 10.1088/1367-2630/15/9/095003"
                ],
                "data_contact": {
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                },
                "author": [
                    {
                        "given_name": "O. Anatole",
                        "family_name": "von Lilienfeld",
                        "email": "*****@*****.**",
                        "institution": "Argonne National Laboratory",
                    },
                    {
                        "given_name": "Grégoire",
                        "family_name": "Montavon",
                        "institution": "Technical University of Berlin",
                    },
                    {
                        "given_name": "Matthias",
                        "family_name": "Rupp",
                        "institution":
                        "Institute of Pharmaceutical Sciences, ETH Zurich",
                    },
                    {
                        "given_name": "Vivekanand",
                        "family_name": "Gobre",
                        "institution":
                        "Fritz-Haber-Institut der Max-Planck-Gesellschaft",
                    },
                    {
                        "given_name": "Alvaro",
                        "family_name": "Vazquez-Mayagoitia",
                        "institution": "Argonne National Laboratory",
                    },
                    {
                        "given_name": "Katja",
                        "family_name": "Hansen",
                        "institution":
                        "Fritz-Haber-Institut der Max-Planck-Gesellschaft",
                    },
                    {
                        "given_name": "Alexandre",
                        "family_name": "Tkatchenko",
                        "email": "*****@*****.**",
                        "institution":
                        "Fritz-Haber-Institut der Max-Planck-Gesellschaft, Pohang University of Science and Technology",
                    },
                    {
                        "given_name": "Klaus-Robert",
                        "family_name": "Müller",
                        "email": "*****@*****.**",
                        "institution":
                        "Technical University of Berlin, Korea University",
                    },
                ],
                "license": "https://creativecommons.org/licenses/by/3.0/",
                "collection": "gdb7_13",
                "description":
                "7k small organic molecules, in their ground state, 14 combinations of properties and theory levels. 7,211 small organic molecules composed of H, C, N, O, S, Cl, saturated with H, and up to 7 non-H atoms. Molecules relaxed using DFT with PBE functional. Properties are atomization energy (DFT/PBE0), averaged polarizability (DFT/PBE0, SCS), HOMO and LUMO eigenvalues (GW, DFT/PBE0, ZINDO), and, ionization potential, electron affinity, first excitation energy, frequency of maximal absorption (all ZINDO).",
                "year": 2013,
                "links": {
                    "landing_page": "http://qmml.org/datasets.html#gdb7-13",
                    "publication":
                    ["http://dx.doi.org/10.1088/1367-2630/15/9/095003"],
                    "zip": {
                        "http_host": "http://qmml.org",
                        "path": "/Datasets/gdb7-13.zip",
                    },
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            }
        }
    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock.
    # The metadata must be passed to the constructor, and each Validator
    # instance can only be used for a single dataset.
    dataset_validator = Validator(dataset_metadata)

    # Write one record per XYZ file; the Validator takes records one at a time.
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        xyz_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(xyz_path, "xyz")

        record_metadata = {
            "mdf": {
                "title": "gdb7_13 " + data_file["filename"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        # Links into a fixed data directory; only the file
                        # name varies per record.
                        "path":
                        "/collections/gdb7_13/gdb7_13_data/" +
                        data_file["filename"],
                    },
                },
            }
        }

        # Pass each individual record to the Validator.
        result = dataset_validator.write_record(record_metadata)

        # "success" is True only when the record was written; anything else
        # is reported and the loop moves on to the next file.
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 21
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "reiher_quantum_chemical_models" XYZ dataset into MDF feedstock.

    Arguments:
    input_path (str): Root directory handed to find_files; every file whose
        name matches the pattern "xyz$" is parsed with parse_ase, except
        files whose directory path contains "PaxHeaders".
    metadata (str or dict): Dataset-level metadata. May be a dict, a JSON
        string, or the path of a JSON file. When falsy, the built-in
        metadata defined below is used.
    verbose (bool): When True, print status messages and enable the tqdm
        progress bar.

    Returns None.

    Raises ValueError as soon as the Validator rejects a record; validation
    is cancelled first so the partial feedstock can be removed.
    Exits through sys.exit when the metadata string can be read neither as
    JSON nor as a JSON file, or when metadata has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                # The title previously ended with a stray TAB character
                # inside the string literal; it has been removed.
                "title": "Uncertainty quantification for quantum chemical models of complex reaction networks",
                "acl": ["public"],
                "source_name": "reiher_quantum_chemical_models",

                "data_contact": {
                    "given_name": "Markus",
                    "family_name": "Reiher",
                    "email": "*****@*****.**",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",
                },

                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],

                "citation": ["(2016). Uncertainty quantification for quantum chemical models of complex reaction networks. , 195, 497-520. 10.1039/C6FD00144K"],

                "author": [
                    {
                        "given_name": "Jonny",
                        "family_name": "Proppe",
                        "institution": "Laboratory of Physical Chemistry, ETH Zürich",
                    },
                    {
                        "given_name": "Tamara",
                        "family_name": "Husch",
                        "institution": "Laboratory of Physical Chemistry, ETH Zürich",
                    },
                    {
                        "given_name": "Gregor N.",
                        "family_name": "Simma",
                        "institution": "Laboratory of Physical Chemistry, ETH Zürich",
                    },
                    {
                        "given_name": "Markus",
                        "family_name": "Reiher",
                        "email": "*****@*****.**",
                        "institution": "Laboratory of Physical Chemistry, ETH Zürich",
                    },
                ],

                "license": "http://creativecommons.org/licenses/by/3.0/",
                "collection": "Reiher Quantum Chemical Models",
                "description": "For the quantitative understanding of complex chemical reaction mechanisms, it is, in general, necessary to accurately determine the corresponding free energy surface and to solve the resulting continuous-time reaction rate equations for a continuous state space. For a general (complex) reaction network, it is computationally hard to fulfill these two requirements. However, it is possible to approximately address these challenges in a physically consistent way. On the one hand, it may be sufficient to consider approximate free energies if a reliable uncertainty measure can be provided. On the other hand, a highly resolved time evolution may not be necessary to still determine quantitative fluxes in a reaction network if one is interested in specific time scales. In this paper, we present discrete-time kinetic simulations in discrete state space taking free energy uncertainties into account.",
                "year": 2016,

                "links": {
                    "landing_page": "http://pubs.rsc.org/en/content/articlelanding/fd/2016/c6fd00144k#!divAbstract",
                    "publication": ["http://pubs.rsc.org/doi/c6fd90075e"],
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz$"), desc="Processing files", disable=not verbose):
        # Entries located under a "PaxHeaders" directory are skipped.
        if "PaxHeaders" in data_file["path"]:
            continue

        xyz_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(xyz_path, "xyz")
        formula = record["chemical_formula"]

        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Reiher Quantum Chemical Models - " + formula,
                "acl": ["public"],
                "composition": formula,

                "links": {
                    "xyz": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/reiher_quantum_chemical_models/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Convert the "qsar_molecular_diversity" SDF dataset into MDF feedstock.

    Arguments:
    input_path (str): Root directory handed to find_files; every file whose
        name matches the pattern "sdf" is parsed with parse_ase.
    metadata (str or dict): Dataset-level metadata. May be a dict, a JSON
        string, or the path of a JSON file. When falsy, the built-in
        metadata defined below is used.
    verbose (bool): When True, print status messages and enable the tqdm
        progress bar.

    Returns None. Records rejected by the Validator are reported with a
    printed "Error:" line and processing continues with the next file.

    Exits through sys.exit when the metadata string can be read neither as
    JSON nor as a JSON file, or when metadata has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    if not metadata:
        # No "license" entry and no data-link block are set for this dataset.
        dataset_metadata = {
            "mdf": {
                "title": "Neighborhood Behavior:  A Useful Concept for Validation of “Molecular Diversity” Descriptors",
                "acl": ["public"],
                "source_name": "qsar_molecular_diversity",
                "citation": ["David E Patterson, Richard D Cramer, Allan M Ferguson, Robert D Clark, Laurence W Weinberger. Neighbourhood Behaviour: A Useful Concept for Validation of \"Molecular Diversity\" Descriptors. J. Med. Chem. 1996 (39) 3049 - 3059."],
                "data_contact": {
                    "given_name": "Richard D.",
                    "family_name": "Cramer",
                    "email": "*****@*****.**",
                },
                "author": [
                    {
                        "given_name": "David E.",
                        "family_name": "Patterson",
                    },
                    {
                        "given_name": "Richard D.",
                        "family_name": "Cramer",
                        "email": "*****@*****.**",
                    },
                    {
                        "given_name": "Allan M.",
                        "family_name": "Ferguson",
                    },
                    {
                        "given_name": "Robert D.",
                        "family_name": "Clark",
                    },
                    {
                        "given_name": "Laurence E.",
                        "family_name": "Weinberger",
                    },
                ],
                "collection": "QSAR Molecular Diversity",
                "description": "If a molecular descriptor is to be a valid and useful measure of “similarity” in drug discovery, a plot of differences in its values vs differences in biological activities for a set of related molecules will exhibit a characteristic trapezoidal distribution enhancement, revealing a “neighborhood behavior” for the descriptor. Applying this finding to 20 datasets allows 11 molecular diversity descriptors to be ranked by their validity for compound library design",
                "year": 1996,
                "links": {
                    "landing_page": "ftp://ftp.ics.uci.edu/pub/baldig/learning/Patterson/",
                    "publication": ["http://pubs.acs.org/doi/abs/10.1021/jm960290n"],
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            }
        }
    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock.
    # The metadata must be passed to the constructor, and each Validator
    # instance can only be used for a single dataset.
    dataset_validator = Validator(dataset_metadata)

    # Write one record per SDF file; the Validator takes records one at a time.
    for data_file in tqdm(find_files(input_path, "sdf"), desc="Processing files", disable=not verbose):
        sdf_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(sdf_path, "sdf")
        formula = record["chemical_formula"]

        record_metadata = {
            "mdf": {
                "title": "QSAR Molecular Diversity - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "sdf": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/qsar_molecular_diversity/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
            }
        }

        # Pass each individual record to the Validator.
        result = dataset_validator.write_record(record_metadata)

        # "success" is True only when the record was written; anything else
        # is reported and the loop moves on to the next file.
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Exemplo n.º 23
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the Ru/Pt heteropolymetallic complexes dataset into feedstock.

    Every ``.xyz`` and ``.cif`` file that ``find_files`` reports under
    ``input_path`` is parsed with ``parse_ase`` and written as one record
    through a ``Validator``.

    Args:
        input_path: Directory passed to ``find_files`` to locate the data files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is first tried as a JSON document and
            then as the path of a JSON file; a ``dict`` is used as given.
        verbose: When true, print begin/end messages and enable the progress bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
        ValueError: If the Validator reports a record as unsuccessful; the
            validation is cancelled first.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs
    #    can be added (ex. "github": "jgaff"). It is recommended that all people
    #    listed in mdf-data_contributor have a github username listed.
    # Additional blocks (dictionaries at the same level as "mdf") must be named
    # after the dataset's source_name.
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption",
                "acl": ["public"],
                "source_name": "ru_pt_complexes",
                "data_contact": {
                    "given_name": "Pengfei",
                    "family_name": "Shi",
                    "email": "*****@*****.**",
                    "institution": "Huaihai Institute of Technology",
                },
                "data_contributor": [
                    {
                        "given_name": "Evan",
                        "family_name": "Pike",
                        "email": "*****@*****.**",
                        "institution": "The University of Chicago",
                        "github": "dep78",
                    },
                ],
                "citation": [
                    "Shi, Pengfei; Coe, Benjamin J.; Sánchez, Sergio; Wang, Daqi; Tian, Yupeng; Nyk, Marcin; Samoc, Marek (2015): Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption. ACS Publications. https://doi.org/10.1021/acs.inorgchem.5b02089 Retrieved: 15:54, Jul 27, 2017 (GMT)",
                ],
                "author": [
                    {
                        "given_name": "Pengfei",
                        "family_name": "Shi",
                        "email": "*****@*****.**",
                        "institution": "Huaihai Institute of Technology",
                    },
                    {
                        "given_name": "Benjamin J.",
                        "family_name": "Coe",
                        "email": "*****@*****.**",
                        "institution": "The University of Manchester",
                    },
                    {
                        "given_name": "Sergio",
                        "family_name": "Sánchez",
                        "institution": "The University of Manchester",
                    },
                    {
                        "given_name": "Daqi",
                        "family_name": "Wang",
                        "institution": "Liaocheng University",
                    },
                    {
                        "given_name": "Yupeng",
                        "family_name": "Tian",
                        "institution": "Anhui University",
                    },
                    {
                        "given_name": "Marcin",
                        "family_name": "Nyk",
                        "institution": "Wrocław University of Technology",
                    },
                    {
                        "given_name": "Marek",
                        "family_name": "Samoc",
                        "institution": "Wrocław University of Technology",
                    },
                ],
                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Ru Pt Heteropolymetallic Complexes",
                "tags": [
                    "Heteropolymetallic Complexes", "850 nm", "834 nm",
                    "polymetallic species", "Pt coordination",
                    "spectra change", "moietie", "qpy", "MLCT",
                    "2 PA activities", "complex", "301 GM", "PtII", "RuII",
                    "523 GM", "heptanuclear RuPt 6", "absorption bands",
                ],
                "description": "New trinuclear RuPt2 and heptanuclear RuPt6 complex salts are prepared by attaching PtII 2,2′:6′,2″-terpyridine (tpy) moieties to RuII 4,4′:2′,2″:4″,4‴-quaterpyridine (qpy) complexes. Characterization includes single crystal X-ray structures for both polymetallic species. The visible absorption bands are primarily due to RuII → qpy metal-to-ligand charge-transfer (MLCT) transitions, according to time-dependent density functional theory (TD-DFT) calculations. These spectra change only slightly on Pt coordination, while the orange-red emission from the complexes shows corresponding small red-shifts, accompanied by decreases in intensity. Cubic molecular nonlinear optical behavior has been assessed by using Z-scan measurements. These reveal relatively high two-photon absorption (2PA) cross sections σ2, with maximal values of 301 GM at 834 nm (RuPt2) and 523 GM at 850 nm (RuPt6) when dissolved in methanol or acetone, respectively. Attaching PtII(tpy) moieties triples or quadruples the 2PA activities when compared with the RuII-based cores.",
                "year": 2015,
                "links": {
                    "landing_page": "https://figshare.com/collections/Uniting_Ruthenium_II_and_Platinum_II_Polypyridine_Centers_in_Heteropolymetallic_Complexes_Giving_Strong_Two_Photon_Absorption/2204182",
                    "publication": ["https://doi.org/10.1021/acs.inorgchem.5b02089"],
                },
            },
        }
    elif isinstance(metadata, str):
        # A string is either the JSON document itself or the path to one.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator instance is created per dataset and receives the dataset
    # metadata; records are then written through it one at a time.
    dataset_validator = Validator(dataset_metadata)

    # The pattern is anchored to the extension: the text after the last "." is
    # handed to parse_ase as the format and used as the link key, so a file
    # that merely contains "xyz" or "cif" somewhere in its name must not be
    # selected.
    data_files = find_files(input_path, r"\.(xyz|cif)$")
    for data_file in tqdm(data_files, desc="Processing files", disable=not verbose):
        dtype = data_file["filename"].rsplit(".", 1)[-1]
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), dtype)

        record_metadata = {
            "mdf": {
                "title": "Ru Pt Heteropolymetallic Complexes - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    # NOTE(review): only the file name is used here, so files in
                    # sub-directories of input_path would all link to the
                    # collection root - confirm the dataset layout is flat.
                    dtype: {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/ru_pt_complexes/" + data_file["filename"],
                    },
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Stop at the first rejected record: cancel the validation, then raise
        # with the Validator's message.
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Convert the yeast Mediator complex dataset into feedstock.

    Every ``.pdb`` file that ``find_files`` reports under ``input_path`` is
    parsed with ``parse_ase`` (format ``"proteindatabank"``) and written as one
    record through a ``Validator``.

    Args:
        input_path: Directory passed to ``find_files`` to locate the data files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is first tried as a JSON document and
            then as the path of a JSON file; a ``dict`` is used as given.
        verbose: When true, print begin/end messages and enable the progress bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
        ValueError: If the Validator reports a record as unsuccessful; the
            validation is cancelled first.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs
    #    can be added (ex. "github": "jgaff"). It is recommended that all people
    #    listed in mdf-data_contributor have a github username listed.
    # Additional blocks (dictionaries at the same level as "mdf") must be named
    # after the dataset's source_name.
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Molecular architecture of the yeast Mediator complex",
                "acl": ["public"],
                "source_name": "yeast_mediator_complex",
                "data_contact": {
                    "given_name": "Benjamin",
                    "family_name": "Webb",
                    "email": "*****@*****.**",
                    "institution": "University of California San Francisco",
                },
                "data_contributor": [
                    {
                        "given_name": "Evan",
                        "family_name": "Pike",
                        "email": "*****@*****.**",
                        "institution": "The University of Chicago",
                        "github": "dep78",
                    },
                ],
                "citation": ["Robinson, Philip J, Trnka, Michael J, Pellarin, Riccardo, Greenberg, Charles H, Bushnell, David A, Davis, Ralph, … Kornberg, Roger D. (2015). Molecular architecture of the yeast Mediator complex [Data set]. eLife. Zenodo. http://doi.org/10.5281/zenodo.802915"],
                "author": [
                    {
                        "given_name": "Philip J",
                        "family_name": "Robinson",
                        "institution": "Stanford University",
                    },
                    {
                        "given_name": "Michael J",
                        "family_name": "Trnka",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Riccardo",
                        "family_name": "Pellarin",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Charles H",
                        "family_name": "Greenberg",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "David A",
                        "family_name": "Bushnell",
                        "institution": "Stanford University",
                    },
                    {
                        "given_name": "Ralph",
                        "family_name": "Davis",
                        "institution": "Stanford University",
                    },
                    {
                        "given_name": "Alma L",
                        "family_name": "Burlingame",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Andrej",
                        "family_name": "Sali",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Roger D",
                        "family_name": "Kornberg",
                        "institution": "Stanford University",
                    },
                ],
                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "Yeast Mediator Complex",
                "tags": ["Integrative Modeling Platform (IMP)", "Chemical crosslinks", "PMI", "X-ray crystallography"],
                "description": "The 21-subunit Mediator complex transduces regulatory information from enhancers to promoters, and performs an essential role in the initiation of transcription in all eukaryotes. This repository contains files used in the 3-D modeling of the entire Mediator complex, using an integrative modeling approach that combines information from chemical cross-linking and mass spectrometry; X-ray crystallography; homology modeling; and cryo-electron microscopy.",
                "year": 2015,
                "links": {
                    "landing_page": "https://zenodo.org/record/802915",
                    "publication": ["https://doi.org/10.7554/eLife.08719", "https://github.com/integrativemodeling/mediator/tree/v1.0.3"],
                    "data_doi": "https://doi.org/10.5281/zenodo.802915",
                },
            },
        }
    elif isinstance(metadata, str):
        # A string is either the JSON document itself or the path to one.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator instance is created per dataset and receives the dataset
    # metadata; records are then written through it one at a time.
    dataset_validator = Validator(dataset_metadata)

    # Anchor the pattern to the extension: every selected file is parsed as
    # "proteindatabank", so a name that merely contains "pdb" must not match.
    pdb_files = find_files(input_path, r"\.pdb$")
    for data_file in tqdm(pdb_files, desc="Processing Files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "proteindatabank")

        # Join only the non-empty components so that an empty "no_root_path"
        # does not leave a "//" in the link path.
        link_path = "/".join(
            part for part in (
                "/collections/yeast_mediator_complex",
                data_file["no_root_path"],
                data_file["filename"],
            ) if part
        )

        record_metadata = {
            "mdf": {
                "title": "Yeast Mediator Complex - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "pdb": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": link_path,
                    },
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Stop at the first rejected record: cancel the validation, then raise
        # with the Validator's message.
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    if verbose:
        print("Finished converting")
Exemplo n.º 25
0
def convert(input_path, metadata=None, verbose=False):
    """Convert Materials Commons metadata files into feedstock.

    Every ``.json`` file that ``find_files`` reports under ``input_path`` is
    loaded and its ``title``, ``keywords``, ``description``, ``id``, ``doi``
    and optional ``published_date`` fields are mapped onto one record, which
    is written through a ``Validator``.

    Args:
        input_path: Directory passed to ``find_files`` to locate the JSON files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is first tried as a JSON document and
            then as the path of a JSON file; a ``dict`` is used as given.
        verbose: When true, print begin/end messages and enable the progress
            bars (also forwarded to ``find_files``).

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Materials Commons Data",
                "acl": ["public"],
                "source_name": "materials_commons",
                "citation": ["Puchala, B., Tarcea, G., Marquis, E.A. et al. JOM (2016) 68: 2035. doi:10.1007/s11837-016-1998-7"],
                "data_contact": {
                    "given_name": "Brian",
                    "family_name": "Puchala",
                    "email": "*****@*****.**",
                    "institution": "University of Michigan",
                    "orcid": "https://orcid.org/0000-0002-2461-6614",
                },
                "author": [
                    {
                        "given_name": "Brian",
                        "family_name": "Puchala",
                        "email": "*****@*****.**",
                        "institution": "University of Michigan",
                        "orcid": "https://orcid.org/0000-0002-2461-6614",
                    },
                    {
                        "given_name": "Glenn",
                        "family_name": "Tarcea",
                        "institution": "University of Michigan",
                    },
                    {
                        "given_name": "Emmanuelle",
                        "family_name": "Marquis",
                        "institution": "University of Michigan",
                    },
                    {
                        "given_name": "Margaret",
                        "family_name": "Hedstrom",
                        "institution": "University of Michigan",
                    },
                    {
                        "given_name": "Hosagrahar",
                        "family_name": "Jagadish",
                        "institution": "University of Michigan",
                    },
                    {
                        "given_name": "John",
                        "family_name": "Allison",
                        "institution": "University of Michigan",
                    },
                ],
                "collection": "Materials Commons",
                "tags": ["materials"],
                "description": "A platform for sharing research data.",
                "year": 2016,
                "links": {
                    "landing_page": "https://materialscommons.org/mcpub/",
                    "publication": "https://dx.doi.org/10.1007/s11837-016-1998-7",
                },
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff",
                },
            },
        }
    elif isinstance(metadata, str):
        # A string is either the JSON document itself or the path to one.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Anchor the pattern to the extension: every selected file is fed to
    # json.load, so a name that merely contains "json" must not match.
    json_files = find_files(input_path, file_pattern=r"\.json$", verbose=verbose)
    for dir_data in tqdm(json_files, desc="Processing metadata", disable=not verbose):
        with open(os.path.join(dir_data["path"], dir_data["filename"])) as file_data:
            mc_data = json.load(file_data)

        # A missing, null or empty "published_date" falls back to "0000"
        # (year 0) instead of raising while slicing or converting.
        published = mc_data.get("published_date") or "0000"

        record_metadata = {
            "mdf": {
                "title": mc_data["title"],
                "acl": ["public"],
                "tags": mc_data["keywords"],
                "description": mc_data["description"],
                "links": {
                    "landing_page": "https://materialscommons.org/mcpub/#/details/" + mc_data["id"],
                    "publication": mc_data["doi"],
                },
                "year": int(published[:4]),
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # A rejected record is reported and skipped; processing continues.
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Exemplo n.º 26
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the Ni-Co-Al-Ti-Cr quinary alloys dataset into feedstock.

    ``alloy_data.csv`` in ``input_path`` is read and parsed with ``parse_tab``.
    Each parsed row becomes one record whose links point to the CSV itself and
    to every file under ``input_path`` whose name contains the row's
    ``"Alloy"`` value. Records are written through a ``Validator``.

    Args:
        input_path: Directory containing ``alloy_data.csv`` and the per-alloy
            data files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults below; a ``str`` is first tried as a JSON document and
            then as the path of a JSON file; a ``dict`` is used as given.
        verbose: When true, print begin/end messages and enable the progress bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it has an unsupported type.
    """
    # Imported locally so the block does not depend on a module-level import.
    import re

    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": 'Research Data Supporting "The microstructure and hardness of Ni-Co-Al-Ti-Cr quinary alloys"',
                "acl": ["public"],
                "source_name": "quinary_alloys",
                "citation": [
                    'Christofidou, K. A., Jones, N. G., Pickering, E. J., Flacau, R., Hardy, M. C., & Stone, H. J. Research Data Supporting "The microstructure and hardness of Ni-Co-Al-Ti-Cr quinary alloys" [Dataset]. https://doi.org/10.17863/CAM.705',
                ],
                "data_contact": {
                    "given_name": "Howard",
                    "family_name": "Stone",
                    "email": "*****@*****.**",
                    "institution": "University of Cambridge",
                },
                "author": [
                    {
                        "given_name": "Howard",
                        "family_name": "Stone",
                        "email": "*****@*****.**",
                        "institution": "University of Cambridge",
                    },
                    {
                        "given_name": "Katerina",
                        "family_name": "Christofidou",
                        "institution": "University of Cambridge",
                        "orcid": "https://orcid.org/0000-0002-8064-5874",
                    },
                    {
                        "given_name": "Nicholas",
                        "family_name": "Jones",
                        "institution": "University of Cambridge",
                    },
                    {
                        "given_name": "Edward",
                        "family_name": "Pickering",
                        "institution": "University of Cambridge",
                    },
                    {
                        "given_name": "Roxana",
                        "family_name": "Flacau",
                        "institution": "University of Cambridge",
                    },
                    {
                        "given_name": "Mark",
                        "family_name": "Hardy",
                        "institution": "University of Cambridge",
                    },
                ],
                "license": "http://creativecommons.org/licenses/by/4.0/",
                "collection": "Ni-Co-Al-Ti-Cr Quinary Alloys",
                "tags": ["alloys"],
                "description": "DSC files, neutron diffraction data, hardness measurements, SEM and TEM images and thermodynamic simulations are provided for all alloy compositions studied and presented in this manuscript.",
                "year": 2016,
                "links": {
                    "landing_page": "https://www.repository.cam.ac.uk/handle/1810/256771",
                    "publication": "https://doi.org/10.1016/j.jallcom.2016.07.159",
                    "data_doi": "https://doi.org/10.17863/CAM.705",
                },
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff",
                },
            },
        }
    elif isinstance(metadata, str):
        # A string is either the JSON document itself or the path to one.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    with open(os.path.join(input_path, "alloy_data.csv"), 'r') as adata:
        raw_data = adata.read()

    for record in tqdm(parse_tab(raw_data),
                       desc="Processing records",
                       disable=not verbose):
        links = {}
        # The alloy name comes from the CSV and is meant literally, so escape
        # it before it is used as a file-name pattern.
        for ln in find_files(input_path, re.escape(record["Alloy"])):
            # The link key is the file's directory relative to input_path,
            # with "/" and spaces turned into underscores.
            # NOTE(review): two matching files in the same directory share a
            # key, so the later one replaces the earlier - confirm intended.
            key = ln["no_root_path"].replace("/", "_").replace(" ", "_")
            links[key] = {
                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                "http_host": "https://data.materialsdatafacility.org",
                "path": os.path.join("/collections/quinary_alloys",
                                     ln["no_root_path"], ln["filename"]),
            }
        # Every record also links to the CSV it was read from.
        links["csv"] = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/quinary_alloys/alloy_data.csv",
        }

        record_metadata = {
            "mdf": {
                "title": "Ni-Co-Al-Ti-Cr Quinary Alloys " + record["Alloy"],
                "acl": ["public"],
                "composition": "NiCoAlTiCr",
                "raw": json.dumps(record),
                "links": links,
            },
            # Dataset-specific block, named after the dataset's source_name.
            "quinary_alloys": {
                "atomic_composition_percent": {
                    "Ni": record["Ni"],
                    "Co": record["Co"],
                    "Al": record["Al"],
                    "Ti": record["Ti"],
                    "Cr": record["Cr"],
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # A rejected record is reported and skipped; processing continues.
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Exemplo n.º 27
0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock records for the "trinkle_elastic_fe_bcc" dataset.

    Every file found under ``input_path`` by ``find_files`` with the pattern
    "OUTCAR" is parsed with ``parse_ase`` (format "vasp-out") and passed to a
    ``Validator`` one record at a time.

    Args:
        input_path: Root directory handed to ``find_files``.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; a ``str`` is tried as JSON text first and then as the
            path of a JSON file; a ``dict`` is used as given. Any other type
            ends the program through ``sys.exit``.
        verbose: When true, print start/finish messages and enable the tqdm
            progress bar.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    if not metadata:
        # Nothing supplied: use the description shipped with this converter.
        illinois = "University of Illinois"
        fellinger = {
            "given_name": "Michael",
            "family_name": "Fellinger",
            "email": "*****@*****.**",
            "institution": illinois,
        }
        authors = [
            # Separate copy so contact and author entries stay distinct objects.
            dict(fellinger),
            {
                "given_name": "Dallas",
                "family_name": "Trinkle",
                "institution": illinois,
            },
            {
                "given_name": "Louis",
                "family_name": "Hector Jr.",
                "institution": "General Motors",
            },
        ]
        dataset_links = {
            "landing_page": "https://materialsdata.nist.gov/dspace/xmlui/handle/11256/671",
            "publication": [
                "http://dx.doi.org/10.1016/j.commatsci.2016.09.040",
                "http://dx.doi.org/10.1016/j.dib.2016.11.092",
            ],
            "data_doi": "http://hdl.handle.net/11256/671",
        }
        contributor = {
            "given_name": "Jonathon",
            "family_name": "Gaff",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "jgaff",
        }
        dataset_metadata = {
            "mdf": {
                "title": "Ab initio calculations of the lattice parameter and elastic stiffness coefficients of bcc Fe with solutes",
                "acl": ["public"],
                "source_name": "trinkle_elastic_fe_bcc",
                "citation": [
                    "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Comp. Mat. Sci. 126, 503 (2017).",
                    "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Data in Brief 10, 147 (2017).",
                ],
                "data_contact": fellinger,
                "author": authors,
                "license": "http://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "Elastic Fe BCC",
                "tags": ["dft"],
                "description": "We introduce a solute strain misfit tensor that quantifies how solutes change the lattice parameter.",
                "year": 2017,
                "links": dataset_links,
                "data_contributor": contributor,
            }
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not JSON text; fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Walk the OUTCAR files and emit one record per file.
    outcar_files = find_files(input_path, "OUTCAR")
    for data_file in tqdm(outcar_files, desc="Processing files", disable=not verbose):
        source_dir = data_file["path"]
        filename = data_file["filename"]
        parsed = parse_ase(os.path.join(source_dir, filename), "vasp-out")

        formula = parsed["chemical_formula"]
        outcar_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/" + data_file["no_root_path"] + "/" + filename,
        }
        record_metadata = {
            "mdf": {
                "title": "Elastic BCC - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {"outcar": outcar_link},
            }
        }

        # Hand the record to the Validator; only an explicit True counts as
        # success, anything else is reported and processing continues.
        result = dataset_validator.write_record(record_metadata)
        if result["success"] is True:
            continue
        print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Exemplo n.º 28
0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock records for the "cnmultifit_groel" dataset.

    Files found under ``input_path`` by ``find_files`` with the pattern
    "pdb$" are parsed with ``parse_ase`` (format "proteindatabank") and
    written through a ``Validator`` one record at a time.

    Args:
        input_path: Root directory handed to ``find_files``.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; a ``str`` is tried as JSON text first and then as the
            path of a JSON file; a ``dict`` is used as given. Any other type
            ends the program through ``sys.exit``.
        verbose: When true, print start/finish messages and enable the tqdm
            progress bar.

    Raises:
        ValueError: If the Validator rejects a record. Validation is
            cancelled before the exception is raised.
    """
    if verbose:
        print("Begin converting")

    # Dataset-level metadata.
    # People entries may carry extra IDs (e.g. "github"); additional
    # dataset-specific fields belong in a block named after the source_name,
    # at the same level as "mdf".
    if not metadata:
        ucsf = "University of California San Francisco"

        # Authors that carry only a name and the shared institution.
        name_only_authors = (
            ("Keren", "Lasker"),
            ("Javier A.", "Velázquez-Muriel"),
            ("Benjamin M.", "Webb"),
            ("Zheng", "Yang"),
            ("Thomas E.", "Ferrin"),
        )
        authors = [
            {"given_name": given, "family_name": family, "institution": ucsf}
            for given, family in name_only_authors
        ]
        authors.append({
            "given_name": "Andrej",
            "family_name": "Sali",
            "email": "*****@*****.**",
            "institution": ucsf,
        })

        dataset_links = {
            "landing_page": "https://doi.org/10.5281/zenodo.46596",
            "publication": [
                "https://doi.org/10.1007/978-1-61779-588-6_15",
                "https://github.com/integrativemodeling/multifit_groel/tree/v1.0",
            ],
            "zip": {
                "http_host": "https://zenodo.org",
                "path": "/record/46596/files/multifit_groel-v1.0.zip",
            },
        }

        dataset_metadata = {
            "mdf": {
                "title": "Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit",
                "acl": ["public"],
                "source_name": "cnmultifit_groel",
                "data_contact": {
                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": ucsf,
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Lasker, K., Velázquez-Muriel, J. A., Webb, B. M., Yang, Z., Ferrin, T. E., & Sali, A. (2012). Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit [Data set]. Methods Mol Biol. Zenodo. http://doi.org/10.5281/zenodo.46596"
                ],
                "author": authors,
                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "GroEL cnmultifit",
                "tags": [
                    "Integrative Modeling Platform (IMP)",
                    "Electron microscopy density map",
                    "MODELLER",
                    "MultiFit",
                ],
                "description": "These scripts demonstrate the use of IMP, MODELLER and Chimera in the modeling of the bacterial molecular chaperone GroEL. First, MODELLER is used to generate structures for the individual components in the GroEL complex. Then, IMP is used to fit these components together into the electron microscopy density map of the entire complex.",
                "year": 2012,
                "links": dataset_links,
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not JSON text; fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it receives the dataset metadata up front
    # and then each record individually.
    dataset_validator = Validator(dataset_metadata)

    pdb_files = find_files(input_path, "pdb$")
    for data_file in tqdm(pdb_files, desc="Processing files", disable=not verbose):
        source_dir = data_file["path"]
        filename = data_file["filename"]
        record = parse_ase(os.path.join(source_dir, filename), "proteindatabank")

        formula = record["chemical_formula"]
        pdb_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/cnmultifit_groel/" + data_file["no_root_path"] + "/" + filename,
        }
        record_metadata = {
            "mdf": {
                "title": "GroEL cnmultifit - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {"pdb": pdb_link},
            },
        }

        result = dataset_validator.write_record(record_metadata)
        if result["success"]:
            continue

        # The record was rejected: cancel validation, then abort the run.
        cancellation = dataset_validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(result["message"] + "\n" + result.get("details", ""))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock records for the "fretr_bayesian_restraint" dataset.

    Files found under ``input_path`` by ``find_files`` with the pattern
    "pdb$" are parsed with ``parse_ase`` (format "proteindatabank") and
    written through a ``Validator`` one record at a time.

    Args:
        input_path: Root directory handed to ``find_files``.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; a ``str`` is tried as JSON text first and then as the
            path of a JSON file; a ``dict`` is used as given. Any other type
            ends the program through ``sys.exit``.
        verbose: When true, print start/finish messages and enable the tqdm
            progress bar.

    Raises:
        ValueError: If the Validator rejects a record. Validation is
            cancelled before the exception is raised.
    """
    if verbose:
        print("Begin converting")

    # Dataset-level metadata.
    # People entries may carry extra IDs (e.g. "github"); additional
    # dataset-specific fields belong in a block named after the source_name,
    # at the same level as "mdf".
    if not metadata:
        ucsf = "University of California, San Francisco"
        washington = "University of Washington"

        def person(given, family, institution, email=None):
            # Build one person entry; "email" sits between the name and the
            # institution when present.
            entry = {"given_name": given, "family_name": family}
            if email is not None:
                entry["email"] = email
            entry["institution"] = institution
            return entry

        authors = [
            person("Massimiliano", "Bonomi",
                   "University of California, San Francisco, University of Cambridge",
                   email="*****@*****.**"),
            person("Riccardo", "Pellarin", ucsf),
            person("Seung Joong", "Kim", ucsf),
            person("Daniel", "Russel", ucsf),
            person("Bryan A.", "Sundin", washington),
            person("Michael", "Riffle", washington),
            person("Daniel", "Jaschob", washington),
            person("Richard", "Ramsden", washington),
            person("Trisha N.", "Davis", washington),
            person("Eric G. D.", "Muller", washington, email="*****@*****.**"),
            person("Andrej", "Sali", ucsf, email="*****@*****.**"),
        ]

        contributor = person("Evan", "Pike", "The University of Chicago",
                             email="*****@*****.**")
        contributor["github"] = "dep78"

        dataset_links = {
            "landing_page": "https://zenodo.org/record/46558",
            "publication": [
                "https://doi.org/10.1074/mcp.M114.040824",
                "https://github.com/integrativemodeling/fret_benchmark/tree/v1.0",
            ],
            "data_doi": "https://doi.org/10.5281/zenodo.46558",
            "zip": {
                "http_host": "https://zenodo.org",
                "path": "/record/46558/files/fret_benchmark-v1.0.zip",
            },
        }

        dataset_metadata = {
            "mdf": {
                "title": "Benchmark of the FRETR Bayesian restraint",
                "acl": ["public"],
                "source_name": "fretr_bayesian_restraint",
                "data_contact": person("Andrej", "Sali", ucsf, email="*****@*****.**"),
                "data_contributor": [contributor],
                "citation": ["Bonomi, M., Pellarin, R., Kim, S. J., Russel, D., Sundin, B. A., Riffle, M., … Sali, A. (2014). Benchmark of the FRETR Bayesian restraint [Data set]. Mol Cell Proteomics. Zenodo. http://doi.org/10.5281/zenodo.46558"],
                "author": authors,
                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "FRETR Bayesian Restraint",
                "tags": ["Integrative Modeling Platform (IMP)", "Benchmark", "Förster resonance energy transfer (FRET)"],
                "description": "The use of in vivo Förster resonance energy transfer (FRET) data to determine the molecular architecture of a protein complex in living cells is challenging due to data sparseness, sample heterogeneity, signal contributions from multiple donors and acceptors, unequal fluorophore brightness, photobleaching, flexibility of the linker connecting the fluorophore to the tagged protein, and spectral cross-talk. We addressed these challenges by using a Bayesian approach that produces the posterior probability of a model, given the input data. The posterior probability is defined as a function of the dependence of our FRET metric FRETR on a structure (forward model), a model of noise in the data, as well as prior information about the structure, relative populations of distinct states in the sample, forward model parameters, and data noise.",
                "year": 2014,
                "links": dataset_links,
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not JSON text; fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it receives the dataset metadata up front
    # and then each record individually.
    dataset_validator = Validator(dataset_metadata)

    pdb_files = find_files(input_path, "pdb$")
    for data_file in tqdm(pdb_files, desc="Processing Files", disable=not verbose):
        source_dir = data_file["path"]
        filename = data_file["filename"]
        record = parse_ase(os.path.join(source_dir, filename), "proteindatabank")

        formula = record["chemical_formula"]
        pdb_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/fretr_bayesian_restraint/" + data_file["no_root_path"] + "/" + filename,
        }
        record_metadata = {
            "mdf": {
                "title": "FRETR Bayesian Restraint - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {"pdb": pdb_link},
            },
        }

        result = dataset_validator.write_record(record_metadata)
        if result["success"]:
            continue

        # The record was rejected: cancel validation, then abort the run.
        cancellation = dataset_validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(result["message"] + "\n" + result.get("details", ""))

    if verbose:
        print("Finished converting")
Exemplo n.º 30
0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock records for the "qm_mdt_c" dataset (MD trajectories of C7O2H10).

    Files found by ``find_files`` under ``<input_path>/c7o2h10_md`` with the
    pattern "xyz" are parsed with ``parse_ase`` (format "xyz") and written
    through a ``Validator`` one record at a time.

    Args:
        input_path: Directory that contains the "c7o2h10_md" folder.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; a ``str`` is tried as JSON text first and then as the
            path of a JSON file; a ``dict`` is used as given. Any other type
            ends the program through ``sys.exit``.
        verbose: When true, print start/finish messages and enable the tqdm
            progress bar.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata
    if not metadata:
        berlin = "Technical University of Berlin"
        dataset_metadata = {
            "mdf": {
                "title": "Quantum Machine - MD Trajectories of C7O2H10",
                "acl": ["public"],
                "source_name": "qm_mdt_c",
                "citation": [
                    "S. Chmiela, A. Tkatchenko, H. E. Sauceda, I. Poltavsky, K. T. Schütt, K.-R. Müller Machine Learning of Accurate Energy-Conserving Molecular Force Fields, 2017.",
                    "K. T. Schütt, F. Arbabzadah, S. Chmiela, K.-R. Müller, A. Tkatchenko Quantum-Chemical Insights from Deep Tensor Neural Networks, Nat. Commun. 8, 13890, 2017.",
                ],
                "data_contact": {
                    "given_name": "Alexandre",
                    "family_name": "Tkatchenko",
                    "email": "*****@*****.**",
                    "institution": "University of Luxembourg",
                },
                "author": [
                    {
                        "given_name": "Alexandre",
                        "family_name": "Tkatchenko",
                        "email": "*****@*****.**",
                        "institution": "University of Luxembourg",
                    },
                    {
                        "given_name": "Kristof",
                        "family_name": "Schütt",
                        "institution": berlin,
                    },
                    {
                        "given_name": "Farhad",
                        "family_name": "Arbabzadah",
                        "institution": berlin,
                    },
                    {
                        "given_name": "Stefan",
                        "family_name": "Chmiela",
                        "institution": berlin,
                    },
                    {
                        "given_name": "Klaus",
                        "family_name": "Müller",
                        "institution": berlin,
                    },
                ],
                "collection": "Quantum Machine",
                "tags": ["molecular", "dynamics", "trajectories", "DFT", "density functional theory", "PBE", "exchange", "simulation"],
                "description": "This data set consists of molecular dynamics trajectories of 113 randomly selected C7O2H10 isomers calculated at a temperature of 500 K and resolution of 1fs using density functional theory with the PBE exchange-correlation potential.",
                "year": 2016,
                "links": {
                    "landing_page": "http://quantum-machine.org/datasets/#C7O2H10",
                    "publication": ["https://dx.doi.org/10.1038/ncomms13890"],
                    "tar_gz": {
                        "http_host": "http://quantum-machine.org",
                        "path": "/data/c7o2h10_md.tar.gz",
                    },
                },
                "data_contributor": [{
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff",
                }],
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not JSON text; fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Loop-invariant pieces of the per-record links.
    globus_endpoint = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    http_host = "https://data.materialsdatafacility.org"
    collection_path = "/collections/test/md_trajectories_of_c7o2h10/c7o2h10_md/"
    landing_base = http_host + collection_path

    # Get the data
    for file_data in tqdm(find_files(os.path.join(input_path, "c7o2h10_md"), "xyz"), desc="Processing QM_MDT_C", disable=not verbose):
        filename = file_data["filename"]
        file_path = os.path.join(file_data["path"], filename)
        record = parse_ase(file_path, "xyz")
        formula = record.get("chemical_formula", "")

        # The sub-directory segment is optional. It must be parenthesized:
        # a conditional expression binds looser than "+", so without the
        # parentheses the URL lost either the filename or the whole base.
        no_root_path = file_data["no_root_path"]
        subdir = (no_root_path + "/") if no_root_path else ""
        landing_page = landing_base + subdir + filename

        record_metadata = {
            "mdf": {
                "title": "MD Trajectories of C7O2H10 - " + formula + " - " + filename,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "landing_page": landing_page,
                    # NOTE(review): these two paths ignore no_root_path, unlike
                    # landing_page -- confirm the files sit directly in c7o2h10_md.
                    "xyz": {
                        "globus_endpoint": globus_endpoint,
                        "http_host": http_host,
                        "path": collection_path + filename,
                    },
                    "energy": {
                        "globus_endpoint": globus_endpoint,
                        "http_host": http_host,
                        "path": collection_path + filename.replace(".xyz", "") + ".energy.dat",
                    },
                },
            },
            # Dataset-specific block, named after the source_name.
            "qm_mdt_c": {
                "temperature": {
                    "value": 500,
                    "unit": "kelvin",
                },
                "resolution": {
                    "value": 1,
                    "unit": "femtosecond",
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Only an explicit True counts as success; anything else is reported
        # and processing continues with the next file.
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")