def main():
    """Convert every .xyz file in a directory into PIF records in one JSON file.

    Command-line arguments:
        --input_file_dir: directory containing the .xyz files to convert
        --output_json_path: path of the JSON file the PIF records are written to
    """
    property_names_units = get_property_names_units()
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file_dir", type=str, required=True,
                        help="the input file directory")
    parser.add_argument("--output_json_path", type=str, required=True,
                        help="the output json file path")
    args = parser.parse_args()
    input_path = args.input_file_dir
    output_path = args.output_json_path
    files = os.listdir(input_path)
    num = 0
    # BUG FIX: a context manager guarantees the output file is closed even if
    # a conversion raises (the original only closed it on the success path).
    with open(output_path, 'w') as f:
        # convert each file to pd.DataFrame then output with pif
        for file in files:
            # BUG FIX: endswith() replaces the substring test `'.xyz' in file`,
            # which also matched names like 'notes.xyz.bak'.
            if file.endswith('.xyz'):
                df = file_to_pd(file, input_path, property_names_units)
                if df is not None:
                    chemical_system = pd_to_pifmat(df)
                    pif.dump(chemical_system, f, indent=4)
                    num += 1
                    # lightweight progress report every 5 files
                    if num % 5 == 0:
                        print('%d files have been processed' % num)
    print('In total %d files have been processed' % num)
def test_upload_pif():
    """
    Tests that a PIF can be created, serialized, uploaded then downloaded
    and deserialized
    """
    record = System()
    record.id = 0
    uid = random_string()
    record.uid = uid
    with open("tmp.json", "w") as fp:
        dump(record, fp)
    assert client.upload(dataset_id, "tmp.json").successful()
    # Poll for the ingested record: up to 11 attempts, one second apart.
    for attempt in range(11):
        try:
            record = client.get_pif(dataset_id, uid)
            break
        except ResourceNotFoundException:
            if attempt == 10:
                raise
            time.sleep(1)
    status = client.get_ingest_status(dataset_id)
    assert status == "Finished"
    with open("tmp.json", "r") as fp:
        assert json.loads(fp.read())["uid"] == record.uid
def run(self):
    """Serialize self.inputs['pif'] to a JSON file and optionally ship it to
    a Citrination dataset via self.inputs['client'].

    Stores the upload response (or a dry-run / error message) in
    self.outputs['response'].  Returns silently if any required input is None.
    """
    cl = self.inputs['client']
    dsid = self.inputs['dsid']
    p = self.inputs['pif']
    json_dir = self.inputs['json_dirpath']
    json_file = self.inputs['json_filename']
    if cl is None or dsid is None or p is None \
            or json_dir is None or json_file is None:
        return
    # BUG FIX: os.path.splitext() keeps the leading dot ('.json'), so the old
    # comparison against 'json' was always False and names already ending in
    # '.json' were rewritten to '<name>.json.json'.
    if os.path.splitext(json_file)[1] != '.json':
        json_file = json_file + '.json'
    json_file = os.path.join(json_dir, json_file)
    json_flag = self.inputs['keep_json']
    ship_flag = self.inputs['ship_flag']
    try:
        # TODO: make p an array of pifs to get a big json that has all records
        # BUG FIX: context manager closes the handle (pif.dump(p, open(...))
        # leaked it).
        with open(json_file, 'w') as ofs:
            pif.dump(p, ofs)
        if ship_flag:
            r = cl.upload_file(json_file, dataset_id=dsid)
        else:
            r = 'dry run: no shipment occurred. pif object: {}'.format(
                pif.dumps(p))
        if not json_flag:
            os.remove(json_file)
    except Exception as ex:
        # BUG FIX: Exception.message does not exist on Python 3; format the
        # exception itself (equivalent to str(ex)).
        r = 'An error occurred while shipping. Error message: {}'.format(ex)
    self.outputs['response'] = r
def main ():
    """Collect samples from the requested plate/build sources and write one
    PIF JSON file per sample into an output directory, optionally archiving
    the directory as a .tgz.

    Reads the module-level ``args`` namespace: sources, output,
    duplicate_error, create_archive.
    """
    global args
    samples = []
    # ####################################
    # read
    # ####################################
    for source in args.sources:
        try:
            subset = {
                'faustson-plate1-build1': P001B001().samples,
                'faustson-plate2-build1': P002B001().samples,
                'faustson-plate3-build1': P003B001().samples,
                'faustson-plate4-build1': P004B001().samples,
                'faustson-plate5-build1': P005B001().samples,
                'faustson-plate5-build2': P005B002().samples,
                'faustson-plate6-build1': P006B001().samples
            }[source.lower()]
        except KeyError:
            raise ValueError('{source:} is not a recognized source.'.format(
                source=source))
        samples.extend(subset)
    # ####################################
    # write
    # ####################################
    # To improve traceability of the samples and their history, each sample
    # should be uploaded separately, i.e. as a separate file. So rather than
    # storing these in a single file, create a directory to store each sample
    # as a separate file in that directory, then tar and zip the directory.
    directory = args.output
    directory = make_directory(directory, retry=0)
    for sample in samples:
        # generate JSON string
        jstr = pif.dumps(sample, indent=4)
        # Compute the UID up front so it can also label duplicate errors.
        urn = get_urn(jstr)
        # create a filename from the contents of the record
        try:
            ofile = filename_from(jstr, directory=directory)
        except IOError:
            # BUG FIX: the old message formatted 'ofile', which is unbound
            # (or stale) when filename_from() raises; use the record URN.
            msg = 'Sample {} is duplicated.'.format(urn)
            if not args.duplicate_error:
                # BUG FIX: added the missing space before 'Skipping.'
                sys.stdout.write('WARNING: {} ' \
                                 'Skipping.\n'.format(msg))
                continue
            else:
                msg = 'ERROR: {} To skip duplicates, invoke the ' \
                      '--duplicate-warning flag.'.format(msg)
                shutil.rmtree(directory)
                raise IOError(msg)
        # Add the UID to the record
        sample.uid = urn
        # write the file
        with open(ofile, 'w') as ofs:
            pif.dump(sample, ofs)
    # tarball and gzip the new directory
    if args.create_archive:
        tarball = '{}.tgz'.format(directory)
        with tarfile.open(tarball, 'w:gz') as tar:
            tar.add(directory)
        shutil.rmtree(directory)
def test_file_list(mark10_no_stress, generate_output):
    """Regression test: converting SOURCE matches the stored fixture."""
    result = converter([SOURCE])
    if generate_output:
        fixture = '{}/data/mark10-no-stress.json'.format(HERE)
        with open(fixture, 'w') as ofs:
            pif.dump(result, ofs, sort_keys=True)
        # regenerating the fixture must never look like a passing test
        assert False
    assert pif.dumps(result, sort_keys=True).strip() == mark10_no_stress
def test_mises_with_time(aramis_mises_with_time, generate_output):
    """Regression test: von Mises conversion at a 0.5 timestep matches the
    stored fixture."""
    result = converter(MISES, timestep=0.5)
    if generate_output:
        fixture = '{}/data/aramis-mises-with-time.json'.format(HERE)
        with open(fixture, 'w') as ofs:
            pif.dump(result, ofs, sort_keys=True)
        # regenerating the fixture must never look like a passing test
        assert False
    assert pif.dumps(result, sort_keys=True) == aramis_mises_with_time
def test_ey_strain_with_time(aramis_ey_strain_with_time, generate_output):
    """Regression test: Ey strain conversion at a 0.5 timestep matches the
    stored fixture."""
    result = converter(EYSTRAIN, timestep=0.5)
    if generate_output:
        fixture = '{}/data/aramis-ey_strain-with-time.json'.format(HERE)
        with open(fixture, 'w') as ofs:
            pif.dump(result, ofs, sort_keys=True)
        # regenerating the fixture must never look like a passing test
        assert False
    assert pif.dumps(result, sort_keys=True) == aramis_ey_strain_with_time
def make_pif(filename):
    """
    Method to turn a CSV file to PIF for this BMG-based dataset. Can edit it
    to pull out whatever properties needed.

    :param filename: Path to the CSV file you'd like to convert to PIF.
    :type filename: str
    :return: Path to the JSON output from converting to PIF
    :rtype: str
    """
    # Read in csv file using pandas to make df
    new_data = pd.read_csv(filename)
    # Pull out the desired properties and turn them to lists. Edit this to
    # include each of the properties you would like to upload.
    form = new_data['formula']
    energy = new_data['PROPERTY: Nearest DFT Formation Energy (eV)']
    tg = new_data['PROPERTY: Tg (K)']
    # FIX (idiom): renamed 'input' -> 'systems'; the old name shadowed the
    # builtin input(). The dead, commented-out header-reading block was removed.
    systems = []
    # Make pifs
    for i in range(0, len(form)):
        # Create a new chemical system (from the pypif package)
        chemical_system = ChemicalSystem()
        # Set the formula
        chemical_system.chemical_formula = form[i]
        # Create some properties to add. If you have conditions, the format is
        # slightly different; see the Citrination knowledgebase.
        dft_energy = Property(name='Nearest DFT Formation Energy', units='eV',
                              scalars=float(energy[i]))
        tg_prop = Property(name='Tg', units='K', scalars=float(tg[i]))
        # add the properties to the chemical system
        chemical_system.properties = [dft_energy, tg_prop]
        # add the system to the list
        systems.append(chemical_system)
    # Write the systems to a json using pif.dump from pypif
    outfile = "pif.json"
    with open(outfile, 'w') as fp:
        pif.dump(systems, fp)
    # Return the name of the file you wrote the pif to
    return outfile
def test_stress(mark10_with_stress, generate_output):
    """Regression test: converting SOURCE with a cross-sectional area
    (12.9 mm^2, used to turn load into stress) matches the stored fixture.
    """
    # FIX (idiom): removed the unused local `units='mm^2'`; the unit is noted
    # in the docstring instead since converter() was never given it.
    area = 12.9
    pifs = converter(SOURCE, area=area)
    if generate_output:
        with open('{}/data/mark10-with-stress.json'.format(HERE), 'w') as ofs:
            pif.dump(pifs, ofs, sort_keys=True)
        # regenerating the fixture must never look like a passing test
        assert False
    pifs = pif.dumps(pifs, sort_keys=True).strip()
    assert pifs == mark10_with_stress
def test_upload_pif():
    """Create a dataset, serialize a minimal System, and upload it."""
    client = CitrinationClient(environ['CITRINATION_API_KEY'],
                               environ['CITRINATION_SITE'])
    raw = client.create_data_set(name="Tutorial dataset",
                                 description="Dataset for tutorial",
                                 share=0).content.decode('utf-8')
    dataset = loads(raw)['id']
    record = System()
    record.id = 0
    with open("tmp.json", "w") as fp:
        dump(record, fp)
    response = loads(client.upload_file("tmp.json", dataset))
    assert response["message"] == "Upload is complete."
def run(self):
    """Serialize self.inputs['pif'] to <dirpath>/<filename>.json.

    Returns silently if any of the three required inputs is None.
    """
    p = self.inputs['pif']
    dp = self.inputs['dirpath']
    fn = self.inputs['filename']
    if dp is None or fn is None or p is None:
        return
    # BUG FIX: os.path.splitext() keeps the leading dot ('.json'), so the old
    # comparison against 'json' was always False and names already ending in
    # '.json' were rewritten to '<name>.json.json'.
    if os.path.splitext(fn)[1] != '.json':
        fn = fn + '.json'
    json_file = os.path.join(dp, fn)
    # BUG FIX: context manager closes the handle (pif.dump(p, open(...))
    # leaked it).
    with open(json_file, 'w') as ofs:
        pif.dump(p, ofs)
def test_linkages_sagittariidae(self):
    """Attach sagittariidae links to every record in the packaged pif data
    and write the linked records back out."""
    with open('data/package.json') as ifs:
        pifdata = pif.load(ifs)
    linker = sagittariidae.link_factory(
        projectID='nq3X4-concept-inconel718',
        host='http://sagittariidae.adapt.mines.edu')
    for record in pifdata:
        linker(record)
    with open('delme.json', 'w') as ofs:
        pif.dump(pifdata, ofs)
def test_frame_to_pif(frame, file='./test.json', nest_sub_systems=False):
    """
    Save a PIF file from a pandas DataFrame.

    Args:
        frame (PifFrame) DataFrame containing System entries.
        file (str) File name to write to.
        nest_sub_systems (bool) Whether sub-systems are nested on export.
    """
    exported = frame.to_pif_systems(nest_sub_systems=nest_sub_systems)
    with open(file, 'w') as fp:
        pif.dump(exported, fp, indent=4)
def test_upload_pif():
    """Create a dataset and upload a minimal System serialized into a
    temporary directory (nothing is left on disk afterwards)."""
    client = CitrinationClient(environ['CITRINATION_API_KEY'],
                               'https://stage.citrination.com')
    payload = client.create_data_set(name="Tutorial dataset",
                                     description="Dataset for tutorial",
                                     share=0).content.decode('utf-8')
    dataset = loads(payload)['id']
    record = System()
    record.id = 0
    with TemporaryDirectory() as tmpdir:
        tempname = join(tmpdir, "pif.json")
        with open(tempname, "w") as fp:
            dump(record, fp)
        response = loads(client.upload_file(tempname, dataset))
    assert response["message"] == "Upload is complete."
def test_upload_pif():
    """Create a tutorial dataset and verify a serialized System uploads."""
    client = CitrinationClient(environ['CITRINATION_API_KEY'],
                               environ['CITRINATION_SITE'])
    creation = client.create_data_set(name="Tutorial dataset",
                                      description="Dataset for tutorial",
                                      share=0)
    dataset = loads(creation.content.decode('utf-8'))['id']
    system = System()
    system.id = 0
    with open("tmp.json", "w") as fp:
        dump(system, fp)
    response = loads(client.upload_file("tmp.json", dataset))
    assert response["message"] == "Upload is complete."
def ship_dataset(self, pifs):
    """Create a new Citrination data set and upload one JSON file per pif.

    Records 1 in self.return_codes[uid] on success, -1 on any failure.

    :param pifs: iterable of pif records, each with a .uid attribute
    """
    # Create the data set
    response = self.ctn_client.create_data_set()
    dsid = response.json()['id']
    # TODO: Note that the entire data set can be one json,
    # of an array of pif records, and this will lead to a faster upload.
    for p in pifs:
        try:
            json_file = pawstools.scratchdir + '/' + p.uid + '.json'
            # BUG FIX: context manager closes the handle
            # (pif.dump(p, open(...)) leaked it).
            with open(json_file, 'w') as ofs:
                pif.dump(p, ofs)
            # NOTE(review): 'cl' is not defined in this method -- presumably
            # a module-level client; confirm it should not be self.ctn_client.
            cl.upload_file(json_file, data_set_id=dsid)
            self.return_codes[p.uid] = 1
            # delete dataset json
            os.remove(json_file)
        # BUG FIX: bare 'except:' also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        except Exception:
            # TODO: Pass along some return code from the server?
            self.return_codes[p.uid] = -1
def make_pif(filename):
    """
    Method to turn a CSV file to PIF for this BMG-based dataset. Can edit it
    to pull out whatever properties needed.

    :param filename: Path to the CSV file you'd like to convert to PIF.
    :type filename: str
    :return: Path to the JSON output from converting to PIF
    :rtype: str
    """
    # Read in csv file using pandas to make df
    new_data = pd.read_csv(filename)
    # Pull out the desired properties and turn them to lists.
    form = new_data['formula']
    energy = new_data['PROPERTY: Nearest DFT Formation Energy (eV)']
    tg = new_data['PROPERTY: Tg (K)']
    tl = new_data['PROPERTY: Tl (K)']
    tx = new_data['PROPERTY: Tx (K)']
    # FIX (idiom): renamed 'input' -> 'systems'; the old name shadowed the
    # builtin input().
    systems = []
    # Make pifs
    for i in range(0, len(form)):
        chemical_system = ChemicalSystem()
        chemical_system.chemical_formula = form[i]
        dft_energy = Property(name='Nearest DFT Formation Energy',
                              units='eV', scalars=float(energy[i]))
        tg_prop = Property(name='Tg', units='K', scalars=float(tg[i]))
        tl_prop = Property(name='Tl', units='K', scalars=float(tl[i]))
        tx_prop = Property(name='Tx', units='K', scalars=float(tx[i]))
        chemical_system.properties = [dft_energy, tg_prop, tl_prop, tx_prop]
        systems.append(chemical_system)
    # Write the systems to a json using pif.dump from pypif
    outfile = "pif.json"
    with open(outfile, 'w') as fp:
        pif.dump(systems, fp)
    return outfile
def _handle_pif(path, ingest_name, convert_args, enrich_args, ingest_manager):
    """Ingest and enrich pifs from a path, returning affected paths"""
    # Run an ingest extension to produce the pif records.
    pifs = ingest_manager.run_extension(ingest_name, path, convert_args)
    # Enrichment: attach tags, license and contact metadata.
    for enrich, key in ((add_tags, 'tags'),
                        (add_license, 'license'),
                        (add_contact, 'contact')):
        enrich(pifs, enrich_args[key])
    # A file input gets a sibling '<file>_pif.json'; a directory input gets a
    # 'pif.json' written inside it.
    if os.path.isfile(path):
        pif_name = "{}_{}".format(path, "pif.json")
        affected = [path, pif_name]
    else:
        pif_name = os.path.join(path, "pif.json")
        affected = [path]
    with open(pif_name, "w") as fp:
        pif.dump(pifs, fp, indent=2)
    logging.info("Created pif at {}".format(pif_name))
    return affected
if file_type[2] == "tif": jpeg_path = convert_tif_to_jpeg(image_path) image_to_pif(jpeg_path) return my_pif = ChemicalSystem() my_pif.ids = [os.path.basename(image_path).split("_")[0]] my_pif.names = [os.path.basename(image_path).rpartition(".")[0]] my_pif.properties = [ Property(name="SEM", files=FileReference(mime_type="image/" + file_type[2], relative_path=image_path)) ] return [my_pif] if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('images', nargs='*', help='path to SEM images (.tif, .jpeg, .bmp)') args = parser.parse_args() for f in args.images: system = image_to_pif(f) f_out = f.rpartition(".")[0] + ".json" print(f_out) pif.dump(system, open(f_out, "w"), indent=4)
# Even if one of the numbers is not convertible to float, skip line try: floats = [float(num) for num in x_y_e] except ValueError: continue x.append(Scalar(value=x_y_e[0])) y.append(Scalar(value=x_y_e[1], uncertainty=x_y_e[2])) intensity = Property(name="Intensity", scalars=y, conditions=[Value(name="2$\\theta$", scalars=x, units="degrees")], methods=[Method(instruments= Instrument(name="11-BM", url="http://11bm.xray.aps.anl.gov/"))] ) chem_sys.properties.append(intensity) return chem_sys if __name__ == "__main__": # file_name = "LuFe2O4_700Air_hold3-00059.xye" file_name = "11bmb_2144_AA0037_YbFeO_red.xye" # file_name = "NOM_LuFe2O4_Ex_situ_20C-5.xye" # file_name = "PG3_27954-3.xye" chem_system = convert(files=["../test_files/" + file_name], sample_id="001", chemical_formula="NaCl") with open('../test_files/' + file_name.replace('.xye', '.json'), 'w') as fw: pif.dump(chem_system, fw, indent=4)
def to_pif(dataframe, filename):
    """
    Constructs a PIF-formatted data file appropriately formatted for import
    into the citrination platform. Several columns are treated as special:

        Sample Name --> uid
        Parent Sample Name --> subsystem.uid
        Contact --> contacts

    :param dataframe: pandas.DataFrame containing the tabulated information
        collected from previous steps.
    :param filename: path of the JSON file the PIF records are written to.
    :return: None
    """
    from pypif import pif

    def to_system(row):
        # Build one pif.System from one DataFrame row.

        def url_friendly(name):
            # uids must be URL friendly: replace every non-word character.
            return re.sub(r"\W", "_", name)

        # Known contacts, keyed below by the 'Contact' column value.
        def wolf_contact():
            return pif.Person(name='JP H. Paul',
                              email='*****@*****.**',
                              tags='Lincoln Electric (Wolf Robotics)')

        def cmu_contact():
            return pif.Person(name='Anthony Rollett',
                              email='*****@*****.**',
                              tags='Carnegie Mellon University')

        def mines_contact():
            return pif.Person(name='Branden Kappes',
                              email='*****@*****.**',
                              tags='Colorado School of Mines')

        def lmco_contact():
            return pif.Person(name='Edward A. Pierson',
                              email='*****@*****.**',
                              tags='Lockheed Martin Corporation')

        # Every PIF record is a System object
        system = pif.System()
        # create a unique identifier. This must be URL friendly.
        system.uid = url_friendly(str(row['Sample Name']))
        # name the PIF
        system.names = str(row['Sample Name'])
        # record the parent sample name
        system.sub_systems = pif.System(
            uid=url_friendly(str(row['Parent Sample Name'])),
            names=str(row['Sample Name']))
        # set the contact information. By default, I set this as LMCO.
        system.contacts = {
            'Wolf': wolf_contact,
            'CMU': cmu_contact,
            'Mines': mines_contact,
            'LM': lmco_contact
        }.get(row['Contact'], lmco_contact)()
        # Certain fields we treat as special
        special = ['Sample Name', 'Parent Sample Name', 'Contact']
        filelist = None
        # First pass over the row: collect FILE:* columns into FileReferences.
        for column, value in row.items():
            # special columns have already been handled.
            if column in special:
                continue
            # no need to record empty record.
            if is_null(value):
                continue
            # handle FILEs.
            if column.startswith('FILE'):
                # add the FILE column to the list of special columns
                # so we don't add this as a property.
                special.append(column)
                # split on colon -- we don't need "FILE" in the name
                split = column.split(':')
                # Convert the filename (or the string-representation of
                # a list of filenames, e.g. [file1.png, file2.png]) to
                # a list of files
                value = str(value).strip("[]").split(",")
                # create a file reference for each file and store these as
                # a list of FileReference objects.
                for path in value:
                    file = pif.FileReference(
                        relative_path=str(path),
                        tags=':'.join(split[1:]).strip())
                    try:
                        filelist.append(file)
                    except AttributeError:
                        # first file seen: 'filelist' is still None
                        filelist = [file]
        # create a special property that holds all the files
        # NOTE(review): this runs even when no FILE column matched, so a
        # 'Files' property with files=None is still appended -- confirm
        # that is intended.
        pty = pif.Property(name="Files", files=filelist)
        try:
            system.properties.append(pty)
        except AttributeError:
            # if this is the first property, append to a None value will
            # fail. Handle this edge case.
            system.properties = [pty]
        # everything else is a property
        for column, value in row.items():
            # special columns have already been handled.
            if column in special:
                continue
            # ignore this value if empty
            if is_null(value):
                continue
            # scalar can only contain lists, dict, string, or Number
            value = {
                pd.Timestamp: str,
                tuple: list
            }.get(type(value), type(value))(value)
            # otherwise, construct a property value.
            try:
                pty = pif.Property(name=column, scalars=value)
            except:
                # NOTE(review): bare except -- falls back to the string form
                # when the raw value is not accepted by pif.Property.
                pty = pif.Property(name=column, scalars=str(value))
            # units are stored, by convention in parentheses, e.g.
            # laser speed (mm/s). This regular expression extracts the the
            # last term surrounded by parentheses.
            try:
                pty.units = re.search('.*\(([^)]+)\)\s*$', column).group(1)
            except AttributeError:
                # no units were found...
                pass
            try:
                # add the property
                system.properties.append(pty)
            except AttributeError:
                # if this is the first property, append to a None value will
                # fail. Handle this edge case.
                system.properties = [pty]
        # done
        return system

    # Empty cells become empty strings so is_null() can filter them.
    dataframe = dataframe.fillna('')
    records = [to_system(row) for i, row in dataframe.iterrows()]
    with open(filename, 'w') as ofs:
        pif.dump(records, ofs)
else: raise IOError( 'Filetype provided is not compatible with this parser. Please upload a .csv or .tsv file.\n' ) if not _check_table_size(table, (100 * 100000 + 1)): continue input_file.seek(0) for i, row in enumerate(table): if not any(row): continue if i == 0: headers = row else: row_pif = create_pif(headers, row) yield row_pif if __name__ == '__main__': result = convert(files=[sys.argv[1]]) with open( sys.argv[1].replace('.{}'.format(sys.argv[1].rpartition('.')[-1]), '-pif.json'), 'w') as output_file: pif.dump(list(result), output_file, indent=2)
from pypif import pif

from quality_made_xlsx.read_excel import read_excel
from quality_made_xlsx.process_archive import process_archive


def convert(files=[]):
    """
    Convert files into a pif.

    Only the first entry of ``files`` is read.

    :param files: list of paths; files[0] is the Excel workbook to convert
    :return: the pif produced by this conversion
    """
    # DOC FIX: the old docstring documented parameters (important_argument,
    # whatever_argument, do_some_extra_thing, kwargs) that do not exist in
    # the signature.
    # NOTE(review): mutable default argument; harmless here because 'files'
    # is never mutated, but prefer passing a fresh list at call sites.
    return read_excel(files[0])


if __name__ == '__main__':
    import sys
    with open(sys.argv[1].replace('.xlsx', '-pif.json'), 'w') as ofs:
        pif.dump(convert(files=[sys.argv[1]]), ofs, indent=4)
spectra_basename + file_index(scan_num[i]) + '_Qchi.png')), Property(name='XRD Intensity', scalars=IntAve, conditions=[ Value(name='Q, (Angstrom$^{-1}$)', scalars=Qlist), Value(name='Temperature', scalars='25', units='$^\\circ$C'), Value(name='Exposure time', scalars='30', units='seconds') ], methods=Method(instruments=(Instrument( name='MARCCD, 2048 pixels x 2048 pixels, 79 microns')))), Property(name='Maximum intensity/average intensity', scalars=round(np.nanmax(IntAve) / np.nanmean(IntAve), 2)), Property(name='Full width half maximum (FWHM) of FSDP', scalars=round(peak_width[i], 2)), Property(name='First sharp diffraction peak (FSDP) position', scalars=round(peak_position[i], 2)), Property(name='Textured', scalars=0) ] # specify a unique uid for each sample alloy.uid = 'Fe' + str(int(Fe[i])) + 'Ti' + str(int(Ti[i])) + 'Nb' + str( int(Nb[i])) + 'low_power' + str(scan_num[i]) # print pif.dumps(alloy) alloys += [alloy] pif.dump(alloys, open('..//..//data//Json_files_Citrine//SampleB2_21.json', 'w'))
def test_converter(generate_output):
    """Run the ASTM converter; optionally regenerate the stored fixture."""
    astm_pif = astm_converter([STRAIN, STRESS])
    if generate_output:
        target = '{}/data/astm-mark10-aramis.json'.format(HERE)
        with open(target, 'w') as ofs:
            pif.dump(astm_pif, ofs)
def converter(*args):
    """
    Ingest .xyz files

    Args (one of these is required):
        listd: directory full path path and a comma separated list of
            filenames to process in that directory
        alld: directory full path path
        files: comma separated list of filenames with their full paths

    Returns:
        a pif chemical system (JSON-encoded text file) from each ingested
        .xyz file
    """
    parser = argparse.ArgumentParser(description='convert xyz file(s) to pif')
    # exactly one of -l / -a / -f must be supplied
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '-l',
        '--listd',
        help=
        '2 arguments: the source directory full path and a comma separated list of filenames to process in that directory',
        nargs=2)
    group.add_argument(
        '-a',
        '--alld',
        help=
        '1 argument: the source directory full path path where all files of .xyz type reside',
        nargs=1)
    group.add_argument(
        '-f',
        '--files',
        help=
        '1 argument: a comma separated list of filenames with their full paths to process',
        nargs=1)
    args = parser.parse_args()
    # Several containers defined
    # stores the extracted data in pif
    my_pif = []
    # list of supplied filenames to be processed
    filename_list = []
    # same list with each file with its full path
    filename_fullpath_list = []
    # data source directory
    data_directory = []
    # check if destination directory exists and if not create one
    if not os.path.exists(DEST_DIR):
        os.makedirs(DEST_DIR)
    # clean destination directory
    for fileName in os.listdir(DEST_DIR):
        os.remove(DEST_DIR + "/" + fileName)
    # Depending on the command line input, process data and create a PIF file
    #
    # Below is the case when the user specifies a directory and a list of
    # files to be processed from that directory
    if args.listd:
        data_directory = args.listd[0]
        filename_list = [str(item) for item in args.listd[1].split(',')]
        for i in range(len(filename_list)):
            # a truthy check() result means "skip this file" here
            # -- TODO confirm against check()'s definition
            if check(filename_list[i]):
                continue
            filename_fullpath_list = data_directory + '/' + filename_list[i]
            my_pif = parser_xyz(filename_fullpath_list)
            # NOTE(review): output path concatenates DEST_DIR and the
            # filename with no '/' separator -- correct only if DEST_DIR
            # ends with '/'; confirm.
            with open(DEST_DIR + filename_list[i].replace('.xyz', '.json'),
                      'w') as fw:
                pif.dump(my_pif, fw, indent=4)
    # Below is the case when the user specifies a directory and all files
    # from that directory need to be processed
    elif args.alld:
        data_directory = str(args.alld[0])
        filename_list = os.listdir(data_directory)
        for i in range(len(filename_list)):
            filename_fullpath_list = data_directory + '/' + filename_list[i]
            # NOTE(review): unlike the -l branch, check()'s result is
            # ignored here, so no file is skipped -- confirm intended.
            check(str(filename_fullpath_list))
            my_pif = parser_xyz(str(filename_fullpath_list))
            with open(DEST_DIR + filename_list[i].replace('.xyz', '.json'),
                      'w') as fw:
                pif.dump(my_pif, fw, indent=4)
    # Below is the case when the user specifies a list of distinct files with
    # their full path that need to be processed
    elif args.files:
        filename_fullpath_list = [
            str(item) for item in args.files[0].split(',')
        ]
        for i in range(len(filename_fullpath_list)):
            # keep only the basename for the output file
            filename_list = filename_fullpath_list[i].split('/')[-1]
            check(str(filename_fullpath_list[i]))
            my_pif = parser_xyz(str(filename_fullpath_list[i]))
            with open(DEST_DIR + filename_list.replace('.xyz', '.json'),
                      'w') as fw:
                pif.dump(my_pif, fw, indent=4)
    else:
        print("Nothing entered")
return my_pif except IOError as e: print(e) print("RAW V4 FILE NOT PARSED") return None if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('files', nargs='*', help='path to XRD files (.raw, .txt)') args = parser.parse_args() for f in args.files: if ".txt" in f: print ("PARSING: {}".format(f)) pifs = raw4_txt_to_pif(f) f_out = f.replace(".txt", ".json") print ("OUTPUT: {}".format(f_out)) pif.dump(pifs, open(f_out, "w"), indent=4) if ".raw" in f: print ("PARSING: {}".format(f)) pifs = raw_to_pif(f) if pifs: f_out = f.replace(".raw", ".json") print ("OUTPUT: {}".format(f_out)) pif.dump(pifs, open(f_out, "w"), indent=4)
print("Defect curves: ", v) if len(v) >= 2: intersection_points = calculate_intersect_points(v) print("INTERSECTION POINTS: ", intersection_points) low_energy_line = find_min_energy_overlap(v, intersection_points) print("LOWEST ENERGY LINE: ", low_energy_line) system.properties.append(Property(name="$\Delta$H_2", scalars=low_energy_line[0], conditions=[Value(name="E$_F$_2", scalars=low_energy_line[1])])) else: print("LOWEST ENERGY LINE: ", [[v[0][0][0], v[0][1][0]], [v[0][0][1], v[0][1][1]]]) system.properties.append(Property(name="$\Delta$H_2", scalars=[v[0][0][1], v[0][1][1]], conditions=[Value(name="E$_F$_2", scalars=[v[0][0][0], v[0][1][0]])])) systems.append(system) print("=====") return systems if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("csv", nargs="*", help="path to template file") args = parser.parse_args() for f in args.csv: pifs = parse_template(f) outfile = f.replace(".csv", ".json") pif.dump(pifs, open(outfile, "w")) print("PIF DUMPED: ", outfile)
import sys

from pypif import pif

from .parse_cif_pmg import parse_cif


def convert(files=[], **kwargs):
    """
    Convert CIF files into pif systems.

    :param files: paths of the CIF files to convert
    :param kwargs: any other arguments (ignored)
    :return: list of pif systems, one per successfully parsed file
    """
    print('Converting {} CIFs'.format(len(files)))
    parsed = (parse_cif(f) for f in files)
    # Files that fail to parse yield a falsy result and are dropped.
    return [system for system in parsed if system]


if __name__ == '__main__':
    with open(sys.argv[1].replace('.cif', '-pif.json'), 'w') as output_file:
        pif.dump(convert(files=[sys.argv[1]]), output_file, indent=4)
def harmonic_max_json(xyz_panda):
    """Build a PIF System from a QM9-style .xyz file loaded into a DataFrame
    and write it to '<database tag + id>.json'.

    Row layout assumed (QM9 convention -- TODO confirm against the loader):
    row 0 holds the atom count, row 1 the tag/id plus the scalar properties,
    rows 2..n_atom+1 the per-atom lines, row n_atom+2 the harmonic
    frequencies, and rows n_atom+3 / n_atom+4 the SMILES / InChI strings.

    :param xyz_panda: pandas DataFrame view of one .xyz file
    """
    my_pif = System()
    # Row 1, column 0 is the database tag + id; its stripped form becomes the
    # output filename stem.
    b = str(xyz_panda.loc[1, 0])
    a = b.strip(" ")
    n_atom = int(xyz_panda.loc[0, 0])
    # SMILES and InChI rows sit a fixed offset past the atom block.
    smiles = n_atom + 3
    inchi = n_atom + 4
    atom_line = []
    # Row holding the harmonic vibrational frequencies.
    harmony = n_atom + 2
    harmony_np = xyz_panda.loc[harmony, :].dropna().values
    harmony_line = list(np.ndarray.tolist(harmony_np))
    harmonic_max = [harmony_line]
    # this loop extracts the atomic coordinates and Mulliken charges from the
    # compound.
    for i in range(2, n_atom + 2):
        atom_line.append([
            xyz_panda.loc[i, 0], xyz_panda.loc[i, 1], xyz_panda.loc[i, 2],
            xyz_panda.loc[i, 3], xyz_panda.loc[i, 4]
        ])
    # These lines take each of the properties encoded in the second line of
    # the xyz file into individual instances of the Property() class.
    Inchi = Property(name="InChI Key Identifier",
                     scalars=xyz_panda.loc[inchi, 0])
    SMILES = Property(name="(SMILES) Chemical Identifier",
                      scalars=xyz_panda.loc[smiles, 0])
    Atom_Number = Property(name="Number of Atoms",
                           scalars=xyz_panda.loc[0, 0])
    Tag = Property(name="Database Tag + Id", scalars=xyz_panda.loc[1, 0])
    Rotational_Constant_A = Property(name="Rotational Constant A",
                                     scalars=xyz_panda.loc[1, 1],
                                     units="GHz",
                                     data_type="COMPUTATIONAL")
    Rotational_Constant_B = Property(name="Rotational Constant B",
                                     scalars=xyz_panda.loc[1, 2],
                                     units="GHz",
                                     data_type="COMPUTATIONAL")
    # NOTE(review): units casing is "GHZ" here but "GHz" for A and B --
    # confirm whether this should be normalized.
    Rotational_Constant_C = Property(name="Rotational Constant C",
                                     scalars=xyz_panda.loc[1, 3],
                                     units="GHZ",
                                     data_type="COMPUTATIONAL")
    Dipole_Moment = Property(name="Dipole Moment",
                             scalars=xyz_panda.loc[1, 4],
                             units="Debye",
                             data_type="COMPUTATIONAL")
    Isotropic_polarizability = Property(name="Isotropic polarizability",
                                        scalars=xyz_panda.loc[1, 5],
                                        units="Å^3",
                                        data_type="COMPUTATIONAL")
    Energy_of_HOMO = Property(name="Energy of H**O",
                              scalars=xyz_panda.loc[1, 6],
                              units="Ha",
                              data_type="COMPUTATIONAL")
    Energy_of_LUMO = Property(name="Energy of LUMO",
                              scalars=xyz_panda.loc[1, 7],
                              units="Ha",
                              data_type="COMPUTATIONAL")
    Band_Gap = Property(name="Band Gap",
                        scalars=xyz_panda.loc[1, 8],
                        units="Ha",
                        data_type="COMPUTATIONAL")
    # NOTE(review): "Ha" units on the electronic spatial extent and heat
    # capacity below look copy-pasted -- confirm against the source dataset.
    Electronic_spactial_extent = Property(name="Electronic spactial extent",
                                          scalars=xyz_panda.loc[1, 9],
                                          units="Ha",
                                          data_type="COMPUTATIONAL")
    Zero_point_vibrational_energy = Property(
        name="Zero point vibrational energy",
        scalars=xyz_panda.loc[1, 10],
        units="Ha",
        data_type="COMPUTATIONAL")
    Internal_energy_at_0K = Property(name="Internal energy at 0K",
                                     scalars=xyz_panda.loc[1, 11],
                                     units="Ha",
                                     data_type="COMPUTATIONAL")
    Internal_energy_at_298_K = Property(name="Internal energy at 298 K",
                                        scalars=xyz_panda.loc[1, 12],
                                        units="Ha",
                                        data_type="COMPUTATIONAL")
    Enthalpy_at_298_K = Property(name="Enthalpy at 298 K",
                                 scalars=xyz_panda.loc[1, 13],
                                 units="Ha",
                                 data_type="COMPUTATIONAL")
    Free_energy_at_298_K = Property(name="Free energy at 298 K",
                                    scalars=xyz_panda.loc[1, 14],
                                    units="Ha",
                                    data_type="COMPUTATIONAL")
    Heat_capacity_at_298_K = Property(name="Heat capacity at 298 K",
                                      scalars=xyz_panda.loc[1, 15],
                                      units="Ha",
                                      data_type="COMPUTATIONAL")
    Atomic_coordinates = Property(
        name="Atom Coordinates for (Atom, Position Vector, Mulliken Charge)",
        matrices=atom_line,
        units="Chemical, (x,y,z), e",
        data_type="COMPUTATIONAL")
    Ref = Reference(
        title=
        "Quantum chemistry structures and properties of 134 kilo molecules.",
        publisher="Nature",
        url="https://www.nature.com/articles/sdata201422",
        authors="R. Ramakrishnan, Dral P. O., Rupp, M.")
    Harmonic = Property(name="Harmonic Vibrational Frequencies",
                        matrices=harmonic_max,
                        units="cm^-1",
                        data_type="COMPUTATIONAL")
    # collecting all the features for the PIF
    my_pif.properties = [
        SMILES, Inchi, Atom_Number, Tag, Rotational_Constant_A,
        Rotational_Constant_B, Rotational_Constant_C, Dipole_Moment,
        Isotropic_polarizability, Energy_of_HOMO, Energy_of_LUMO, Band_Gap,
        Electronic_spactial_extent, Zero_point_vibrational_energy,
        Internal_energy_at_0K, Internal_energy_at_298_K, Enthalpy_at_298_K,
        Free_energy_at_298_K, Heat_capacity_at_298_K, Harmonic,
        Atomic_coordinates
    ]
    my_pif.resources = [Ref]
    # Write the record to '<tag id>.json' in the working directory.
    with open(a + ".json", "w") as outfile:
        pif.dump(my_pif, outfile)