def test_property_add_Ifile(self):
    """pifmod with -i/-o should add property foo=bar and match data/out.pif."""
    status, stdout, stderr = execute(
        'pifmod -i data/pif.json -o data/test.pif property foo=bar')
    with open('data/out.pif') as handle:
        expected = pif.load(handle)
    with open('data/test.pif') as handle:
        received = pif.load(handle)
    assert status == 0
    assert empty(stdout)
    assert empty(stderr)
    # message is evaluated lazily, only when the comparison fails
    assert compare_dictionaries(expected.as_dictionary(),
                                received.as_dictionary()), \
        '\n'.join(strdiff(pif.dumps(expected), pif.dumps(received)))
def test_property_add_tags_Ifile(self):
    """pifmod should add a tagged property and match data/out_tags.pif."""
    status, stdout, stderr = execute(
        'pifmod -i data/pif.json -o data/test.pif '
        'property --tag="Hello World" foo=bar')
    with open('data/out_tags.pif') as handle:
        expected = pif.load(handle)
    with open('data/test.pif') as handle:
        received = pif.load(handle)
    assert status == 0
    assert empty(stdout)
    assert empty(stderr)
    assert compare_dictionaries(expected.as_dictionary(),
                                received.as_dictionary()), \
        '{}'.format(strdiff(pif.dumps(expected), pif.dumps(received)))
def test_property_add_volume_Ifile(self):
    """pifmod should import the volume property and match data/out_volume.pif."""
    status, stdout, stderr = execute(
        'pifmod -i data/pif.json -o data/test.pif '
        'property data/pore-distribution.json volume')
    with open('data/out_volume.pif') as handle:
        expected = pif.load(handle)
    with open('data/test.pif') as handle:
        received = pif.load(handle)
    assert status == 0
    assert empty(stdout)
    assert empty(stderr)
    assert compare_dictionaries(expected.as_dictionary(),
                                received.as_dictionary()), \
        '{}'.format(strdiff(pif.dumps(expected), pif.dumps(received)))
def test_property_add_contacts_Ifile(self):
    """pifmod should add a property with a contact and match data/out_contact.pif."""
    status, stdout, stderr = execute(
        'pifmod -i data/pif.json -o data/test.pif '
        'property --contact="Branden Kappes,[email protected]" foo=bar')
    with open('data/out_contact.pif') as handle:
        expected = pif.load(handle)
    with open('data/test.pif') as handle:
        received = pif.load(handle)
    assert status == 0
    assert empty(stdout)
    assert empty(stderr)
    assert compare_dictionaries(expected.as_dictionary(),
                                received.as_dictionary()), \
        '{}'.format(strdiff(pif.dumps(expected), pif.dumps(received)))
def run(self):
    """Serialize the input pif to a JSON file and (optionally) ship it.

    Reads client, dataset id, pif object, and output-path pieces from
    ``self.inputs``; stores the upload response (or a dry-run message, or
    an error message) in ``self.outputs['response']``. Returns silently if
    any required input is missing.
    """
    cl = self.inputs['client']
    dsid = self.inputs['dsid']
    p = self.inputs['pif']
    json_dir = self.inputs['json_dirpath']
    json_file = self.inputs['json_filename']
    if cl is None or dsid is None or p is None \
            or json_dir is None or json_file is None:
        return
    # BUG FIX: os.path.splitext keeps the leading dot, so the original
    # comparison against 'json' never matched and '.json' was appended
    # even to files already named *.json.
    if os.path.splitext(json_file)[1] != '.json':
        json_file = json_file + '.json'
    json_file = os.path.join(json_dir, json_file)
    json_flag = self.inputs['keep_json']
    ship_flag = self.inputs['ship_flag']
    try:
        # make p an array of pifs to get a big json that has all records
        with open(json_file, 'w') as ofs:  # close the handle deterministically
            pif.dump(p, ofs)
        if ship_flag:
            r = cl.upload_file(json_file, dataset_id=dsid)
        else:
            r = 'dry run: no shipment occurred. pif object: {}'.format(
                pif.dumps(p))
        if not json_flag:
            os.remove(json_file)
    except Exception as ex:
        # BUG FIX: Exception.message does not exist in Python 3; format
        # the exception object itself.
        r = 'An error occurred while shipping. Error message: {}'.format(ex)
    self.outputs['response'] = r
def write_dataset_from_func(test_function: Callable[[np.ndarray], float],
                            filename: str,
                            input_vals: List[np.ndarray]) -> None:
    '''Evaluate a function on each input row and save the results as PIFs.

    :param test_function: Function for generating dataset
    :type test_function: Callable[[np.ndarray], float]
    :param filename: Name of file for saving CSV dataset
    :type filename: str
    :param input_vals: List of input values to eval function over
    :type input_vals: np.ndarray
    :return: Doesn't return anything
    :rtype: None
    '''
    records = []
    for row_index, row in enumerate(input_vals):
        record = System()
        record.names = f'{test_function.__name__}_{row_index}'
        # one Property per input component: x1, x2, ...
        inputs = []
        for col_index, component in enumerate(row):
            feature = Property()
            feature.name = f"x{col_index + 1}"
            feature.scalars = component
            inputs.append(feature)
        # the function value is recorded as property "y"
        target = Property()
        target.name = "y"
        target.scalars = test_function(row)
        record.properties = inputs + [target]
        records.append(record)
    with open(filename, "w") as f:
        f.write(pif.dumps(records, indent=4))
def search(self, pif_query):
    """
    Run a PIF query against Citrination.

    :param pif_query: :class:`.PifQuery` to execute.
    :return: :class:`.PifSearchResult` object with the results of the query.
    """
    # When the caller sets neither size nor from_index, transparently page
    # through the whole result set (capped at 10000 hits) and stitch the
    # partial results together into one PifSearchResult.
    if pif_query.size is None and pif_query.from_index is None:
        total = 1; time = 0.0; hits = []; first = True
        while len(hits) < min(total, 10000):
            if first:
                first = False
            else:
                sleep(3)  # courtesy pause between pages
            sub_query = deepcopy(pif_query)
            # resume where the previous page ended; setting from_index means
            # the recursive call below takes the single-shot branch
            sub_query.from_index = len(hits)
            partial_results = self.search(sub_query)
            total = partial_results.total_num_hits
            time += partial_results.took
            if partial_results.hits is not None:
                hits.extend(partial_results.hits)
            # NOTE(review): if a page ever returns hits=None while total is
            # still larger than len(hits), this loop spins — assumes the
            # service keeps returning hits until total is reached; confirm.
        return PifSearchResult(hits=hits, total_num_hits=total, took=time)
    # Single-shot query: POST the serialized query and wrap the response.
    response = requests.post(self.pif_search_url, data=pif.dumps(pif_query),
                             headers=self.headers)
    if response.status_code != requests.codes.ok:
        raise RuntimeError('Received ' + str(response.status_code) +
                           ' response: ' + str(response.reason))
    return PifSearchResult(**keys_to_snake_case(response.json()['results']))
def search(self, pif_query):
    """
    Run a PIF query against Citrination.

    :param pif_query: :class:`.PifQuery` to execute.
    :return: :class:`.PifSearchResult` object with the results of the query.
    """
    # Unsized, unoffset queries are auto-paged: repeatedly issue sized
    # sub-queries (recursion takes the single-shot branch because
    # from_index is set) and accumulate up to 10000 hits.
    if pif_query.size is None and pif_query.from_index is None:
        total = 1
        time = 0.0
        hits = []
        first = True
        while len(hits) < min(total, 10000):
            if first:
                first = False
            else:
                sleep(3)  # courtesy pause between pages
            sub_query = deepcopy(pif_query)
            sub_query.from_index = len(hits)  # continue after collected hits
            partial_results = self.search(sub_query)
            total = partial_results.total_num_hits
            time += partial_results.took
            if partial_results.hits is not None:
                hits.extend(partial_results.hits)
            # NOTE(review): assumes pages keep returning hits until `total`
            # is reached; a hits=None page would make this loop spin.
        return PifSearchResult(hits=hits, total_num_hits=total, took=time)
    # Single-shot query path.
    response = requests.post(self.pif_search_url, data=pif.dumps(pif_query),
                             headers=self.headers)
    if response.status_code != requests.codes.ok:
        raise RuntimeError('Received ' + str(response.status_code) +
                           ' response: ' + str(response.reason))
    return PifSearchResult(
        **keys_to_snake_case(response.json()['results']))
def create_pif(headers, row):
    """
    Creates PIFs from lists of table row

    :param headers: header data from the table
    :param row: the row of data
    :return: ChemicalSystem containing the data from that row
    """
    keywords, names, units, systs = get_header_info(headers)
    systems, all_condition = add_fields(keywords, names, units, systs, {}, row)
    main = systems['main']
    main.sub_systems = []
    if main.properties:
        main.properties = format_main_prop(main.properties, all_condition)
    if main.preparation:
        # drop unnamed preparation steps
        main.preparation = [step for step in main.preparation
                            if step.name != '']
    # every non-empty secondary system becomes a sub-system of main
    for key in systems:
        if key != 'main' and len(pif.dumps(systems[key])):
            main.sub_systems.append(systems[key])
    return main
def test_round_robin():
    """A Value's name and units should survive a dumps/loads cycle."""
    original = Value(name="foo", units="eV")
    assert original.name == "foo", "Value object couldn't store name"
    assert original.units == "eV", "Value object couldn't store units"
    restored = pif.loads(pif.dumps(original), class_=Value)
    assert restored.name == "foo", "Name didn't survive json round robin"
    assert restored.units == "eV", "Units didn't survive json round robin"
def predict(self, model_name, candidates):
    """
    Predict endpoint

    :param model_name: The model path
    :param candidates: A list of candidates
    :return: list of predicted candidates as a map {property: [value, uncertainty]}
    """
    # Wrap a lone candidate so the service always receives a list.
    if not isinstance(candidates, list):
        candidates = [candidates]
    endpoint = self._get_predict_url(model_name)
    payload = pif.dumps({
        "predictionRequest": {
            "predictionSource": "scalar",
            "usePrior": True,
            "candidates": candidates,
        }
    })
    response = requests.post(endpoint, data=payload, headers=self.headers)
    if response.status_code != requests.codes.ok:
        raise RuntimeError('Received ' + str(response.status_code) +
                           ' response: ' + str(response.reason))
    return response.json()
def test_full_round_robin():
    """Every nested field should survive a full dumps/loads cycle."""
    # local renamed from `pif` to avoid shadowing the pypif module name
    original = System(
        properties=[
            Property(name="foo",
                     scalars=[Scalar(value=np.float32(2.4)),
                              Scalar(value=np.int64(2))]),
            Property(name="bar",
                     scalars=[Scalar(value=2.4), Scalar(value=2)]),
            Property(name="spam",
                     files=[FileReference(relative_path="/tmp/file")]),
        ],
        preparation=[
            ProcessStep(name="processed",
                        details=[Value(name="temp",
                                       scalars=[Scalar(value=1.0)])]),
        ],
        contacts=[Person(name=Name(family="Einstein"))])
    restored = loads(dumps(original))
    assert restored.as_dictionary() == original.as_dictionary(), \
        "PIF contents are not the same"
    assert original.properties[0].scalars[0].value == \
        restored.properties[0].scalars[0].value
    assert original.properties[1].scalars[0].value == \
        restored.properties[1].scalars[0].value
    assert original.properties[2].files[0].relative_path == \
        restored.properties[2].files[0].relative_path
    assert original.preparation[0].details[0].scalars[0].value == \
        restored.preparation[0].details[0].scalars[0].value
    assert original.contacts[0].name.family == restored.contacts[0].name.family
def main():
    """Collect samples from the requested Faustson builds and write each one
    to its own PIF file in a fresh directory, optionally archiving it.
    """
    global args
    samples = []
    # ####################################
    # read
    # ####################################
    for source in args.sources:
        try:
            subset = {
                'faustson-plate1-build1': P001B001().samples,
                'faustson-plate2-build1': P002B001().samples,
                'faustson-plate3-build1': P003B001().samples,
                'faustson-plate4-build1': P004B001().samples,
                'faustson-plate5-build1': P005B001().samples,
                'faustson-plate5-build2': P005B002().samples,
                'faustson-plate6-build1': P006B001().samples
            }[source.lower()]
        except KeyError:
            raise ValueError('{source:} is not a recognized source.'.format(
                source=source))
        samples.extend(subset)
    # ####################################
    # write
    # ####################################
    # To improve traceability of the samples and their history, each sample
    # should be uploaded separately, i.e. as a separate file. So rather than
    # storing these in a single file, create a directory to store each sample
    # as a separate file in that directory, then tar and zip the directory.
    directory = args.output
    directory = make_directory(directory, retry=0)
    for sample in samples:
        # generate JSON string
        jstr = pif.dumps(sample, indent=4)
        # create a filename from the contents of the record
        try:
            ofile = filename_from(jstr, directory=directory)
        except IOError:
            # BUG FIX: `ofile` is unbound when filename_from raises, so the
            # old '...'.format(ofile) died with a NameError; identify the
            # duplicate by its content-derived URN instead. Also added the
            # missing space before 'Skipping.'.
            msg = 'Sample {} is duplicated.'.format(get_urn(jstr))
            if not args.duplicate_error:
                sys.stdout.write('WARNING: {} Skipping.\n'.format(msg))
                continue
            else:
                msg = 'ERROR: {} To skip duplicates, invoke the ' \
                      '--duplicate-warning flag.'.format(msg)
                shutil.rmtree(directory)
                raise IOError(msg)
        # Add the UID to the record
        urn = get_urn(jstr)
        sample.uid = urn
        # write the file
        with open(ofile, 'w') as ofs:
            pif.dump(sample, ofs)
    # tarball and gzip the new directory
    if args.create_archive:
        tarball = '{}.tgz'.format(directory)
        with tarfile.open(tarball, 'w:gz') as tar:
            tar.add(directory)
        shutil.rmtree(directory)
def matmeta_to_pif(metadata):
    """Translate a matmeta metadata dict into a PIF via a dumps/loads hop."""
    try:
        # matmeta is an optional dependency, imported lazily
        from matmeta.payload_metaclass import CITPayload
    except ImportError as e:
        print(e)
        raise ImportError("Install package `matmeta` in order to use meta-data input")
    serialized = pif.dumps(CITPayload(**metadata).metapayload)
    return pif.loads(serialized)
def test_file_list(mark10_no_stress, generate_output):
    """Converting a one-file list should reproduce the stored JSON fixture."""
    records = converter([SOURCE])
    if generate_output:
        # regeneration mode: rewrite the fixture, then fail loudly
        with open('{}/data/mark10-no-stress.json'.format(HERE), 'w') as ofs:
            pif.dump(records, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(records, sort_keys=True).strip()
    assert serialized == mark10_no_stress
def test_mises_with_time(aramis_mises_with_time, generate_output):
    """Mises conversion with a timestep should match the stored fixture."""
    records = converter(MISES, timestep=0.5)
    if generate_output:
        # regeneration mode: rewrite the fixture, then fail loudly
        with open('{}/data/aramis-mises-with-time.json'.format(HERE), 'w') as ofs:
            pif.dump(records, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(records, sort_keys=True)
    assert serialized == aramis_mises_with_time
def test_round_robin():
    """A FileReference's url and relative_path should survive dumps/loads."""
    url = "https://citrination.com/datasets/1160/version/3/file/308882"
    path = "/tmp/foo.bar"
    original = FileReference(url=url, relative_path=path)
    assert original.relative_path == path, "Python object couldn't store relative path"
    assert original.url == url, "Python object couldn't store URL"
    restored = pif.loads(pif.dumps(original), class_=FileReference)
    assert restored.relative_path == path, "Relative path didn't survive json round robin"
    assert restored.url == url, "URL dind't survive json round robin"
def test_ey_strain_with_time(aramis_ey_strain_with_time, generate_output):
    """ey-strain conversion with a timestep should match the stored fixture."""
    records = converter(EYSTRAIN, timestep=0.5)
    if generate_output:
        # regeneration mode: rewrite the fixture, then fail loudly
        with open('{}/data/aramis-ey_strain-with-time.json'.format(HERE), 'w') as ofs:
            pif.dump(records, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(records, sort_keys=True)
    assert serialized == aramis_ey_strain_with_time
def test_stress(mark10_with_stress, generate_output):
    """Conversion with a cross-sectional area should match the stored fixture."""
    area = 12.9
    units = 'mm^2'  # retained from original (currently unused)
    records = converter(SOURCE, area=area)
    if generate_output:
        # regeneration mode: rewrite the fixture, then fail loudly
        with open('{}/data/mark10-with-stress.json'.format(HERE), 'w') as ofs:
            pif.dump(records, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(records, sort_keys=True).strip()
    assert serialized == mark10_with_stress
def search(self, pif_query):
    """
    Run a PIF query against Citrination.

    :param pif_query: :class:`.PifQuery` to execute.
    :return: :class:`.PifSearchResult` object with the results of the query.
    """
    reply = requests.post(self.pif_search_url, data=pif.dumps(pif_query),
                          headers=self.headers)
    if reply.status_code != requests.codes.ok:
        raise RuntimeError('Received ' + str(reply.status_code) +
                           ' response: ' + str(reply.reason))
    return PifSearchResult(**keys_to_snake_case(reply.json()['results']))
def set_uids(pifs, uids=None):
    """
    Set the uids in a PIF, explicitly if the list of UIDs is passed in

    :param pifs: pif objects to set UIDs on
    :param uids: UIDs to set; defaults to a content digest of each object
    :return: the same list of pifs, with uids assigned
    """
    if not uids:
        import hashlib
        # BUG FIX: built-in hash() on strings is randomized per interpreter
        # run (PYTHONHASHSEED), so the default uids were not reproducible
        # across processes; use a stable content digest instead.
        uids = [hashlib.sha1(dumps(x).encode('utf-8')).hexdigest()
                for x in pifs]
    # loop variable renamed from `pif`, which shadowed the pypif module
    for record, uid in zip(pifs, uids):
        record.uid = uid
    return pifs
def save_pif(pif_data, out_file=None, data_dir=None):
    """
    Saves PIF data to disk as PIF JSON file format

    :param: pif_data - PIF System object
    :param: out_file - Output file name, will default to pif GDB9 id number
    :param: data_dir - Path to write output file, will default to current directory
    """
    # BUG FIX: the old default `data_dir=os.getcwd()` was evaluated once at
    # import time, freezing the directory; resolve it at call time instead.
    if data_dir is None:
        data_dir = os.getcwd()
    if not out_file:
        out_file = os.path.join(data_dir,
                                '{}.json'.format(pif_data.ids[0].value))
    with open(out_file, 'w') as fp:
        fp.write(pif.dumps(pif_data, indent=4))
def convert_from_tarfile():
    """Download the tarball named in the request body, convert it to a PIF,
    and return the serialized result; temp files are always cleaned up."""
    # scratch directory with a unique name, removed in the finally clause
    scratch = '/tmp/' + str(uuid.uuid4())
    os.makedirs(scratch)
    try:
        payload = json.loads(request.get_data(as_text=True))
        download = requests.get(payload['url'], stream=True)
        archive = scratch + '/file_to_process'
        with open(archive, 'wb') as output:
            shutil.copyfileobj(download.raw, output)
        return pif.dumps({'system': tarfile_to_pif(archive, '/tmp/')})
    finally:
        shutil.rmtree(scratch)
def uid(pifdata):
    """
    Gets/sets the UID for the PIF-formatted data.

    Parameters
    ----------
    :pifdata, PIF: PIF-formatted data read using pif.load.

    Returns
    -------
    There are three anticipated use cases:

        # returns the UID
        pifmod -i INPUT.pif -o OUTPUT.pif uid

        # sets the UID to "jasdk-2132-asdfkasf". If UID already exists,
        # returns os._exit(0) if the UIDs match.
        pifmod -i INPUT.pif -o OUTPUT.pif uid jasdk-2132-asdfkasf

        # sets the UID to "jasdk-2132-asdfkasf", overwriting the UID
        # if it already exists. This can break things.
        pifmod -i INPUT.pif -o OUTPUT.pif uid -f jasdk-2132-asdfkasf

    For the first case, this returns the UID, if it exists, or None.
    For the second or third case, returns nothing.
    """
    try:
        # handle cases two and three: a UID argument was supplied
        uidval = args.arglist[0]
        if check_tag(pifdata, 'uid'):
            # exit if UID already exists and force not specified
            # check if the UIDs match
            if not args.force:
                # NOTE: os._exit terminates immediately, skipping atexit
                # handlers and buffered-output flushes — the exit status is
                # the whole result here (0 = match, 1 = mismatch).
                if pifdata.uid == uidval:
                    os._exit(0)
                else:
                    os._exit(1)
        # force was given (or no prior UID): overwrite and emit the record
        pifdata.uid = uidval
        return '{}\n'.format(pif.dumps(pifdata))
    except IndexError:
        # handle case one: no argument, report the current UID (or blank)
        rval = getattr(pifdata, 'uid', None)
        rval = '' if rval is None else rval
        return '{}\n'.format(rval)
def from_pif_systems(cls, systems, expose_sub_systems=True):
    """
    Constructor from a list of PIF System objects.

    Args:
        systems (list of System) A list of System instances.
        expose_sub_systems (bool) Whether to expose nested sub-Systems.

    Returns:
        (PifFrame) A DataFrame representation of those Systems.
    """
    if expose_sub_systems:
        # flatten sub-Systems
        systems = flattened_sub_systems(systems)
    # one single-row frame per system, serialized through the pif codec
    frames = [
        DataFrame.from_dict([json.loads(pif.dumps(record))], orient='columns')
        for record in systems
    ]
    return cls(concat(frames, axis=0, sort=False))
def predict(self, model_name, candidates):
    """
    Predict endpoint

    :param model_name: The model path
    :param candidates: A list of candidates
    :return: list of predicted candidates as a map {property: [value, uncertainty]}
    """
    # Normalize a single candidate into a one-element list.
    if not isinstance(candidates, list):
        candidates = [candidates]
    request_body = pif.dumps({
        "predictionRequest": {
            "predictionSource": "scalar",
            "usePrior": True,
            "candidates": candidates,
        }
    })
    response = requests.post(self._get_predict_url(model_name),
                             data=request_body, headers=self.headers)
    if response.status_code != requests.codes.ok:
        raise RuntimeError('Received ' + str(response.status_code) +
                           ' response: ' + str(response.reason))
    return response.json()
def pif_to_mdf_record(pif_obj, dataset_hit, mdf_acl):
    """Convert a PIF into partial MDF record"""
    meta = _to_meta_data(pif_obj, dataset_hit, mdf_acl)
    record = {"mdf": meta}
    # the user-defined payload is keyed by the dataset's source name
    record[meta["source_name"]] = _to_user_defined(pif_obj)
    return dumps(record)
n_atoms = int(fh.readline().strip()) # first line is n_atoms comments = fh.readline().strip().split() # second line is comments molecule = [] for i in range(n_atoms): # next n_atom lines are element positions molecule.append(fh.readline().strip().replace('*^', 'e').split()) vibrations = [ # next line is vibration frequencies float(i) for i in fh.readline().strip().split()] smiles = fh.readline().strip().split() # next line is smiles strings inchl = fh.readline().strip().split() # next line is InChl strings return { 'n_atoms': n_atoms, 'comments': comments, 'molecule': molecule, 'vibrations': vibrations, 'smiles': smiles, 'inchl': inchl } if __name__ == '__main__': from pypif import pif # loads system from a file sys = XMolMolecularSystem.from_file( '../../data/data_subset/dsgdb9nsd_000116.xyz') print(sys) print(pif.dumps(sys))
def __ne__(self, other):
    """ReadViews differ unless their raw payloads serialize identically."""
    if isinstance(other, ReadView):
        return dumps(other.raw) != dumps(self.raw)
    return True
def property(pifdata):
    """
    Gets/sets a property in the PIF-formatted data.

    There are several anticipated use cases:

    1. pifmod -i material.pif property NAME
        Returns the Property named NAME, if it exists.

    2. pifmod -i material.pif property --units=mm NAME=VALUE
        Create a *new* Property named NAME with value VALUE. VALUE can be
        a scalar (string or number); a 1D list (vector), e.g. [1, 2, 3]
        or ['a', 'b', 'c']; or a 2D list of lists (matrix), e.g.
        [[1, 2, 3], [4, 5, 6]]. This will not overwrite an existing
        property with the same name — see use case #3. Select Property
        keywords may be passed as options:
            --condition="KEY=VALUE[=UNITS]" (repeatable)
            --data-type={MACHINE_LEARNING|COMPUTATIONAL|EXPERIMENTAL}
            --contact="NAME[,EMAIL]" (repeatable; NAME is "GIVEN [FAMILY]")
            --tag="TAG" (repeatable)

    3. pifmod -i material.pif property -f --units=mm PROPERTY=VALUE
        As above, but an existing property is overwritten.

    4. pifmod -i material.pif property [-f] PIF NAME
        Extracts property NAME from the PIF-formatted file PIF and adds
        it to material.pif; -f overwrites an existing property.

    5. pifmod -i material.pif property --list [PIF]
        Lists the property names in material.pif and, if specified, in
        the PIF file as well (keys only, not values).

    Parameters
    ----------
    :pifdata, PIF: PIF-formatted data.

    Returns
    -------
    Use case:
    1. Matching property as a PIF Property
    2-4. pifdata string with Property added as appropriate.
    """
    def get_property(plist, name, case_sensitive=False):
        """Returns the first property whose name matches NAME."""
        name = name if case_sensitive else name.lower()
        for prop in plist:
            nom = prop.name if case_sensitive else prop.name.lower()
            if nom == name:
                return prop
        return None

    def property_adder(plist, prop_exists):
        """
        Abstracts away how new properties are added to an existing
        property list: append when new, replace (with -f) when present.
        """
        def no_conflict(padd):
            plist.append(padd)

        def conflict(padd):
            if args.force:
                for i, entry in enumerate(plist):
                    if entry.name == padd.name:
                        plist[i] = padd
                        return
            else:
                # Do not overwrite.
                msg = 'A property named "{}" already exists.'.format(padd.name)
                raise EntryExistsError(msg)

        return conflict if prop_exists else no_conflict

    def infer_value(vstr):
        """
        Infer whether the value string is a scalar, vector, or matrix.

        scalars: single value
        vectors: one or more scalars in square brackets, e.g. [1, 2, 3]
        matrices: vector of vectors (row-dominant).

        Returns
        -------
        (value type, value)
        """
        # ensure all character strings are quoted, otherwise they would be
        # treated as variables and raise an Exception
        quoteRE = re.compile(r"""(\b[a-zA-Z_]\w*)""")
        vstr = re.sub(quoteRE, r'"\1"', str(vstr))
        # Do not use the built-in eval: huge security vulnerability.
        # ast.literal_eval only evaluates basic literal types.
        value = ast.literal_eval(vstr)
        if not isinstance(value, list):
            return ('scalars', value)
        elif not isinstance(value[0], list):
            return ('vectors', value)
        else:
            return ('matrices', value)

    def parse_units(units):
        # pass-through; kept for parity with the other parse_* hooks
        return units

    def parse_condition(cond):
        """Parse a KEY=VALUE[=UNITS] condition string into a Value."""
        kwds = {}
        if cond is None:
            return None
        try:
            k, v, u = cond.strip().split('=')
            kwds['units'] = u
        except ValueError:
            k, v = cond.strip().split('=')
        vtype, v = infer_value(v)
        kwds['name'] = k
        kwds[vtype] = v
        return Value(**kwds)

    def parse_data_type(dtype):
        """Validate the --data-type option; None passes through."""
        allowedRE = re.compile(
            r'(MACHINE_LEARNING|COMPUTATIONAL|EXPERIMENTAL)', re.IGNORECASE)
        if dtype is None:
            return None
        elif re.match(allowedRE, dtype):
            return dtype
        else:
            msg = 'Data type must be one of: MACHINE_LEARNING, ' \
                  'COMPUTATIONAL, or EXPERIMENTAL.'
            # BUG FIX: the exception was constructed but never raised,
            # silently returning None for invalid data types.
            raise UnrecognizedOptionValueError(msg)

    def parse_contact(cont):
        """Parse "NAME[,EMAIL]" into a Person; NAME is "GIVEN [FAMILY]"."""
        kwds = {}
        if cont is None:
            return None
        # split name and email
        try:
            name, email = cont.strip().split(',')
            kwds['email'] = email.strip()
        except ValueError:
            name = cont.strip()
        # split given name from family name
        # BUG FIX: str.split() never raises ValueError, so the old
        # except-branch was unreachable (and would have crashed on the
        # list it received); handle the empty-name case explicitly.
        parts = name.split()
        if parts:
            given = parts[0]
            family = ' '.join(parts[1:])
        else:
            given = name.strip()
            family = None
        kwds['given'] = given
        kwds['family'] = family
        return Person(**kwds)

    def parse_tag(tag):
        # pass-through; kept for parity with the other parse_* hooks
        return tag

    def parse_json(filename):
        """Read a JSON file and coerce its entries into Property objects."""
        # which key/keys are equivalent to scalar values?
        valueRE = re.compile(r'value.*?\b', re.IGNORECASE)
        # TODO: which key/keys are equivalent to vectors? matrices?
        with open(filename) as ifs:
            jdata = json.load(ifs)
        props = []
        for k, v in iter(jdata.items()):
            try:
                # if v is a dictionary, i.e. has
                # [scalar/vector/matrix equivalent][, units[, ...]]
                # create a Property from this data.
                # BUG FIX: iterate over a snapshot of the keys — deleting
                # from the dict while iterating v.keys() raises
                # RuntimeError in Python 3.
                for key in list(v.keys()):
                    if re.match(valueRE, key):
                        vtype, val = infer_value(v[key])
                        v[vtype] = v[key]
                        del v[key]
                prop = Property(k, **v)
            except AttributeError:
                # list, scalar, etc. -- something that doesn't have the
                # map defining the characteristics of the entry.
                vtype, val = infer_value(v)
                kwds = {vtype: val}
                prop = Property(k, **kwds)
            props.append(prop)
        return props

    # parse command line options
    if pifdata.properties is None:
        pifdata.properties = []
    proplist = pifdata.properties

    # only list the available property names
    if args.list:
        ostream = StringIO()
        ostream.write("PIF data\n")
        for prop in proplist:
            ostream.write("- {}\n".format(prop.name))
        try:
            properties = args.arglist[0]
            properties = parse_json(properties)
            ostream.write("{}\n".format(args.arglist[0]))
            for prop in properties:
                ostream.write("- {}\n".format(prop.name))
        except IndexError:
            pass
        result = ostream.getvalue()
        ostream.close()
        return result

    nargs = len(args.arglist)

    # collect any optional Property keywords specified on the command line
    kwds = {}
    units = parse_units(args.units)
    if units is not None:
        kwds['units'] = units
    conditions = [parse_condition(c) for c in args.conditions]
    if conditions != []:
        kwds['conditions'] = conditions
    dataType = parse_data_type(args.datatype)
    if dataType is not None:
        kwds['data_type'] = dataType
    contacts = [parse_contact(c) for c in args.contacts]
    if contacts != []:
        kwds['contacts'] = contacts
    tags = [parse_tag(t) for t in args.tags]
    if tags != []:
        kwds['tags'] = tags

    # The logic is this:
    # 1. One argument: get (NAME) or set (NAME=VALUE) a property.
    # 2. Two arguments: get a property from a file.
    if nargs == 1:
        kv = args.arglist[0].strip()
        # are we setting or getting?
        try:
            # setting: NAME=VALUE
            k, v = kv.split('=')
        except ValueError:
            # getting: NAME
            k, v = kv, None
        if v is not None:
            # setting a property: does the property exist?
            prop = get_property(proplist, k, case_sensitive=False)
            vtype, v = infer_value(v)  # scalar, vector, matrix
            # construct the arguments to Property...
            kwds['name'] = k
            kwds[vtype] = v
            # add the property to pifdata
            newprop = Property(**kwds)
            property_adder(proplist, prop_exists=(prop is not None))(newprop)
            return '{}'.format(pif.dumps(pifdata))
        else:
            prop = get_property(proplist, k, case_sensitive=False)
            return pif.dumps(prop)
    elif nargs == 2:
        # reading a property from a json-formatted source file
        ifile, propname = args.arglist
        # get desired property from input file
        dst = get_property(proplist, propname, case_sensitive=False)
        # parse the properties present in the source file
        props = parse_json(ifile)
        src = get_property(props, propname, case_sensitive=False)
        # BUG FIX: the missing-property check must precede the setattr
        # loop; the old order raised AttributeError on None instead of
        # the intended ValueError.
        if src is None:
            msg = '{} was not found in {}.'.format(propname, ifile)
            raise ValueError(msg)
        for k, v in iter(kwds.items()):
            setattr(src, k, v)
        # create a function to add the property
        property_adder(proplist, prop_exists=(dst is not None))(src)
        return '{}'.format(pif.dumps(pifdata))
    else:
        # should never get here if the parser custom action did its job.
        msg = "If you're seeing this, the custom parser didn't do its job."
        raise RuntimeError(msg)
def parse_template(defect_template):
    """
    Main parsing function. Produces pifs.

    Args:
        defect_template (closed_file): closed csv file containing user
            submitted template

    Returns:
        systems (pifs): list of pifs created from template
    """
    systems = []
    atoms = []
    corners = []
    enthalpies = []
    entries = get_values(defect_template)
    band_gap = float(entries['bg'])
    # discover atoms and chemical-potential corners from "<atom>-<corner>" keys
    for k, v in entries.items():
        if len(k.split("-")) > 1:
            if k.split("-")[0] in elements_dict.values() and k.split("-")[0] not in atoms:
                atoms.append(k.split("-")[0])
            if k.split("-")[1].isdigit() and k.split("-")[1] not in corners:
                corners.append(k.split("-")[1])
    # per-corner atom->enthalpy maps
    for corner in corners:
        enthaplies_at_corner = {}
        for atom in atoms:
            enthaplies_at_corner[atom] = entries[atom + "-" + corner]
        enthalpies.append(enthaplies_at_corner)
    count = 1
    print("NUMBER OF CORNERS: ", len(enthalpies))
    for corner in enthalpies:
        print("\n=====")
        system = ChemicalSystem()
        system.chemical_formula = "".join(atoms)
        system.properties = []
        system.ids = []
        system.ids.append(Id(name="Corner", value=count))
        system.ids.append(Id(
            name="Corner",
            value=max(corner.items(), key=operator.itemgetter(1))[0] + "-rich"))
        count += 1
        print("CORNER:", corner, pif.dumps(system.ids))
        # initialize dict. k=defect, v=list of energy values for that defect
        # len == number of charges that defect can take
        unique_defects = {}
        for k, v in entries.items():
            if len(k.split("_")) > 3:
                defect_type = k.split("_")[0]
                site = k.split("_")[1]
                charge = k.split("_")[2]
                index = k.split("_")[3]
                y1_enthalpy_at_0 = float(
                    calc_defect_enthalpy(corner, v, defect_type, site))
                y2_enthalpy_at_ef = round(
                    float(charge) * float(band_gap) + float(y1_enthalpy_at_0), 4)
                try:
                    unique_defects[defect_type + "_" + site].append(
                        [[0, y1_enthalpy_at_0], [band_gap, y2_enthalpy_at_ef]])
                except KeyError:
                    unique_defects[defect_type + "_" + site] = [
                        [[0, y1_enthalpy_at_0], [band_gap, y2_enthalpy_at_ef]]]
                print("Defect key: ", k,
                      " Enthalpy at x = 0: ", y1_enthalpy_at_0,
                      " Enthalpy at x = band gap: ", y2_enthalpy_at_ef)
                # create properties and append to system
                # BUG FIX: '\D' is an invalid string escape (SyntaxWarning on
                # Python >= 3.12); raw strings keep the bytes identical.
                system.properties.append(Property(
                    name=r"$\Delta$H",
                    scalars=[y1_enthalpy_at_0, y2_enthalpy_at_ef],
                    conditions=[Value(name="E$_F$", scalars=[0, band_gap])]))
                defect_enthalpy_prop = Property(name="Defect Enthalpy",
                                                scalars=y1_enthalpy_at_0)
                defect_enthalpy_prop.conditions = []
                defect_enthalpy_prop.conditions.append(
                    Value(name="Defect type", scalars=defect_type))
                defect_enthalpy_prop.conditions.append(
                    Value(name="Defect site", scalars=site))
                defect_enthalpy_prop.conditions.append(
                    Value(name="Defect charge", scalars=charge))
                defect_enthalpy_prop.conditions.append(
                    Value(name="Defect index", scalars=index))
                defect_enthalpy_prop.conditions.append(
                    Value(name="Defect label", scalars=k))
                system.properties.append(defect_enthalpy_prop)
        # calc intersection points for overlapping lines
        for k, v in unique_defects.items():
            print("\n-----CALCULATING INTERSECTION POINTS-----")
            print("Defect: ", k, " Number of charge states: ", len(v))
            print("Defect curves: ", v)
            if len(v) >= 2:
                intersection_points = calculate_intersect_points(v)
                print("INTERSECTION POINTS: ", intersection_points)
                low_energy_line = find_min_energy_overlap(v, intersection_points)
                print("LOWEST ENERGY LINE: ", low_energy_line)
                system.properties.append(Property(
                    name=r"$\Delta$H_2",
                    scalars=low_energy_line[0],
                    conditions=[Value(name="E$_F$_2",
                                      scalars=low_energy_line[1])]))
            else:
                print("LOWEST ENERGY LINE: ",
                      [[v[0][0][0], v[0][1][0]], [v[0][0][1], v[0][1][1]]])
                system.properties.append(Property(
                    name=r"$\Delta$H_2",
                    scalars=[v[0][0][1], v[0][1][1]],
                    conditions=[Value(name="E$_F$_2",
                                      scalars=[v[0][0][0], v[0][1][0]])]))
        systems.append(system)
        print("=====")
    return systems
def update(first, second, extend=False):
    """Deep-merge `second` into `first`, returning a freshly parsed pif."""
    merged = first.as_dictionary()
    _deep_update(merged, second.as_dictionary(), extend)
    return loads(dumps(merged))
# chemical_system = ChemicalSystem() # chemical_system.chemical_formula = 'A_30(A\')_2 B_70(B\')_2 C_30(C\')_2' # resolution = Property() # resolution.name = 'NPW' # resolution.vectors = [32,32,32] # FreeEnergy = Property() # FreeEnergy.name = 'Free Energy' # FreeEnergy.scaler = 3.2 chemical_system.properties = [resolution,FreeEnergy] test = pif.dumps(chemical_system, indent=4) for file in listoffiles: start = time.time() op = open(file,'r') text = op.read().splitlines() op.close() DataDictionary = OutputParser(text) chemical_system = ChemicalSystem()
def test_basic_round_robin():
    """A System's uid should survive a dumps/loads round trip."""
    original = System()
    original.uid = "foo"
    restored = loads(dumps(original))
    assert restored.uid == original.uid
def update_pif(old, new):
    """Deep-merge `new` into `old`, returning a freshly parsed pif."""
    merged = old.as_dictionary()
    _deep_update(merged, new.as_dictionary())
    return pif.loads(pif.dumps(merged))
def dict_to_pif(Dictionary):
    """Assemble a ChemicalSystem PIF from a parsed simulation dictionary.

    :param Dictionary: mapping with keys NSC, fAfC, PHASE, SMILES, NPW,
        ARCH, Space_Group, FREE_ENERGY, CHI, CELL, DOI, NAME, EMAIL and
        CLASSIFICATION (NPW/CHI/CELL and ARCH entries are numpy arrays).
    :return: the ChemicalSystem serialized as indented JSON.
    """
    def _computational(name, **fields):
        # Helper: build a Property flagged as computational data.
        prop = Property()
        prop.name = name
        for attr, value in fields.items():
            setattr(prop, attr, value)
        # BUG FIX: pypif spells these fields `scalars` and `data_type`;
        # the original `scaler`/`datatype` attributes were not part of the
        # schema, so those values were silently dropped on serialization.
        prop.data_type = "Computational"
        return prop

    chemical_system = ChemicalSystem()
    chemical_system.chemical_formula = ('N_{sc}=' + str(Dictionary['NSC'])
                                        + '-f_{A/C}=' + str(Dictionary['fAfC'])
                                        + '_' + Dictionary['PHASE'])

    SMILES = _computational('SMILES', scalars=Dictionary['SMILES'])
    RESOLUTION = _computational('Number of Planewaves',
                                vectors=Dictionary['NPW'].tolist())
    # single-entry ARCH means no side chains
    if len(Dictionary['ARCH']) == 1:
        side_chain_length = 0
    else:
        side_chain_length = Dictionary['ARCH'][1][3][0]
    NSC = _computational('Side Chain Length', scalars=side_chain_length)
    CS = _computational('Chain Statistics', scalars=Dictionary['ARCH'][0][0])
    SPACEGroup = _computational('Space Group',
                                scalars=Dictionary['Space_Group'])
    FreeEnergy = _computational('Free Energy',
                                scalars=Dictionary['FREE_ENERGY'])
    B = _computational('Statistical Segment Length',
                       vectors=Dictionary['ARCH'][0][2].tolist(),
                       units='b/sqrt(6)')
    CHI = _computational('Flory-Huggin Interaction Matrix',
                         matrices=Dictionary['CHI'].tolist())
    CELL = _computational('Cell Tensor',
                          matrices=Dictionary['CELL'].tolist(),
                          units='b/sqrt(6)')
    fAfC = _computational('Volume Fraction of A/C', scalars=Dictionary['fAfC'])
    PHASE = _computational('Phase', scalars=Dictionary['PHASE'])

    chemical_system.classifications = Dictionary['CLASSIFICATION']
    # BUG FIX: FreeEnergy appeared twice in the original property list.
    chemical_system.properties = [SMILES, PHASE, RESOLUTION, FreeEnergy, CS,
                                  SPACEGroup, B, CHI, CELL, fAfC, NSC]
    # BUG FIX: pypif expects lists for references/contacts, and the System
    # schema field is `contacts`, not `person`.
    chemical_system.references = [Reference(Dictionary['DOI'])]
    chemical_system.contacts = [Person(Dictionary['NAME'], Dictionary['EMAIL'])]
    return pif.dumps(chemical_system, indent=4)