def write(self, filepath):
    """Serialize `self.blocks` into a gemmi Document and save it as a STAR file.

    Dict blocks are emitted as pair items, DataFrame blocks as loops.
    Every value is written as a string so gemmi performs no type conversion.
    Empty blocks are created but left without content.
    """
    document = cif.Document()
    target_path = str(filepath)
    for block_name, block_data in self.blocks.items():
        gemmi_block = document.add_new_block(block_name)
        # empty loop or pair block: keep the block header, emit nothing else
        if len(block_data) == 0:
            continue
        if isinstance(block_data, dict):
            # pair block: one key/value item per dict entry
            for key, value in block_data.items():
                gemmi_block.set_pair(key, str(value))
        elif isinstance(block_data, pd.DataFrame):
            # loop block: column header row followed by stringified data rows
            loop = gemmi_block.init_loop("", list(block_data.columns))
            for record in block_data.values.tolist():
                loop.add_row([str(field) for field in record])
        else:
            raise StarFileError(
                f"Unsupported type for block {block_name}: {type(block_data)}")
    document.write_file(target_path)
def convert_new_to_old(dataframe_, optics_group, filename, magnification='100000'):
    """Convert a Relion 3.1 particle table back to the Relion 3.0 STAR layout.

    Folds the optics-group values into per-particle columns, converts the
    origin offsets from Angstroms to pixels, drops 3.1-only columns, and
    writes the result to ``<filename without .star>_v30.star``.

    Parameters
    ----------
    dataframe_ : pandas.DataFrame
        Particle table in Relion 3.1 layout.
    optics_group : mapping / DataFrame
        Optics table; ``{}`` means the file is already 3.0, in which case a
        message is printed and the process exits.
    filename : str
        Input .star file name, used to derive the output name.
    magnification : str or int
        Value written into ``_rlnMagnification``.
    """
    if optics_group == {}:
        print('File is already in Relion 3.0 format. No conversion needed!')
        # Preserve the historical behavior (the old code called quit());
        # raise SystemExit directly instead of relying on the site-module
        # provided `quit` builtin.
        raise SystemExit

    n_rows = dataframe_.shape[0]
    pixel_size = optics_group['_rlnImagePixelSize'].astype(float)

    # change the Origin from Angstroms to pixels
    dataframe_['_rlnOriginXAngst'] = dataframe_['_rlnOriginXAngst'].astype(
        float) / pixel_size
    dataframe_['_rlnOriginYAngst'] = dataframe_['_rlnOriginYAngst'].astype(
        float) / pixel_size
    dataframe_ = dataframe_.rename(columns={
        '_rlnOriginXAngst': '_rlnOriginX',
        '_rlnOriginYAngst': '_rlnOriginY'
    })

    # add columns which are in the optics group
    # (the original code assigned _rlnSphericalAberration twice with the
    # same value; the redundant second assignment was removed)
    dataframe_['_rlnVoltage'] = np.zeros(n_rows) + optics_group[
        '_rlnVoltage'].astype(float)
    dataframe_['_rlnSphericalAberration'] = np.zeros(n_rows) + optics_group[
        '_rlnSphericalAberration'].astype(float)
    dataframe_['_rlnDetectorPixelSize'] = np.zeros(n_rows) + pixel_size
    dataframe_['_rlnMagnification'] = np.zeros(n_rows) + int(magnification)

    # remove 3.1-only columns; errors='ignore' replaces the old bare
    # `except: pass` while keeping the same drop-if-present behavior
    dataframe_ = dataframe_.drop(
        columns=['_rlnOpticsGroup', '_rlnHelicalTrackLengthAngst'],
        errors='ignore')

    # Row number is required for the column names, e.g. "_rlnVoltage #3"
    column_names_to_star = [
        '{} #{}'.format(name, n + 1)
        for n, name in enumerate(dataframe_.columns)
    ]

    out_doc = cif.Document()
    out_particles = out_doc.add_new_block('', pos=-1)
    loop = out_particles.init_loop('', column_names_to_star)

    # to save cif all list values must be str
    for row in dataframe_.to_numpy().astype(str).tolist():
        loop.add_row(row)

    out_name = filename.replace('.star', '_v30.star')
    out_doc.write_file(out_name)
    print('File "{}" saved.'.format(out_name))
def test_set_mmcif_category(self):
    """Exercise Block.set_mmcif_category in raw and non-raw modes,
    including a get/set round trip."""
    doc = cif.Document()
    block = doc.add_new_block('b')
    # raw=True: values are stored verbatim, so CIF quoting and text-field
    # (';...;') markup must already be present in the input
    block.set_mmcif_category('_c', {
        'one': ('?', 'ab', ';text field\n;'),
        'two': [-1, 4. / 3, '"double quoted"']
    }, raw=True)
    self.assertEqual(block.find_values('_c.one')[0], '?')
    self.assertEqual(block.find_values('_c.one').str(1), 'ab')
    # .str() strips CIF quoting, so the text-field delimiters disappear
    self.assertEqual(block.find_values('_c.one').str(2), 'text field')
    self.assertEqual(block.find_values('_c.two').str(0), '-1')
    self.assertEqual(block.find_values('_c.two').str(2), 'double quoted')
    # non-raw mode: values are quoted as needed; per the assertions below,
    # None is stored as '?' and False as '.'
    block.set_mmcif_category('_d', {
        'one': (None, 'a b', 'text\nfield'),
        'two': [-1, '?', False]
    })

    def check_d():
        # shared assertions: every _d variant set below must produce
        # exactly this content
        self.assertEqual(block.find_values('_d.one')[0], '?')
        self.assertEqual(block.find_values('_d.one').str(1), 'a b')
        self.assertEqual(block.find_values('_d.one').str(2), 'text\nfield')
        self.assertEqual(block.find_values('_d.one')[2], ';text\nfield\n;')
        self.assertEqual(block.find_values('_d.two').str(0), '-1')
        self.assertEqual(block.find_values('_d.two').str(1), '?')
        self.assertEqual(block.find_values('_d.two')[2], '.')

    check_d()
    # the same content supplied pre-quoted with raw=True
    block.set_mmcif_category('_d', {
        'one': ('?', "'a b'", ';text\nfield\n;'),
        'two': ['-1', "'?'", '.']
    }, raw=True)
    check_d()
    # raw=True with None/False mixed in must still yield '?' / '.'
    block.set_mmcif_category('_d', {
        'one': (None, "'a b'", ';text\nfield\n;'),
        'two': [-1, "'?'", False]
    }, raw=True)
    check_d()
    # get/set round trip must be lossless in both raw modes
    block.set_mmcif_category('_d', block.get_mmcif_category('_d'))
    check_d()
    block.set_mmcif_category('_d',
                             block.get_mmcif_category('_d', raw=True),
                             raw=True)
    check_d()
def save_star(dataframe_, filename='out.star'):
    """Write a particle DataFrame to a STAR file as a single 'particles' loop.

    Column headers get a 1-based index suffix as required by the STAR
    format, e.g. ``_rlnNrOfSignificantSamples #33``.
    """
    document = cif.Document()
    particles_block = document.add_new_block('particles', pos=-1)
    # Row number is required for the column names to save the STAR file
    numbered_columns = [
        '{} #{}'.format(column, index + 1)
        for index, column in enumerate(dataframe_.columns)
    ]
    loop = particles_block.init_loop('', numbered_columns)
    # gemmi loops accept only string values
    for record in dataframe_.to_numpy().astype(str).tolist():
        loop.add_row(record)
    document.write_file(filename)
    print('File "{}" saved.'.format(filename))
def export_property_template(self, filename: str = '') -> None:
    """
    Exports the currently selected property entry to a file.

    The values stored for the selected list entry are written as a
    single-column CIF loop keyed by the entry's cif keyword. When
    `filename` is empty, a save dialog asks the user for one.
    """
    # text of the row currently selected in the properties list widget
    selected_row_text = self.app.ui.PropertiesTemplatesListWidget.currentIndex(
    ).data()
    if not selected_row_text:
        return
    prop_data = self.settings.load_settings_list('property', selected_row_text)
    table_data = []
    cif_key = ''
    if prop_data:
        # prop_data[0] is the cif keyword; prop_data[1] (the value list)
        # may be absent, so fall back to an empty table
        cif_key = prop_data[0]
        with suppress(Exception):
            table_data = prop_data[1]
    if not cif_key:
        return
    doc = cif.Document()
    # block name derives from the entry text, whitespace -> '__'
    blockname = '__'.join(selected_row_text.split())
    block = doc.add_new_block(blockname)
    try:
        loop = block.init_loop(cif_key, [''])
    except RuntimeError:
        # Not a valid loop key
        show_general_warning(
            '"{}" is not a valid cif keyword.'.format(cif_key))
        return
    for value in table_data:
        if value:
            # re-encode stored text and apply CIF quoting rules
            loop.add_row([cif.quote(utf8_to_str(value))])
    if not filename:
        filename = cif_file_save_dialog(
            blockname.replace('__', '_') + '.cif')
    # blank name means the user cancelled the dialog
    if not filename.strip():
        return
    try:
        doc.write_file(filename, style=cif.Style.Indent35)
        # Path(filename).write_text(doc.as_string(cif.Style.Indent35))
    except PermissionError:
        if Path(filename).is_dir():
            return
        show_general_warning('No permission to write file to {}'.format(
            Path(filename).resolve()))
def save_star_31(dataframe_optics, dataframe_particles, filename='out.star'):
    """Write Relion 3.1 data as a STAR file with 'optics' and 'particles' blocks.

    Parameters
    ----------
    dataframe_optics : dict or DataFrame-convertible
        Optics-group table; converted with ``pd.DataFrame.from_dict``.
    dataframe_particles : pandas.DataFrame
        Per-particle table.
    filename : str
        Output path.
    """
    # For now only Relion star 3.1+ can be saved as 3.1 star.

    def _append_loop_block(doc, block_name, dataframe):
        # One loop per block. Row number is required for the column names,
        # e.g. "_rlnNrOfSignificantSamples #33"; all values must be str.
        block = doc.add_new_block(block_name, pos=-1)
        headers = [
            '{} #{}'.format(column, index + 1)
            for index, column in enumerate(dataframe.columns)
        ]
        loop = block.init_loop('', headers)
        for row in dataframe.to_numpy().astype(str).tolist():
            loop.add_row(row)

    out_doc = cif.Document()
    dataframe_optics = pd.DataFrame.from_dict(dataframe_optics)
    # the optics block must precede the particles block
    _append_loop_block(out_doc, 'optics', dataframe_optics)
    _append_loop_block(out_doc, 'particles', dataframe_particles)
    out_doc.write_file(filename)
    print('File "{}" saved.'.format(filename))
#!/usr/bin/env python # This example shows how to put pythonic data structure into an mmCIF file. # Two paths are required as arguments: input (mmJSON file) and output. from __future__ import print_function import json import sys from gemmi import cif file_in, file_out = sys.argv[1:] with open(file_in) as f: json_data = json.load(f) assert len(json_data) == 1 # data_1ABC (block_name, block_data), = json_data.items() assert block_name.startswith('data_') # Now block_data is a dictionary that maps category names to dictionaries # that in turn map column names to lists with values. doc = cif.Document() block = doc.add_new_block(block_name[5:]) for cat, data in block_data.items(): block.set_mmcif_category('_' + cat, data) doc.write_file(file_out)
# NOTE(review): fragment — the opening `if filetype == ...` branches of this
# parser dispatch (and the enclosing function header) are outside the
# visible chunk, and the chunk ends before the adsorption data is written.
elif filetype == "BEL-csv_JIS":
    from parsers import BEL_csv_JIS
    data_meta, data_ads, data_des = BEL_csv_JIS.parse(filename)
elif filetype == "quantachrome":
    from parsers import quantachrome
    data_meta, data_ads, data_des = quantachrome.parse(filename)
elif filetype == "micromeritics":
    from parsers import micromeritics
    data_meta, data_ads, data_des = micromeritics.parse(filename)
else:
    raise Exception("This file type is unknown or currently not supported.")

# write adsorption file
# initialize aif block
d = cif.Document()
d.add_new_block('data_raw2aifv005')
block = d.sole_block()

# write metadata
if data_meta["user"] == '':
    block.set_pair('_exptl_operator', 'unknown')
else:
    # single-quote free-text values so embedded spaces survive CIF parsing
    block.set_pair('_exptl_operator', "'" + data_meta["user"] + "'")
block.set_pair('_exptl_date', data_meta["date"])
block.set_pair('_exptl_instrument', "'" + data_meta["apparatus"] + "'")
block.set_pair('_exptl_adsorptive', data_meta["adsorbate"])
block.set_pair('_exptl_temperature', str(data_meta["temperature"]))
block.set_pair('_exptl_sample_mass', str(data_meta["mass"]))
def json2aif(json_dict):
    """Convert NIST json_dict to AIF.

    Maps top-level JSON keys to AIF datanames via the equivalency table,
    then writes the isotherm points as `_adsorp_` / `_desorp_` loops.
    Returns the assembled gemmi cif.Document.
    """
    # pylint: disable-msg=too-many-branches
    # initialize aif block
    d = cif.Document()
    d.add_new_block(json_dict['filename'])  #fix this
    block = d.sole_block()
    for inkey in json_dict:
        if inkey != 'isotherm_data':
            # translate the JSON key to its AIF dataname
            outkey, _ = crossreference_keys(equivalency_table,
                                            inkey,
                                            informat='JSON')
            if json_dict[inkey] == '':  #Ignore blank keys
                continue
            if '_unsupported_' in outkey:
                # ignore unknown datanames from JSON format?
                continue
            if inkey == 'adsorbates':
                # Temporary kludge for adsorptives: only a single pure
                # component is supported
                if len(json_dict[inkey]) == 1:
                    outkey = '_exptl_adsorptive'
                    outstring = json_dict[inkey][0]['name']
                    block.set_pair(outkey, outstring)
                else:
                    raise Exception(
                        'This script is only for pure component adsorption right now'
                    )
            elif isinstance(json_dict[inkey], (str, float, int)):
                # scalar values map directly; written as strings
                block.set_pair(outkey, str(json_dict[inkey]))
            elif isinstance(json_dict[inkey], dict):
                # Temporary kludge for adsorbents
                if 'name' in json_dict[inkey]:
                    block.set_pair(outkey, str(json_dict[inkey]['name']))
                    block.set_pair('_sample_id',
                                   str(json_dict[inkey]['hashkey']))
            else:
                print(inkey, json_dict[inkey], outkey)
                raise Exception('Script unable to handle this key set')

    # Measurements
    # Default to adsorption branch, state as desorption ONLY if specified
    pressure_adsorp = []
    amount_adsorp = []
    pressure_desorp = []
    amount_desorp = []
    for point in json_dict['isotherm_data']:
        if 'branch' in point:
            if point['branch'] == 'adsorp':
                pressure_adsorp.append(point['pressure'])
                amount_adsorp.append(point['species_data'][0]['adsorption'])
            elif point['branch'] == 'desorp':
                pressure_desorp.append(point['pressure'])
                amount_desorp.append(point['species_data'][0]['adsorption'])
            else:
                raise Exception('ERROR: unknown branch type:',
                                point['branch'])
        else:  #default=adsorp
            pressure_adsorp.append(point['pressure'])
            amount_adsorp.append(point['species_data'][0]['adsorption'])
    # to save cif all list values must be str
    loop_ads = block.init_loop('_adsorp_', ['pressure', 'amount'])
    loop_ads.set_all_values([
        list(np.array(pressure_adsorp).astype(str)),
        list(np.array(amount_adsorp).astype(str))
    ])
    # write a desorption loop only when desorption points were collected
    if len(pressure_desorp) != 0:
        loop_ads = block.init_loop('_desorp_', ['pressure', 'amount'])
        loop_ads.set_all_values([
            list(np.array(pressure_desorp).astype(str)),
            list(np.array(amount_desorp).astype(str))
        ])
    return d
def convert(filename, material_id, filetype):
    """Parse a raw sorption data file and write it out as an AIF file.

    Parameters
    ----------
    filename : str
        Path of the raw instrument file; the output path is the same
        name with an '.aif' extension.
    material_id : str
        Identifier stored as ``_sample_material_id``.
    filetype : str
        One of the supported instrument formats.

    Raises
    ------
    Exception
        If ``filetype`` is not recognized.
    """
    # dispatch to the parser matching the instrument file format
    if filetype == "BELSORP-max":
        from parsers import BEL
        data_meta, data_ads, data_des = BEL.parse(filename)
    elif filetype == "BEL-csv":
        from parsers import BEL_csv
        data_meta, data_ads, data_des = BEL_csv.parse(filename)
    elif filetype == "BEL-csv_JIS":
        from parsers import BEL_csv_JIS
        data_meta, data_ads, data_des = BEL_csv_JIS.parse(filename)
    elif filetype == "quantachrome":
        from parsers import quantachrome
        data_meta, data_ads, data_des = quantachrome.parse(filename)
    elif filetype == "micromeritics":
        from parsers import micromeritics
        data_meta, data_ads, data_des = micromeritics.parse(filename)
    else:
        raise Exception("This file type is unknown or currently not supported.")

    # write adsorption file
    # initialize aif block
    d = cif.Document()
    d.add_new_block('data_raw2aifv005')
    block = d.sole_block()

    # write metadata; free-text values are single-quoted so embedded
    # spaces survive CIF parsing
    if data_meta["user"] == '':
        block.set_pair('_exptl_operator', 'unknown')
    else:
        block.set_pair('_exptl_operator', "'" + data_meta["user"] + "'")
    block.set_pair('_exptl_date', data_meta["date"])
    block.set_pair('_exptl_instrument', "'" + data_meta["apparatus"] + "'")
    block.set_pair('_exptl_adsorptive', data_meta["adsorbate"])
    block.set_pair('_exptl_temperature', str(data_meta["temperature"]))
    block.set_pair('_exptl_sample_mass', str(data_meta["mass"]))
    block.set_pair('_sample_id', "'" + data_meta["sample_id"] + "'")
    block.set_pair('_sample_material_id', "'" + material_id + "'")
    block.set_pair('_units_temperature', data_meta["temperature_unit"])
    block.set_pair('_units_pressure', data_meta["pressure_unit"])
    block.set_pair('_units_mass', data_meta["adsorbent_unit"])
    block.set_pair('_units_loading', "'" + data_meta["loading_unit"] + "'")

    # write adsorption data
    loop_ads = block.init_loop('_adsorp_', ['pressure', 'p0', 'amount'])
    loop_ads.set_all_values([
        list(data_ads['pressure'].astype(str)),
        list(data_ads['pressure_saturation'].astype(str)),
        list(data_ads['loading'].astype(str))
    ])

    # write desorption data when the desorption branch has points.
    # BUG FIX: was `len(data_des > 0)` — the row count of an element-wise
    # boolean comparison, which only worked by accident; the intended test
    # is on the number of desorption rows.
    if len(data_des) > 0:
        loop_des = block.init_loop('_desorp_', ['pressure', 'p0', 'amount'])
        loop_des.set_all_values([
            list(data_des['pressure'].astype(str)),
            list(data_des['pressure_saturation'].astype(str)),
            list(data_des['loading'].astype(str))
        ])

    outputfilename = os.path.splitext(filename)[0] + '.aif'
    print(f'Writing output to {outputfilename}')
    d.write_file(outputfilename)
def makeAIF(data_meta, data_ads, data_des, material_id, filename):
    """Assemble parsed isotherm data into an AIF document and write it to disk.

    Parameters
    ----------
    data_meta : dict-like
        Metadata from the instrument parser.
    data_ads, data_des : table-like
        Adsorption / desorption branches; a desorption loop is written
        only when ``data_des`` has rows.
    material_id : str
        Identifier stored as ``_sample_material_id``.
    filename : str
        Source file path; output is the same path with an '.aif' extension.
    """
    # initialize aif block
    d = cif.Document()
    d.add_new_block('data_raw2aifv006')
    block = d.sole_block()

    # write metadata; fall back to 'unknown' for missing operator/instrument
    if data_meta["user"] == '':
        block.set_pair('_exptl_operator', 'unknown')
    else:
        block.set_pair('_exptl_operator', "'" + data_meta["user"] + "'")
    block.set_pair('_exptl_date', data_meta["date"])
    if "apparatus" not in data_meta:
        block.set_pair('_exptl_instrument', 'unknown')
    else:
        block.set_pair('_exptl_instrument',
                       "'" + data_meta["apparatus"] + "'")
    block.set_pair('_exptl_adsorptive', data_meta["adsorbate"])
    block.set_pair('_exptl_temperature', str(data_meta["temperature"]))
    block.set_pair('_exptl_sample_mass', str(data_meta["mass"]))
    block.set_pair('_sample_id', "'" + data_meta["sample_id"] + "'")
    block.set_pair('_sample_material_id', "'" + material_id + "'")
    block.set_pair('_units_temperature', data_meta["temperature_unit"])
    block.set_pair('_units_pressure', data_meta["pressure_unit"])
    block.set_pair('_units_mass', data_meta["adsorbent_unit"])
    block.set_pair('_units_loading', "'" + data_meta["loading_unit"] + "'")

    # check if saturation pressure is given for every point
    if 'pressure_saturation' in data_ads:
        # write adsorption data with a per-point p0 column
        loop_ads = block.init_loop('_adsorp_', ['pressure', 'p0', 'amount'])
        loop_ads.set_all_values([
            list(data_ads['pressure'].astype(str)),
            list(data_ads['pressure_saturation'].astype(str)),
            list(data_ads['loading'].astype(str))
        ])
        # write desorption data when present.
        # BUG FIX: was `len(data_des > 0)` — the row count of an
        # element-wise boolean comparison, which only worked by accident;
        # the intended test is on the number of desorption rows.
        if len(data_des) > 0:
            loop_des = block.init_loop('_desorp_',
                                       ['pressure', 'p0', 'amount'])
            loop_des.set_all_values([
                list(data_des['pressure'].astype(str)),
                list(data_des['pressure_saturation'].astype(str)),
                list(data_des['loading'].astype(str))
            ])
    elif len(list(data_meta['pressure_saturation'])) == 1:
        # a single saturation pressure for the whole experiment
        block.set_pair('_exptl_p0', str(data_meta["pressure_saturation"][0]))
        # write adsorption data
        loop_ads = block.init_loop('_adsorp_', ['pressure', 'amount'])
        loop_ads.set_all_values([
            list(data_ads['pressure'].astype(str)),
            list(data_ads['loading'].astype(str))
        ])
        # write desorption data when present (same fix as above)
        if len(data_des) > 0:
            loop_des = block.init_loop('_desorp_', ['pressure', 'amount'])
            loop_des.set_all_values([
                list(data_des['pressure'].astype(str)),
                list(data_des['loading'].astype(str))
            ])

    outputfilename = os.path.splitext(filename)[0] + '.aif'
    d.write_file(outputfilename)
    return
#!/usr/bin/env python
# Greet each chemical element type found in the given mmCIF files,
# printing every distinct symbol only once across all inputs.
import sys
from gemmi import cif

seen_symbols = set()
for path in sys.argv[1:]:
    try:
        document = cif.Document(path)  # copy all the data from mmCIF file
        only_block = document.sole_block()  # mmCIF has exactly one block
        for symbol in only_block.find_loop("_atom_site.type_symbol"):
            if symbol in seen_symbols:
                continue
            seen_symbols.add(symbol)
            print("Hello " + symbol)
    except Exception as e:
        print("Oops. %s" % e)
        sys.exit(1)