def test_misc(self):
    """Check that the JSON export of the CIF file equals the reference JSON.

    Reads ``self.basename + '.cif'``, converts it with ``as_json()`` and
    compares the decoded result with the content of
    ``self.basename + '.json'``.
    """
    # Decode the exported JSON so that the comparison is done on Python
    # objects rather than on raw text.
    exported = json.loads(cif.read_file(self.basename + '.cif').as_json())
    with io.open(self.basename + '.json', encoding='utf-8') as handle:
        expected = json.load(handle)
    self.assertEqual(exported, expected)
def import_equipment_from_file(self, filename='') -> None:
    """
    Import an equipment entry from a cif file.

    If no filename is given, the user is asked for one with a file dialog.
    Every key/value pair of the single data block becomes one row
    [key, value] of the entry saved in the 'equipment' settings list.
    For '.cif_od' files only keys listed in include_equipment_imports are
    kept and the entry is named after the file stem; for all other files
    the entry is named after the block name ('__' replaced by a space).
    Files that cannot be read, or that do not hold a single data block,
    are reported with a warning and nothing is saved.
    """
    if not filename:
        filename = cif_file_open_dialog(
            filter="CIF file (*.cif *.cif_od *.cfx)")
    if not filename:
        print('No file given')
        return
    try:
        doc = cif.read_file(filename)
        # sole_block() fails for documents without exactly one block;
        # report that like a parse error instead of crashing.
        block = doc.sole_block()
    except (RuntimeError, IndexError) as e:
        show_general_warning(str(e))
        return
    is_cif_od = filename.endswith('.cif_od')
    table_data = []
    for item in block:
        if item.pair is None:
            # loops and other non-pair items are not part of an equipment entry
            continue
        key, value = item.pair
        if is_cif_od and key not in include_equipment_imports:
            continue
        table_data.append([
            key,
            retranslate_delimiter(cif.as_string(value).strip('\n\r ;'))
        ])
    if is_cif_od:
        name = Path(filename).stem
    else:
        name = block.name.replace('__', ' ')
    self.settings.save_settings_list('equipment', name, table_data)
    self.show_equipment()
def parse_star_selected_columns(file_path, col1_name, col2_name):
    """
    Read the optics table and two selected particle columns of a star file.

    A document with exactly two data blocks is treated as a Relion 3.1+
    file (optics block first, particles block second). Any other document
    is treated as a Relion 3.0 file whose first block holds the particles.

    :param file_path: path of the star file
    :param col1_name: tag of the first particle column to extract
    :param col2_name: tag of the second particle column to extract
    :return: tuple (optics_data, particles_data). optics_data maps every
        optics loop tag to the value in its first row (empty dict for
        Relion 3.0 files); particles_data is a DataFrame holding the two
        requested columns.
    """
    doc = cif.read_file(file_path)
    optics_data = {}
    # 3.1 star files have two data blocks: optics and particles
    if len(doc) == 2:
        print('Found Relion 3.1+ star file.')
        optics, particles = doc[0], doc[1]
        for item in optics:
            # Plain key/value items carry no loop; skip them instead of
            # failing on item.loop.tags.
            if item.loop is None:
                continue
            for optics_metadata in item.loop.tags:
                value = optics.find_loop(optics_metadata)
                # only the first row of each optics column is kept
                optics_data[optics_metadata] = np.array(value)[0]
    else:
        print('Found Relion 3.0 star file.')
        particles = doc[0]
    particles_data = pd.DataFrame()
    print('Reading star file:')
    for particle_metadata in (col1_name, col2_name):
        loop = particles.find_loop(particle_metadata)
        particles_data[particle_metadata] = np.array(loop)
    return optics_data, particles_data
def parse_star_model(file_path, loop_name):
    """
    Return the column `loop_name` from the second data block of the star
    file at `file_path` as a numpy array.
    """
    # The block at index 1 holds the per class information.
    class_block = cif.read_file(file_path)[1]
    return np.array(class_block.find_loop(loop_name))
def cif_to_dict(cif_file: str, mmjson: bool = False) -> tp.Generator:
    """Yield the content of a cif file as dictionaries.

    With ``mmjson=False`` one dictionary per top-level entry of the JSON
    export is yielded, extended by the keys ``'name'`` (the entry's key) and
    ``'cif_file'`` (absolute path of the file). With ``mmjson=True`` the
    whole decoded mmJSON export is yielded as a single dictionary.
    """
    source = Path(cif_file)
    exported = cif.read_file(str(source)).as_json(mmjson=mmjson)
    parsed: dict = json.loads(exported)
    if mmjson:
        yield parsed
        return
    for name, content in parsed.items():
        content['name'] = name
        content['cif_file'] = str(source.absolute())
        yield content
def parse_mmcif(self, fileName):
    """Read the mmCIF file `fileName` into self.cifObj.

    Returns True when the file exists and the parsed document is truthy,
    otherwise False. Exceptions raised while reading are logged, not
    propagated.
    """
    # from http://gemmi.readthedocs.io/en/latest/cif-parser.html#python-module
    if not fileName or not os.path.exists(fileName):
        return False
    try:
        # copy all the data from mmCIF file
        self.cifObj = cif.read_file(fileName)
        if self.cifObj:
            return True
    except Exception as err:
        logging.error(err)
    return False
def parse_mmcif(self):
    """Read the mmCIF file at self.f into self.cifObj.

    When the parsed document is truthy, self.getDataBlockWithAtomSite() is
    called and True is returned. In every other case (no path, missing
    file, falsy document, exception while reading) the result is False;
    exceptions are logged, not propagated.
    """
    # from http://gemmi.readthedocs.io/en/latest/cif-parser.html#python-module
    if not self.f or not os.path.exists(self.f):
        return False
    try:
        # copy all the data from mmCIF file
        self.cifObj = cif.read_file(self.f)
        if self.cifObj:
            # self.getDataBlockWithMostCat()
            self.getDataBlockWithAtomSite()
            # self.getDatablock()
            return True
    except Exception as err:
        logging.error(err)
    return False
def import_author(self, filename=''):
    """
    Import an author from a cif file.

    If no filename is given, the user is asked for one with a file dialog.
    The _publ_contact_author_* and _publ_author_* pairs of the single data
    block are mapped to the author fields name, address, email, phone,
    orcid and footnote. The author is flagged as contact author when the
    block name (with '__' replaced by a space) contains 'contact author'.
    Nothing is saved when no author name was found. Files that cannot be
    read, or that do not hold a single data block, are reported with a
    warning.
    """
    cif_auth_to_str = {
        '_publ_contact_author_name': 'name',
        '_publ_contact_author_address': 'address',
        '_publ_contact_author_email': 'email',
        '_publ_contact_author_phone': 'phone',
        '_publ_contact_author_id_orcid': 'orcid',
        #
        '_publ_author_name': 'name',
        '_publ_author_address': 'address',
        '_publ_author_email': 'email',
        '_publ_author_phone': 'phone',
        '_publ_author_id_orcid': 'orcid',
        '_publ_author_footnote': 'footnote',
    }
    if not filename:
        filename = cif_file_open_dialog(filter="CIF file (*.cif)")
    if not filename:
        return
    try:
        doc = read_file(filename)
        # sole_block() fails for documents without exactly one block;
        # report that like a parse error instead of crashing.
        block = doc.sole_block()
    except (RuntimeError, IndexError) as e:
        show_general_warning(str(e))
        return
    table_data = {}
    for item in block:
        if item.pair is None:
            continue
        key, value = item.pair
        field = cif_auth_to_str.get(key)
        if field is None:
            # not an author related key
            continue
        table_data[field] = retranslate_delimiter(
            as_string(value).strip('\n\r ;'))
    name = block.name.replace('__', ' ')
    if 'contact author' in name:
        table_data['contact'] = True
    if not table_data.get('name'):
        return None
    self.general_author_save(table_data)
    self.show_author_loops()
def import_property_from_file(self, filename: str = '') -> None:
    """
    Imports a cif file as entry of the property templates list.

    If no filename is given, the user is asked for one with a file dialog.
    The values of the first column of every loop in the single data block
    become the template items; the first tag of the last loop seen becomes
    the cif keyword. The block name ('__' replaced by a space) is added to
    the 'property_list' setting and the pair [keyword, items] is saved
    under 'property/<block name>'. The item list always starts with an
    empty string and holds each value once, in file order. Files that
    cannot be read, or that do not hold a single data block, are reported
    with a warning.
    """
    if not filename:
        filename = cif_file_open_dialog(filter="CIF file (*.cif)")
    if not filename:
        return
    try:
        doc = cif.read_file(filename)
        # sole_block() fails for documents without exactly one block;
        # report that like a parse error instead of crashing.
        block = doc.sole_block()
    except (RuntimeError, IndexError) as e:
        show_general_warning(str(e))
        return
    property_list = self.settings.settings.value('property_list')
    if not property_list:
        property_list = ['']
    # The empty entry goes first so that it stays the first item after
    # the ordered de-duplication below.
    template_list = ['']
    loop_column_name = ''
    for item in block:
        loop = item.loop
        if loop is None:
            continue
        if len(loop.tags) > 0:
            loop_column_name = loop.tags[0]
            for row in range(loop.length()):
                value = loop.val(row, 0)
                template_list.append(
                    retranslate_delimiter(
                        cif.as_string(value).strip("\n\r ;")))
    block_name = block.name.replace('__', ' ')
    # This is the list shown in the Main menu:
    property_list.append(block_name)
    table = self.app.ui.PropertiesEditTableWidget
    table.setRowCount(0)
    self.app.ui.cifKeywordLineEdit.setText(loop_column_name)
    newlist = sorted({x for x in property_list if x})
    # this list keeps track of the property items:
    self.settings.save_template_list('property_list', newlist)
    # dict.fromkeys() removes duplicates but, unlike set(), keeps the order,
    # so the saved list is the same on every run.
    template_list = list(dict.fromkeys(template_list))
    # save as dictionary for properties to have "_cif_key : itemlist"
    # for a table item as dropdown menu in the main table.
    table_data = [loop_column_name, template_list]
    self.settings.save_template_list('property/' + block_name, table_data)
    self.show_properties()
def parse_star_data(file_path, loop_name, max_retries=None, retry_delay=5):
    """
    Return the column `loop_name` of a star file as a numpy array.

    The column is looked up in the second block when the document has
    exactly two blocks, otherwise in the first block. While reading raises
    RuntimeError the file is considered busy and reading is retried.

    :param file_path: path of the star file
    :param loop_name: tag of the loop column to extract
    :param max_retries: number of retries after a failed read before the
        RuntimeError is re-raised; None (default) retries without limit
    :param retry_delay: seconds to wait between two attempts
    :return: numpy array with the values of the column
    :raises RuntimeError: when max_retries is set and exhausted
    """
    failures = 0
    while True:
        try:
            doc = cif.read_file(file_path)
            # two-block documents keep the wanted loop in the second block
            particles_block = 1 if len(doc) == 2 else 0
            loop = doc[particles_block].find_loop(loop_name)
            return np.array(loop)
        except RuntimeError:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            print('*star file is busy')
            time.sleep(retry_delay)
def parse_star(file_path):
    """
    Read the optics table and all particle columns of a star file.

    A document with exactly two data blocks is treated as a Relion 3.1+
    file (optics block first, particles block second). Any other document
    is treated as a Relion 3.0 file whose first block holds the particles.

    :param file_path: path of the star file
    :return: tuple (optics_data, particles_data). optics_data maps every
        optics loop tag to the value in its first row (empty dict for
        Relion 3.0 files); particles_data is a DataFrame with one column
        per loop tag of the particles block.
    """
    import tqdm
    doc = cif.read_file(file_path)
    optics_data = {}
    # 3.1 star files have two data blocks: optics and particles
    if len(doc) == 2:
        print('Found Relion 3.1+ star file.')
        optics, particles = doc[0], doc[1]
        for item in optics:
            # Plain key/value items carry no loop; skip them instead of
            # failing on item.loop.tags.
            if item.loop is None:
                continue
            for optics_metadata in item.loop.tags:
                value = optics.find_loop(optics_metadata)
                # only the first row of each optics column is kept
                optics_data[optics_metadata] = np.array(value)[0]
    else:
        print('Found Relion 3.0 star file.')
        particles = doc[0]
    particles_data = pd.DataFrame()
    print('Reading star file:')
    for item in particles:
        if item.loop is None:
            continue
        # If don't want to use tqdm uncomment bottom line and remove 'import tqdm'
        # for particle_metadata in item.loop.tags:
        for particle_metadata in tqdm.tqdm(item.loop.tags):
            loop = particles.find_loop(particle_metadata)
            particles_data[particle_metadata] = np.array(loop)
    return optics_data, particles_data
#!/usr/bin/env python
import sys
from gemmi import cif

# Print "Hello <symbol>" once for every distinct _atom_site.type_symbol value
# found in the mmCIF files given on the command line.
greeted = set()
for path in sys.argv[1:]:
    try:
        # copy all the data from mmCIF file
        doc = cif.read_file(path)
        # mmCIF has exactly one block
        block = doc.sole_block()
        for symbol in block.find_loop("_atom_site.type_symbol"):
            if symbol in greeted:
                continue
            print("Hello {}".format(symbol))
            greeted.add(symbol)
    except Exception as err:
        # any failure stops the script with exit status 1
        print("Oops. {}".format(err))
        sys.exit(1)
import ntpath
import sys

import pandas as pd
from gemmi import cif

if __name__ == '__main__':
    filename = sys.argv[1]
    doc = cif.read_file(filename)
    block = doc.sole_block()

    # Map author chain IDs to sequences
    chain_ids = block.find_values('_entity_poly.pdbx_strand_id')
    chain_seqs = block.find_values('_entity_poly.pdbx_seq_one_letter_code_can')
    # Translation table that deletes every ';' and newline from a sequence.
    unwanted_chars = str.maketrans('', '', ';\n')
    chain_seq_map = {}
    for idx, seq in zip(chain_ids, chain_seqs):
        clean_seq = seq.translate(unwanted_chars)
        # One entry may list several comma separated chain IDs that share
        # the same sequence.
        for chain_id in idx.split(','):
            chain_seq_map[chain_id] = clean_seq

    # Extract atom info: one list per _atom_site column, in the order of
    # the tags below.
    (standard_res, standard_chain, standard_seq_pos,
     auth_res, auth_chain) = (
        list(block.find_values(tag))
        for tag in (
            '_atom_site.label_comp_id',
            '_atom_site.label_asym_id',
            '_atom_site.label_seq_id',
            '_atom_site.auth_comp_id',
            '_atom_site.auth_asym_id',
        )
    )
import numpy as np
from gemmi import cif

# Read the .aif file with the CIF parser and take its single data block.
aif = cif.read_file(
    'database/DUT-6/NK_DUT-6_LP_N2_114PKT (Raw Analysis Data).aif')
block = aif.sole_block()
# Adsorption branch: the three loop columns converted to float arrays.
ads_press = np.array(block.find_loop('_adsorp_pressure'), dtype=float)
ads_p0 = np.array(block.find_loop('_adsorp_p0'), dtype=float)
ads_amount = np.array(block.find_loop('_adsorp_amount'), dtype=float)
# Desorption branch: same three columns for the desorption loop.
des_press = np.array(block.find_loop('_desorp_pressure'), dtype=float)
des_p0 = np.array(block.find_loop('_desorp_p0'), dtype=float)
des_amount = np.array(block.find_loop('_desorp_amount'), dtype=float)
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['pdf.fonttype'] = 42
plt.rcParams.update({'font.size': 6})
# Two panels side by side that share both axes.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)
f.tight_layout()
f.set_size_inches(3, 2.2)
# Adsorption points: amount versus pressure / p0 on a logarithmic x axis.
ax2.semilogx(ads_press / ads_p0, ads_amount, 'o', color='tab:blue', ms=5)
# Desorption points: same quantities, drawn with white marker faces.
# NOTE(review): this call is cut off in the visible source; its remaining
# arguments and closing parenthesis are missing here.
ax2.plot(des_press / des_p0, des_amount, 'o', markerfacecolor='white', color='tab:blue',
import os
import shutil

# Directory whose cif data has to be changed
directory = '../data/data_86_FE_BG'
# directory = "data/sample"
# Scale factor for the cell lengths a, b and c
# (e.g. 0.99 means each final cell length will be 0.99 * original)
fraction = 0.99
# Output directory: the input directory name with the scale factor appended
mod_dir = directory+'_'+str(fraction)+'/'

if __name__ == '__main__':
    # make the output directory if it does not exist yet
    os.makedirs(mod_dir, exist_ok=True)
    for file in os.listdir(directory):
        source_path = os.path.join(directory, file)
        target_path = os.path.join(mod_dir, file)
        if file.endswith(".cif"):
            print(file)
            doc = cif.read_file(source_path)
            block = doc.sole_block()
            for tag in ('_cell_length_a', '_cell_length_b', '_cell_length_c'):
                # find_pair() returns [tag, value]; scale the value and
                # store it back as text
                scaled = fraction * float(block.find_pair(tag)[1])
                block.set_pair(tag, str(scaled))
            # saving the modified cif file
            doc.write_file(target_path)
        elif os.path.isfile(source_path):
            # copy all other files unchanged (like material_id_hash.csv,
            # id_prop.csv etc.); shutil.copy works on every platform and
            # with file names that contain spaces or shell characters
            shutil.copy(source_path, target_path)
def _initialize_blocks(self):
    """
    Converts a gemmi Document object representing the .star file at
    self.filepath into entries of self.blocks, one per block of the .star
    file, keyed by block name.

    A block made of key-value pairs is stored as a dictionary of strings.
    A block made of a single loop is stored as a pandas DataFrame with
    dtype str and the loop tags as columns. Blocks without pairs and
    without a loop are not stored.

    Raises StarFileError if a block mixes pairs and loops, holds more than
    one loop, repeats a pair key, or has the name of an already stored block.
    """
    logger.info(f"Parsing star file at: {self.filepath}")
    gemmi_doc = cif.read_file(self.filepath)
    # iterate over gemmi Block objects in the gemmi Document
    for gemmi_block in gemmi_doc:
        # iterating over gemmi Block objects yields Item objects
        # Items can have a Loop object and/or a Pair object
        # Loops correspond to the regular loop_ structure in a STAR file
        # Pairs have type List[str[2]] and correspond to a non-loop key value
        # pair in a STAR file, e.g.
        # _field1 \t 'value' #1
        # Our model of the .star file only allows a block to be one or the other
        block_has_pair = False
        block_has_loop = False
        # populated if this block has a pair
        pairs = {}
        # populated if this block has a loop
        loop_tags = []
        loop_data = []
        # correct for GEMMI default behavior
        # if a block is called 'data_' in the .star file, GEMMI names it '#'
        # but we want to name it '' for consistency
        if gemmi_block.name == "#":
            gemmi_block.name = ""
        for gemmi_item in gemmi_block:
            if gemmi_item.pair is not None:
                # a pair following a loop in the same block is an error
                if block_has_loop:
                    raise StarFileError(
                        "Blocks with multiple loops and/or pairs are not supported"
                    )
                block_has_pair = True
                pair_key, pair_val = gemmi_item.pair
                if pair_key in pairs:
                    raise StarFileError(
                        f"Duplicate key in pair: {pair_key}")
                # read in as str because we do not want type conversion
                pairs[pair_key] = str(pair_val)
            if gemmi_item.loop is not None:
                # A loop following a pair is an error, and so is a second
                # loop: accepting it would silently discard the data of
                # the first loop.
                if block_has_pair or block_has_loop:
                    raise StarFileError(
                        "Blocks with multiple loops and/or pairs are not supported"
                    )
                block_has_loop = True
                loop = gemmi_item.loop
                loop_tags = loop.tags
                # convert loop data to a list of lists
                # using the .val(row, col) method of gemmi's Loop class
                loop_data = [
                    [loop.val(row, col) for col in range(loop.width())]
                    for row in range(loop.length())
                ]
        if not (block_has_pair or block_has_loop):
            # nothing to store for a block without pairs and loops
            continue
        # enforce unique block names (keys of self.blocks)
        if gemmi_block.name in self.blocks:
            raise StarFileError(
                f"Attempted overwrite of existing data block: {gemmi_block.name}"
            )
        if block_has_pair:
            # represent a set of pairs by a dictionary
            self.blocks[gemmi_block.name] = pairs
        else:
            # initialize DF from list of lists
            # read in with dtype=str because we do not want type conversion
            self.blocks[gemmi_block.name] = pd.DataFrame(
                loop_data, columns=loop_tags, dtype=str)