def compare_mofids(mofid1, mofid2, names=None):
    # Compare two MOFid strings field by field and report where they differ.
    #
    # mofid1 is treated as the expected value and mofid2 as the candidate.
    # names optionally gives the two result keys under which the raw input
    # strings are stored; it defaults to ['mof1', 'mof2'].
    #
    # Returns a dict with the two raw inputs, one entry per parsed field
    # (the agreed value, or False on a mismatch), an overall 'match' flag and
    # an 'errors' list of 'err_<field>' codes.
    labels = ['mof1', 'mof2'] if names is None else names

    if mofid1 is None or mofid2 is None:
        # Nothing can be compared. Still recover the structure name from
        # whichever side is defined (at most one can be in this branch).
        available = mofid2 if mofid1 is None else mofid1
        if available is None:
            mof_name = 'Undefined'
        else:
            mof_name = parse_mofid(available)['name']
        return {
            'match': 'NA',
            'errors': ['Undefined composition'],
            'topology': None,
            'smiles': None,
            'cat': None,
            labels[0]: mofid1,
            labels[1]: mofid2,
            'name': mof_name
        }

    reference = parse_mofid(mofid1)
    candidate = parse_mofid(mofid2)
    comparison = {
        'match': True,
        'errors': [],
        labels[0]: mofid1,
        labels[1]: mofid2,
    }

    for field, expected in reference.items():
        found = candidate[field]
        if found == expected:
            comparison[field] = expected
            continue

        if field == 'topology':
            # The candidate may list several comma-separated alternate
            # topologies; any one of them matching is good enough.
            hits = [alt for alt in found.split(',') if alt == expected]
            if hits:
                comparison[field] = hits[-1]
                continue

        # Genuine mismatch: flag it as 'err_<field>', e.g. 'err_topology'.
        comparison[field] = False
        comparison['match'] = False
        comparison['errors'].append('err_' + field)

    # A generic SMILES mismatch is replaced by the finer-grained codes that
    # diff() reports for the two SMILES strings.
    if 'err_smiles' in comparison['errors']:
        comparison['errors'].remove('err_smiles')
        comparison['errors'].extend(
            'err_' + code
            for code in diff(reference['smiles'], candidate['smiles']))

    return comparison
def _test_generated(self, cif_path, generated_mofid, start_time=None,
                    generation_type='from_generated', mofkey=None):
    # Check a generated MOFid against the expectation derived from cif_path.
    #
    # generated_mofid may come from a local cif2mofid run or from an external
    # .smi file; generation_type labels that origin in the result.
    # start_time, when given, is a time.time() stamp used to report the
    # elapsed time, and mofkey is copied into the result when not None.
    #
    # Returns the comparison dict built by self.compare_multi_mofid, extended
    # with 'time', 'name_parser' and optionally 'mofkey_from_cif'. Returns
    # None when no expected MOFid is available for this structure.
    expected = self.expected_mofid(cif_path)
    if expected is None:
        # Missing SBU info in the DB file: structures with undefined
        # nodes/linkers are currently skipped rather than reported.
        return None

    # A bare string is promoted to a one-entry dict under the 'default' key.
    # `unicode` is only evaluated when running under Python 2.
    plain_string_types = (str, unicode) if py2 else (str,)
    if type(expected) in plain_string_types:
        expected = {'default': expected}

    reference = parse_mofid(expected['default'])
    fragments = reference['smiles'].split('.')
    ref_cat = reference['cat']
    ref_name = reference['name']

    # Register the MOFids expected when the program itself fails. Without
    # them the validator would only report a generic class of error such as
    # 'err_topology' instead of the root cause (program error or timeout).
    failure_modes = [
        ('err_timeout', fragments, 'TIMEOUT', ref_cat),
        ('err_systre_error', fragments, 'ERROR', ref_cat),
        ('err_cpp_error', ['*'], 'NA', None),
        ('err_no_mof', ['*'], 'NA', 'no_mof'),
    ]
    for error_key, smiles_parts, topology, category in failure_modes:
        expected[error_key] = assemble_mofid(
            smiles_parts, topology, category, mof_name=ref_name)

    # Apply any parser-specific transformation to the generated MOFid
    # (from a CIF or a .smi database), if applicable (e.g. GA hMOFs).
    transformed = self.transform_mofid(generated_mofid)
    if transformed != generated_mofid:
        generation_type += '_transformed'
    labels = ['from_name', generation_type]

    transform_failed = transformed is None and generated_mofid is not None
    if transform_failed:
        # Compare the untransformed string so the result dict is still
        # populated, then overwrite the verdict with the transform failure.
        comparison = self.compare_multi_mofid(
            expected, generated_mofid, labels)
        comparison['errors'] = ['err_missing_transform']
        comparison['match'] = False
    else:
        comparison = self.compare_multi_mofid(expected, transformed, labels)

    comparison['time'] = (
        0 if start_time is None else time.time() - start_time)
    if mofkey is not None:
        comparison['mofkey_from_cif'] = mofkey
    comparison['name_parser'] = self.__class__.__name__
    return comparison
def parse(self, filename):
    # Read a file with one MOFid per line and store each parsed record in
    # self.tables, keyed by the record's name.
    #
    # Each stored record is the parse_mofid() result without its 'name'
    # entry, plus three derived fields:
    #   'smiles_part'    - the SMILES string split on '.'
    #   'base_topology'  - the first comma-separated topology
    #   'extra_topology' - the remaining topologies re-joined with ','
    #
    # Returns self, so that calls can be chained.
    with open(filename, 'r') as mofid_file:
        for raw_line in mofid_file:
            record = parse_mofid(raw_line)
            name = record.pop('name')

            record['smiles_part'] = record['smiles'].split('.')
            topologies = record['topology'].split(',')
            record['base_topology'] = topologies[0]
            # Joining an empty slice gives '' when there is only one topology.
            record['extra_topology'] = ','.join(topologies[1:])

            self.tables[name] = copy.deepcopy(record)
            # Field names of the most recently parsed record.
            self.datatypes = record.keys()
    return self
def transform_mofid(self, mofid):
    # De-functionalize a MOFid read from a CIF or a database, so the linker
    # skeletons found can be compared directly with the expected ones.
    #
    # Returns the MOFid unchanged when the functionalization code parsed from
    # its name is '0', a rebuilt MOFid with the functional-group pattern
    # replaced otherwise, and None when mofid is None or the transformation
    # cannot be applied (the caller reports that as a transform error).
    if mofid is None:
        return None

    structure_name = parse_mofid(mofid)['name']
    fg_code = self.parse_filename(structure_name)['functionalization']
    if fg_code == '0':
        return mofid
    if fg_code not in self.mof_db['functionalization']:
        return None  # Unknown functional group: reported as a transform error

    # The first whitespace-delimited token is the SMILES part of the MOFid;
    # everything after it is carried over verbatim.
    tokens = mofid.split()
    smiles = tokens[0]
    trailing_label = ' '.join(tokens[1:])

    pattern = self.mof_db['functionalization'][fg_code]
    if not openbabel_contains(smiles, pattern):
        return None  # Pattern absent: also reported as a transform error

    replaced = '.'.join(
        openbabel_replace(part, pattern, '[#1:1]')
        for part in smiles.split('.'))
    # Re-split after joining, since one replacement may break a building
    # block into several parts. The set keeps each backbone only once.
    backbones = set(replaced.split('.'))
    # Dropping a lone functional group leaves an empty "null linker".
    backbones.discard('')

    # Reassemble the de-functionalized MOFid in sorted fragment order.
    return ' '.join(['.'.join(sorted(backbones)), trailing_label])
def cif2mofid(cif_path, output_path=DEFAULT_OUTPUT_PATH):
    # Build the MOFid string for a CIF file from all of its pieces, and
    # export the MOFkey alongside it for convenience.
    #
    # Both paths are made absolute before use. Besides returning the
    # identifiers dict ('mofid', 'mofkey', 'smiles_nodes', 'smiles_linkers',
    # 'smiles', 'topology', 'cat', 'cifname'), this writes python_mofid.txt,
    # python_mofkey.txt, python_smiles_parts.txt and python_molec_formula.txt
    # into output_path.
    cif_path = os.path.abspath(cif_path)
    output_path = os.path.abspath(output_path)

    node_fragments, linker_fragments, cat, base_mofkey = extract_fragments(
        cif_path, output_path)

    # Topology is only extracted when a catenation value was produced.
    topology = 'NA'
    if cat is not None:
        single_node = extract_topology(
            os.path.join(output_path, 'SingleNode', 'topology.cgd'))
        all_node = extract_topology(
            os.path.join(output_path, 'AllNode', 'topology.cgd'))
        # Report both results, comma-separated, only when they disagree.
        topology = (single_node if single_node == all_node
                    else single_node + ',' + all_node)

    mof_name = os.path.splitext(os.path.basename(cif_path))[0]

    # The MOFkey gets the first (SingleNode) topology appended when known.
    if topology == 'NA':
        mofkey = base_mofkey
    else:
        mofkey = assemble_mofkey(base_mofkey, topology.split(',')[0])

    all_fragments = sorted(list(node_fragments) + list(linker_fragments))
    mofid = assemble_mofid(all_fragments, topology, cat, mof_name=mof_name)
    parsed = parse_mofid(mofid)

    identifiers = {
        'mofid': mofid,
        'mofkey': mofkey,
        'smiles_nodes': node_fragments,
        'smiles_linkers': linker_fragments,
        'smiles': parsed['smiles'],
        'topology': parsed['topology'],
        'cat': parsed['cat'],
        'cifname': parsed['name']
    }

    # Write MOFid and MOFkey output to files, as well as node/linker info.
    single_value_outputs = (
        ('python_mofid.txt', 'mofid'),
        ('python_mofkey.txt', 'mofkey'),
    )
    for file_name, key in single_value_outputs:
        with open(os.path.join(output_path, file_name), 'w') as out:
            out.write(identifiers[key] + '\n')

    labelled_fragments = (
        ('node', node_fragments),
        ('linker', linker_fragments),
    )
    with open(os.path.join(output_path, 'python_smiles_parts.txt'),
              'w') as out:
        for role, fragment_group in labelled_fragments:
            for smiles in fragment_group:
                out.write(role + '\t' + smiles + '\n')

    with open(os.path.join(output_path, 'python_molec_formula.txt'),
              'w') as out:
        out.write(
            openbabel_GetSpacedFormula(
                os.path.join(output_path, 'orig_mol.cif'), ' ', False) + '\n')

    return identifiers
def test_mofid(self, mofid):
    # Validate a MOFid with whichever parser self._choose_parser selects for
    # the structure name embedded in the string.
    #
    # Returns the selected parser's test_mofid() result, or None when no
    # parser is selected for the name.
    # Raises AssertionError when the string lacks the 'MOFid' marker.
    #
    # The check is an explicit raise rather than an `assert` statement so it
    # is still enforced when Python runs with -O, which strips asserts. The
    # exception type callers see is unchanged.
    if 'MOFid' not in mofid:
        raise AssertionError(
            "Expected a string containing 'MOFid', got: %r" % (mofid,))

    parser = self._choose_parser(parse_mofid(mofid)['name'])
    if parser is None:
        return None
    return parser.test_mofid(mofid)
def test_mofid(self, mofid):
    # Check a generated MOFid string against the expectation derived from
    # the CIF name embedded in it.
    #
    # The timer starts before parsing, so the elapsed time reported by
    # self._test_generated includes the parsing step. Returns whatever
    # self._test_generated returns.
    started_at = time.time()
    structure_name = parse_mofid(mofid)['name']
    return self._test_generated(
        structure_name, mofid, started_at, 'from_mofid')