Пример #1
0
def compare_mofids(mofid1, mofid2, names=None):
    """Compare two MOFid strings field by field and report how they differ.

    Args:
        mofid1: Reference MOFid string, or None when it is undefined.
        mofid2: MOFid string checked against the reference, or None.
        names: Two labels used as result keys for the raw ``mofid1`` and
            ``mofid2`` values. Defaults to ``['mof1', 'mof2']``.

    Returns:
        dict: When either input is None, a placeholder record with
        ``'match': 'NA'``, ``'errors': ['Undefined composition']``, None for
        'topology', 'smiles' and 'cat', the two raw inputs, and 'name' taken
        from whichever MOFid is defined ('Undefined' if neither is).
        Otherwise a record with 'match' (bool), 'errors' (list of
        ``'err_<field>'`` tags), the two raw inputs, and one entry per field
        returned by ``parse_mofid`` holding the agreed value, or False when
        the two MOFids disagree on that field.
    """
    labels = ['mof1', 'mof2'] if names is None else names

    if mofid1 is None or mofid2 is None:
        # At most one of the two can be defined here; borrow its name.
        defined = [m for m in (mofid1, mofid2) if m is not None]
        mof_name = parse_mofid(defined[-1])['name'] if defined else 'Undefined'
        return {
            'match': 'NA',
            'errors': ['Undefined composition'],
            'topology': None,
            'smiles': None,
            'cat': None,
            labels[0]: mofid1,
            labels[1]: mofid2,
            'name': mof_name
        }

    reference, candidate = [parse_mofid(m) for m in (mofid1, mofid2)]
    report = {
        'match': True,
        'errors': [],
        labels[0]: mofid1,
        labels[1]: mofid2,
    }

    for field, expected in reference.items():
        found = candidate[field]
        agrees = found == expected
        if not agrees and field == 'topology':
            # The second MOFid may list several comma-separated alternate
            # topologies; any one of them equal to the reference is a match.
            agrees = expected in found.split(',')
        if agrees:
            report[field] = expected
            continue
        # Mismatch: flag the field and record it as 'err_<field>',
        # e.g. 'err_topology'.
        report[field] = False
        report['match'] = False
        report['errors'].append('err_' + field)

    # Replace the generic SMILES error with the finer-grained error classes
    # produced by diff() on the two SMILES strings.
    if 'err_smiles' in report['errors']:
        report['errors'].remove('err_smiles')
        report['errors'].extend(
            'err_' + detail
            for detail in diff(reference['smiles'], candidate['smiles']))

    return report
Пример #2
0
    def _test_generated(self,
                        cif_path,
                        generated_mofid,
                        start_time=None,
                        generation_type='from_generated',
                        mofkey=None):
        """Check a generated MOFid against the one expected for ``cif_path``.

        The generated value may come from the script itself (cif2mofid) or
        from an external .smi file. Besides the plain comparison, known
        failure signatures (timeout, Systre error, C++ error, no MOF found)
        are registered as extra expected entries so that they are reported
        by their root cause rather than as a generic mismatch.

        Args:
            cif_path: Path/name passed to ``self.expected_mofid``.
            generated_mofid: MOFid string under test, or None.
            start_time: ``time.time()`` value taken when the test began; when
                None the reported time is 0.
            generation_type: Label for the generated MOFid in the result;
                '_transformed' is appended if ``self.transform_mofid``
                changed the value.
            mofkey: Optional MOFkey stored under 'mofkey_from_cif'.

        Returns:
            The dict from ``self.compare_multi_mofid`` extended with 'time',
            'name_parser' and optionally 'mofkey_from_cif', or None when no
            expected MOFid is available.
        """
        expected = self.expected_mofid(cif_path)
        if expected is None:
            # Missing SBU info in the DB file: structures with undefined
            # nodes/linkers are currently skipped rather than reported.
            return None

        # A bare string becomes the 'default' entry of a dict of candidates.
        if py2:
            is_plain_string = type(expected) in [str, unicode]
        else:
            is_plain_string = type(expected) is str
        if is_plain_string:
            expected = {'default': expected}

        default = parse_mofid(expected['default'])
        fragments = default['smiles'].split('.')

        # Register the MOFids produced when the program exits with errors.
        # Without them the validator would only report a generic class such
        # as 'err_topology' instead of the actual program error or timeout.
        failure_signatures = (
            ('err_timeout', fragments, 'TIMEOUT', default['cat']),
            ('err_systre_error', fragments, 'ERROR', default['cat']),
            ('err_cpp_error', ['*'], 'NA', None),
            ('err_no_mof', ['*'], 'NA', 'no_mof'),
        )
        for error_key, parts, topology, cat in failure_signatures:
            expected[error_key] = assemble_mofid(
                parts, topology, cat, mof_name=default['name'])

        # Apply any transformation to the generated MOFid (from CIF or smi
        # database), if applicable (e.g. GA hMOFs).
        test_mofid = self.transform_mofid(generated_mofid)
        if test_mofid != generated_mofid:
            generation_type += '_transformed'
        labels = ['from_name', generation_type]

        # A defined MOFid that transforms to None means the transform failed;
        # compare the untransformed value, then override the verdict.
        transform_failed = test_mofid is None and generated_mofid is not None
        subject = generated_mofid if transform_failed else test_mofid
        comparison = self.compare_multi_mofid(expected, subject, labels)
        if transform_failed:
            comparison['errors'] = ['err_missing_transform']
            comparison['match'] = False

        comparison['time'] = (
            0 if start_time is None else time.time() - start_time)
        if mofkey is not None:
            comparison['mofkey_from_cif'] = mofkey
        comparison['name_parser'] = self.__class__.__name__
        return comparison
Пример #3
0
 def parse(self, filename):
     """Load MOFid records from ``filename`` into ``self.tables``.

     Every line of the file is passed to ``parse_mofid``. The parsed fields,
     minus 'name', are stored in ``self.tables`` under that name together
     with three derived entries: 'smiles_part' (the SMILES split on '.'),
     'base_topology' (first comma-separated topology) and 'extra_topology'
     (the remaining topologies re-joined with ','). ``self.datatypes`` is
     reassigned on every line to the keys of the record just parsed.

     Args:
         filename: Path of the text file to read.

     Returns:
         This object, so calls can be chained.
     """
     with open(filename, 'r') as handle:
         for record in handle:
             fields = parse_mofid(record)
             mof_name = fields.pop('name')
             fields['smiles_part'] = fields['smiles'].split('.')
             topologies = fields['topology'].split(',')
             fields['base_topology'] = topologies[0]
             # Empty string when only a single topology is present
             fields['extra_topology'] = ','.join(topologies[1:])
             self.tables[mof_name] = copy.deepcopy(fields)
             self.datatypes = fields.keys()
     return self
Пример #4
0
    def transform_mofid(self, mofid):
        """De-functionalize a MOFid read from a CIF or a database.

        Stripping the functional group allows the linker skeletons that were
        found to be compared directly with the expected ones.

        Args:
            mofid: MOFid string, or None.

        Returns:
            None if ``mofid`` is None, if the functionalization code parsed
            from its name is not in ``self.mof_db['functionalization']``, or
            if the SMILES part does not contain the functional-group pattern
            (callers report these as transform errors). The unchanged
            ``mofid`` when the code is '0'. Otherwise the MOFid rebuilt from
            the sorted, de-duplicated skeleton SMILES followed by the rest of
            the original string.
        """
        if mofid is None:
            return None

        mof_name = parse_mofid(mofid)['name']
        fg = self.parse_filename(mof_name)['functionalization']
        if fg == '0':
            return mofid

        known_patterns = self.mof_db['functionalization']
        if fg not in known_patterns:
            return None  # Raises a transform error

        tokens = mofid.split()
        smiles = tokens[0]
        fancy_name = ' '.join(tokens[1:])
        pattern = known_patterns[fg]
        if not openbabel_contains(smiles, pattern):
            return None  # will raise a transform error in the output

        # Re-joining before splitting again handles transformations that
        # break one building block into several '.'-separated parts.
        replaced = '.'.join(
            openbabel_replace(part, pattern, '[#1:1]')
            for part in smiles.split('.'))
        # Keep each backbone once, even if it appeared with several
        # functionalization patterns.
        backbones = set(replaced.split('.'))
        # Null linker left by defunctionalizing a lone functional group
        backbones.discard('')

        # Reconstruct the defunctionalized MOFid
        return ' '.join(['.'.join(sorted(backbones)), fancy_name])
Пример #5
0
def cif2mofid(cif_path, output_path=DEFAULT_OUTPUT_PATH):
    """Build the MOFid (and MOFkey) for a CIF file and write them to disk.

    Args:
        cif_path: Path to the input CIF file; made absolute before use.
        output_path: Directory passed to ``extract_fragments`` and used for
            the output files; made absolute before use.

    Returns:
        dict with keys 'mofid', 'mofkey', 'smiles_nodes', 'smiles_linkers',
        'smiles', 'topology', 'cat' and 'cifname'.

    Side effects:
        Writes python_mofid.txt, python_mofkey.txt, python_smiles_parts.txt
        and python_molec_formula.txt into ``output_path``.
    """
    cif_path = os.path.abspath(cif_path)
    output_path = os.path.abspath(output_path)

    def in_output(*parts):
        # Path of a file or sub-directory inside the output directory
        return os.path.join(output_path, *parts)

    node_fragments, linker_fragments, cat, base_mofkey = extract_fragments(
        cif_path, output_path)

    if cat is None:
        topology = 'NA'
    else:
        single_node = extract_topology(in_output('SingleNode', 'topology.cgd'))
        all_node = extract_topology(in_output('AllNode', 'topology.cgd'))
        # Report both topologies only when the two disagree
        if single_node == all_node:
            topology = single_node
        else:
            topology = single_node + ',' + all_node

    mof_name = os.path.splitext(os.path.basename(cif_path))[0]

    mofkey = base_mofkey
    if topology != 'NA':
        # Only the first (base) topology goes into the MOFkey
        mofkey = assemble_mofkey(base_mofkey, topology.split(',')[0])

    all_fragments = sorted(list(node_fragments) + list(linker_fragments))
    mofid = assemble_mofid(all_fragments, topology, cat, mof_name=mof_name)
    parsed = parse_mofid(mofid)

    identifiers = {
        'mofid': mofid,
        'mofkey': mofkey,
        'smiles_nodes': node_fragments,
        'smiles_linkers': linker_fragments,
        'smiles': parsed['smiles'],
        'topology': parsed['topology'],
        'cat': parsed['cat'],
        'cifname': parsed['name']
    }

    # Persist the MOFid and MOFkey, then the node/linker details
    with open(in_output('python_mofid.txt'), 'w') as out:
        out.write(mofid + '\n')
    with open(in_output('python_mofkey.txt'), 'w') as out:
        out.write(mofkey + '\n')
    with open(in_output('python_smiles_parts.txt'), 'w') as out:
        labelled_groups = (('node', node_fragments),
                           ('linker', linker_fragments))
        for label, group in labelled_groups:
            for smiles in group:
                out.write(label + '\t' + smiles + '\n')
    with open(in_output('python_molec_formula.txt'), 'w') as out:
        formula = openbabel_GetSpacedFormula(
            in_output('orig_mol.cif'), ' ', False)
        out.write(formula + '\n')

    return identifiers
Пример #6
0
 def test_mofid(self, mofid):
     """Delegate validation of ``mofid`` to the parser chosen for its name.

     Args:
         mofid: MOFid string; must contain the text 'MOFid'.

     Returns:
         The result of the selected parser's ``test_mofid``, or None when
         ``self._choose_parser`` returns no parser for the MOF name.
     """
     assert 'MOFid' in mofid
     mof_name = parse_mofid(mofid)['name']
     chosen = self._choose_parser(mof_name)
     return None if chosen is None else chosen.test_mofid(mofid)
Пример #7
0
 def test_mofid(self, mofid):
     """Test a generated MOFid against the expectation from its CIF name.

     The clock is started before parsing so that the elapsed time reported
     by ``self._test_generated`` covers the whole check.

     Args:
         mofid: Generated MOFid string.

     Returns:
         Whatever ``self._test_generated`` returns for the name parsed from
         ``mofid``, using the generation type 'from_mofid'.
     """
     started_at = time.time()
     mof_name = parse_mofid(mofid)['name']
     return self._test_generated(mof_name, mofid, started_at, 'from_mofid')