Пример #1
0
    def test_mmcif_file(self):
        """Exercise category lookups and row deletion on the 5i55.cif fixture."""
        fixture = os.path.join(os.path.dirname(__file__), '5i55.cif')
        block = cif.read(fixture).sole_block()
        category_names = block.get_mmcif_category_names()
        self.assertEqual(len(category_names), 54)

        # The category name is passed both without and with a trailing dot.
        self.assertEqual(block.get_mmcif_category('_entry'), {'id': ['5I55']})
        expected_wavelengths = {
            'id': ['1', '2', '3'],
            'wavelength': ['0.9792', '0.9794', '0.9796'],
            'wt': ['1.0', '1.0', '1.0'],
        }
        self.assertEqual(
            block.get_mmcif_category('_diffrn_radiation_wavelength.'),
            expected_wavelengths)
        chem_comp = block.get_mmcif_category('_chem_comp.')
        self.assertEqual(chem_comp['mon_nstd_flag'][:2], [False, 'y'])
        self.assertEqual(chem_comp['pdbx_synonyms'][:2], [None, None])

        # test __delitem__
        entry_rows = block.find(['_entry.id'])
        del entry_rows[0]
        self.assertEqual(block.get_mmcif_category('_entry'), {'id': []})

        def remaining_nums():
            # Re-read the column each time so deletions are observed.
            return list(block.find_values('_entity_poly_seq.num'))

        seq_table = block.find(['_entity_poly_seq.mon_id'])
        self.assertEqual(remaining_nums(), list(map(str, range(1, 23))))
        del seq_table[1::2]
        self.assertEqual(remaining_nums(), list(map(str, range(1, 23, 2))))
        del seq_table[3:]
        self.assertEqual(remaining_nums(), ['1', '3', '5'])
        del seq_table[:-1]
        self.assertEqual(remaining_nums(), ['5'])
Пример #2
0
 def test_reading_gzipped_file(self):
     """Read the gzipped 1pfe fixture and inspect mmCIF category tables."""
     gz_path = os.path.join(os.path.dirname(__file__), '1pfe.cif.gz')
     document = cif.read(gz_path)
     block = document.sole_block()

     names = block.get_mmcif_category_names()
     self.assertEqual(names[0], '_entry.')
     self.assertEqual(len(names), 72)

     expected_exptl_tags = [
         '_exptl.entry_id',
         '_exptl.method',
         '_exptl.crystals_number',
     ]
     exptl = block.find_mmcif_category('_exptl')
     self.assertEqual(exptl.get_prefix(), '_exptl.')
     self.assertEqual(list(exptl.tags), expected_exptl_tags)
     self.assertEqual(len(exptl), 1)
     self.assertEqual(exptl.width(), 3)

     # Look the same category up a second time and check it again.
     exptl = block.find_mmcif_category('_exptl')
     self.assertEqual(len(exptl), 1)
     self.assertEqual(exptl.width(), 3)
     self.assertEqual(exptl[0].str(1), 'X-RAY DIFFRACTION')

     struct_asym = block.find_mmcif_category('_struct_asym')
     self.assertEqual(len(struct_asym), 7)
     self.assertEqual(struct_asym.width(), 5)
     fourth_row = list(struct_asym[3])
     self.assertListEqual(fourth_row, ['D', 'N', 'N', '4', '?'])

     # A category that is absent gives an empty table without a prefix.
     nonexistent = block.find_mmcif_category('_nonexistent')
     with self.assertRaises(RuntimeError):
         nonexistent.get_prefix()
     self.assertEqual(len(nonexistent), 0)
     self.assertEqual(nonexistent.width(), 0)
def find_elements(root, name):
    """Print the distinct ``_atom_site.type_symbol`` values of one CIF file.

    The file is ``name`` inside directory ``root``.  Output is the file name
    followed by the space-separated symbols.  Any exception is reported on
    stdout as ``Oops. <message>`` instead of being propagated.
    """
    try:
        block = cif.read(os.path.join(root, name)).sole_block()
        symbols = set(block.find_loop("_atom_site.type_symbol"))
        print(' '.join([name, ' '.join(symbols)]))
    except Exception as err:
        print("Oops. %s" % err)
Пример #4
0
def read_rst(path):
    """Read the ``restraints`` block of a CIF file into grouped restraints.

    Rows of the ``_restr.`` table (columns given by ``COLUMNS``) whose first
    value is ``MONO`` or ``LINK`` open a new group; every other row becomes a
    ``Restraint`` appended to the most recently opened group.

    Returns a list of ``(kind, name, restraints)`` tuples, with groups that
    collected no restraints left out.
    """
    groups = []
    table = cif.read(path)['restraints'].find('_restr.', COLUMNS)
    for row in table:
        kind = row[0]
        if kind in ('MONO', 'LINK'):
            groups.append((kind.lower(), row[2].lower(), []))
            continue
        # '.' marks an absent value; everything else is lower-cased.
        fields = [None if value == '.' else value.lower() for value in row]
        groups[-1][2].append(Restraint(*fields))
    return [group for group in groups if group[2]]
def operate_cif_file():
    """Return a table of selected ``_atom_site`` columns.

    The CIF file is the one named by the first command-line argument; only
    its first block is searched.
    """
    atom_site_fields = (
        "group_PDB",
        "label_entity_id",
        "label_seq_id",
        "label_atom_id",
        "Cartn_x",
        "Cartn_y",
        "Cartn_z",
        "label_alt_id",
        "auth_asym_id",
        "auth_comp_id",
        "pdbx_PDB_model_num",
        "auth_seq_id",
        "pdbx_PDB_ins_code",
    )
    document = cif.read(sys.argv[1])
    first_block = document[0]
    return first_block.find(["_atom_site." + field
                             for field in atom_site_fields])
Пример #6
0
def read_crd(path):
    """Load ``_atom_site`` rows from a CIF file and number the kept atoms.

    Rows whose last selected column (``calc_flag``) equals ``'M'`` are
    dropped.  The remaining rows are mapped from their ``id`` value to a
    running serial; ``None`` and ``'.'`` map to themselves.

    Returns ``Crd(atoms, real_serial)``.
    """
    wanted = ['id', 'label_atom_id', 'label_alt_id',
              'label_comp_id', 'occupancy', 'calc_flag']
    block = cif.read(path).sole_block()
    atoms = []
    for site in block.find('_atom_site.', wanted):
        if site[-1] != 'M':
            atoms.append(site)
    real_serial = {None: None, '.': '.'}
    for atom in atoms:
        # One of the two seeded entries is discounted, so numbering starts
        # at 1; a repeated id keeps the dict size unchanged.
        serial = len(real_serial) - 1
        real_serial[atom[0]] = serial
    return Crd(atoms, real_serial)
def aif2json(infile):
    """Convert an AIF file to a JSON string.

    The sole block of ``infile`` is read.  Every key/value pair in it is
    translated with ``crossreference_keys`` (using ``equivalency_table``) and
    stored under the translated key, converted to the reported type (float,
    str or int); pairs with any other reported type are skipped.  The
    adsorption loop is then emitted as a list of single-component isotherm
    points under ``'isotherm_data'``.

    Returns the JSON text (indented by 4 spaces).

    Raises KeyError if the loop has points but no pair was translated to
    ``'adsorbate'``.
    """
    data = cif.read(infile).sole_block()

    data_dict = {}

    # wrapper for metadata
    for item in data:
        if item.pair is None:
            continue
        inkey = item.pair[0]
        outkey, dtype = crossreference_keys(equivalency_table,
                                            inkey,
                                            informat='AIF')
        if dtype in (float, str, int):
            data_dict[outkey] = dtype(item.pair[-1])

    # wrapper for isotherm loop
    isotherm_data = []
    ads_press = np.array(data.find_loop('_adsorp_pressure'), dtype=float)
    ads_amount = np.array(data.find_loop('_adsorp_amount'), dtype=float)
    try:
        ads_p0 = np.array(data.find_loop('_adsorp_p0'), dtype=float)
    except ValueError:
        ads_p0 = None
    # NOTE(review): a missing `_adsorp_p0` loop may come back as an empty
    # array rather than raising ValueError -- confirm against gemmi.  Either
    # way there is no p0 to report, so both cases are treated alike.
    output_p0 = ads_p0 is not None and ads_p0.size > 0

    # single component only
    for index, (p, a) in enumerate(zip(ads_press, ads_amount)):
        point = {
            'pressure': p,
            'branch': 'adsorp',
            'species_data': [{
                'name': data_dict['adsorbate'],
                'composition': 1.0,
                'adsorption': a
            }]
        }
        if output_p0:
            # Take p0 from the same row.  Searching for the pressure value
            # picked the wrong row when a pressure was repeated and failed
            # for NaN.
            point['p0'] = ads_p0[index]
        isotherm_data.append(point)
    data_dict['isotherm_data'] = isotherm_data

    return json.dumps(data_dict, indent=4)
Пример #8
0
def gather_data():
    """Read mmCIF files and write a few numbers per file as one TSV line.

    Input paths come from ``util.get_file_paths_from_args()``; output goes
    to stdout in the ``excel-tab`` CSV dialect, preceded by a header row.
    """
    header = ['code', 'na_chains', 'vs', 'vm', 'd_min', 'date', 'group']
    out = csv.writer(sys.stdout, dialect='excel-tab')
    out.writerow(header)
    dep_date_tag = '_pdbx_database_status.recvd_initial_deposition_date'
    for path in util.get_file_paths_from_args():
        block = cif.read(path).sole_block()
        # Values are looked up in the same order as the header columns.
        row = [
            cif.as_string(block.find_value('_entry.id')),
            sum('nucleotide' in t[0]
                for t in block.find('_entity_poly.type')),
            block.find_value('_exptl_crystal.density_percent_sol'),
            block.find_value('_exptl_crystal.density_Matthews'),
            block.find_value('_refine.ls_d_res_high'),
            parse_date(block.find_values(dep_date_tag).str(0)),
            block.find_value('_pdbx_deposit_group.group_id'),
        ]
        out.writerow(row)
Пример #9
0
 def test_mmcif_file(self):
     """Check dict-style access to mmCIF categories of the 5i55.cif fixture."""
     test_dir = os.path.dirname(__file__)
     block = cif.read(os.path.join(test_dir, '5i55.cif')).sole_block()
     names = block.get_mmcif_category_names()
     self.assertEqual(len(names), 54)

     self.assertEqual(block.get_mmcif_category('_entry'), {'id': ['5I55']})

     wavelengths = block.get_mmcif_category('_diffrn_radiation_wavelength.')
     self.assertEqual(wavelengths, {
         'id': ['1', '2', '3'],
         'wavelength': ['0.9792', '0.9794', '0.9796'],
         'wt': ['1.0', '1.0', '1.0'],
     })

     chem_comp = block.get_mmcif_category('_chem_comp.')
     first_two_flags = chem_comp['mon_nstd_flag'][:2]
     first_two_synonyms = chem_comp['pdbx_synonyms'][:2]
     self.assertEqual(first_two_flags, [False, 'y'])
     self.assertEqual(first_two_synonyms, [None, None])
Пример #10
0
def compare_monlib_with_ccd(mon_path, ccd):
    """Compare monomers from monomer library and CCD that have the same names.

    Every block of every file returned by ``get_monomer_cifs(mon_path)`` is
    visited, except blocks named ``''`` or ``'comp_list'``.  The block name
    minus its ``comp_`` prefix is looked up in ``ccd``; when found, the pair
    is handed to ``compare_chem_comp``.  The number of compared monomers is
    printed at the end.
    """
    PRINT_MISSING_ENTRIES = False
    skipped_names = ('', 'comp_list')
    prefix_len = 5  # length of the 'comp_' prefix
    compared = 0
    for path in get_monomer_cifs(mon_path):
        mon = cif.read(path)
        for mb in mon:
            if mb.name in skipped_names:
                continue
            assert mb.name.startswith('comp_')
            name = mb.name[prefix_len:]
            cb = ccd.find_block(name)
            if not cb:
                if PRINT_MISSING_ENTRIES:
                    print('Not in CCD:', name)
                continue
            compare_chem_comp(mb, cb)
            compared += 1
    print('Compared', compared, 'monomers.')
Пример #11
0
def main():
    """Parse the command line and run the requested CCD checks.

    Sets the module-level ``verbose`` flag from ``-v``.  The monomer library
    path comes from ``-m`` or, failing that, ``$CLIBD_MON``; the program
    exits with a message when neither a library path nor ``-f`` is given.
    """
    global verbose
    parser = argparse.ArgumentParser()
    parser.add_argument('ccd_path', metavar='/path/to/components.cif[.gz]')
    parser.add_argument(
        '-m', metavar='DIR',
        help='monomer library path (default: $CLIBD_MON)')
    parser.add_argument('-f', action='store_true', help='check CCD formulas')
    parser.add_argument('-v', action='store_true', help='verbose')
    args = parser.parse_args()
    verbose = args.v

    mon_path = args.m if args.m else os.getenv('CLIBD_MON')
    if not (mon_path or args.f):
        sys.exit('Unknown monomer library path: use -m or set $CLIBD_MON.')

    ccd = cif.read(args.ccd_path)
    if args.f:
        check_formulas(ccd)
    if mon_path:
        compare_monlib_with_ccd(mon_path, ccd)
Пример #12
0
    def _get_mmcif_residue_list(self):
        """Collect the residues of the mmCIF file in auth numbering.

        Reads ``self.cif_file`` and stores the result in ``self.mmcif_data``
        (used for the residue check of the json data) as::

            {chain: {auth_residue_number + insertion_code: residue_name}}

        Nothing is returned; the dictionary is only set on the instance.
        """
        from gemmi import cif
        doc = cif.read(self.cif_file)
        block = doc.sole_block()
        atom_site = block.get_mmcif_category('_atom_site.')

        # key=chain, val={auth_residue_number(with inscode): resname}
        self.mmcif_data = {}
        for auth_resnum, auth_resname, inscode, chain in zip(
                atom_site["auth_seq_id"], atom_site["auth_comp_id"],
                atom_site["pdbx_PDB_ins_code"], atom_site["auth_asym_id"]):
            # None/False stand for an absent insertion code; compare to the
            # singletons by identity rather than with ==.
            if inscode is None or inscode is False:
                inscode = ""
            auth_residue_number = "%s%s" % (auth_resnum, inscode)
            self.mmcif_data.setdefault(chain,
                                       {})[auth_residue_number] = auth_resname
Пример #13
0
from __future__ import print_function
import sys
from gemmi import cif, CifWalk

# Alternative value, for files that carry the e.s.d. columns:
#ESD = 'Cartn_x_esd Cartn_y_esd Cartn_z_esd occupancy_esd B_iso_or_equiv_esd '
ESD = ''
USUAL_ORDER = (
    'group_PDB id type_symbol label_atom_id label_alt_id '
    'label_comp_id label_asym_id label_entity_id label_seq_id '
    'pdbx_PDB_ins_code Cartn_x Cartn_y Cartn_z occupancy '
    'B_iso_or_equiv ' + ESD + 'pdbx_formal_charge '
    'auth_seq_id auth_comp_id auth_asym_id auth_atom_id '
    'pdbx_PDB_model_num'
)

# Maps each unusual tag order to the number of files that use it.
counts = {}
for arg in sys.argv[1:]:
    for path in CifWalk(arg):
        block = cif.read(path).sole_block()
        loop_tags = block.find_loop("_atom_site.id").get_loop().tags
        assert all(t.startswith("_atom_site.") for t in loop_tags)
        # 11 == len("_atom_site."): keep only the part after the prefix.
        tags = ' '.join(t[11:] for t in loop_tags)
        if tags == USUAL_ORDER:
            continue
        print(tags)
        print(USUAL_ORDER)
        print(block.name, tags)
        counts.setdefault(tags, 0)
        counts[tags] += 1

for order, n_files in counts.items():
    print(n_files, order)

# Results: in v4 a few EM structures (5A9Z 5AA0 5FKI 4UDF)
# had different order, with ATOM/HETATM in the middle.
Пример #14
0
 def test_frame_reading(self):
     """Look up frames by name in the dictionary fragment fixture."""
     dic_path = full_path('mmcif_pdbx_v50_frag.dic')
     block = cif.read(dic_path).sole_block()
     missing = block.find_frame('heyho')
     self.assertIsNone(missing)
     frame = block.find_frame('_atom_site.auth_atom_id')
     self.assertEqual(frame.find_value('_item_type.code'), 'atcode')
Пример #15
0
# - CENTROID CO-ORDINATES FOR EACH MOLECULE IN A UNIT CELL
# - SOME LIST OF LINKING NUMBERS
#---------------------------------------------------------------------
# Collect every CIF path that gemmi.CifWalk yields for `inputdir`
# (`inputdir`, `st` and `e` are defined outside this fragment).
files = list(gemmi.CifWalk(inputdir))

# Accumulators: names of the processed files, and a second list that is
# not filled within the visible part of the loop.

file_list = []
link_list = []
print(st + e)

# Only the slice files[st:e] is processed.
for file_path in files[st:e]:
    # The file name is the last "/"-separated component of the path.
    file_path_split = file_path.split("/")
    file_name = file_path_split[len(file_path_split) - 1]
    file_list.append(file_name)
    doc = cif.read(file_path)
    # Start with the first block; the loop below may replace it.
    block = doc[0]

    for b in doc:
        # NOTE(review): if find_loop() never returns None this test is always
        # true and `block` ends up as the last block of the document --
        # confirm against the gemmi API.
        if (b.find_loop('_atom_site_') != None):
            block = b

    print("Processed file --> " + file_name)
    # The same file is opened again through CrystalReader; its first entry
    # is used.
    crystal_reader = CrystalReader(file_path)
    crystal = crystal_reader[0]
    crystal.assign_bonds()
    # Pack the box spanning (0, 0, 0)-(1, 1, 1) with the 'CentroidIncluded'
    # inclusion setting.
    packed_molecules = crystal.packing(box_dimensions=((0, 0, 0), (1, 1, 1)),
                                       inclusion='CentroidIncluded')
    packed_molecules.normalise_labels()

    adta_molecules = []
# -*- coding: utf-8 -*-
"""Plot AIF from command line"""
import sys
import os
from gemmi import cif  # pylint: disable-msg=no-name-in-module
import matplotlib.pyplot as plt
import numpy as np

# The AIF file to plot is the first command-line argument.
filename = sys.argv[1]

aif = cif.read(filename)
block = aif.sole_block()

# Each loop column is converted to a float array, in this order.
ads_press, ads_amount, des_press, des_amount = (
    np.array(block.find_loop(tag), dtype=float)
    for tag in ('_adsorp_pressure', '_adsorp_amount',
                '_desorp_pressure', '_desorp_amount'))

material_id = block.find_pair('_sample_material_id')[-1]

# Both branches share a colour; desorption markers get a white face.
plt.plot(ads_press, ads_amount, 'o-', color='C0')
plt.plot(des_press, des_amount, 'o-', color='C0', markerfacecolor='white')

loading_units = block.find_pair('_units_loading')[-1]
plt.ylabel('quantity adsorbed / ' + loading_units)
pressure_units = block.find_pair('_units_pressure')[-1]
plt.xlabel('pressure / ' + pressure_units)
adsorptive = block.find_pair('_exptl_adsorptive')[-1]
temperature = block.find_pair('_exptl_temperature')[-1]
plt.title(adsorptive + ' on ' + material_id + ' at ' + temperature + 'K')

# The PDF is written next to the input, with the extension replaced.
pdf_path = os.path.splitext(filename)[0] + '.pdf'
plt.savefig(pdf_path)
Пример #17
0
def main():
    """Run both formula-weight checks on the sole block of each input file.

    Input paths are supplied by ``get_file_paths_from_args()``.
    """
    for cif_path in get_file_paths_from_args():
        sole = cif.read(cif_path).sole_block()
        check_chem_comp_formula_weight(sole)
        check_entity_formula_weight(sole)
Пример #18
0
#!/usr/bin/env python3

# Read CCD and fill-in one-letter-codes in gemmi/resinfo.hpp
# Usage: ./tools/resinfo.py > resinfo.hpp-new

import re
from sys import stderr
import gemmi
from gemmi import cif

# Read at import time from a path relative to the working directory.
ccd = cif.read('components.cif.gz')

# Residue names in a fixed order: the three-letter names first, then the
# one- and two-letter names.
STANDARD = (
    'ALA ARG ASN ASP ASX CYS GLN GLU GLX GLY '
    'HIS ILE LEU LYS MET PHE PRO SER THR TRP '
    'TYR UNK VAL SEC PYL '
    'A C G I U '
    'DA DC DG DI DT DU'
).split()


def calculate_formula_weight(formula):
    """Return the total weight of a whitespace-separated chemical formula.

    Each token of ``formula`` is an element symbol of one or two letters,
    optionally followed by an integer count (for example ``"C2 H6 O"``); a
    bare symbol counts once.  Per-element weights are taken from
    ``gemmi.Element(symbol).weight``.

    Raises ValueError if the part of a token after the symbol is not an
    integer.
    """
    total = 0.
    for elem_count in formula.split():
        if elem_count.isalpha():
            elem, count = elem_count, 1
        else:
            # The symbol is two characters long only when the second
            # character is a letter; the rest of the token is the count.
            n = 2 if elem_count[1].isalpha() else 1
            elem = elem_count[:n]
            count = int(elem_count[n:])
        total += count * gemmi.Element(elem).weight
    # The accumulated sum was previously dropped, so callers got None.
    return total
Пример #19
0
 def test_file_not_found(self):
     """Reading a path that does not exist must raise IOError."""
     self.assertRaises(IOError, cif.read, 'file-that-does-not-exist.cif')