예제 #1
0
파일: matthews.py 프로젝트: dimar/gemmi
def gather_data():
    """Print one tab-separated line of selected mmCIF values per input file.

    A header line is written to stdout first.  Then, for every path from
    util.get_file_paths_from_args(), the sole block of the file is read and
    a row is emitted with: the entry id, the number of _entity_poly.type
    values containing 'nucleotide', the values of
    _exptl_crystal.density_percent_sol, _exptl_crystal.density_Matthews and
    _refine.ls_d_res_high, the parsed initial deposition date, and the
    deposit group id.
    """
    out = csv.writer(sys.stdout, dialect='excel-tab')
    out.writerow(
        ['code', 'na_chains', 'vs', 'vm', 'd_min', 'date', 'group'])
    date_tag = '_pdbx_database_status.recvd_initial_deposition_date'
    for cif_path in util.get_file_paths_from_args():
        blk = cif.read(cif_path).sole_block()
        entry_id = cif.as_string(blk.find_value('_entry.id'))
        # Count the polymer entities whose type mentions 'nucleotide'.
        poly_types = blk.find('_entity_poly.type')
        na_chains = sum(1 for row in poly_types if 'nucleotide' in row[0])
        solvent_percent = blk.find_value('_exptl_crystal.density_percent_sol')
        matthews = blk.find_value('_exptl_crystal.density_Matthews')
        resolution = blk.find_value('_refine.ls_d_res_high')
        # Only the first value of the deposition-date tag is used.
        deposited = parse_date(blk.find_values(date_tag).str(0))
        deposit_group = blk.find_value('_pdbx_deposit_group.group_id')
        out.writerow([entry_id, na_chains, solvent_percent, matthews,
                      resolution, deposited, deposit_group])
예제 #2
0
def gather_data():
    """Write one tab-separated row of selected mmCIF values per input file.

    A header row is written to stdout first.  For each path returned by
    util.get_file_paths_from_args() the sole block of the file is read and
    one row is emitted with these columns:

    code       -- string value of _entry.id
    na_chains  -- number of _entity_poly.type values containing 'nucleotide'
    vs         -- value of _exptl_crystal.density_percent_sol
    vm         -- value of _exptl_crystal.density_Matthews
    d_min      -- value of _refine.ls_d_res_high
    date       -- the smallest parsed _database_PDB_rev.date_original value,
                  or None (written by csv as an empty field) when the file
                  has no usable date
    group      -- string value of _pdbx_deposit_group.group_id
    """
    writer = csv.writer(sys.stdout, dialect='excel-tab')
    writer.writerow(
        ['code', 'na_chains', 'vs', 'vm', 'd_min', 'date', 'group'])
    for path in util.get_file_paths_from_args():
        block = cif.read_any(path).sole_block()
        code = block.find_string('_entry.id')
        na = sum('nucleotide' in t[0] for t in block.find('_entity_poly.type'))
        vs = block.find_value('_exptl_crystal.density_percent_sol')
        vm = block.find_value('_exptl_crystal.density_Matthews')
        d_min = block.find_value('_refine.ls_d_res_high')
        dates = block.find('_database_PDB_rev.date_original')
        # Skip the CIF placeholders '?' and '.' by exact comparison; testing
        # against the string '?.' would be a substring check instead.
        known_dates = [parse_date(d[0]) for d in dates
                       if d[0] not in ('?', '.')]
        # min() raises ValueError on an empty sequence, which would abort
        # the whole run on the first file that lacks revision dates.
        oldest_date = min(known_dates) if known_dates else None
        group = block.find_string('_pdbx_deposit_group.group_id')
        writer.writerow([code, na, vs, vm, d_min, oldest_date, group])
예제 #3
0
from __future__ import print_function
from gemmi import cif
from util import get_file_paths_from_args

# Report files whose _atom_site category has columns that differ, in
# presence or order, from the usual layout.
# The _atom_site.*_esd columns were removed in mmCIF v5; switch to the
# empty ESD below to compare against a layout without them.
ESD = 'Cartn_x_esd Cartn_y_esd Cartn_z_esd occupancy_esd B_iso_or_equiv_esd '
#ESD = ''
USUAL_ORDER = (
    'group_PDB id type_symbol label_atom_id label_alt_id '
    'label_comp_id label_asym_id label_entity_id label_seq_id '
    'pdbx_PDB_ins_code Cartn_x Cartn_y Cartn_z occupancy '
    'B_iso_or_equiv '
    + ESD +
    'pdbx_formal_charge '
    'auth_seq_id auth_comp_id auth_asym_id auth_atom_id '
    'pdbx_PDB_model_num'
)

# Maps each unusual column layout to the number of files that use it.
counts = {}
for cif_path in get_file_paths_from_args():
    block = cif.read(cif_path).sole_block()
    atom_site_loop = block.find_loop("_atom_site.id").get_loop()
    full_tags = atom_site_loop.tags
    assert all(tag.startswith("_atom_site.") for tag in full_tags)
    # Drop the "_atom_site." category prefix from every tag.
    short_names = [tag[len("_atom_site."):] for tag in full_tags]
    tags = ' '.join(short_names)
    if tags == USUAL_ORDER:
        continue
    print(tags)
    print(USUAL_ORDER)
    print(block.name, tags)
    if tags in counts:
        counts[tags] += 1
    else:
        counts[tags] = 1

# Summary: how many files were seen with each unusual layout.
for layout, n_files in counts.items():
    print(n_files, layout)

# Results: in v4, a few EM structures (5A9Z 5AA0 5FKI 4UDF)
# had a different order, with ATOM/HETATM in the middle.
예제 #4
0
파일: weight.py 프로젝트: mhemmit/gemmi
def main():
    """Check the sole block of every input file.

    For each path from util.get_file_paths_from_args(), the file is read
    and its sole block is passed first to check_chem_comp_formula_weight()
    and then to check_entity_formula_weight().
    """
    cif_paths = util.get_file_paths_from_args()
    for cif_path in cif_paths:
        sole = cif.read(cif_path).sole_block()
        for check in (check_chem_comp_formula_weight,
                      check_entity_formula_weight):
            check(sole)