Example #1
def run(args):
    max_n_it = 0
    for file_name in args:
        prev_tag = ""
        pl_counts = None
        for line in open(file_name).read().splitlines():
            if (not line.startswith(": ")): continue
            op = line.find(" (")
            cp = line.find(") ")
            assert op > 0
            assert cp > op
            pl = line[2:op].split()
            n_it = int(line[op + 2:cp])
            if (n_it > max_n_it): max_n_it = n_it
            tag = line[cp + 2:].split()[0]
            if (prev_tag != tag):
                if (len(prev_tag) != 0):
                    show(prev_tag, pl_counts)
                prev_tag = tag
                pl_counts = dict_with_default_0()
            pl_c = dict_with_default_0()
            for p in pl:
                pl_c[p] += 1
            for p, c in pl_c.items():
                pl_counts[p] = max(pl_counts[p], c)
        # flush the counts for the final tag (if any lines matched)
        if (len(prev_tag) != 0):
            show(prev_tag, pl_counts)
    print "max_n_it:", max_n_it
Example #2
File: spacegroup.py Project: RAPD/RAPD
def build_ccp4_symmetry_table():

    symbol_to_number = {}
    ccp4_to_number = {}

    # Open file
    file_iter = open(op.join(
        extract_from_symmetry_lib.ccp4io_lib_data, "symop.lib"))
    ccp4_id_counts = libtbx.dict_with_default_0()
    ccp4_symbol_counts = libtbx.dict_with_default_0()

    # Run through the file
    for line in file_iter:
        # print "\n", line.rstrip()
        flds = line.split(None, 4)
        ccp4_id = flds[0]
        ccp4_id_counts[ccp4_id] += 1
        space_group_number = int(ccp4_id[-3:])
        order_z = int(flds[1])
        given_ccp4_symbol = flds[3]

        symbol_to_number[given_ccp4_symbol] = space_group_number

        ccp4_symbol_counts[given_ccp4_symbol] += 1
        group = extract_from_symmetry_lib.collect_symops(
          file_iter=file_iter, order_z=order_z)
        assert group.order_z() == order_z
        space_group_info = sgtbx.space_group_info(group=group)
        retrieved_ccp4_symbol = extract_from_symmetry_lib.ccp4_symbol(
          space_group_info=space_group_info,
          lib_name="symop.lib")
        # print "retrieved_ccp4_symbol", retrieved_ccp4_symbol
        assert retrieved_ccp4_symbol == given_ccp4_symbol
        assert space_group_info.type().number() == space_group_number
        if (1):
            from iotbx.pdb import format_cryst1_sgroup
            sgroup = format_cryst1_sgroup(space_group_info=space_group_info)
            # if (len(sgroup) > 11):
            #     print "ccp4 symop.lib setting leads to pdb CRYST1 overflow:",\
            #       ccp4_id, given_ccp4_symbol, sgroup
            # print "sgroup", sgroup
            ccp4_to_number[sgroup] = space_group_number
    # for ccp4_id,count in ccp4_id_counts.items():
    #     if (count != 1):
    #         raise RuntimeError(
    #             'ccp4 id "%s" appears %d times (should be unique).'
    #               % (ccp4_id, count))
    # ccp4_symbol_counts = libtbx.dict_with_default_0()
    # for ccp4_symbol,count in ccp4_symbol_counts.items():
    #     if (count != 1):
    #         raise RuntimeError(
    #             'ccp4 symbol "%s" appears %d times (should be unique).'
    #               % (ccp4_symbol, count))
    return (symbol_to_number, ccp4_to_number)
Example #3
def build_ccp4_symmetry_table():

    symbol_to_number = {}
    ccp4_to_number = {}

    # Open file
    file_iter = open(
        op.join(extract_from_symmetry_lib.ccp4io_lib_data, "symop.lib"))
    ccp4_id_counts = libtbx.dict_with_default_0()
    ccp4_symbol_counts = libtbx.dict_with_default_0()

    # Run through the file
    for line in file_iter:
        # print "\n", line.rstrip()
        flds = line.split(None, 4)
        ccp4_id = flds[0]
        ccp4_id_counts[ccp4_id] += 1
        space_group_number = int(ccp4_id[-3:])
        order_z = int(flds[1])
        given_ccp4_symbol = flds[3]

        symbol_to_number[given_ccp4_symbol] = space_group_number

        ccp4_symbol_counts[given_ccp4_symbol] += 1
        group = extract_from_symmetry_lib.collect_symops(file_iter=file_iter,
                                                         order_z=order_z)
        assert group.order_z() == order_z
        space_group_info = sgtbx.space_group_info(group=group)
        retrieved_ccp4_symbol = extract_from_symmetry_lib.ccp4_symbol(
            space_group_info=space_group_info, lib_name="symop.lib")
        # print "retrieved_ccp4_symbol", retrieved_ccp4_symbol
        assert retrieved_ccp4_symbol == given_ccp4_symbol
        assert space_group_info.type().number() == space_group_number
        if (1):
            from iotbx.pdb import format_cryst1_sgroup
            sgroup = format_cryst1_sgroup(space_group_info=space_group_info)
            # if (len(sgroup) > 11):
            #     print "ccp4 symop.lib setting leads to pdb CRYST1 overflow:",\
            #       ccp4_id, given_ccp4_symbol, sgroup
            # print "sgroup", sgroup
            ccp4_to_number[sgroup] = space_group_number
    # for ccp4_id,count in ccp4_id_counts.items():
    #     if (count != 1):
    #         raise RuntimeError(
    #             'ccp4 id "%s" appears %d times (should be unique).'
    #               % (ccp4_id, count))
    # ccp4_symbol_counts = libtbx.dict_with_default_0()
    # for ccp4_symbol,count in ccp4_symbol_counts.items():
    #     if (count != 1):
    #         raise RuntimeError(
    #             'ccp4 symbol "%s" appears %d times (should be unique).'
    #               % (ccp4_symbol, count))
    return (symbol_to_number, ccp4_to_number)
Example #4
def exercise():
    list_cif = server.mon_lib_list_cif()
    srv = server.server(list_cif=list_cif)
    print("srv.root_path:", srv.root_path)
    table_of_contents = []
    n_get_comp_comp_id_successes = 0
    unknown_type_energy_counts = dict_with_default_0()
    missing_angle_definitions_counts = dict_with_default_0()
    missing_bond_values_counts = dict_with_default_0()
    missing_angle_values_counts = dict_with_default_0()
    for first_char in string.ascii_lowercase + string.digits:
        sub_dir = os.path.join(srv.root_path, first_char)
        if (not os.path.isdir(sub_dir)): continue
        for node in os.listdir(sub_dir):
            if (not node.lower().endswith(".cif")): continue
            comp_id = node[:-4]
            if (comp_id.endswith("_EL")): continue
            if (comp_id in ["CON_CON", "PRN_PRN"]):
                comp_id = comp_id[:3]
            if (comp_id.upper() != comp_id):
                print("Mixed case:", os.path.join(first_char, node))
            comp_comp_id = srv.get_comp_comp_id_direct(comp_id=comp_id)
            if (comp_comp_id is None):
                print("Error instantiating comp_comp_id %s (%s)" %
                      (comp_id, os.path.join(sub_dir, node)))
            else:
                n_get_comp_comp_id_successes += 1
                table_of_contents.append(" ".join(
                    [comp_id.upper(),
                     os.path.join(first_char, node)]))
                status = detect_unknown_type_energy(comp_id=comp_id,
                                                    comp_comp_id=comp_comp_id)
                unknown_type_energy_counts[status] += 1
                status = detect_missing_angle_definitions(
                    comp_id=comp_id, comp_comp_id=comp_comp_id)
                missing_angle_definitions_counts[status] += 1
                status = detect_missing_bond_values(comp_id=comp_id,
                                                    comp_comp_id=comp_comp_id)
                missing_bond_values_counts[status] += 1
                status = detect_missing_angle_values(comp_id=comp_id,
                                                     comp_comp_id=comp_comp_id)
                missing_angle_values_counts[status] += 1
                if (1 and status != "ok"):
                    print('svn rm "%s"' % os.path.join(first_char, node))
    print("number of cif files read successfully:",
          n_get_comp_comp_id_successes)
    print("unknown type_energy counts:", unknown_type_energy_counts)
    print("missing bond angle definitions counts:", \
      missing_angle_definitions_counts)
    print("missing bond values counts:", missing_bond_values_counts)
    print("missing angle values counts:", missing_angle_values_counts)
    print("writing file table_of_contents")
    open("table_of_contents", "w").write("\n".join(table_of_contents) + "\n")
def exercise():
  list_cif = server.mon_lib_list_cif()
  srv = server.server(list_cif=list_cif)
  print "srv.root_path:", srv.root_path
  table_of_contents = []
  n_get_comp_comp_id_successes = 0
  unknown_type_energy_counts = dict_with_default_0()
  missing_angle_definitions_counts = dict_with_default_0()
  missing_bond_values_counts = dict_with_default_0()
  missing_angle_values_counts = dict_with_default_0()
  for first_char in string.lowercase+string.digits:
    sub_dir = os.path.join(srv.root_path, first_char)
    if (not os.path.isdir(sub_dir)): continue
    for node in os.listdir(sub_dir):
      if (not node.lower().endswith(".cif")): continue
      comp_id = node[:-4]
      if (comp_id.endswith("_EL")): continue
      if (comp_id in ["CON_CON", "PRN_PRN"]):
        comp_id = comp_id[:3]
      if (comp_id.upper() != comp_id):
        print "Mixed case:", os.path.join(first_char, node)
      comp_comp_id = srv.get_comp_comp_id_direct(comp_id=comp_id)
      if (comp_comp_id is None):
        print "Error instantiating comp_comp_id %s (%s)" % (
          comp_id, os.path.join(sub_dir, node))
      else:
        n_get_comp_comp_id_successes += 1
        table_of_contents.append(
          " ".join([comp_id.upper(), os.path.join(first_char, node)]))
        status = detect_unknown_type_energy(
          comp_id=comp_id, comp_comp_id=comp_comp_id)
        unknown_type_energy_counts[status] += 1
        status = detect_missing_angle_definitions(
          comp_id=comp_id, comp_comp_id=comp_comp_id)
        missing_angle_definitions_counts[status] += 1
        status = detect_missing_bond_values(
          comp_id=comp_id, comp_comp_id=comp_comp_id)
        missing_bond_values_counts[status] += 1
        status = detect_missing_angle_values(
          comp_id=comp_id, comp_comp_id=comp_comp_id)
        missing_angle_values_counts[status] += 1
        if (1 and status != "ok"):
          print 'svn rm "%s"' % os.path.join(first_char, node)
  print "number of cif files read successfully:", n_get_comp_comp_id_successes
  print "unknown type_energy counts:", unknown_type_energy_counts
  print "missing bond angle definitions counts:", \
    missing_angle_definitions_counts
  print "missing bond values counts:", missing_bond_values_counts
  print "missing angle values counts:", missing_angle_values_counts
  print "writing file table_of_contents"
  open("table_of_contents", "w").write("\n".join(table_of_contents)+"\n")
Example #6
def run(args):
  assert len(args) == 0
  qstat_info = qstat_parse()
  user_states = {}
  for items in qstat_info:
    counts = user_states.setdefault(items.user, dict_with_default_0())
    counts[items.state] += items.counts()
  sum_counts = []
  for user,counts in user_states.items():
    sum_counts.append((user, sum(counts.values())))
  def cmp_sum_counts(a,b):
    result = cmp(b[1], a[1])
    if (result != 0): return result
    return cmp(b[0], a[0])
  sum_counts.sort(cmp_sum_counts)
  cpus=0
  for user,sc in sum_counts:
    counts = user_states[user]
    print "%-10s   %5d r   %5d qw" % (user, counts["r"], counts["qw"]),
    cpus+=counts["r"]
    for state,c in counts.items():
      if (state in ["r", "qw"]): continue
      print "  %5d %s" % (c, state),
    print "  %5d total" % sc
  print "-"*45
  print "%-10s   %5d r" % ("total", cpus)
  print "-"*45
def exercise_symop_lib_recycling():
    file_iter = open(
        op.join(extract_from_symmetry_lib.ccp4io_lib_data, "symop.lib"))
    ccp4_id_counts = libtbx.dict_with_default_0()
    ccp4_symbol_counts = libtbx.dict_with_default_0()
    for line in file_iter:
        flds = line.split(None, 4)
        ccp4_id = flds[0]
        ccp4_id_counts[ccp4_id] += 1
        space_group_number = int(ccp4_id[-3:])
        order_z = int(flds[1])
        given_ccp4_symbol = flds[3]
        ccp4_symbol_counts[given_ccp4_symbol] += 1
        group = extract_from_symmetry_lib.collect_symops(file_iter=file_iter,
                                                         order_z=order_z)
        assert group.order_z() == order_z
        space_group_info = sgtbx.space_group_info(group=group)
        retrieved_ccp4_symbol = extract_from_symmetry_lib.ccp4_symbol(
            space_group_info=space_group_info, lib_name="symop.lib")
        assert retrieved_ccp4_symbol == given_ccp4_symbol
        assert space_group_info.type().number() == space_group_number
        if (1):
            from iotbx.pdb import format_cryst1_sgroup
            sgroup = format_cryst1_sgroup(space_group_info=space_group_info)
            if (len(sgroup) > 11):
                print("ccp4 symop.lib setting leads to pdb CRYST1 overflow:",\
                  ccp4_id, given_ccp4_symbol, sgroup)
    file_iter.close()
    for ccp4_id, count in ccp4_id_counts.items():
        if (count != 1):
            raise RuntimeError(
                'ccp4 id "%s" appears %d times (should be unique).' %
                (ccp4_id, count))
    ccp4_symbol_counts = libtbx.dict_with_default_0()
    for ccp4_symbol, count in ccp4_symbol_counts.items():
        if (count != 1):
            raise RuntimeError(
                'ccp4 symbol "%s" appears %d times (should be unique).' %
                (ccp4_symbol, count))
Example #8
def exercise_symop_lib_recycling():
  file_iter = open(op.join(
    extract_from_symmetry_lib.ccp4io_lib_data, "symop.lib"))
  ccp4_id_counts = libtbx.dict_with_default_0()
  ccp4_symbol_counts = libtbx.dict_with_default_0()
  for line in file_iter:
    flds = line.split(None, 4)
    ccp4_id = flds[0]
    ccp4_id_counts[ccp4_id] += 1
    space_group_number = int(ccp4_id[-3:])
    order_z = int(flds[1])
    given_ccp4_symbol = flds[3]
    ccp4_symbol_counts[given_ccp4_symbol] += 1
    group = extract_from_symmetry_lib.collect_symops(
      file_iter=file_iter, order_z=order_z)
    assert group.order_z() == order_z
    space_group_info = sgtbx.space_group_info(group=group)
    retrieved_ccp4_symbol = extract_from_symmetry_lib.ccp4_symbol(
      space_group_info=space_group_info,
      lib_name="symop.lib")
    assert retrieved_ccp4_symbol == given_ccp4_symbol
    assert space_group_info.type().number() == space_group_number
    if (1):
      from iotbx.pdb import format_cryst1_sgroup
      sgroup = format_cryst1_sgroup(space_group_info=space_group_info)
      if (len(sgroup) > 11):
        print "ccp4 symop.lib setting leads to pdb CRYST1 overflow:",\
          ccp4_id, given_ccp4_symbol, sgroup
  for ccp4_id,count in ccp4_id_counts.items():
    if (count != 1):
      raise RuntimeError(
        'ccp4 id "%s" appears %d times (should be unique).'
          % (ccp4_id, count))
  ccp4_symbol_counts = libtbx.dict_with_default_0()
  for ccp4_symbol,count in ccp4_symbol_counts.items():
    if (count != 1):
      raise RuntimeError(
        'ccp4 symbol "%s" appears %d times (should be unique).'
          % (ccp4_symbol, count))
Example #9
def run(args):
    if (len(args) != 2):
        raise Usage("""\
cctbx.python space_subgroups.py max_index space_group_symbol
  Example: cctbx.python space_subgroups.py 2 P41212""")
    #
    max_index = int(args[0])
    print("max_index:", max_index)
    assert max_index >= 1
    print()
    space_group_t_den = 144
    sginfo = sgtbx.space_group_info(symbol=args[1],
                                    space_group_t_den=space_group_t_den)
    sginfo.show_summary()
    print()
    cb_op_to_p = sginfo.change_of_basis_op_to_primitive_setting()
    sginfo_p = sginfo.change_basis(cb_op=cb_op_to_p)
    if (sginfo_p.group() != sginfo.group()):
        print("Primitive setting:")
        sginfo_p.show_summary()
        print()
    #
    all_subgroups = dict_with_default_0()
    sg_p = sginfo_p.group()
    sg_p_a = sg_p.build_derived_acentric_group()
    if (sg_p.is_centric()):
        inv_mx = sg_p(0, 1, 0).t()
    else:
        inv_mx = None
    for symx1 in sg_p_a:
        subgr1 = sgtbx.space_group(hall_symbol="P1", t_den=space_group_t_den)
        subgr1.expand_smx(symx1)
        for symx2 in sg_p_a:
            subgr2 = sgtbx.space_group(subgr1)
            subgr2.expand_smx(symx2)
            loop_over_super_cells(max_index=max_index,
                                  all_subgroups=all_subgroups,
                                  subgroup=subgr2)
            if (inv_mx is not None):
                subgr3 = sgtbx.space_group(subgr2)
                subgr3.expand_inv(inv_mx)
                loop_over_super_cells(max_index=max_index,
                                      all_subgroups=all_subgroups,
                                      subgroup=subgr3)
    #
    show_sorted_by_counts(label_count_pairs=list(all_subgroups.items()))
Example #10
def show_images_per_miller_index(O, first_block_size=20):
  print "Images per Miller index:"
  from libtbx import dict_with_default_0
  counts = dict_with_default_0()
  for iiis in O.map:
    counts[len(iiis)] += 1
  n_seq = O.miller_indices.size()
  have_break = False
  for n_imgs in sorted(counts.keys()):
    if (n_imgs > first_block_size and n_imgs < len(counts)-5):
      if (not have_break):
        have_break = True
        print "        ..."
    else:
      c = counts[n_imgs]
      print "  %6d %6d %8.6f" % (n_imgs, c, c/n_seq)
  print
  sys.stdout.flush()
Example #12
def run(args):
  if (len(args) != 2):
    raise Usage("""\
cctbx.python space_subgroups.py max_index space_group_symbol
  Example: cctbx.python space_subgroups.py 2 P41212""")
  #
  max_index = int(args[0])
  print "max_index:", max_index
  assert max_index >= 1
  print
  space_group_t_den = 144
  sginfo = sgtbx.space_group_info(
    symbol=args[1], space_group_t_den=space_group_t_den)
  sginfo.show_summary()
  print
  cb_op_to_p = sginfo.change_of_basis_op_to_primitive_setting()
  sginfo_p = sginfo.change_basis(cb_op=cb_op_to_p)
  if (sginfo_p.group() != sginfo.group()):
    print "Primitive setting:"
    sginfo_p.show_summary()
    print
  #
  all_subgroups = dict_with_default_0()
  sg_p = sginfo_p.group()
  sg_p_a = sg_p.build_derived_acentric_group()
  if (sg_p.is_centric()):
    inv_mx = sg_p(0, 1, 0).t()
  else:
    inv_mx = None
  for symx1 in sg_p_a:
    subgr1 = sgtbx.space_group(hall_symbol="P1", t_den=space_group_t_den)
    subgr1.expand_smx(symx1)
    for symx2 in sg_p_a:
      subgr2 = sgtbx.space_group(subgr1)
      subgr2.expand_smx(symx2)
      loop_over_super_cells(
        max_index=max_index, all_subgroups=all_subgroups, subgroup=subgr2)
      if (inv_mx is not None):
        subgr3 = sgtbx.space_group(subgr2)
        subgr3.expand_inv(inv_mx)
        loop_over_super_cells(
          max_index=max_index, all_subgroups=all_subgroups, subgroup=subgr3)
  #
  show_sorted_by_counts(label_count_pairs=all_subgroups.items())
Example #13
def format_focus_input(tag, xray_structure, term_table):
  min_sym_nodes = 0
  for scatterer in xray_structure.scatterers():
    min_sym_nodes += scatterer.multiplicity()
  node_types = dict_with_default_0()
  for terms in term_table:
    node_types[terms[1]] += 1
  out = StringIO()
  print >> out, "Title Code", tag
  print >> out, "SpaceGroup", \
    str(xray_structure.space_group_info()).replace(" ","")
  print >> out, "UnitCell", \
    str(xray_structure.unit_cell())[1:-1].replace(",", "")
  print >> out, "MinNodeDistance 2.7"
  print >> out, "MinSymNodes", min_sym_nodes
  for node_type,count in node_types.items():
    print >> out, "NodeType  %d  %d" % (node_type, count)
  print >> out, "CheckTetrahedralGeometry Off"
  for scatterer in xray_structure.scatterers():
    print >> out, "%-4s" % scatterer.label, \
      "%8.5f %8.5f %8.5f" % scatterer.site
  print >> out, "End"
  return out.getvalue()
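The "print >> out" redirection in this example is Python 2 syntax; under Python 3 the same pattern uses print(..., file=out). A small sketch for comparison (format_focus_header is a hypothetical helper, not a library function):

from io import StringIO

def format_focus_header(tag):
    # Python 3 equivalent of the "print >> out" redirection above.
    out = StringIO()
    print("Title Code", tag, file=out)
    print("End", file=out)
    return out.getvalue()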
Example #14
def run(args):
    assert len(args) == 0
    qstat_info = qstat_parse()
    user_states = {}
    for items in qstat_info:
        counts = user_states.setdefault(items.user, dict_with_default_0())
        counts[items.state] += items.counts()
    sum_counts = []
    for user, counts in user_states.items():
        sum_counts.append((user, sum(counts.values())))
    sum_counts.sort(key=itemgetter(1, 0))
    cpus = 0
    for user, sc in sum_counts:
        counts = user_states[user]
        print("%-10s   %5d r   %5d qw" % (user, counts["r"], counts["qw"]),
              end=' ')
        cpus += counts["r"]
        for state, c in counts.items():
            if (state in ["r", "qw"]): continue
            print("  %5d %s" % (c, state), end=' ')
        print("  %5d total" % sc)
    print("-" * 45)
    print("%-10s   %5d r" % ("total", cpus))
    print("-" * 45)
Example #15
def exercise_angle_pair_asu_table(structure, distance_cutoff, connectivities,
                                  reference_apatanl, reference_cppc):
    sg_asu_mappings = structure.asu_mappings(buffer_thickness=2 *
                                             distance_cutoff)
    sg_pat = crystal.pair_asu_table(asu_mappings=sg_asu_mappings)
    sg_pat.add_all_pairs(distance_cutoff=distance_cutoff, min_cubicle_edge=0)
    # compare connectivities with reference
    assert list(sg_pat.pair_counts()) == connectivities
    #
    p1_structure = structure.expand_to_p1()
    p1_asu_mappings = p1_structure.asu_mappings(buffer_thickness=2 *
                                                distance_cutoff)
    p1_pat = crystal.pair_asu_table(asu_mappings=p1_asu_mappings)
    p1_pat.add_all_pairs(distance_cutoff=distance_cutoff, min_cubicle_edge=0)
    sg_labels = structure.scatterers().extract_labels()
    p1_labels = p1_structure.scatterers().extract_labels()
    label_connect = dict(zip(sg_labels, sg_pat.pair_counts()))
    for l, c in zip(p1_labels, p1_pat.pair_counts()):
        # compare connectivities in original space group and in P1
        assert label_connect[l] == c
    #
    sg_apat_py = py_pair_asu_table_angle_pair_asu_table(self=sg_pat)
    sg_apat = sg_pat.angle_pair_asu_table()
    assert sg_apat.as_nested_lists() == sg_apat_py.as_nested_lists()
    sg_counts = {}
    for i_seq, pair_asu_dict in enumerate(sg_apat.table()):
        lbl_i = sg_labels[i_seq]
        for j_seq, pair_asu_j_sym_groups in pair_asu_dict.items():
            lbl_j = sg_labels[j_seq]
            for j_sym_group in pair_asu_j_sym_groups:
                sg_counts.setdefault(
                    lbl_i, dict_with_default_0())[lbl_j] += len(j_sym_group)
    p1_apat = p1_pat.angle_pair_asu_table()
    p1_counts = {}
    for i_seq, pair_asu_dict in enumerate(p1_apat.table()):
        lbl_i = p1_labels[i_seq]
        for j_seq, pair_asu_j_sym_groups in pair_asu_dict.items():
            lbl_j = p1_labels[j_seq]
            for j_sym_group in pair_asu_j_sym_groups:
                p1_counts.setdefault(
                    lbl_i, dict_with_default_0())[lbl_j] += len(j_sym_group)
    # self-consistency check
    multiplicities = {}
    for sc in structure.scatterers():
        multiplicities[sc.label] = sc.multiplicity()
    assert sorted(p1_counts.keys()) == sorted(sg_counts.keys())
    for lbl_i, sg_lc in sg_counts.items():
        p1_lc = p1_counts[lbl_i]
        assert sorted(p1_lc.keys()) == sorted(sg_lc.keys())
        for lbl_j, sg_c in sg_lc.items():
            p1_c = p1_lc[lbl_j]
            assert p1_c == sg_c * multiplicities[lbl_i]
    # compare with reference
    apatanl = str(sg_apat.as_nested_lists()).replace(" ", "")
    if (reference_apatanl is not None):
        assert apatanl == reference_apatanl
    #
    counts = []
    for conserve_angles in [False, True]:
        proxies = structure.conservative_pair_proxies(
            bond_sym_table=sg_pat.extract_pair_sym_table(),
            conserve_angles=conserve_angles)
        counts.extend([proxies.bond.simple.size(), proxies.bond.asu.size()])
        if (not conserve_angles):
            assert proxies.angle is None
        else:
            counts.extend(
                [proxies.angle.simple.size(),
                 proxies.angle.asu.size()])
    cppc = ",".join([str(c) for c in counts])
    if (reference_cppc is not None):
        assert cppc == reference_cppc
Example #16
def check_comp(file_name):
  result = 0
  print "file name:", file_name
  cif_object = mmtbx.monomer_library.server.read_cif(file_name)
  for comp_comp_id in mmtbx.monomer_library.server.convert_comp_list(
                        source_info=file_name, cif_object=cif_object):
    result += 1
    atom_names = set()
    for atom in comp_comp_id.atom_list:
      atom_name = atom.atom_id
      assert atom_name.count(" ") == 0
      if (atom_name in atom_names):
        raise RuntimeError(
          "Duplicate atom name: %s" % show_string(atom_name))
      atom_names.add(atom_name)
    print "  number of atoms:", len(atom_names)
    #
    bond_atom_ids = set()
    for bond in comp_comp_id.bond_list:
      atom_ids = [bond.atom_id_1, bond.atom_id_2]
      for atom_name in atom_ids:
        if (atom_name not in atom_names):
          raise RuntimeError(
            "Unknown bond atom name: %s" % show_string(atom_name))
      atom_ids = tuple(sorted(atom_ids))
      if (atom_ids in bond_atom_ids):
        raise RuntimeError(
          "Duplicate bond: %s - %s" % tuple([show_string(s)
            for s in atom_ids]))
      bond_atom_ids.add(atom_ids)
    print "  number of bonds:", len(bond_atom_ids)
    #
    angle_atom_ids = set()
    for angle in comp_comp_id.angle_list:
      atom_ids = [angle.atom_id_1, angle.atom_id_2, angle.atom_id_3]
      for atom_name in atom_ids:
        if (atom_name not in atom_names):
          raise RuntimeError(
            "Unknown angle atom name: %s" % show_string(atom_name))
      atom_ids = tuple(sorted(atom_ids))
      if (atom_ids in angle_atom_ids):
        raise RuntimeError(
          "Duplicate angle: %s - %s - %s" % tuple([show_string(s)
            for s in atom_ids]))
      angle_atom_ids.add(atom_ids)
    print "  number of angles:", len(angle_atom_ids)
    #
    tor_atom_ids = set()
    for tor in comp_comp_id.tor_list:
      atom_ids = [tor.atom_id_1, tor.atom_id_2, tor.atom_id_3, tor.atom_id_4]
      for atom_name in atom_ids:
        if (atom_name not in atom_names):
          raise RuntimeError(
            "Unknown tor atom name: %s" % show_string(atom_name))
      atom_ids = tuple(sorted(atom_ids))
      if (atom_ids in tor_atom_ids):
        raise RuntimeError(
          "Duplicate tor: %s - %s - %s - %s" % tuple([show_string(s)
            for s in atom_ids]))
      tor_atom_ids.add(atom_ids)
    print "  number of tors:", len(tor_atom_ids)
    tor_atom_ids = {}
    for tor in comp_comp_id.tor_list:
      atom_ids = tuple(sorted([tor.atom_id_2, tor.atom_id_3]))
      tor_atom_ids.setdefault(atom_ids, []).append(tor)
    for atom_ids,tors in tor_atom_ids.items():
      if (len(tors) != 1):
        print "    redundant tors:", ", ".join([tor.id for tor in tors])
    #
    chir_atom_ids = set()
    for chir in comp_comp_id.chir_list:
      atom_ids = [
        chir.atom_id_1, chir.atom_id_2, chir.atom_id_3, chir.atom_id_centre]
      for atom_name in atom_ids:
        if (atom_name not in atom_names):
          raise RuntimeError(
            "Unknown chir atom name: %s" % show_string(atom_name))
      atom_ids = tuple(sorted(atom_ids))
      if (atom_ids in chir_atom_ids):
        raise RuntimeError(
          "Duplicate chir: %s - %s - %s - %s" % tuple([show_string(s)
            for s in atom_ids]))
      chir_atom_ids.add(atom_ids)
    print "  number of chirs:", len(chir_atom_ids)
    #
    plane_atom_counts = dict_with_default_0()
    for plane_atom in comp_comp_id.plane_atom_list:
      if (plane_atom.atom_id not in atom_names):
        raise RuntimeError(
          "Unknown plane atom name: %s" % show_string(plane_atom.atom_id))
      plane_atom_counts[plane_atom.plane_id] += 1
    print "  number of planes:", len(plane_atom_counts)
    if (len(plane_atom_counts) != 0):
      show_sorted_by_counts(
        label_count_pairs=plane_atom_counts.items(),
        prefix="    ")
      assert min(plane_atom_counts.values()) >= 3
    #
    rotamer_info = comp_comp_id.rotamer_info()
    if (rotamer_info is not None):
      print "  rotamer_info.tor_ids:", rotamer_info.tor_ids
      for tor_id in rotamer_info.tor_ids:
        assert tor_id.strip() == tor_id
        assert tor_id.split() == [tor_id]
      for tor_atom_ids in rotamer_info.tor_atom_ids:
        assert len(tor_atom_ids) == 5
        assert tor_atom_ids[0] in rotamer_info.tor_ids
        for atom_id in tor_atom_ids[1:]:
          assert atom_id.strip() == atom_id
          assert atom_id.split() == [atom_id]
      atom_ids = rotamer_info.atom_ids_not_handled
      if (atom_ids is not None):
        for atom_id in atom_ids:
          assert atom_id.strip() == atom_id
          assert atom_id.split() == [atom_id]
      assert (
           rotamer_info.constrain_dihedrals_with_sigma_less_than_or_equal_to
             is None
        or rotamer_info.constrain_dihedrals_with_sigma_less_than_or_equal_to
             > 0)
      print "  number of rotamers:", len(rotamer_info.rotamer)
      n_missing_frequencies = 0
      for rotamer in rotamer_info.rotamer:
        assert rotamer.id is not None
        assert len(rotamer.id.strip()) == len(rotamer.id)
        assert len(rotamer.id.split()) == 1
        if (rotamer.frequency is None):
          if (rotamer.frequency_annotation != "for more uniform sampling"):
            n_missing_frequencies += 1
        else:
          assert rotamer.frequency > 0
          assert rotamer.frequency < 1
        assert rotamer.angles is not None
        assert len(rotamer.angles) == len(rotamer_info.tor_ids)
        for angle in rotamer.angles:
          assert angle is None or -180 < angle <= 180
      if (n_missing_frequencies != 0):
        print "  WARNING: number of missing frequencies:", \
          n_missing_frequencies
  return result
Example #17
def eval_1(args):
    first_file = open(args[0]).read().splitlines()
    #
    for line in first_file:
        if (line.startswith("pdb_file = ")):
            pdb_file = line.split(" ", 2)[2]
            assert pdb_file[0] == pdb_file[-1]
            assert pdb_file[0] in ['"', "'"]
            pdb_file = op.basename(pdb_file[1:-1])
            break
    else:
        raise RuntimeError('pdb_file = "..." not found.')
    #
    for line in first_file:
        if (line ==
                "random_displacements_parameterization = *constrained cartesian"
            ):
            random_displacements_parameterization = "constrained"
            break
        elif (line ==
              "random_displacements_parameterization = constrained *cartesian"
              ):
            random_displacements_parameterization = "cartesian"
            break
    else:
        raise RuntimeError(
            "random_displacements_parameterization = constrained or cartesian"
            " not found.")
    #
    for line in first_file:
        if (line == "algorithm = *minimization annealing"):
            algorithm = "minimization"
            break
        elif (line == "algorithm = minimization *annealing"):
            algorithm = "annealing"
            break
    else:
        raise RuntimeError("algorithm = minimization or annealing not found.")
    #
    del first_file
    #
    tst_tardy_pdb_master_phil = tst_tardy_pdb.get_master_phil()
    tst_tardy_pdb_params = tst_tardy_pdb_master_phil.extract()
    if (algorithm == "minimization"):
        parameter_trial_table \
          = tst_tardy_comprehensive.common_parameter_trial_table
    elif (algorithm == "annealing"):
        parameter_trial_table \
          = tst_tardy_comprehensive.annealing_parameter_trial_table
    else:
        raise AssertionError
    cp_n_trials = tst_tardy_comprehensive.number_of_trials(
        table=parameter_trial_table)
    #
    random_seed_rmsd = []
    for cp_i_trial in xrange(cp_n_trials):
        random_seed_rmsd.append({})
    for file_name in args:
        for line in open(file_name).read().splitlines():
            if (not line.startswith("RESULT_cp_i_trial_random_seed_rmsd: ")):
                continue
            flds = line.split(None, 3)
            assert len(flds) == 4
            cp_i_trial = int(flds[1])
            random_seed = int(flds[2])
            rmsd = flex.double(eval(flds[3]))
            assert not random_seed_rmsd[cp_i_trial].has_key(random_seed)
            random_seed_rmsd[cp_i_trial][random_seed] = rmsd
    random_seeds_found = dict_with_default_0()
    for cp_i_trial in xrange(cp_n_trials):
        random_seeds_found[tuple(sorted(
            random_seed_rmsd[cp_i_trial].keys()))] += 1
    if (len(random_seeds_found) != 1):
        print random_seeds_found
        raise RuntimeError("Unexpected random_seeds_found (see output).")
    assert random_seeds_found.values()[0] == cp_n_trials
    random_seeds_found = random_seeds_found.keys()[0]
    assert random_seeds_found == tuple(range(len(random_seeds_found)))
    rmsds = []
    for cp_i_trial in xrange(cp_n_trials):
        rmsds.append([
            random_seed_rmsd[cp_i_trial][random_seed]
            for random_seed in xrange(len(random_seeds_found))
        ])
    #
    write_separate_pages = False
    if (algorithm == "minimization"):
        rmsd_start_final_plots_minimization(
            pdb_file=pdb_file,
            random_displacements_parameterization=
            random_displacements_parameterization,
            tst_tardy_pdb_params=tst_tardy_pdb_params,
            parameter_trial_table=parameter_trial_table,
            cp_n_trials=cp_n_trials,
            rmsds=rmsds,
            rmsd_n_n=50,
            write_separate_pages=write_separate_pages)
    elif (algorithm == "annealing"):
        rmsd_start_final_plots_annealing(
            pdb_file=pdb_file,
            random_displacements_parameterization=
            random_displacements_parameterization,
            tst_tardy_pdb_params=tst_tardy_pdb_params,
            parameter_trial_table=parameter_trial_table,
            cp_n_trials=cp_n_trials,
            rmsds=rmsds,
            write_separate_pages=write_separate_pages)
Example #18
def check_comp(file_name):
    result = 0
    print "file name:", file_name
    cif_object = mmtbx.monomer_library.server.read_cif(file_name)
    for comp_comp_id in mmtbx.monomer_library.server.convert_comp_list(
            source_info=file_name, cif_object=cif_object):
        result += 1
        atom_names = set()
        for atom in comp_comp_id.atom_list:
            atom_name = atom.atom_id
            assert atom_name.count(" ") == 0
            if (atom_name in atom_names):
                raise RuntimeError("Duplicate atom name: %s" %
                                   show_string(atom_name))
            atom_names.add(atom_name)
        print "  number of atoms:", len(atom_names)
        #
        bond_atom_ids = set()
        for bond in comp_comp_id.bond_list:
            atom_ids = [bond.atom_id_1, bond.atom_id_2]
            for atom_name in atom_ids:
                if (atom_name not in atom_names):
                    raise RuntimeError("Unknown bond atom name: %s" %
                                       show_string(atom_name))
            atom_ids = tuple(sorted(atom_ids))
            if (atom_ids in bond_atom_ids):
                raise RuntimeError("Duplicate bond: %s - %s" %
                                   tuple([show_string(s) for s in atom_ids]))
            bond_atom_ids.add(atom_ids)
        print "  number of bonds:", len(bond_atom_ids)
        #
        angle_atom_ids = set()
        for angle in comp_comp_id.angle_list:
            atom_ids = [angle.atom_id_1, angle.atom_id_2, angle.atom_id_3]
            for atom_name in atom_ids:
                if (atom_name not in atom_names):
                    raise RuntimeError("Unknown angle atom name: %s" %
                                       show_string(atom_name))
            atom_ids = tuple(sorted(atom_ids))
            if (atom_ids in angle_atom_ids):
                raise RuntimeError("Duplicate angle: %s - %s - %s" %
                                   tuple([show_string(s) for s in atom_ids]))
            angle_atom_ids.add(atom_ids)
        print "  number of angles:", len(angle_atom_ids)
        #
        tor_atom_ids = set()
        for tor in comp_comp_id.tor_list:
            atom_ids = [
                tor.atom_id_1, tor.atom_id_2, tor.atom_id_3, tor.atom_id_4
            ]
            for atom_name in atom_ids:
                if (atom_name not in atom_names):
                    raise RuntimeError("Unknown tor atom name: %s" %
                                       show_string(atom_name))
            atom_ids = tuple(sorted(atom_ids))
            if (atom_ids in tor_atom_ids):
                raise RuntimeError("Duplicate tor: %s - %s - %s - %s" %
                                   tuple([show_string(s) for s in atom_ids]))
            tor_atom_ids.add(atom_ids)
        print "  number of tors:", len(tor_atom_ids)
        tor_atom_ids = {}
        for tor in comp_comp_id.tor_list:
            atom_ids = tuple(sorted([tor.atom_id_2, tor.atom_id_3]))
            tor_atom_ids.setdefault(atom_ids, []).append(tor)
        for atom_ids, tors in tor_atom_ids.items():
            if (len(tors) != 1):
                print "    redundant tors:", ", ".join(
                    [tor.id for tor in tors])
        #
        chir_atom_ids = set()
        for chir in comp_comp_id.chir_list:
            atom_ids = [
                chir.atom_id_1, chir.atom_id_2, chir.atom_id_3,
                chir.atom_id_centre
            ]
            for atom_name in atom_ids:
                if (atom_name not in atom_names):
                    raise RuntimeError("Unknown chir atom name: %s" %
                                       show_string(atom_name))
            atom_ids = tuple(sorted(atom_ids))
            if (atom_ids in chir_atom_ids):
                raise RuntimeError("Duplicate chir: %s - %s - %s - %s" %
                                   tuple([show_string(s) for s in atom_ids]))
            chir_atom_ids.add(atom_ids)
        print "  number of chirs:", len(chir_atom_ids)
        #
        plane_atom_counts = dict_with_default_0()
        for plane_atom in comp_comp_id.plane_atom_list:
            if (plane_atom.atom_id not in atom_names):
                raise RuntimeError("Unknown plane atom name: %s" %
                                   show_string(plane_atom.atom_id))
            plane_atom_counts[plane_atom.plane_id] += 1
        print "  number of planes:", len(plane_atom_counts)
        if (len(plane_atom_counts) != 0):
            show_sorted_by_counts(label_count_pairs=plane_atom_counts.items(),
                                  prefix="    ")
            assert min(plane_atom_counts.values()) >= 3
        #
        rotamer_info = comp_comp_id.rotamer_info()
        if (rotamer_info is not None):
            print "  rotamer_info.tor_ids:", rotamer_info.tor_ids
            for tor_id in rotamer_info.tor_ids:
                assert tor_id.strip() == tor_id
                assert tor_id.split() == [tor_id]
            for tor_atom_ids in rotamer_info.tor_atom_ids:
                assert len(tor_atom_ids) == 5
                assert tor_atom_ids[0] in rotamer_info.tor_ids
                for atom_id in tor_atom_ids[1:]:
                    assert atom_id.strip() == atom_id
                    assert atom_id.split() == [atom_id]
            atom_ids = rotamer_info.atom_ids_not_handled
            if (atom_ids is not None):
                for atom_id in atom_ids:
                    assert atom_id.strip() == atom_id
                    assert atom_id.split() == [atom_id]
            assert (
                rotamer_info.
                constrain_dihedrals_with_sigma_less_than_or_equal_to is None
                or rotamer_info.
                constrain_dihedrals_with_sigma_less_than_or_equal_to > 0)
            print "  number of rotamers:", len(rotamer_info.rotamer)
            n_missing_frequencies = 0
            for rotamer in rotamer_info.rotamer:
                assert rotamer.id is not None
                assert len(rotamer.id.strip()) == len(rotamer.id)
                assert len(rotamer.id.split()) == 1
                if (rotamer.frequency is None):
                    if (rotamer.frequency_annotation !=
                            "for more uniform sampling"):
                        n_missing_frequencies += 1
                else:
                    assert rotamer.frequency > 0
                    assert rotamer.frequency < 1
                assert rotamer.angles is not None
                assert len(rotamer.angles) == len(rotamer_info.tor_ids)
                for angle in rotamer.angles:
                    assert angle is None or -180 < angle <= 180
            if (n_missing_frequencies != 0):
                print "  WARNING: number of missing frequencies:", \
                  n_missing_frequencies
    return result
Example #19
def exercise_angle_pair_asu_table(
      structure,
      distance_cutoff,
      connectivities,
      reference_apatanl,
      reference_cppc):
  sg_asu_mappings = structure.asu_mappings(
    buffer_thickness=2*distance_cutoff)
  sg_pat = crystal.pair_asu_table(asu_mappings=sg_asu_mappings)
  sg_pat.add_all_pairs(
    distance_cutoff=distance_cutoff,
    min_cubicle_edge=0)
  # compare connectivities with reference
  assert list(sg_pat.pair_counts()) == connectivities
  #
  p1_structure = structure.expand_to_p1()
  p1_asu_mappings = p1_structure.asu_mappings(
    buffer_thickness=2*distance_cutoff)
  p1_pat = crystal.pair_asu_table(asu_mappings=p1_asu_mappings)
  p1_pat.add_all_pairs(
    distance_cutoff=distance_cutoff,
    min_cubicle_edge=0)
  sg_labels = structure.scatterers().extract_labels()
  p1_labels = p1_structure.scatterers().extract_labels()
  label_connect = dict(zip(sg_labels, sg_pat.pair_counts()))
  for l,c in zip(p1_labels, p1_pat.pair_counts()):
    # compare connectivities in original space group and in P1
    assert label_connect[l] == c
  #
  sg_apat_py = py_pair_asu_table_angle_pair_asu_table(self=sg_pat)
  sg_apat = sg_pat.angle_pair_asu_table()
  assert sg_apat.as_nested_lists() == sg_apat_py.as_nested_lists()
  sg_counts = {}
  for i_seq,pair_asu_dict in enumerate(sg_apat.table()):
    lbl_i = sg_labels[i_seq]
    for j_seq,pair_asu_j_sym_groups in pair_asu_dict.items():
      lbl_j = sg_labels[j_seq]
      for j_sym_group in pair_asu_j_sym_groups:
        sg_counts.setdefault(lbl_i, dict_with_default_0())[
                             lbl_j] += len(j_sym_group)
  p1_apat = p1_pat.angle_pair_asu_table()
  p1_counts = {}
  for i_seq,pair_asu_dict in enumerate(p1_apat.table()):
    lbl_i = p1_labels[i_seq]
    for j_seq,pair_asu_j_sym_groups in pair_asu_dict.items():
      lbl_j = p1_labels[j_seq]
      for j_sym_group in pair_asu_j_sym_groups:
        p1_counts.setdefault(lbl_i, dict_with_default_0())[
                             lbl_j] += len(j_sym_group)
  # self-consistency check
  multiplicities = {}
  for sc in structure.scatterers():
    multiplicities[sc.label] = sc.multiplicity()
  assert sorted(p1_counts.keys()) == sorted(sg_counts.keys())
  for lbl_i,sg_lc in sg_counts.items():
    p1_lc = p1_counts[lbl_i]
    assert sorted(p1_lc.keys()) == sorted(sg_lc.keys())
    for lbl_j,sg_c in sg_lc.items():
      p1_c = p1_lc[lbl_j]
      assert p1_c == sg_c * multiplicities[lbl_i]
  # compare with reference
  apatanl = str(sg_apat.as_nested_lists()).replace(" ","")
  if (reference_apatanl is not None):
    assert apatanl == reference_apatanl
  #
  counts = []
  for conserve_angles in [False, True]:
    proxies = structure.conservative_pair_proxies(
      bond_sym_table=sg_pat.extract_pair_sym_table(),
      conserve_angles=conserve_angles)
    counts.extend([proxies.bond.simple.size(), proxies.bond.asu.size()])
    if (not conserve_angles):
      assert proxies.angle is None
    else:
      counts.extend([proxies.angle.simple.size(), proxies.angle.asu.size()])
  cppc = ",".join([str(c) for c in counts])
  if (reference_cppc is not None):
    assert cppc == reference_cppc
Example #20
  def __init__(self, pdb_hierarchy,
               sequences,
               alignment_params=None,
               crystal_symmetry=None,
               coordinate_precision=5,
               occupancy_precision=3,
               b_iso_precision=5,
               u_aniso_precision=5):

    pdb_hierarchy_as_cif_block.__init__(
      self, pdb_hierarchy, crystal_symmetry=crystal_symmetry,
      coordinate_precision=coordinate_precision,
      occupancy_precision=occupancy_precision,
      b_iso_precision=b_iso_precision,
      u_aniso_precision=u_aniso_precision)

    import mmtbx.validation.sequence
    validation = mmtbx.validation.sequence.validation(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      params=alignment_params,
      extract_residue_groups=True,
      log=null_out(), # silence output
    )

    entity_loop = iotbx.cif.model.loop(header=(
      '_entity.id',
      '_entity.type',
      #'_entity.src_method',
      #'_entity.pdbx_description',
      '_entity.formula_weight',
      '_entity.pdbx_number_of_molecules',
      #'_entity.details',
      #'_entity.pdbx_mutation',
      #'_entity.pdbx_fragment',
      #'_entity.pdbx_ec'
    ))

    entity_poly_loop = iotbx.cif.model.loop(header=(
      '_entity_poly.entity_id',
      '_entity_poly.type',
      '_entity_poly.nstd_chirality',
      '_entity_poly.nstd_linkage',
      '_entity_poly.nstd_monomer',
      '_entity_poly.pdbx_seq_one_letter_code',
      '_entity_poly.pdbx_seq_one_letter_code_can',
      '_entity_poly.pdbx_strand_id',
      '_entity_poly.type_details'
    ))

    entity_poly_seq_loop = iotbx.cif.model.loop(header=(
      '_entity_poly_seq.entity_id',
      '_entity_poly_seq.num',
      '_entity_poly_seq.mon_id',
      '_entity_poly_seq.hetero',
    ))

    sequence_counts = OrderedDict()
    sequence_to_chain_ids = {}
    entity_id = 0
    sequence_to_entity_id = {}
    chain_id_to_entity_id = {}
    sequence_to_chains = {}
    residue_group_to_seq_num_mapping = {}
    aligned_pdb_chains = OrderedSet()
    non_polymer_counts = dict_with_default_0()
    non_polymer_resname_to_entity_id = OrderedDict()

    for chain in validation.chains:
      sequence = chain.alignment.b
      if sequence not in sequence_to_entity_id:
        entity_id += 1
        sequence_to_entity_id[sequence] = entity_id
      sequence_counts.setdefault(sequence, 0)
      sequence_counts[sequence] += 1
      sequence_to_chain_ids.setdefault(sequence, [])
      sequence_to_chain_ids[sequence].append(chain.chain_id)
      sequence_to_chains.setdefault(sequence, [])
      sequence_to_chains[sequence].append(chain)
      chain_id_to_entity_id[chain.chain_id] = sequence_to_entity_id[sequence]
      aligned_pdb_chains.add(chain.residue_groups[0].parent())

      assert len(chain.residue_groups) + chain.n_missing_start + chain.n_missing_end == len(sequence)
      residue_groups = [None] * chain.n_missing_start + chain.residue_groups + [None] * chain.n_missing_end
      i = chain.n_missing_start
      seq_num = 0
      for i, residue_group in enumerate(residue_groups):
        if residue_group is None and chain.alignment.b[i] == '-':
          # a deletion
          continue
        seq_num += 1
        if residue_group is not None:
          residue_group_to_seq_num_mapping[
            residue_group] = seq_num

    # computed once, after the loop over all aligned chains is complete
    unaligned_pdb_chains = OrderedSet(pdb_hierarchy.chains()) - aligned_pdb_chains

    for pdb_chain in unaligned_pdb_chains:
      for residue_group in pdb_chain.residue_groups():
        for resname in residue_group.unique_resnames():
          if resname not in non_polymer_resname_to_entity_id:
            entity_id += 1
            non_polymer_resname_to_entity_id[resname] = entity_id
          non_polymer_counts[resname] += 1

    for sequence, count in sequence_counts.iteritems():
      entity_poly_seq_num = 0
      entity_id = sequence_to_entity_id[sequence]

      entity_loop.add_row((
        entity_id,
        'polymer', #polymer/non-polymer/macrolide/water
        #'?', #src_method
        #'?', # pdbx_description
        '?', # formula_weight
        len(sequence_to_chains[sequence]), # pdbx_number_of_molecules
        #'?', # details
        #'?', # pdbx_mutation
        #'?', # pdbx_fragment
        #'?' # pdbx_ec
      ))

      # The definition of the cif item _entity_poly.pdbx_seq_one_letter_code
      # says that modifications and non-standard amino acids should be encoded
      # as 'X', however in practice the PDB seem to encode them as the three-letter
      # code in parentheses.
      pdbx_seq_one_letter_code = []
      pdbx_seq_one_letter_code_can = []

      chains = sequence_to_chains[sequence]

      from iotbx.pdb import amino_acid_codes

      chain = chains[0]
      matches = chain.alignment.matches()

      for i, one_letter_code in enumerate(sequence):

        #Data items in the ENTITY_POLY_SEQ category specify the sequence
        #of monomers in a polymer. Allowance is made for the possibility
        #of microheterogeneity in a sample by allowing a given sequence
        #number to be correlated with more than one monomer ID. The
        #corresponding ATOM_SITE entries should reflect this
        #heterogeneity.

        monomer_id = None
        if i >= chain.n_missing_start and i < (len(sequence) - chain.n_missing_end):
          monomer_id = chain.resnames[i-chain.n_missing_start]

        if monomer_id is None and one_letter_code == '-': continue

        pdbx_seq_one_letter_code_can.append(one_letter_code)

        if monomer_id is None:
          if sequence_to_chains[sequence][0].chain_type == mmtbx.validation.sequence.PROTEIN:
            monomer_id = amino_acid_codes.three_letter_given_one_letter.get(
              one_letter_code, "UNK") # XXX
          else:
            monomer_id = one_letter_code
        else:
          if sequence_to_chains[sequence][0].chain_type == mmtbx.validation.sequence.PROTEIN:
            one_letter_code = amino_acid_codes.one_letter_given_three_letter.get(
              monomer_id, "(%s)" %monomer_id)

        pdbx_seq_one_letter_code.append(one_letter_code)

        entity_poly_seq_num += 1

        entity_poly_seq_loop.add_row((
          entity_id,
          entity_poly_seq_num,
          monomer_id,
          'no', #XXX
        ))

      entity_poly_type = '?'
      entity_nstd_chirality = 'n'
      # we should probably determine the chirality more correctly by examining
      # the chirality of the backbone chain rather than relying on the residue
      # names to be correct
      if chain.chain_type == mmtbx.validation.sequence.PROTEIN:
        n_d_peptides = 0
        n_l_peptides = 0
        n_achiral_peptides = 0
        n_unknown = 0
        for resname in chain.resnames:
          if resname == "GLY":
            n_achiral_peptides += 1
          elif resname in iotbx.pdb.common_residue_names_amino_acid:
            n_l_peptides += 1
          elif resname in amino_acid_codes.three_letter_l_given_three_letter_d:
            n_d_peptides += 1
          else:
            n_unknown += 1
        n_total = sum([n_d_peptides, n_l_peptides, n_achiral_peptides, n_unknown])
        if (n_l_peptides + n_achiral_peptides)/n_total > 0.5:
          entity_poly_type = 'polypeptide(L)'
          if n_d_peptides > 0:
            entity_nstd_chirality = 'y'
        elif (n_d_peptides + n_achiral_peptides)/n_total > 0.5:
          entity_poly_type = 'polypeptide(D)'
          if n_l_peptides > 0:
            entity_nstd_chirality = 'y'
      elif chain.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID:
        n_dna = 0
        n_rna = 0
        n_unknown = 0
        for resname in chain.resnames:
          if resname is not None and resname.strip().upper() in (
            'AD', 'CD', 'GD', 'TD', 'DA', 'DC', 'DG', 'DT'):
            n_dna += 1
          elif resname is not None and resname.strip().upper() in (
            'A', 'C', 'G', 'T', '+A', '+C', '+G', '+T'):
            n_rna += 1
          else:
            n_unknown += 1
        n_total = sum([n_dna, n_rna, n_unknown])
        if n_dna/n_total > 0.5 and n_rna == 0:
          entity_poly_type = 'polydeoxyribonucleotide'
        elif n_rna/n_total > 0.5 and n_dna == 0:
          entity_poly_type = 'polyribonucleotide'
        elif (n_rna + n_dna)/n_total > 0.5:
          entity_poly_type = 'polydeoxyribonucleotide/polyribonucleotide hybrid'

      entity_poly_loop.add_row((
        entity_id,
        entity_poly_type,
        entity_nstd_chirality,
        'no',
        'no',
        wrap_always("".join(pdbx_seq_one_letter_code), width=80).strip(),
        wrap_always("".join(pdbx_seq_one_letter_code_can), width=80).strip(),
        ','.join(sequence_to_chain_ids[sequence]),
        '?'
      ))

    for resname, entity_id in non_polymer_resname_to_entity_id.iteritems():
      entity_type = "non-polymer"
      if resname == "HOH":
        entity_type = "water" # XXX
      entity_loop.add_row((
        entity_id,
        entity_type, #polymer/non-polymer/macrolide/water
        #'?', #src_method
        #'?', # pdbx_description
        '?', # formula_weight
        non_polymer_counts[resname], # pdbx_number_of_molecules
        #'?', # details
        #'?', # pdbx_mutation
        #'?', # pdbx_fragment
        #'?' # pdbx_ec
      ))

    self.cif_block.add_loop(entity_loop)
    self.cif_block.add_loop(entity_poly_loop)
    self.cif_block.add_loop(entity_poly_seq_loop)
    self.cif_block.update(pdb_hierarchy.as_cif_block())

    label_entity_id = self.cif_block['_atom_site.label_entity_id']
    auth_seq_id = self.cif_block['_atom_site.auth_seq_id']
    ins_code = self.cif_block['_atom_site.pdbx_PDB_ins_code']
    auth_asym_id = self.cif_block['_atom_site.auth_asym_id']
    label_seq_id = flex.std_string(auth_seq_id.size(), '.')
    ins_code = ins_code.deep_copy()
    ins_code.set_selected(ins_code == '?', '')
    for residue_group, seq_num in residue_group_to_seq_num_mapping.iteritems():
      sel = ((auth_asym_id == residue_group.parent().id) &
             (ins_code == residue_group.icode.strip()) &
             (auth_seq_id == residue_group.resseq.strip()))
      label_seq_id.set_selected(sel, str(seq_num))
      label_entity_id.set_selected(
        sel, str(chain_id_to_entity_id[residue_group.parent().id]))

    for pdb_chain in unaligned_pdb_chains:
      for residue_group in pdb_chain.residue_groups():
        sel = ((auth_asym_id == residue_group.parent().id) &
               (ins_code == residue_group.icode.strip()) &
               (auth_seq_id == residue_group.resseq.strip()))
        label_entity_id.set_selected(
          sel, str(non_polymer_resname_to_entity_id[residue_group.unique_resnames()[0]]))

    self.cif_block['_atom_site.label_seq_id'] = label_seq_id

    # reorder the loops
    atom_site_loop = self.cif_block['_atom_site']
    atom_site_aniso_loop = self.cif_block.get('_atom_site_anisotrop')
    del self.cif_block['_atom_site']
    self.cif_block.add_loop(atom_site_loop)
    if atom_site_aniso_loop is not None:
      del self.cif_block['_atom_site_anisotrop']
      self.cif_block.add_loop(atom_site_aniso_loop)
Example #21
def eval_1(args):
  first_file = open(args[0]).read().splitlines()
  #
  for line in first_file:
    if (line.startswith("pdb_file = ")):
      pdb_file = line.split(" ", 2)[2]
      assert pdb_file[0] == pdb_file[-1]
      assert pdb_file[0] in ['"', "'"]
      pdb_file = op.basename(pdb_file[1:-1])
      break
  else:
    raise RuntimeError('pdb_file = "..." not found.')
  #
  for line in first_file:
    if   (line ==
            "random_displacements_parameterization = *constrained cartesian"):
      random_displacements_parameterization = "constrained"
      break
    elif (line ==
            "random_displacements_parameterization = constrained *cartesian"):
      random_displacements_parameterization = "cartesian"
      break
  else:
    raise RuntimeError(
      "random_displacements_parameterization = constrained or cartesian"
      " not found.")
  #
  for line in first_file:
    if (line == "algorithm = *minimization annealing"):
      algorithm = "minimization"
      break
    elif (line == "algorithm = minimization *annealing"):
      algorithm = "annealing"
      break
  else:
    raise RuntimeError("algorithm = minimization or annealing not found.")
  #
  del first_file
  #
  tst_tardy_pdb_master_phil = tst_tardy_pdb.get_master_phil()
  tst_tardy_pdb_params = tst_tardy_pdb_master_phil.extract()
  if (algorithm == "minimization"):
    parameter_trial_table \
      = tst_tardy_comprehensive.common_parameter_trial_table
  elif (algorithm == "annealing"):
    parameter_trial_table \
      = tst_tardy_comprehensive.annealing_parameter_trial_table
  else:
    raise AssertionError
  cp_n_trials = tst_tardy_comprehensive.number_of_trials(
    table=parameter_trial_table)
  #
  random_seed_rmsd = []
  for cp_i_trial in xrange(cp_n_trials):
    random_seed_rmsd.append({})
  for file_name in args:
    for line in open(file_name).read().splitlines():
      if (not line.startswith("RESULT_cp_i_trial_random_seed_rmsd: ")):
        continue
      flds = line.split(None, 3)
      assert len(flds) == 4
      cp_i_trial = int(flds[1])
      random_seed = int(flds[2])
      rmsd = flex.double(eval(flds[3]))
      assert not random_seed_rmsd[cp_i_trial].has_key(random_seed)
      random_seed_rmsd[cp_i_trial][random_seed] = rmsd
  random_seeds_found = dict_with_default_0()
  for cp_i_trial in xrange(cp_n_trials):
    random_seeds_found[tuple(sorted(random_seed_rmsd[cp_i_trial].keys()))] += 1
  if (len(random_seeds_found) != 1):
    print random_seeds_found
    raise RuntimeError("Unexpected random_seeds_found (see output).")
  assert random_seeds_found.values()[0] == cp_n_trials
  random_seeds_found = random_seeds_found.keys()[0]
  assert random_seeds_found == tuple(range(len(random_seeds_found)))
  rmsds = []
  for cp_i_trial in xrange(cp_n_trials):
    rmsds.append([random_seed_rmsd[cp_i_trial][random_seed]
      for random_seed in xrange(len(random_seeds_found))])
  #
  write_separate_pages = False
  if (algorithm == "minimization"):
    rmsd_start_final_plots_minimization(
      pdb_file=pdb_file,
      random_displacements_parameterization
        =random_displacements_parameterization,
      tst_tardy_pdb_params=tst_tardy_pdb_params,
      parameter_trial_table=parameter_trial_table,
      cp_n_trials=cp_n_trials,
      rmsds=rmsds,
      rmsd_n_n=50,
      write_separate_pages=write_separate_pages)
  elif (algorithm == "annealing"):
    rmsd_start_final_plots_annealing(
      pdb_file=pdb_file,
      random_displacements_parameterization
        =random_displacements_parameterization,
      tst_tardy_pdb_params=tst_tardy_pdb_params,
      parameter_trial_table=parameter_trial_table,
      cp_n_trials=cp_n_trials,
      rmsds=rmsds,
      write_separate_pages=write_separate_pages)
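For comparison, the standard library's collections.defaultdict(int) supports the same counting pattern as dict_with_default_0. One behavioral difference worth checking against the real libtbx class (assuming it is based on __missing__ returning 0 without storing, as sketched near the top of this page): defaultdict inserts the default value the first time a missing key is read.

from collections import defaultdict

counts = defaultdict(int)   # standard-library analogue
for state in ["r", "qw", "r"]:
    counts[state] += 1      # missing keys start at 0
assert counts["r"] == 2
_ = counts["hqw"]           # NB: this read inserts "hqw" -> 0
assert "hqw" in counts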