示例#1
0
def exercise():
    """
    Smoke-test ion feature extraction on the generated zinc inputs.

    Loads the generated model and data at a wavelength of 1.025, deletes the
    generated input files, applies inelastic form factors from the "sasaki"
    table, builds an ion identification manager and runs ``validate_ions``.
    For every entry in ``manager.atoms_to_props`` it then builds the chemical
    and scattering environments and asserts that ``ion_vector`` returns a
    value and that ``ion_class`` returns a non-empty string.

    Prints "OK" when every site passes.
    """
    wavelength = 1.025
    mtz_file, pdb_file = generate_zinc_inputs(anonymize=False)
    sink = libtbx.utils.null_out()

    load_args = [
        pdb_file,
        mtz_file,
        "wavelength={}".format(wavelength),
        "use_phaser=False",
        "use_svm=True",
    ]
    cmdline = mmtbx.command_line.load_model_and_data(
        args=load_args,
        master_phil=master_phil(),
        out=sink,
        process_pdb_file=True,
        create_fmodel=True,
        prefer_anomalous=True,
    )

    # Everything needed is in memory now; drop the inputs and the .eff file
    # that sits next to the PDB file.
    eff_file = os.path.splitext(pdb_file)[0] + "_fmodel.eff"
    for path in (pdb_file, mtz_file, eff_file):
        os.remove(path)

    cmdline.xray_structure.set_inelastic_form_factors(
        photon=cmdline.params.input.wavelength,
        table="sasaki",
    )
    cmdline.fmodel.update_xray_structure(
        cmdline.xray_structure,
        update_f_calc=True,
    )

    manager = ions.identify.create_manager(
        pdb_hierarchy=cmdline.pdb_hierarchy,
        fmodel=cmdline.fmodel,
        geometry_restraints_manager=cmdline.geometry,
        wavelength=cmdline.params.input.wavelength,
        params=cmdline.params,
        nproc=cmdline.params.nproc,
        log=sink,
    )
    manager.validate_ions(out=sink)

    for props in manager.atoms_to_props.values():
        site = props.i_seq
        nearby = manager.find_nearby_atoms(site, far_distance_cutoff=3.5)
        chem_env = ChemicalEnvironment(site, nearby, manager)

        fo_fit = manager.get_map_gaussian_fit("mFo", site)
        fofc_fit = manager.get_map_gaussian_fit("mFo-DFc", site)
        anom_fit = manager.get_map_gaussian_fit("anom", site)
        scatter_env = ScatteringEnvironment(
            site,
            manager,
            fo_density=fo_fit,
            fofc_density=fofc_fit,
            anom_density=anom_fit,
        )

        # Compute both results before asserting so a failure in one does not
        # hide an exception raised by the other.
        vector = ion_vector(chem_env, scatter_env)
        resname = ion_class(chem_env)
        assert vector is not None
        assert resname != ""

    print("OK")
示例#2
0
def dump_sites(manager):
    """
    Collect the chemical and scattering environment of every ion and water
    site yielded by ``iterate_sites`` for the manager's model.

    Parameters
    ----------
    manager : mmtbx.ions.identify.manager

    Returns
    -------
    list of tuple of mmtbx.ions.environment.ChemicalEnvironment, \
    mmtbx.ions.environment.ScatteringEnvironment
        One ``(chem_env, scatter_env)`` pair per site, in iteration order.
    """
    sites = iterate_sites(
        manager.pdb_hierarchy,
        res_filter=ions.SUPPORTED + WATER_RES_NAMES,
        split_sites=True,
    )

    # AtomProperties objects hold a reference to the Atom object and so cannot
    # be pickled whole; keep only the environment objects we care about.
    environments = []
    for site in sites:
        i_seq = site.i_seq
        stats = manager.map_stats(i_seq)
        fo_fit = manager.get_map_gaussian_fit("mFo", i_seq)
        nearby = manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5)
        chem_env = ChemicalEnvironment(i_seq, nearby, manager)
        # Only the mFo map gets a gaussian fit here; the other two densities
        # are the raw map statistics paired with a 0 second element.
        scatter_env = ScatteringEnvironment(
            i_seq,
            manager,
            fo_density=fo_fit,
            fofc_density=(stats.fofc, 0),
            anom_density=(stats.anom, 0),
        )
        environments.append((chem_env, scatter_env))

    return environments
示例#3
0
def exercise():
    """
    Check that the SVM classifier agrees with ``ion_class`` for every
    single-atom water site in the generated calcium and zinc inputs.

    For each generator / wavelength pair the model and data are loaded, the
    generated files are deleted, and an ``ions.svm.manager`` is created. Every
    residue named in ``WATER_RES_NAMES`` that has a single atom and no
    alternate conformations is classified with ``predict_ion`` over the
    candidates "HOH", "ZN" and "CA"; the top prediction must equal the label
    returned by ``ion_class``.
    NOTE(review): the inputs are generated with ``anonymize=True``, which
    presumably relabels the ions as waters -- confirm against the generators.

    If ``predict_ion`` returns None the test is skipped with a message. On a
    mismatch the predictions are printed and the process exits with status 1
    so that the failure is visible to callers checking the exit code. Prints
    "OK" on success.
    """
    fns = [generate_calcium_inputs, generate_zinc_inputs]
    wavelengths = [1.025, 1.54]

    for fn, wavelength in zip(fns, wavelengths):
        mtz_file, pdb_file = fn(anonymize=True)
        null_out = libtbx.utils.null_out()

        cmdline = mmtbx.command_line.load_model_and_data(
            args=[
                pdb_file,
                mtz_file,
                "wavelength={}".format(wavelength),
                "use_phaser=True",
                "use_svm=True",
            ],
            master_phil=master_phil(),
            out=null_out,
            process_pdb_file=True,
            create_fmodel=True,
            prefer_anomalous=True,
            set_inelastic_form_factors="sasaki",
        )

        # The loaded objects are all that is needed from here on; remove the
        # generated inputs and the side files named after the MTZ file.
        mtz_base = os.path.splitext(mtz_file)[0]
        os.remove(pdb_file)
        os.remove(mtz_file)
        os.remove(mtz_base + "_fmodel.eff")
        os.remove(mtz_base + ".pdb")

        manager = ions.identify.create_manager(
            pdb_hierarchy=cmdline.pdb_hierarchy,
            fmodel=cmdline.fmodel,
            geometry_restraints_manager=cmdline.geometry,
            wavelength=cmdline.params.input.wavelength,
            params=cmdline.params,
            nproc=cmdline.params.nproc,
            log=null_out,
            manager_class=ions.svm.manager,
        )

        # Build a list of the i_seq of each water / ion site
        waters = []
        for chain in manager.pdb_hierarchy.only_model().chains():
            for residue_group in chain.residue_groups():
                atom_groups = residue_group.atom_groups()
                if len(atom_groups) > 1:  # alt conf, skip
                    continue
                for atom_group in atom_groups:
                    resname = atom_group.resname.strip().upper()
                    if resname not in WATER_RES_NAMES:
                        continue
                    atoms = atom_group.atoms()
                    # More than one atom probably means hydrogens, skip
                    if len(atoms) == 1:
                        waters.append(atoms[0].i_seq)

        assert len(waters) > 0

        atom_props = [AtomProperties(i_seq, manager) for i_seq in waters]

        for atom_prop in atom_props:
            i_seq = atom_prop.i_seq
            chem_env = ChemicalEnvironment(
                i_seq,
                manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5),
                manager,
            )
            scatter_env = ScatteringEnvironment(
                i_seq,
                manager,
                fo_density=manager.get_map_gaussian_fit("mFo", i_seq),
                fofc_density=manager.get_map_gaussian_fit("mFo-DFc", i_seq),
                anom_density=manager.get_map_gaussian_fit("anom", i_seq),
            )
            resname = ion_class(chem_env)
            assert resname != ""

            predictions = predict_ion(chem_env,
                                      scatter_env,
                                      elements=["HOH", "ZN", "CA"])
            if predictions is None:
                print("Could not load SVM classifier")
                print("Skipping {}".format(os.path.split(__file__)[1]))
                return

            if resname != predictions[0][0]:
                print("Prediction ({}) did not match expected: {}".format(
                    predictions[0][0], resname))
                for element, prob in predictions:
                    print("  {}: {:.2f}".format(element, prob))
                # A bare sys.exit() would report success (status 0).
                sys.exit(1)

    print("OK")
示例#4
0
    def analyze_water(self,
                      i_seq,
                      debug=True,
                      candidates=Auto,
                      filter_outputs=True):
        """
        Analyze a single water site with the SVM and decide whether it should
        be re-assigned as an ion.

        Parameters
        ----------
        i_seq : int
            Sequence index of the site to examine.
        debug : bool, optional
            Not used by this method.
        candidates : list of str or str, optional
            Elements to consider. A string is split on commas and whitespace.
            "HOH" is always appended to an explicit list. When left as Auto,
            ``None`` is passed on to ``predict_ion``.
        filter_outputs : bool, optional
            When true, non-None predictions are passed through
            ``utils.filter_svm_outputs`` before being ranked.

        Returns
        -------
        svm_prediction or None
            None when the site is classified as WATER_POOR, or when no
            predictions are available.
        """
        props = mmtbx.ions.identify.AtomProperties(i_seq, self)
        site_type = props.get_atom_type(params=self.params.water)
        if site_type == mmtbx.ions.identify.WATER_POOR:
            return None

        use_defaults = candidates is Auto
        if use_defaults:
            candidates = mmtbx.ions.DEFAULT_IONS
        elif isinstance(candidates, (str, unicode)):
            candidates = candidates.replace(",", " ").split()
        candidates = [name.strip().upper() for name in candidates]
        if candidates == ["X"]:
            # XXX hack for testing - X is "dummy" element
            candidates = []
        if use_defaults:
            candidates = None
        else:
            candidates.append("HOH")

        from mmtbx.ions.environment import ScatteringEnvironment, \
          ChemicalEnvironment
        chem_env = ChemicalEnvironment(i_seq, props.nearby_atoms, self)
        fo_fit = self.get_map_gaussian_fit("mFo", i_seq)
        fofc_fit = self.get_map_gaussian_fit("mFo-DFc", i_seq)
        anom_fit = self.get_map_gaussian_fit("anom", i_seq)
        scatter_env = ScatteringEnvironment(i_seq=i_seq,
                                            manager=self,
                                            fo_density=fo_fit,
                                            fofc_density=fofc_fit,
                                            anom_density=anom_fit)

        predictions = predict_ion(chem_env,
                                  scatter_env,
                                  elements=candidates,
                                  svm_name=self.params.svm.svm_name)
        if filter_outputs and (predictions is not None):
            predictions = utils.filter_svm_outputs(chem_env=chem_env,
                                                   scatter_env=scatter_env,
                                                   predictions=predictions)
        if (predictions is None) or (len(predictions) == 0):
            return None

        # Rank by descending score.
        predictions.sort(key=lambda pred: -pred[1])
        best_guess, best_score = predictions[0]

        final_choice = None
        if best_guess != "HOH":
            # A lone non-water prediction is accepted as is; otherwise the
            # winner must clear the minimum score and beat the runner-up by
            # the configured fraction.
            accepted = len(predictions) == 1
            if not accepted:
                _next_guess, next_score = predictions[1]
                svm_params = self.params.svm
                accepted = (
                    (best_score >= svm_params.min_score) and
                    (best_score >=
                     (next_score * svm_params.min_fraction_of_next)))
            if accepted:
                final_choice = mmtbx.ions.server.get_metal_parameters(
                    best_guess)

        info_buffer = StringIO()
        props.show_properties(identity="HOH", out=info_buffer)
        return svm_prediction(
            i_seq=i_seq,
            pdb_id_str=self.pdb_atoms[i_seq].id_str(),
            atom_info_str=info_buffer.getvalue(),
            map_stats=self.map_stats(i_seq),
            atom_types=[entry[0] for entry in predictions],
            scores=[entry[1] for entry in predictions],
            final_choice=final_choice)
def exercise():
    """
    Regression test for the chemistry detected around metal sites.

    Requires the ``phenix_regression`` module; when it is not available the
    test prints a "Skipping" message and returns. For every listed model the
    PDB file is processed, a ``mmtbx.ions.identify.manager`` is built without
    an fmodel, and each atom whose residue name is in
    ``ions.DEFAULT_IONS + ions.TRANSITION_METALS`` (with "CL" removed) is
    compared, in file order, against the expected ``Counter`` of chemical
    environment labels.

    On the first mismatch the found and expected environments are printed and
    the process exits with status 1 so that the failure is visible to callers
    checking the exit code. Prints "OK" on success.
    """
    if not libtbx.env.has_module("phenix_regression"):
        print("Skipping {}".format(os.path.split(__file__)[1]))
        return

    # Expected chemistry per model, one Counter per metal site in the order
    # the sites appear in the model.
    models = OrderedDict([
        ("2qng", [
            Counter({chem_oxygen: 7, chem_carboxy: 2, chem_water: 2,
                     chem_backbone: 3}),
            Counter({chem_oxygen: 6, chem_carboxy: 3, chem_water: 1,
                     chem_backbone: 2}),
        ]),
        ("3rva", [
            Counter({chem_oxygen: 6, chem_carboxy: 4, chem_water: 2}),
            Counter({chem_nitrogen: 1, chem_oxygen: 4,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 1}),
            Counter({chem_nitrogen: 4, chem_nitrogen_primary: 1,
                     chem_nitrogen_secondary: 3, chem_backbone: 3}),
        ]),
        ("1mjh", [
            Counter({chem_oxygen: 6, chem_water: 3, chem_phosphate: 3}),
            Counter({chem_oxygen: 6, chem_water: 3, chem_phosphate: 3}),
        ]),
        ("4e1h", [
            Counter({chem_oxygen: 6, chem_carboxy: 4}),
            Counter({chem_oxygen: 6, chem_carboxy: 3}),
            Counter({chem_oxygen: 6, chem_carboxy: 3}),
        ]),
        ("2xuz", [
            Counter({chem_oxygen: 6}),
        ]),
        ("3zli", [
            Counter({chem_nitrogen: 2, chem_oxygen: 4,
                     chem_nitrogen_secondary: 2, chem_carboxy: 1,
                     chem_water: 1}),
            Counter({chem_sulfur: 4}),
            Counter({chem_nitrogen: 2, chem_oxygen: 4,
                     chem_nitrogen_secondary: 2, chem_carboxy: 1,
                     chem_water: 1}),
            Counter({chem_sulfur: 4}),
        ]),
        ("3e0f", [
            Counter({chem_nitrogen: 2, chem_oxygen: 4,
                     chem_nitrogen_secondary: 2, chem_carboxy: 2,
                     chem_phosphate: 2}),
            Counter({chem_nitrogen: 2, chem_oxygen: 2,
                     chem_nitrogen_secondary: 2, chem_carboxy: 1,
                     chem_phosphate: 1}),
            Counter({chem_nitrogen: 2, chem_oxygen: 3,
                     chem_nitrogen_secondary: 2, chem_carboxy: 2,
                     chem_phosphate: 1}),
        ]),
        ("3dkq", [
            Counter({chem_nitrogen: 4, chem_oxygen: 1,
                     chem_nitrogen_secondary: 4, chem_carboxy: 1}),
            Counter({chem_nitrogen: 2, chem_oxygen: 1,
                     chem_nitrogen_secondary: 2, chem_carboxy: 1}),
            Counter({chem_nitrogen: 4, chem_oxygen: 1,
                     chem_nitrogen_secondary: 4, chem_carboxy: 1}),
        ]),
        ("2o8q", [
            Counter({chem_nitrogen: 3, chem_oxygen: 3,
                     chem_nitrogen_secondary: 3, chem_water: 3}),
            Counter({chem_nitrogen: 3, chem_oxygen: 3,
                     chem_nitrogen_secondary: 3, chem_water: 3}),
        ]),
        ("1tgg", [
            Counter({chem_oxygen: 5, chem_chloride: 1, chem_carboxy: 4,
                     chem_water: 1}),
            Counter({chem_oxygen: 3, chem_chloride: 2, chem_carboxy: 3}),
            Counter({chem_oxygen: 4, chem_chloride: 2, chem_carboxy: 4}),
        ]),
        ("3zu8", [
            Counter({chem_oxygen: 7, chem_carboxy: 3, chem_water: 1,
                     chem_backbone: 2}),
            Counter({chem_nitrogen: 4, chem_oxygen: 1,
                     chem_nitrogen_primary: 1, chem_nitrogen_secondary: 3,
                     chem_carboxy: 1, chem_backbone: 3}),
        ]),
        ("1ofs", [
            Counter({chem_nitrogen: 1, chem_oxygen: 4,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 1}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
        ]),
        ("3ul2", [
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_backbone: 1, chem_water: 2}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
        ]),
        ("3snm", [
            Counter({chem_oxygen: 5, chem_amide: 1, chem_carboxy: 3,
                     chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 3,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3}),
        ]),
        ("3qlq", [
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 2, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 5,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 2}),
        ]),
        ("2gdf", [
            Counter({chem_nitrogen: 1, chem_oxygen: 4,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 1}),
            Counter({chem_oxygen: 6, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 1, chem_backbone: 1}),
            Counter({chem_nitrogen: 1, chem_oxygen: 4,
                     chem_nitrogen_secondary: 1, chem_carboxy: 3,
                     chem_water: 1}),
            Counter({chem_oxygen: 6, chem_amide: 1, chem_carboxy: 3,
                     chem_water: 1, chem_backbone: 1}),
        ]),
        ("1q8h", [
            Counter({chem_oxygen: 7, chem_carboxy: 6, chem_water: 1}),
            Counter({chem_oxygen: 7, chem_carboxy: 4, chem_water: 3}),
            Counter({chem_oxygen: 8, chem_carboxy: 6, chem_water: 2}),
        ]),
    ])

    for model, expected_environments in models.items():
        pdb_path = libtbx.env.find_in_repositories(
            relative_path=os.path.join("phenix_regression", "mmtbx", "ions",
                                       model + ".pdb"),
            test=os.path.isfile)

        mon_lib_srv = monomer_library.server.server()
        ener_lib = monomer_library.server.ener_lib()
        processed_pdb_file = monomer_library.pdb_interpretation.process(
            mon_lib_srv=mon_lib_srv,
            ener_lib=ener_lib,
            file_name=pdb_path,
            raw_records=None,
            force_symmetry=True,
            log=libtbx.utils.null_out())

        geometry = processed_pdb_file.geometry_restraints_manager(
            show_energies=False)
        xray_structure = processed_pdb_file.xray_structure()
        pdb_hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
        connectivity = geometry.shell_sym_tables[0].full_simple_connectivity()

        manager = mmtbx.ions.identify.manager(fmodel=None,
                                              pdb_hierarchy=pdb_hierarchy,
                                              xray_structure=xray_structure,
                                              connectivity=connectivity)

        # Chloride is removed from the residue names that count as a site
        # under test.
        elements = set(ions.DEFAULT_IONS + ions.TRANSITION_METALS)
        elements.difference_update(["CL"])

        metals = [
            i_seq for i_seq, atom in enumerate(manager.pdb_atoms)
            if atom.fetch_labels().resname.strip().upper() in elements
        ]
        assert len(metals) == len(expected_environments)

        for index, (metal, expected_environment) in enumerate(
                zip(metals, expected_environments)):
            env = ChemicalEnvironment(
                metal,
                manager.find_nearby_atoms(metal, filter_by_two_fofc=False),
                manager)
            if env.chemistry != expected_environment:
                print("Problem detecting chemistry environment in {} {}"
                      .format(model, index))
                print("Found:     " + str(env.chemistry))
                print("Should be: " + str(expected_environment))
                # A bare sys.exit() would report success (status 0).
                sys.exit(1)

    print("OK")