Exemplo n.º 1
0
    def create_atomic_hotspots(self, superstar_grids_dir):
        """
        Wrap the SuperStar grids found in a directory as atomic hotspot results.

        The buriedness grid (``buriedness.ccp4``) is always loaded; a probe is
        only included when its ``superstar_<probe>.ccp4`` file exists.

        :param superstar_grids_dir: path to where the superstar grids are stored
        :return: list of `_AtomicHotspotResult`, one per probe grid found
        """
        # The buriedness grid is shared by every probe result.
        buriedness_file = Path(superstar_grids_dir, 'buriedness.ccp4')
        buriedness_grid = Grid.from_file(str(buriedness_file.resolve()))

        results = []
        for probe_name in ('donor', 'acceptor', 'apolar', 'positive', 'negative'):
            grid_file = Path(superstar_grids_dir, f'superstar_{probe_name}.ccp4')
            if not grid_file.exists():
                # Probes without a grid on disk are silently skipped.
                continue

            print(f" found grid for probe of type {probe_name}")
            probe_grid = Grid.from_file(str(grid_file.resolve()))
            results.append(
                _AtomicHotspotResult(identifier=probe_name,
                                     grid=probe_grid,
                                     buriedness=buriedness_grid))

        return results
Exemplo n.º 2
0
    def _generate_result(self, path):
        """
        Build a `Results` object from the files in a hotspot output directory.

        The directory is expected to hold one protein file and grids that all
        share a single supported grid extension. SuperStar, weighted SuperStar
        and buriedness grids are only read when the matching ``read_*`` flag is
        set and at least one such file is present.

        :param str path: directory containing the protein and grid files
        :return: `Results` built from the files that were read
        :raises IndexError: if no supported protein file is found, or if zero or
            more than one supported grid extension is present
        :raises NotImplementedError: if the grid extension is "dat"
        """
        def extension(fname):
            # Second dot-separated token (as the original lookup used); file
            # names without a "." yield "" instead of raising IndexError.
            parts = fname.split(".")
            return parts[1] if len(parts) > 1 else ""

        # Accept either a single extension string or a collection of them.
        protein_exts = self.supported_protein_extensions
        if isinstance(protein_exts, str):
            protein_exts = (protein_exts,)

        # NOTE(review): grids are opened by bare file name below, so PushDir
        # presumably changes the working directory to `path` - confirm.
        with PushDir(path):
            files = set(listdir(path))

            # fetch protein - this should always be protein.pdb
            prot_candidates = [f for f in files if extension(f) in protein_exts]
            if not prot_candidates:
                raise IndexError("No supported protein file found")
            prot_name = prot_candidates[0]
            prot = Protein.from_file(prot_name)
            files.remove(prot_name)

            # there should only be one grid extension in the directory, if there are more
            # then you can manually read in your results
            found_exts = {extension(f) for f in files}.intersection(self.supported_grid_extensions)
            if len(found_exts) > 1:
                raise IndexError("Too many grid types, create `hotspots.result.Results` manually")
            if len(found_exts) < 1:
                raise IndexError("No supported grid types found")

            grid_extension = next(iter(found_exts))
            if grid_extension == "dat":
                raise NotImplementedError("Will put this in if requested")

            # read hotspot grids
            stripped_files = {f.split(".")[0] for f in files}
            hotspot_grids = stripped_files.intersection(self.supported_interactions)
            super_grids = {p: Grid.from_file(f"{p}.{grid_extension}") for p in hotspot_grids}

            # read superstar grids - only when such files actually exist
            # (the previous check measured the number of files, not matches)
            if self.read_superstar and any(f.startswith("superstar") for f in files):
                superstar_grids = {p: Grid.from_file(f"superstar_{p}.{grid_extension}") for p in hotspot_grids}
            else:
                superstar_grids = None

            # read weighted_superstar grids - same presence check as above
            if self.read_weighted and any(f.startswith("weighted") for f in files):
                weighted_grids = {p: Grid.from_file(f"weighted_{p}.{grid_extension}") for p in hotspot_grids}
            else:
                weighted_grids = None

            # fetch buriedness grid (None when absent or not requested)
            buriedness_name = next((f for f in files if f.startswith("buriedness")), None)
            if buriedness_name and self.read_buriedness:
                buriedness = Grid.from_file(buriedness_name)
            else:
                buriedness = None

        return Results(super_grids=super_grids,
                       protein=prot,
                       buriedness=buriedness,
                       superstar=superstar_grids,
                       weighted_superstar=weighted_grids,
                       identifier=basename(path))
Exemplo n.º 3
0
def make_max_difference_maps(io):
    """
    Write the difference between the "max" maps of two ensembles, per probe.

    For each of the donor, acceptor and apolar probes the grid
    ``<e>_<probe>_max.ccp4`` is read from each ensemble directory, the second
    is subtracted from the first, and the result is written to
    ``diff_<e1>_<e2>_<probe>.ccp4`` under ``io.params.pipeline_root``.

    NOTE(review): ``e1`` and ``e2`` are not parameters of this function; they
    are resolved as names from the enclosing module scope. Confirm they are
    defined there before calling.

    :param io: object exposing ``ensemble_dirs`` and ``params.pipeline_root``
    :return: None
    """
    for probe in ("donor", "acceptor", "apolar"):
        g1 = Grid.from_file(
            join(io.ensemble_dirs[e1], "{}_{}_max.ccp4".format(e1, probe)))
        g2 = Grid.from_file(
            join(io.ensemble_dirs[e2], "{}_{}_max.ccp4".format(e2, probe)))
        diff_g = g1 - g2

        # Format the file name before joining so that braces in the root
        # directory are never interpreted as format fields.
        out_name = "diff_{}_{}_{}.ccp4".format(e1, e2, probe)
        diff_g.write(join(io.params.pipeline_root, out_name))
Exemplo n.º 4
0
def make_thresholded_maps(io, e1, e2):
    """
    Split each probe's difference map into "over 3" and "under 3" maps.

    For every probe the pickled ensemble maps of both ensembles are loaded,
    ``ge1.get_difference_map(ge2, tolerance=0)`` is used as a mask source, and
    the previously written ``diff_<e1>_<e2>_<probe>.ccp4`` grid is masked with
    ``> 3`` and ``< 3``. Both results are written to the
    ``thresholded_hotspot_maps`` directory under the pipeline root.

    NOTE(review): cells where the difference map equals exactly 3 end up in
    neither output - confirm this is intended.

    :param io: EnsembleIO instance.
    :param str e1: name of reference ensemble
    :param str e2: name of off-target ensemble
    :return: None
    """
    t_dir = join(io.params.pipeline_root, "thresholded_hotspot_maps")
    if not exists(t_dir):
        os.mkdir(t_dir)
    print(t_dir)

    for probe in ("donor", "acceptor", "apolar"):
        # Pickles must be read in binary mode, and the handles are closed
        # promptly. NOTE: unpickling executes arbitrary code - only load
        # ensemble maps produced by this pipeline.
        with open(io.ensemble_maps[e1][probe], "rb") as fh:
            ge1 = pickle.load(fh)
        with open(io.ensemble_maps[e2][probe], "rb") as fh:
            ge2 = pickle.load(fh)

        iarr = ge1.get_difference_map(ge2, tolerance=0)

        # Format the file name before joining so that braces in the root
        # directory are never interpreted as format fields.
        diff_name = "diff_{}_{}_{}.ccp4".format(e1, e2, probe)
        diff = Grid.from_file(
            join(io.params.pipeline_root, diff_name)).get_array()

        over3 = (iarr > 3) * diff
        gover3 = ge1.save_grid(over3)
        gover3.write(
            join(t_dir, "diff_{}_{}_{}_over3.ccp4".format(e1, e2, probe)))

        under3 = (iarr < 3) * diff
        gunder3 = ge1.save_grid(under3)
        gunder3.write(
            join(t_dir, "diff_{}_{}_{}_under3.ccp4".format(e1, e2, probe)))
Exemplo n.º 5
0
    def test_write_real_single(self):
        """Write a `Results` built from the 1hcl test grids, with SuperStar output enabled."""
        data_dir = "testdata/1hcl"
        probe_names = ("donor", "acceptor", "apolar")

        # Hotspot grids first, then the matching SuperStar grids.
        super_grids = {}
        for name in probe_names:
            super_grids[name] = Grid.from_file(os.path.join(data_dir, f"{name}.grd"))

        superstar_grids = {}
        for name in probe_names:
            superstar_grids[name] = Grid.from_file(os.path.join(data_dir, f"superstar_{name}.grd"))

        buriedness = Grid.from_file(os.path.join(data_dir, "buriedness.grd"))
        prot = Protein.from_file(os.path.join(data_dir, "protein.pdb"))

        hr = Results(
            super_grids=super_grids,
            protein=prot,
            buriedness=buriedness,
            superstar=superstar_grids,
        )

        writer_settings = HotspotWriter.Settings()
        writer_settings.output_superstar = True

        out_dir = "testdata/hs_io/minimal_all_grids_real"
        with HotspotWriter(out_dir, settings=writer_settings) as w:
            w.write(hr)
Exemplo n.º 6
0
    def _get_grids(self, sub_dir=None):
        """
        Create a dictionary of interaction grids and load the buriedness grid.

        When ``sub_dir`` is given, the file listing and the set of extensions
        are refreshed from that sub-directory; otherwise the values already
        stored on the instance are used. Grids and the buriedness grid are both
        looked up in the same directory.

        :param str sub_dir: optional sub-directory of the base directory to read
        :return: tuple ``(grid_dic, buriedness)``; ``grid_dic`` maps interaction
            name to grid, ``buriedness`` is None when it could not be read
        :raises RuntimeError: if the directory does not contain exactly one
            supported grid extension (and no ".dat" files)
        """
        if sub_dir:
            base = join(self._base, sub_dir)
            self._files = listdir(base)
            # Collect extensions, ignoring extension-less files and Python
            # scripts (the earlier `!= '' or != '.py'` test was always true).
            self._extensions = {
                ext
                for ext in (splitext(f)[1] for f in self._files)
                if ext not in ('', '.py')
            }
        else:
            base = self._base

        def interaction_files(wanted_ext):
            # Files with the wanted extension whose stem is a supported interaction.
            return [
                f for f in self._files
                if splitext(f)[1] == wanted_ext
                and splitext(f)[0] in self._supported_interactions
            ]

        if ".dat" in self._extensions:
            # The .dat branch must pick up the .dat files themselves.
            grid_dic = {
                splitext(fname)[0]: Grid.from_array(join(base, fname))
                for fname in interaction_files(".dat")
            }
            try:
                buriedness = Grid.from_array(join(base, "buriedness.dat"))
            except RuntimeError:
                buriedness = None

        else:
            ext = list(
                set(self._extensions).intersection(self._supported_grids))
            if len(ext) != 1:
                raise RuntimeError("Opps, something went wrong.")

            grid_dic = {
                splitext(fname)[0]: Grid.from_file(join(base, fname))
                for fname in interaction_files(ext[0])
            }
            try:
                # Resolve against `base` rather than the working directory.
                buriedness = Grid.from_file(
                    join(base, "buriedness{}".format(ext[0])))
            except RuntimeError:
                buriedness = None

        return grid_dic, buriedness
Exemplo n.º 7
0
    def run_hotspot_calculation(self, method="ghecom"):
        """
        Run the hotspot calculation and write the zipped results.

        If ``superstar_grids.zip`` exists next to the output directory, the
        archived SuperStar and buriedness grids are unpacked and reused;
        otherwise the calculation starts from the protein and the SuperStar
        grids are saved afterwards.

        :param str method: buriedness method passed on when calculating from the protein
        :return: path to ``out.zip`` inside the output directory
        """
        runner = Runner()
        settings = runner.Settings()
        settings.nrotations = self.number_rotations
        settings.apolar_translation_threshold = 15
        settings.polar_translation_threshold = 15
        settings.sphere_maps = self.spheres

        # A previous run may have left its SuperStar/buriedness grids behind.
        parent_dir = self.out_dir.parent
        superstar_zip = Path(parent_dir, "superstar_grids.zip")

        if not superstar_zip.exists():
            result = runner.from_protein(protein=self.prepare_protein(),
                                         charged_probes=self.charged,
                                         probe_size=7,
                                         buriedness_method=method,
                                         cavities=None,
                                         nprocesses=1,
                                         settings=settings)

            # Save and zip the SuperStar grids for later runs.
            self._save_superstar_grids(runner)
        else:
            unpack_dir = Path(parent_dir, superstar_zip.stem)
            if not unpack_dir.exists():
                unpack_dir.mkdir()
            unpack_archive(superstar_zip, unpack_dir, 'zip')

            buriedness_file = Path(unpack_dir, 'buriedness.ccp4')
            b_grid = Grid.from_file(str(buriedness_file.resolve()))

            protein = self.prepare_protein()
            atomic_hotspots = self.create_atomic_hotspots(unpack_dir)
            result = runner.from_superstar(protein=protein,
                                           superstar_grids=atomic_hotspots,
                                           buriedness=b_grid,
                                           charged_probes=self.charged,
                                           settings=settings,
                                           clear_tmp=True)
            # The unpacked copy is only needed for the calculation itself.
            rmtree(unpack_dir)

        # Save and zip the results.
        out_dir_str = str(self.out_dir.resolve())
        with hs_io.HotspotWriter(out_dir_str,
                                 visualisation="pymol",
                                 grid_extension=".ccp4",
                                 zip_results=True) as writer:
            writer.write(result)

        out_zip = Path(self.out_dir, 'out.zip')
        print(f"out_file: {str(out_zip.resolve())}")

        return out_zip
Exemplo n.º 8
0
    def test_edge_detection(self):
        """Check edge detection returns fewer points than the grid count, then write them as PyMOL spheres."""
        self.selected = Grid.from_file("testdata/result/molA.grd")
        edge_points = self.selected.edge_detection()

        self.assertLess(len(edge_points), self.selected.count_grid())

        # One sphere object per edge point, for visual inspection.
        pymol_file = PyMOLFile()
        for idx, point in enumerate(edge_points):
            sphere_name = f"sphere_{idx}"
            pymol_file.commands += PyMOLCommands.sphere(
                sphere_name, (0, 0, 1, 1), point, 0.1)
            pymol_file.commands += PyMOLCommands.load_cgo(
                sphere_name, f"sphere_{idx}_obj")

        pymol_file.write("testdata/grid_extension/edge_detection.py")
Exemplo n.º 9
0
    def test_write_pymol_isoslider(self):
        """Build a `Results` by hand from the archived test output and write the isoslider PyMOL script."""
        # Read in manually: unpack the archive into a scratch directory.
        archive = "testdata/hs_io/minimal_all_grids/out.zip"
        scratch = tempfile.mkdtemp()
        with zipfile.ZipFile(archive) as hs_zip:
            hs_zip.extractall(scratch)

        hotspot_dir = os.path.join(scratch, "hotspot")
        probe_names = ("donor", "acceptor", "apolar")

        super_grids = {}
        for name in probe_names:
            super_grids[name] = Grid.from_file(os.path.join(hotspot_dir, f"{name}.grd"))

        superstar_grids = {}
        for name in probe_names:
            superstar_grids[name] = Grid.from_file(os.path.join(hotspot_dir, f"superstar_{name}.grd"))

        prot = Protein.from_file(os.path.join(hotspot_dir, "protein.pdb"))

        hr = Results(super_grids=super_grids, protein=prot, superstar=superstar_grids)
        hr.identifier = "hotspot"

        writer_settings = HotspotWriter.Settings()
        writer_settings.output_superstar = True

        # The writer is only used to generate commands; results are not written.
        writer = HotspotWriter("testdata/hs_io/minimal_all_grids", settings=writer_settings)

        # Isosurfaces for the hotspot maps first, then for the SuperStar maps.
        for grids_attr, label in (("super_grids", "fhm"), ("superstar", "superstar")):
            writer.pymol_out.commands += writer._write_pymol_isosurfaces(
                getattr(hr, grids_attr), "hotspot", "hotspot", label)

        writer._write_pymol_isoslider(hr)

        writer.pymol_out.write("testdata/hs_io/minimal_all_grids/test_write_pymol_isoslider.py")
Exemplo n.º 10
0
def plot_distribution_histograms(io, mode):
    """
    Plot per-ensemble histograms of the acceptor, apolar and donor grids.

    For each ensemble, the first ``*.ccp4`` file in its directory whose path
    contains both the probe name and ``mode`` is loaded, and the three grids
    are passed to `get_grid_dic_histograms`.

    :param io: object exposing ``ensembles`` and ``ensemble_dirs``
    :param str mode: substring selecting the grid files; also used as output suffix
    :return: None
    :raises IndexError: if no file matches a probe/mode pair
    """
    for ensemble in io.ensembles:
        ensemble_dir = io.ensemble_dirs[ensemble]
        grids_by_probe = {}

        for probe in ('acceptor', 'apolar', 'donor'):
            candidates = glob(join(ensemble_dir, '*.ccp4'))
            print(candidates)

            matching = [c for c in candidates if probe in c and mode in c]
            chosen = matching[0]
            print(chosen)

            grids_by_probe[probe] = Grid.from_file(chosen)

        get_grid_dic_histograms(grids_by_probe,
                                out_dir=ensemble_dir,
                                prot_name=ensemble,
                                suffix=mode)
Exemplo n.º 11
0
    def _get_hotspot(self, cav_id):
        """
        Calculate a hotspot map from pre-calculated superstar and buriedness grids.

        The elapsed time of the calculation (runner setup plus
        ``from_superstar``) is written to ``self.hotspot_time[cav_id]`` and the
        zipped result to ``self.hotspot[cav_id]``.

        :param cav_id: key into the per-cavity path mappings on this object
        :return: None
        """
        # Inputs
        protein = Protein.from_file(self.apo_prep)
        superstar_zip = os.path.join(self.superstar[cav_id], "out.zip")
        superstar_result = HotspotReader(path=superstar_zip).read()

        atomic_results = []
        for probe, probe_grid in superstar_result.super_grids.items():
            atomic_results.append(
                _AtomicHotspotResult(identifier=probe, grid=probe_grid, buriedness=None))

        buriedness_grid = Grid.from_file(self.buriedness)

        # Tasks (timed)
        t_start = time.time()
        runner = Runner()

        settings = runner.Settings()
        for option, value in (("apolar_translation_threshold", 14),
                              ("polar_translation_threshold", 14),
                              ("polar_contributions", False),
                              ("sphere_maps", False),
                              ("nrotations", 3000)):
            setattr(settings, option, value)

        hotspot_result = runner.from_superstar(protein,
                                               atomic_results,
                                               buriedness_grid,
                                               settings=settings,
                                               clear_tmp=True)
        t_finish = time.time()

        # Output
        out_dir = self.hotspot[cav_id]
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        with open(self.hotspot_time[cav_id], 'w') as timing_file:
            timing_file.write(str(t_finish - t_start))

        with HotspotWriter(out_dir, zip_results=True) as writer:
            writer.write(hotspot_result)
Exemplo n.º 12
0
    def get_ensemble_array(self):
        """
        Reads in grids, converts them to 3d numpy arrays, and stacks them into 4d numpy array, which holds the information
        for the ensemble.

        Grids are read from ``self.paths`` in order. A grid whose spacing differs from ``self.spacing`` is reported
        with a print and skipped. After every accepted grid the method updates ``self.dimensions``,
        ``self.ensemble_map_dict`` (index of the grid in the ensemble -> path) and ``self.ensemble_array``.
        If no grid is accepted, none of these attributes are touched.

        NOTE(review): with a single accepted grid ``self.ensemble_array`` is the plain array returned by
        ``g.get_array()``; the extra ensemble axis only appears from the second grid on.

        :return: None; results are stored on the instance.
        """
        # Initialise the array
        ensemble_array = None
        # Needed for converting between Cartesian coordinates and indices.
        rec_spacing = 1.0 / self.spacing

        # Fill in ensemble array; i counts the number of grids that have been added.
        i = 0
        for p in self.paths:
            # Load in grid
            g = Grid.from_file(p)

            # Check the spacing of the grid. If different, continue and add to log.
            if g.spacing != self.spacing:
                print("Grid at {} has wrong spacing (found {}, expected {})".
                      format(p, g.spacing, self.spacing))
                continue
            # Counter i keeps track of how many grids we have added
            i += 1

            # Get the dimensions of the grid:
            # presumably row 0 is the origin corner and row 1 the far corner of the bounding box
            # (inferred from the origin_diff / far_diff names below) - confirm against Grid.bounding_box.
            curr_dims = np.array(g.bounding_box)
            # Convert to numpy array
            arr = g.get_array()

            # Create the ensemble
            if i == 1:
                # Store the dimensions of the ensemble
                ens_dims = curr_dims
                # Put in as first element of the ensemble array
                ensemble_array = arr

            elif i == 2:
                # Corner offsets between the new grid and the ensemble, scaled by 1/spacing
                # (i.e. expressed in grid steps rather than Cartesian units).
                origin_diff = (curr_dims[0] - ens_dims[0]) * rec_spacing
                far_diff = ((curr_dims[1] - ens_dims[1])) * rec_spacing

                # Padding both arrays to make them the same size (and so stackable):
                # the ensemble gets the negated offsets, i.e. it is padded on the opposite sides.
                arr = self.pad_array(arr, origin_diff, far_diff)
                ensemble_array = self.pad_array(ensemble_array, -origin_diff,
                                                -far_diff)
                # Stacking 2 3D arrays creates a 4D array.
                ensemble_array = np.stack((ensemble_array, arr), axis=-1)
                # Update the ensemble dimensions
                # (element-wise min of the first rows, element-wise max of the second rows)
                ens_dims[0] = np.minimum(ens_dims[0], curr_dims[0])
                ens_dims[1] = np.maximum(ens_dims[1], curr_dims[1])

            else:
                origin_diff = (curr_dims[0] - ens_dims[0]) * rec_spacing
                far_diff = ((curr_dims[1] - ens_dims[1])) * rec_spacing

                # Padding both arrays to make them the same size:
                arr = self.pad_array(arr, origin_diff, far_diff)
                # Ensemble array is now 4D.
                # Only the first three axes are padded; the last axis (one entry per grid) is left as is.
                # NOTE(review): np.pad needs integer pad widths, so self.relu is assumed to return
                # non-negative ints here - confirm.
                ensemble_array = np.pad(
                    ensemble_array,
                    ((self.relu(-origin_diff[0]), self.relu(far_diff[0])),
                     (self.relu(-origin_diff[1]), self.relu(far_diff[1])),
                     (self.relu(-origin_diff[2]), self.relu(far_diff[2])),
                     (0, 0)),
                    "constant",
                    constant_values=0)
                # Np.stack stacks along a new axis, but ensemble_array is already 4D, so use np.append instead.
                # When using np.append, arrays have to be the same dimension, so we expand arr with an empty 4th dimension.
                arr = np.expand_dims(arr, axis=3)
                # Debug output of the two shapes about to be appended.
                print(arr.shape, ensemble_array.shape)
                ensemble_array = np.append(ensemble_array, arr, axis=3)
                # Update the ensemble dimensions
                ens_dims[0] = np.minimum(ens_dims[0], curr_dims[0])
                ens_dims[1] = np.maximum(ens_dims[1], curr_dims[1])

            # Publish the state after each accepted grid; i - 1 is the grid's index in the ensemble.
            self.dimensions = ens_dims
            self.ensemble_map_dict[i - 1] = p
            self.ensemble_array = ensemble_array
Exemplo n.º 13
0
 def setUp(self):
     """Load the grids used as fixtures by the tests."""
     # A buriedness grid
     self.buriedness = Grid.from_file("testdata/result/buriedness.grd")
     # presumably a generated grid containing one peak, judging by the attribute name - confirm against random_grid
     self.single_peak = random_grid(1)