Exemplo n.º 1
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run X3DNA_ on PDB *filename*.

        The constructor only records settings and locates the executable; no
        external program is started here.

        :Arguments:

          *filename*

               Name of the PDB coordinate file to be analyzed. It is stored
               as :attr:`filename` and later substituted into the X3DNA
               command line template (:attr:`template`). The file should be
               in Brookhaven protein databank format or something closely
               approximating this.

        :Keywords:

          *executable*

               Name of, or path to, the X3DNA driver program; it is resolved
               with ``which``. The X3DNA directories
               (e.g. ``/opt/x3dna/2.1`` and ``/opt/x3dna/2.1/bin``) must be
               set up in the environment (for instance exported in the shell
               start-up file). See the X3DNA documentation for set-up
               instructions. ["xdna_ensemble"]

          *x3dna_param*

               Determines whether base step or base pair parameters will be
               calculated. If True then stacked base step parameters will be
               analyzed [Default is True].  If False then stacked base pair
               parameters will be analyzed. Only the literal ``False``
               selects the base pair command template.

          *logfile*

               Name stored as :attr:`logfile`. ["bp_step.par"]

        Any other keyword arguments are not used by this constructor.

        :Raises: :exc:`OSError` with :data:`errno.ENOENT` if the executable
                 cannot be found.
        """
        # list of temporary files, to be cleaned up on __del__
        self.tempfiles = [
            "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par", "cf_7methods.par",
            "col_chains.scr", "col_helices.scr", "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb", "stacking.pdb"
        ]
        self.tempdirs = []
        self.filename = filename

        logger.info("Setting up X3DNA analysis for %(filename)r", vars(self))

        # guess executables
        self.exe = {}
        x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
        self.x3dna_param = kwargs.pop('x3dna_param', True)
        self.exe['xdna_ensemble'] = which(x3dna_exe_name)
        if self.exe['xdna_ensemble'] is None:
            # Without the executable nothing can be run: log and abort.
            errmsg = "X3DNA binary %(x3dna_exe_name)r not found." % vars()
            logger.critical(errmsg)
            logger.critical("%(x3dna_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
                            vars())
            raise OSError(errno.ENOENT, errmsg)
        self.logfile = kwargs.pop("logfile", "bp_step.par")

        # Command line template; %(filename)r is filled in later.
        if self.x3dna_param is False:
            self.template = textwrap.dedent("""x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
        else:
            self.template = textwrap.dedent("""find_pair -s %(filename)r stdout |analyze stdin """)

        # sanity checks
        for program, path in self.exe.items():
            if path is None or which(path) is None:
                logger.error("Executable %(program)r not found, should have been %(path)r.",
                             vars())
        # results
        self.profiles = {}
Exemplo n.º 2
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run HOLE_ on PDB *filename*.

        The constructor only records settings, locates the executables and
        assembles the HOLE input template; HOLE itself is not started here.

        :Arguments:

          *filename*

               The *filename* is used as input for HOLE in the "COORD" card of
               the input file.  It specifies the name of a PDB co-ordinate file
               to be used. This must be in Brookhaven protein databank format
               or something closely approximating this. Both ATOM and HETATM
               records are read. Note that if water molecules or ions are
               present in the channel these can be ignored on read by the use
               of the *ignore_residues* keyword.

               **Wildcard pattern**. A new feature (in release 2.1 of HOLE) was
               the option to include a wild card (``*``) in the filename. e.g.,
               *filename* = `"ab*.pdb"` will apply hole to all files in the
               directory whose name starts with ``ab`` and ends with
               ``.pdb``. This is intended to aid the analysis of multiple
               copies of the same molecule - produced during molecular dynamics
               or other method. The hole procedure will be applied to each file
               in turn with the same setup conditions (initial point, sampling
               distance etc.). Graphics files will contain a combination of the
               individual runs, one after another. Note that the pdb files are
               read independently so that they need not have an identical
               number of atoms or atom order etc. (though they should be
               sufficiently similar for a HOLE run from identical starting
               conditions to be useful).

               .. SeeAlso::

                  An alternative way to load in multiple files is a direct read
                  from a CHARMM binary dynamics DCD coordinate file - using the
                  *dcd* keyword or use :class:`HOLEtraj`.


        :Keywords:

          *dcd*
               DCD trajectory (must be supplied together with a matching
               PDB file *filename*) and then HOLE runs its analysis on each frame.

               It does multiple HOLE runs on positions taken from a CHARMM binary
               dynamics format .DCD trajectory file. The *dcd* file must have
               exactly the same number of atoms in exactly the same order as
               the pdb file specified by *filename*. Note that if this option
               is used the pdb file is used as a template only - the
               coordinates are ignored. Note that structural parameters
               determined for each individual structure are written in a tagged
               format so that it is possible to extract the information from
               the text output file using a :program:`grep` command. The
               reading of the file can be controlled by the *step* keyword
               and/or setting :attr:`HOLE.dcd_iniskip` to the number of frames
               to be skipped initially (it is initialized to 0 here).

               .. Note::

                  HOLE is very picky and does not read all DCD-like formats. If
                  in doubt, look into the *logfile* for error diagnostics.

                  At the moment, DCDs generated with MDAnalysis are not
                  accepted by HOLE — use :class:`HOLEtraj`, which works with
                  anything that MDAnalysis can read.

          *logfile*

               name of the file collecting HOLE's output (which can be parsed
               using :meth:`HOLE.collect`) ["hole.out"]

          *sphpdb*

               name of the HOLE sph file, a PDB-like file containing the
               coordinates of the pore centers.

               This keyword specifies the filename for output of the sphere
               centre information in pdb form. Its typical suffix is
               ".sph". The co-ordinates are set to the sphere centres and the
               occupancies are the sphere radii. All centres are assigned the
               atom name QSS and residue name SPH and the residue number is set
               to the storage number of the centre. The file can be imported
               into molecular graphics programs but are likely to be bonded
               together in an awful manner - as the points are very close to one
               another. In VMD sph objects are best displayed as
               "Points". Displaying .sph objects rather than rendered or dot
               surfaces can be useful to analyze the distance of particular
               atoms from the sphere-centre line.

               Most usefully .sph files can be used to produce molecular
               graphical output from a hole run. This is achieved by using the
               :program:`sph_process` program to read the .sph
               file. ["hole.sph"]

          *step*

               step size for going through the trajectory (skips *step* - 1
               frames) [1]

          *cpoint*

               coordinates of a point inside the pore, e.g. ``[12.3, 0.7,
               18.55]``. If ``None`` then HOLE's own simple search algorithm is
               used.

               This specifies a point which lies within the channel, for simple
               channels such as gramicidin results do not show great
               sensitivity to the exact point taken. An easy way to produce an
               initial point is to use molecular graphics to find two atoms
               which lie either side of the pore and to average their
               co-ordinates. Or if the channel structure contains water
               molecules or counter ions then take the coordinates of one of
               these (and use the *ignore_residues* keyword to ignore them in
               the pore radius calculation).

               If this card is not specified then HOLE now (from version 2.2)
               attempts to make a guess where the channel will be. The
               procedure assumes the channel is reasonably symmetric. The
               initial guess on cpoint will be the centroid of all alpha carbon
               atoms (name 'CA' in pdb file). This is then refined by a crude
               grid search up to 5 Å from the original position. This procedure
               works most of the time but is clearly far from infallible —
               results should be carefully checked (with molecular graphics) if
               it is used. [``None``]

          *cvect*

               Search direction, should be parallel to the pore axis,
               e.g. ``[0,0,1]`` for the z-axis.

               If this keyword is ``None`` then HOLE now attempts to make a
               guess where the channel will be. The procedure assumes the
               channel is reasonably symmetric. The guess will be either along
               the X axis (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the
               structure is not aligned on one of these axes the results will
               clearly be approximate. If a guess is used then results should
               be carefully checked. [``None``]

          *sample*
               distance of sample points in Å

               Specifies the distance between the planes used in the HOLE
               procedure. The default value is 0.2 Å, this should be reasonable
               for most purposes. However, if you wish to visualize a very
               tight constriction then specify a smaller value.  [0.2]

          *dotden*
               density of facettes for generating a 3D pore representation

               This number controls the density of dots which will be used by
               the program. A sphere of dots is placed on each centre
               determined in the Monte Carlo procedure. Only dots which do not
               lie within any other sphere are considered. The actual number of
               dots written is therefore controlled by *dotden* and
               *sample*. *dotden* should be set to between 5 (few dots per
               sphere) and 35 (large number of dots per sphere). [15]

          *endrad*
               Radius which is considered to be the end of the pore. This
               keyword can be used to specify the radius above which the
               program regards a result as indicating that the end of the pore
               has been reached. The default value is 22.0 Å. This may need to
               be increased for large channels or reduced for small. [22.0]

          *shorto*

               Determines the amount of output in the *logfile*; for automated
               processing this must be < 3. [0]

               - 0: Full text output
               - 1: All text output given except "run in progress" (i.e.,
                 detailed contemporary description of what HOLE is doing).
               - 2: Ditto plus no graph type output - only leaving minimum
                 radius and conductance calculations.
               - 3: All text output other than input card mirroring and error messages
                 turned off.

          *ignore_residues*

               sequence of three-letter residues that are not taken into account during the
               calculation; wildcards are *not* supported
               [ ``["SOL","WAT", "TIP", "HOH", "K  ", "NA ", "CL "]`` ]

          *radius*
               Path to the radii; if set to None then a set of default radii, :data:`SIMPLE2_RAD`,
               is used (an extension of ``simple.rad`` from the HOLE distribution)

               This specifies the name for the file specifying van der Waals
               radii for each atom. A number of files with different values are
               supplied with HOLE.

          *executable*

               Path to the :program:`hole` executable
               (e.g. ``/opt/hole/exe/hole``); the other programs
               :program:`sph_process` and :program:`sos_triangle` are assumed
               to live in the same directory as :program:`hole`. If
               :program:`hole` is found on the :envvar:`PATH` then the bare
               executable name is sufficient.  ["hole"]

        :Raises: :exc:`OSError` with :data:`errno.ENOENT` if the
                 :program:`hole` executable cannot be found.
        """
        # list of temporary files, to be cleaned up on __del__
        self.tempfiles = []
        self.tempdirs = []

        self.filename = filename
        # COORD card value: the (possibly rewritten) name passed to HOLE
        self.coordinates = self.check_and_fix_long_filename(self.filename)
        self.dcd = kwargs.pop('dcd', None)
        if self.dcd:
            self.dcd = self.check_and_fix_long_filename(self.dcd)
        self.dcd_step = kwargs.pop("step", 1) - 1  # HOLE docs description is confusing: step or skip??
        self.dcd_iniskip = 0
        self.cpoint = kwargs.pop("cpoint", None)
        self.cvect = kwargs.pop("cvect", None)
        self.sample = float(kwargs.pop("sample", 0.20))
        self.dotden = int(kwargs.pop("dotden", 15))
        self.endrad = float(kwargs.pop("endrad", 22.))
        self.shorto = int(kwargs.pop("shorto", 0))  # look at using SHORTO 2 for minimum output
        self.ignore_residues = kwargs.pop("ignore_residues", self.default_ignore_residues)
        # A falsy *radius* (None, "") falls back to the file written by
        # write_simplerad2().
        self.radius = self.check_and_fix_long_filename(
            realpath(kwargs.pop('radius', None) or write_simplerad2()))

        logger.info("Setting up HOLE analysis for %(filename)r", vars(self))
        logger.info("Using radius file %(radius)r", vars(self))

        # guess executables
        self.exe = {}
        hole_exe_name = kwargs.pop('executable', 'hole')
        self.exe['hole'] = which(hole_exe_name)
        if self.exe['hole'] is None:
            # Without the hole binary nothing can be run: log and abort.
            errmsg = "HOLE binary %(hole_exe_name)r not found." % vars()
            logger.critical(errmsg)
            logger.critical("%(hole_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
                            vars())
            raise OSError(errno.ENOENT, errmsg)
        # The helper programs are expected next to the hole binary.
        holepath = os.path.dirname(self.exe['hole'])
        self.exe['sos_triangle'] = os.path.join(holepath, "sos_triangle")
        self.exe['sph_process'] = os.path.join(holepath, "sph_process")

        self.sphpdb = kwargs.pop("sphpdb", "hole.sph")
        self.logfile = kwargs.pop("logfile", "hole.out")

        # HOLE input file template; the %(...)s fields are filled in later.
        self.template = textwrap.dedent("""
            ! Input file for Oliver Smart's HOLE program
            ! written by MDAnalysis.analysis.hole.HOLE
            ! filename = %(filename)s
            COORD  %(coordinates)s
            RADIUS %(radius)s
            SPHPDB %(sphpdb)s
            SAMPLE %(sample)f
            ENDRAD %(endrad)f
            IGNORE %(ignore)s
            SHORTO %(shorto)d
            """)
        if self.cpoint is not None:
            # note: if it is None then we can't change this with a kw for run() !!
            self.template += "\nCPOINT %(cpoint_xyz)s\n"
        else:
            logger.info("HOLE will guess CPOINT")
        if self.cvect is not None:
            # note: if it is None then we can't change this with a kw for run() !!
            self.template += "\nCVECT  %(cvect_xyz)s\n"
        else:
            logger.info("HOLE will guess CVECT")

        if self.dcd:
            # CHARMD -- DCD (matches COORD)
            # CHARMS int int -- ignore_first_N_frames   skip_every_X_frames
            # http://d2o.bioch.ox.ac.uk:38080/doc/hole_d03.html#CHARMD
            self.template += "\nCHARMD %(dcd)s\nCHARMS %(dcd_iniskip)d %(dcd_step)d\n"

        # sanity checks
        if self.shorto > 2:
            logger.warning("SHORTO (%d) needs to be < 3 in order to extract a HOLE profile!",
                           self.shorto)
        for program, path in self.exe.items():
            if path is None or which(path) is None:
                logger.error("Executable %(program)r not found, should have been %(path)r.",
                             vars())
        # results
        self.profiles = {}
Exemplo n.º 3
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run X3DNA_ on PDB *filename*.

        The constructor only records settings and locates the executable; no
        external program is started here.

        :Arguments:

          *filename*

               Name of the PDB coordinate file to be analyzed. It is stored
               as :attr:`filename` and later substituted into the X3DNA
               command line template (:attr:`template`). The file should be
               in Brookhaven protein databank format or something closely
               approximating this.

        :Keywords:

          *executable*

               Name of, or path to, the X3DNA driver program; it is resolved
               with ``which``. The X3DNA directories
               (e.g. ``/opt/x3dna/2.1`` and ``/opt/x3dna/2.1/bin``) must be
               set up in the environment (for instance exported in the shell
               start-up file). See the X3DNA documentation for set-up
               instructions. ["xdna_ensemble"]

          *x3dna_param*

               Determines whether base step or base pair parameters will be
               calculated. If True then stacked base step parameters will be
               analyzed [Default is True].  If False then stacked base pair
               parameters will be analyzed. Only the literal ``False``
               selects the base pair command template.

          *logfile*

               Name stored as :attr:`logfile`. ["bp_step.par"]

        Any other keyword arguments are not used by this constructor.

        :Raises: :exc:`OSError` with :data:`errno.ENOENT` if the executable
                 cannot be found.
        """
        # list of temporary files, to be cleaned up on __del__
        self.tempfiles = [
            "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par",
            "cf_7methods.par", "col_chains.scr", "col_helices.scr",
            "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb",
            "stacking.pdb"
        ]
        self.tempdirs = []
        self.filename = filename

        logger.info("Setting up X3DNA analysis for %(filename)r", vars(self))

        # guess executables
        self.exe = {}
        x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
        self.x3dna_param = kwargs.pop('x3dna_param', True)
        self.exe['xdna_ensemble'] = which(x3dna_exe_name)
        if self.exe['xdna_ensemble'] is None:
            # Without the executable nothing can be run: log and abort.
            errmsg = "X3DNA binary %(x3dna_exe_name)r not found." % vars()
            logger.critical(errmsg)
            logger.critical(
                "%(x3dna_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
                vars())
            raise OSError(errno.ENOENT, errmsg)
        self.logfile = kwargs.pop("logfile", "bp_step.par")

        # Command line template; %(filename)r is filled in later.
        if self.x3dna_param is False:
            self.template = textwrap.dedent(
                """x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
        else:
            self.template = textwrap.dedent(
                """find_pair -s %(filename)r stdout |analyze stdin """)

        # sanity checks
        for program, path in self.exe.items():
            if path is None or which(path) is None:
                logger.error(
                    "Executable %(program)r not found, should have been %(path)r.",
                    vars())
        # results
        self.profiles = {}