Exemplo n.º 1
0
    def __init__(self, filename, mode='r', force_overwrite=True):
        """Open an AMBER NetCDF trajectory file for reading or writing.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w'}
            Open the file for reading ('r') or for writing ('w').
        force_overwrite : bool
            In write mode, an already existing `filename` is only accepted
            when this flag is set.

        Raises
        ------
        ImportError
            If the installed scipy is older than 0.12.0.
        ValueError
            If `mode` is neither 'r' nor 'w'.
        IOError
            If `mode` is 'w', `force_overwrite` is false and `filename`
            already exists.
        """
        # Stay flagged as closed until the handle has really been opened.
        self._closed = True
        self._mode = mode

        scipy_version = import_('scipy.version').short_version
        if StrictVersion(scipy_version) < StrictVersion('0.12.0'):
            raise ImportError('MDTraj NetCDF support requires scipy>=0.12.0. '
                              'You have %s' % scipy_version)
        open_netcdf = import_('scipy.io').netcdf_file

        if mode not in ['r', 'w']:
            raise ValueError("mode must be one of ['r', 'w']")

        writing = mode == 'w'
        if writing and not force_overwrite and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        # AMBER uses the NetCDF3 format with 64 bit encodings; scipy's
        # netcdf_file selects that with "version=2".
        self._handle = open_netcdf(filename, mode=mode, version=2)
        self._closed = False

        # The frame we are currently positioned at in the file.
        self._frame_index = 0
        # A file opened for writing still needs its global properties set
        # before the first write operation; a file opened for reading
        # does not.
        self._needs_initialization = writing
Exemplo n.º 2
0
    def __init__(self, filename, mode='r', force_overwrite=False):
        """Open an AMBER NetCDF file for reading or writing.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w'}
            'r' opens the file for reading, 'w' for writing.
        force_overwrite : bool
            Unless set, opening an existing `filename` in write mode is an
            error.

        Raises
        ------
        ImportError
            If the installed scipy is older than 0.12.0.
        ValueError
            If `mode` is neither 'r' nor 'w'.
        IOError
            If `mode` is 'w', `force_overwrite` is false and `filename`
            already exists.
        """
        # Marked closed until the underlying handle exists.
        self._closed = True
        self._mode = mode

        installed = import_('scipy.version').short_version
        if StrictVersion(installed) < StrictVersion('0.12.0'):
            raise ImportError('MDTraj NetCDF support requires scipy>=0.12.0. '
                              'You have %s' % installed)
        netcdf_file = import_('scipy.io').netcdf_file

        if mode not in ('r', 'w'):
            raise ValueError("mode must be one of ['r', 'w']")

        for_writing = mode == 'w'
        if for_writing and not force_overwrite and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        # AMBER uses the NetCDF3 format with 64 bit encodings, which
        # scipy.io.netcdf_file calls "version=2".
        self._handle = netcdf_file(filename, mode=mode, version=2)
        self._closed = False

        # Only a file opened for writing is flagged as needing
        # initialization.
        self._needs_initialization = for_writing
Exemplo n.º 3
0
    def __init__(self, filename, mode='r', force_overwrite=True):
        """Open an AMBER NetCDF trajectory file.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w'}
            Whether the file is opened for reading or for writing.
        force_overwrite : bool
            When false, writing to a path that already exists is refused.

        Raises
        ------
        ImportError
            If the installed scipy is older than 0.12.0.
        ValueError
            If `mode` is neither 'r' nor 'w'.
        IOError
            If `mode` is 'w', `force_overwrite` is false and `filename`
            already exists.
        """
        self._closed = True  # flipped to False once the handle is open
        self._mode = mode    # the mode we were asked to open in

        have = import_('scipy.version').short_version
        if StrictVersion(have) < StrictVersion('0.12.0'):
            raise ImportError('MDTraj NetCDF support requires scipy>=0.12.0. '
                              'You have %s' % have)
        netcdf = import_('scipy.io').netcdf_file

        if mode not in ['r', 'w']:
            raise ValueError("mode must be one of ['r', 'w']")

        is_write = (mode == 'w')
        if is_write and not force_overwrite and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        # AMBER uses the NetCDF3 format, with 64 bit encodings, which
        # for scipy.io.netcdf_file is "version=2"
        self._handle = netcdf(filename, mode=mode, version=2)
        self._closed = False

        # _frame_index: the frame we are currently at in the file.
        # _needs_initialization: whether the global properties of the file
        #     still have to be set, which is required before the first
        #     write operation on a new file.
        self._frame_index = 0
        self._needs_initialization = is_write
Exemplo n.º 4
0
def get_dihedral_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the dihedral
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    idihedrals : np.ndarray, shape[n_dihedrals, 4], dtype=int
        All sets of 4 atoms A,B,C,D such that A is bonded to B, B is bonded
        to C, and C is bonded to D. Every atom is tried as the starting
        atom A, so a chain is reported once per direction it can be
        walked in.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    idihedrals = []

    # TODO: CHECK FOR DIHEDRAL ANGLES THAT ARE 180 and recover
    # conf : msmbuilder.Trajectory
    #    An msmbuilder trajectory, only the first frame will be used. This
    #    is used purely to make the check for angle(ABC) != 180.

    # Walk the atoms that really occur in the bond graph, in ascending
    # order. Counting 0..number_of_nodes()-1 instead is only correct when
    # every index in that range takes part in a bond; otherwise
    # graph.neighbors() fails on the missing index and atoms with larger
    # indices are never visited.
    for a in sorted(graph.nodes()):
        for b in graph.neighbors(a):
            for c in graph.neighbors(b):
                if c in (a, b):
                    continue
                for d in graph.neighbors(c):
                    if d not in (a, b, c):
                        idihedrals.append((a, b, c, d))

    return np.array(idihedrals)
Exemplo n.º 5
0
def get_angle_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the bond
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    iangles : np.ndarray, shape[n_angles, 3], dtype=int
        n_angles x 3 array of indices, where each row is the index of three
        atoms m,n,o such that n is bonded to both m and o.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    iangles = []

    # Visit the atoms that really occur in the bond graph, in ascending
    # order. Counting 0..number_of_nodes()-1 instead is only correct when
    # every index in that range takes part in a bond; otherwise
    # graph.neighbors() fails on the missing index and atoms with larger
    # indices are never visited.
    for i in sorted(graph.nodes()):
        # every unordered pair of neighbours (m, n) of i gives one bond
        # angle m-i-n with i at the apex
        for (m, n) in combinations(graph.neighbors(i), 2):
            iangles.append((m, i, n))

    return np.array(iangles)
Exemplo n.º 6
0
def entry_point():
    """Dispatch the command line to one of the msmbuilder script modules.

    One sub-command is registered on the module-level ``parser`` for every
    name in ``scripts.__all__``, using the first sentence of that script's
    parser description as its help text. Only the sub-command name itself
    is parsed here; ``sys.argv`` is then shifted by one so that the chosen
    script sees the remaining arguments, and its ``entry_point()`` is
    called. With no arguments at all, the help text is shown.
    """
    subparsers = parser.add_subparsers(dest="subparser_name")
    argv = sys.argv[:]
    if len(argv) == 1:
        argv.append('-h')

    for scriptname in scripts.__all__:
        # get the name and first sentence of the description from each of the
        # msmbuilder commands
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            script = import_('msmbuilder.scripts.%s' % scriptname)
            scriptparser = getattr(script, 'parser', None)

        # Some scripts expose an object that wraps the real parser in a
        # ``.parser`` attribute. Only a missing attribute should trigger
        # the fallback; anything else (e.g. KeyboardInterrupt) propagates.
        try:
            description = scriptparser.description
        except AttributeError:
            description = scriptparser.parser.description

        # http://stackoverflow.com/a/17124446/1079728
        # keep the text up to the first '.', ':' or ';' that is followed by
        # whitespace, then collapse all runs of whitespace to single spaces
        first_part = re.split(r'(?<=[.:;])\s', description)[0]
        first_sentence = ' '.join(first_part.split())
        subparsers.add_parser(scriptname, help=first_sentence)

    # parse the sub-command name only; its own arguments are left for the
    # script's parser
    args = parser.parse_args(argv[1:2])
    sys.argv = argv[1:]
    getattr(scripts, args.subparser_name).entry_point()
Exemplo n.º 7
0
def _find_chains(bond_list):
    """Group atoms into chains (molecules) from a list of bonds.

    Atoms are the nodes and bonds the edges of a graph; every connected
    component of that graph is one chain. This assumes that bonds are
    purely intramolecular, so each atom belongs to exactly one chain.

    Parameters
    ----------
    bond_list : list of (int, int)
        The list of bonds

    Returns
    -------
    chains : list of list of int
        The atoms of each chain, sorted in ascending order within a chain

    Notes
    -----
    This function requires the NetworkX python package.
    """
    nx = import_('networkx')
    bonds = np.asarray(bond_list)

    graph = nx.Graph()
    graph.add_nodes_from(set(bonds.flatten()))
    graph.add_edges_from(bonds)

    components = list(nx.connected_components(graph))
    return [sorted(component) for component in components]
Exemplo n.º 8
0
    def _init_from_handle(self, handle):
        """Set up this object's state from an existing file handle.

        Parameters
        ----------
        handle
            File handle to adopt. Its ``isopen`` and ``mode`` attributes
            are read, and in append mode ``root.coordinates`` is measured.

        Raises
        ------
        ValueError
            If the handle's mode is not one of 'r', 'w' or 'a'.
        """
        self._handle = handle
        self._open = handle.isopen != 0
        mode = handle.mode
        self.mode = mode

        if mode not in ['r', 'w', 'a']:
            raise ValueError("mode must be one of ['r', 'w', 'a']")

        self.tables = import_('tables')

        # _frame_index: the frame we are currently reading or writing at.
        # _needs_initialization: whether the header information still has
        #     to be written.
        frame_index = 0
        needs_initialization = (mode == 'w')

        if mode == 'a':
            # Appending continues after the existing coordinates; a file
            # without a coordinates node is treated like a fresh one.
            try:
                frame_index = len(self._handle.root.coordinates)
            except self.tables.NoSuchNodeError:
                needs_initialization = True

        self._frame_index = frame_index
        self._needs_initialization = needs_initialization
Exemplo n.º 9
0
def get_dihedral_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the dihedral
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    idihedrals : np.ndarray, shape[n_dihedrals, 4], dtype=int
        All sets of 4 atoms A,B,C,D such that A is bonded to B, B is bonded
        to C, and C is bonded to D. Every atom is tried as the starting
        atom A, so a chain is reported once per direction it can be
        walked in.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    idihedrals = []

    # TODO: CHECK FOR DIHEDRAL ANGLES THAT ARE 180 and recover
    # conf : msmbuilder.Trajectory
    #    An msmbuilder trajectory, only the first frame will be used. This
    #    is used purely to make the check for angle(ABC) != 180.

    # Iterate over the node labels present in the graph (ascending) rather
    # than over range(number_of_nodes()): the two only agree when the atom
    # indices found in `ibonds` are exactly 0..n-1. With a gap in the
    # indices the old loop failed inside graph.neighbors() and never
    # reached the highest-numbered atoms.
    for a in sorted(graph.nodes()):
        for b in graph.neighbors(a):
            for c in graph.neighbors(b):
                if c in (a, b):
                    continue
                for d in graph.neighbors(c):
                    if d not in (a, b, c):
                        idihedrals.append((a, b, c, d))

    return np.array(idihedrals)
Exemplo n.º 10
0
    def from_openmm(cls, value):
        """Build an mdtraj topology out of an OpenMM topology

        Chains, residues (name and segment id) and atoms are copied in
        order, followed by the bonds. OpenMM atoms that have no element
        are given the virtual element.

        Parameters
        ----------
        value : simtk.openmm.app.Topology
            An OpenMM topology that you wish to convert to a
            mdtraj topology.

        Raises
        ------
        TypeError
            If `value` is not an OpenMM Topology.
        """
        app = import_('simtk.openmm.app')

        if not isinstance(value, app.Topology):
            raise TypeError('value must be an OpenMM Topology. '
                            'You supplied a %s' % type(value))

        topology = cls()
        # OpenMM atom -> the atom created for it, needed to wire up bonds
        converted = {}

        for omm_chain in value.chains():
            new_chain = topology.add_chain()
            for omm_residue in omm_chain.residues():
                new_residue = topology.add_residue(
                    omm_residue.name, new_chain, omm_residue.segment_id)
                for omm_atom in omm_residue.atoms():
                    omm_element = omm_atom.element
                    element = (elem.virtual if omm_element is None
                               else elem.get_by_symbol(omm_element.symbol))
                    converted[omm_atom] = topology.add_atom(
                        omm_atom.name, element, new_residue)

        for first, second in value.bonds():
            topology.add_bond(converted[first], converted[second])

        return topology
Exemplo n.º 11
0
    def to_dataframe(self):
        """Convert this topology into a pandas dataframe

        Returns
        -------
        atoms : pandas.DataFrame
            The atoms in the topology, represented as a data frame with the
            columns "serial", "name", "element", "resSeq", "resName",
            "chainID" and "segmentID". Atoms without an element get an
            empty string in the "element" column.
        bonds : np.ndarray
            The bonds in this topology, represented as an n_bonds x 2 array
            of the indices of the atoms involved in each bond.
        """
        pd = import_('pandas')
        data = []
        for atom in self.atoms:
            # An atom may carry no element at all; report an empty symbol
            # for it instead of failing on `None.symbol`.
            if atom.element is None:
                element_symbol = ""
            else:
                element_symbol = atom.element.symbol
            data.append((atom.serial, atom.name, element_symbol,
                         atom.residue.resSeq, atom.residue.name,
                         atom.residue.chain.index, atom.segment_id))

        atoms = pd.DataFrame(data,
                             columns=[
                                 "serial", "name", "element", "resSeq",
                                 "resName", "chainID", "segmentID"
                             ])

        bonds = np.array([(a.index, b.index) for (a, b) in self.bonds])
        return atoms, bonds
Exemplo n.º 12
0
    def from_openmm(cls, value):
        """Build an mdtraj topology out of an OpenMM topology

        Chains, residues and atoms are copied in order, followed by the
        bonds. OpenMM atoms that have no element are given the virtual
        element.

        Parameters
        ----------
        value : simtk.openmm.app.Topology
            An OpenMM topology that you wish to convert to a
            mdtraj topology.

        Raises
        ------
        TypeError
            If `value` is not an OpenMM Topology.
        """
        app = import_('simtk.openmm.app')

        if not isinstance(value, app.Topology):
            raise TypeError('value must be an OpenMM Topology. '
                            'You supplied a %s' % type(value))

        result = cls()
        # OpenMM atom -> the atom created for it, needed to wire up bonds
        new_atom_for = {}

        for source_chain in value.chains():
            target_chain = result.add_chain()
            for source_residue in source_chain.residues():
                target_residue = result.add_residue(source_residue.name,
                                                    target_chain)
                for source_atom in source_residue.atoms():
                    if source_atom.element is not None:
                        element = elem.get_by_symbol(
                            source_atom.element.symbol)
                    else:
                        element = elem.virtual
                    new_atom_for[source_atom] = result.add_atom(
                        source_atom.name, element, target_residue)

        for left, right in value.bonds():
            result.add_bond(new_atom_for[left], new_atom_for[right])

        return result
Exemplo n.º 13
0
    def to_openmm(self):
        """Build the OpenMM equivalent of this topology

        Chains, residues and atoms are recreated in order on a new OpenMM
        topology, after which the bonds are added between the new atoms.

        Returns
        -------
        topology : simtk.openmm.app.Topology
           This topology, as an OpenMM topology
        """
        app = import_('simtk.openmm.app')

        omm_topology = app.Topology()
        # our atom -> the OpenMM atom created for it, used for the bonds
        omm_atom_for = {}

        for chain in self.chains:
            omm_chain = omm_topology.addChain()
            for residue in chain.residues:
                omm_residue = omm_topology.addResidue(residue.name, omm_chain)
                for atom in residue.atoms:
                    omm_element = app.Element.getBySymbol(atom.element.symbol)
                    omm_atom_for[atom] = omm_topology.addAtom(
                        atom.name, omm_element, omm_residue)

        for first, second in self.bonds:
            omm_topology.addBond(omm_atom_for[first], omm_atom_for[second])

        return omm_topology
Exemplo n.º 14
0
def get_angle_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the bond
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    iangles : np.ndarray, shape[n_angles, 3], dtype=int
        n_angles x 3 array of indices, where each row is the index of three
        atoms m,n,o such that n is bonded to both m and o.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    iangles = []

    # Iterate over the node labels present in the graph (ascending) rather
    # than over range(number_of_nodes()): the two only agree when the atom
    # indices found in `ibonds` are exactly 0..n-1. With a gap in the
    # indices the old loop failed inside graph.neighbors() and never
    # reached the highest-numbered atoms.
    for i in sorted(graph.nodes()):
        # every unordered pair of neighbours (m, n) of i gives one bond
        # angle m-i-n with i at the apex
        for (m, n) in combinations(graph.neighbors(i), 2):
            iangles.append((m, i, n))

    return np.array(iangles)
Exemplo n.º 15
0
    def __init__(self, filename, mode='r', force_overwrite=True):
        """Open an LH5 trajectory file for reading or writing.

        Opening for writing prints a deprecation warning for the format on
        stderr and warns when `filename` does not end in '.lh5'.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w'}
            Open the file for reading ('r') or for writing ('w').
        force_overwrite : bool
            In write mode, an already existing `filename` is only accepted
            when this flag is set.

        Raises
        ------
        IOError
            If `mode` is 'w', `force_overwrite` is false and `filename`
            already exists.
        ValueError
            If `mode` is neither 'r' nor 'w'.
        """
        self._open = False
        self.filename = filename
        self.mode = mode

        writing = mode == 'w'
        if writing and not force_overwrite and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        self.tables = import_('tables')

        if not writing and mode != 'r':
            raise ValueError("mode must be one of ['r', 'w']")

        if writing:
            print("Warning: The LH5 trajectory format is deprecated.", file=sys.stderr)

        # the frame we are currently reading or writing at
        self._frame_index = 0
        # only a file opened for writing still needs its header written
        self._needs_initialization = writing

        if writing and not filename.endswith('.lh5'):
            warnings.warn('The .lh5 extension is recommended.')

        # Compression style of legacy MSMBuilder2 lh5 trajectory format
        filters = self.tables.Filters(complib='blosc', shuffle=True, complevel=1)
        self._handle = self._open_file(filename, mode=mode, filters=filters)
        self._open = True
Exemplo n.º 16
0
def chemical_shifts_ppm(trj):
    """Predict chemical shifts of a trajectory using ppm.

    The trajectory is written to a PDB file inside a temporary directory,
    the external program is run on it, and its ``bb_details.dat`` output
    is read back into a data frame.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.

    Returns
    -------
    results : pandas.DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Raises
    ------
    OSError
        If the PPM executable cannot be found on the PATH.
    IOError
        If the PPM command exits with a non-zero status.

    Notes
    -----
    You must have ppm available on your path; see
    (http://spin.ccic.ohio-state.edu/index.php/download/index).

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference below.

    References
    ----------
    .. [1] Li, DW, and Bruschweiler, R. "PPM: a side-chain and backbone chemical
       shift predictor for the assessment of protein conformational ensembles."
       J Biomol NMR. 2012 Nov;54(3):257-65.
    """
    pd = import_('pandas')
    binary = find_executable(PPM)

    first_resSeq = trj.top.residue(0).resSeq

    if binary is None:
        raise OSError('External command not found. Looked for %s in PATH. `chemical_shifts_ppm` requires the external program PPM, available at http://spin.ccic.ohio-state.edu/index.php/download/index' % ', '.join(PPM))

    with enter_temp_directory():
        trj.save("./trj.pdb")
        cmd = "%s -pdb trj.pdb -mode detail" % binary

        return_flag = os.system(cmd)

        if return_flag != 0:
            raise IOError("Could not successfully execute command '%s', check your PPM installation or your input trajectory." % cmd)

        # Parse the output once. sep=r"\s+" splits on runs of whitespace,
        # which is what the deprecated delim_whitespace=True did.
        d = pd.read_csv("./bb_details.dat", sep=r"\s+", header=None)
        # columns 0 and 4 are discarded; 1-3 identify the atom and every
        # remaining column is relabelled with a frame number below
        d = d.drop([0, 4], axis=1)
        d = d.rename(columns={1: "resSeq", 2: "resName", 3: "name"})
        d["resSeq"] += first_resSeq - 1  # Fix bug in PPM that reindexes to 1
        d = d.drop("resName", axis=1)
        d = d.set_index(["resSeq", "name"])
        d.columns = np.arange(trj.n_frames)
        d.columns.name = "frame"

    return d
Exemplo n.º 17
0
def _str_to_unit(unit_string):
    """eval() based transformer that extracts a simtk.unit object
    from a string description.

    The string is parsed into an expression AST, rewritten by the
    module-level ``_unit_context`` visitor, compiled and evaluated.

    Parameters
    ----------
    unit_string : str
        string description of a unit. this may contain expressions with
        multiplication, division, powers, etc.

    Returns
    -------
    output
        Whatever the rewritten expression evaluates to; according to the
        examples below, a simtk.unit Unit.

    Examples
    --------
    >>> type(_str_to_unit('nanometers**2/meters*gigajoules'))
    <class 'simtk.unit.unit.Unit'>
    >>> str(_str_to_unit('nanometers**2/meters*gigajoules'))
    'nanometer**2*gigajoule/meter'

    """
    # NOTE(review): `units` is never referenced explicitly below, but eval()
    # is called without explicit namespaces and therefore evaluates the
    # expression against this function's globals and locals. The rewritten
    # names presumably resolve through this local -- confirm against
    # _unit_context before renaming or removing it.
    units = import_('simtk.unit')
    # parse the string with the ast, and then run our unit context
    # visitor on it, which will basically change bare names like
    # "nanometers" into "unit.nanometers" and simultaneously check that
    # there's no nefarious stuff in the expression.
    # NOTE(review): the safety of the eval() below rests entirely on that
    # check, which is not visible from here.


    node = _unit_context.visit(ast.parse(unit_string, mode='eval'))
    # nodes created by the visitor need line/column information filled in
    # before the tree can be compiled
    fixed_node = ast.fix_missing_locations(node)
    output = eval(compile(fixed_node, '<string>', mode='eval'))

    return output
Exemplo n.º 18
0
def _find_chains(bond_list):
    """Group atoms into chains (molecules) from a list of bonds.

    Atoms are the nodes and bonds the edges of a graph; every connected
    component of that graph is one chain. This assumes that bonds are
    purely intramolecular, so each atom belongs to exactly one chain.

    Parameters
    ----------
    bond_list : list of (int, int)
        The list of bonds

    Returns
    -------
    chains : list
        One entry per chain, each being a connected component exactly as
        produced by ``networkx.connected_components``.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    nx = import_('networkx')
    bonds = np.asarray(bond_list)

    graph = nx.Graph()
    graph.add_nodes_from(set(bonds.flatten()))
    graph.add_edges_from(bonds)

    components = nx.connected_components(graph)
    return list(components)
Exemplo n.º 19
0
def in_units_of(quantity, units_out, units_in=None):
    """Express a quantity in another system of units

    Parameters
    ----------
    quantity : number, np.ndarray, or simtk.unit.Quantity
        Either a unitted quantity, i.e. an instance of
        simtk.unit.Quantity, or a bare number or numpy array
    units_out : str
        A string description of the units you want out, such as
        "nanometers/picosecond" or "nanometers**3"
    units_in : str
        The units a bare `quantity` is given in. It is not consulted when
        `quantity` is already a simtk.unit.Quantity. If it is omitted for
        a bare quantity, the quantity is handed back unchanged, without
        any unit checking.

    Returns
    -------
    The value of `quantity` in `units_out`, or `quantity` itself when it
    is None or when it is a bare value and `units_in` was not given.

    Examples
    --------
    >>> in_units_of(1*units.meter**2/units.second, 'nanometers**2/picosecond')  # doctest: +SKIP
    1000000.0
    """
    unit_module = import_('simtk.unit')

    # nothing to convert
    if quantity is None:
        return quantity

    # already carries its own units
    if isinstance(quantity, unit_module.Quantity):
        return quantity.value_in_unit(_str_to_unit(units_out))

    # bare value with unknown units: echo it back
    if units_in is None:
        return quantity

    # bare value with declared units: attach them, then convert
    with_units = unit_module.Quantity(quantity, _str_to_unit(units_in))
    return with_units.value_in_unit(_str_to_unit(units_out))
Exemplo n.º 20
0
    def __init__(self, filename, mode='r', force_overwrite=True):
        """Open an LH5 trajectory file.

        In write mode a deprecation warning for the format is printed to
        stderr, and a warning is issued if `filename` lacks the '.lh5'
        extension.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w'}
            Whether the file is opened for reading or for writing.
        force_overwrite : bool
            When false, writing to a path that already exists is refused.

        Raises
        ------
        IOError
            If `mode` is 'w', `force_overwrite` is false and `filename`
            already exists.
        ValueError
            If `mode` is neither 'r' nor 'w'.
        """
        self._open = False
        self.filename = filename
        self.mode = mode

        is_write = (mode == 'w')
        if is_write and not force_overwrite and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        self.tables = import_('tables')

        if mode not in ('r', 'w'):
            raise ValueError("mode must be one of ['r', 'w']")

        if is_write:
            print("Warning: The LH5 trajectory format is deprecated.",
                  file=sys.stderr)

        # what frame are we currently reading or writing at?
        self._frame_index = 0
        # do we need to write the header information?
        self._needs_initialization = is_write

        if is_write and not filename.endswith('.lh5'):
            warnings.warn('The .lh5 extension is recommended.')

        # Compression style of legacy MSMBuilder2 lh5 trajectory format
        blosc_filters = self.tables.Filters(
            complib='blosc', shuffle=True, complevel=1)
        self._handle = self._open_file(
            filename, mode=mode, filters=blosc_filters)
        self._open = True
Exemplo n.º 21
0
    def to_dataframe(self):
        """Represent this topology as a pandas dataframe plus a bond array

        Returns
        -------
        atoms : pandas.DataFrame
            One row per atom with the columns "serial", "name", "element",
            "resSeq", "resName" and "chainID". Atoms without an element
            get an empty string in the "element" column.
        bonds : np.ndarray
            The bonds in this topology, represented as an n_bonds x 2 array
            of the indices of the atoms involved in each bond.
        """
        pd = import_('pandas')

        def _row(atom):
            # one tuple per atom, in the column order used below
            symbol = "" if atom.element is None else atom.element.symbol
            residue = atom.residue
            return (atom.serial, atom.name, symbol,
                    residue.resSeq, residue.name, residue.chain.index)

        column_names = ["serial", "name", "element",
                        "resSeq", "resName", "chainID"]
        rows = [_row(atom) for atom in self.atoms]
        atoms = pd.DataFrame(rows, columns=column_names)

        bond_indices = [(a.index, b.index) for (a, b) in self.bonds]
        bonds = np.array(bond_indices)
        return atoms, bonds
Exemplo n.º 22
0
def entry_point():
    """Dispatch the command line to one of the msmbuilder script modules.

    One sub-command is registered on the module-level ``parser`` for every
    name in ``scripts.__all__``, using the first sentence of that script's
    parser description as its help text. Only the sub-command name itself
    is parsed here; ``sys.argv`` is then shifted by one so that the chosen
    script sees the remaining arguments, and its ``entry_point()`` is
    called. With no arguments at all, the help text is shown.
    """
    subparsers = parser.add_subparsers(dest="subparser_name")
    argv = sys.argv[:]
    if len(argv) == 1:
        argv.append('-h')

    for scriptname in scripts.__all__:
        # get the name and first sentence of the description from each of the
        # msmbuilder commands
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            script = import_('msmbuilder.scripts.%s' % scriptname)
            scriptparser = getattr(script, 'parser', None)

        # Fall back to a wrapped ``.parser`` attribute only when the
        # description attribute is missing; a bare ``except`` here would
        # also swallow KeyboardInterrupt and unrelated errors.
        try:
            description = scriptparser.description
        except AttributeError:
            description = scriptparser.parser.description

        # http://stackoverflow.com/a/17124446/1079728
        # keep the text up to the first '.', ':' or ';' that is followed by
        # whitespace, then collapse all runs of whitespace to single spaces
        leading_text = re.split(r'(?<=[.:;])\s', description)[0]
        first_sentence = ' '.join(leading_text.split())
        subparsers.add_parser(scriptname, help=first_sentence)

    # parse the sub-command name only; its own arguments are left for the
    # script's parser
    args = parser.parse_args(argv[1:2])
    sys.argv = argv[1:]
    getattr(scripts, args.subparser_name).entry_point()
Exemplo n.º 23
0
def _str_to_unit(unit_string, simtk=False):
    """eval() based transformer that extracts a simtk.unit object
    from a string description.

    The string is parsed into an expression AST, rewritten by the
    module-level ``_unit_context`` visitor, compiled and evaluated.

    Parameters
    ----------
    unit_string : str
        string description of a unit. this may contain expressions with
        multiplication, division, powers, etc.
    simtk : bool, default=False
        If true, the local ``unit_definitions`` is taken from
        ``simtk.unit`` instead of the module-level ``UNIT_DEFINITIONS``.

    Returns
    -------
    output
        Whatever the rewritten expression evaluates to.

    Examples
    --------
    >>> type(_str_to_unit('nanometers**2/meters*gigajoules'))
    <class 'simtk.unit.unit.Unit'>
    >>> str(_str_to_unit('nanometers**2/meters*gigajoules'))
    'nanometer**2*gigajoule/meter'

    """
    # parse the string with the ast, and then run our unit context
    # visitor on it, which will basically change bare names like
    # "nanometers" into "unit.nanometers" and simultaneously check that
    # there's no nefarious stuff in the expression.
    # NOTE(review): the safety of the eval() below rests entirely on that
    # check, which is not visible from here.

    # NOTE(review): assert statements are stripped when Python runs with -O,
    # so this type check is not enforced in optimized mode.
    assert isinstance(unit_string, six.string_types)
    # NOTE(review): `unit_definitions` is not referenced explicitly below;
    # it reaches the evaluated expression only through the locals() mapping
    # handed to eval(). Presumably the rewritten names resolve through it --
    # confirm against _unit_context before renaming any local here.
    unit_definitions = UNIT_DEFINITIONS
    if simtk:
        unit_definitions = import_('simtk.unit').unit_definitions
    parsed = ast.parse(unit_string, mode='eval')
    node = _unit_context.visit(parsed)
    # nodes created by the visitor need line/column information filled in
    # before the tree can be compiled
    fixed_node = ast.fix_missing_locations(node)
    # evaluated with an empty globals dict and this function's locals
    output = eval(compile(fixed_node, '<string>', mode='eval'), {}, locals())
    return output
Exemplo n.º 24
0
    def to_openmm(self):
        """Build the OpenMM equivalent of this topology

        Chains, residues and atoms are recreated in order on a new OpenMM
        topology, after which the bonds are added between the new atoms.

        Returns
        -------
        topology : simtk.openmm.app.Topology
           This topology, as an OpenMM topology
        """
        app = import_('simtk.openmm.app')

        result = app.Topology()
        # our atom -> the OpenMM atom created for it, used for the bonds
        created = {}

        for chain in self.chains:
            new_chain = result.addChain()
            for residue in chain.residues:
                new_residue = result.addResidue(residue.name, new_chain)
                for atom in residue.atoms:
                    symbol = atom.element.symbol
                    new_element = app.Element.getBySymbol(symbol)
                    created[atom] = result.addAtom(atom.name, new_element,
                                                   new_residue)

        for left, right in self.bonds:
            result.addBond(created[left], created[right])

        return result
Exemplo n.º 25
0
    def to_openmm(self, traj=None):
        """Build the OpenMM equivalent of this topology

        Chains, residues, atoms and bonds are recreated on a new OpenMM
        topology. Atoms whose element is the virtual element are added
        without an element.

        Parameters
        ----------
        traj : MDTraj.Trajectory, optional, default=None
            If specified, use the first frame from this trajectory to
            set the unitcell information in the openmm topology.

        Returns
        -------
        topology : simtk.openmm.app.Topology
           This topology, as an OpenMM topology

        Raises
        ------
        ValueError
            If `traj` is given and the unitcell angles of its first frame
            are not all 90.0.
        """
        app = import_('simtk.openmm.app')
        mm = import_('simtk.openmm')
        u = import_('simtk.unit')

        omm_topology = app.Topology()
        # our atom -> the OpenMM atom created for it, used for the bonds
        omm_atom_for = {}

        for chain in self.chains:
            omm_chain = omm_topology.addChain()
            for residue in chain.residues:
                omm_residue = omm_topology.addResidue(residue.name, omm_chain)
                for atom in residue.atoms:
                    is_virtual = atom.element is elem.virtual
                    omm_element = (
                        None if is_virtual
                        else app.Element.getBySymbol(atom.element.symbol))
                    omm_atom_for[atom] = omm_topology.addAtom(
                        atom.name, omm_element, omm_residue)

        for first, second in self.bonds:
            omm_topology.addBond(omm_atom_for[first], omm_atom_for[second])

        # without a trajectory there is no unitcell information to copy
        if traj is None:
            return omm_topology

        first_frame_angles = traj.unitcell_angles[0]
        if np.linalg.norm(first_frame_angles - 90.0) > 1E-4:
            raise ValueError("Unitcell angles must be 90.0 to use "
                             "in OpenMM topology.")

        # box edge lengths of the first frame, tagged as nanometers
        box_lengths = mm.Vec3(*traj.unitcell_lengths[0]) * u.nanometer
        omm_topology.setUnitCellDimensions(box_lengths)
        return omm_topology
Exemplo n.º 26
0
    def to_openmm(self, traj=None):
        """Build the OpenMM equivalent of this topology

        Chains, residues, atoms and bonds are recreated on a new OpenMM
        topology. Atoms whose element is the virtual element are added
        without an element.

        Parameters
        ----------
        traj : MDTraj.Trajectory, optional, default=None
            If specified, use the first frame from this trajectory to
            set the unitcell information in the openmm topology.

        Returns
        -------
        topology : simtk.openmm.app.Topology
           This topology, as an OpenMM topology

        Raises
        ------
        ValueError
            If `traj` is given and the unitcell angles of its first frame
            are not all 90.0.
        """
        app = import_('simtk.openmm.app')
        mm = import_('simtk.openmm')
        u = import_('simtk.unit')

        result = app.Topology()
        # our atom -> the OpenMM atom created for it, used for the bonds
        created = {}

        for chain in self.chains:
            new_chain = result.addChain()
            for residue in chain.residues:
                new_residue = result.addResidue(residue.name, new_chain)
                for atom in residue.atoms:
                    if atom.element is not elem.virtual:
                        new_element = app.Element.getBySymbol(
                            atom.element.symbol)
                    else:
                        new_element = None
                    created[atom] = result.addAtom(atom.name, new_element,
                                                   new_residue)

        for left, right in self.bonds:
            result.addBond(created[left], created[right])

        # without a trajectory there is no unitcell information to copy
        if traj is None:
            return result

        cell_angles = traj.unitcell_angles[0]
        deviation = np.linalg.norm(cell_angles - 90.0)
        if deviation > 1E-4:
            raise ValueError("Unitcell angles must be 90.0 to use "
                             "in OpenMM topology.")

        # box edge lengths of the first frame, tagged as nanometers
        cell_lengths = mm.Vec3(*traj.unitcell_lengths[0]) * u.nanometer
        result.setUnitCellDimensions(cell_lengths)
        return result
Exemplo n.º 27
0
def chemical_shifts_shiftx2(trj, pH=5.0, temperature=298.00):
    """Predict chemical shifts of a trajectory using ShiftX2.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.
    pH : float, optional, default=5.0
        pH value which gets passed to the ShiftX2 predictor.
    temperature : float, optional, default=298.00
        Temperature which gets passed to the ShiftX2 predictor.

    Returns
    -------
    results : pandas DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Raises
    ------
    OSError
        If the ShiftX2 executable cannot be found.
    IOError
        If the ShiftX2 command exits with a non-zero status.

    Notes
    -----
    You must have ShiftX2 available on your path; see (http://www.shiftx2.ca/).

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference below.

    References
    ----------
    .. [1] Beomsoo Han, Yifeng Liu, Simon Ginzinger, and David Wishart.
       "SHIFTX2: significantly improved protein chemical shift
       prediction." J. Biomol. NMR, 50, 1 43-57 (2011)
    """
    pd = import_('pandas')
    binary = find_executable(SHIFTX2)
    if binary is None:
        raise OSError('External command not found. Looked for %s in PATH. `chemical_shifts_shiftx2` requires the external program SHIFTX2, available at http://www.shiftx2.ca/' % ', '.join(SHIFTX2))

    results = []
    with enter_temp_directory():
        # one PDB file per frame; ShiftX2 is run once in batch mode over all
        for i in range(trj.n_frames):
            trj[i].save("./trj%d.pdb" % i)
        cmd = "%s -b 'trj*.pdb' -p %.1f -t %.2f" % (binary, pH, temperature)

        return_flag = os.system(cmd)

        if return_flag != 0:
            raise IOError("Could not successfully execute command '%s', check your ShiftX2 installation or your input trajectory." % cmd)

        # read back the per-frame output and tag each table with its frame
        for i in range(trj.n_frames):
            d = pd.read_csv("./trj%d.pdb.cs" % i)
            d.rename(columns={"NUM": "resSeq", "RES": "resName", "ATOMNAME": "name"}, inplace=True)
            d["frame"] = i
            results.append(d)

    results = pd.concat(results)
    # `index`/`columns` replace the `rows`/`cols` keywords, which current
    # pandas no longer accepts.
    results = results.pivot_table(index=["resSeq", "name"], columns="frame", values="SHIFT")
    return results
Exemplo n.º 28
0
def mol2_to_dataframes(filename):
    """Convert a GAFF (or sybyl) mol2 file to a pair of pandas dataframes.

    Parameters
    ----------
    filename : str
        Name of mol2 filename

    Returns
    -------
    atoms_frame : pd.DataFrame
        DataFrame containing atom information
    bonds_frame : pd.DataFrame or None
        DataFrame containing bond information, or None when the bond
        section holds nothing besides its header line.

    Notes
    -----
    These dataframes may contain force field information as well as the
    information necessary for constructing the coordinates and molecular
    topology.  This function has been tested for GAFF and sybyl-style
    mol2 files but has been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!

    See Also
    --------
    If you just need the coordinates and bonds, use load_mol2(filename)
    to get a Trajectory object.
    """
    pd = import_('pandas')
    # Group consecutive lines by the key `_parse_mol2_sections` assigns them;
    # the sections used below are looked up by their header line.
    with open(filename) as f:
        data = dict((key, list(grp)) for key, grp in itertools.groupby(f, _parse_mol2_sections))

    # Mol2 can have "status bits" at the end of the bond lines. We don't care
    # about these, but they interfere with using pd_read_table because it looks
    # like one line has too many columns. So we just regex out the offending
    # text.
    status_bit_regex = r"BACKBONE|DICT|INTERRES|\|"
    data["@<TRIPOS>BOND\n"] = [re.sub(status_bit_regex, "", s)
                               for s in data["@<TRIPOS>BOND\n"]]

    # Fields are separated by runs of whitespace. The separator must not be
    # able to match the empty string (as "\s*" can): regex splitting on
    # Python >= 3.7 also splits on empty matches, which breaks fields apart.
    whitespace = r"\s+"

    if len(data["@<TRIPOS>BOND\n"]) > 1:
        csv = StringIO()
        csv.writelines(data["@<TRIPOS>BOND\n"][1:])  # [1:] drops the header
        csv.seek(0)
        bonds_frame = pd.read_table(csv, names=["bond_id", "id0", "id1", "bond_type"],
            index_col=0, header=None, sep=whitespace, engine='python')
    else:
        bonds_frame = None

    csv = StringIO()
    csv.writelines(data["@<TRIPOS>ATOM\n"][1:])
    csv.seek(0)
    atoms_frame = pd.read_csv(csv, sep=whitespace, engine='python', header=None)
    # Atom records may omit trailing columns; name only those present.
    ncols = atoms_frame.shape[1]
    names = ["serial", "name", "x", "y", "z", "atype", "code", "resName", "charge", "status"]
    atoms_frame.columns = names[:ncols]

    return atoms_frame, bonds_frame
Exemplo n.º 29
0
Arquivo: hdf5.py Projeto: bkmi/mdtraj
    def __init__(self,
                 filename,
                 mode='r',
                 force_overwrite=True,
                 compression='zlib',
                 root_uep='/'):
        """Open an HDF5 trajectory file through PyTables.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w', 'a'}
            Mode the file is opened in.
        force_overwrite : bool
            In mode 'w', an existing file is only accepted when this is True.
        compression : {'zlib', None}
            'zlib' builds a zlib ``tables.Filters``; None passes no filters.
        root_uep : str
            Forwarded unchanged to ``self._open_file``.

        Raises
        ------
        ValueError
            For an unsupported `mode` or `compression`, or when opening the
            file raises ``HDF5ExtError``.
        IOError
            If mode is 'w', `force_overwrite` is False and the file exists.
        """
        self._open = False  # flipped to True once the handle exists
        self.mode = mode  # remember the mode that was requested

        if mode not in ('r', 'w', 'a'):
            raise ValueError("mode must be one of ['r', 'w', 'a']")

        refuse_clobber = mode == 'w' and not force_overwrite
        if refuse_clobber and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        # delayed import of PyTables, kept on the instance
        self.tables = import_('tables')

        if compression == 'zlib':
            filters = self.tables.Filters(complib='zlib',
                                          shuffle=True,
                                          complevel=1)
        elif compression is None:
            filters = None
        else:
            raise ValueError('compression must be either "zlib" or None')

        try:
            self._handle = self._open_file(filename,
                                           mode=mode,
                                           filters=filters,
                                           root_uep=root_uep)
        except self.tables.exceptions.HDF5ExtError:  # Cannot find group
            message = ('The group {root_uep} was not found in {filename}. '
                       'Create the group first.').format(root_uep=root_uep,
                                                         filename=filename)
            raise ValueError(message)

        self._open = True

        # _frame_index: frame currently being read or written
        # _needs_initialization: must header information still be written?
        if mode == 'r':
            self._frame_index = 0
            self._needs_initialization = False
        elif mode == 'w':
            self._frame_index = 0
            self._needs_initialization = True
            if not filename.endswith('.h5'):
                warnings.warn('The .h5 extension is recommended.')
        elif mode == 'a':
            try:
                # continue after the frames already stored in the file
                self._frame_index = len(self._handle.root.coordinates)
                self._needs_initialization = False
            except self.tables.NoSuchNodeError:
                # no coordinates node yet: treat like a fresh file
                self._frame_index = 0
                self._needs_initialization = True
Exemplo n.º 30
0
def chemical_shifts_ppm(trj):
    """Predict chemical shifts of a trajectory using ppm.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.

    Returns
    -------
    results : pandas.DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Raises
    ------
    OSError
        If the PPM executable cannot be found.
    IOError
        If the PPM command exits with a non-zero status.

    Notes
    -----
    You must have ppm available on your path; see
    (http://spin.ccic.ohio-state.edu/index.php/download/index).

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference below.

    References
    ----------
    .. [1] Li, DW, and Bruschweiler, R. "PPM: a side-chain and backbone chemical
       shift predictor for the assessment of protein conformational ensembles."
       J Biomol NMR. 2012 Nov;54(3):257-65.
    """
    pd = import_('pandas')
    binary = find_executable(PPM)

    first_resSeq = trj.top.residue(0).resSeq

    if binary is None:
        raise OSError('External command not found. Looked for %s in PATH. `chemical_shifts_ppm` requires the external program PPM, available at http://spin.ccic.ohio-state.edu/index.php/download/index' % ', '.join(PPM))

    with enter_temp_directory():
        trj.save("./trj.pdb")
        cmd = "%s -pdb trj.pdb -mode detail" % binary

        return_flag = os.system(cmd)

        if return_flag != 0:
            raise IOError("Could not successfully execute command '%s', check your PPM installation or your input trajectory." % cmd)

        # Split on runs of whitespace. A separator that can match the empty
        # string (such as "\s*") breaks fields apart under regex splitting
        # on Python >= 3.7, so require at least one whitespace character.
        d = pd.read_table("./bb_details.dat", index_col=False, header=None, sep=r"\s+").drop([3], axis=1)

        d = d.rename(columns={0: "resSeq", 1: "resName", 2: "name"})
        d["resSeq"] += first_resSeq - 1  # Fix bug in PPM that reindexes to 1
        d = d.drop("resName", axis=1)
        d = d.set_index(["resSeq", "name"])
        # the remaining columns are relabelled 0..n_frames-1
        d.columns = np.arange(trj.n_frames)
        d.columns.name = "frame"

    return d
Exemplo n.º 31
0
def mol2_to_dataframes(filename):
    """Convert a GAFF (or sybyl) mol2 file to a pair of pandas dataframes.

    Parameters
    ----------
    filename : str
        Name of mol2 filename

    Returns
    -------
    atoms_frame : pd.DataFrame
        DataFrame containing atom information
    bonds_frame : pd.DataFrame or None
        DataFrame containing bond information, or None when the bond
        section holds nothing besides its header line.

    Notes
    -----
    These dataframes may contain force field information as well as the
    information necessary for constructing the coordinates and molecular
    topology.  This function has been tested for GAFF and sybyl-style
    mol2 files but has been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!

    See Also
    --------
    If you just need the coordinates and bonds, use load_mol2(filename)
    to get a Trajectory object.
    """
    pd = import_('pandas')
    # Lines are bucketed by the key returned from `_parse_mol2_sections`;
    # the ATOM and BOND buckets are fetched by their header line below.
    with open(filename) as f:
        data = dict((key, list(grp)) for key, grp in itertools.groupby(f, _parse_mol2_sections))

    # Mol2 can have "status bits" at the end of the bond lines. We don't care
    # about these, but they interfere with using pd_read_table because it looks
    # like one line has too many columns. So we just regex out the offending
    # text.
    status_bit_regex = r"BACKBONE|DICT|INTERRES|\|"
    bond_lines = [re.sub(status_bit_regex, "", s)
                  for s in data["@<TRIPOS>BOND\n"]]
    data["@<TRIPOS>BOND\n"] = bond_lines

    # Use "\s+" rather than "\s*": a separator that can match the empty
    # string makes regex-based splitting cut between every character on
    # Python >= 3.7.
    field_sep = r"\s+"

    if len(bond_lines) > 1:
        csv = StringIO()
        csv.writelines(bond_lines[1:])  # skip the section header line
        csv.seek(0)
        bonds_frame = pd.read_table(csv, names=["bond_id", "id0", "id1", "bond_type"],
            index_col=0, header=None, sep=field_sep, engine='python')
    else:
        bonds_frame = None

    csv = StringIO()
    csv.writelines(data["@<TRIPOS>ATOM\n"][1:])
    csv.seek(0)
    atoms_frame = pd.read_csv(csv, sep=field_sep, engine='python', header=None)
    # Only as many column names are applied as columns were actually parsed.
    ncols = atoms_frame.shape[1]
    names = ["serial", "name", "x", "y", "z", "atype", "code", "resName", "charge", "status"]
    atoms_frame.columns = names[:ncols]

    return atoms_frame, bonds_frame
Exemplo n.º 32
0
def mol2_to_dataframes(filename):
    """Convert a GAFF (or sybyl) mol2 file to a pair of pandas dataframes.

    Parameters
    ----------
    filename : str
        Name of mol2 filename

    Returns
    -------
    atoms_frame : pd.DataFrame
        DataFrame containing atom information
    bonds_frame : pd.DataFrame
        DataFrame containing bond information

    Notes
    -----
    These dataframes may contain force field information as well as the
    information necessary for constructing the coordinates and molecular
    topology.  This function has been tested for GAFF and sybyl-style
    mol2 files but has been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!

    See Also
    --------
    If you just need the coordinates and bonds, use load_mol2(filename)
    to get a Trajectory object.
    """
    pd = import_('pandas')
    # Lines are grouped by the key `_parse_mol2_sections` returns for them;
    # the two sections read below are looked up by their header line.
    with open(filename) as f:
        data = dict((key, list(grp))
                    for key, grp in itertools.groupby(f, _parse_mol2_sections))

    # Fields are separated by runs of whitespace. "\s+" is used instead of
    # "\s*" because a separator that can match the empty string makes
    # regex-based splitting break fields apart on Python >= 3.7.
    whitespace = r"\s+"

    csv = StringIO()
    csv.writelines(data["@<TRIPOS>BOND\n"][1:])  # [1:] skips the header line
    csv.seek(0)
    bonds_frame = pd.read_table(csv,
                                names=["bond_id", "id0", "id1", "bond_type"],
                                index_col=0,
                                header=None,
                                sep=whitespace,
                                engine='python')

    csv = StringIO()
    csv.writelines(data["@<TRIPOS>ATOM\n"][1:])
    csv.seek(0)
    atoms_frame = pd.read_csv(csv,
                              sep=whitespace,
                              engine='python',
                              header=None,
                              names=[
                                  "serial", "name", "x", "y", "z", "atype",
                                  "code", "resName", "charge"
                              ])
    return atoms_frame, bonds_frame
Exemplo n.º 33
0
def chemical_shifts_shiftx2(trj):
    """Predict chemical shifts of a trajectory using ShiftX2.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.

    Returns
    -------
    results : pandas DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Raises
    ------
    OSError
        If the ShiftX2 executable cannot be found.
    IOError
        If the ShiftX2 command exits with a non-zero status.

    Notes
    -----
    You must have ShiftX2 available on your path; see (http://www.shiftx2.ca/).

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference below.

    References
    ----------
    .. [1] Beomsoo Han, Yifeng Liu, Simon Ginzinger, and David Wishart.
       "SHIFTX2: significantly improved protein chemical shift
       prediction." J. Biomol. NMR, 50, 1 43-57 (2011)
    """
    pd = import_('pandas')
    binary = find_executable(SHIFTX2)
    if binary is None:
        raise OSError('External command not found. Looked for %s in PATH. `chemical_shifts_shiftx2` requires the external program SHIFTX2, available at http://www.shiftx2.ca/' % ', '.join(SHIFTX2))

    results = []
    with enter_temp_directory():
        # write every frame to its own PDB file, then run ShiftX2 once
        for i in range(trj.n_frames):
            trj[i].save("./trj%d.pdb" % i)
        cmd = "%s -b 'trj*.pdb'" % binary

        return_flag = os.system(cmd)

        if return_flag != 0:
            raise IOError("Could not successfully execute command '%s', check your ShiftX2 installation or your input trajectory." % cmd)

        # collect the per-frame output tables, tagging each with its frame
        for i in range(trj.n_frames):
            d = pd.read_csv("./trj%d.pdb.cs" % i)
            d.rename(columns={"NUM": "resSeq", "RES": "resName", "ATOMNAME": "name"}, inplace=True)
            d["frame"] = i
            results.append(d)

    results = pd.concat(results)
    # `index`/`columns` replace the `rows`/`cols` keywords, which current
    # pandas no longer accepts.
    results = results.pivot_table(index=["resSeq", "name"], columns="frame", values="SHIFT")
    return results
Exemplo n.º 34
0
    def visit_Name(self, node):
        """Replace a bare name node with an attribute lookup on ``units``.

        A name such as ``nanometers`` becomes ``units.nanometers``, so that
        nothing has to be star-imported from the units module here.

        Raises
        ------
        ValueError
            If the name is neither 'units' nor an attribute of simtk.unit.
        """
        units = import_('simtk.unit')
        name = node.id

        # Use this opportunity to validate the name: it has to be 'units'
        # itself or something simtk.unit actually provides.
        recognised = name == 'units' or hasattr(units, name)
        if not recognised:
            raise ValueError('%s is not a valid unit' % name)

        namespace = ast.Name(id='units', ctx=ast.Load())
        return ast.Attribute(value=namespace, attr=name, ctx=ast.Load())
Exemplo n.º 35
0
    def __init__(self, filename, mode='r', force_overwrite=False):
        """Open a NetCDF file through ``scipy.io.netcdf_file``.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w'}
            Mode the file is opened in.
        force_overwrite : bool
            In mode 'w', an existing file is only accepted when this is True.

        Raises
        ------
        ImportError
            If the installed scipy is older than 0.12.0.
        ValueError
            If `mode` is not 'r' or 'w'.
        IOError
            If mode is 'w', `force_overwrite` is False and the file exists.
        """
        self._closed = True
        self._mode = mode

        scipy_version = import_('scipy.version').short_version
        if StrictVersion(scipy_version) < StrictVersion('0.12.0'):
            raise ImportError('MDTraj NetCDF support requires scipy>=0.12.0. '
                              'You have %s' % scipy_version)
        netcdf = import_('scipy.io').netcdf_file

        if mode not in ('r', 'w'):
            raise ValueError("mode must be one of ['r', 'w']")

        would_clobber = mode == 'w' and not force_overwrite
        if would_clobber and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        # AMBER uses the NetCDF3 format, with 64 bit encodings, which for
        # scipy.io.netcdf_file is "version=2"
        self._handle = netcdf(filename, mode=mode, version=2)
        self._closed = False

        # only a file opened for writing still needs its header set up
        if mode == 'r':
            self._needs_initialization = False
        elif mode == 'w':
            self._needs_initialization = True
        else:
            raise RuntimeError()
Exemplo n.º 36
0
    def __init__(self, filename, mode='r', force_overwrite=True):
        """Open an AMBER NetCDF trajectory file through netCDF4.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w', 'a', 'ws', 'as'}
            'r' reads, 'w' writes and 'a' appends; per the error message
            below, the 's' suffix turns off buffering.
        force_overwrite : bool
            In a write mode, an existing file is only accepted when this is
            True. Also passed to ``netCDF4.Dataset`` as ``clobber``.

        Raises
        ------
        ValueError
            If `mode` is not one of the supported values.
        IOError
            If a write mode is requested, `force_overwrite` is False and
            the file already exists.
        """
        self._closed = True  # is the file currently closed?
        self._mode = mode  # what mode were we opened in
        netcdf = import_('netCDF4')

        if mode not in ['r', 'w', 'a', 'ws', 'as']:
            raise ValueError(
                ("mode must be one of ['r', 'w', 'a', 'ws', 'as']"
                 " 'r' indicates read, 'w' indicates write, and 'a' indicates"
                 " append. 'a' and 'w' can be appended with 's', which turns "
                 " off buffering"))

        if mode in ['w', 'ws'
                    ] and not force_overwrite and os.path.exists(filename):
            # interpolate the filename; the placeholder used to be left raw
            raise IOError('"%s" already exists' % filename)

        # AMBER uses the NetCDF3 format, with 64 bit encodings
        self._handle = netcdf.Dataset(filename,
                                      mode=mode,
                                      format='NETCDF3_64BIT',
                                      clobber=force_overwrite)
        self._closed = False

        # self._frame_index is the current frame that we're at in the
        #     file
        # self._needs_initialization indicates whether we need to set the
        #     global properties of the file. This is required before the first
        #     write operation on a new file
        # self._n_atoms is the number of atoms in the file

        if mode in ['a', 'as']:
            self._frame_index = len(self._handle.dimensions['frame'])
            self._n_atoms = len(self._handle.dimensions['atom'])
            self._needs_initialization = False
        elif mode in ['w', 'ws']:
            self._frame_index = 0
            self._n_atoms = None
            # self._n_atoms will be set during _initialize_headers call
            self._needs_initialization = True
        elif mode == 'r':
            self._frame_index = 0
            self._n_atoms = len(self._handle.dimensions['atom'])
            self._needs_initialization = False
        else:
            # unreachable after the mode validation above; kept as a guard
            raise RuntimeError()
Exemplo n.º 37
0
def mol2_to_dataframes(filename):
    """Convert a GAFF (or sybyl) mol2 file to a pair of pandas dataframes.

    Parameters
    ----------
    filename : str
        Name of mol2 filename

    Returns
    -------
    atoms_frame : pd.DataFrame
        DataFrame containing atom information
    bonds_frame : pd.DataFrame
        DataFrame containing bond information

    Notes
    -----
    These dataframes may contain force field information as well as the
    information necessary for constructing the coordinates and molecular
    topology.  This function has been tested for GAFF and sybyl-style
    mol2 files but has been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!

    See Also
    --------
    If you just need the coordinates and bonds, use load_mol2(filename)
    to get a Trajectory object.
    """
    pd = import_("pandas")
    # Lines are grouped by the key `_parse_mol2_sections` returns for them;
    # the two sections read below are looked up by their header line.
    with open(filename) as f:
        data = dict((key, list(grp)) for key, grp in itertools.groupby(f, _parse_mol2_sections))

    # Fields are separated by runs of whitespace. "\s+" is used instead of
    # "\s*" because a separator that can match the empty string makes
    # regex-based splitting break fields apart on Python >= 3.7.
    whitespace = r"\s+"

    csv = StringIO()
    csv.writelines(data["@<TRIPOS>BOND\n"][1:])  # [1:] skips the header line
    csv.seek(0)
    bonds_frame = pd.read_table(csv, names=["bond_id", "id0", "id1", "bond_type"], index_col=0, header=None, sep=whitespace)

    csv = StringIO()
    csv.writelines(data["@<TRIPOS>ATOM\n"][1:])
    csv.seek(0)
    atoms_frame = pd.read_csv(
        csv, sep=whitespace, names=["serial", "name", "x", "y", "z", "atype", "code", "resName", "charge"], header=None
    )  # , usecols=range(1, 10))  # usecols not available in pandas 0.11
    return atoms_frame, bonds_frame
Exemplo n.º 38
0
    def __init__(self, filename, mode="r", force_overwrite=True):
        """Open an AMBER NetCDF trajectory file through netCDF4.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w', 'a', 'ws', 'as'}
            'r' reads, 'w' writes and 'a' appends; per the error message
            below, the 's' suffix turns off buffering.
        force_overwrite : bool
            In a write mode, an existing file is only accepted when this is
            True. Also passed to ``netCDF4.Dataset`` as ``clobber``.

        Raises
        ------
        ValueError
            If `mode` is not one of the supported values.
        IOError
            If a write mode is requested, `force_overwrite` is False and
            the file already exists.
        """
        self._closed = True  # is the file currently closed?
        self._mode = mode  # what mode were we opened in
        netcdf = import_("netCDF4")

        if mode not in ["r", "w", "a", "ws", "as"]:
            raise ValueError(
                (
                    "mode must be one of ['r', 'w', 'a', 'ws', 'as']"
                    " 'r' indicates read, 'w' indicates write, and 'a' indicates"
                    " append. 'a' and 'w' can be appended with 's', which turns "
                    " off buffering"
                )
            )

        if mode in ["w", "ws"] and not force_overwrite and os.path.exists(filename):
            # interpolate the filename; the placeholder used to be left raw
            raise IOError('"%s" already exists' % filename)

        # AMBER uses the NetCDF3 format, with 64 bit encodings
        self._handle = netcdf.Dataset(filename, mode=mode, format="NETCDF3_64BIT", clobber=force_overwrite)
        self._closed = False

        # self._frame_index is the current frame that we're at in the
        #     file
        # self._needs_initialization indicates whether we need to set the
        #     global properties of the file. This is required before the first
        #     write operation on a new file
        # self._n_atoms is the number of atoms in the file

        if mode in ["a", "as"]:
            self._frame_index = len(self._handle.dimensions["frame"])
            self._n_atoms = len(self._handle.dimensions["atom"])
            self._needs_initialization = False
        elif mode in ["w", "ws"]:
            self._frame_index = 0
            self._n_atoms = None
            # self._n_atoms will be set during _initialize_headers call
            self._needs_initialization = True
        elif mode == "r":
            self._frame_index = 0
            self._n_atoms = len(self._handle.dimensions["atom"])
            self._needs_initialization = False
        else:
            # unreachable after the mode validation above; kept as a guard
            raise RuntimeError()
Exemplo n.º 39
0
    def __init__(self, filename, mode="r", force_overwrite=True, compression="zlib"):
        """Open an HDF5 trajectory file through PyTables.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        mode : {'r', 'w', 'a'}
            Mode the file is opened in.
        force_overwrite : bool
            In mode 'w', an existing file is only accepted when this is True.
        compression : {'zlib', None}
            'zlib' builds a zlib ``tables.Filters``; None passes no filters.

        Raises
        ------
        ValueError
            For an unsupported `mode` or `compression`.
        IOError
            If mode is 'w', `force_overwrite` is False and the file exists.
        """
        self._open = False  # flipped to True once the handle exists
        self.mode = mode  # remember the mode that was requested

        if mode not in ("r", "w", "a"):
            raise ValueError("mode must be one of ['r', 'w', 'a']")

        refuse_clobber = mode == "w" and not force_overwrite
        if refuse_clobber and os.path.exists(filename):
            raise IOError('"%s" already exists' % filename)

        # delayed import of PyTables, kept on the instance
        self.tables = import_("tables")

        if compression == "zlib":
            filters = self.tables.Filters(complib="zlib", shuffle=True, complevel=1)
        elif compression is None:
            filters = None
        else:
            raise ValueError('compression must be either "zlib" or None')

        self._handle = self._open_file(filename, mode=mode, filters=filters)
        self._open = True

        # _frame_index: frame currently being read or written
        # _needs_initialization: must header information still be written?
        if mode == "r":
            self._frame_index = 0
            self._needs_initialization = False
        elif mode == "w":
            self._frame_index = 0
            self._needs_initialization = True
            if not filename.endswith(".h5"):
                warnings.warn("The .h5 extension is recommended.")
        elif mode == "a":
            try:
                # continue after the frames already stored in the file
                self._frame_index = len(self._handle.root.coordinates)
                self._needs_initialization = False
            except self.tables.NoSuchNodeError:
                # no coordinates node yet: treat like a fresh file
                self._frame_index = 0
                self._needs_initialization = True
Exemplo n.º 40
0
    def to_bondgraph(self):
        """Build a NetworkX graph of this topology's atoms and bonds.

        Returns
        -------
        g : nx.Graph
            Graph with one node per entry of ``self.atoms`` and one edge
            per entry of ``self.bonds``.

        See Also
        --------
        atoms
        bonds

        Notes
        -----
        This method requires the NetworkX python package.
        """
        networkx = import_('networkx')
        bond_graph = networkx.Graph()
        # nodes are added first, then the edges between them
        bond_graph.add_nodes_from(self.atoms)
        bond_graph.add_edges_from(self.bonds)
        return bond_graph
Exemplo n.º 41
0
    def to_bondgraph(self):
        """Return the atoms and bonds of this topology as a NetworkX graph.

        Returns
        -------
        g : nx.Graph
            Graph whose nodes come from ``self.atoms`` and whose edges come
            from ``self.bonds``.

        See Also
        --------
        atoms
        bonds

        Notes
        -----
        This method requires the NetworkX python package.
        """
        networkx = import_('networkx')
        graph = networkx.Graph()
        # populate nodes before edges
        graph.add_nodes_from(self.atoms)
        graph.add_edges_from(self.bonds)
        return graph
Exemplo n.º 42
0
def test_delay_import():
    """Smoke test: ``import_`` is called on the name 'scipy.sparse'."""
    module_name = 'scipy.sparse'
    import_(module_name)
Exemplo n.º 43
0
def test_delay_import_fail_1():
    """Call ``import_`` on a module name that does not exist.

    NOTE(review): no expectation (decorator or context manager) is visible
    here; presumably the failure is asserted outside this snippet - confirm.
    """
    bogus_module = 'sdfsdfsfsfdsdf'
    import_(bogus_module)
Exemplo n.º 44
0
def test_delay_import():
    """Smoke test: ``import_`` is called on the name 'scipy.sparse'."""
    target = 'scipy.sparse'
    import_(target)
Exemplo n.º 45
0
>>> io.saveh('file.hdf5', x=x)                                  # doctest: +SKIP
>>> np.all(x == io.loadh('file.hdf5')['x'])                     # doctest: +SKIP
True

Functions
---------
"""
from __future__ import print_function, division
import os
import warnings
import numpy as np
from mdtraj.utils import import_
from mdtraj.utils.six import PY3, iteritems
if PY3:
    # Python 3 has no `basestring`; alias it so code written against the
    # Python 2 name keeps working.
    basestring = str
tables = import_('tables')

__all__ = ['saveh', 'loadh']

# Prefer BLOSC filters; if building them fails for any reason, warn and fall
# back to PyTables' default filters.
try:
    COMPRESSION = tables.Filters(complevel=9, complib='blosc', shuffle=True)
except Exception:  #type?
    warnings.warn("Missing BLOSC; no compression will be used.")
    COMPRESSION = tables.Filters()

def saveh(file, *args, **kwargs):
    """Save several numpy arrays into a single file in compressed ``.hdf`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names, in the ``.hdf`` file, are 'arr_0', 'arr_1', etc. If keyword arguments
    are given, the corresponding variable names, in the ``.hdf`` file will
Exemplo n.º 46
0
#------------------------------------------------------------------------------

from __future__ import print_function
import os
import sys
import warnings
import functools
import operator
from argparse import ArgumentParser

import numpy as np
import mdtraj as md
from mdtraj.utils import import_, ilen
from mdtraj.geometry.internal import COVALENT_RADII

spatial = import_('scipy.spatial')

#------------------------------------------------------------------------------
# Code
#------------------------------------------------------------------------------

class NoTopologyError(Exception):
    """Error with a fixed message asking for an input file with topology.

    The message states that at least one of the trajectory files should
    contain topology information (HDF5 or PDB).
    """

    def __init__(self):
        super(NoTopologyError, self).__init__(
            "One or more of the trajectory files should contain topology "
            "information (i.e. either HDF5 or PDB)")


def parse_args():
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('files', nargs='+', help='''Input trajectory file(s),
Exemplo n.º 47
0
def test_delay_import_fail_1():
    """``import_`` must raise ImportError for a module that does not exist."""
    bogus_module = 'sdfsdfsfsfdsdf'
    with pytest.raises(ImportError):
        import_(bogus_module)
Exemplo n.º 48
0
# ------------------------------------------------------------------------------

from __future__ import print_function
import os
import sys
import warnings
import functools
import operator
from argparse import ArgumentParser

import numpy as np
import mdtraj as md
from mdtraj.utils import import_, ilen
from mdtraj.geometry.internal import COVALENT_RADII

spatial = import_("scipy.spatial")

# ------------------------------------------------------------------------------
# Code
# ------------------------------------------------------------------------------


class NoTopologyError(Exception):
    """Error with a fixed message asking for an input file with topology.

    The message states that at least one of the trajectory files should
    contain topology information (HDF5 or PDB).
    """

    def __init__(self):
        super(NoTopologyError, self).__init__(
            "One or more of the trajectory files should contain topology "
            "information (i.e. either HDF5 or PDB)"
        )


def parse_args():
    parser = ArgumentParser(description=__doc__)
Exemplo n.º 49
0
def chemical_shifts_spartaplus(trj, rename_HN=True):
    """Predict chemical shifts of a trajectory using SPARTA+.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.
    rename_HN : bool, optional, default=True
        SPARTA+ calls the amide proton "HN" instead of the standard "H".
        When True, this option renames the output as "H" to match the PDB
        and BMRB nomenclature.

    Returns
    -------
    results : pandas.DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Raises
    ------
    OSError
        If the SPARTA+ executable cannot be found.
    IOError
        If the SPARTA+ command exits with a non-zero status.

    Notes
    -----
    You must have SPARTA+ available on your path; see
    (http://spin.niddk.nih.gov/bax/software/SPARTA+/). Also, the SPARTAP_DIR
    environment variable must be set so that SPARTA+ knows where to find
    its database files.

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference below.

    References
    ----------
    .. [1] Shen, Y., and Bax, Ad. "SPARTA+: a modest improvement in empirical
       NMR chemical shift prediction by means of an artificial neural network."
       J. Biomol. NMR, 48, 13-22 (2010)
    """
    pd = import_('pandas')
    binary = find_executable(SPARTA_PLUS)
    if binary is None:
        raise OSError('External command not found. Looked for %s in PATH. `chemical_shifts_spartaplus` requires the external program SPARTA+, available at http://spin.niddk.nih.gov/bax/software/SPARTA+/' % ', '.join(SPARTA_PLUS))

    names = ["resSeq", "resName", "name", "SS_SHIFT", "SHIFT", "RC_SHIFT", "HM_SHIFT", "EF_SHIFT", "SIGMA"]

    with enter_temp_directory():
        # one PDB file per frame, all handed to a single SPARTA+ invocation
        for i in range(trj.n_frames):
            trj[i].save("./trj%d.pdb" % i)

        cmd = "%s -in %s" % (binary, ' '.join("trj%d.pdb" % i for i in range(trj.n_frames)))

        return_flag = os.system(cmd)

        if return_flag != 0:
            raise IOError("Could not successfully execute command '%s', check your SPARTA+ installation or your input trajectory." % cmd)

        # the skip count is taken from the first output file and reused
        lines_to_skip = _get_lines_to_skip("trj0_pred.tab")

        results = []
        for i in range(trj.n_frames):
            # Split on runs of whitespace; a separator that can match the
            # empty string ("\s*") breaks fields apart under regex splitting
            # on Python >= 3.7.
            d = pd.read_table("./trj%d_pred.tab" % i, names=names, header=None, sep=r"\s+", skiprows=lines_to_skip)
            d["frame"] = i
            results.append(d)

    results = pd.concat(results)

    if rename_HN:
        # Assign through .loc instead of chained indexing so the write
        # reliably lands in `results`. The mask is passed as a plain array
        # because the concatenated index repeats for every frame.
        is_amide_proton = (results["name"] == "HN").values
        results.loc[is_amide_proton, "name"] = "H"

    # `index`/`columns` replace the `rows`/`cols` keywords, which current
    # pandas no longer accepts.
    results = results.pivot_table(index=["resSeq", "name"], columns="frame", values="SHIFT")

    return results
Exemplo n.º 50
0
True

Functions
---------
"""
from __future__ import print_function, division, absolute_import

import io
import os
import warnings
import numpy as np
from mdtraj.utils import import_
from mdtraj.utils.six import PY2, PY3, iteritems, StringIO
if PY3:
    # Python 3 has no `basestring`; alias it so code written against the
    # Python 2 name keeps working.
    basestring = str
tables = import_('tables')
# True for PyTables releases before 3.0. The major version is compared as an
# integer, because comparing version strings is lexicographic and would
# misorder e.g. '10.0.0' and '3.0.0'.
TABLES2 = int(tables.__version__.split('.')[0]) < 3

__all__ = ['saveh', 'loadh']

# Prefer zlib filters; if building them fails for any reason, warn and fall
# back to PyTables' default filters.
try:
    COMPRESSION = tables.Filters(complevel=9, complib='zlib', shuffle=True)
except Exception:  #type?
    warnings.warn("Missing Zlib; no compression will be used.")
    COMPRESSION = tables.Filters()

# Note to developers: This module is pseudo-deprecated. It provides (loadh, saveh)
# which are useful functions (and we want to maintain them), but aren't really
# within the scope of MDTraj as we now understand it.
#
# With that said, many people use these functions and no good would come from getting
Exemplo n.º 51
0
##############################################################################
# imports
##############################################################################

import os
import tempfile
import shutil
import numpy as np

import mdtraj as md
from mdtraj import element
from mdtraj.utils import import_
from mdtraj.testing import skipif, get_fn, eq, slow

# scripttest is optional: record in HAVE_SCRIPTTEST whether it could be
# imported.
# NOTE(review): this only catches SystemExit, so it relies on import_
# signalling a missing package that way rather than with ImportError --
# confirm against mdtraj.utils.import_.
try:
    scripttest = import_('scripttest')
    HAVE_SCRIPTTEST = True
except SystemExit:
    HAVE_SCRIPTTEST = False

##############################################################################
# globals
##############################################################################

# If you switch DEBUG_MODE to True, none of the files will be deleted
# at the end of the execution of this suite, so that you can debug the
# problem by running mdconvert manually.
DEBUG_MODE = False

# Temporary directory created once, when this module is imported.
# NOTE(review): presumably the scratch area for the tests in this suite;
# its cleanup is not visible in this part of the file.
staging_dir = tempfile.mkdtemp()
Exemplo n.º 52
0
def test_delay_import_fail_1():
    """Ask ``import_`` for a module name that should not exist.

    NOTE(review): the expected failure is presumably asserted by a decorator
    or test harness that is not visible in this snippet -- confirm.
    """
    bogus_module_name = 'sdfsdfsfsfdsdf'
    import_(bogus_module_name)
Exemplo n.º 53
0
    def from_dataframe(cls, atoms, bonds=None):
        """Create a mdtraj topology from a pandas data frame

        Parameters
        ----------
        atoms : pandas.DataFrame
            The atoms in the topology, represented as a data frame. This data
            frame should have columns "serial" (atom index), "name" (atom name),
            "element" (atom's element), "resSeq" (index of the residue)
            "resName" (name of the residue), "chainID" (index of the chain),
            and optionally "segmentID", following the same conventions
            as wwPDB 3.0 format. Its index must be 0, 1, ..., len(atoms) - 1.
            The frame passed in is not modified.
        bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
            The bonds in the topology, represented as an n_bonds x 2 array
            of the indices of the atoms involved in each bond. Specifying
            bonds here is optional. To create standard protein bonds, you can
            use `create_standard_bonds` to "fill in" the bonds on your newly
            created Topology object

        Returns
        -------
        out : cls
            A new instance of `cls` populated with one chain per unique
            "chainID", one residue per unique "resSeq" within each chain,
            the atoms, and the requested bonds.

        Raises
        ------
        TypeError
            If `atoms` is not a pandas.DataFrame or `bonds` is not a
            numpy.ndarray.
        ValueError
            If a required column is missing, if the index of `atoms` is not
            0..len(atoms)-1, or if atoms sharing a residue index within a
            chain do not all have the same residue name.

        See Also
        --------
        create_standard_bonds
        """
        pd = import_('pandas')

        if bonds is None:
            bonds = np.zeros((0, 2))

        # Check the argument types before touching any DataFrame attribute,
        # so that a wrong type produces the TypeError below instead of an
        # AttributeError from `atoms.columns`.
        if not isinstance(atoms, pd.DataFrame):
            raise TypeError('atoms must be an instance of pandas.DataFrame. '
                            'You supplied a %s' % type(atoms))
        if not isinstance(bonds, np.ndarray):
            raise TypeError('bonds must be an instance of numpy.ndarray. '
                            'You supplied a %s' % type(bonds))

        for col in ["name", "element", "resSeq",
                    "resName", "chainID", "serial"]:
            if col not in atoms.columns:
                raise ValueError('dataframe must have column %s' % col)

        if "segmentID" not in atoms.columns:
            # Add the default column on a copy so the caller's data frame
            # is left untouched.
            atoms = atoms.copy()
            atoms["segmentID"] = ""

        if not np.all(np.arange(len(atoms)) == atoms.index):
            raise ValueError('atoms must be uniquely numbered '
                             'starting from zero.')

        out = cls()
        # Pre-size the atom list: atoms are visited grouped by chain and
        # residue, and each is stored at its own data frame index.
        out._atoms = [None] * len(atoms)

        for ci in np.unique(atoms['chainID']):
            chain_atoms = atoms[atoms['chainID'] == ci]
            c = out.add_chain()

            for ri in np.unique(chain_atoms['resSeq']):
                residue_atoms = chain_atoms[chain_atoms['resSeq'] == ri]
                rnames = residue_atoms['resName']
                residue_name = np.array(rnames)[0]
                segids = residue_atoms['segmentID']
                # The residue takes the segment id of its first atom.
                segment_id = np.array(segids)[0]
                if not np.all(rnames == residue_name):
                    raise ValueError('All of the atoms with residue index %d '
                                     'do not share the same residue name' % ri)
                r = out.add_residue(residue_name, c, ri, segment_id)

                for atom_index, atom in residue_atoms.iterrows():
                    atom_index = int(atom_index)  # Fixes bizarre hashing issue on Py3K.  See #545
                    a = Atom(atom['name'], elem.get_by_symbol(atom['element']),
                             atom_index, r, serial=atom['serial'])
                    out._atoms[atom_index] = a
                    r._atoms.append(a)

        for ai1, ai2 in bonds:
            out.add_bond(out.atom(ai1), out.atom(ai2))

        out._numAtoms = out.n_atoms
        return out
Exemplo n.º 54
0
def chemical_shifts_spartaplus(trj):
    """Predict chemical shifts of a trajectory using SPARTA+.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.

    Returns
    -------
    results : pandas.DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Raises
    ------
    OSError
        If the SPARTA+ executable cannot be found on the PATH.
    IOError
        If SPARTA+ exits with a non-zero status.

    Notes
    -----
    You must have SPARTA+ available on your path; see
    (http://spin.niddk.nih.gov/bax/software/SPARTA+/). Also, the SPARTAP_DIR
    environment variable must be set so that SPARTA+ knows where to find
    its database files.

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference below.

    References
    ----------
    .. [1] Shen, Y., and Bax, Ad. "SPARTA+: a modest improvement in empirical
       NMR chemical shift prediction by means of an artificial neural network."
       J. Biomol. NMR, 48, 13-22 (2010)
    """
    # Imported here so the function does not depend on a module-level import.
    import subprocess

    pd = import_('pandas')
    binary = find_executable(SPARTA_PLUS)
    if binary is None:
        raise OSError('External command not found. Looked for %s in PATH. `chemical_shifts_spartaplus` requires the external program SPARTA+, available at http://spin.niddk.nih.gov/bax/software/SPARTA+/' % ', '.join(SPARTA_PLUS))

    # NOTE(review): ten column names are supplied here, with a leading
    # "VARS" entry -- confirm this matches how the installed pandas splits
    # the rows of the SPARTA+ output table.
    names = ["VARS", "resSeq", "resName", "name", "SS_SHIFT", "SHIFT", "RC_SHIFT", "HM_SHIFT", "EF_SHIFT", "SIGMA"]

    with enter_temp_directory():
        # One PDB file per frame, written into the temporary directory.
        pdb_files = ["trj%d.pdb" % i for i in range(trj.n_frames)]
        for i, pdb_file in enumerate(pdb_files):
            trj[i].save("./%s" % pdb_file)

        # Pass the command as an argument list so that no shell is involved
        # and an executable path containing spaces still works.
        args = [binary, "-in"] + pdb_files
        return_flag = subprocess.call(args)

        if return_flag != 0:
            raise IOError("Could not successfully execute command '%s', check your SPARTA+ installation or your input trajectory." % ' '.join(args))

        # The header length is measured on the first frame's output and
        # reused for every frame.
        lines_to_skip = _get_lines_to_skip("trj0_pred.tab")

        results = []
        for i in range(trj.n_frames):
            d = pd.read_csv("./trj%d_pred.tab" % i, skiprows=lines_to_skip, delim_whitespace=True, header=None, names=names)
            d["frame"] = i
            results.append(d)

    results = pd.concat(results)

    # pandas 0.14 renamed the pivot_table keywords rows/cols to
    # index/columns; pick whichever spelling the installed version accepts.
    major, minor = (int(part) for part in pd.__version__.split('.')[:2])
    if (major, minor) < (0, 14):
        results = results.pivot_table(rows=["resSeq", "name"], cols="frame", values="SHIFT")
    else:
        results = results.pivot_table(index=["resSeq", "name"], columns="frame", values="SHIFT")

    return results
Exemplo n.º 55
0
    def from_dataframe(cls, atoms, bonds=None):
        """Create a mdtraj topology from a pandas data frame

        Parameters
        ----------
        atoms : pandas.DataFrame
            The atoms in the topology, represented as a data frame. This data
            frame should have columns "serial" (atom index), "name" (atom name),
            "element" (atom's element), "resSeq" (index of the residue)
            "resName" (name of the residue), "chainID" (index of the chain),
            and optionally "segmentID", following the same conventions
            as wwPDB 3.0 format. Its index must be 0, 1, ..., len(atoms) - 1.
            The frame passed in is not modified.
        bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
            The bonds in the topology, represented as an n_bonds x 2 array
            of the indices of the atoms involved in each bond. Specifying
            bonds here is optional. To create standard protein bonds, you can
            use `create_standard_bonds` to "fill in" the bonds on your newly
            created Topology object

        Returns
        -------
        out : cls
            A new instance of `cls` populated with one chain per unique
            "chainID", one residue per unique "resSeq" within each chain,
            the atoms, and the requested bonds.

        Raises
        ------
        TypeError
            If `atoms` is not a pandas.DataFrame or `bonds` is not a
            numpy.ndarray.
        ValueError
            If a required column is missing, if the index of `atoms` is not
            0..len(atoms)-1, or if atoms sharing a residue index within a
            chain do not all have the same residue name.

        See Also
        --------
        create_standard_bonds
        """
        pd = import_('pandas')

        if bonds is None:
            bonds = np.zeros((0, 2))

        # Check the argument types before touching any DataFrame attribute,
        # so that a wrong type produces the TypeError below instead of an
        # AttributeError from `atoms.columns`.
        if not isinstance(atoms, pd.DataFrame):
            raise TypeError('atoms must be an instance of pandas.DataFrame. '
                            'You supplied a %s' % type(atoms))
        if not isinstance(bonds, np.ndarray):
            raise TypeError('bonds must be an instance of numpy.ndarray. '
                            'You supplied a %s' % type(bonds))

        for col in [
                "name", "element", "resSeq", "resName", "chainID", "serial"
        ]:
            if col not in atoms.columns:
                raise ValueError('dataframe must have column %s' % col)

        if "segmentID" not in atoms.columns:
            # Add the default column on a copy so the caller's data frame
            # is left untouched.
            atoms = atoms.copy()
            atoms["segmentID"] = ""

        if not np.all(np.arange(len(atoms)) == atoms.index):
            raise ValueError('atoms must be uniquely numbered '
                             'starting from zero.')

        out = cls()
        # Pre-size the atom list: atoms are visited grouped by chain and
        # residue, and each is stored at its own data frame index.
        out._atoms = [None] * len(atoms)

        for ci in np.unique(atoms['chainID']):
            chain_atoms = atoms[atoms['chainID'] == ci]
            c = out.add_chain()

            for ri in np.unique(chain_atoms['resSeq']):
                residue_atoms = chain_atoms[chain_atoms['resSeq'] == ri]
                rnames = residue_atoms['resName']
                residue_name = np.array(rnames)[0]
                segids = residue_atoms['segmentID']
                # The residue takes the segment id of its first atom.
                segment_id = np.array(segids)[0]
                if not np.all(rnames == residue_name):
                    raise ValueError('All of the atoms with residue index %d '
                                     'do not share the same residue name' % ri)
                r = out.add_residue(residue_name, c, ri, segment_id)

                for atom_index, atom in residue_atoms.iterrows():
                    atom_index = int(
                        atom_index
                    )  # Fixes bizarre hashing issue on Py3K.  See #545
                    a = Atom(atom['name'],
                             elem.get_by_symbol(atom['element']),
                             atom_index,
                             r,
                             serial=atom['serial'])
                    out._atoms[atom_index] = a
                    r._atoms.append(a)

        for ai1, ai2 in bonds:
            out.add_bond(out.atom(ai1), out.atom(ai2))

        out._numAtoms = out.n_atoms
        return out
Exemplo n.º 56
0
##############################################################################
# imports
##############################################################################

import os
import tempfile
import shutil
import numpy as np

import mdtraj as md
from mdtraj import element
from mdtraj.utils import import_
from mdtraj.testing import skipif, get_fn, eq, slow

# scripttest is optional: record in HAVE_SCRIPTTEST whether it could be
# imported.
# NOTE(review): this only catches SystemExit, so it relies on import_
# signalling a missing package that way rather than with ImportError --
# confirm against mdtraj.utils.import_.
try:
    scripttest = import_('scripttest')
    HAVE_SCRIPTTEST = True
except SystemExit:
    HAVE_SCRIPTTEST = False

##############################################################################
# globals
##############################################################################

# If you switch DEBUG_MODE to True, none of the files will be deleted
# at the end of the execution of this suite, so that you can debug the
# problem by running mdconvert manually.
DEBUG_MODE = False

# Temporary directory created once, when this module is imported.
# NOTE(review): presumably the scratch area for the tests in this suite;
# its cleanup is not visible in this part of the file.
staging_dir = tempfile.mkdtemp()
Exemplo n.º 57
0
def test_topology_openmm_boxes():
    """Convert a loaded PDB topology to OpenMM and read its unit cell.

    Loads '1vii_sustiva_water.pdb', converts its topology with
    ``to_openmm(traj=...)`` and divides the resulting unit cell dimensions
    by ``simtk.unit.nanometer``. Nothing is asserted on the result, so the
    test passes as long as none of these steps raises.
    """
    unit = import_('simtk.unit')
    trajectory = md.load(get_fn('1vii_sustiva_water.pdb'))
    openmm_topology = trajectory.topology.to_openmm(traj=trajectory)
    cell_dimensions = openmm_topology.getUnitCellDimensions()
    box = cell_dimensions / unit.nanometer
Exemplo n.º 58
0
def chemical_shifts_spartaplus(trj, rename_HN=True):
    """Predict chemical shifts of a trajectory using SPARTA+.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.
    rename_HN : bool, optional, default=True
        SPARTA+ calls the amide proton "HN" instead of the standard "H".
        When True, this option renames the output as "H" to match the PDB
        and BMRB nomenclature.

    Returns
    -------
    results : pandas.DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Raises
    ------
    OSError
        If the SPARTA+ executable cannot be found on the PATH.
    subprocess.CalledProcessError
        If SPARTA+ exits with a non-zero status.

    Notes
    -----
    You must have SPARTA+ available on your path; see
    (http://spin.niddk.nih.gov/bax/software/SPARTA+/). Also, the SPARTAP_DIR
    environment variable must be set so that SPARTA+ knows where to find
    its database files.

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference below.

    References
    ----------
    .. [1] Shen, Y., and Bax, Ad. "SPARTA+: a modest improvement in empirical
       NMR chemical shift prediction by means of an artificial neural network."
       J. Biomol. NMR, 48, 13-22 (2010)
    """
    pd = import_('pandas')
    binary = find_executable(SPARTA_PLUS)
    if binary is None:
        raise OSError(
            'External command not found. Looked for %s in PATH. `chemical_shifts_spartaplus` requires the external program SPARTA+, available at http://spin.niddk.nih.gov/bax/software/SPARTA+/'
            % ', '.join(SPARTA_PLUS))

    names = [
        "resSeq", "resName", "name", "SS_SHIFT", "SHIFT", "RC_SHIFT",
        "HM_SHIFT", "EF_SHIFT", "SIGMA"
    ]

    with enter_temp_directory():
        # One PDB file per frame, written into the temporary directory.
        for i in range(trj.n_frames):
            trj[i].save("./trj%d.pdb" % i)

        # Argument-list form: no shell is involved, and check_call raises
        # if SPARTA+ returns a non-zero exit status.
        subprocess.check_call(
            [binary, '-in'] +
            ["trj{}.pdb".format(i)
             for i in range(trj.n_frames)] + ['-out', 'trj0_pred.tab'])

        # The header length is measured on the first frame's output and
        # reused for every frame.
        lines_to_skip = _get_lines_to_skip("trj0_pred.tab")

        results = []
        for i in range(trj.n_frames):
            # Raw string: "\s" is not a valid escape in a normal literal.
            d = pd.read_table("./trj%d_pred.tab" % i,
                              names=names,
                              header=None,
                              sep=r"\s+",
                              skiprows=lines_to_skip)
            d["frame"] = i
            results.append(d)

    results = pd.concat(results)

    if rename_HN:
        # Single .loc assignment instead of chained indexing, which is not
        # guaranteed to write through to `results`. A plain boolean array
        # is used as the mask because the concatenated frame has repeated
        # index labels.
        is_amide_proton = (results["name"] == "HN").values
        results.loc[is_amide_proton, "name"] = "H"

    # pandas 0.14 renamed the pivot_table keywords rows/cols to
    # index/columns.
    if LooseVersion(pd.__version__) < LooseVersion('0.14.0'):
        results = results.pivot_table(rows=["resSeq", "name"],
                                      cols="frame",
                                      values="SHIFT")
    else:
        results = results.pivot_table(index=["resSeq", "name"],
                                      columns="frame",
                                      values="SHIFT")

    return results