Пример #1
0
def redim(array, ndim, shape=None):
    """
    Reshape ``array`` so that it ends up with ``ndim`` dimensions.

    Contracting (``ndim < array.ndim``) keeps the first ``ndim - 1`` axes as
    they are and merges every remaining axis into one trailing axis. Expanding
    (``ndim > array.ndim``) reshapes to the caller-supplied ``shape``. When
    ``ndim`` already equals ``array.ndim`` the array is reshaped to its own
    shape. Typical use: flatten N-dimensional data to 2D for matrix
    factorization / clustering, then restore it afterwards.

    array : numpy array to reshape.

    ndim : int, desired number of dimensions.

    shape : tuple, target shape. Required when expanding; not used otherwise.

    Returns whatever ``array.reshape`` produces for the computed shape.

    Raises ValueError when expanding without a ``shape`` argument.
    """

    from numpy import prod

    current_ndim = array.ndim

    if ndim > current_ndim:
        # Expanding is ambiguous without an explicit target shape.
        if shape is None:
            raise ValueError(
                "Cannot expand dimensions without supplying a shape argument")
        return array.reshape(shape)

    if ndim < current_ndim:
        split_at = ndim - 1
        kept_axes = array.shape[:split_at]
        merged_axes = array.shape[split_at:]
        return array.reshape((*kept_axes, prod(merged_axes)))

    if ndim == current_ndim:
        return array.reshape(array.shape)

    # Only reachable when none of the comparisons above holds.
    return None
Пример #2
0
def invert_scale(scaler, X, value):
    """Run ``value`` through ``scaler.inverse_transform`` alongside ``X``.

    The items of ``X`` followed by ``value`` are packed into a single-row
    2-D array and handed to ``scaler.inverse_transform``.

    :param scaler: object exposing ``inverse_transform``.
    :param X: iterable of the values that precede ``value`` in the row.
    :param value: the value to place in the last column.
    :return: the last column of the first row of the inverse-transformed data.
    """
    import numpy as np

    row = np.array([*X, value])
    # inverse_transform is given a (1, n) matrix rather than a flat vector.
    as_matrix = row.reshape(1, len(row))
    return scaler.inverse_transform(as_matrix)[0, -1]
Пример #3
0
def pad_array(array):
    """Forward-fill zero entries of ``array`` in place and return it as a column.

    Every element equal to 0 is overwritten with the element stored at the
    position of the most recent non-zero entry. Until a non-zero entry has
    been seen that position is index 0, so leading zeros stay zero.

    :param array: indexable object that also provides ``reshape``; it is
        modified in place.
    :return: ``array`` reshaped to ``(len(array), 1)``.
    """
    last_nonzero = 0
    for position, item in enumerate(array):
        if item != 0:
            last_nonzero = position
        else:
            array[position] = array[last_nonzero]
    return array.reshape((len(array), 1))
Пример #4
0
def expand_1d_to_2d_array(array, length, axis=0):
    """
    Broadcast ``array`` into two dimensions by repeating its values
    ``length`` times along ``axis``.

    With ``axis == 0`` the result has shape ``(length, array.size)``; for any
    other ``axis`` value the array is first reshaped to a column and the
    result has shape ``(array.size, length)``.

    Examples::

        >>> a = np.array([1, 2, 3, 4])
        >>> print(expand_1d_to_2d_array(a, 2, axis=0))
        [[1 2 3 4]
         [1 2 3 4]]

        >>> print(expand_1d_to_2d_array(a, 2, axis=1))
        [[1 1]
         [2 2]
         [3 3]
         [4 4]]

    :param array: array to expand (the examples use a 1-D array).
    :param length: size of the new, repeated dimension.
    :param axis: 0 to stack copies as rows; anything else to stack as columns.
    :return: the result of ``np.broadcast_to`` (called with ``subok=True``).
    """
    from numpy.ma import MaskedArray

    as_rows = axis == 0
    source = array if as_rows else array.reshape(array.size, 1)
    target_shape = (length, array.size) if as_rows else (array.size, length)

    expanded = np.broadcast_to(source, target_shape, subok=True)

    # Masked input: assign a mask broadcast to the same target shape, since
    # the broadcast above does not carry one of the right shape.
    if isinstance(array, MaskedArray):
        expanded.mask = np.broadcast_to(source.mask, target_shape)

    return expanded
Пример #5
0
def expand_1d_to_2d_array(array, length, axis=0):
    """
    Expand ``array`` to 2-D by repeating it ``length`` times along ``axis``.

    ``axis == 0`` yields shape ``(length, array.size)``. Any other ``axis``
    reshapes the input to a column first and yields shape
    ``(array.size, length)``.

    Examples::

        >>> a = np.array([1, 2, 3, 4])
        >>> print(expand_1d_to_2d_array(a, 2, axis=0))
        [[1 2 3 4]
         [1 2 3 4]]

        >>> print(expand_1d_to_2d_array(a, 2, axis=1))
        [[1 1]
         [2 2]
         [3 3]
         [4 4]]

    :param array: array to expand (the examples use a 1-D array).
    :param length: number of repetitions, i.e. the size of the new dimension.
    :param axis: 0 repeats the input as rows; any other value as columns.
    :return: the result of ``np.broadcast_to`` (called with ``subok=True``).
    """
    from numpy.ma import MaskedArray

    n_values = array.size
    if axis == 0:
        base, out_shape = array, (length, n_values)
    else:
        base, out_shape = array.reshape(n_values, 1), (n_values, length)

    result = np.broadcast_to(base, out_shape, subok=True)

    if isinstance(array, MaskedArray):
        # The mask has to be broadcast separately to match the new shape.
        result.mask = np.broadcast_to(base.mask, out_shape)

    return result
def invert_scale(scaler, X, yhat):
    """
    Undo the scaling of a single prediction.

    The prediction is appended to the input row that produced it, the
    combined row is passed to ``scaler.inverse_transform`` and only the
    prediction's (last) entry of the result is returned.

    :param scaler: Object exposing ``inverse_transform``; presumably the
        scaler that was fitted on the inputs and target together (verify
        against the caller).
    :param X: Input row used to generate this prediction (yhat).
    :param yhat: Predicted output to rescale.
    :return: Only the "yhat" value, rescaled.
    """
    # Rebuild a full row: the input features first, the prediction last.
    combined = list(X)
    combined.append(yhat)
    row = numpy.array(combined)
    # inverse_transform is given a single-row matrix, not a flat vector.
    row = row.reshape(1, len(row))
    unscaled = scaler.inverse_transform(row)
    return unscaled[0, -1]
Пример #7
0
def expand_1d_to_2d_array(array, length, axis=0):
    """
    Broadcast ``array`` into two dimensions by repeating its values
    ``length`` times along ``axis``.

    With ``axis == 0`` the result has shape ``(length, array.size)``; for any
    other ``axis`` value the array is first reshaped to a column and the
    result has shape ``(array.size, length)``.

    Examples::

        >>> a = np.array([1, 2, 3, 4])
        >>> print(expand_1d_to_2d_array(a, 2, axis=0))
        [[1 2 3 4]
         [1 2 3 4]]

        >>> print(expand_1d_to_2d_array(a, 2, axis=1))
        [[1 1]
         [2 2]
         [3 3]
         [4 4]]

    :param array: array to expand (the examples use a 1-D array).
    :param length: size of the new, repeated dimension.
    :param axis: 0 to stack copies as rows; anything else to stack as columns.
    :return: the result of ``broadcast_to`` for the computed shape.
    """
    # Import from the top-level namespace: that is the documented public home
    # of broadcast_to, whereas numpy.lib.stride_tricks is not guaranteed to
    # re-export it (NumPy 2 trimmed that module's namespace).
    from numpy import broadcast_to

    n_values = array.size
    if axis == 0:
        return broadcast_to(array, (length, n_values))

    column = array.reshape(n_values, 1)
    return broadcast_to(column, (n_values, length))
Пример #8
0
def ensure_2d_array(array, flags, **kwargs):
    """Pass *array* through ``require`` and turn a 1-D result into one row.

    :param array: input handed to ``require``.
    :param flags: forwarded to ``require`` as its ``requirements`` argument.
    :param kwargs: extra keyword arguments forwarded to ``require``.
    :return: the array returned by ``require``. A one-dimensional result is
        reshaped to ``(1, size)``; any other dimensionality is returned
        unchanged.
    """
    array = require(array, requirements=flags, **kwargs)
    if array.ndim == 1:
        # Spell out the leading 1 instead of using -1: reshape cannot infer a
        # -1 dimension next to a 0, so empty 1-D input used to raise.
        array = array.reshape(1, array.size)
    return array
Пример #9
0
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, the base name of *filename* without its
    extension is used as the title of the :class:`.AtomGroup` instance.  An
    :class:`.AtomGroup` instance may be provided as *ag* argument.  When
    provided, *ag* must have the same number of atoms in the same order as the
    file.  Data from PSF file will be added to the *ag*.  This may overwrite
    present data if it overlaps with PSF file content.  Note that this
    function does not evaluate angles, dihedrals, and impropers sections.

    :raises TypeError: if *ag* is given but is not an :class:`.AtomGroup`.
    :raises ValueError: if *ag* and the file disagree on the number of atoms.
    :raises IOError: if the ``!NATOM`` or ``!NBOND:`` line cannot be found,
        if there are fewer lines than atoms, or if the number of parsed bond
        indices does not match the announced bond count.
    """

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    psf = openFile(filename, 'rb')
    # try/finally so the file is closed even when parsing fails part-way.
    try:
        # Skip the header until the line announcing the atom count.
        # NOTE(review): lines are compared with str literals, so this assumes
        # openFile yields text lines -- confirm for Python 3.
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith('!NATOM'):
                n_atoms = int(line.split('!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('!NATOM line could not be found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError('ag and PSF file must have same number of '
                                 'atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Read the atom section plus a few extra lines in one go; the
        # argument is a size hint based on 71 characters per line.
        lines = psf.readlines(71 * (n_atoms + 5))
        if len(lines) < n_atoms:
            raise IOError('number of lines in PSF is less than the number of '
                          'atoms')

        for i in range(n_atoms):
            line = lines[i]
            if len(line) <= 71:
                # Short lines are read by fixed column positions.
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # Longer lines are split on whitespace instead.
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        # Look for the bond-count line among the lines already read.
        n_bonds = None
        for i in range(n_atoms, len(lines)):
            items = lines[i].split()
            if len(items) >= 2 and items[1] == '!NBOND:':
                n_bonds = int(items[0])
                break
        if n_bonds is None:
            raise IOError('!NBOND: line could not be found in PSF file')

        # Floor division keeps the read size an integer on Python 3 as well.
        text = ''.join(lines[i + 1:]) + psf.read(n_bonds // 4 * 71)
        array = fromstring(text, count=n_bonds * 2, dtype=int, sep=' ')
        if len(array) != n_bonds * 2:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Shift every parsed number down by one in place (presumably converting
    # the file's 1-based atom numbers to 0-based indices) and pair them up.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
Пример #10
0
def ensure_2d_array(array, flags, **kwargs):
    """Return *array* as processed by ``require``, promoting 1-D to one row.

    :param array: input handed to ``require``.
    :param flags: forwarded to ``require`` as its ``requirements`` argument.
    :param kwargs: extra keyword arguments forwarded to ``require``.
    :return: the array returned by ``require``. A one-dimensional result is
        reshaped to ``(1, size)``; any other dimensionality is returned
        unchanged.
    """
    array = require(array, requirements=flags, **kwargs)
    if array.ndim != 1:
        return array
    # An explicit leading 1 (not -1) also works for empty input, where
    # reshape would be unable to infer the missing dimension.
    return array.reshape(1, array.size)
Пример #11
0
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, the base name of *filename* without its
    extension is used as the title of the :class:`.AtomGroup` instance.  An
    :class:`.AtomGroup` instance may be provided as *ag* argument.  When
    provided, *ag* must have the same number of atoms in the same order as the
    file.  Data from PSF file will be added to the *ag*.  This may overwrite
    present data if it overlaps with PSF file content.  Note that this
    function does not evaluate angles, dihedrals, and impropers sections.

    :raises TypeError: if *ag* is given but is not an :class:`.AtomGroup`.
    :raises ValueError: if *ag* and the file disagree on the number of atoms.
    :raises IOError: if the ``!NATOM`` or ``!NBOND:`` line cannot be found,
        if there are fewer lines than atoms, or if the number of parsed bond
        indices does not match the announced bond count.
    """

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    psf = openFile(filename, 'rb')
    # Everything that reads from the file sits inside try/finally so the
    # handle is released on error paths as well.
    try:
        # Advance to the line that announces the number of atoms.
        # NOTE(review): lines are compared with str literals, so this assumes
        # openFile yields text lines -- confirm for Python 3.
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith('!NATOM'):
                n_atoms = int(line.split('!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('!NATOM line could not be found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError('ag and PSF file must have same number of '
                                 'atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Size hint: 71 characters per line for the atoms plus five spare
        # lines.
        lines = psf.readlines(71 * (n_atoms + 5))
        if len(lines) < n_atoms:
            raise IOError('number of lines in PSF is less than the number of '
                          'atoms')

        for i in range(n_atoms):
            line = lines[i]
            if len(line) <= 71:
                # Fixed-column layout.
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # Wider lines: fall back to whitespace-separated fields.
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        # The bond-count line is searched for in the lines read above; a
        # file without it is reported instead of running off the list.
        n_bonds = None
        for i in range(n_atoms, len(lines)):
            items = lines[i].split()
            if len(items) >= 2 and items[1] == '!NBOND:':
                n_bonds = int(items[0])
                break
        if n_bonds is None:
            raise IOError('!NBOND: line could not be found in PSF file')

        # read() needs an integer size, hence floor division.
        text = ''.join(lines[i + 1:]) + psf.read(n_bonds // 4 * 71)
        array = fromstring(text, count=n_bonds * 2, dtype=int, sep=' ')
        if len(array) != n_bonds * 2:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Subtract one from every parsed number in place (presumably 1-based
    # atom numbers to 0-based indices), then group them into pairs.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
Пример #12
0
def invert_scale(scaler, X, yhat):
    """Return ``yhat`` after ``scaler.inverse_transform`` has been applied.

    ``yhat`` is appended to the items of ``X``; the combined values are sent
    through ``scaler.inverse_transform`` as a single-row 2-D array and the
    last entry of the resulting row is returned.

    :param scaler: object exposing ``inverse_transform``.
    :param X: iterable of the values that precede ``yhat`` in the row.
    :param yhat: value to inverse-transform.
    :return: last column of the first row of the inverse-transformed data.
    """
    flat = numpy.array((*X, yhat))
    single_row = flat.reshape(1, len(flat))
    return scaler.inverse_transform(single_row)[0, -1]
Пример #13
0
def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, the base name of *filename* without its
    extension is used as the title of the :class:`.AtomGroup` instance.  An
    :class:`.AtomGroup` instance may be provided as *ag* argument.  When
    provided, *ag* must have the same number of atoms in the same order as the
    file.  Data from PSF file will be added to the *ag*.  This may overwrite
    present data if it overlaps with PSF file content.  Note that this
    function does not evaluate angles, dihedrals, and impropers sections.

    :raises TypeError: if *ag* is given but is not an :class:`.AtomGroup`.
    :raises ValueError: if *ag* and the file disagree on the number of atoms.
    :raises IOError: if the ``!NATOM`` line cannot be found, if fewer atom
        lines than announced are present, or if the number of parsed bond
        indices does not match the announced bond count.
    """

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError("ag must be an AtomGroup instance")

    psf = openFile(filename, "rb")
    # try/finally so the file is closed even when parsing fails part-way.
    try:
        # Skip the header until the line announcing the atom count.
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b"!NATOM"):
                n_atoms = int(line.split(b"!")[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError("!NATOM line could not be found in PSF file")

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError(
                    "ag and PSF file must have same number of atoms")

        serials = zeros(n_atoms, ATOMIC_FIELDS["serial"].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS["segment"].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS["resnum"].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS["resname"].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS["name"].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS["type"].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS["charge"].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS["mass"].dtype)

        # Atom section: consume lines up to the bond-count line, storing the
        # first n_atoms non-blank ones and ignoring anything in between.
        n = 0
        n_bonds = 0
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!NBOND:" in line.upper():
                items = line.split()
                n_bonds = int(items[0])
                break
            if n >= n_atoms:
                continue

            if len(line) <= 71:
                # Short lines are read by fixed column positions.
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # Longer lines are split on whitespace instead.
                items = line.split()
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError(
                "number of lines in PSF is less than the number of atoms")

        # Bond section: collect non-blank lines until the next "!" header.
        bond_lines = []
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!" in line:
                break
            bond_lines.append(line.decode(encoding="UTF-8"))

        array = fromstring("".join(bond_lines), count=n_bonds * 2, dtype=int,
                           sep=" ")
        if len(array) != n_bonds * 2:
            raise IOError("number of bonds expected and parsed do not match")
    finally:
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Shift every parsed number down by one in place (presumably converting
    # the file's 1-based atom numbers to 0-based indices) and pair them up.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
Пример #14
0
def calcCrossCorr(modes, n_cpu=1, norm=True):
    """Returns cross-correlations matrix.  For a 3-d model, cross-correlations
    matrix is an NxN matrix, where N is the number of atoms.  Each element of
    this matrix is the trace of the submatrix corresponding to a pair of atoms.
    Cross-correlations matrix may be calculated using all modes or a subset of modes
    of an NMA instance.  For large systems, calculation of cross-correlations
    matrix may be time consuming.  Optionally, multiple processors may be
    employed to perform calculations by passing ``n_cpu=2`` or more.

    :param modes: a Mode, Vector, NMA, or ModeSet instance, or a list whose
        first item provides ``is3d()``.
    :param n_cpu: number of processes to use; capped at
        ``multiprocessing.cpu_count()``.
    :param norm: when true, the matrix is passed to ``div0`` together with the
        outer product of the square roots of its diagonal.
    :return: the (optionally normalized) matrix.
    :raises TypeError: if *n_cpu* is not an integer or *modes* has an
        unsupported type.
    :raises ValueError: if *n_cpu* is less than 1.
    """

    if not isinstance(n_cpu, int):
        raise TypeError('n_cpu must be an integer')
    elif n_cpu < 1:
        raise ValueError('n_cpu must be equal to or greater than 1')

    if not isinstance(modes, (Mode, Vector, NMA, ModeSet)):
        if isinstance(modes, list):
            try:
                is3d = modes[0].is3d()
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            except Exception:
                raise TypeError(
                    'modes must be a list of Mode or Vector instances, '
                    'not {0}'.format(type(modes)))
        else:
            raise TypeError(
                'modes must be a Mode, Vector, NMA, or ModeSet instance, '
                'not {0}'.format(type(modes)))
    else:
        is3d = modes.is3d()

    if is3d:
        # NOTE(review): a plain list reaches this branch with model = modes,
        # and a list has no _getArray()/_n_atoms -- confirm list input is
        # really supported for 3-d models.
        model = modes
        if isinstance(modes, (Mode, ModeSet)):
            model = modes._model
            if isinstance(modes, (Mode)):
                indices = [modes.getIndex()]
                n_modes = 1
            else:
                indices = modes.getIndices()
                n_modes = len(modes)
        elif isinstance(modes, Vector):
            indices = [0]
            n_modes = 1
        else:
            n_modes = len(modes)
            indices = np.arange(n_modes)

        array = model._getArray()
        n_atoms = model._n_atoms

        if not isinstance(modes, Vector):
            variances = model._vars
        else:
            # A Vector is handled as a single column with unit variance.
            array = array.reshape(-1, 1)
            variances = np.ones(1)

        if n_cpu == 1:
            s = (n_modes, n_atoms, 3)
            arvar = (array[:, indices] * variances[indices]).T.reshape(s)
            array = array[:, indices].T.reshape(s)
            covariance = np.tensordot(array.transpose(2, 0, 1),
                                      arvar.transpose(0, 2, 1),
                                      axes=([0, 1], [1, 0]))
        else:
            import multiprocessing
            n_cpu = min(multiprocessing.cpu_count(), n_cpu)
            queue = multiprocessing.Queue()
            # Floor division: the chunk size is used as a slice bound and
            # therefore has to be an integer.
            size = n_modes // n_cpu
            for i in range(n_cpu):
                # NOTE(review): this slices ``modes.indices`` rather than the
                # local ``indices`` computed above -- confirm every accepted
                # type exposes that attribute.
                if n_cpu - i == 1:
                    # The last worker also takes the remainder.
                    indices = modes.indices[i * size:]
                else:
                    indices = modes.indices[i * size:(i + 1) * size]
                process = multiprocessing.Process(target=_crossCorrelations,
                                                  args=(queue, n_atoms, array,
                                                        variances, indices))
                process.start()
            # Poll until every worker has queued its partial result.
            while queue.qsize() < n_cpu:
                time.sleep(0.05)
            covariance = queue.get()
            while queue.qsize() > 0:
                covariance += queue.get()
    else:
        covariance = calcCovariance(modes)
    if norm:
        diag = np.power(covariance.diagonal(), 0.5)
        D = np.outer(diag, diag)
        covariance = div0(covariance, D)
    return covariance