Example #1
def pathEVmutationFolder(folder=None):
    """Returns or sets path of local folder where EVmutation data are stored.
    To release the current folder, pass an invalid path, e.g.
    ``folder=''``.
    """
    if folder is None:
        folder = SETTINGS.get('EVmutation_local_folder')
        if folder:
            if isdir(folder):
                return folder
            else:
                LOGGER.warn('Local folder {} is not accessible.'.format(
                    repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local EVmutation folder is set: {}'.format(
                repr(folder)))
            SETTINGS['EVmutation_local_folder'] = folder
            SETTINGS.save()
        else:
            current = SETTINGS.pop('EVmutation_local_folder')
            if current:
                LOGGER.info('EVmutation folder {0} is released.'.format(
                    repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{} is not a valid path.'.format(repr(folder)))
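A minimal usage sketch for the function above; the import path is an assumption (adjust it to wherever pathEVmutationFolder actually lives):

    from rhapsody import pathEVmutationFolder  # hypothetical import path

    pathEVmutationFolder('/data/EVmutation')   # set and persist the folder
    print(pathEVmutationFolder())              # returns the stored path
    pathEVmutationFolder('')                   # an invalid path releases the setting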
Example #2
    def __or__(self, other):

        if self is other:
            return self

        try:
            ag = other.getAtomGroup()
        except AttributeError:
            raise TypeError('other must be an AtomPointer')

        if self._ag != ag:
            raise ValueError('both selections must be from the same AtomGroup')

        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warn('Active coordinate set indices do not match, it will '
                        'be set to zero.')
            acsi = 0

        indices = unique(concatenate((self._getIndices(),
                                      other._getIndices())))
        if indices[-1] == atommap.DUMMY:
            indices = indices[:-1]
        return Selection(self._ag, indices, '({0:s}) or ({1:s})'.format(
                         self.getSelstr(), other.getSelstr()),
                         acsi, unique=True)
Example #3
def parseCCD(ids):
    """Retrieve the whole Chemical Component Dictionary (CCD) resource.
    """
    if isListLike(ids):
        n_ids = len(ids)
    else:
        ids = [ids]
        n_ids = 1

    ret = []
    for id in ids:
        id_url = 'http://ligand-expo.rcsb.org/reports/{0}/{1}/{1}.cif'.format(id[0],
                                                                              id)
        try:
            handle = openURL(id_url)
        except Exception as err:
            LOGGER.warn('download failed ({0}).'.format(str(err)))
            ret.append(None)
        else:
            data = handle.read()
            if len(data):
                if PY3K:
                    data = data.decode()

                parsingDict, prog = parseSTARLines(data.split('\n'), shlex=True)
                        
                star_dict = StarDict(parsingDict, prog, id)
                ret.append(star_dict[id])
            else:
                ret.append(None)
                LOGGER.warn('Could not parse CCD data for {0}'.format(id))

    if n_ids == 1:
        return ret[0]

    return ret
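A hedged usage sketch for parseCCD; 'ATP' and 'GTP' are illustrative component identifiers and the top-level import is an assumption:

    from prody import parseCCD  # assumed import path

    atp = parseCCD('ATP')               # single ID -> one parsed block (or None)
    several = parseCCD(['ATP', 'GTP'])  # list of IDs -> list of parsed blocks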
Example #4
def calcSignatureSqFlucts(mode_ensemble, **kwargs):
    """
    Get the signature square fluctuations of *mode_ensemble*. 
    
    :arg mode_ensemble: an ensemble of structures or ENMs 
    :type mode_ensemble: :class:`ModeEnsemble`
    """

    if not isinstance(mode_ensemble, ModeEnsemble):
        raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
    
    if not mode_ensemble.isMatched():
        LOGGER.warn('modes in mode_ensemble have not been matched across modesets. '
                    'Consider running mode_ensemble.match() prior to using this function.')

    modesets = mode_ensemble
    V = []
    for modes in modesets:
        sqfs = calcSqFlucts(modes)
        V.append(sqfs)
    V = np.vstack(V)

    title_str = '%d modes'%mode_ensemble.numModes()
    weights = mode_ensemble.getWeights()
    if weights is not None:
        weights = weights[:, :, 0]
    labels = mode_ensemble.getLabels()

    # even if the original model is 3D, sqfs are still 1D
    sig = sdarray(V, title=title_str, weights=weights, labels=labels, is3d=False)

    return sig
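A usage sketch, assuming the ModeEnsemble is built with ProDy's ensemble tools (PDB identifiers and the GNM model choice are illustrative):

    from prody import parsePDB, buildPDBEnsemble, calcEnsembleENMs, calcSignatureSqFlucts

    ens = buildPDBEnsemble(parsePDB(['1p38', '5uoj'], subset='ca'))
    mode_ensemble = calcEnsembleENMs(ens, model='GNM')
    mode_ensemble.match()  # avoids the mismatch warning above
    sqf = calcSignatureSqFlucts(mode_ensemble)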
Example #5
    def __init__(self, PDB, n_modes='all', recover_pickle=False, **kwargs):
        assert isinstance(PDB, (str, Atomic)), \
            'PDB must be either a PDBID or an Atomic instance.'
        assert type(recover_pickle) is bool
        # definition and initialization of variables
        if isinstance(PDB, str):
            self.PDBID = PDB
            self._pdb = None
        else:
            self.PDBID = None
            self._pdb = PDB.copy()
        self.n_modes = n_modes
        self.chids = None
        self.resids = None
        self.feats = None
        self._gnm = None
        self._anm = None
        self.timestamp = None
        if recover_pickle:
            try:
                self.recoverPickle(**kwargs)
            except Exception as e:
                LOGGER.warn('Unable to recover pickle: %s' % e)
                self.refresh()
        else:
            self.refresh()
        return
Example #6
    def __and__(self, other):
        
        if self is other:
            return self
    
        if not isinstance(other, AtomPointer):
            raise TypeError('other must be an AtomPointer')
            
        if self._ag != other.getAtomGroup():
            raise ValueError('both selections must be from the same AtomGroup')
    
        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warn('Active coordinate set indices do not match, it will '
                        'be set to zero.')
            acsi = 0

        indices = set(self._getIndices())

        indices = indices.intersection(other.getIndices())
        if indices:
            indices = np.unique(list(indices))
            return Selection(self._ag, indices, '({0:s}) and ({1:s})'.format(
                             self.getSelstr(), other.getSelstr()), acsi)
Example #7
File: cath.py Project: nffaruk/ProDy
def range2selstr(rangestr):
    if rangestr.strip() == '':
        return None
    frags = rangestr.split(',')
    sels = []
    for frag in frags:
        try:
            fromtos = frag.split('-')
            if len(fromtos) == 2:
                fro, to = fromtos
            else:
                LOGGER.warn('range "%s" is irregular' % rangestr)
                fro = '1'
                to = fromtos[-1]
            fro_num = intResnum(fro)
            to_num = intResnum(to)

            if fro_num > to_num:
                LOGGER.warn('range "%s" is irregular' % rangestr)
                to_num = fro_num
                fro_num = 1
            fro = str(fro_num)
            to = str(to_num)
        except ValueError:
            print('error occurred when parsing "%s"' % rangestr)
            continue
        sels.append('resindex %s to %s' % (fro, to))
    selstr = ' or '.join(sels)
    return selstr
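A behavior sketch for the function above, assuming intResnum parses plain integer residue numbers:

    # range2selstr('1-100,120-150')  ->  'resindex 1 to 100 or resindex 120 to 150'
    # range2selstr('')               ->  None
    # A fragment without a dash, e.g. '100', is logged as irregular and
    # treated as the range 1-100.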
Example #8
File: clustenm.py Project: SHZ66/ProDy
    def writePDB(self, filename=None, single=True, **kwargs):

        '''
        Write conformers in PDB format to a file.
        
        :arg filename: The name of the file. If it is None (default), the title of the ClustENM will be used.
        :type filename: str

        :arg single: If it is True (default), then the conformers will be saved into a single PDB file with
            each conformer as a model. Otherwise, a directory will be created with the filename,
            and each conformer will be saved as a separate PDB file.
        :type single: bool
        '''

        if filename is None:
            filename = self.getTitle()

        if single:
            filename = writePDB(filename, self)
            LOGGER.info('PDB file saved as %s' % filename)
        else:
            direc = filename
            if isdir(direc):
                LOGGER.warn('%s already exists; files in it may be overwritten' % direc)
            else:
                mkdir(direc)

            LOGGER.info('Saving files ...')
            for i, lab in enumerate(self.getLabels()):
                filename = '%s/%s'%(direc, lab)
                writePDB(filename, self, csets=i)
            LOGGER.info('PDB files saved in %s ...'%direc)
Example #9
    def calcAuxPredictions(self, aux_clsf, force_env=None):
        assert self.predictions is not None, 'Primary predictions not found.'
        assert self.featMatrix is not None, 'Features not computed.'
        assert force_env in [None, 'chain', 'reduced', 'sliced']
        # import feature subset
        with open(aux_clsf, 'rb') as f:
            clsf_dict = pickle.load(f)
        LOGGER.info('Auxiliary Random Forest classifier imported.')
        feat_subset = clsf_dict['features']
        if force_env is not None:
            # force a given ENM environment model
            for i, f in enumerate(feat_subset):
                if f in RHAPSODY_FEATS['PDB'] and \
                   (f.startswith('ANM') or f.startswith('GNM')):
                    old_env = f.split('-')[-1]
                    feat_subset[i] = f.replace(old_env, force_env)
        assert all(f in self.featSet for f in feat_subset), \
            'The new set of features must be a subset of the original one.'
        # reduce original feature matrix
        sel = [i for i, f in enumerate(self.featSet) if f in feat_subset]
        fm = self.featMatrix[:, sel]
        p_a = calcPredictions(fm, clsf_dict, SAV_coords=self.SAVcoords['text'])
        if p_a is None:
            LOGGER.warn('No additional predictions.')
            return None
        self.auxPreds = p_a
        p_o = self.predictions
        self.mixPreds = np.where(np.isnan(p_o['score']), p_a, p_o)
        return self.auxPreds, self.mixPreds
Example #10
File: signature.py Project: brezal/ProDy
def showSignatureOverlaps(mode_ensemble):

    from matplotlib.pyplot import xlabel, ylabel

    if not isinstance(mode_ensemble, ModeEnsemble):
        raise TypeError('mode_ensemble should be an instance of ModeEnsemble')

    if not mode_ensemble.isMatched():
        LOGGER.warn(
            'modes in mode_ensemble have not been matched across modesets. '
            'Consider running mode_ensemble.match() prior to using this function.'
        )

    overlaps = calcSignatureOverlaps(mode_ensemble, diag=True)
    r, c = np.triu_indices(overlaps.shape[1], k=1)
    overlap_triu = overlaps[:, r, c]

    meanV = overlap_triu.mean(axis=1)
    stdV = overlap_triu.std(axis=1)

    show = showSignatureAtomicLines(meanV, stdV)
    xlabel('Mode index')
    ylabel('Overlap')

    return show
Example #11
File: signature.py Project: brezal/ProDy
def calcSignatureCrossCorr(mode_ensemble, norm=True):
    """Calculate average cross-correlations for a ModeEnsemble."""

    if not isinstance(mode_ensemble, ModeEnsemble):
        raise TypeError('mode_ensemble should be an instance of ModeEnsemble')

    if not mode_ensemble.isMatched():
        LOGGER.warn(
            'modes in mode_ensemble have not been matched across modesets. '
            'Consider running mode_ensemble.match() prior to using this function.'
        )
    matches = mode_ensemble
    n_atoms = matches.numAtoms()
    n_sets = len(matches)

    C = np.zeros((n_sets, n_atoms, n_atoms))
    for i in range(n_sets):
        m = matches[i]
        c = calcCrossCorr(m, norm=norm)
        C[i, :, :] = c

    title_str = '%d modes' % mode_ensemble.numModes()
    weights = mode_ensemble.getWeights()
    W = None
    if weights is not None:
        W = np.zeros((mode_ensemble.numModeSets(), mode_ensemble.numAtoms(),
                      mode_ensemble.numAtoms()))
        for i, w in enumerate(weights):
            w2 = np.outer(w, w)
            W[i, :, :] = w2
    labels = mode_ensemble.getLabels()

    # even if the original model is 3D, cross-correlations are still treated as 1D
    sig = sdarray(C, title=title_str, weights=W, labels=labels, is3d=False)

    return sig
Example #12
File: signature.py Project: brezal/ProDy
def calcSignatureCollectivity(mode_ensemble, masses=None):
    """Calculate average collectivities for a ModeEnsemble."""

    if not isinstance(mode_ensemble, ModeEnsemble):
        raise TypeError('mode_ensemble should be an instance of ModeEnsemble')

    if not mode_ensemble.isMatched():
        LOGGER.warn(
            'modes in mode_ensemble have not been matched across modesets. '
            'Consider running mode_ensemble.match() prior to using this function.'
        )

    n_modes = mode_ensemble.numModes()
    n_sets = len(mode_ensemble)

    C = np.zeros((n_sets, n_modes))
    for i in range(n_sets):
        m = mode_ensemble[i]
        c = calcCollectivity(m, masses=masses)
        C[i, :] = c

    title_str = 'collectivities of %d modes' % mode_ensemble.numModes()
    labels = mode_ensemble.getLabels()

    # even if the original model is 3D, collectivities are still 1D
    sig = sdarray(C, title=title_str, weights=None, labels=labels, is3d=False)

    return sig
Example #13
File: goa.py Project: nffaruk/ProDy
def calcDeepFunctionOverlaps(*goa_data, **kwargs):
    """Calculate function overlaps between the deep 
    (most detailed) molecular functions in particular 
    from two sets of GO terms.

    :arg goa1: the first set of GO terms
    :type goa1: tuple, list, :class:`~numpy.ndarray`

    :arg goa2: the second set of GO terms
    :type goa2: tuple, list, :class:`~numpy.ndarray`
    """
    return_funcs = kwargs.pop('return_funcs', False)

    deepFuncs = [findDeepestFunctions(entry, **kwargs) for entry in goa_data]
    for i, entry in enumerate(deepFuncs):
        if len(entry) == 0:
            LOGGER.warn(
                'ensemble member {0} has no deep molecular functions and was omitted'
                .format(goa_data[i]._title))

    deepFuncs = [entry for entry in deepFuncs if len(entry) > 0]
    overlaps = calcGoOverlap(*deepFuncs, **kwargs)

    if return_funcs:
        return overlaps, deepFuncs

    return overlaps
Example #14
File: goa.py Project: nffaruk/ProDy
def calcEnsembleFunctionOverlaps(ens, **kwargs):
    """Calculate function overlaps for an ensemble as the 
    mean of the value from :func:`calcDeepFunctionOverlaps`.

    :arg ens: an ensemble with labels
    :type ens: :class:`Ensemble`
    """
    if not isinstance(ens, Ensemble) and not isListLike(ens):
        raise TypeError('ens should be an ensemble or list-like')

    if isinstance(ens, Ensemble):
        ids = [label[:5] for label in ens.getLabels()]
    else:
        ids = ens

    if not isinstance(ids[0], str):
        raise TypeError('ens should have labels')

    goa_ens = queryGOA(ids, **kwargs)
    for entry in goa_ens:
        if len(entry._molecular) == 0:
            LOGGER.warn(
                'ensemble member {0} has no molecular functions and was omitted'
                .format(entry._title))

    goa_ens = [entry for entry in goa_ens if len(entry._molecular) > 0]

    overlaps = calcDeepFunctionOverlaps(*goa_ens, **kwargs)

    return overlaps
Example #15
File: Uniprot.py Project: yaz62/rhapsody
    def _checkAccessionNumber(self, acc):
        if '-' in acc:
            acc = acc.split('-')[0]
            message = 'Isoforms are not allowed; the main sequence for ' + \
                      acc + ' will be used instead.'
            LOGGER.warn(message)
        return acc
Example #16
File: goa.py Project: nffaruk/ProDy
def calcMinBranchLength(go_id1, go_id2, go):
    '''Find the minimum branch length between two terms in the GO DAG.

    :arg go_id1: the first GO ID
    :type go_id1: str

    :arg go_id2: the second GO ID
    :type go_id2: str

    :arg go: object containing a gene ontology (GO) directed acyclic graph (DAG)
    :type go: :class:`~goatools.obo_parser.GODag`
    '''
    # First get the deepest common ancestor
    dca = findDeepestCommonAncestor([go_id1, go_id2], go)
    if dca is None:
        LOGGER.warn(
            'There are no common ancestors between {0} and {1}, so no '
            'meaningful distance can be calculated.'.format(go_id1, go_id2))
        return None

    # Then get the distance from the DCA to each term
    dca_depth = go[dca].depth
    d1 = go[go_id1].depth - dca_depth
    d2 = go[go_id2].depth - dca_depth

    # Return the total distance - i.e., to the deepest common ancestor and back.
    return d1 + d2
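A worked example of the distance arithmetic (the depths are illustrative):

    # If go_id1 has depth 7, go_id2 has depth 5, and their deepest common
    # ancestor has depth 4, then
    #     d1 = 7 - 4 = 3
    #     d2 = 5 - 4 = 1
    # and the minimum branch length is d1 + d2 = 4.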
Example #17
File: goa.py Project: prody/ProDy
def calcEnsembleFunctionOverlaps(ens, **kwargs):
    """Calculate function overlaps for an ensemble as the 
    mean of the value from :func:`calcDeepFunctionOverlaps`.

    :arg ens: an ensemble with labels
    :type ens: :class:`Ensemble`
    """
    if not isinstance(ens, Ensemble) and not isListLike(ens):
        raise TypeError('ens should be an ensemble or list-like')

    if isinstance(ens, Ensemble):
        ids = [label[:5] for label in ens.getLabels()]
    else:
        ids = ens

    if not isinstance(ids[0], str):
        raise TypeError('ens should have labels')

    goa_ens = queryGOA(ids, **kwargs)
    for entry in goa_ens:
        if len(entry._molecular) == 0:
            LOGGER.warn(
                'ensemble member {0} has no molecular functions and was omitted'.format(entry._title))

    goa_ens = [entry for entry in goa_ens if len(entry._molecular) > 0]

    overlaps = calcDeepFunctionOverlaps(*goa_ens, **kwargs)

    return overlaps
Example #18
File: goa.py Project: prody/ProDy
def calcDeepFunctionOverlaps(*goa_data, **kwargs):
    """Calculate function overlaps between the deep 
    (most detailed) molecular functions in particular 
    from two sets of GO terms.

    :arg goa1: the first set of GO terms
    :type goa1: tuple, list, :class:`~numpy.ndarray`

    :arg goa2: the second set of GO terms
    :type goa2: tuple, list, :class:`~numpy.ndarray`
    """
    return_funcs = kwargs.pop('return_funcs', False)

    deepFuncs = [findDeepestFunctions(entry, **kwargs) for entry in goa_data]
    for i, entry in enumerate(deepFuncs):
        if len(entry) == 0:
            LOGGER.warn(
                'ensemble member {0} has no deep molecular functions and was omitted'
                .format(goa_data[i]._title))

    deepFuncs = [entry for entry in deepFuncs if len(entry) > 0]
    overlaps = calcGoOverlap(*deepFuncs, **kwargs)

    if return_funcs:
        return overlaps, deepFuncs

    return overlaps
Example #19
File: goa.py Project: prody/ProDy
def calcMinBranchLength(go_id1, go_id2, go):
    '''Find the minimum branch length between two terms in the GO DAG.

    :arg go_id1: the first GO ID
    :type go_id1: str

    :arg go_id2: the second GO ID
    :type go_id2: str

    :arg go: object containing a gene ontology (GO) directed acyclic graph (DAG)
    :type go: :class:`~goatools.obo_parser.GODag`
    '''
    # First get the deepest common ancestor
    dca = findDeepestCommonAncestor([go_id1, go_id2], go)
    if dca is None:
        LOGGER.warn('There are no common ancestors between {0} and {1}, so no '
                    'meaningful distance can be calculated.'.format(go_id1, go_id2))
        return None

    # Then get the distance from the DCA to each term
    dca_depth = go[dca].depth
    d1 = go[go_id1].depth - dca_depth
    d2 = go[go_id2].depth - dca_depth

    # Return the total distance - i.e., to the deepest common ancestor and back.
    return d1 + d2
Example #20
File: signature.py Project: brezal/ProDy
def calcSignatureFractVariance(mode_ensemble):
    """Calculate signature fractional variance for a ModeEnsemble."""

    if not isinstance(mode_ensemble, ModeEnsemble):
        raise TypeError('mode_ensemble should be an instance of ModeEnsemble')

    if not mode_ensemble.isMatched():
        LOGGER.warn(
            'modes in mode_ensemble have not been matched across modesets. '
            'Consider running mode_ensemble.match() prior to using this function.'
        )

    matches = mode_ensemble
    n_sets = len(matches)

    W = []
    is3d = None
    for i in range(n_sets):
        m = matches[i]
        var = calcFractVariance(m)
        W.append(var)
        if is3d is None:
            is3d = m.is3d()

    title_str = '%d modes' % mode_ensemble.numModes()
    labels = mode_ensemble.getLabels()
    sig = sdarray(W, title=title_str, weights=None, labels=labels, is3d=is3d)

    return sig
Example #21
def calcMBSfromSim(simMatrix, nEvals=20, remove_outliers=True,
                   remove_offset=True, **kwargs):

    LOGGER.timeit('_MBS')
    n = simMatrix.shape[0]
    mbs = np.zeros(n) 
    for i in range(n):
        try:
            # cut "non-covalent" bonds around atom 'i'
            modSim = MBSPointMutation(simMatrix, i)
            # compute laplacian's spectrum of eigvals
            laplacian = sparse.csgraph.laplacian(modSim, normed=True)
            evals = sparse.linalg.eigsh(laplacian, k=min(nEvals, n-1), 
                                        which='SM', return_eigenvectors=False)
            # sort eigvals in ascending order
            evals = np.sort(evals)
            # compute MBS at site i
            mbs[i] = np.sum(1./evals[1:])
        except Exception as err:
            LOGGER.warn('Unable to compute MBS at position '
                        '{0}. {1}'.format(i, err))
            mbs[i] = np.nan
    if any(~np.isnan(mbs)):
        # remove outliers
        if remove_outliers is True:
            mbs = _removeOutliers(mbs, **kwargs)
        # remove offset
        if remove_offset is True:
            offset = min(mbs[~np.isnan(mbs)])
            mbs = mbs - offset 
    LOGGER.report('MBS computed in %.1fs.', '_MBS')

    return mbs
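A self-contained sketch of the per-site quantity computed above: the sum of inverse non-zero eigenvalues of a normalized graph Laplacian. MBSPointMutation is internal to the package, so a plain symmetric similarity matrix stands in for its output here:

    import numpy as np
    from scipy import sparse

    simMatrix = np.random.rand(10, 10)
    simMatrix = (simMatrix + simMatrix.T) / 2   # symmetric similarity matrix
    np.fill_diagonal(simMatrix, 0)
    laplacian = sparse.csgraph.laplacian(simMatrix, normed=True)
    evals = np.sort(np.linalg.eigvalsh(laplacian))
    score = np.sum(1. / evals[1:])              # skip the trivial zero eigenvalue
    print(score)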
Example #22
    def setDrugGroup(self, group):
        """Set drug_group and update home page
        
        :arg group: group of drugs if using DrugBank
            options are ``"Approved"`` or ``"All"``. Default is ``"All"``
        :type group: str
        """
        if self.data_source == 'DrugBank':
            if group is None:
                group = 'All'
            elif not isinstance(group, str):
                raise TypeError('group must be string or None')
            elif group.lower() == 'all':
                group = 'All'
            elif group.lower() == 'approved':
                group = 'Approved'
            else:
                raise ValueError('group should be approved, all or None')

            self.drug_group = group
            if self.no_data:
                self.updateHomePage()

        elif group is not None:
            LOGGER.warn('there are no groups when using STITCH')
Example #23
def searchQuartataWeb(data_source=None,
                      drug_group=None,
                      input_type=None,
                      query_type=None,
                      data=None,
                      num_predictions=None,
                      browser_type=None,
                      job_id=None,
                      filename=None,
                      result_type='Chemical'):
    """Wrapper function for searching QuartataWeb.

    :arg result_type: type of results to get from QuartataWeb.
        So far only ``'Chemical'`` is supported.
    :type result_type: str

    All other arguments are the same as :class:`.QuartataWebBrowser`.
    """
    if result_type == 'Chemical':
        return QuartataChemicalRecord(data_source, drug_group, input_type,
                                      query_type, data, num_predictions,
                                      browser_type, job_id, filename)
    else:
        LOGGER.warn('No other result types are supported yet')
        return None
Example #24
    def __and__(self, other):

        if self is other:
            return self

        try:
            ag = other.getAtomGroup()
        except AttributeError:
            raise TypeError('other must be an AtomPointer')

        if self._ag != ag:
            raise ValueError('both selections must be from the same AtomGroup')

        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warn('Active coordinate set indices do not match, it will '
                        'be set to zero.')
            acsi = 0

        indices = set(self._getIndices())

        indices = indices.intersection(other.getIndices())
        if indices:
            indices = unique(list(indices))
            if indices[-1] == atommap.DUMMY:
                indices = indices[:-1]
            return Selection(self._ag, indices, '({0}) and ({1})'
                             .format(self.getSelstr(), other.getSelstr()),
                             acsi)
Example #25
    def getHits(self):
        """Returns the dictionary associated with the DaliRecord"""

        if self._alignPDB is None:
            LOGGER.warn('Dali Record does not have any data yet. Please run fetch.')

        return self._alignPDB
Example #26
    def __or__(self, other):

        if self is other:
            return self

        try:
            ag = other.getAtomGroup()
        except AttributeError:
            raise TypeError('other must be an AtomPointer')

        if self._ag != ag:
            raise ValueError('both selections must be from the same AtomGroup')

        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warn('Active coordinate set indices do not match, it will '
                        'be set to zero.')
            acsi = 0

        indices = unique(concatenate(
            (self._getIndices(), other._getIndices())))
        if indices[-1] == atommap.DUMMY:
            indices = indices[:-1]
        return Selection(self._ag,
                         indices,
                         '({0}) or ({1})'.format(self.getSelstr(),
                                                 other.getSelstr()),
                         acsi,
                         unique=True)
Example #27
def pathRhapsodyFolder(folder=None):
    """Returns or sets path of local folder where files and pickles necessary
    to run Rhapsody will be stored. To release the current folder, pass an
    invalid path, e.g. ``folder=''``.
    """
    if folder is None:
        folder = SETTINGS.get('rhapsody_local_folder')
        if folder:
            if isdir(folder):
                return folder
            else:
                LOGGER.warn('Local folder {} is not accessible.'.format(
                    repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local Rhapsody folder is set: {}'.format(
                repr(folder)))
            SETTINGS['rhapsody_local_folder'] = folder
            SETTINGS.save()
        else:
            current = SETTINGS.pop('rhapsody_local_folder')
            if current:
                LOGGER.info('Rhapsody folder {0} is released.'.format(
                    repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{} is not a valid path.'.format(repr(folder)))
Example #28
def mapOntoChainByAlignment(atoms, chain, **kwargs):
    """This function is similar to :func:`.mapOntoChain` but correspondence 
    of chains is found by alignment provided. 
    
    :arg alignments: A list of predefined alignments. It can be also a 
        dictionary or :class:`MSA` instance where the keys or 
        labels are the title of *atoms* or *chains*. 
    :type alignments: list, dict, :class:`MSA`
    """

    alignments = kwargs.pop('alignments', None)
    if alignments is None:
        return mapOntoChain(atoms, chain, **kwargs)
    else:
        if isinstance(alignments, (MSA, dict)):
            refseq = str(alignments[chain.getTitle()])
            tarseq = str(alignments[atoms.getTitle()])
            alignment = [refseq, tarseq]
        else:
            index = kwargs.pop('index', 0)
            alignment = alignments[index]

        tar_aligned_seq = alignment[-1]
        for char in GAPCHARS:
            tar_aligned_seq = tar_aligned_seq.replace(char, '').upper()
        hv = atoms.getHierView()
        for target_chain in hv.iterChains():
            tar_seq = target_chain.getSequence().upper()
            if tar_seq == tar_aligned_seq:
                mappings = mapOntoChain(target_chain, chain, pwalign=alignment, **kwargs)
                return mappings
        LOGGER.warn('The sequence of the chain does not match that in the '
                    'alignment (%s).' % atoms.getTitle())
    return []
Example #29
    def _isSaturationMutagenesis(self, queryUniprot=False):
        assert self._isColSet('SAV coords'), 'SAV list not set.'
        if self.saturation_mutagenesis is None:
            self.saturation_mutagenesis = False
            try:
                SAVs = self.getUniqueSAVcoords()
                SAV_list = list(SAVs['unique SAV coords'])
                acc = list(set(SAVs['Uniprot ID']))
                if len(acc) != 1:
                    raise RuntimeError('Multiple accession numbers found')
                else:
                    acc = acc[0]
                pos = list(set(SAVs['position']))
                if len(pos) == 1:
                    query = f'{acc} {pos[0]}'
                else:
                    query = acc
                # generate target scanning list
                if queryUniprot:
                    target_SAV_list = Uniprot.seqScanning(query)
                else:
                    seq = ''.join(SAVs['wt. aa'][range(0, len(SAVs), 19)])
                    target_SAV_list = Uniprot.seqScanning(query, sequence=seq)
                if SAV_list == target_SAV_list:
                    self.saturation_mutagenesis = True
                else:
                    raise RuntimeError('Missing SAVs detected.')
            except Exception as e:
                LOGGER.warn(f'Not a saturation mutagenesis list: {e}')
        return self.saturation_mutagenesis
Example #30
    def __getitem__(self, index):
        """A list or tuple of integers can be used for indexing."""

        if self._n_modes == 0:
            raise ValueError('{0} modes are not calculated, use '
                             'calcModes() method'.format(str(self)))
        if isinstance(index, slice):
            if (index.stop is not None and index.stop > self.numModes()) or (
                    index.start is not None and index.start > self.numModes()):
                LOGGER.warn(
                    'The selection index contains a higher number than the total mode number ({0})'
                    .format(self.numModes()))
            indices = np.arange(*index.indices(len(self)))
            if len(indices) > 0:
                return ModeSet(self, indices)
        elif isinstance(index, (list, tuple, np.ndarray)):
            if len(index) == 1:
                return self._getMode(index[0])
            return ModeSet(self, index)
        try:
            index = int(index)
        except Exception:
            raise IndexError('indices must be int, slice, list, or tuple')
        else:
            return self._getMode(index)
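An indexing sketch for the method above, assuming `model` is an NMA-like object whose modes have been calculated:

    # model[0]          -> a single Mode
    # model[:3]         -> a ModeSet with the first three modes
    # model[[0, 2, 5]]  -> a ModeSet with modes 1, 3 and 6
    # model[[1]]        -> a single Mode (one-element lists are unwrapped)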
Example #31
File: cath.py Project: fongchun/ProDy
def range2selstr(rangestr):
    if rangestr.strip() == '':
        return None
    frags = rangestr.split(',')
    sels = []
    for frag in frags:
        try:
            fromtos = frag.split('-')
            if len(fromtos) == 2:
                fro, to = fromtos
            else:
                LOGGER.warn('range "%s" is irregular'%rangestr)
                fro = '1'
                to = fromtos[-1]
            fro_num = intResnum(fro)
            to_num = intResnum(to)

            if fro_num > to_num:
                LOGGER.warn('range "%s" is irregular'%rangestr)
                to_num = fro_num
                fro_num = 1
            fro = str(fro_num)
            to = str(to_num)
        except ValueError:
            print('error occurred when parsing "%s"'%rangestr)
            continue
        sels.append('resnum %s to %s'%(fro, to))
    selstr = ' or '.join(sels)
    return selstr
Example #32
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)"""

    cutoff = float(kwargs.get('cutoff', 1.20))
    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))

    ag = None
    title_suffix = ''
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else: 
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    biomol = kwargs.get('biomol', False)
    hd = None
    LOGGER.warn('Building coordinates from electron density map. This may take a while.')
    LOGGER.timeit()
    _parseEMDLines(ag, stream, cutoff=cutoff, n_nodes=n_nodes, num_iter=num_iter, format='EMD')
    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(ag.numAtoms(),
                                            ag.numCoordsets() - n_csets))
    return ag
Example #33
File: compare.py Project: prody/ProDy
def mapOntoChainByAlignment(atoms, chain, **kwargs):
    """This function is similar to :func:`.mapOntoChain` but correspondence 
    of chains is found by alignment provided. 
    
    :arg alignments: A list of predefined alignments. It can be also a 
        dictionary or :class:`MSA` instance where the keys or 
        labels are the title of *atoms* or *chains*. 
    :type alignments: list, dict, :class:`MSA`
    """

    alignments = kwargs.pop('alignments', None)
    if alignments is None:
        return mapOntoChain(atoms, chain, **kwargs)
    else:
        if isinstance(alignments, (MSA, dict)):
            refseq = str(alignments[chain.getTitle()])
            tarseq = str(alignments[atoms.getTitle()])
            alignment = [refseq, tarseq]
        else:
            index = kwargs.pop('index', 0)
            alignment = alignments[index]

        tar_aligned_seq = alignment[-1]
        for char in GAPCHARS:
            tar_aligned_seq = tar_aligned_seq.replace(char, '').upper()
        hv = atoms.getHierView()
        for target_chain in hv.iterChains():
            tar_seq = target_chain.getSequence().upper()
            if tar_seq == tar_aligned_seq:
                mappings = mapOntoChain(target_chain, chain, pwalign=alignment, **kwargs)
                return mappings
        LOGGER.warn('The sequence of the chain does not match that in the '
                    'alignment (%s).' % atoms.getTitle())
    return []
Example #34
    def getESSAEnsemble(self):

        'Returns the ESSA mode ensemble, comprised of ENMs calculated for each scanned/perturbed residue.'

        if self._lowmem:
            LOGGER.warn('ModeEnsemble was not generated due to lowmem=True')
        else:
            return self._ensemble[:]
Example #35
    def getParticularSMILES(self, key):
        """Returns SMILES for a particular chemical"""
        if not self.isSuccess:
            LOGGER.warn(
                'Quartata Chemical Record does not have any data yet. '
                'Please run fetch again, possibly with different parameters.')

        return self._chemDict[key]['SMILES']
Example #36
def _try_import_matplotlib():
    try:
        import matplotlib as plt
        plt.rcParams.update({'font.size': 20, 'font.family': 'Arial'})
    except ImportError:
        LOGGER.warn('matplotlib is required for generating figures')
        return None
    return plt
Example #37
    def saveESSAEnsemble(self):

        'Saves the ESSA mode ensemble, comprised of ENMs calculated for each scanned/perturbed residue.'

        if self._lowmem:
            LOGGER.warn('ModeEnsemble was not generated due to lowmem=True')
        else:
            saveModeEnsemble(self._ensemble, filename='{}_{}'.format(self._title, self._enm))
Example #38
File: goa.py Project: nffaruk/ProDy
def parseOBO(**kwargs):
    """Parse a GO OBO file containing the GO itself.
    See `OBO`_ for more information on the file format.

    .. _OBO: http://owlcollab.github.io/oboformat/doc/obo-syntax.html
    """
    try:
        from goatools import obo_parser
    except ImportError:
        raise ImportError('GOATools needs to be installed to use parseOBO')

    go_obo_url = kwargs.get('go_obo_url', None)
    if go_obo_url is None:
        go_obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

    data_folder = kwargs.get('data_folder', None)
    if data_folder is None:
        data_folder = os.getcwd() + '/Data'

    # Create the data folder if it does not exist already
    if not os.path.isfile(data_folder):
        # Emulate mkdir -p (no error if folder exists)
        try:
            os.mkdir(data_folder)
        except OSError as e:
            if e.errno != 17:
                raise e
    else:
        raise Exception(
            'Data path (' + data_folder + ') exists as a file. '
            'Please rename, remove or change the desired location of the data path.'
        )

    go_obo = data_folder + '/go-basic.obo'

    # Check if the file exists already; if not, download it
    if not os.path.isfile(go_obo):
        try:
            handle = openURL(go_obo_url)
        except Exception as err:
            LOGGER.warn('{0} download failed ({1}).'.format(
                go_obo_url, str(err)))
        else:
            data = handle.read()
            if len(data):
                with open(go_obo, 'w+b') as obofile:
                    obofile.write(data)

                LOGGER.debug('{0} downloaded ({1})'.format(
                    go_obo_url, sympath(go_obo)))
            else:
                LOGGER.warn(
                    '{0} download failed, reason unknown.'.format(go_obo_url))

    return obo_parser.GODag(go_obo)
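A usage sketch; the GO term ID is illustrative and goatools' GODag interface is assumed:

    go = parseOBO()          # downloads go-basic.obo into ./Data if needed
    term = go['GO:0003677']  # look up a term by its GO ID
    print(term.name, term.depth)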
Example #39
    def getPDBs(self, filtered=True):
        """Returns PDB list (filters may be applied)"""

        if self._alignPDB is None:
            LOGGER.warn('Dali Record does not have any data yet. Please run fetch.')
        
        if filtered:
            return self._pdbList
        return self._pdbListAll
Example #40
File: goa.py Project: prody/ProDy
def parseOBO(**kwargs):
    """Parse a GO OBO file containing the GO itself.
    See `OBO`_ for more information on the file format.

    .. _OBO: http://owlcollab.github.io/oboformat/doc/obo-syntax.html
    """
    try:
        from goatools import obo_parser
    except ImportError:
        raise ImportError('GOATools needs to be installed to use parseOBO')

    go_obo_url = kwargs.get('go_obo_url', None)
    if go_obo_url is None:
        go_obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

    data_folder = kwargs.get('data_folder', None)
    if data_folder is None:
        data_folder = os.getcwd() + '/Data'

    # Create the data folder if it does not exist already
    if not os.path.isfile(data_folder):
        # Emulate mkdir -p (no error if folder exists)
        try:
            os.mkdir(data_folder)
        except OSError as e:
            if e.errno != 17:
                raise e
    else:
        raise Exception('Data path (' + data_folder + ') exists as a file. '
                        'Please rename, remove or change the desired location of the data path.')

    go_obo = data_folder + '/go-basic.obo'

    # Check if the file exists already; if not, download it
    if not os.path.isfile(go_obo):
        try:
            handle = openURL(go_obo_url)
        except Exception as err:
            LOGGER.warn('{0} download failed ({1}).'.format(
                go_obo_url, str(err)))
        else:
            data = handle.read()
            if len(data):
                with open(go_obo, 'w+b') as obofile:
                    obofile.write(data)

                LOGGER.debug('{0} downloaded ({1})'
                             .format(go_obo_url, sympath(go_obo)))
            else:
                LOGGER.warn('{0} download failed, reason unknown.'
                            .format(go_obo_url))

    return obo_parser.GODag(go_obo)
Example #41
File: atomgroup.py Project: npabon/ProDy
    def _setCoords(self, coords, label=None, overwrite=False):
        """Set coordinates without data type checking.  *coords* must
        be a :class:`~numpy.ndarray`, but may have data type other than
        :class:`numpy.float64`, e.g. :class:`numpy.float32`.  *label*
        argument may be used to label coordinate sets.  *label* may be
        a string or a list of strings length equal to the number of
        coordinate sets."""

        n_atoms = self._n_atoms
        if n_atoms:
            if coords.shape[-2] != n_atoms:
                raise ValueError('coords array has incorrect number of atoms')
        else:
            self._n_atoms = n_atoms = coords.shape[-2]

        ndim = coords.ndim
        shape = coords.shape
        if self._coords is None or overwrite or (ndim == 3 and shape[0] > 1):
            if ndim == 2:
                self._coords = coords.reshape((1, n_atoms, 3))
                if label is None:
                    self._cslabels = [None]
                else:
                    self._cslabels = [str(label)]
                self._n_csets = n_csets = 1

            else:
                self._coords = coords
                self._n_csets = n_csets = shape[0]

                if label is None or isinstance(label, str):
                    self._cslabels = [label] * n_csets

                elif isinstance(label, (list, tuple)):
                    if len(label) == n_csets:
                        self._cslabels = list(label)

                    else:
                        self._cslabels = [None] * n_csets
                        LOGGER.warn('Number of labels does not match number '
                                    'of coordinate sets.')
                else:
                    LOGGER.warn('Wrong type for `label` argument.')
            self._acsi = 0
            self._setTimeStamp()

        else:
            acsi = self._acsi
            if ndim == 2:
                self._coords[acsi] = coords
            else:
                self._coords[acsi] = coords[0]
            self._setTimeStamp(acsi)
            if label is not None:
                self._cslabels[acsi] = str(label)
Example #42
File: norm.py Project: fongchun/ProDy
def SCN(M, **kwargs):
    la = importLA()
    total_count = kwargs.pop('total_count', None)
    max_loops = kwargs.pop('max_loops', 100)
    tol = kwargs.pop('tol', 1e-5)

    N = M.copy()
    n = 0
    d0 = None
    p = 1
    last_p = None

    while True:
        C = np.diag(div0(1., np.sum(N, axis=0)))
        N = np.dot(N, C)

        R = np.diag(div0(1., np.sum(N, axis=1)))
        N = np.dot(R, N)

        n += 1

        # check convergence of symmetry
        d = np.mean(np.abs(N - N.T))
        
        if d0 is not None:
            p = div0(d, d0)
            dp = np.abs(p - last_p)
            if dp < tol:
                break
        else:
            d0 = d
        LOGGER.debug('Iteration {0}: d = {1}, p = {2}'.format(str(n), str(d), str(p)))
        last_p = p
        
        if max_loops is not None:
            if n >= max_loops:
                LOGGER.warn('The SCN algorithm did not converge after {0} '
                            'iterations.'.format(max_loops))
                break
    # guarantee symmetry
    N = (N + N.T) / 2.
    if total_count == 'original':
        total_count = np.sum(M)

    if total_count is not None:
        sum_N = np.sum(N)
        k = total_count / sum_N
        N = N * k
    return N
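A standalone sketch of the normalization loop above on a small random matrix; plain division stands in for div0, and a fixed iteration count replaces the convergence test:

    import numpy as np

    M = np.random.rand(5, 5) + 0.1
    N = M.copy()
    for _ in range(100):
        N = N / N.sum(axis=0)             # normalize columns
        N = N / N.sum(axis=1)[:, None]    # normalize rows
    N = (N + N.T) / 2.                    # guarantee symmetry
    print(N.sum(axis=0), N.sum(axis=1))   # both approach all-ones vectors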
Example #43
File: emdfile.py Project: prody/ProDy
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)"""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    map = kwargs.get('map', True)
    make_nodes = kwargs.get('make_nodes', False)

    if map is False and make_nodes is False:
        LOGGER.warn('At least one of map and make_nodes should be True. '
                    'Since map is False, make_nodes has been set to True.')
        make_nodes = True

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
    atomgroup._n_atoms = n_nodes

    if make_nodes:
        LOGGER.info('Building coordinates from electron density map. This may take a while.')
        LOGGER.timeit()

        if map:
            emd, atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                                            num_iter=num_iter, map=map, make_nodes=make_nodes)
        else:
            atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                                       num_iter=num_iter, map=map, make_nodes=make_nodes)

        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(atomgroup.numAtoms(), atomgroup.numCoordsets()))
    else: 
        emd = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                             num_iter=num_iter, map=map, make_nodes=make_nodes)

    if make_nodes:
        if map:
            return emd, atomgroup
        else:
            return atomgroup
    else:
        return emd
Example #44
    def __add__(self, other):

        if not isinstance(other, AtomGroup):
            raise TypeError('unsupported operand type(s) for +: {0} and '
                            '{1}'.format(repr(type(self).__name__),
                                         repr(type(other).__name__)))

        new = AtomGroup(self._title + ' + ' + other._title)
        if self._n_csets:
            if self._n_csets == other._n_csets:
                new.setCoords(np.concatenate((self._coords, other._coords), 1))
                if self._n_csets > 1:
                    LOGGER.info('All {0} coordinate sets are copied to '
                                '{1}.'.format(self._n_csets, new.getTitle()))
            else:
                new.setCoords(np.concatenate((self._getCoords(),
                                              other._getCoords())))
                LOGGER.info('Active coordinate sets are copied to {0}.'
                            .format(new.getTitle()))
        elif other._n_csets:
            LOGGER.warn('No coordinate sets are copied to {0}'
                        .format(new.getTitle()))

        for key in set(list(self._data) + list(other._data)):
            if key in ATOMIC_FIELDS and ATOMIC_FIELDS[key].readonly:
                continue
            this = self._data.get(key)
            that = other._data.get(key)
            if this is not None or that is not None:
                if this is None:
                    shape = list(that.shape)
                    shape[0] = len(self)
                    this = np.zeros(shape, that.dtype)
                if that is None:
                    shape = list(this.shape)
                    shape[0] = len(other)
                    that = np.zeros(shape, this.dtype)
                new._data[key] = np.concatenate((this, that))

        if self._bonds is not None and other._bonds is not None:
            new.setBonds(np.concatenate([self._bonds,
                                         other._bonds + self._n_atoms]))
        elif self._bonds is not None:
            new.setBonds(self._bonds.copy())
        elif other._bonds is not None:
            new.setBonds(other._bonds + self._n_atoms)

        return new
Example #45
File: atomgroup.py Project: npabon/ProDy
    def addCoordset(self, coords, label=None):
        """Add a coordinate set.  *coords* argument may be an object with
        :meth:`getCoordsets` method."""

        if self._coords is None:
            return self.setCoords(coords)

        n_atoms = self._n_atoms
        atoms = coords
        try:
            coords = (atoms._getCoordsets()
                      if hasattr(coords, '_getCoordsets') else
                      atoms.getCoordsets())
        except AttributeError:
            pass
        else:
            if coords is None:
                raise ValueError('coordinates of {0} are not set'
                                 .format(str(atoms)))

        try:
            checkCoords(coords, csets=True, natoms=n_atoms, dtype=None)
        except TypeError:
            raise TypeError('coords must be a numpy array or an '
                            'object with `getCoords` method')

        if coords.ndim == 2:
            coords = coords.reshape((1, n_atoms, 3))

        diff = coords.shape[0]
        self._coords = np.concatenate((self._coords, coords), axis=0)
        self._n_csets = self._coords.shape[0]
        timestamps = self._timestamps
        self._timestamps = np.zeros(self._n_csets)
        self._timestamps[:len(timestamps)] = timestamps
        self._timestamps[len(timestamps):] = time()
        self._kdtrees.extend([None] * diff)
        if label is None or isinstance(label, str):
            self._cslabels.extend([label] * diff)
        elif isinstance(label, (list, tuple)):
            if len(label) == diff:
                self._cslabels.extend([str(lbl) for lbl in label])
            else:
                LOGGER.warn('Number of labels does not match number '
                            'of coordinate sets.')
        else:
            LOGGER.warn('Wrong type for `label` argument.')
Example #46
def prody_select(selstr, *pdbs, **kwargs):
    """Write selected atoms from a PDB file in PDB format.
    
    :arg selstr: atom selection string, see :ref:`selections`
    
    :arg pdbs: :term:`PDB` identifier(s) or filename(s)
    
    :arg output: output filename, default is :file:`pdb_selected.pdb`

    :arg prefix: prefix for output file, default is PDB filename
    
    :arg suffix: output filename suffix, default is :file:`_selected`"""

    from os.path import isfile
    from prody import LOGGER, parsePDB, writePDB
    
    #selstr = kwargs.get('selstr')
    if not pdbs:
        raise ValueError('pdb argument must be provided')

    if ((isfile(selstr) or len(selstr) == 4 and selstr[0].isdigit()) and 
        len(pdbs) == 1 and not isfile(pdbs[0])):
        pdbs, selstr = selstr, pdbs[0]
        LOGGER.warn('The order of selstr and pdb arguments has switched '
                    'to support multiple files; the old order will be '
                    'supported until v1.4.')
        pdbs = [pdbs]

    prefix = kwargs.get('prefix', None)
    suffix = kwargs.get('suffix', '_selected')
    output = kwargs.get('output', None)
    
    for pdb in pdbs:    
        pdb = parsePDB(pdb)
            
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            LOGGER.warn('Selection {0:s} did not match any atoms.'
                        .format(repr(selstr)))
            return
        LOGGER.info('Selection {0:s} matched {1:d} atoms.'
                    .format(repr(selstr), len(pdbselect)))

        outname = output or ((prefix or pdb.getTitle()) + suffix)
        LOGGER.info('Selection is written into: ' + 
                    writePDB(outname, pdbselect))
Example #47
File: localpdb.py Project: sixpi/ProDy
def pathPDBFolder(folder=None, divided=False):
    """Returns or specify local PDB folder for storing PDB files downloaded from
    `wwPDB <http://www.wwpdb.org/>`_ servers.  Files stored in this folder can
    be accessed via :func:`.fetchPDB` from any working directory.  To release
    the current folder, pass an invalid path, e.g. ``folder=''``.

    If *divided* is **True**, the divided folder structure of wwPDB servers
    will be assumed when reading from and writing to the local folder.  For
    example, a structure with identifier **1XYZ** will be present as
    :file:`pdblocalfolder/xy/pdb1xyz.pdb.gz`.

    If *divided* is **False**, a plain folder structure will be expected and
    adopted when saving files.  For example, the same structure will be
    present as :file:`pdblocalfolder/1xyz.pdb.gz`.

    Finally, in either case, lower case letters will be used and compressed
    files will be stored."""

    if folder is None:
        folder = SETTINGS.get('pdb_local_folder')
        if folder:
            if isdir(folder):
                return folder, SETTINGS.get('pdb_local_divided', True)
            else:
                LOGGER.warn('PDB local folder {0} is not accessible.'
                            .format(repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local PDB folder is set: {0}'.format(repr(folder)))
            if divided:
                LOGGER.info('wwPDB divided folder structure will be assumed.')
            else:
                LOGGER.info('A plain folder structure will be assumed.')
            SETTINGS['pdb_local_folder'] = folder
            SETTINGS['pdb_local_divided'] = bool(divided)
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_local_folder')
            if current:
                LOGGER.info('PDB folder {0} is released.'
                            .format(repr(current)))
                SETTINGS.pop('pdb_local_divided')
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'.format(repr(folder)))
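A usage sketch for pathPDBFolder (paths are illustrative):

    from prody import pathPDBFolder

    pathPDBFolder('/data/pdb')                # plain layout: /data/pdb/1xyz.pdb.gz
    pathPDBFolder('/data/pdb', divided=True)  # divided layout: /data/pdb/xy/pdb1xyz.pdb.gz
    folder, divided = pathPDBFolder()         # query the stored setting
    pathPDBFolder('')                         # release the folder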
Example #48
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of 
    the weekly clustering of protein chains in the PDB generated by blastclust. 
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/
    
    This function will download about 10 Mb of data and save it after 
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can 
    be loaded using :func:`loadPDBClusters` function and be accessed 
    using :func:`listPDBCluster`."""
    
    if sqid is not None:
        if sqid not in PDB_CLUSTERS:
            raise ValueError('sqid must be one of ' + PDB_CLUSTERS_SQID_STR)
        keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)
    
    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(PDB_CLUSTERS),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        else:
            out = openFile(filename+'.gz', 'w', folder=PDB_CLUSTERS_PATH) 
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i, '_prody_fetchPDBClusters')
    LOGGER.clear()
    if len(PDB_CLUSTERS) == count:
        LOGGER.info('All PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
Example #49
File: pdbfile.py Project: prody/ProDy
def _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames):
    altloc_keys = list(altloc)
    altloc_keys.sort()
    indices = {}
    for key in altloc_keys:
        xyz = atomgroup.getCoords()
        success = 0
        lines = altloc[key]
        for line, i in lines:
            aan = line[12:16].strip()
            arn = line[17:21].strip()
            ach = line[21]
            ari = int(line[22:26].split()[0])
            rn, ids, ans = indices.get((ach, ari), (None, None, None))
            if ids is None:
                ids = indices.get(ach, None)
                if ids is None:
                    ids = (chainids == ach).nonzero()[0]
                    indices[ach] = ids
                ids = ids[resnums[ids] == ari]
                if len(ids) == 0:
                    LOGGER.warn("failed to parse altloc {0} at line {1}, "
                                "residue not present for altloc 'A'".format(
                                repr(key), i+1))
                    continue
                rn = resnames[ids[0]]
                ans = atomnames[ids]
                indices[(ach, ari)] = (rn, ids, ans)
            if rn != arn:
                LOGGER.warn("failed to parse altloc {0} at line {1}, "
                            "residue name mismatch (expected {2}, "
                            "parsed {3})".format(repr(key), i+1, repr(rn),
                                                   repr(arn)))
                continue
            index = ids[(ans == aan).nonzero()[0]]
            if len(index) != 1:
                LOGGER.warn("failed to parse altloc {0} at line {1}, atom"
                            " {2} not found in the residue"
                            .format(repr(key), i+1, repr(aan)))
                continue
            try:
                xyz[index[0], 0] = float(line[30:38])
                xyz[index[0], 1] = float(line[38:46])
                xyz[index[0], 2] = float(line[46:54])
            except:
                LOGGER.warn('failed to parse altloc {0} at line {1}, could'
                            ' not read coordinates'.format(repr(key), i+1))
                continue
            success += 1
        LOGGER.info('{0} out of {1} altloc {2} lines were parsed.'
                    .format(success, len(lines), repr(key)))
        if success > 0:
            LOGGER.info('Altloc {0} is appended as a coordinate set to '
                        'atomgroup {1}.'.format(repr(key), atomgroup.getTitle()))
            atomgroup.addCoordset(xyz, label='altloc ' + key)
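For reference, the fixed-column slices used above follow the PDB coordinate
record format; a commented sketch with a made-up ATOM line:

line = 'ATOM      1  CA BALA A  12      11.000  22.000  33.000'
name    = line[12:16].strip()  # atom name, columns 13-16
alt     = line[16]             # alternate location indicator, column 17
resname = line[17:21].strip()  # residue name
chid    = line[21]             # chain identifier, column 22
resnum  = int(line[22:26])     # residue sequence number, columns 23-26
x, y, z = float(line[30:38]), float(line[38:46]), float(line[46:54])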
Example #50
File: wwpdb.py  Project: fongchun/ProDy
def checkIdentifiers(*pdb):
    """Check whether *pdb* identifiers are valid, and replace invalid ones
    with **None** in place."""

    identifiers = []
    append = identifiers.append
    for pid in pdb:
        try:
            pid = pid.strip().lower()
        except AttributeError:
            LOGGER.warn('{0} is not a valid identifier.'.format(repr(pid)))
            append(None)
        else:
            if not (len(pid) == 4 and pid.isalnum()):
                LOGGER.warn('{0} is not a valid identifier.'
                            .format(repr(pid)))
                append(None)
            else:
                append(pid)
    return identifiers
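For example, the return value follows directly from the checks above:

checkIdentifiers('1UBI ', '2k39', 12, 'abcde')
# -> ['1ubi', '2k39', None, None]; warnings are logged for 12 and 'abcde'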
Example #51
 def __or__(self, other):
     
     if self is other:
         return self
 
     if not isinstance(other, AtomPointer):
         raise TypeError('other must be an AtomPointer')
         
     if self._ag != other.getAtomGroup():
         raise ValueError('both selections must be from the same AtomGroup')
         
     acsi = self.getACSIndex()
     if acsi != other.getACSIndex():
         LOGGER.warn('Active coordinate set indices do not match, it will '
                     'be set to zero.')
         acsi = 0
         
     indices = np.unique(np.concatenate((self._getIndices(), 
                                         other._getIndices())))
     return Selection(self._ag, indices, '({0:s}) or ({1:s})'.format(
                                 self.getSelstr(), other.getSelstr()), 
                                 acsi, unique=True)
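In practice this operator is reached through the `|` syntax on selections from
the same AtomGroup; a short sketch:

from prody import parsePDB

ag = parsePDB('1ubi')
sel = ag.select('resnum 1 to 10') | ag.select('name CA')
print(sel.getSelstr())   # '(resnum 1 to 10) or (name CA)'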
Example #52
File: flags.py  Project: fongchun/ProDy
def addNonstdAminoacid(resname, *properties):
    """Add non-standard amino acid *resname* with *properties* selected from:

      * {props}

    .. ipython:: python

       addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large',
       'polar', 'surface')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')"""

    resname = str(resname)
    if len(resname) > 4:
        LOGGER.warn('Residue name {0} is unusually long.'
                    .format(repr(resname)))
    propset = set(properties)
    for cat, val in CATEGORIES.items():
        intersection = val.intersection(propset)
        if intersection:
            if len(intersection) > 1:
                raise ValueError('amino acid properties {0} cannot be '
                                 'present together'
                                 .format(', '.join([repr(prp)
                                                    for prp in intersection])))
            for prop in intersection:
                propset.remove(prop)
    if propset:
        raise ValueError('amino acid property {0} is not valid'
                         .format(repr(propset.pop())))

    nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    nonstd[resname] = set(properties)
    updateNonstandard(nonstd)
Example #53
File: nma.py  Project: fongchun/ProDy
    def _getTrace(self):
        """Returns trace, and emit a warning message if trace is calculated
        using eigenvalues of a subset of variances (eigenvalues or inverse
        eigenvalues)."""

        trace = self._trace
        if trace is None:
            if self._vars is None:
                raise ValueError('variances are not set or calculated')
            trace = self._vars.sum()
            diff = self._dof - self._n_modes
            if self._is3d and diff > 6:
                diff = True
            elif diff > 1:
                diff = True
            else:
                diff = False
            if diff:
                from prody import LOGGER
                LOGGER.warn('Total variance for {0} is calculated using '
                            '{1} available modes out of {2} possible.'
                            .format(str(self), self._n_modes, self._dof))
        return trace
Example #54
File: compare.py  Project: prody/ProDy
def getDictMapping(target, chain, map_dict):
    """Returns lists of matching residues (based on *map_dict*)."""

    pdbid = chain._chain.getTitle()[:4].lower()
    chid = chain._chain.getChid().upper()
    key = pdbid + chid

    mapping = map_dict.get(key)
    if mapping is None:
        LOGGER.warn('map_dict does not have the mapping for {0}'.format(key))
        return None

    tar_indices = mapping[0]
    chn_indices = mapping[1]

    chain_res_list = [res for res in chain]

    amatch = []
    bmatch = []
    n_match = 0
    n_mapped = 0
    for i, a in enumerate(target):  
        ares = a.getResidue()
        amatch.append(ares)
        if i in tar_indices:
            try:
                n = tar_indices.index(i)
            except ValueError:  # list.index raises ValueError when i is absent
                LOGGER.warn('\nthe number of residues in the map_dict ({0} residues) is inconsistent with {2} ({1} residues)'
                            .format(max(tar_indices)+1, len(chain_res_list), target.getTitle()))
                return None
            try:
                b = chain_res_list[chn_indices[n]]
            except IndexError:
                LOGGER.warn('\nthe number of residues in the map_dict ({0} residues) is inconsistent with {2} ({1} residues)'
                            .format(max(chn_indices)+1, len(chain_res_list), chain.getTitle()))
                return None
            bres = b.getResidue()
            bmatch.append(bres)
            if a.getResname() == b.getResname():
                n_match += 1
            n_mapped += 1
        else:
            bmatch.append(None)

    return amatch, bmatch, n_match, n_mapped
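The expected *map_dict* layout, inferred from the lookups above, keys a
lowercase PDB ID plus chain ID to two parallel index lists (the values below
are hypothetical):

map_dict = {
    '1ubiA': ([0, 1, 2, 5],    # indices into the target residues
              [0, 1, 2, 4]),   # matching indices into the chain residues
}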
Example #55
def alignPDBEnsemble(ensemble, suffix='_aligned', outdir='.', gzip=False):
    """Align PDB files using transformations from *ensemble*, which may be
    a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance.  The label
    of the conformation (see :meth:`~.PDBConformation.getLabel`) is used to
    determine the PDB structure and model number.  The first four characters of
    the label are expected to be the PDB identifier and the trailing numbers the
    model number.  For example, the :class:`.Transformation` from the
    conformation labeled *2k39_ca_selection_'resnum_<_71'_m116* will be applied
    to the 116th model of structure **2k39**.  After the applicable
    transformations are made, the structure is written into *outdir* as
    :file:`2k39_aligned.pdb`.
    If *gzip* is **True**, output files will be compressed.  Return value is
    the output filename or list of filenames, in the order files are processed.
    Note that if multiple models from a file are aligned, that filename will
    appear in the list multiple times."""

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]
    if gzip:
        gzip = '.gz'
    else:
        gzip = ''
    output = []
    pdbdict = {}
    for conf in ensemble:
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')
        label = conf.getLabel()

        pdb = label[:4]
        filename = pdbdict.get(pdb, fetchPDB(pdb))
        if filename is None:
            LOGGER.warning('PDB file for conformation {0} is not found.'
                           .format(label))
            output.append(None)
            continue
        LOGGER.info('Parsing PDB file {0} for conformation {1}.'
                    .format(pdb, label))

        acsi = None
        model = label.rfind('m')
        if model > 3:
            model = label[model+1:]
            if model.isdigit():
                acsi = int(model) - 1
            LOGGER.info('Applying transformation to model {0}.'
                        .format(model))

        if isinstance(filename, str):
            ag = parsePDB(filename)
        else:
            ag = filename

        if acsi is not None:
            if acsi >= ag.numCoordsets():
                LOGGER.warn('Model number {0} for {1} is out of range.'
                            .format(model, pdb))
                output.append(None)
                continue
            ag.setACSIndex(acsi)
        trans.apply(ag)
        outfn = os.path.join(outdir, pdb + suffix + '.pdb' + gzip)
        if ag.numCoordsets() > 1:
            pdbdict[pdb] = ag
        else:
            writePDB(outfn, ag)
        output.append(os.path.normpath(outfn))

    for pdb, ag in pdbdict.items():  # PY3K: OK
        writePDB(os.path.join(outdir, pdb + suffix + '.pdb' + gzip), ag)
    if len(output) == 1:
        return output[0]
    else:
        return output
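A sketch of the intended workflow, assuming the standard PDBEnsemble setup so
that conformation labels carry the PDB identifier and model number:

from prody import parsePDB, PDBEnsemble, alignPDBEnsemble

ca = parsePDB('2k39', subset='ca')       # NMR structure with multiple models
ensemble = PDBEnsemble('2k39 ensemble')
ensemble.setAtoms(ca)
ensemble.setCoords(ca.getCoords())
ensemble.addCoordset(ca)                 # labels end in model numbers, e.g. _m116
ensemble.superpose()                     # computes the transformations
filenames = alignPDBEnsemble(ensemble)   # writes 2k39_aligned.pdb into '.'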
Example #56
def searchPfam(query, **kwargs):
    """Return Pfam search results in a dictionary.  Matching Pfam accession
    as keys will map to evalue, alignment start and end residue positions.

    :arg query: UniProt ID, PDB identifier, protein sequence, or a sequence
        file; sequence queries must not contain gaps and must be at least
        16 characters long
    :type query: str

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60
    :type timeout: int

    *query* can also be a PDB identifier, e.g. ``'1mkp'`` or ``'1mkpA'`` with
    chain identifier.  UniProt ID of the specified chain, or the first
    protein chain will be used for searching the Pfam database."""

    prefix = "{http://pfam.xfam.org/}"
    query = str(query)
    if isfile(query):
        from prody.sequence import MSAFile

        try:
            seq = next(MSAFile(query))
        except:
            with openFile(query) as inp:
                seq = "".join(inp.read().split())
        else:
            seq = seq[0][1]
        if not seq.isalpha():
            raise ValueError("could not parse a sequence without gaps from " + query)
    else:
        seq = "".join(query.split())

    import xml.etree.cElementTree as ET

    LOGGER.timeit("_pfam")
    timeout = int(kwargs.get("timeout", 60))
    if len(seq) >= MINSEQLEN:
        if not seq.isalpha():
            raise ValueError(repr(seq) + " is not a valid sequence")

        # submit the sequence search (Python 2 urllib/urllib2 API);
        # the results are fetched and parsed further below
        fseq = ">Seq\n" + seq
        parameters = {"hmmdb": "pfam", "seq": fseq}
        enc_params = urllib.urlencode(parameters)
        request = urllib2.Request("http://hmmer.janelia.org/search/hmmscan", enc_params)

        url = urllib2.urlopen(request).geturl() + "?output=xml"
        LOGGER.debug('Submitted Pfam search for sequence "{0}...".'.format(seq[:MINSEQLEN]))

    else:
        if len(seq) <= 5:
            idcode = None
            from prody import parsePDBHeader

            try:
                polymers = parsePDBHeader(seq[:4], "polymers")
            except Exception as err:
                LOGGER.warn("failed to parse header for {0} ({1})".format(seq[:4], str(err)))
            else:
                chid = seq[4:].upper()
                for poly in polymers:
                    if chid and poly.chid != chid:
                        continue
                    for dbref in poly.dbrefs:
                        if dbref.database != "UniProt":
                            continue
                        idcode = dbref.idcode
                        LOGGER.info(
                            "UniProt ID code {0} for {1} chain " "{2} will be used.".format(idcode, seq[:4], poly.chid)
                        )
                        break
                    if idcode is not None:
                        break
            if idcode is None:
                LOGGER.warn("A UniProt ID code for PDB {0} could not be " "parsed.".format(repr(seq)))
                url = "http://pfam.xfam.org/protein/" + seq + "?output=xml"
            else:
                url = "http://pfam.xfam.org/protein/" + idcode + "?output=xml"

        else:
            url = "http://pfam.xfam.org/protein/" + seq + "?output=xml"

    LOGGER.debug("Retrieving Pfam search results: " + url)
    xml = None
    while LOGGER.timing("_pfam") < timeout:
        try:
            xml = openURL(url, timeout=timeout).read()
        except Exception:
            pass
        else:
            if xml:
                break

    if not xml:
        raise IOError("Pfam search timed out or failed to parse results " "XML, check URL: " + url)
    else:
        LOGGER.report("Pfam search completed in %.2fs.", "_pfam")

    if xml.find(b"There was a system error on your last request.") > 0:
        LOGGER.warn("No Pfam matches found for: " + seq)
        return None

    try:
        root = ET.XML(xml)
    except Exception as err:
        raise ValueError("failed to parse results XML, check URL: " + url)

    if len(seq) >= MINSEQLEN:
        try:
            xml_matches = root[0][0][0][0]
        except IndexError:
            raise ValueError("failed to parse results XML, check URL: " + url)
    else:
        results = dictElement(root[0], prefix)
        try:
            xml_matches = results["matches"]
        except KeyError:
            raise ValueError("failed to parse results XML, check URL: " + url)

    matches = dict()
    for child in xml_matches:

        try:
            accession = child.attrib["accession"][:7]
        except KeyError:
            raise ValueError("failed to parse results XML, check URL: " + url)

        if not re.search("^P(F|B)[0-9]{5}$", accession):
            raise ValueError("{0} does not match pfam accession" " format".format(accession))

        match = matches.setdefault(accession, dict(child.items()))
        locations = match.setdefault("locations", [])
        for loc in child:
            locations.append(dict(loc.items()))

    if len(seq) < MINSEQLEN:
        query = "Query " + repr(query)
    else:
        query = "Query sequence"

    if matches:
        LOGGER.info(query + " matched {0} Pfam families.".format(len(matches)))
    else:
        LOGGER.info(query + " did not match any Pfam families.")
    return matches
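Typical use, keyed by Pfam accession as described above (attribute names such
as 'start' and 'end' come from the Pfam XML and may vary by query type):

matches = searchPfam('P19491')   # a UniProt accession, used as an example
for acc, match in matches.items():
    for loc in match['locations']:
        print(acc, match.get('id'), loc.get('start'), loc.get('end'))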
Example #57
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be saved
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain only the data present in the XML
    file, and not all Ligand Expo XML files contain every possible data
    field.  It is therefore safer to use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')
    if isfile(cci):
        inp = openFile(cci)
        xml = inp.read()
        inp.close()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        else:
            folder = path = None  # no local folder when XML saving is off
        #url = ('http://ligand-expo.rcsb.org/reports/{0[0]}/{0}/{0}'
        #       '.xml'.format(cci.upper()))
        url = 'http://www.pdb.org/pdb/files/ligand/{0}.xml'.format(cci.upper())
        if not xml:
            #'http://www.pdb.org/pdb/files/ligand/{0}.xml'
            try:
                inp = openURL(url)
            except IOError:
                raise IOError('XML file for ligand {0} is not found online'
                              .format(cci))
            else:
                xml = inp.read()
                inp.close()
            if filename:
                out = openFile(filename, mode='w', folder=folder)
                out.write(xml)
                out.close()
            if SETTINGS.get('ligand_xml_save'):
                with openFile(xmlgz, 'w') as out:
                    out.write(xml)

    import xml.etree.cElementTree as ET

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}')+1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    dict_['formula_weight'] = float(dict_.get('formula_weight'))

    identifiers_and_descriptors = []
    results = root.find(ns + 'pdbx_chem_comp_identifierCategory')
    if results is not None:
        identifiers_and_descriptors.extend(results)
    results = root.find(ns + 'pdbx_chem_comp_descriptorCategory')
    if results is not None:
        identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [(audit.get('action_type'), audit.get('date'))
                       for audit in
                       list(root.find(ns + 'pdbx_chem_comp_auditCategory'))]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['resnum'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = dict([(child.tag[len_ns:], child.text) for child in list(atom)])

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    for bond in list(root.find(ns + 'chem_comp_bondCategory') or bonds):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified in the bond category of {1} is '
                            'not a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified in the bond category of {1} is '
                            'not a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_
Example #58
def psiBlastCycle(sequence=None, filename=None, **kwargs):
    """Returns a :class:`PDBBlastRecord` instance that contains results from
    a single cycle of EBI psiblast.

    :arg sequence: an object with an associated sequence string 
         or a sequence string itself
    :type sequence: :class:`Atomic`, :class:`Sequence`, or str

    :arg filename: a *filename* to save the results in XML format
    :type filename: str

    The following search parameters can be adjusted by the user.
    We use the same default values as 
    http://www.ebi.ac.uk/Tools/services/rest/psiblast/parameterdetails/
    wherever applicable.

    :arg email: email address for reporting problems
        default is [email protected]
    :type email: str with an @ before a .

    :arg matrix: The comparison matrix to be used to score alignments when searching the database
        possible values are 'BLOSUM45', 'BLOSUM62', 'BLOSUM80', 'PAM30' and 'PAM70' 
        default is 'BLOSUM62'
    :type matrix: str

    :arg gapopen: Penalty taken away from the score when a gap is created in sequence alignments. 
        Increasing the gap opening penalty will decrease the number of gaps in the final alignment.
        Possible values range from 8 to 16 inclusive, default is 11
    :type gapopen: int

    :arg gapext: Penalty taken away from the score for each base or residue in the gap. 
        Increasing the gap extension penalty favors short gaps in the final alignment; 
        conversely, decreasing it favors long gaps in the final alignment. 
        Possible values range from 0 to 3, default is 1
    :type gapext: int

    :arg expthr: Expectation threshold that limits the number of scores and alignments reported. 
        This is the maximum number of times the match is expected to occur by chance.
        Possible values are 1.0e-200, 1.0e-100, 1.0e-50, 1.0e-10, 1.0e-5, 1.0e-4, 1.0e-3,
        1.0e-2, 0.1, 1.0, 10.0, 100, 1000
        default is 10.0
    :type expthr: float

    :arg psithr: Expectation value threshold for automatic selection of matched sequences for 
        inclusion in the PSSM at each iteration.
        Possible values are 1.0e-6, 1.0e-5, 1.0e-4, 2.0e-4, 5.0e-4, 1.0e-3, 2.0e-3, 5.0e-3,
        1.0e-2, 2.0e-2, 0.1, 0.3, 0.5, 1.0, 3.0, 10.0
        default is 1.0e-3
    :type psithr: float

    :arg scores: Maximum number of match score summaries reported in the result output.
        Possible values are 5, 10, 20, 50, 100, 200, 500, 750, 1000, or 5000
        Default is 500
    :type scores: int

    :arg alignments: Maximum number of match alignments reported in the result output.
        Possible values are 5, 10, 20, 50, 100, 200, 500, 750, 1000, or 5000
        Default is 500
    :type alignments: int

    :arg dropoff: The amount a score can drop before extension of word hits is halted
        Possible values are 0, 2, 4, 6, 8, 10, 15, 20, 25, or 30
        Default is 15
    :type dropoff: int

    :arg finaldropoff: Dropoff value for final gapped alignment
        Possible values are 10, 12, 14, 16, 18, 20, 22, 24, 25, 26, 28, or 30
        Default is 25
    :type finaldropoff: int

    :arg filter: Filter regions of low sequence complexity. This can avoid issues with 
        low complexity sequences where matches are found due to composition rather than 
        meaningful sequence similarity. However, in some cases filtering also masks 
        regions of interest and so should be used with caution.
        Possible values are T and F, default is F
    :type filter: str

    :arg seqrange: Specify a range or section of the input sequence to use in the search.
        Example: Specifying '34-89' in an input sequence of total length 100, will tell BLAST 
        to only use residues 34 to 89, inclusive.
    :type seqrange: str of form START-END

    :arg database: a database name from those available. See
        http://www.ebi.ac.uk/Tools/services/rest/psiblast/parameterdetails/database
        default is pdb
    :type database: str

    :arg previousjobid: The job identifier for the previous PSI-BLAST iteration. 
        default is None
        You can change this if you want to continue from a previous run
    :type previousjobid: str

    :arg selectedHits: Name of a file containing a list of identifiers of the 
        hits from the previous iteration to use to construct the search PSSM 
        for this iteration.
        default is None
    :type selectedHits: str

    :arg cpfile: Name of a Checkpoint file from the previous iteration. 
        default is None
    :type cpfile: str

    :arg sleep: how long to wait to reconnect for status
         Sleep time is multiplied by 1.5 when results are not ready.
         default is 2 seconds
    :type sleep: float

    :arg timeout:  when to give up waiting for the results 
        default is 120 seconds
    :type timeout: float

    :arg cycle: cycle number
    :type cycle: int

    """
    cycle = kwargs.get('cycle',0)

    if sequence == 'runexample':
        sequence = ('ASFPVEILPFLYLGCAKDSTNLDVLEEFGIKYILNVTPNLPNLFENAGEFKYKQIPI'
                    'SDHWSQNLSQFFPEAISFIDEARGKNCGVLVHSLAGISRSVTVTVAYLMQKLNLSMN'
                    'DAYDIVKMKKSNISPNFNFMGQLLDFERTL')

    elif isinstance(sequence, Atomic):
        sequence = sequence.calpha.getSequence()

    elif isinstance(sequence, Sequence):
        sequence = str(sequence)

    elif isinstance(sequence, str):
        if len(sequence) in [4, 5, 6]:
            ag = parsePDB(sequence)
            sequence = ag.calpha.getSequence()
        sequence = ''.join(sequence.split())

    elif sequence is None:
        if cycle == 0: 
            cycle = 1
    else:
        raise TypeError('sequence must be Atomic, Sequence, or str not {0}'
                        .format(type(sequence)))

    if cycle == 0:
        query = [('sequence', sequence)]
    else:
        query = []

    email = kwargs.get('email','*****@*****.**')
    if not isinstance(email, str):
        raise TypeError('email must be a string')
    elif email.find('@') == -1 or email.find('.') == -1 or len(email.split('@')) != 2:
        raise ValueError('email must be a valid email address with at least one . and exactly one @ sign')
    elif not email.find('@') < email.find(email.split('.')[-1]):
        raise ValueError('email must be a valid email address with a . after the @ sign')
    query.append(('email', email))
    query.append(('title', 'ProDy psiBlastPDB request'))

    previousjobid = kwargs.get('previousjobid','')
    if previousjobid != '':
        query.append(('previousjobid',previousjobid))

    selectedHits = kwargs.get('selectedHits','')
    if selectedHits != '':
        query.append(('selectedHits',selectedHits))

    database = kwargs.get('database','pdb')
    checkPsiBlastParameter('database', database)
    query.append(('database',database))

    matrix = kwargs.get('matrix', 'BLOSUM62')
    checkPsiBlastParameter('matrix', matrix)
    query.append(('matrix',matrix))

    gapopen = kwargs.get('gapopen',11)
    checkPsiBlastParameter('gapopen', gapopen)
    query.append(('gapopen',gapopen))

    gapext = kwargs.get('gapext',1)
    checkPsiBlastParameter('gapext', gapext)
    query.append(('gapext',gapext))

    expthr = kwargs.get('expthr', 10.)
    checkPsiBlastParameter('expthr', expthr)
    query.append(('expthr',expthr))
    
    psithr = kwargs.get('psithr',1.0e-3)
    checkPsiBlastParameter('psithr', psithr)
    query.append(('psithr',psithr))

    scores = kwargs.get('scores',500)
    checkPsiBlastParameter('scores', scores)
    query.append(('scores',scores))

    alignments = kwargs.get('alignments',500)
    checkPsiBlastParameter('alignments', alignments)
    query.append(('alignments',alignments))
    
    query.append(('alignView',0))
                    
    dropoff = kwargs.get('dropoff',15)
    checkPsiBlastParameter('dropoff', dropoff)
    query.append(('dropoff',dropoff))
        
    finaldropoff = kwargs.get('finaldropoff',25)
    checkPsiBlastParameter('finaldropoff', finaldropoff)
    query.append(('finaldropoff',finaldropoff))
        
    filter = kwargs.get('filter','F')
    checkPsiBlastParameter('filter', filter)
    query.append(('filter',filter))
    
    if previousjobid == '' and selectedHits == '':
        seqrange = kwargs.get('seqrange', None)
        if seqrange is None:
            seqrange = '0-' + str(len(sequence))
        elif not isinstance(seqrange, str):
            raise TypeError('seqrange should be a string')
        elif len(seqrange.split('-')) != 2:
            raise ValueError('seqrange should take the form START-END')
        try:
            start = int(seqrange.split('-')[0])
            end = int(seqrange.split('-')[1])
        except:
            raise ValueError('seqrange should be START-END with START and END being integers')
        query.append(('seqrange',seqrange))
        
    headers = { 'User-Agent' : 'ProDy' }
    
    try:
        import urllib.parse
        urlencode = lambda data: bytes(urllib.parse.urlencode(data), 'utf-8')
    except ImportError:
        from urllib import urlencode

    sleep = float(kwargs.pop('sleep', 2))
    timeout = float(kwargs.pop('timeout', 120))
    
    data = urlencode(query)

    # submit the job
    base_url = 'http://www.ebi.ac.uk/Tools/services/rest/psiblast/'
    url = base_url + 'run/'
    LOGGER.timeit('_prody_psi-blast')
    if cycle == 0:
        LOGGER.info('PSI-Blast searching PDB database for "{0}..."'
                    .format(sequence[:5]))
    else:
        LOGGER.info('PSI-Blast searching PDB database, cycle={0}'
                    .format(cycle))

    handle = openURL(url, data=data, headers=headers)
    job_id = handle.read()
    handle.close()

    # check the status
    url = base_url + 'status/' + job_id
    handle = openURL(url)
    status = handle.read()
    handle.close()
                    
    # keep checking the status until it's no longer running
    while status == 'RUNNING':
        LOGGER.sleep(int(sleep), 'to reconnect to EBI for status.')
        LOGGER.write('Connecting to EBI for status...')
        handle = openURL(url)
        status = handle.read()
        LOGGER.clear()
        sleep = int(sleep * 1.5)
        if LOGGER.timing('_prody_psi-blast') > timeout:
            LOGGER.warn('PSI-Blast search time out.')
            return None

    LOGGER.info('The status is {0}'.format(status))
    LOGGER.clear()
    LOGGER.report('PSI-Blast search completed in %.1fs.', '_prody_psi-blast')
 
    if cycle != 1:
        # get the results
        url = base_url + 'result/' + job_id + '/xml'
        handle = openURL(url)
        results = handle.read()
        handle.close()
        
        try:
            ext_xml = filename.lower().endswith('.xml')
        except AttributeError:
            pass
        else:
            if not ext_xml:
                filename += '.xml'
            f_out = open(filename, 'w')
            f_out.write(results)
            f_out.close()
            LOGGER.info('Results are saved as {0}.'.format(repr(filename)))
        
        return job_id, PsiBlastRecord(results, sequence)
    else:
        return job_id
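A hedged end-to-end sketch using the built-in example sequence (requires
network access to the EBI service):

# first cycle: submit the sequence and get parsed results
job_id, record = psiBlastCycle('runexample', filename='cycle1.xml')
# a later cycle can continue from the previous job; with cycle=1 only the
# job id is returned, as in the code above
job_id2 = psiBlastCycle(previousjobid=job_id, cycle=1)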
Example #59
File: pdbfile.py  Project: prody/ProDy
def writePDBStream(stream, atoms, csets=None, **kwargs):
    """Write *atoms* in PDB format to a *stream*.

    :arg stream: anything that implements a :meth:`write` method (e.g. file,
        buffer, stdout)
        
    :arg renumber: whether to renumber atoms with serial indices
        Default is **True**
    :type renumber: bool
    """

    renumber = kwargs.get('renumber',True)

    remark = str(atoms)
    try:
        coordsets = atoms._getCoordsets(csets)
    except AttributeError:
        try:
            coordsets = atoms._getCoords()
        except AttributeError:
            raise TypeError('atoms must be an object with coordinate sets')
        if coordsets is not None:
            coordsets = [coordsets]
    else:
        if coordsets.ndim == 2:
            coordsets = [coordsets]
    if coordsets is None:
        raise ValueError('atoms does not have any coordinate sets')

    try:
        acsi = atoms.getACSIndex()
    except AttributeError:
        try:
            atoms = atoms.getAtoms()
        except AttributeError:
            raise TypeError('atoms must be an Atomic instance or an object '
                            'with `getAtoms` method')
        else:
            if atoms is None:
                raise ValueError('atoms is not associated with an Atomic '
                                 'instance')
            try:
                acsi = atoms.getACSIndex()
            except AttributeError:
                raise TypeError('atoms does not have a valid type')

    try:
        atoms.getIndex()
    except AttributeError:
        pass
    else:
        atoms = atoms.select('all')

    n_atoms = atoms.numAtoms()

    occupancy = kwargs.get('occupancy')
    if occupancy is None:
        occupancies = atoms._getOccupancies()
        if occupancies is None:
            occupancies = np.zeros(n_atoms, float)
    else:
        occupancies = np.array(occupancy)
        if len(occupancies) != n_atoms:
            raise ValueError('len(occupancy) must be equal to number of atoms')

    beta = kwargs.get('beta')
    if beta is None:
        bfactors = atoms._getBetas()
        if bfactors is None:
            bfactors = np.zeros(n_atoms, float)
    else:
        bfactors = np.array(beta)
        if len(bfactors) != n_atoms:
            raise ValueError('len(beta) must be equal to number of atoms')

    atomnames = atoms.getNames()
    if atomnames is None:
        raise ValueError('atom names are not set')
    for i, an in enumerate(atomnames):
        if len(an) < 4:
            atomnames[i] = ' ' + an

    s_or_u = np.array(['a']).dtype.char

    altlocs = atoms._getAltlocs()
    if altlocs is None:
        altlocs = np.zeros(n_atoms, s_or_u + '1')

    resnames = atoms._getResnames()
    if resnames is None:
        resnames = ['UNK'] * n_atoms

    chainids = atoms._getChids()
    if chainids is None:
        chainids = np.zeros(n_atoms, s_or_u + '1')

    resnums = atoms._getResnums()
    if resnums is None:
        resnums = np.ones(n_atoms, int)

    serials = atoms._getSerials()
    if serials is None or renumber:
        serials = np.arange(n_atoms, dtype=int) + 1

    icodes = atoms._getIcodes()
    if icodes is None:
        icodes = np.zeros(n_atoms, s_or_u + '1')

    hetero = ['ATOM'] * n_atoms
    heteroflags = atoms._getFlags('hetatm')
    if heteroflags is None:
        heteroflags = atoms._getFlags('hetero')
    if heteroflags is not None:
        hetero = np.array(hetero, s_or_u + '6')
        hetero[heteroflags] = 'HETATM'

    elements = atoms._getElements()
    if elements is None:
        elements = np.zeros(n_atoms, s_or_u + '1')
    else:
        elements = np.char.rjust(elements, 2)

    segments = atoms._getSegnames()
    if segments is None:
        segments = np.zeros(n_atoms, s_or_u + '6')

    # write remarks
    stream.write('REMARK {0}\n'.format(remark))

    # write secondary structures (if any)
    secondary = kwargs.get('secondary', True)
    secstrs = atoms._getSecstrs()
    if secstrs is not None and secondary:
        secindices = atoms._getSecindices()
        secclasses = atoms._getSecclasses()
        secids = atoms._getSecids()

        # write helices
        for i in range(1,max(secindices)+1):
            torf = np.logical_and(isHelix(secstrs), secindices==i)
            if torf.any():
                helix_resnums = resnums[torf]
                helix_chainids = chainids[torf]
                helix_resnames = resnames[torf]
                helix_secclasses = secclasses[torf]
                helix_secids = secids[torf]
                helix_icodes = icodes[torf]
                L = helix_resnums[-1] - helix_resnums[0] + 1

                stream.write(HELIXLINE.format(serNum=i, helixID=helix_secids[0], 
                            initResName=helix_resnames[0], initChainID=helix_chainids[0], 
                            initSeqNum=helix_resnums[0], initICode=helix_icodes[0],
                            endResName=helix_resnames[-1], endChainID=helix_chainids[-1], 
                            endSeqNum=helix_resnums[-1], endICode=helix_icodes[-1],
                            helixClass=helix_secclasses[0], length=L))
        
        # write strands
        torf_all_sheets = isSheet(secstrs)
        sheet_secids = secids[torf_all_sheets]

        for sheet_id in np.unique(sheet_secids):
            torf_strands_in_sheet = np.logical_and(torf_all_sheets, secids==sheet_id)
            strand_indices = secindices[torf_strands_in_sheet]
            numStrands = len(np.unique(strand_indices))

            for i in np.unique(strand_indices):
                torf_strand = np.logical_and(torf_strands_in_sheet, secindices==i)
                strand_resnums = resnums[torf_strand]
                strand_chainids = chainids[torf_strand]
                strand_resnames = resnames[torf_strand]
                strand_secclasses = secclasses[torf_strand]
                strand_icodes = icodes[torf_strand]

                stream.write(SHEETLINE.format(strand=i, sheetID=sheet_id, numStrands=numStrands,
                            initResName=strand_resnames[0], initChainID=strand_chainids[0], 
                            initSeqNum=strand_resnums[0], initICode=strand_icodes[0],
                            endResName=strand_resnames[-1], endChainID=strand_chainids[-1], 
                            endSeqNum=strand_resnums[-1], endICode=strand_icodes[-1],
                            sense=strand_secclasses[0]))

    # write atoms
    multi = len(coordsets) > 1
    write = stream.write
    for m, coords in enumerate(coordsets):
        pdbline = PDBLINE_LT100K
        if multi:
            write('MODEL{0:9d}\n'.format(m+1))
        for i, xyz in enumerate(coords):
            if pdbline != PDBLINE_GE100K and (i == MAX_N_ATOM or serials[i] > MAX_N_ATOM):
                LOGGER.warn('Indices are exceeding 99999 and hexadecimal format is being used')
                pdbline = PDBLINE_GE100K
            write(pdbline % (hetero[i], serials[i],
                             atomnames[i], altlocs[i],
                             resnames[i], chainids[i], resnums[i],
                             icodes[i],
                             xyz[0], xyz[1], xyz[2],
                             occupancies[i], bfactors[i],
                             segments[i], elements[i]))
        if multi:
            write('ENDMDL\n')
            altlocs = np.zeros(n_atoms, s_or_u + '1')
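Since *stream* only needs a :meth:`write` method, an in-memory buffer works as
well as a file:

from io import StringIO
from prody import parsePDB, writePDBStream

ag = parsePDB('1ubi')
buf = StringIO()
writePDBStream(buf, ag)
pdb_text = buf.getvalue()   # PDB-format text, starting with a REMARK line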
Example #60
File: pdbfile.py  Project: prody/ProDy
def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                   altloc_torf, format='PDB'):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    :arg lines: PDB/PQR lines
    :arg split: starting index for coordinate data lines"""

    format = format.upper()
    if format == 'PDB':
        isPDB = True
    else:
        isPDB = False

    if subset:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset == 'bb':
            subset = flags.BACKBONE
        only_subset = True
        protein_resnames = flags.AMINOACIDS
    else:
        only_subset = False
    if chain is None:
        only_chains = False
    else:
        only_chains = True
    onlycoords = False
    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms
    else:
        asize = len(lines) - split
    addcoords = False
    if atomgroup.numCoordsets() > 0:
        addcoords = True
    alength = asize
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    charges = np.zeros(asize, dtype=ATOMIC_FIELDS['charge'].dtype)
    if isPDB:
        segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
        elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
        bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
        occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
        anisou = None
        siguij = None
    else:
        radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)

    asize = 2000 # increase array length by this much when needed

    start = split
    stop = len(lines)
    nmodel = 0
    # if a specific model is requested, skip lines until that one
    if isPDB and model is not None and model != 1:
        for i in range(split, len(lines)):
            if lines[i][:5] == 'MODEL':
                nmodel += 1
                if model == nmodel:
                    start = i+1
                    stop = len(lines)
                    break
        if nmodel != model:
            raise PDBParseError('model {0} is not found'.format(model))
    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = ' ' + ''.join(altloc_torf.split())
        else:
            which_altlocs = ' A'
        altloc_torf = False
    else:
        which_altlocs = ' A'
        altloc_torf = True

    acount = 0
    coordsets = None
    altloc = defaultdict(list)
    i = start
    END = False
    while i < stop:
        line = lines[i]
        if not isPDB:
            fields = line.split()
            if len(fields) == 10:
                fields.insert(4, '')
            elif len(fields) != 11:
                LOGGER.warn('wrong number of fields for PQR format at line %d'%i)
                i += 1
                continue

        if isPDB:
            startswith = line[0:6].strip()
        else:
            startswith = fields[0]
        
        if startswith == 'ATOM' or startswith == 'HETATM':
            if isPDB:
                atomname = line[12:16].strip()
                resname = line[17:21].strip()
            else:
                atomname= fields[2]
                resname = fields[3]

            if only_subset:
                if not (atomname in subset and resname in protein_resnames):
                    i += 1
                    continue

            if isPDB:
                chid = line[21]
            else:
                chid = fields[4]

            if only_chains:
                if not chid in chain:
                    i += 1
                    continue
            
            if isPDB:
                alt = line[16]
                if alt not in which_altlocs:
                    altloc[alt].append((line, i))
                    i += 1
                    continue
            else:
                alt = ' '
            try:
                if isPDB:
                    coordinates[acount, 0] = line[30:38]
                    coordinates[acount, 1] = line[38:46]
                    coordinates[acount, 2] = line[46:54]
                else:
                    coordinates[acount, 0] = fields[6]
                    coordinates[acount, 1] = fields[7]
                    coordinates[acount, 2] = fields[8]
            except:
                if acount >= n_atoms > 0:
                    if nmodel == 0:
                        raise ValueError(format + ' file and AtomGroup ag must '
                                         'have same number of atoms')
                    LOGGER.warn('Discarding model {0}, which contains {1} more '
                                'atoms than first model does.'
                                .format(nmodel+1,acount-n_atoms+1))
                    acount = 0
                    nmodel += 1
                    coordinates = np.zeros((n_atoms, 3), dtype=float)
                    if isPDB:
                        while lines[i][:6] != 'ENDMDL':
                            i += 1
                else:
                    raise PDBParseError('invalid or missing coordinate(s) at '
                                         'line {0}'.format(i+1))
            if onlycoords:
                acount += 1
                i += 1
                continue

            try:
                serials[acount] = int(line[6:11]) if isPDB else int(fields[1])
            except ValueError:
                try:
                    serials[acount] = int(line[6:11], 16) if isPDB else int(fields[1], 16)
                except ValueError:
                    LOGGER.warn('failed to parse serial number in line {0}'
                                .format(i))
                    serials[acount] = serials[acount-1]+1
            altlocs[acount] = alt
            atomnames[acount] = atomname
            resnames[acount] = resname
            chainids[acount] = chid
            if isPDB:
                resnums[acount] = line[22:26] 
                icodes[acount] = line[26] 
            else:
                resnum = fields[5]
                if resnum[-1].isalpha():
                    icode = resnum[-1]
                    resnum = resnum[:-1]
                else:
                    icode = ' '
                resnums[acount] = resnum
                icodes[acount] = icode

            if isPDB:
                try:
                    occupancies[acount] = line[54:60]
                except:
                    LOGGER.warn('failed to parse occupancy at line {0}'
                                .format(i))
                try:
                    bfactors[acount] = line[60:66]
                except:
                    LOGGER.warn('failed to parse beta-factor at line {0}'
                                .format(i))
                hetero[acount] = startswith[0] == 'H'
                segnames[acount] = line[72:76]
                elements[acount] = line[76:78]
                try:
                    charges[acount] = int(line[79] + line[78])
                except:
                    charges[acount] = 0
            else:
                try:
                    charges[acount] = fields[9]
                except:
                    LOGGER.warn('failed to parse charge at line {0}'
                                .format(i))
                try:
                    radii[acount] = fields[10]
                except:
                    LOGGER.warn('failed to parse radius at line {0}'
                                .format(i))
            acount += 1
            if n_atoms == 0 and acount >= alength:
                # if arrays are short extend them with zeros
                alength += asize
                coordinates = np.concatenate(
                    (coordinates, np.zeros((asize, 3), float)))
                atomnames = np.concatenate((atomnames,
                    np.zeros(asize, ATOMIC_FIELDS['name'].dtype)))
                resnames = np.concatenate((resnames,
                    np.zeros(asize, ATOMIC_FIELDS['resname'].dtype)))
                resnums = np.concatenate((resnums,
                    np.zeros(asize, ATOMIC_FIELDS['resnum'].dtype)))
                chainids = np.concatenate((chainids,
                    np.zeros(asize, ATOMIC_FIELDS['chain'].dtype)))
                hetero = np.concatenate((hetero, np.zeros(asize, bool)))
                termini = np.concatenate((termini, np.zeros(asize, bool)))
                altlocs = np.concatenate((altlocs,
                    np.zeros(asize, ATOMIC_FIELDS['altloc'].dtype)))
                icodes = np.concatenate((icodes,
                    np.zeros(asize, ATOMIC_FIELDS['icode'].dtype)))
                serials = np.concatenate((serials,
                    np.zeros(asize, ATOMIC_FIELDS['serial'].dtype)))
                if isPDB:
                    bfactors = np.concatenate((bfactors,
                        np.zeros(asize, ATOMIC_FIELDS['beta'].dtype)))
                    occupancies = np.concatenate((occupancies,
                        np.zeros(asize, ATOMIC_FIELDS['occupancy'].dtype)))
                    segnames = np.concatenate((segnames,
                        np.zeros(asize, ATOMIC_FIELDS['segment'].dtype)))
                    elements = np.concatenate((elements,
                        np.zeros(asize, ATOMIC_FIELDS['element'].dtype)))
                    if anisou is not None:
                        anisou = np.concatenate((anisou, np.zeros((asize, 6),
                            ATOMIC_FIELDS['anisou'].dtype)))
                    if siguij is not None:
                        siguij = np.concatenate((siguij, np.zeros((asize, 6),
                            ATOMIC_FIELDS['siguij'].dtype)))
                else:
                    charges = np.concatenate((charges,
                        np.zeros(asize, ATOMIC_FIELDS['charge'].dtype)))
                    radii = np.concatenate((radii,
                        np.zeros(asize, ATOMIC_FIELDS['radius'].dtype)))
        #elif startswith == 'END   ' or startswith == 'CONECT':
        #    i += 1
        #    break
        elif not onlycoords and (startswith == 'TER   ' or
            startswith.strip() == 'TER'):
            termini[acount - 1] = True
        elif startswith == 'ENDMDL' or startswith[:3] == 'END':
            if acount == 0:
                # If there is no atom record between ENDMDL & END skip to next
                i += 1
                continue
            if model is not None:
                i += 1
                break
            diff = stop - i - 1
            END = diff < acount
            if coordsets is not None:
                END = END or nmodel >= coordsets.shape[0]
            if onlycoords:
                if acount < n_atoms:
                    LOGGER.warn('Discarding model {0}, which contains '
                                '{1} fewer atoms than the first model '
                                'does.'.format(nmodel+1, n_atoms-acount))
                else:
                    coordsets[nmodel] = coordinates
                    nmodel += 1
                acount = 0
                if not END:
                    coordinates = coordsets[nmodel]
            else:
                if acount != n_atoms > 0:
                    raise ValueError('PDB file and AtomGroup ag must have '
                                    'same number of atoms')
                # this is where to decide if more coordsets should be expected
                if END:
                    coordinates.resize((acount, 3), refcheck=False)
                    if addcoords:
                        atomgroup.addCoordset(coordinates)
                    else:
                        atomgroup._setCoords(coordinates)
                else:
                    coordsets = np.zeros((int(diff//acount+1), acount, 3))
                    coordsets[0] = coordinates[:acount]
                    onlycoords = True
                atomnames.resize(acount, refcheck=False)
                resnames.resize(acount, refcheck=False)
                resnums.resize(acount, refcheck=False)
                chainids.resize(acount, refcheck=False)
                hetero.resize(acount, refcheck=False)
                termini.resize(acount, refcheck=False)
                altlocs.resize(acount, refcheck=False)
                icodes.resize(acount, refcheck=False)
                serials.resize(acount, refcheck=False)
                if not only_subset:
                    atomnames = np.char.strip(atomnames)
                    resnames = np.char.strip(resnames)
                atomgroup.setNames(atomnames)
                atomgroup.setResnames(resnames)
                atomgroup.setResnums(resnums)
                atomgroup.setChids(chainids)
                atomgroup.setFlags('hetatm', hetero)
                atomgroup.setFlags('pdbter', termini)
                atomgroup.setAltlocs(altlocs)
                atomgroup.setIcodes(np.char.strip(icodes))
                atomgroup.setSerials(serials)
                if isPDB:
                    bfactors.resize(acount, refcheck=False)
                    occupancies.resize(acount, refcheck=False)
                    segnames.resize(acount, refcheck=False)
                    elements.resize(acount, refcheck=False)
                    atomgroup.setBetas(bfactors)
                    atomgroup.setOccupancies(occupancies)
                    atomgroup.setSegnames(np.char.strip(segnames))
                    atomgroup.setElements(np.char.strip(elements))
                    from prody.utilities.misctools import getMasses
                    atomgroup.setMasses(getMasses(np.char.strip(elements)))
                    if anisou is not None:
                        anisou.resize((acount, 6), refcheck=False)
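                        # PDB ANISOU records store U values multiplied by
                        # 1e4; divide by 10000 to recover A^2 units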
                        atomgroup.setAnisous(anisou / 10000)
                    if siguij is not None:
                        siguij.resize((acount, 6), refcheck=False)
                        atomgroup.setAnistds(siguij / 10000)
                else:
                    charges.resize(acount, refcheck=False)
                    radii.resize(acount, refcheck=False)
                    atomgroup.setCharges(charges)
                    atomgroup.setRadii(radii)

                nmodel += 1
                n_atoms = acount
                acount = 0
                coordinates = np.zeros((n_atoms, 3), dtype=float)
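                # resolve alternate locations collected for this model, then
                # reset the registry so the next model starts clean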
                if altloc and altloc_torf:
                    _evalAltlocs(atomgroup, altloc, chainids, resnums,
                                 resnames, atomnames)
                    altloc = defaultdict(list)
                if END:
                    break
        elif isPDB and startswith == 'ANISOU':
            if anisou is None:
                anisou = np.zeros((alength, 6),
                    dtype=ATOMIC_FIELDS['anisou'].dtype)
            try:
                index = acount - 1
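                # six 7-column fixed-width fields (PDB columns 29-70):
                # U(1,1), U(2,2), U(3,3), U(1,2), U(1,3), U(2,3)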
                anisou[index, 0] = line[28:35]
                anisou[index, 1] = line[35:42]
                anisou[index, 2] = line[42:49]
                anisou[index, 3] = line[49:56]
                anisou[index, 4] = line[56:63]
                anisou[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse anisotropic temperature '
                    'factors at line {0}'.format(i))
        elif isPDB and startswith == 'SIGUIJ':
            if siguij is None:
                siguij = np.zeros((alength, 6),
                    dtype=ATOMIC_FIELDS['siguij'].dtype)
            try:
                index = acount - 1
                siguij[index, 0] = line[28:35]
                siguij[index, 1] = line[35:42]
                siguij[index, 2] = line[42:49]
                siguij[index, 3] = line[49:56]
                siguij[index, 4] = line[56:63]
                siguij[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse standard deviations of '
                    'anisotropic temperature factors at line {0}'.format(i))
        elif startswith == 'SIGATM':
            pass
        i += 1
    if onlycoords:
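        # flush the final model if the file ended without ENDMDL, then trim
        # the over-allocated coordinate array down to the models actually read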
        if acount == atomgroup.numAtoms():
            coordsets[nmodel] = coordinates
            nmodel += 1
        del coordinates
        coordsets.resize((nmodel, atomgroup.numAtoms(), 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordsets)
        else:
            atomgroup._setCoords(coordsets)
    elif not END:
        # this means last line was an ATOM line, so atomgroup is not decorated
        coordinates.resize((acount, 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordinates)
        else:
            atomgroup._setCoords(coordinates)
        atomnames.resize(acount, refcheck=False)
        resnames.resize(acount, refcheck=False)
        resnums.resize(acount, refcheck=False)
        chainids.resize(acount, refcheck=False)
        hetero.resize(acount, refcheck=False)
        termini.resize(acount, refcheck=False)
        altlocs.resize(acount, refcheck=False)
        icodes.resize(acount, refcheck=False)
        serials.resize(acount, refcheck=False)
        if not only_subset:
            atomnames = np.char.strip(atomnames)
            resnames = np.char.strip(resnames)
        atomgroup.setNames(atomnames)
        atomgroup.setResnames(resnames)
        atomgroup.setResnums(resnums)
        atomgroup.setChids(chainids)
        atomgroup.setFlags('hetatm', hetero)
        atomgroup.setFlags('pdbter', termini)
        atomgroup.setAltlocs(altlocs)
        atomgroup.setIcodes(np.char.strip(icodes))
        atomgroup.setSerials(serials)
        if isPDB:
            if anisou is not None:
                anisou.resize((acount, 6), refcheck=False)
                atomgroup.setAnisous(anisou / 10000)
            if siguij is not None:
                siguij.resize((acount, 6), refcheck=False)
                atomgroup.setAnistds(siguij / 10000)
            bfactors.resize(acount, refcheck=False)
            occupancies.resize(acount, refcheck=False)
            segnames.resize(acount, refcheck=False)
            elements.resize(acount, refcheck=False)
            atomgroup.setSegnames(np.char.strip(segnames))
            atomgroup.setElements(np.char.strip(elements))
            from prody.utilities.misctools import getMasses
            atomgroup.setMasses(getMasses(np.char.strip(elements)))
            atomgroup.setBetas(bfactors)
            atomgroup.setOccupancies(occupancies)
        else:
            charges.resize(acount, refcheck=False)
            radii.resize(acount, refcheck=False)
            atomgroup.setCharges(charges)
            atomgroup.setRadii(radii)

    if altloc and altloc_torf:
        _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)

    return atomgroup
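
# A minimal usage sketch, assuming the standard public entry point
# (prody.parsePDB) that wraps this internal line parser:
#
#     from prody import parsePDB
#     ag = parsePDB('1ubi')                   # fetch and parse PDB entry 1UBI
#     print(ag.numAtoms(), ag.numCoordsets())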