示例#1
0
    def mergeProfiles(self, p0, p1, maxOverlap=3):
        """
        Merge profile p0 with profile p1, as long as they overlap in
        at most maxOverlap positions

        @param p0: profile
        @type  p0: [float]
        @param p1: profile
        @type  p1: [float]
        @param maxOverlap: maximal allowed overlap between profiles
        @type  maxOverlap: int
        
        @return: array
        @rtype: 
        """
        p0 = self.__list2array(p0)
        p1 = self.__list2array(p1)

        overlap = N0.greater(N0.greater(p0, 0) + N0.greater(p1, 0), 1)

        if N0.sum(overlap) <= maxOverlap:
            ## one of the two profiles will in most cases not belong to these
            ## positions. We can't decide which one is wrong, let's eliminate
            ## both values. Alternatively we could keep one, or the average, ..
            N0.put(p1, N0.nonzero(overlap), 0)
            N0.put(p0, N0.nonzero(overlap), 0)

            p0 = p0 + p1

        return p0
示例#2
0
文件: WhatIf.py 项目: graik/biskit
    def __exposedResidues( self, ASA_values, sidechainCut=0.0,
                         backboneCut=0.0, totalCut=0.0  ):
        """
        Decide what is a surface exposed residue and what is not.
        sidechainCut, backboneCut, totalCut - float, cutoff value
        for what will be considered as a exposed residue. All
        three values have to pass the test.

        @param ASA_values: array with ASA values for side chains, backbone
                           and total calculated in L{__read_residueASA}.
        @type  ASA_values: array
        @param sidechainCut: cutoff ASA value for considering the side chain
                             to consider thew residue being exposed
                             (default: 0.0) 
        @type  sidechainCut: float
        @param backboneCut: cutoffvalue for back bone ASA
        @type  backboneCut: float 
        @param totalCut: cutoff for total ASA
        @type  totalCut: float   

        @return: residue mask, where 0 = burried
        @rtype: [1|0]
        """
        col_0 = N0.greater( N0.transpose(ASA_values)[0], totalCut )
        col_1 = N0.greater( N0.transpose(ASA_values)[1], backboneCut )
        col_2 = N0.greater( N0.transpose(ASA_values)[2], sidechainCut )

        col_012 = N0.concatenate( ([col_0],[col_1],[col_2]) ) 

        exposedList = N0.greater(N0.sum(col_012), 0)

        return exposedList
示例#3
0
文件: WhatIf.py 项目: suliat16/biskit
    def __exposedResidues(self,
                          ASA_values,
                          sidechainCut=0.0,
                          backboneCut=0.0,
                          totalCut=0.0):
        """
        Decide what is a surface exposed residue and what is not.
        sidechainCut, backboneCut, totalCut - float, cutoff value
        for what will be considered as a exposed residue. All
        three values have to pass the test.

        @param ASA_values: array with ASA values for side chains, backbone
                           and total calculated in L{__read_residueASA}.
        @type  ASA_values: array
        @param sidechainCut: cutoff ASA value for considering the side chain
                             to consider thew residue being exposed
                             (default: 0.0) 
        @type  sidechainCut: float
        @param backboneCut: cutoffvalue for back bone ASA
        @type  backboneCut: float 
        @param totalCut: cutoff for total ASA
        @type  totalCut: float   

        @return: residue mask, where 0 = burried
        @rtype: [1|0]
        """
        col_0 = N0.greater(N0.transpose(ASA_values)[0], totalCut)
        col_1 = N0.greater(N0.transpose(ASA_values)[1], backboneCut)
        col_2 = N0.greater(N0.transpose(ASA_values)[2], sidechainCut)

        col_012 = N0.concatenate(([col_0], [col_1], [col_2]))

        exposedList = N0.greater(N0.sum(col_012), 0)

        return exposedList
示例#4
0
文件: Hmmer.py 项目: graik/biskit
    def mergeProfiles( self, p0, p1, maxOverlap=3 ):
        """
        Merge profile p0 with profile p1, as long as they overlap in
        at most maxOverlap positions

        @param p0: profile
        @type  p0: [float]
        @param p1: profile
        @type  p1: [float]
        @param maxOverlap: maximal allowed overlap between profiles
        @type  maxOverlap: int
        
        @return: array
        @rtype: 
        """
        p0 = self.__list2array( p0 )
        p1 = self.__list2array( p1 )

        overlap = N0.greater( N0.greater(p0,0) + N0.greater(p1,0), 1 )

        if N0.sum( overlap ) <= maxOverlap:
            ## one of the two profiles will in most cases not belong to these
            ## positions. We can't decide which one is wrong, let's eliminate
            ## both values. Alternatively we could keep one, or the average, ..
            N0.put( p1, N0.nonzero( overlap ), 0 )
            N0.put( p0, N0.nonzero( overlap ), 0 )

            p0 = p0 + p1

        return p0
示例#5
0
    def __checkProfileIntegrity( self, profile, upperLimit=1.0,
                                 lowerLimit=-1.0):
        """
        In some cases SurfaceRacer generates incorrect curvature
        values for some atoms. This function sets values outside
        a given range to 0

        @param profile: profile name
        @type  profile: str
        @param upperLimit: upper limit for a valid value (default: 1.0)
        @type  upperLimit: float
        @param lowerLimit: lower limit for a valid value (default: -1.0)
        @type  lowerLimit: float

        @return: profile with inspected values
        @rtype: [float]
        """
        mask = N0.greater( profile, upperLimit )
        mask += N0.less( profile, lowerLimit )

        for i in  N0.nonzero(mask):
            print 'WARNING! Profile value %.2f set to O\n'%profile[i]
            profile[i] = 0

        return profile
示例#6
0
    def __categorizeHexSurf(self, cutoff=0.1):
        """
        Compare complexes of list to native complex to see if
        their contact surfaces overlapp with the native complex.
        
        @param cutoff: fraction cutoff for defining a overlap (default: 0.1)
        @type  cutoff: float
        
        @return: list of len(self.hexContacts) overlapping with
                 native contact surface of lig and rec (0 - no overlap,
                 1 - rec OR lig overlapps, 2- rec AND lig overlapps)
        @rtype: [0|1|2]
        """
        result = [ self.com.fractionNativeSurface( c, self.contacts )
                   for c in self.hexContacts ]

        result = [ N0.sum( N0.greater( o, cutoff ) ) for o in result ]
        return result
示例#7
0
    def __find_intervals(self, l):
        l = N0.array(l)
        l = N0.take(l, N0.argsort(l))

        globals().update(locals())

        break_points = N0.nonzero(N0.greater(l[1:] - l[:-1], 1))

        start = 0
        intervals = []

        for i in range(len(break_points)):
            index = break_points[i]
            intervals.append(tuple(N0.take(l, range(start, index + 1))))
            start = index + 1

        intervals.append(tuple(l[start:]))

        return intervals
示例#8
0
    def memberFrames(self, threshold=0.):
        """
        Get indices of all frames belonging to each cluster. Each frame
        is guaranteed to belong, at least, to the cluster for which it has
        its maximum membership. If threshold > 0, it can additionally pop
        up in other clusters.

        @param threshold: minimal cluster membership or 0 to consider
                          only max membership (default: 0)
        @type  threshold: float

        @return: n_cluster, lst of lst of int, frame indices
        @rtype: [[int]]
        """
        ## best cluster for each frame
        msm = self.memberships()
        maxMemb = N0.argmax(msm, 0)

        r = [
            N0.nonzero(N0.equal(maxMemb, i))
            for i in range(0, self.n_clusters)
        ]
        r = [x.tolist() for x in r]

        ## same thing but now taking all above threshold
        ## -> same frame can end up in several clusters
        if threshold > 0.:
            r2 = [N0.nonzero(N0.greater(l, threshold)) for l in msm]

            ## add only additional frames
            for i in range(0, len(r)):
                try:
                    frames = r[i].tolist()
                except:
                    frames = r[i]

                r[i] = frames + [fr for fr in r2[i] if fr not in r[i]]

        ## sort frames within each cluster by their membership
        r = [self.membershipSort(r[i], i) for i in range(0, len(r))]

        return r
示例#9
0
    def identities(self, aln_dictionary):
        """
        Create a dictionary that contains information about all the
        alignments in the aln_dictionary using pairwise comparisons.

        @param aln_dictionary: alignment dictionary
        @type  aln_dictionary: dict

        @return: a dictionary of dictionaries with the sequence name as the
        top key. Each sub dictionary then has the keys: 
         - 'name' - str, sequence name
         - 'seq' - str, sequence of
         - 'template_info' - list of the same length as the 'key'
             sequence excluding deletions. The number of sequences
             in the multiple alignment that contain information at
             this position.
         - 'ID' - dict, sequence identity in percent comparing the
            'key'  sequence to all other sequences (excluding deletions)
         - 'info_ID' - dict, same as 'ID' but compared to the template
             sequence length (i.e excluding deletions and insertions
             in the 'key' sequence )
         - 'cov_ID' - dict, same as 'info_ID' but insertions are defined
             comparing to all template sequences (i.e where
             'template_info' is zero )
        @rtype: dict
        """
        ## loop over all sequences in alignment
        for i in self.sequences_name:
            template_names = []

            ## don't compare to self, remove current sequence
            for name in self.sequences_name:
                if(name is not i):
                    template_names.append(name)

            ## loop over all sequences in alignment
            info_ID, ID, cov_ID  = {}, {}, {}
            for y in self.sequences_name:
##                identity = 0
##                info_identity = 0
##                cov_identity = 0
                nb_of_identities = 0
                nb_of_template = 0 
                template_info = []
                nb_of_residues = 0

                ## loop over the full length of the alignment
                for w in range(len(aln_dictionary["target"]["seq"])):

                    ## skip deletions
                    nb_of_info_res=0
                    if(aln_dictionary[i]["seq"][w] is not '-'):
                        nb_of_residues += 1

                        ## count identities
                        if(aln_dictionary[i]["seq"][w] == \
                           aln_dictionary[y]["seq"][w]):
                            nb_of_identities += 1

                        ## length excluding insertions
                        if(aln_dictionary[y]["seq"][w] is not '-'):
                            nb_of_template += 1

                        ## loop over all sequences but self
                        for z in template_names:
                            ## count how many sequences contain alignment
                            ## information at this position
                            if(aln_dictionary[z]["seq"][w] is not '-'):
                                nb_of_info_res += 1

                        template_info.append(nb_of_info_res)

                ## number of positions in which any other sequence
                ## contains alignment information
                nb_cov_res = N0.sum( N0.greater(template_info, 0) )

                ## calculate identities
                info_ID[y] = ID[y] = cov_ID[y] = 0
                ## RAIK: Hack, nb_of_... can turn 0 for fragmented alignments
                if nb_of_template:
                    info_ID[y] = 100. * nb_of_identities / nb_of_template
                if nb_of_residues:
                    ID[y]      = 100. * nb_of_identities / nb_of_residues
                if nb_cov_res:
                    cov_ID[y]  = 100. * nb_of_identities / nb_cov_res

            aln_dictionary[i]["info_ID"] = info_ID 
            aln_dictionary[i]["ID"] = ID
            aln_dictionary[i]["cov_ID"] = cov_ID
            aln_dictionary[i]["template_info"] = template_info

        return aln_dictionary        
示例#10
0
    def identities(self, aln_dictionary):
        """
        Create a dictionary that contains information about all the
        alignments in the aln_dictionary using pairwise comparisons.

        @param aln_dictionary: alignment dictionary
        @type  aln_dictionary: dict

        @return: a dictionary of dictionaries with the sequence name as the
        top key. Each sub dictionary then has the keys: 
         - 'name' - str, sequence name
         - 'seq' - str, sequence of
         - 'template_info' - list of the same length as the 'key'
             sequence excluding deletions. The number of sequences
             in the multiple alignment that contain information at
             this position.
         - 'ID' - dict, sequence identity in percent comparing the
            'key'  sequence to all other sequences (excluding deletions)
         - 'info_ID' - dict, same as 'ID' but compared to the template
             sequence length (i.e excluding deletions and insertions
             in the 'key' sequence )
         - 'cov_ID' - dict, same as 'info_ID' but insertions are defined
             comparing to all template sequences (i.e where
             'template_info' is zero )
        @rtype: dict
        """
        ## loop over all sequences in alignment
        for i in self.sequences_name:
            template_names = []

            ## don't compare to self, remove current sequence
            for name in self.sequences_name:
                if (name is not i):
                    template_names.append(name)

            ## loop over all sequences in alignment
            info_ID, ID, cov_ID = {}, {}, {}
            for y in self.sequences_name:
                ##                identity = 0
                ##                info_identity = 0
                ##                cov_identity = 0
                nb_of_identities = 0
                nb_of_template = 0
                template_info = []
                nb_of_residues = 0

                ## loop over the full length of the alignment
                for w in range(len(aln_dictionary["target"]["seq"])):

                    ## skip deletions
                    nb_of_info_res = 0
                    if (aln_dictionary[i]["seq"][w] is not '-'):
                        nb_of_residues += 1

                        ## count identities
                        if(aln_dictionary[i]["seq"][w] == \
                           aln_dictionary[y]["seq"][w]):
                            nb_of_identities += 1

                        ## length excluding insertions
                        if (aln_dictionary[y]["seq"][w] is not '-'):
                            nb_of_template += 1

                        ## loop over all sequences but self
                        for z in template_names:
                            ## count how many sequences contain alignment
                            ## information at this position
                            if (aln_dictionary[z]["seq"][w] is not '-'):
                                nb_of_info_res += 1

                        template_info.append(nb_of_info_res)

                ## number of positions in which any other sequence
                ## contains alignment information
                nb_cov_res = N0.sum(N0.greater(template_info, 0))

                ## calculate identities
                info_ID[y] = ID[y] = cov_ID[y] = 0
                ## RAIK: Hack, nb_of_... can turn 0 for fragmented alignments
                if nb_of_template:
                    info_ID[y] = 100. * nb_of_identities / nb_of_template
                if nb_of_residues:
                    ID[y] = 100. * nb_of_identities / nb_of_residues
                if nb_cov_res:
                    cov_ID[y] = 100. * nb_of_identities / nb_cov_res

            aln_dictionary[i]["info_ID"] = info_ID
            aln_dictionary[i]["ID"] = ID
            aln_dictionary[i]["cov_ID"] = cov_ID
            aln_dictionary[i]["template_info"] = template_info

        return aln_dictionary
示例#11
0
文件: hexTools.py 项目: tybiot/biskit
def createHexInp(recPdb,
                 recModel,
                 ligPdb,
                 ligModel,
                 comPdb=None,
                 outFile=None,
                 macDock=None,
                 silent=0,
                 sol=512):
    """
    Prepare a Hex macro file for the docking of the receptor(s)
    against ligand(s).

    @param recPdb: hex-formatted PDB
    @type  recPdb: str
    @param recModel: hex-formatted PDB
    @type  recModel: str
    @param ligPdb: PDBModel, get distances from this one
    @type  ligPdb: PDBModel
    @param ligModel: PDBModel, getdistances from this one
    @type  ligModel: PDBModel
    @param comPdb: reference PDB
    @type  comPdb: str
    @param outFile: base of file name for mac and out
    @type  outFile: str

    @param macDock: None -> hex decides (from the size of the molecule),
                    1 -> force macroDock, 0-> force off (default: None)
    @type  macDock: None|1|0
    @param silent: don't print distances and macro warnings (default: 0)
    @type  silent: 0|1
    @param sol: number of solutions that HEx should save (default: 512)
    @type  sol: int

    @return: HEX macro file name, HEX out generated bu the macro,
             macro docking status
    @rtype: str, str, boolean
    """
    ## files and names
    recCode = t.stripFilename(recPdb)[0:4]
    ligCode = t.stripFilename(ligPdb)[0:4]

    outFile = outFile or recCode + '-' + ligCode

    ## hex macro name
    macName = t.absfile(outFile + '_hex.mac')

    ## hex rotation matrix output name
    outName_all = t.absfile(outFile + '_hex.out')
    outName_clust = t.absfile(outFile + '_hex_cluster.out')

    ## add surface profiles if not there
    if not recModel.atoms.has_key('relAS'):
        #t.flushPrint('\nCalculating receptor surface profile')
        rec_asa = PDBDope(recModel)
        rec_asa.addSurfaceRacer()
    if not ligModel.atoms.has_key('relAS'):
        #t.flushPrint('\nCalculating ligand surface profile')
        lig_asa = PDBDope(ligModel)
        lig_asa.addSurfaceRacer()

    ## surface masks, > 95% exposed
    rec_surf_mask = N0.greater(recModel.profile('relAS'), 95)
    lig_surf_mask = N0.greater(ligModel.profile('relAS'), 95)

    ## maximun and medisn distance from centre of mass to any surface atom
    recMax, recMin = centerSurfDist(recModel, rec_surf_mask)
    ligMax, ligMin = centerSurfDist(ligModel, lig_surf_mask)

    ## approxinate max and min center to centre distance
    maxDist = recMax + ligMax
    minDist = recMin + ligMin

    ## molecular separation and search range to be used in the docking
    molSep = (maxDist + minDist) / 2
    molRange = 2 * (maxDist - molSep)

    if not silent:
        print 'Docking setup: %s\nRecMax: %.1f RecMin: %.1f\nLigMax: %.1f LigMin: %.1f\nMaxDist: %.1f MinDist: %.1f\nmolecular_separation: %.1f r12_range: %.1f\n' % (
            outFile, recMax, recMin, ligMax, ligMin, maxDist, minDist, molSep,
            molRange)

    if recMax > 30 and ligMax > 30 and not silent:
        print '\nWARNING! Both the receptor and ligand radius is ',
        print 'greater than 30A.\n'

    ## determine docking mode to use
    macroDocking = 0

    if macDock == None:
        if recMax > 35 and not silent:
            print '\nReceptor has a radius that exceeds 35A ',
            print '-> Macro docking will be used'
            macroDocking = 1
    else:
        macroDocking = macDock

    #####################
    ## write macro file

    macOpen = open(macName, 'w')

    macOpen.write('# -- ' + macName + ' --\n')
    macOpen.write(' \n')
    macOpen.write('open_receptor ' + t.absfile(recPdb) + '\n')
    macOpen.write('open_ligand ' + t.absfile(ligPdb) + '\n')

    if comPdb and comPdb[-4:] == '.pdb':
        macOpen.write('open_complex ' + comPdb + '\n')

    macOpen.write('\n')

    head = """
# -------------- general settings ----------------
disc_cache 1                   # disc cache on (0 off)
docking_sort_mode 1            # Sort solutions by cluster (0 by energy)
docking_cluster_mode 1         # Display all clusters (0 display best)
docking_cluster_threshold 2.00
# docking_cluster_bumps  number

# ------------ molecule orientation --------------
molecule_separation %(separation)i
commit_view """ % ({
        'separation': round(molSep)
    })

    macro = """
# -------------- macro docking -------------------
macro_min_coverage 25
macro_sphere_radius 15
macro_docking_separation 25
activate_macro_model"""


    tail = """
# -------------- docking setup -------------------
docking_search_mode 0          # full rotational search

receptor_range_angle  180      # 0, 15, 30, 45, 60, 75, 90, 180
docking_receptor_samples 720   # 362, 492, 642, 720, 980, 1280

ligand_range_angle  180
docking_ligand_samples 720

twist_range_angle 360          # 0, 15, 30, 60, 90, 180, 360
docking_alpha_samples 128      # 64, 128, 256

r12_step 0.500000              # 0.1, 0.2, 0.25, 0.5, 0.75, 1, 1.5, 2
r12_range %(range)i

docking_radial_filter 0        # Radial Envelope Filter - None

grid_size 0.600                # 0.4, 0.5, 0.6, 0.75, 1.0
# docking_electrostatics 0       # use only surface complimentarity
docking_electrostatics 1      # use electrostatic term for scoring clusters

docking_main_scan 16     # 
docking_main_search 26

max_docking_solutions %(nr_sol)i # number of solutions to save

# -------------- post-processing ----------------
docking_refine 0    # None
#  docking_refine 1    # Backbone Bumps
#  docking_refine 2    # MM energies
#  docking_refine 3    # MM minimization

# ---------------- run docking ------------------
activate_docking
#  save_docking %(output_clust)s
#  save_range 1 512 ./ dock .pdb

# ------------ also save all solutions ----------
docking_sort_mode 0            # Sort solutions by energy (1 by cluster)
save_docking %(output_all)s""" \
         %({'range':round(molRange), 'output_all':outName_all,
            'nr_sol':int(sol), 'output_clust':outName_clust} )

    macOpen.writelines(head)

    ## macro docking will not work with multiple models, if both are added to
    ## the hex macro file - macrodocking will be skipped during the docking run
    if macroDocking:
        macOpen.writelines(macro)

    macOpen.writelines(tail)

    macOpen.close()

    return macName, outName_all, macroDocking