Пример #1
0
def cull_main(similarities, thresholdPercentage, representativeChains, adjType, representativesReverse=None):
    """Perform the PDB redundancy removal.

    @param similarities: A record of the percentage sequence identity between the chains/entries up for culling.
    @type similarities : dictionary or file name (string)
    @param thresholdPercentage: The maximum permissible percentage sequence identity that any two chains/entries may possess.
    @type thresholdPercentage : float
    @param representativeChains: Passed through unchanged to the adjacency list creation routine.
    @param adjType: 'chain' or 'entry'; selects which adjacency list creation routine is used.
    @type adjType : string
    @param representativesReverse: Extra argument handed to the 'entry' adjacency list creation routine; not used
        when adjType is 'chain'. When omitted, a fresh empty dictionary is used for each call.
    @type representativesReverse : dictionary
    @return: The proteins selected for removal. An empty list when the adjacency list creation reports no
        protein names, otherwise the first of the two values returned by Leafcull.main.
    @raise ValueError: If adjType is neither 'chain' nor 'entry'.

    """

    # A dict literal as a default would be one object shared by every call (and by the routine it is handed to),
    # so the default is None and a new dictionary is created per call.
    if representativesReverse is None:
        representativesReverse = {}

    # Create the sparsematrix of the protein similarity graph.
    if adjType == 'chain':
        adjacent, proteinNames = adjlistcreation.pdb_chain_main(similarities, thresholdPercentage, representativeChains)
    elif adjType == 'entry':
        adjacent, proteinNames = adjlistcreation.pdb_entry_main(similarities, thresholdPercentage, representativeChains, representativesReverse)
    else:
        # Previously an unsupported value fell through and crashed later with an UnboundLocalError.
        raise ValueError("adjType must be 'chain' or 'entry', got %r" % (adjType,))

    # Choose which proteins to remove from the similarity graph.
    # NOTE(review): the exact comparison against [] is kept because the return type of the adjacency list
    # creation routines is not visible here.
    if proteinNames == []:
        # This is True if there are no similarities greater than the given percentage sequence identity. If there are no
        # chains that are too similar, then there is no need to cull any chains from the network.
        return []

    # Choose which chains to remove from the similarity graph. The second returned value (the proteins to keep)
    # is not needed by callers of this function.
    proteinsToCull, _ = Leafcull.main(adjacent, proteinNames)
    return proteinsToCull
Пример #2
0
def cull_main(
    similarities,
    thresholdPercentage,
    representativeChains,
    adjType,
    representativesReverse=None,
    verboseOutput=False,
    startTime=0,
):
    """Perform the PDB redundancy removal.

    @param similarities: A record of the percentage sequence identity between the chains/entries up for culling.
    @type similarities : file name (string)
    @param thresholdPercentage: The maximum permissible percentage sequence identity that any two chains/entries may possess.
    @type thresholdPercentage : float
    @param representativeChains: The names of the chains/entries that will compose the protein similarity graph.
    @type representativeChains:  set
    @param adjType: 'chain' or 'entry' indicating the type of culling to be performed.
    @type adjType:  string
    @param representativesReverse: A mapping of representative chains to the chains that they represent. Only used
        when adjType is 'entry'. When omitted, a fresh empty dictionary is used for each call.
    @type representativesReverse:  dictionary
    @param verboseOutput: Whether status updates should be printed out to the user.
    @type verboseOutput:  boolean
    @param startTime: The time when the culling began. Used to output elapsed time (it is subtracted from time.time()).
    @type startTime:  number
    @return: The redundant proteins to be removed from the dataset.
    @rtype: list
    @raise ValueError: If adjType is neither 'chain' nor 'entry'.

    """

    # Reject unsupported modes before doing any work. Previously such a value fell through both branches and
    # crashed later with an UnboundLocalError.
    if adjType not in ("chain", "entry"):
        raise ValueError("adjType must be 'chain' or 'entry', got %r" % (adjType,))

    # A dict literal as a default would be one object shared by every call (and by the routine it is handed to),
    # so the default is None and a new dictionary is created per call.
    if representativesReverse is None:
        representativesReverse = {}

    def report(message):
        # Single-argument print(...) behaves the same as a print statement on Python 2 and as a function call on
        # Python 3; the "%s %s" join reproduces the space the old comma-separated print statement inserted.
        if verboseOutput:
            print("%s %s" % (message, time.time() - startTime))

    # Create the sparsematrix of the protein similarity graph.
    report("Creating the adjacency matrix. Time elapsed: ")
    if adjType == "chain":
        adjacent, proteinNames = adjlistcreation.pdb_chain_main(similarities, thresholdPercentage, representativeChains)
    else:
        adjacent, proteinNames = adjlistcreation.pdb_entry_main(
            similarities, thresholdPercentage, representativeChains, representativesReverse
        )

    # Choose which proteins to remove from the similarity graph.
    # NOTE(review): the exact comparison against [] is kept because the return type of the adjacency list
    # creation routines is not visible here.
    if proteinNames == []:
        report("No similarities found. Culling not needed. Time elapsed: ")
        # This is True if there are no similarities greater than the given percentage sequence identity. If there are no
        # chains that are too similar, then there is no need to cull any chains from the network.
        proteinsToCull = []
    else:
        report("Performing the culling. Time elapsed: ")
        # Choose which chains to remove from the similarity graph. The second returned value (the proteins to
        # keep) is not needed by callers of this function.
        proteinsToCull, _ = Leafcull.main(adjacent, proteinNames)

    report("Culling finished. Time elapsed: ")

    return proteinsToCull