示例#1
0
def contactsTreeWalkGenerator(namesTrain, vectorsTrain, namesBase, vectorsBase, minYesThreshold= 60, topPercent = 5):
	"""Lazily yield ``(subset, topBaseNames)`` pairs for the contacts training set.

	Nothing is computed until the generator is first advanced. On the first
	step the function:

	1. maps each entry of ``namesTrain`` to the vector at the same position
	   in ``vectorsTrain``;
	2. builds a tree by passing ``getDistanceMatrix(namesTrain, vectorsTrain)``
	   to ``distanceMatrixToTree``;
	3. calls ``getTrainigToBaseSimilarityMatrix`` with that mapping plus
	   ``namesBase`` and ``vectorsBase``.

	It then walks whatever ``findYesClades(tree, minYesThreshold)`` produces
	and, for every item, asks ``getTopSimilarCompounds`` (with
	``topPer=topPercent``) for the matching entries.

	NOTE(review): the exact meaning of ``minYesThreshold`` and ``topPercent``
	is defined by ``findYesClades`` / ``getTopSimilarCompounds``, which are
	not visible here -- presumably percentages; confirm against those helpers.

	Yields:
		A 2-tuple: the item produced by ``findYesClades`` and the value
		returned by ``getTopSimilarCompounds`` for it.
	"""
	trainVectorByName = dict(zip(namesTrain, vectorsTrain))

	# Same two-step construction as the nested call, spelled out for readability.
	trainDistanceMatrix = getDistanceMatrix(namesTrain, vectorsTrain)
	trainTree = distanceMatrixToTree(trainDistanceMatrix)

	# Computed once up front; every clade below is scored against it.
	similarityToBase = getTrainigToBaseSimilarityMatrix(trainVectorByName, namesBase, vectorsBase)

	for clade in findYesClades(trainTree, minYesThreshold):
		baseMatches = getTopSimilarCompounds(similarityToBase, clade, topPer = topPercent)
		yield clade, baseMatches
示例#2
0
def chemTreeWalkGenerator(namesTrain, vectorsTrain, namesBase, vectorsBase, minYesThreshold= 75, topPercent = 2):
	"""Lazily yield ``(subset, topBaseNames)`` pairs for the chemical training set.

	No work happens until the generator is first advanced. At that point the
	function pairs ``namesTrain`` with ``vectorsTrain`` in a dict, turns the
	result of ``getDistanceMatrix(namesTrain, vectorsTrain)`` into a tree via
	``distanceMatrixToTree``, and obtains a training-to-base similarity
	structure from ``getTrainigToBaseSimilarityMatrix``.

	Each item produced by ``findYesClades(tree, minYesThreshold)`` is then
	passed, together with the similarity structure, to
	``getTopSimilarCompounds`` using ``topPer=topPercent``.

	NOTE(review): units and valid ranges of ``minYesThreshold`` and
	``topPercent`` depend on helpers defined outside this block -- presumably
	percentages; verify before changing the defaults.

	Yields:
		A 2-tuple of the ``findYesClades`` item and the corresponding
		``getTopSimilarCompounds`` result.
	"""
	vectorByTrainName = dict(zip(namesTrain, vectorsTrain))

	# Distance matrix first, then the tree derived from it.
	pairwiseTrainDistances = getDistanceMatrix(namesTrain, vectorsTrain)
	trainTree = distanceMatrixToTree(pairwiseTrainDistances)

	# Similarity of the training entries to the base set, reused for every clade.
	trainToBase = getTrainigToBaseSimilarityMatrix(vectorByTrainName, namesBase, vectorsBase)

	for yesClade in findYesClades(trainTree, minYesThreshold):
		similarBaseNames = getTopSimilarCompounds(trainToBase, yesClade, topPer = topPercent)
		yield yesClade, similarBaseNames