class ControlTypeLatentTopic:
    """Control layer between the user and a TypeLatentTopic model.

    This is the object the user works with to reach the underlying
    TypeLatentTopic and the ControlLatentTopic objects built from a matrix
    factorization.

    Asymmetric Factorization
        X  = F1*H = X*W1*H
        X' = F2*H = X'W2*H

    Parameters
    ----------
    id : int
        ID of the TypeLatentTopic.
    name : String
        Name of the TypeLatentTopic.
    abreviature : String
        Abreviature of the TypeLatentTopic.
    LD : numpy array or matrix
        Document list. It must be a 2-D row or column vector; a row vector is
        transposed into a column. Each entry is read as ``entry[0][0]``.
        NOTE(review): that access pattern looks like a MATLAB cell array
        loaded through scipy.io.loadmat -- confirm with the callers.
    H : numpy array or matrix
        Representation matrix (latent topics x documents).
    F1 : numpy array or matrix
        Basis matrix; one column per latent topic.
    W1 : numpy array or matrix, or None
        Basis matrix in terms of the documents (documents x latent topics).
    F2 : numpy array or matrix, or None
        Basis matrix of the second matrix of the asymmetric factorization.
    W2 : numpy array or matrix, or None
        Basis matrix in terms of the documents (documents x latent topics)
        of the asymmetric factorization.

    Attributes
    ----------
    arrayControlLatentTopics : list of ControlLatentTopic
        One entry per row of H, all built over the same list of documents.
    typeLatentTopic : TypeLatentTopic
        Entity holding id, name, abreviature and the document dictionary.

    Raises
    ------
    ClassError
        If a mandatory argument (or a non-None optional one) is not a numpy
        array/matrix.
    SizeError
        If LD is not a vector or the matrix dimensions are inconsistent.
    """

    def __init__(self, id, name, abreviature, LD, H, F1, W1, F2, W2):
        # Review matrix types. Optional matrices are tested with
        # "is not None": "!= None" on a numpy array is an elementwise
        # comparison and cannot be used as a condition.
        if not self.correctClass(H):
            raise ClassError("H matrix is not a numpy array or a numpy matrix")
        if not self.correctClass(F1):
            raise ClassError("F matrix is not a numpy array or a numpy matrix")
        if not self.correctClass(LD):
            raise ClassError("The list of document is not a numpy array or a numpy matrix")
        if W1 is not None and not self.correctClass(W1):
            raise ClassError("W matrix is not a numpy array or a numpy matrix")
        if F2 is not None and not self.correctClass(F2):
            raise ClassError("Assymetric F matrix is not a numpy array or a numpy matrix")
        if W2 is not None and not self.correctClass(W2):
            raise ClassError("Assymetric W matrix is not a numpy array or a numpy matrix")

        # The document list is handled as a column vector.
        if LD.shape[0] < LD.shape[1]:
            LD = np.transpose(LD)
        if LD.shape[1] != 1:
            raise SizeError("Document List is not a vector")

        # Review the dimensions of each matrix.
        if not self.correctDimensions(LD, H, F1, W1, F2, W2):
            raise SizeError("Matrix dimensions are not consistent")

        # Attributes initialization. The TypeLatentTopic is created before
        # any ControlLatentTopic because each of those receives ``self``.
        self.__arrayControlLatentTopics = []
        documentIds = [entry[0][0] for entry in LD.tolist()]
        self.__typeLatentTopic = TypeLatentTopic(
            id, name, abreviature, self.createDictionary(documentIds))

        # ControlLatentTopics creation: one per row of H.
        for i in range(H.shape[0]):
            # The belonging ranking is computed on the normalized row, the
            # other rankings on the raw column (kept from the original).
            belonging = self.normalize(H[i])
            belongingVector = belonging.tolist()
            sortedbelongingVector = self.sortVector(belonging)[::-1].tolist()

            representativeWords, sortedIndexRepresentativeWords = \
                self._rankedColumn(F1, i)
            representativeDocuments, sortedIndexRepresentativeDocuments = \
                self._rankedColumn(W1, i)
            resumeWords, sortedIndexResumeWords = self._rankedColumn(F2, i)
            resumeDocuments, sortedIndexResumeDocuments = \
                self._rankedColumn(W2, i)

            CLT = ControlLatentTopic(
                i, belongingVector, sortedbelongingVector,
                representativeWords, sortedIndexRepresentativeWords,
                representativeDocuments, sortedIndexRepresentativeDocuments,
                resumeWords, sortedIndexResumeWords,
                resumeDocuments, sortedIndexResumeDocuments, self)
            self.__arrayControlLatentTopics.append(CLT)

    def _rankedColumn(self, matrix, index):
        """Return (normalized column, descending index order) of a matrix.

        return: a pair of lists for column ``index`` of ``matrix``, or
        (None, None) when ``matrix`` is None.
        """
        if matrix is None:
            return None, None
        column = np.transpose(matrix)[index]
        weights = self.normalize(column).tolist()
        # argsort is ascending; reverse it to get the highest weight first.
        ranking = self.sortVector(column)[::-1].tolist()
        return weights, ranking

    def correctDimensions(self, LD, H, F1, W1, F2, W2):
        """Check that the matrix dimensions agree with each other.

        The number of latent topics (rows of H, columns of F1, W1, F2, W2)
        and the number of documents (columns of H, rows of LD, W1, W2) must
        each be the same in every matrix that is not None.

        return: True if the dimensions between matrices are ok, False
        otherwise.
        """
        topicCounts = [H.shape[0], F1.shape[1]]
        topicCounts.extend(
            M.shape[1] for M in (W1, F2, W2) if M is not None)
        if min(topicCounts) != max(topicCounts):
            return False

        documentCounts = [H.shape[1], LD.shape[0]]
        documentCounts.extend(
            M.shape[0] for M in (W1, W2) if M is not None)
        return min(documentCounts) == max(documentCounts)

    def correctClass(self, M):
        """Check the type of the parameter.

        return: True if the parameter M is a numpy array or matrix
        (numpy.matrix is a subclass of numpy.ndarray), False otherwise.
        """
        return isinstance(M, np.ndarray)

    def normalize(self, M):
        """Normalize the M vector by the sum of its elements.

        return: M divided by the sum of all its elements, so the result adds
        up to 1. There is no guard against a zero sum; the division then
        follows numpy's own rules.

        raise: ClassError if M is not a numpy array or matrix.
        """
        if not self.correctClass(M):
            raise ClassError("The given matrix is not a numpy matrx or a nupy array")
        # ndarray.sum() adds every element; the builtin sum() would iterate
        # over rows and hand back the row itself for a 1xN matrix.
        return M / M.sum()

    def sortVector(self, M):
        """Sort the indexes of the M vector by the values of M.

        return: Vector with the indexes in ascending order of value
        (numpy.argsort).

        raise: ClassError if M is not a numpy array or matrix.
        """
        if not self.correctClass(M):
            raise ClassError("The given matrix is not a numpy matrx or a nupy array")
        return np.argsort(M)

    def createDictionary(self, LD):
        """Create a dictionary mapping each document to its position.

        The key is LD[i] and the value is i, which is the column id of the
        i-th document in the representation matrix. If a document appears
        more than once, its last position is kept.
        """
        return {document: position for position, document in enumerate(LD)}

    def getDictionary(self, imaged):
        """Look up the position of a document id in the dictionary.

        Delegates to TypeLatentTopic.getDictionary. The original
        documentation states it returns the position if ``imaged`` is a key
        of the dictionary and None otherwise -- that behavior lives in
        TypeLatentTopic.
        """
        return self.__typeLatentTopic.getDictionary(imaged)

    def getLatentTopicsForImg(self, identificador):
        """Return the ControlLatentTopics sorted for one document.

        The list is ordered from highest to lowest value of
        ``getBelongingDegree(identificador)`` of each ControlLatentTopic.
        """
        return sorted(
            self.__arrayControlLatentTopics,
            key=lambda topic: topic.getBelongingDegree(identificador),
            reverse=True)

    def getMostImportantLatentTopicForImg(self, identificador):
        """Return the ControlLatentTopic with the highest belonging degree.

        The degree is ``getBelongingDegree(identificador)`` of each
        ControlLatentTopic.
        """
        return max(
            self.__arrayControlLatentTopics,
            key=lambda topic: topic.getBelongingDegree(identificador))

    def getControlArrayLatentTopics(self):
        """Return the list of ControlLatentTopics."""
        return self.__arrayControlLatentTopics

    def getSizeDictionary(self):
        """Return the length of the dictionary."""
        return self.__typeLatentTopic.getSizeDictionary()

    def setTypeLatantTopicId(self, ids):
        """Change the TypeLatentTopic id."""
        self.__typeLatentTopic.setId(ids)

    def setTypeLatantTopicName(self, name):
        """Change the TypeLatentTopic name."""
        self.__typeLatentTopic.setName(name)

    def setTypeLatantTopicAbreviature(self, Abreviature):
        """Change the TypeLatentTopic abreviature."""
        self.__typeLatentTopic.setAbreviature(Abreviature)

    def getTypeLatantTopicId(self):
        """Get the TypeLatentTopic id."""
        return self.__typeLatentTopic.getId()

    def getTypeLatantTopicName(self):
        """Get the TypeLatentTopic name."""
        return self.__typeLatentTopic.getName()

    def getTypeLatantTopicAbreviature(self):
        """Get the TypeLatentTopic abreviature."""
        return self.__typeLatentTopic.getAbreviature()
def __init__(self, id, name, abreviature, LD, H, F1, W1, F2, W2):
    """Validate the factorization matrices and build the latent topics.

    Parameters
    ----------
    id, name, abreviature :
        Passed through to TypeLatentTopic.
    LD : numpy array or matrix
        Document list; must be a 2-D row or column vector. Each entry is
        read as ``entry[0][0]``.
        NOTE(review): that access pattern looks like a MATLAB cell array
        loaded through scipy.io.loadmat -- confirm with the callers.
    H : numpy array or matrix
        Representation matrix (latent topics x documents).
    F1 : numpy array or matrix
        Basis matrix; one column per latent topic.
    W1, F2, W2 : numpy array or matrix, or None
        Optional matrices; one column per latent topic when given.

    Raises
    ------
    ClassError
        If a mandatory argument (or a non-None optional one) is not a numpy
        array/matrix.
    SizeError
        If LD is not a vector or the matrix dimensions are inconsistent.
    """
    # Review matrix types. Optional matrices are tested with "is not None":
    # "!= None" on a numpy array is an elementwise comparison and cannot be
    # used as a condition.
    if not self.correctClass(H):
        raise ClassError("H matrix is not a numpy array or a numpy matrix")
    if not self.correctClass(F1):
        raise ClassError("F matrix is not a numpy array or a numpy matrix")
    if not self.correctClass(LD):
        raise ClassError("The list of document is not a numpy array or a numpy matrix")
    if W1 is not None and not self.correctClass(W1):
        raise ClassError("W matrix is not a numpy array or a numpy matrix")
    if F2 is not None and not self.correctClass(F2):
        raise ClassError("Assymetric F matrix is not a numpy array or a numpy matrix")
    if W2 is not None and not self.correctClass(W2):
        raise ClassError("Assymetric W matrix is not a numpy array or a numpy matrix")

    # The document list is handled as a column vector.
    if LD.shape[0] < LD.shape[1]:
        LD = np.transpose(LD)
    if LD.shape[1] != 1:
        raise SizeError("Document List is not a vector")

    # Review the dimensions of each matrix.
    if not self.correctDimensions(LD, H, F1, W1, F2, W2):
        raise SizeError("Matrix dimensions are not consistent")

    def rankedColumn(matrix, index):
        # (normalized column, indexes by descending value) or (None, None).
        if matrix is None:
            return None, None
        column = np.transpose(matrix)[index]
        weights = self.normalize(column).tolist()
        # argsort is ascending; reverse it to get the highest weight first.
        ranking = self.sortVector(column)[::-1].tolist()
        return weights, ranking

    # Attributes initialization. The TypeLatentTopic is created before any
    # ControlLatentTopic because each of those receives ``self``.
    self.__arrayControlLatentTopics = []
    documentIds = [entry[0][0] for entry in LD.tolist()]
    self.__typeLatentTopic = TypeLatentTopic(
        id, name, abreviature, self.createDictionary(documentIds))

    # ControlLatentTopics creation: one per row of H.
    for i in range(H.shape[0]):
        # The belonging ranking is computed on the normalized row, the
        # other rankings on the raw column (kept from the original).
        belonging = self.normalize(H[i])
        belongingVector = belonging.tolist()
        sortedbelongingVector = self.sortVector(belonging)[::-1].tolist()

        representativeWords, sortedIndexRepresentativeWords = \
            rankedColumn(F1, i)
        representativeDocuments, sortedIndexRepresentativeDocuments = \
            rankedColumn(W1, i)
        resumeWords, sortedIndexResumeWords = rankedColumn(F2, i)
        resumeDocuments, sortedIndexResumeDocuments = rankedColumn(W2, i)

        CLT = ControlLatentTopic(
            i, belongingVector, sortedbelongingVector,
            representativeWords, sortedIndexRepresentativeWords,
            representativeDocuments, sortedIndexRepresentativeDocuments,
            resumeWords, sortedIndexResumeWords,
            resumeDocuments, sortedIndexResumeDocuments, self)
        self.__arrayControlLatentTopics.append(CLT)