Exemplo n.º 1
class ControlTypeLatentTopic:
    """Control layer between the user and the TypeLatentTopic model.

    This is the only entity the user is expected to see and use in order to
    access and work with the model.

    Asymmetric Factorization::

        X  = F1*H = X*W1*H
        X' = F2*H = X'*W2*H

    Parameters
    ----------
    id : int
        ID of the TypeLatentTopic.
    name : str
        Name of the TypeLatentTopic.
    abreviature : str
        Abreviature of the TypeLatentTopic.
    LD : numpy array or matrix
        Document list, given as a row or a column vector (2-D).
    H : numpy array or matrix
        Representation matrix (latent topics x documents).
    F1 : numpy array or matrix
        Basis matrix.
    W1 : numpy array or matrix, or None
        Basis matrix in terms of the documents (documents x latent topics).
    F2 : numpy array or matrix, or None
        Basis matrix of the second matrix of the asymmetric factorization.
    W2 : numpy array or matrix, or None
        Basis matrix in terms of the documents (documents x latent topics)
        of the asymmetric factorization.

    Attributes
    ----------
    arrayControlLatentTopics : list of ControlLatentTopic
        Latent topics that are correlated for the same list of documents.
    typeLatentTopic : TypeLatentTopic
        The wrapped model entity.
    """

    def __init__(self, id, name, abreviature, LD, H, F1, W1, F2, W2):
        """Validate the matrices and build one ControlLatentTopic per row of H.

        Raises
        ------
        ClassError
            If any given matrix is not a numpy array or a numpy matrix.
        SizeError
            If LD is not a vector or the matrix dimensions do not agree.
        """
        # Review matrix type.
        if not self.correctClass(H):
            raise ClassError("H matrix is not a numpy array or a numpy matrix")
        if not self.correctClass(F1):
            raise ClassError("F matrix is not a numpy array or a numpy matrix")
        if not self.correctClass(LD):
            raise ClassError("The list of document is not a numpy array or a numpy matrix")
        # "is not None" on purpose: "!= None" on a numpy array is evaluated
        # element by element and has no single truth value.
        if W1 is not None and not self.correctClass(W1):
            raise ClassError("W matrix is not a numpy array or a numpy matrix")
        if F2 is not None and not self.correctClass(F2):
            raise ClassError("Assymetric F matrix is not a numpy array or a numpy matrix")
        if W2 is not None and not self.correctClass(W2):
            raise ClassError("Assymetric W matrix is not a numpy array or a numpy matrix")

        # Accept the document list as a row or as a column; work with a column.
        if LD.shape[0] < LD.shape[1]:
            LD = np.transpose(LD)
        if LD.shape[1] != 1:
            raise SizeError("Document List is not a vector")
        # Review the dimensions of each matrix.
        if not self.correctDimensions(LD, H, F1, W1, F2, W2):
            raise SizeError("The dimensions of the matrices are not consistent")

        # Attributes initialization.
        # NOTE(review): the loop body was missing in this source; LD is an
        # (n, 1) column here, so the single entry of each row is taken as
        # the document key - confirm.
        LLD = [row[0] for row in LD.tolist()]
        self.__typeLatentTopic = TypeLatentTopic(id, name, abreviature, self.createDictionary(LLD))

        # ControlLatentTopics creation.
        # NOTE(review): the list is read by the getters below but was never
        # filled in this source; appending each topic is assumed - confirm.
        self.__arrayControlLatentTopics = []
        for i in range(H.shape[0]):
            normalizedRow = self.normalize(H[i])
            belongingVector = normalizedRow.tolist()
            sortedbelongingVector = self.sortVector(normalizedRow).tolist()

            wordColumn = np.transpose(F1)[i]
            representativeWords = self.normalize(wordColumn).tolist()
            sortedIndexRepresentativeWords = self.sortVector(np.array(wordColumn)).tolist()

            if W1 is not None:
                documentColumn = np.transpose(W1)[i]
                representativeDocuments = self.normalize(documentColumn).tolist()
                sortedIndexRepresentativeDocuments = self.sortVector(documentColumn).tolist()
            else:
                representativeDocuments = None
                sortedIndexRepresentativeDocuments = None

            if F2 is not None:
                resumeWordColumn = np.transpose(F2)[i]
                resumeWords = self.normalize(resumeWordColumn).tolist()
                sortedIndexResumeWords = self.sortVector(resumeWordColumn).tolist()
            else:
                resumeWords = None
                sortedIndexResumeWords = None

            if W2 is not None:
                resumeDocumentColumn = np.transpose(W2)[i]
                resumeDocuments = self.normalize(resumeDocumentColumn).tolist()
                sortedIndexResumeDocuments = self.sortVector(resumeDocumentColumn).tolist()
            else:
                resumeDocuments = None
                sortedIndexResumeDocuments = None

            CLT = ControlLatentTopic(i, belongingVector, sortedbelongingVector, representativeWords, sortedIndexRepresentativeWords, representativeDocuments, sortedIndexRepresentativeDocuments, resumeWords, sortedIndexResumeWords, resumeDocuments, sortedIndexResumeDocuments, self)
            self.__arrayControlLatentTopics.append(CLT)

    def correctDimensions(self, LD, H, F1, W1, F2, W2):
        """Check that the matrix dimensions are related to each other.

        The latent-topic count must agree between the rows of H and the
        columns of F1, W1, F2 and W2; the document count must agree between
        the columns of H and the rows of LD, W1 and W2. Matrices given as
        None are ignored.

        Returns
        -------
        bool
            True if the dimensions between the matrices are ok, False
            otherwise.
        """
        topicCounts = [H.shape[0], F1.shape[1]]
        topicCounts += [M.shape[1] for M in (W1, F2, W2) if M is not None]
        # All counts are equal exactly when the smallest equals the largest.
        if min(topicCounts) != max(topicCounts):
            return False

        documentCounts = [H.shape[1], LD.shape[0]]
        documentCounts += [M.shape[0] for M in (W1, W2) if M is not None]
        return min(documentCounts) == max(documentCounts)

    def correctClass(self, M):
        """Check the type of the parameter.

        Returns
        -------
        bool
            True if M is a numpy array or a numpy matrix, False otherwise.
        """
        # np.matrix is a subclass of np.ndarray, so one check covers both.
        return isinstance(M, np.ndarray)

    def normalize(self, M):
        """Normalize the M vector.

        Returns
        -------
        numpy array or matrix
            M scaled so that |M| = 1. An all-zero M is returned unscaled
            (as floats) instead of dividing by zero.

        Raises
        ------
        ClassError
            If M is not a numpy array or a numpy matrix.
        """
        if not self.correctClass(M):
            raise ClassError("The given matrix is not a numpy matrx or a nupy array")
        # NOTE(review): the normalization statement was missing in this
        # source; the Euclidean norm is assumed from "|M| = 1" - confirm.
        norm = np.linalg.norm(M)
        if norm == 0:
            return M * 1.0
        return M / norm

    def sortVector(self, M):
        """Sort the indexes of the M vector depending on the values of M.

        Returns
        -------
        numpy array or matrix
            The indexes that sort M in ascending order (np.argsort).

        Raises
        ------
        ClassError
            If M is not a numpy array or a numpy matrix.
        """
        if not self.correctClass(M):
            raise ClassError("The given matrix is not a numpy matrx or a nupy array")
        return np.argsort(M)

    def createDictionary(self, LD):
        """Create a dictionary mapping each document to its position.

        The key is LD[i] and the value is the number i, which is the column
        id of the i-th document in the representation matrix. If a document
        is repeated, its last position wins.
        """
        return dict((document, position) for position, document in enumerate(LD))

    def getDictionary(self, imaged):
        """Look up the position of a document id in the dictionary.

        Delegates to TypeLatentTopic.getDictionary; per the original
        documentation it yields i if imaged is a key of the dictionary and
        None otherwise.
        """
        return self.__typeLatentTopic.getDictionary(imaged)

    def getLatentTopicsForImg(self, identificador):
        """Return the ControlLatentTopics sorted for one document.

        The order is descending by the belonging degree of each latent topic
        to the document identificador.
        """
        return sorted(
            self.__arrayControlLatentTopics,
            key=lambda topic: topic.getBelongingDegree(identificador),
            reverse=True,
        )

    def getMostImportantLatentTopicForImg(self, identificador):
        """Return the ControlLatentTopic with the highest belonging degree to the document."""
        return max(
            self.__arrayControlLatentTopics,
            key=lambda topic: topic.getBelongingDegree(identificador),
        )

    def getControlArrayLatentTopics(self):
        """Return the list of ControlLatentTopics."""
        return self.__arrayControlLatentTopics

    def getSizeDictionary(self):
        """Return the length of the dictionary."""
        return self.__typeLatentTopic.getSizeDictionary()

    def setTypeLatantTopicId(self, ids):
        """Change the TypeLatentTopic id."""
        # NOTE(review): no body is present in this source, so this is a
        # no-op; presumably it should forward ids to the TypeLatentTopic -
        # confirm against the TypeLatentTopic API.

    def setTypeLatantTopicName(self, name):
        """Change the TypeLatentTopic name."""
        # NOTE(review): no body is present in this source, so this is a
        # no-op; presumably it should forward name to the TypeLatentTopic -
        # confirm against the TypeLatentTopic API.

    def setTypeLatantTopicAbreviature(self, Abreviature):
        """Change the TypeLatentTopic abreviature."""
        # NOTE(review): no body is present in this source, so this is a
        # no-op; presumably it should forward Abreviature to the
        # TypeLatentTopic - confirm against the TypeLatentTopic API.

    def getTypeLatantTopicId(self):
        """Get the TypeLatentTopic id."""
        return self.__typeLatentTopic.getId()

    def getTypeLatantTopicName(self):
        """Get the TypeLatentTopic name."""
        return self.__typeLatentTopic.getName()

    def getTypeLatantTopicAbreviature(self):
        """Get the TypeLatentTopic abreviature."""
        return self.__typeLatentTopic.getAbreviature()
Exemplo n.º 2
    def __init__(self, id, name, abreviature, LD, H, F1, W1, F2, W2):
        """Validate the matrices and build one ControlLatentTopic per row of H.

        Parameters
        ----------
        id, name, abreviature
            Forwarded to the TypeLatentTopic constructor.
        LD : numpy array or matrix
            Document list, given as a row or a column vector (2-D).
        H : numpy array or matrix
            Representation matrix; one ControlLatentTopic is built per row.
        F1 : numpy array or matrix
            Basis matrix.
        W1, F2, W2 : numpy array or matrix, or None
            Optional matrices; when None, the values derived from them are
            passed to ControlLatentTopic as None.

        Raises
        ------
        ClassError
            If any given matrix is not a numpy array or a numpy matrix.
        SizeError
            If LD is not a vector or the matrix dimensions do not agree.
        """
        # Review matrix type.
        if not self.correctClass(H):
            raise ClassError("H matrix is not a numpy array or a numpy matrix")
        if not self.correctClass(F1):
            raise ClassError("F matrix is not a numpy array or a numpy matrix")
        if not self.correctClass(LD):
            raise ClassError("The list of document is not a numpy array or a numpy matrix")
        # "is not None" on purpose: "!= None" on a numpy array is evaluated
        # element by element and has no single truth value.
        if W1 is not None and not self.correctClass(W1):
            raise ClassError("W matrix is not a numpy array or a numpy matrix")
        if F2 is not None and not self.correctClass(F2):
            raise ClassError("Assymetric F matrix is not a numpy array or a numpy matrix")
        if W2 is not None and not self.correctClass(W2):
            raise ClassError("Assymetric W matrix is not a numpy array or a numpy matrix")

        # Accept the document list as a row or as a column; work with a column.
        if LD.shape[0] < LD.shape[1]:
            LD = np.transpose(LD)
        if LD.shape[1] != 1:
            raise SizeError("Document List is not a vector")
        # Review the dimensions of each matrix.
        if not self.correctDimensions(LD, H, F1, W1, F2, W2):
            raise SizeError("The dimensions of the matrices are not consistent")

        # Attributes initialization.
        # NOTE(review): the loop body was missing in this source; LD is an
        # (n, 1) column here, so the single entry of each row is taken as
        # the document key - confirm.
        LLD = [row[0] for row in LD.tolist()]
        self.__typeLatentTopic = TypeLatentTopic(id, name, abreviature, self.createDictionary(LLD))

        # ControlLatentTopics creation.
        # NOTE(review): nothing in this source stores the created topics;
        # keeping them in __arrayControlLatentTopics is assumed - confirm.
        self.__arrayControlLatentTopics = []
        for i in range(H.shape[0]):
            normalizedRow = self.normalize(H[i])
            belongingVector = normalizedRow.tolist()
            sortedbelongingVector = self.sortVector(normalizedRow).tolist()

            wordColumn = np.transpose(F1)[i]
            representativeWords = self.normalize(wordColumn).tolist()
            sortedIndexRepresentativeWords = self.sortVector(np.array(wordColumn)).tolist()

            if W1 is not None:
                documentColumn = np.transpose(W1)[i]
                representativeDocuments = self.normalize(documentColumn).tolist()
                sortedIndexRepresentativeDocuments = self.sortVector(documentColumn).tolist()
            else:
                representativeDocuments = None
                sortedIndexRepresentativeDocuments = None

            if F2 is not None:
                resumeWordColumn = np.transpose(F2)[i]
                resumeWords = self.normalize(resumeWordColumn).tolist()
                sortedIndexResumeWords = self.sortVector(resumeWordColumn).tolist()
            else:
                resumeWords = None
                sortedIndexResumeWords = None

            if W2 is not None:
                resumeDocumentColumn = np.transpose(W2)[i]
                resumeDocuments = self.normalize(resumeDocumentColumn).tolist()
                sortedIndexResumeDocuments = self.sortVector(resumeDocumentColumn).tolist()
            else:
                resumeDocuments = None
                sortedIndexResumeDocuments = None

            CLT = ControlLatentTopic(i, belongingVector, sortedbelongingVector, representativeWords, sortedIndexRepresentativeWords, representativeDocuments, sortedIndexRepresentativeDocuments, resumeWords, sortedIndexResumeWords, resumeDocuments, sortedIndexResumeDocuments, self)
            self.__arrayControlLatentTopics.append(CLT)