Exemplo n.º 1
0
    def _convertDocumentsToVector(self):
        """Turn self.documents into per-document token-count vectors.

        Works in two passes over self.documents (an iterable of
        (docId, document) pairs):

        1. Every whitespace-separated token not yet known to the map is
           registered under Clustering.PHRASE_TO_DIMENSION, using the
           map's len() at insertion time as its dimension index.
        2. For each document a zeros() vector sized to the map is filled
           with token counts, and a matching ones() mask is created.

        Results are stored on self.vectors, self.masks, self.docIds and
        self.dimensions; nothing is returned.
        """
        self.vectors = []
        self.masks = []
        self.docIds = []
        phraseKey = Clustering.PHRASE_TO_DIMENSION
        tokenIndex = TwoWayMap()

        # Pass 1: assign a dimension to each previously unseen token.
        for _, text in self.documents:
            for token in text.split():
                if tokenIndex.contains(phraseKey, token):
                    continue
                tokenIndex.set(phraseKey, token, len(tokenIndex))

        # Pass 2: count token occurrences per document.
        for identifier, text in self.documents:
            counts = zeros(len(tokenIndex))
            for token in text.split():
                position = tokenIndex.get(phraseKey, token)
                counts[position] += 1
            self.vectors.append(counts)
            self.masks.append(ones(len(tokenIndex)))
            self.docIds.append(identifier)

        # Whitening of self.vectors (whiten) is currently disabled.
        self.dimensions = tokenIndex
Exemplo n.º 2
0
 def setUp(self):
     """Create the TwoWayMap fixture shared by the tests.

     Checks that set() raises TypeError for the arguments (5, 1, 2), then
     stores one entry via MAP_FORWARD and one via MAP_REVERSE.
     """
     self.twoWayMap = TwoWayMap()
     # assertRaises forwards extra positional arguments to the callable, so
     # the values must be passed separately. Passing one tuple would call
     # set() with a single argument and raise TypeError from the missing
     # parameters alone, making the check pass regardless of validation.
     # NOTE(review): assumes set() raises TypeError for an unknown map
     # type such as 5 -- confirm against TwoWayMap.
     self.assertRaises(TypeError, self.twoWayMap.set, 5, 1, 2)
     self.twoWayMap.set(TwoWayMap.MAP_FORWARD, 'a', 'A')
     self.twoWayMap.set(TwoWayMap.MAP_REVERSE, 'B', 'b')