def _convertDocumentsToVector(self):
    """Turn ``self.documents`` into per-document token-count vectors.

    ``self.documents`` is iterated twice as ``(docId, document)`` pairs,
    and each ``document`` is tokenised with ``str.split()``.

    Pass 1 registers every previously unseen token in a ``TwoWayMap``
    under ``Clustering.PHRASE_TO_DIMENSION``; the index given to a token
    is the size of the map at the moment it is inserted.

    Pass 2 fills, for every document, a vector whose entry at a token's
    index is the number of times the token occurs in that document.

    Results are stored on the instance:
        self.vectors    -- one count vector per document
        self.masks      -- one all-ones vector per document, same length
        self.docIds     -- document ids, in iteration order
        self.dimensions -- the token <-> index map built in pass 1
    """
    self.vectors = []
    self.masks = []
    self.docIds = []
    dimensions = TwoWayMap()

    # Pass 1: build the vocabulary. Document ids are irrelevant here.
    allTokens = (
        token
        for _, text in self.documents
        for token in text.split()
    )
    for token in allTokens:
        alreadyKnown = dimensions.contains(
            Clustering.PHRASE_TO_DIMENSION, token)
        if alreadyKnown:
            continue
        dimensions.set(
            Clustering.PHRASE_TO_DIMENSION, token, len(dimensions))

    # Pass 2: count token occurrences per document.
    for docId, text in self.documents:
        width = len(dimensions)
        counts = zeros(width)
        for token in text.split():
            index = dimensions.get(Clustering.PHRASE_TO_DIMENSION, token)
            counts[index] += 1
        self.vectors.append(counts)
        self.masks.append(ones(width))
        self.docIds.append(docId)

    # Whitening of the vectors is intentionally left disabled:
    # self.vectors = whiten(self.vectors)
    self.dimensions = dimensions
def setUp(self):
    """Create the ``TwoWayMap`` fixture shared by the tests.

    After this runs, ``self.twoWayMap`` holds one entry set with
    ``TwoWayMap.MAP_FORWARD`` ('a', 'A') and one entry set with
    ``TwoWayMap.MAP_REVERSE`` ('B', 'b').
    """
    fixture = TwoWayMap()
    self.twoWayMap = fixture

    # NOTE(review): the tuple is handed to ``set`` as ONE positional
    # argument, so the expected TypeError may come from the call's
    # arity rather than from validation of the values -- confirm intent.
    with self.assertRaises(TypeError):
        self.twoWayMap.set((5, 1, 2))

    self.twoWayMap.set(TwoWayMap.MAP_FORWARD, 'a', 'A')
    self.twoWayMap.set(TwoWayMap.MAP_REVERSE, 'B', 'b')