def map_segments_to_clusters(x):
    """Map step: cluster the lines of a single file segment.

    Args:
        x: A pair ``((filename, start, end, size), config)``. The inner
            4-tuple is forwarded to ``FileSegmentReader.read``; ``config``
            is expanded as keyword arguments to ``Clusterer``.

    Returns:
        A one-element list ``[(FIXED_MAP_JOB_KEY, clusters)]`` where
        ``clusters`` is whatever ``Clusterer.find`` returns for the
        segment's lines.
    """
    segment, config = x
    filename, start, end, size = segment

    # Build the clusterer before touching the file, as the caller's
    # configuration errors should surface ahead of any read.
    segment_clusterer = Clusterer(**config)
    segment_lines = FileSegmentReader.read(filename, start, end, size)

    # Every mapper emits under the same key.
    return [(FIXED_MAP_JOB_KEY, segment_clusterer.find(segment_lines))]
def test(self):
    """Two near-identical lines share a cluster; the odd line gets its own.

    NOTE(review): each expected entry looks like
    ``[tokens of one member, member count, pattern]`` with ``'---'``
    marking a position that differs between members -- confirm against
    ``Clusterer.find``.
    """
    log_lines = ['hello 1 y 3', 'hello 1 x 3', 'abc m n q']
    expected = [
        [['hello', '1', 'y', '3'], 2, ['hello', '1', '---', '3']],
        [['abc', 'm', 'n', 'q'], 1, ['abc', 'm', 'n', 'q']],
    ]

    found = Clusterer(k1=1, k2=1, max_dist=0.5, variables=[]).find(log_lines)

    self.assertEqual(found, expected)
def test_min_members(self):
    """With ``min_members=2`` only the two-member cluster is returned.

    Same input as the unfiltered case; the expected result omits the
    cluster that would contain the single ``'abc m n q'`` line.
    """
    log_lines = ['hello 1 y 3', 'hello 1 x 3', 'abc m n q']
    expected = [
        [['hello', '1', 'y', '3'], 2, ['hello', '1', '---', '3']],
    ]

    options = dict(k1=1, k2=1, max_dist=0.5, variables=[], min_members=2)
    found = Clusterer(**options).find(log_lines)

    self.assertEqual(found, expected)