Пример #1
0
def map_segments_to_clusters(x):
    """Map step: cluster the lines read from a single file segment.

    Args:
        x: A pair ``((filename, start, end, size), config)``. The first item
            is forwarded positionally to ``FileSegmentReader.read``; ``config``
            is expanded as keyword arguments to ``Clusterer``.

    Returns:
        A one-element list containing ``(FIXED_MAP_JOB_KEY, clusters)``, where
        ``clusters`` is whatever ``Clusterer.find`` returns for the segment.
    """
    segment, config = x
    filename, start, end, size = segment
    # Build the clusterer before touching the file, as the original did.
    segment_clusterer = Clusterer(**config)
    segment_lines = FileSegmentReader.read(filename, start, end, size)
    # Every mapper emits under the same key (FIXED_MAP_JOB_KEY).
    return [(FIXED_MAP_JOB_KEY, segment_clusterer.find(segment_lines))]
Пример #2
0
 def test(self):
     # Two "hello" lines that differ in one token, plus one unrelated line.
     # The assertion expects the "hello" lines to share one entry (second
     # field 2, presumably the member count; '---' at the differing token)
     # and the "abc" line to get its own entry with second field 1.
     log_lines = [
         'hello 1 y 3',
         'hello 1 x 3',
         'abc m n q',
     ]
     hello_entry = [['hello', '1', 'y', '3'], 2, ['hello', '1', '---', '3']]
     abc_entry = [['abc', 'm', 'n', 'q'], 1, ['abc', 'm', 'n', 'q']]

     finder = Clusterer(k1=1, k2=1, max_dist=0.5, variables=[])
     found = finder.find(log_lines)

     self.assertEqual(found, [hello_entry, abc_entry])
Пример #3
0
 def test_min_members(self):
     # Same three input lines as the basic case, but with min_members=2.
     # The assertion expects only the two-line "hello" entry to be returned;
     # the single-line "abc" entry must be absent from the result.
     settings = dict(k1=1, k2=1, max_dist=0.5, variables=[], min_members=2)
     log_lines = [
         'hello 1 y 3',
         'hello 1 x 3',
         'abc m n q',
     ]
     hello_entry = [['hello', '1', 'y', '3'], 2, ['hello', '1', '---', '3']]

     found = Clusterer(**settings).find(log_lines)

     self.assertEqual(found, [hello_entry])