Пример #1
0
 def measure (self, stringset):
     """Compute summary distance statistics for a set of strings.

     The strings are wrapped in ``containers.LengthAscendingStrings`` and
     the pairwise edit-distance and Jaccard-distance arrays are built from
     that container.  Each average is the sum over the whole 2-D array
     divided by ``size * (size - 1)``, i.e. the number of ordered pairs of
     distinct strings (this presumably relies on the diagonal of each array
     being zero -- confirm against ``leven`` / ``jaccard``).

     Args:
         stringset: Collection of strings, passed as-is to
             ``containers.LengthAscendingStrings``.

     Returns:
         dict with the keys ``'EditAvg'``, ``'JaccDistAvg'`` and ``'Size'``.
         When there are fewer than two strings there are no pairs to
         average over, so both averages are reported as ``0.0`` rather
         than dividing by zero.
     """
     lass = containers.LengthAscendingStrings(stringset)
     edit_array2d = leven.edit_distance_array2d(lass)
     jaccdist_array2d = jaccard.jaccard_array2d(lass)['distance']

     size = len(edit_array2d)
     pair_count = size * (size - 1)
     if pair_count:
         avg_edit = edit_array2d.sum() / float(pair_count)
         avg_jaccdist = jaccdist_array2d.sum() / float(pair_count)
     else:
         # size is 0 or 1: the denominator would be zero, which yields
         # NaN/inf (NumPy scalars) or ZeroDivisionError (plain numbers).
         avg_edit = avg_jaccdist = 0.0

     # TODO: a 'timing' measure was stubbed out here and never implemented.
     return {
         'EditAvg': avg_edit,
         'JaccDistAvg': avg_jaccdist,
         'Size': size,
     }
Пример #2
0
    def __init__ (self, strings, **kwargs):
        """Set up the string container, cluster container and distance data.

        Args:
            strings: Input strings, passed as-is to
                ``containers.LengthAscendingStrings``.
            **kwargs: Accepted but not read in the part of the constructor
                visible here.
        """

        # Re-ordered strings (read-only)
        # Properties: lenbounds[OrderedDict]
        self.lass = containers.LengthAscendingStrings (strings)

        # Container[dict mapping int to set] for cluster manipulation
        # Properties: objs[TurboList], cids[list],
        # unclustered_objs[TurboList], merge()
        # Built from the re-ordered container above, not from the raw input.
        self.clusters = containers.Clusters (self.lass)

        # Corresponding distance metrics
        # (pairwise edit distances computed over self.lass)
        self.editdist = leven.edit_distance_array2d (self.lass)
        # Jaccard or charset related metrics
        # NOTE(review): presumably a mapping of named 2-D arrays (e.g. a
        # 'distance' entry) rather than a single array -- confirm in jaccard.
        self.jaccarrays = jaccard.jaccard_array2d(self.lass)