def __init__(self, text):
    """Store ``text`` and fingerprint it with a scheme chosen by its length.

    After construction ``self.hash`` is a dict with two keys, ``"type"``
    and ``"value"``:

    * ``len(text) < 256``: type ``"simhash"``, value ``Simhash(text).value``
    * otherwise: type ``"shingleprint"``, value
      ``ShinglePrint(text).features``
    """
    self.text = text

    if len(text) >= 256:
        fingerprint = {
            "type": "shingleprint",
            "value": ShinglePrint(text).features,
        }
    else:
        # No separate handling for very short input: texts with a length
        # under 32 are fingerprinted with simhash like the rest of this
        # range.
        fingerprint = {
            "type": "simhash",
            "value": Simhash(text).value,
        }

    self.hash = fingerprint
def html(self, value):
    """Record ``value`` as the HTML and cache a Simhash built from it.

    Sets ``self._html`` to ``value`` and ``self._html_hash`` to the
    ``Simhash`` object itself (not its ``.value``).
    """
    # NOTE(review): presumably registered as a property setter; the
    # decorator is not visible in this part of the file -- confirm.
    self._html = value
    html_fingerprint = Simhash(value)
    self._html_hash = html_fingerprint