def literal_match(self, literal: Literal, surface: str):
    """Score how well a cell's surface text matches a KB literal.

    This is a generator: it yields at most one
    ``LiteralMatchResult(score, literal_text, datatype)`` and yields nothing
    when the score is zero. ``literal_text`` is the stripped string form of
    ``literal``.

    Matching strategy:

    * Typed literal whose datatype equals ``self.DATETIME``: both sides are
      converted to timestamps and the score falls linearly from 1 (same
      instant) to 0 (365 days or more apart). A surface that matches
      ``YEAR_PATTERN`` is interpreted as January 1st of that year; otherwise
      it is parsed as ISO format, then with ``self._dateparse``.
    * Any other typed literal: both sides are parsed as numbers (thousands
      separators ``,`` removed) and scored by relative difference; the match
      is only accepted when the score exceeds 0.95.
    * Typed literal that did not match as above: case-insensitive exact
      string comparison (score 1.0 or nothing).
    * Untyped literal: string similarity selected by ``self.stringmatch``
      (``"jaccard"`` on whitespace tokens, or ``"levenshtein"``); any other
      setting produces no match.

    Args:
        literal: KB literal; its ``datatype`` attribute is used if present.
        surface: Cell text to compare against the literal.

    Yields:
        A single ``LiteralMatchResult`` when the score is non-zero.
    """
    dtype = getattr(literal, "datatype", None)
    lit_text, surf_text = str(literal).strip(), str(surface).strip()

    score = 0.0
    if dtype:
        if str(dtype) == str(self.DATETIME):
            seconds_per_year = 60 * 60 * 24 * 365
            try:
                lit_ts = datetime.datetime.fromisoformat(lit_text).timestamp()

                yearmatch = YEAR_PATTERN.match(surf_text)
                if yearmatch:
                    # A bare year is taken to mean January 1st of that year.
                    year = int(yearmatch.groups()[0])
                    surf_ts = datetime.datetime(year, 1, 1).timestamp()
                else:
                    try:
                        surf_ts = datetime.datetime.fromisoformat(
                            surf_text
                        ).timestamp()
                    except (ValueError, OverflowError, OSError):
                        # Not ISO formatted (or not representable as a
                        # timestamp): try the configured date parser.
                        surf_ts = self._dateparse(surf_text).timestamp()

                # Test against None, not truthiness: a timestamp of exactly
                # 0.0 (the epoch) is a valid date and must still be scored.
                if surf_ts is not None:
                    score = max(
                        0, 1 - abs(surf_ts - lit_ts) / seconds_per_year
                    )
            except Exception:
                # Best effort: date parsing failures (including whatever
                # self._dateparse raises or returns) just mean "no date
                # match"; fall through to the exact string comparison.
                score = 0.0

            if score:
                yield LiteralMatchResult(score, lit_text, dtype)
                return
        else:
            try:
                surf_num = float(surf_text.replace(",", ""))
                lit_num = float(lit_text.replace(",", ""))
                if surf_num == lit_num:
                    # Covers 0 == 0, where the relative difference below
                    # would divide by zero.
                    score = 1.0
                else:
                    scale = max(abs(surf_num), abs(lit_num))
                    # scale can only be 0 here when one side is NaN.
                    score = (
                        max(0, 1 - abs(surf_num - lit_num) / scale)
                        if scale
                        else 0.0
                    )
            except (ValueError, ArithmeticError):
                # Not numeric: fall through to the exact string comparison.
                score = 0.0

            # Numeric matches are only trusted when nearly identical.
            if score > 0.95:
                yield LiteralMatchResult(score, lit_text, dtype)
                return

        # Typed literal that did not match by value: require exact
        # (case-insensitive) text equality. Kept as a float so that every
        # yielded score has a numeric type.
        score = float(surf_text.lower() == lit_text.lower())

    elif surf_text and lit_text:
        # Untyped strings may match approximately.
        if self.stringmatch == "jaccard":
            surf_tokens = set(surf_text.lower().split())
            lit_tokens = set(lit_text.lower().split())
            if surf_tokens and lit_tokens:
                score = len(surf_tokens & lit_tokens) / len(
                    surf_tokens | lit_tokens
                )
        elif self.stringmatch == "levenshtein":
            # Imported lazily so the package is only needed when this
            # similarity is selected.
            import Levenshtein

            surf_low, lit_low = surf_text.lower(), lit_text.lower()
            if surf_low and lit_low:
                # Edit distance normalised by the shorter string; clamped at
                # zero because the distance can exceed that length.
                shortest = min(len(surf_low), len(lit_low))
                score = max(
                    0,
                    (shortest - Levenshtein.distance(surf_low, lit_low))
                    / shortest,
                )

    if score:
        yield LiteralMatchResult(score, lit_text, dtype)