Пример #1
0
    def literal_match(self, literal: Literal, surface: str):
        """Score how well ``surface`` text matches a (possibly typed) literal.

        Both arguments are converted to stripped strings before comparison.
        The comparison strategy depends on the literal's ``datatype``
        attribute (treated as absent when the object has no such attribute):

        * datatype equal to ``self.DATETIME``: both sides are turned into
          timestamps and the score drops linearly from 1 to 0 as they move
          one 365-day year apart.
        * any other datatype: both sides are parsed as floats (commas are
          removed first) and scored by relative difference; only scores
          above 0.95 count as a match.
        * if the typed comparison fails or does not match, a
          case-insensitive exact string comparison is the fallback.
        * no datatype: approximate string matching chosen by
          ``self.stringmatch`` (``"jaccard"`` on whitespace tokens or
          ``"levenshtein"`` on characters); any other setting never matches.

        Args:
            literal: The literal value to compare against.
            surface: The text to match.

        Yields:
            At most one ``LiteralMatchResult(score, literal, dtype)``, where
            ``literal`` is the stripped string form of the input literal.
            Nothing is yielded when the score is zero.
        """
        dtype = getattr(literal, "datatype", None)
        literal, surface = str(literal).strip(), str(surface).strip()

        score = 0.0
        if dtype:
            # Typed literals should match well
            if str(dtype) == str(self.DATETIME):
                seconds_per_year = 60 * 60 * 24 * 365
                try:
                    literal_ts = datetime.datetime.fromisoformat(
                        literal).timestamp()

                    year_match = YEAR_PATTERN.match(surface)
                    if year_match:
                        # A year-only surface is anchored at January 1st.
                        year = int(year_match.groups()[0])
                        surface_ts = datetime.datetime(year, 1, 1).timestamp()
                    else:
                        try:
                            surface_ts = datetime.datetime.fromisoformat(
                                surface).timestamp()
                        except ValueError:
                            # Not ISO formatted; fall back to the instance's
                            # own date parser.
                            surface_ts = self._dateparse(surface).timestamp()
                except Exception:
                    # Deliberately best-effort: self._dateparse is opaque
                    # here (it may raise anything or return None), and
                    # timestamp() can fail for out-of-range dates. An
                    # unparseable date simply falls through to the exact
                    # string comparison below.
                    pass
                else:
                    # A timestamp of 0.0 is a valid date, so no truthiness
                    # check on surface_ts here.
                    score = max(
                        0.0,
                        1 - abs(surface_ts - literal_ts) / seconds_per_year)
                    if score:
                        yield LiteralMatchResult(score, literal, dtype)
                        return
            else:
                try:
                    surface_num = float(surface.replace(",", ""))
                    literal_num = float(literal.replace(",", ""))
                except ValueError:
                    # Not numeric; fall through to exact string comparison.
                    pass
                else:
                    if surface_num == literal_num:
                        # Covers 0 == 0.0, where the relative difference
                        # below would divide by zero.
                        score = 1.0
                    else:
                        scale = max(abs(surface_num), abs(literal_num))
                        # scale is 0 or NaN only when a NaN is involved;
                        # such pairs never match numerically.
                        if scale > 0:
                            score = max(
                                0.0,
                                1 - abs(surface_num - literal_num) / scale)
                    if score > 0.95:
                        yield LiteralMatchResult(score, literal, dtype)
                        return

            # Typed comparison failed or did not match: exact match only.
            score = float(surface.lower() == literal.lower())

        elif surface and literal:
            # Strings may match approximately
            if self.stringmatch == "jaccard":
                surface_tokens = set(surface.lower().split())
                literal_tokens = set(literal.lower().split())
                if surface_tokens and literal_tokens:
                    score = (len(surface_tokens & literal_tokens)
                             / len(surface_tokens | literal_tokens))
            elif self.stringmatch == "levenshtein":
                import Levenshtein

                surface_low, literal_low = surface.lower(), literal.lower()
                if surface_low and literal_low:
                    # Normalised by the shorter string and clamped at 0, so
                    # large length differences score as no match.
                    shortest = min(len(surface_low), len(literal_low))
                    distance = Levenshtein.distance(surface_low, literal_low)
                    score = max(0, (shortest - distance) / shortest)

        if score:
            yield LiteralMatchResult(score, literal, dtype)