示例#1
0
 def lookup(
     self, source_language, target_language, text: str, user, project, use_shared
 ):
     """Return up to 50 entries whose source matches *text*.

     The similarity threshold passed to ``adjust_similarity_threshold`` is
     0.5 for texts of at most 50 characters and is derived from the text
     length for longer ones, to get more relevant matches for long strings.

     ``user``, ``project`` and ``use_shared`` are forwarded to
     ``self.filter_type`` together with ``from_file=True``; the result is
     then narrowed by a ``source__search`` lookup on *text* and by both
     languages, and sliced to the first 50 items.
     """
     text_length = len(text)
     if text_length > 50:
         # Length-dependent threshold for long strings.
         similarity = 1 - 28.1838 * math.log(0.0443791 * text_length) / text_length
     else:
         # Basic similarity for short strings.
         similarity = 0.5
     adjust_similarity_threshold(similarity)

     # Type filtering.
     typed_entries = self.filter_type(
         user=user,
         project=project,
         use_shared=use_shared,
         from_file=True,
     )
     # Full-text search on source combined with language filtering.
     matching_entries = typed_entries.filter(
         source__search=text,
         source_language=source_language,
         target_language=target_language,
     )
     return matching_entries[:50]
示例#2
0
    def download_translations(
        self,
        source,
        language,
        text: str,
        unit,
        user,
        search: bool,
        threshold: int = 75,
    ):
        """Yield translation suggestions taken from matching translated units.

        Units are looked up by a ``source__search`` match on *text*, limited
        to the given source language, target *language* and a state of at
        least ``STATE_TRANSLATED``. Units flagged ``forbidden`` are skipped.

        A match is dropped when its quality is below 10, or when it is below
        *threshold* and *search* is false. ``unit`` is not used by this
        implementation.

        Each yielded item is a dict with the keys ``text``, ``quality``,
        ``service``, ``origin``, ``origin_url`` and ``source``.
        """
        # Restrict candidates to what the user may access; without a user
        # all units are considered.
        candidates = Unit.objects.filter_access(user) if user else Unit.objects.all()

        # Use the "memory_db" database for the query when it is configured.
        # It is supposed to be a read-only replica for offloading expensive
        # translation queries.
        if "memory_db" in settings.DATABASES:
            candidates = candidates.using("memory_db")

        candidates = candidates.filter(
            source__search=text,
            translation__component__source_language=source,
            translation__language=language,
            state__gte=STATE_TRANSLATED,
        ).prefetch()

        # Only close matches are wanted, so tighten the similarity threshold
        # before the candidates are iterated below.
        adjust_similarity_threshold(0.95)

        for match in candidates:
            matched_source = match.source_string
            if "forbidden" in match.all_flags:
                continue
            score = self.comparer.similarity(text, matched_source)
            # Very weak matches are never offered.
            if score < 10:
                continue
            # Below-threshold matches are only offered when searching.
            if score < threshold and not search:
                continue
            suggestion = {
                "text": match.get_target_plurals()[0],
                "quality": score,
                "service": self.name,
                "origin": str(match.translation.component),
                "origin_url": match.get_absolute_url(),
                "source": matched_source,
            }
            yield suggestion
示例#3
0
    def download_batch_strings(
        self, source, language, units, texts: Set[str], user=None, threshold: int = 75
    ):
        """Yield ``(source, translation)`` pairs for units matching any of *texts*.

        Candidate units are limited to those accessible to *user* (all units
        when no user is given), to the given source language and target
        *language*, and to a state of at least ``STATE_TRANSLATED``. A unit
        is selected when its source matches at least one entry of *texts*
        through a ``source__search`` lookup.

        ``units`` and ``threshold`` are accepted but not used by this
        implementation.

        Nothing is yielded when *texts* is empty.
        """
        # An empty OR-chain collapses to a bare Q(), which does not restrict
        # the queryset at all. Without this guard an empty ``texts`` would
        # yield every translated unit of the language pair.
        if not texts:
            return

        if user:
            base = Unit.objects.filter_access(user)
        else:
            base = Unit.objects.all()

        # OR together one full-text condition per requested text.
        query = reduce(lambda x, y: x | Q(source__search=y), texts, Q())
        matching_units = base.filter(
            query,
            translation__component__source_language=source,
            translation__language=language,
            state__gte=STATE_TRANSLATED,
        ).only("source", "target")

        # We want only close matches here
        adjust_similarity_threshold(0.95)

        for unit in matching_units:
            yield unit.source_string, unit.get_target_plurals()[0]
示例#4
0
    def download_translations(
        self,
        source,
        language,
        text: str,
        unit,
        user,
        search: bool,
        threshold: int = 75,
    ):
        """Yield translation suggestions built from existing translated units.

        The lookup uses a ``source__search`` match on *text*, restricted to
        the given source language, target *language* and a state of at least
        ``STATE_TRANSLATED``. Units carrying the ``forbidden`` flag are
        ignored.

        A hit is discarded when its quality is below 10, or when it is below
        *threshold* while *search* is false. ``unit`` is not used by this
        implementation.

        Every yielded item is a dict with the keys ``text``, ``quality``,
        ``service``, ``origin``, ``origin_url`` and ``source``.
        """
        # Honour access control when a user is given; otherwise start from
        # all units.
        accessible = Unit.objects.filter_access(user) if user else Unit.objects.all()
        hits = accessible.filter(
            source__search=text,
            translation__component__source_language=source,
            translation__language=language,
            state__gte=STATE_TRANSLATED,
        ).prefetch()

        # Only close matches are wanted, so tighten the similarity threshold
        # before the hits are iterated below.
        adjust_similarity_threshold(0.95)

        for hit in hits:
            hit_source = hit.source_string
            if "forbidden" in hit.all_flags:
                continue
            rating = self.comparer.similarity(text, hit_source)
            # Very weak matches are never offered.
            if rating < 10:
                continue
            # Below-threshold matches are only offered when searching.
            if rating < threshold and not search:
                continue
            result = {
                "text": hit.get_target_plurals()[0],
                "quality": rating,
                "service": self.name,
                "origin": str(hit.translation.component),
                "origin_url": hit.get_absolute_url(),
                "source": hit_source,
            }
            yield result