def lookup(
    self, source_language, target_language, text: str, user, project, use_shared
):
    """Return up to 50 entries whose source matches ``text`` for a language pair.

    The similarity threshold handed to ``adjust_similarity_threshold`` is
    0.5 for texts of at most 50 characters and is derived from the text
    length for longer ones, so that long strings get more relevant matches.
    """
    text_length = len(text)
    if text_length > 50:
        # Length-dependent threshold for long strings
        similarity = 1 - 28.1838 * math.log(0.0443791 * text_length) / text_length
    else:
        # Basic similarity for short strings
        similarity = 0.5
    adjust_similarity_threshold(similarity)

    # Narrow down by entry type first (user / project / shared / from file)
    typed_entries = self.filter_type(
        user=user,
        project=project,
        use_shared=use_shared,
        from_file=True,
    )

    # Then apply the full-text search on source and the language pair
    matches = typed_entries.filter(
        source__search=text,
        source_language=source_language,
        target_language=target_language,
    )

    # Cap the result set at 50 rows
    return matches[:50]
def download_translations(
    self,
    source,
    language,
    text: str,
    unit,
    user,
    search: bool,
    threshold: int = 75,
):
    """Yield possible translations of ``text`` taken from existing units.

    Each result is a dict with the keys ``text``, ``quality``, ``service``,
    ``origin``, ``origin_url`` and ``source``. Candidates scoring below 10
    are always dropped; candidates scoring below ``threshold`` are dropped
    unless ``search`` is true.
    """
    # Limit the candidates by user access when a user is given
    queryset = Unit.objects.filter_access(user) if user else Unit.objects.all()

    # Route the query to memory_db when that alias is configured. It is
    # supposed to be a read-only replica used for offloading expensive
    # translation queries.
    if "memory_db" in settings.DATABASES:
        queryset = queryset.using("memory_db")

    candidates = queryset.filter(
        source__search=text,
        translation__component__source_language=source,
        translation__language=language,
        state__gte=STATE_TRANSLATED,
    ).prefetch()

    # Only close matches are wanted here
    adjust_similarity_threshold(0.95)

    for candidate in candidates:
        candidate_source = candidate.source_string

        # Units flagged as forbidden are never offered
        if "forbidden" in candidate.all_flags:
            continue

        quality = self.comparer.similarity(text, candidate_source)
        # Hard floor that applies even when searching
        if quality < 10:
            continue
        # The caller's threshold only applies outside of search mode
        if quality < threshold and not search:
            continue

        suggestion = {
            "text": candidate.get_target_plurals()[0],
            "quality": quality,
            "service": self.name,
            "origin": str(candidate.translation.component),
            "origin_url": candidate.get_absolute_url(),
            "source": candidate_source,
        }
        yield suggestion
def download_batch_strings(
    self, source, language, units, texts: Set[str], user=None, threshold: int = 75
):
    """Yield ``(source, translation)`` pairs for units matching any of ``texts``.

    Units are limited to those the user can access (when ``user`` is given),
    to the requested source/target language pair and to states of at least
    ``STATE_TRANSLATED``. The first target plural form is used as the
    translation. ``units`` and ``threshold`` are accepted but not used here.

    Nothing is yielded when ``texts`` is empty.
    """
    # With no texts the OR-combined condition would stay an empty Q(), which
    # places no constraint on the source and would match every translated
    # unit of the language pair. Bail out before touching the database.
    if not texts:
        return

    base = Unit.objects.filter_access(user) if user else Unit.objects.all()

    # Match units whose source full-text-matches any of the given texts
    query = Q()
    for text in texts:
        query |= Q(source__search=text)

    matching_units = base.filter(
        query,
        translation__component__source_language=source,
        translation__language=language,
        state__gte=STATE_TRANSLATED,
    ).only("source", "target")

    # We want only close matches here
    adjust_similarity_threshold(0.95)

    for unit in matching_units:
        yield unit.source_string, unit.get_target_plurals()[0]
def download_translations(
    self,
    source,
    language,
    text: str,
    unit,
    user,
    search: bool,
    threshold: int = 75,
):
    """Yield possible translations of ``text`` taken from existing units.

    Every yielded item is a dict carrying ``text``, ``quality``,
    ``service``, ``origin``, ``origin_url`` and ``source``. A candidate is
    skipped when its quality is below 10, or when it is below ``threshold``
    and ``search`` is false.
    """
    # Start from the units visible to the user, or from all units
    if user:
        accessible = Unit.objects.filter_access(user)
    else:
        accessible = Unit.objects.all()

    filtered = accessible.filter(
        source__search=text,
        translation__component__source_language=source,
        translation__language=language,
        state__gte=STATE_TRANSLATED,
    )
    found_units = filtered.prefetch()

    # Only close matches are wanted here
    adjust_similarity_threshold(0.95)

    for found in found_units:
        found_source = found.source_string

        # Units flagged as forbidden are never offered
        if "forbidden" in found.all_flags:
            continue

        quality = self.comparer.similarity(text, found_source)
        # Hard floor that applies even when searching
        if quality < 10:
            continue
        # The caller's threshold only applies outside of search mode
        if quality < threshold and not search:
            continue

        yield {
            "text": found.get_target_plurals()[0],
            "quality": quality,
            "service": self.name,
            "origin": str(found.translation.component),
            "origin_url": found.get_absolute_url(),
            "source": found_source,
        }