示例#1
0
文件: unit.py 项目: tbikeev/weblate
    def more_like_this(self, unit, top=5):
        '''
        Returns translated units whose source resembles the given unit.

        The source index is queried for the first plural form of the unit's
        source. The best hit then seeds a "more like this" lookup on the
        ``source`` field, limited to ``top`` results. Checksums of the hits
        of the initial query are removed from that set, and the remaining
        checksums are used to select translated units in the same language,
        excluding the unit itself.

        An empty queryset is returned when the initial query has no hits.
        '''
        searcher_context = FULLTEXT_INDEX.source_searcher()
        text = unit.get_source_plurals()[0]
        query = qparser.QueryParser('source', SOURCE_SCHEMA).parse(text)
        with searcher_context as searcher:
            direct_hits = searcher.search(query)
            if not len(direct_hits):
                return self.none()
            # The best direct hit is the seed for the similarity lookup
            related_hits = direct_hits[0].more_like_this('source', text, top)
            similar = set(hit['checksum'] for hit in related_hits)
            # Hits of the initial query are not wanted in the result
            similar.difference_update(
                hit['checksum'] for hit in direct_hits
            )

        candidates = self.filter(
            checksum__in=similar,
            translation__language=unit.translation.language,
            translated=True
        )
        return candidates.exclude(pk=unit.id)
示例#2
0
    def search(self, query, source=True, context=True, translation=True,
               checksums=False):
        """
        Performs full text search on defined set of fields.

        The ``source``, ``context`` and ``translation`` flags select which
        fields are searched. The target index is chosen by the language of
        the first unit in this queryset; when there are no units at all the
        target search is skipped instead of failing with IndexError.

        Returns queryset unless checksums is set, in which case the set of
        matching checksums is returned.
        """
        ret = set()

        # Search in source or context
        if source or context:
            index = FULLTEXT_INDEX.source_searcher(
                not appsettings.OFFLOAD_INDEXING
            )
            with index as searcher:
                if source:
                    ret.update(
                        self.__search(searcher, "source", SOURCE_SCHEMA, query)
                    )
                if context:
                    ret.update(
                        self.__search(searcher, "context", SOURCE_SCHEMA, query)
                    )

        # Search in target
        if translation:
            # The first unit tells which language index to use; an empty
            # queryset has no such unit and nothing to search in.
            try:
                sample = self.all()[0]
            except IndexError:
                sample = None
            if sample is not None:
                index = FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                    not appsettings.OFFLOAD_INDEXING
                )
                with index as searcher:
                    ret.update(
                        self.__search(searcher, "target", TARGET_SCHEMA, query)
                    )

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
示例#3
0
文件: unit.py 项目: madhuracj/weblate
    def fulltext(self, query, source=True, context=True, translation=True,
                 checksums=False):
        '''
        Performs full text search on defined set of fields.

        The ``source``, ``context`` and ``translation`` flags select which
        fields are searched. The target index is chosen by the language of
        the first unit in this queryset; when there are no units at all the
        target search is skipped instead of failing with IndexError.

        Returns queryset unless checksums is set, in which case the set of
        matching checksums is returned.
        '''
        ret = set()

        # Search in source or context
        if source or context:
            index = FULLTEXT_INDEX.source_searcher(
                not appsettings.OFFLOAD_INDEXING
            )
            with index as searcher:
                if source:
                    ret.update(
                        self.__search(
                            searcher,
                            'source',
                            SOURCE_SCHEMA,
                            query
                        )
                    )
                if context:
                    ret.update(
                        self.__search(
                            searcher,
                            'context',
                            SOURCE_SCHEMA,
                            query
                        )
                    )

        # Search in target
        if translation:
            # The first unit tells which language index to use; an empty
            # queryset has no such unit and nothing to search in.
            try:
                sample = self.all()[0]
            except IndexError:
                sample = None
            if sample is not None:
                index = FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                    not appsettings.OFFLOAD_INDEXING
                )
                with index as searcher:
                    ret.update(
                        self.__search(
                            searcher,
                            'target',
                            TARGET_SCHEMA,
                            query
                        )
                    )

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
示例#4
0
文件: unit.py 项目: tbikeev/weblate
    def fulltext(self, query, source=True, context=True, translation=True,
                 checksums=False):
        '''
        Performs full text search on defined set of fields.

        The ``source``, ``context`` and ``translation`` flags select which
        fields are searched. The target index is chosen by the language of
        the first unit in this queryset; when there are no units at all the
        target search is skipped instead of failing with IndexError.

        Returns queryset unless checksums is set, in which case the set of
        matching checksums is returned.
        '''
        ret = set()

        # Search in source or context
        if source or context:
            index = FULLTEXT_INDEX.source_searcher()
            with index as searcher:
                if source:
                    ret.update(
                        self.__search(
                            searcher,
                            'source',
                            SOURCE_SCHEMA,
                            query
                        )
                    )
                if context:
                    ret.update(
                        self.__search(
                            searcher,
                            'context',
                            SOURCE_SCHEMA,
                            query
                        )
                    )

        # Search in target
        if translation:
            # The first unit tells which language index to use; an empty
            # queryset has no such unit and nothing to search in.
            try:
                sample = self.all()[0]
            except IndexError:
                sample = None
            if sample is not None:
                index = FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                )
                with index as searcher:
                    ret.update(
                        self.__search(
                            searcher,
                            'target',
                            TARGET_SCHEMA,
                            query
                        )
                    )

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
示例#5
0
文件: unit.py 项目: tbikeev/weblate
    def same_source(self, unit):
        '''
        Returns translated units matching the source of the given unit.

        The first plural form of the unit's source is parsed as a query on
        the ``source`` field and run against the source index. Checksums of
        the hits select translated units in the unit's language; the unit
        itself is excluded.
        '''
        searcher_context = FULLTEXT_INDEX.source_searcher()
        text = unit.get_source_plurals()[0]
        query = qparser.QueryParser('source', SOURCE_SCHEMA).parse(text)
        with searcher_context as searcher:
            # Collect checksums of everything the query matched
            matching = set(
                hit['checksum'] for hit in searcher.search(query)
            )

        same_language = self.filter(
            checksum__in=matching,
            translation__language=unit.translation.language,
            translated=True
        )
        return same_language.exclude(pk=unit.id)
示例#6
0
    def similar(self, unit):
        """
        Finds similar units to current unit.

        Up to 10 key terms are extracted from the unit source and terms
        listed in IGNORE_SIMILAR are dropped. The remaining terms are then
        searched for in the source field, first all of them together and
        then with up to 3 terms left out, until at least
        appsettings.SIMILAR_MESSAGES checksums are collected.

        Returns units from the same project and language having one of the
        collected checksums, excluding those with an empty target or with
        the same target as the given unit.
        """
        ret = set([unit.checksum])
        index = FULLTEXT_INDEX.source_searcher(
            not appsettings.OFFLOAD_INDEXING
        )
        with index as searcher:
            # Extract up to 10 terms from the source
            key_terms = searcher.key_terms_from_text(
                "source", unit.source, numterms=10
            )
            # Key terms come as (term, score) pairs, so the term itself has
            # to be checked against the ignore list, not the whole pair.
            # NOTE(review): assumes IGNORE_SIMILAR holds plain words - confirm.
            terms = [kw[0] for kw in key_terms if kw[0] not in IGNORE_SIMILAR]
            cnt = len(terms)
            # Try to find at least configured number of similar strings,
            # remove up to 4 words
            while (len(ret) < appsettings.SIMILAR_MESSAGES
                   and cnt > 0
                   and len(terms) - cnt < 4):
                for search in itertools.combinations(terms, cnt):
                    # Source only search returning set of checksums
                    results = self.search(
                        " ".join(search), True, False, False, True
                    )
                    ret = ret.union(results)
                cnt -= 1

        project = unit.translation.subproject.project
        return self.filter(
            translation__subproject__project=project,
            translation__language=unit.translation.language,
            checksum__in=ret
        ).exclude(
            target__in=["", unit.target]
        )
示例#7
0
文件: unit.py 项目: madhuracj/weblate
    def more_like_this(self, unit, top=500):
        '''
        Finds closely similar units.

        The source index is queried for the first plural form of the unit's
        source and the best hit is used as the seed for a "more like this"
        lookup on the ``source`` field. Checksums of the hits of the initial
        query are removed, and the rest select translated units in the
        unit's language, excluding the unit itself.

        ``top`` is the maximum number of similar hits requested from the
        index; it defaults to 500, the value that used to be hard-coded.

        Returns an empty queryset when the initial query has no hits.
        '''
        index = FULLTEXT_INDEX.source_searcher(
            not appsettings.OFFLOAD_INDEXING
        )
        source_string = unit.get_source_plurals()[0]
        parser = qparser.QueryParser('source', SOURCE_SCHEMA)
        parsed = parser.parse(source_string)
        checksums = set()
        with index as searcher:
            # Search for same string
            results = searcher.search(parsed)
            if len(results) == 0:
                return self.none()
            first_hit = results[0]
            # Find similar results to first one
            more_results = first_hit.more_like_this(
                'source',
                source_string,
                top
            )
            # Include all more like this results
            checksums.update(result['checksum'] for result in more_results)
            # Remove all original matches
            checksums.difference_update(
                result['checksum'] for result in results
            )

        return self.filter(
            checksum__in=checksums,
            translation__language=unit.translation.language,
            translated=True
        ).exclude(
            pk=unit.id
        )
示例#8
0
文件: unit.py 项目: madhuracj/weblate
    def same_source(self, unit):
        '''
        Returns translated units matching the source of the given unit.

        The first plural form of the unit's source is parsed as a query on
        the ``source`` field and run against the source index. Checksums of
        the hits select translated units in the unit's language; the unit
        itself is excluded.
        '''
        searcher_context = FULLTEXT_INDEX.source_searcher(
            not appsettings.OFFLOAD_INDEXING
        )
        text = unit.get_source_plurals()[0]
        query = qparser.QueryParser('source', SOURCE_SCHEMA).parse(text)
        with searcher_context as searcher:
            # Collect checksums of everything the query matched
            matching = set(
                hit['checksum'] for hit in searcher.search(query)
            )

        same_language = self.filter(
            checksum__in=matching,
            translation__language=unit.translation.language,
            translated=True
        )
        return same_language.exclude(pk=unit.id)