def parse(self, data_string):
    """Parse input data and return a dict of result data.

    Fans out one ParserThread per enabled module, joins them, collects
    their results, normalizes confidences, and sorts by confidence.

    :param data_string: the string we want to parse
    :type data_string: str
    :return: summary dict with query, date, timing, top match and all matches
    :rtype: dict
    """
    start_time = time.time()
    results = []
    threads = []

    # Creating/starting a thread for each parser module
    for module in self.config.enabled_modules:
        thread = ParserThread(self.config, module, data_string)
        thread.start()
        threads.append(thread)

    # Synchronizing/finishing parser threads
    for thr in threads:
        thr.join()

    # The threads are done, let's get the results out of them
    for thr in threads:
        results.extend(thr.results)

    # Unique list of all major types. NOTE: computed before normalization,
    # so it reflects every raw parser hit (original behavior preserved).
    types = list({result.type for result in results})

    if results:
        # Unique list of all types and subtypes for the normalizer chain
        all_types = {res.type for res in results} | {res.subtype for res in results}

        # Hierarchical Confidence Normalization
        normalizer_chain = HierarchicalNormalizerChain(
            self.config, types, list(all_types)
        )
        results = normalizer_chain.normalize(results)

        # Sorting our results by confidence value
        results = sorted(
            results, key=lambda result: result.confidence, reverse=True
        )

    return {
        'query': truncate_text(data_string),
        'date': datetime.datetime.now(),
        'execution_seconds': time.time() - start_time,
        'top': results[0] if results else None,
        'results': {
            'count': len(results),
            'types': types,
            'matches': results
        }
    }
def test_normalizer_normalizes(self):
    """A chain with NormalizerStub collapses two results into one ParseResult."""
    config = TestConfig()
    config.enabled_confidence_normalizers.append(NormalizerStub)

    input_results = [
        ParseResult('Test', 'Test', 100),
        ParseResult('Test', 'Test', 0),
    ]

    chain = HierarchicalNormalizerChain(config, [], [])
    normalized = chain.normalize(input_results)

    self.assertEqual(1, len(normalized))
    self.assertIsInstance(normalized[0], ParseResult)
def parse(self, data_string):
    """
    Parses input data and returns a dict of result data

    Runs every enabled parser module in its own thread, waits for all of
    them, merges their results, normalizes confidences hierarchically,
    and sorts the merged results by confidence (descending).

    :param data_string: the string we want to parse
    :type data_string: str
    :return: yields parse result data if there is any
    :rtype: dict
    """
    start_time = time.time()
    results = []
    threads = []

    # Creating/starting a thread for each parser module
    for module in self.config.enabled_modules:
        thread = ParserThread(self.config, module, data_string)
        thread.start()
        threads.append(thread)

    # Synchronizing/finishing parser threads
    for thr in threads:
        thr.join()

    # The threads are done, let's get the results out of them
    for thr in threads:
        results.extend(thr.results)

    # Unique list of all major types — taken before normalization so the
    # report covers every raw parser hit (original behavior preserved)
    types = list({result.type for result in results})

    if results:
        # Getting a unique list of result types and subtypes.
        all_types = {res.type for res in results} | {res.subtype for res in results}

        # Hierarchical Confidence Normalization
        normalizer_chain = HierarchicalNormalizerChain(
            self.config, types, list(all_types)
        )
        results = normalizer_chain.normalize(results)

        # Sorting our results by confidence value
        results = sorted(
            results, key=lambda result: result.confidence, reverse=True
        )

    return {
        'query': truncate_text(data_string),
        'date': datetime.datetime.now(),
        'execution_seconds': time.time() - start_time,
        'top': results[0] if results else None,
        'results': {
            'count': len(results),
            'types': types,
            'matches': results
        }
    }