Пример #1
0
    def test_too_long_string(self):
        """An over-long string is cut short and ends in an ellipsis.

        The input is 90 characters long; the expected value is its first
        77 characters followed by '...' (80 characters in total), with no
        explicit limit passed to truncate_text.
        """
        test_string = ('Lorem ipsum dolor sit amet, consectetur adipiscing'
                       ' elit. Suspendisse non risus risus amet.')
        truncated_test_string = ('Lorem ipsum dolor sit amet, consectetur'
                                 ' adipiscing elit. Suspendisse non risu...')

        # assertEqual, not the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(truncate_text(test_string), truncated_test_string)
Пример #2
0
    def parse(self, data_string):
        """Run each enabled parser module on the input; return a summary."""

        started_at = time.time()

        # One ParserThread per enabled module, started as soon as it exists
        workers = []
        for parser_module in self.config.enabled_modules:
            worker = ParserThread(self.config, parser_module, data_string)
            worker.start()
            workers.append(worker)

        # Wait for every worker before touching any of their results
        for worker in workers:
            worker.join()

        # Flatten the per-worker results, preserving worker order
        matches = [match for worker in workers for match in worker.results]

        # Distinct major types over all matches
        major_types = list({match.type for match in matches})

        if matches:
            # Every type and subtype seen, de-duplicated for the normalizer
            all_types = [
                name
                for match in matches
                for name in (match.type, match.subtype)
            ]

            # Hierarchical confidence normalization, then order the
            # normalized matches from most to least confident
            normalizer = HierarchicalNormalizerChain(
                self.config,
                major_types,
                list(set(all_types))
            )
            matches = sorted(
                normalizer.normalize(matches),
                key=lambda match: match.confidence,
                reverse=True
            )

        best_match = matches[0] if matches else None

        return {
            'query': truncate_text(data_string),
            'date': datetime.datetime.now(),
            'execution_seconds': time.time() - started_at,
            'top': best_match,
            'results': {
                'count': len(matches),
                'types': major_types,
                'matches': matches
            }
        }
Пример #3
0
    def parse(self, data_string):
        """
        Run every enabled parser module against the input and return a
        summary of what they found.

        :param data_string: the string we want to parse
        :type data_string: str
        :return: dict with the keys ``query``, ``date``,
            ``execution_seconds``, ``top`` (first match after sorting by
            confidence, or None when nothing matched) and ``results``
            (``count``, ``types`` and ``matches``)
        :rtype: dict
        """
        begin = time.time()

        # Spawn and start one parser thread per enabled module
        pool = []
        for mod in self.config.enabled_modules:
            runner = ParserThread(self.config, mod, data_string)
            runner.start()
            pool.append(runner)

        # Let all of them finish before collecting anything
        for runner in pool:
            runner.join()

        # Gather the results in thread order
        found = [item for runner in pool for item in runner.results]

        # Unique major types
        types = list({item.type for item in found})

        if found:
            # Major types and subtypes together, for the normalizer
            type_names = [
                label for item in found for label in (item.type, item.subtype)
            ]

            # Hierarchical Confidence Normalization
            chain = HierarchicalNormalizerChain(
                self.config, types, list(set(type_names))
            )
            normalized = chain.normalize(found)

            # Highest confidence first
            found = sorted(
                normalized,
                key=lambda item: item.confidence,
                reverse=True
            )

        top_match = found[0] if found else None

        return {
            'query': truncate_text(data_string),
            'date': datetime.datetime.now(),
            'execution_seconds': time.time() - begin,
            'top': top_match,
            'results': {
                'count': len(found),
                'types': types,
                'matches': found
            }
        }
Пример #4
0
 def test_short_limit(self):
     """With a limit of 10 the fixture string becomes 'The qui...'.

     The expected value is exactly 10 characters long, ellipsis included.
     self.testString is defined outside this method.
     """
     # assertEqual, not the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(truncate_text(self.testString, 10), 'The qui...')
Пример #5
0
 def test_too_short_string(self):
     """The fixture string comes back unchanged when no limit is given.

     self.testString is defined outside this method; presumably it is
     shorter than truncate_text's default limit -- confirm in the fixture.
     """
     # assertEqual, not the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(truncate_text(self.testString), self.testString)