Пример #1
0
    def parse(self, data):
        """
        Yield one result for every language the data scores against

        The input is narrowed in stages: a common-token check, a keyword
        match, a lexer pass over the lower-cased input, and finally a
        Bayesian classification of the raw input. The lexer and classifier
        outputs are merged by ``calculate_confidence`` into one scorecard
        per language id.

        :param data: the string we want to parse
        :type data: str
        :return: yields whatever ``self.result`` builds for each scored
            language; yields nothing when an early stage rejects the data
        :rtype: ParseResult (presumably -- built by ``self.result``)
        """
        tokens = self.create_dataset(data)

        # Stage 1: give up immediately when the common-token check fails
        if not self.find_common_tokens(tokens):
            return

        # Stage 2: candidate languages from the keyword match
        candidates = self.get_possible_languages(tokens)

        # Stage 3: lex the lower-cased input against those candidates
        lexed = ProgrammingLexer(candidates, data.lower()).lex()
        if not lexed:
            return

        # Stage 4: classify the untouched input, then merge both signals
        classified = ProgrammingBayesianClassifier().classify(data)
        scorecards = self.calculate_confidence(lexed, classified)

        for language_id, card in scorecards.items():
            yield self.result(
                self.language_keywords[language_id]["name"],
                card["confidence"],
                card,
            )
Пример #2
0
    def parse(self, data):
        """
        Yield a ParseResult for every language the data scores against

        The input is narrowed in stages: a common-token check, a keyword
        match, a lexer pass over the lower-cased input, and finally a
        Bayesian classification of the raw input. ``normalize_scores``
        turns the raw input plus both outputs into a mapping of language
        id to confidence.

        :param data: the string we want to parse
        :return: yields one ParseResult per scored language; yields
            nothing when an early stage rejects the data
        """
        tokens = self.create_dataset(data)

        # Stage 1: give up immediately when the common-token check fails
        if not self.find_common_tokens(tokens):
            return

        # Stage 2: candidate languages from the keyword match
        candidates = self.get_possible_languages(tokens)

        # Stage 3: lex the lower-cased input against those candidates
        lexed = ProgrammingLexer(candidates, data.lower()).lex()
        if not lexed:
            return

        # Stage 4: classify the untouched input, then normalise the scores
        classified = ProgrammingBayesianClassifier().classify(data)
        confidences = self.normalize_scores(data, lexed, classified)

        for language_id, score in confidences.items():
            yield ParseResult(
                self.type,
                self.language_keywords[language_id]['name'],
                score,
            )
Пример #3
0
    def parse(self, data):
        """
        Yield one result for every language the data scores against

        The input is narrowed in stages: a common-token check, a keyword
        match, a lexer pass over the lower-cased input, and finally a
        Bayesian classification of the raw input. The lexer and classifier
        outputs are merged by ``calculate_confidence`` into one scorecard
        per language id.

        :param data: the string we want to parse
        :type data: str
        :return: yields whatever ``self.result`` builds for each scored
            language; yields nothing when an early stage rejects the data
        :rtype: ParseResult (presumably -- built by ``self.result``)
        """
        tokens = self.create_dataset(data)

        # Stage 1: give up immediately when the common-token check fails
        if not self.find_common_tokens(tokens):
            return

        # Stage 2: candidate languages from the keyword match
        candidates = self.get_possible_languages(tokens)

        # Stage 3: lex the lower-cased input against those candidates
        lexed = ProgrammingLexer(candidates, data.lower()).lex()
        if not lexed:
            return

        # Stage 4: classify the untouched input, then merge both signals
        classified = ProgrammingBayesianClassifier().classify(data)
        scorecards = self.calculate_confidence(lexed, classified)

        for language_id, card in scorecards.items():
            yield self.result(
                self.language_keywords[language_id]['name'],
                card['confidence'],
                card,
            )
Пример #4
0
    def test_classifierProducesExpectedResult(self):
        """
        Classify a sample string after bootstrapping with TestConfig

        Checks that ``SimpleBayesStub.data_string`` equals the sample
        (presumably the stub records the string it was handed -- confirm
        against SimpleBayesStub) and that ``classify`` returns 'FooBar'.
        """
        sample = 'echo "Hello World";'

        ProgrammingBayesianClassifier.bootstrap(TestConfig)
        outcome = ProgrammingBayesianClassifier().classify(sample)

        self.assertEqual(sample, SimpleBayesStub.data_string)
        self.assertEqual('FooBar', outcome)
Пример #5
0
    def test_classifierProducesExpectedResult(self):
        """
        Classify a sample string after bootstrapping with TestConfig

        Checks that ``SimpleBayesStub.data_string`` equals the sample
        (presumably the stub records the string it was handed -- confirm
        against SimpleBayesStub) and that ``classify`` returns 'FooBar'.
        """
        sample = 'echo "Hello World";'

        ProgrammingBayesianClassifier.bootstrap(TestConfig)
        outcome = ProgrammingBayesianClassifier().classify(sample)

        self.assertEqual(sample, SimpleBayesStub.data_string)
        self.assertEqual('FooBar', outcome)