示例#1
0
    def analyze_comment(self, comment, stats=True):
        """Classify ``comment`` and return the matching ``ProcessingResult``.

        The checks run in a fixed order; the first one that applies decides
        the result:

        1. ``BAD_FORMAT`` - ``helper.is_correctly_formatted(comment)`` is falsy.
        2. ``NOT_NEEDED`` - ``helper.should_be_parsed(comment.body)`` is falsy.
        3. ``NOT_PARSABLE`` - ``helper.can_be_parsed(comment.body)`` is falsy.
        4. ``TOO_COMPLEX`` - ``cfg.CHECK_COMPLEXITY`` is set and the estimated
           complexity of the parsed text exceeds ``cfg.MAX_COMPLEXITY``.
        5. ``CORRECTLY_CLOSED`` - ``self.parser.detect_unbalance`` is falsy.
        6. ``BANNED_FROM_SUBREDDIT`` - the subreddit is in ``cfg.BANNED_FROM``.
        7. ``OWN_COMMENT`` - ``helper.comment_is_by_me(comment)`` is truthy.
        8. ``AUTHOR_BLOCKED`` - the author name is in ``cfg.IGNORE_USERS`` or
           ends with "bot" (case-insensitive).
        9. ``UNBALANCED`` - none of the above applied.

        :param comment: object exposing ``body``, ``body_html``, ``subreddit``
            and ``author.name``.
        :param stats: when true, the ``analyzed``, ``parsed`` and
            ``too_complex`` counters on ``self.stat`` are incremented.
        :return: a ``ProcessingResult`` member.
        """
        if stats:
            self.stat.analyzed += 1

        # Pre-filters that run before the comment is handed to the parser.
        if not helper.is_correctly_formatted(comment):
            return ProcessingResult.BAD_FORMAT

        text = comment.body
        if not helper.should_be_parsed(text):
            return ProcessingResult.NOT_NEEDED
        if not helper.can_be_parsed(text):
            return ProcessingResult.NOT_PARSABLE

        # The parser is fed the HTML body, not the plain body checked above.
        parse_result = self.parser.parse(comment.body_html)
        if stats:
            self.stat.parsed += 1
        parsable = parse_result.parsable

        # The complexity estimate is only computed when the check is enabled.
        if (cfg.CHECK_COMPLEXITY
                and helper.estimate_complexity(parsable) > cfg.MAX_COMPLEXITY):
            if stats:
                self.stat.too_complex += 1
            return ProcessingResult.TOO_COMPLEX

        if not self.parser.detect_unbalance(parsable):
            return ProcessingResult.CORRECTLY_CLOSED

        # From here on the text is unbalanced; the remaining checks look at
        # where the comment was posted and who wrote it.
        if str(comment.subreddit) in cfg.BANNED_FROM:
            return ProcessingResult.BANNED_FROM_SUBREDDIT

        if helper.comment_is_by_me(comment):
            return ProcessingResult.OWN_COMMENT

        # NOTE(review): comment.author is dereferenced without a None check -
        # confirm an earlier check rules out comments that have no author.
        author_name = comment.author.name
        if (author_name in cfg.IGNORE_USERS
                or author_name.lower().endswith("bot")):
            return ProcessingResult.AUTHOR_BLOCKED

        return ProcessingResult.UNBALANCED
    def analyze_comment(self, comment, stats=True):
        """Run the analysis pipeline on ``comment`` and report its outcome.

        Each stage can end the analysis early with its own
        ``ProcessingResult``; ``UNBALANCED`` is returned only when every
        stage lets the comment through.

        :param comment: object exposing ``body``, ``body_html``, ``subreddit``
            and ``author.name``.
        :param stats: when true, update the ``analyzed``, ``parsed`` and
            ``too_complex`` counters on ``self.stat``.
        :return: one of ``BAD_FORMAT``, ``NOT_NEEDED``, ``NOT_PARSABLE``,
            ``TOO_COMPLEX``, ``CORRECTLY_CLOSED``, ``BANNED_FROM_SUBREDDIT``,
            ``OWN_COMMENT``, ``AUTHOR_BLOCKED`` or ``UNBALANCED``
            (all members of ``ProcessingResult``).
        """
        # Stage 1: primary analysis, before any parsing happens.
        if stats:
            self.stat.analyzed += 1
        if not helper.is_correctly_formatted(comment):
            return ProcessingResult.BAD_FORMAT
        if not helper.should_be_parsed(comment.body):
            return ProcessingResult.NOT_NEEDED
        if not helper.can_be_parsed(comment.body):
            return ProcessingResult.NOT_PARSABLE

        # Stage 2: parse the HTML body.
        parse_result = self.parser.parse(comment.body_html)
        if stats:
            self.stat.parsed += 1

        # Stage 3: optional complexity limit (skipped entirely when disabled).
        over_limit = (
            cfg.CHECK_COMPLEXITY
            and helper.estimate_complexity(parse_result.parsable)
            > cfg.MAX_COMPLEXITY
        )
        if over_limit:
            if stats:
                self.stat.too_complex += 1
            return ProcessingResult.TOO_COMPLEX

        # Stage 4: nothing to report when no unbalance is detected.
        has_unbalance = self.parser.detect_unbalance(parse_result.parsable)
        if not has_unbalance:
            return ProcessingResult.CORRECTLY_CLOSED

        # Stage 5: subreddit ban list.
        if str(comment.subreddit) in cfg.BANNED_FROM:
            return ProcessingResult.BANNED_FROM_SUBREDDIT

        # Stage 6: author filters - own comments first, then ignored users and
        # any account whose name ends with "bot" (case-insensitive).
        if helper.comment_is_by_me(comment):
            return ProcessingResult.OWN_COMMENT

        name = comment.author.name
        blocked = name in cfg.IGNORE_USERS or name.lower().endswith("bot")
        return (
            ProcessingResult.AUTHOR_BLOCKED
            if blocked
            else ProcessingResult.UNBALANCED
        )
示例#3
0
 def testParseabilityDetector(self):
     """Check ``helper.can_be_parsed`` against both resource lists.

     Every entry of ``TEST_STRINGS_CAN_NOT_BE_PARSED`` must yield a falsy
     result and every entry of ``TEST_STRINGS_CAN_BE_PARSED`` a truthy one.
     """
     # The assertions run in a loop, so each one names the string it was
     # checking; otherwise a failure does not say which entry caused it.
     for s in test.resources.TEST_STRINGS_CAN_NOT_BE_PARSED:
         self.assertFalse(
             helper.can_be_parsed(s),
             "can_be_parsed() should reject %r" % (s,)
         )
     for s in test.resources.TEST_STRINGS_CAN_BE_PARSED:
         self.assertTrue(
             helper.can_be_parsed(s),
             "can_be_parsed() should accept %r" % (s,)
         )
 def testParseabilityDetector(self):
     """Verify ``helper.can_be_parsed`` on the negative and positive samples.

     Strings from ``test.resources.TEST_STRINGS_CAN_NOT_BE_PARSED`` are
     expected to give a falsy result; strings from
     ``test.resources.TEST_STRINGS_CAN_BE_PARSED`` a truthy one.
     """
     for s in test.resources.TEST_STRINGS_CAN_NOT_BE_PARSED:
         # Include the sample in the message so a failing entry is
         # identifiable without re-running the loop by hand.
         self.assertFalse(
             helper.can_be_parsed(s),
             msg="expected unparsable, got parsable: %r" % (s,)
         )
     for s in test.resources.TEST_STRINGS_CAN_BE_PARSED:
         self.assertTrue(
             helper.can_be_parsed(s),
             msg="expected parsable, got unparsable: %r" % (s,)
         )