Exemplos de multi_matcher em Python, exemplos de repo_scraper.matchers.multi_matcher em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: DiffChecker.py Projeto: digideskio/repo-scraper

    def check(self):
        """Scan the diff content held by this checker and return a Result.

        Outcomes, in the order they are evaluated:
          * ``self.error`` is truthy -> Result(self.filename, self.error)
          * extension not in ``self.allowed_extensions``
            -> Result(label, FILETYPE_NOT_ALLOWED)
          * any matcher fires -> Result(label, MATCH, matches=..., comments=...)
          * otherwise -> Result(label, NOT_MATCH, comments=...)

        Side effect: ``self.content`` is overwritten with the second value
        returned by ``m.base64_matcher(self.content, remove=True)``.
        """
        # Label for the results: filename followed by the commit hash at index 1
        label = '%s (%s)' % (self.filename, self.commit_hashes[1])

        # An error recorded on the instance short-circuits everything else;
        # note it is reported under the bare filename, not the label
        if self.error:
            return Result(self.filename, self.error)

        # Reject files whose extension is not in the allowed set
        extension = filetype.get_extension(self.filename)
        if extension not in self.allowed_extensions:
            return Result(label, FILETYPE_NOT_ALLOWED)

        # Run the base64 matcher in remove mode before applying the other
        # matchers, and remember whether it reported anything
        found_base64, self.content = m.base64_matcher(self.content, remove=True)
        notes = ['BASE64_REMOVED'] if found_base64 else []

        # Matchers applied to the content: passwords, IPs and amazonaws.com
        matchers = (
            m.password_matcher,
            m.ip_matcher,
            m.create_domain_matcher('amazonaws.com'),
        )
        hit, found = m.multi_matcher(self.content, *matchers)

        if not hit:
            return Result(label, NOT_MATCH, comments=notes)
        return Result(label, MATCH, matches=found, comments=notes)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: DiffChecker.py Projeto: yinny/repo-scraper

    def check(self):
        """Run every rule against this diff and report the outcome as a Result.

        Returns early with ``self.error`` when it is set, or with
        FILETYPE_NOT_ALLOWED when the file extension is not listed in
        ``self.allowed_extensions``. Otherwise the content is passed through
        the base64 matcher (in remove mode) and then through the password, IP
        and amazonaws.com matchers; the Result is MATCH (carrying the matches)
        or NOT_MATCH. ``self.content`` is reassigned from the base64 matcher's
        return value.
        """
        # Tag = "<filename> (<commit hash at index 1>)"
        tag = '%s (%s)' % (self.filename, self.commit_hashes[1])

        # A previously recorded error wins; it is keyed by the plain filename
        if self.error:
            return Result(self.filename, self.error)

        # Extension gate
        if filetype.get_extension(self.filename) not in self.allowed_extensions:
            return Result(tag, FILETYPE_NOT_ALLOWED)

        # Remarks accompany the final Result; currently only used to flag
        # that the base64 matcher reported something
        remarks = []
        base64_seen, self.content = m.base64_matcher(self.content,
                                                     remove=True)
        if base64_seen:
            remarks.append('BASE64_REMOVED')

        # Domain matcher for amazonaws.com, applied alongside the password
        # and IP matchers
        aws_matcher = m.create_domain_matcher('amazonaws.com')
        matched, hits = m.multi_matcher(self.content,
                                        m.password_matcher,
                                        m.ip_matcher,
                                        aws_matcher)

        # Only a positive result carries the ``matches`` keyword
        status = MATCH if matched else NOT_MATCH
        extras = {'comments': remarks}
        if matched:
            extras['matches'] = hits
        return Result(tag, status, **extras)

Exemplo n.º 3

0

Exibir arquivo

    def check(self):
        """Scan the file at ``self.path`` and return a Result.

        Outcomes, in the order they are evaluated:
          * file size exceeds ``self.max_file_size_bytes``
            -> Result(self.path, BIG_FILE), without opening the file
          * extension not in ``self.allowed_extensions``
            -> Result(self.path, FILETYPE_NOT_ALLOWED)
          * any matcher fires
            -> Result(self.path, MATCH, matches=..., comments=...)
          * otherwise -> Result(self.path, NOT_MATCH, comments=...)
        """
        # Oversized files are only flagged, never opened, because pattern
        # matching on them would be very slow
        if os.stat(self.path).st_size > self.max_file_size_bytes:
            return Result(self.path, BIG_FILE)

        # Reject files whose extension is not in the allowed set
        extension = filetype.get_extension(self.path)
        if extension not in self.allowed_extensions:
            return Result(self.path, FILETYPE_NOT_ALLOWED)

        # Small enough and of an allowed type: read the whole file
        with open(self.path, 'r') as handle:
            text = handle.read()

        # Run the base64 matcher in remove mode first and note whether it
        # reported anything
        found_base64, text = m.base64_matcher(text, remove=True)
        notes = ['BASE64_REMOVED'] if found_base64 else []

        # Matchers applied to the text: passwords, IPs and amazonaws.com
        matchers = (
            m.password_matcher,
            m.ip_matcher,
            m.create_domain_matcher('amazonaws.com'),
        )
        hit, found = m.multi_matcher(text, *matchers)

        if not hit:
            return Result(self.path, NOT_MATCH, comments=notes)
        return Result(self.path, MATCH, matches=found, comments=notes)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: FileChecker.py Projeto: digideskio/repo-scraper

    def check(self):
        """Apply all rules to the file at ``self.path`` and return a Result.

        The file is skipped with BIG_FILE when its size on disk is greater
        than ``self.max_file_size_bytes``, and with FILETYPE_NOT_ALLOWED when
        its extension is not in ``self.allowed_extensions``. Otherwise its
        content is read, passed through the base64 matcher (in remove mode)
        and then through the password, IP and amazonaws.com matchers; the
        Result is MATCH (carrying the matches) or NOT_MATCH.
        """
        target = self.path

        # Size gate: do not even open files above the limit, since pattern
        # matching on them would be really slow
        size_in_bytes = os.stat(target).st_size
        if size_in_bytes > self.max_file_size_bytes:
            return Result(target, BIG_FILE)

        # Extension gate
        if filetype.get_extension(target) not in self.allowed_extensions:
            return Result(target, FILETYPE_NOT_ALLOWED)

        # Past both gates: load the content
        with open(target, 'r') as source:
            body = source.read()

        # Remarks accompany the final Result; currently only used to flag
        # that the base64 matcher reported something
        remarks = []
        base64_seen, body = m.base64_matcher(body, remove=True)
        if base64_seen:
            remarks.append('BASE64_REMOVED')

        # Domain matcher for amazonaws.com, applied alongside the password
        # and IP matchers
        aws_matcher = m.create_domain_matcher('amazonaws.com')
        matched, hits = m.multi_matcher(body,
                                        m.password_matcher,
                                        m.ip_matcher,
                                        aws_matcher)

        # Only a positive result carries the ``matches`` keyword
        status = MATCH if matched else NOT_MATCH
        extras = {'comments': remarks}
        if matched:
            extras['matches'] = hits
        return Result(target, status, **extras)