Пример #1
0
 def __init__(self, ruleset, props, data, flags, replace):
     '''
     Initialise the rule from its `data` dict and compile its patterns.

     `props` provides the default 'prefix' and 'suffix', `flags` the
     default regular expression flags and `replace` the default
     substitutions; `data` may override each of them with the keys
     'prefix', 'suffix', 'flags' and 'replace'.
     data['re'] is either a single pattern string or a sequence of
     pattern strings. The `replace` substitutions are applied to every
     pattern with re.sub, then the pattern is wrapped in prefix/suffix
     and compiled. Results are kept in self.patterns as dicts with the
     keys 'compiled' and 'original'.

     Raise TypeError if data['re'] is neither a string nor indexable.
     '''
     Rule.__init__(
         self, ruleset, data.get('name', ''), data.get('comments', []), {
             'prefix': data.get('prefix', props['prefix']),
             'suffix': data.get('suffix', props['suffix']),
         })
     # NOTE(review): self.props is presumably set by Rule.__init__ from
     # the dict passed above -- confirm.
     # '\b' inside a Python string is the backspace character, so its
     # presence suggests an unescaped word-boundary marker in the rule.
     for prop, value in self.props.items():
         if '\b' in value:
             LOG.warning(
                 r'\b found in {0}. '
                 r'To match word boundaries use \\b instead.'.format(prop))
     self.data = data
     self.flags = process_flags(data.get('flags', ''), flags)
     self.replace = data.get('replace', replace)
     self.re = data.get('re', [])
     if isinstance(self.re, str):
         patterns = [self.re]
     elif hasattr(self.re, '__getitem__'):
         patterns = self.re
     else:
         # Fail with a clear message instead of leaving `patterns`
         # unbound and crashing in the loop below.
         raise TypeError(
             "'re' must be a string or a sequence of strings, "
             "got {0}".format(type(self.re).__name__))
     # Report every suspicious pattern before any of them is compiled.
     for pattern in patterns:
         if '\b' in pattern:
             LOG.warning(
                 r'\b found in pattern {0}. '
                 r'To match word boundaries use \\b instead.'.format(
                     pattern.replace('\b', r'\b')))
     self.patterns = []
     for pattern in patterns:
         original_pattern = pattern
         # Each `replace` key is itself a regular expression applied to
         # the pattern text.
         for search_string, replacement in self.replace.items():
             pattern = re.sub(search_string, replacement, pattern)
         compiled = re.compile(
             self.props['prefix'] + pattern + self.props['suffix'],
             self.flags)
         self.patterns.append({
             'compiled': compiled,
             'original': original_pattern
         })
Пример #2
0
def get_rulesets(masks=None):
    '''
    Load rulesets from masks list.

    `masks` is a string containing masks separated by semicolon.
    Shell wildcards are allowed. When a mask matches a directory, every
    entry directly inside that directory is loaded.
    Load from the `rules` directory next to this module by default.

    Lines starting with `//` (after optional whitespace) are blanked
    before the file is parsed as JSON. A file that raises ValueError
    while being read or parsed is logged and skipped.
    Return a list of RegularExpressionRuleset objects.
    '''
    from glob import glob
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    jsoncomment = re.compile(r'^\s*//')
    rules_folder = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'rules')
    rulesets = []
    if not masks:
        masks = os.path.join(rules_folder, '*')
    for mask in masks.split(';'):
        empty_mask = True
        for rule_file_or_dir in glob(mask):
            empty_mask = False
            if os.path.isdir(rule_file_or_dir):
                rule_files = glob(os.path.join(rule_file_or_dir, '*'))
            else:
                rule_files = (rule_file_or_dir,)
            for rule_file in rule_files:
                try:
                    # Close the file as soon as it is read instead of
                    # leaking the handle until garbage collection.
                    with codecs.open(rule_file, encoding='utf-8') as handle:
                        lines = handle.readlines()
                    # remove comments but preserve the same number of lines
                    # so JSON error positions still match the file
                    jsonrule = ''.join(
                        '\n' if jsoncomment.search(line) else line
                        for line in lines)
                    ruleset_dict = json.loads(jsonrule)
                except ValueError as exc:
                    LOG.error(exc)
                    LOG.error("In file: " + rule_file)
                    continue
                # The ruleset is identified by its path relative to the
                # default rules folder.
                ruleset = RegularExpressionRuleset(
                    ruleset_dict, os.path.relpath(rule_file, rules_folder))
                rulesets.append(ruleset)
        if empty_mask:
            LOG.warning('No files matching "{0}" found'.format(mask))
    return rulesets
def process_flags(flags, default=0):
    '''
    Translate a flag string such as "+LUX-M" into a combination of re flags.

    Start from `default`. Each of the letters S, L, U, M, I, X switches
    the matching re.<letter> flag on, or off when it follows a '-'.
    A '+' switches back to adding. Any other character is reported
    through LOG.warn and ignored.
    '''
    combined = default
    removing = False
    for char in flags:
        if char in '+-':
            # Remember the mode for the flag letters that follow.
            removing = (char == '-')
            continue
        if char not in 'SLUMIX':
            LOG.warn('unknown flag: ' + char)
            continue
        bit = getattr(re, char)
        combined = (combined & ~bit) if removing else (combined | bit)
    return combined
Пример #4
0
def process_flags(flags, default=0):
    '''
    Translate a flag string such as "+LUX-M" into a combination of re flags.

    Start from `default`. Each of the letters S, L, U, M, I, X switches
    the matching re.<letter> flag on, or off when it follows a '-'.
    A '+' switches back to adding. Any other character is reported
    through LOG.warn and ignored.
    '''
    combined = default
    removing = False
    for char in flags:
        if char in '+-':
            # Remember the mode for the flag letters that follow.
            removing = (char == '-')
            continue
        if char not in 'SLUMIX':
            LOG.warn('unknown flag: ' + char)
            continue
        bit = getattr(re, char)
        combined = (combined & ~bit) if removing else (combined | bit)
    return combined
Пример #5
0
def get_rulesets(masks=None):
    '''
    Load rulesets from masks list.

    `masks` is a string containing masks separated by semicolon.
    Shell wildcards are allowed. When a mask matches a directory, every
    entry directly inside that directory is loaded.
    Load from the `rules` directory next to this module by default.

    Lines starting with `//` (after optional whitespace) are blanked
    before the file is parsed as JSON. A file that raises ValueError
    while being read or parsed is logged and skipped.
    Return a list of RegularExpressionRuleset objects.
    '''
    from glob import glob
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    jsoncomment = re.compile(r'^\s*//')
    rules_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                'rules')
    rulesets = []
    if not masks:
        masks = os.path.join(rules_folder, '*')
    for mask in masks.split(';'):
        empty_mask = True
        for rule_file_or_dir in glob(mask):
            empty_mask = False
            if os.path.isdir(rule_file_or_dir):
                rule_files = glob(os.path.join(rule_file_or_dir, '*'))
            else:
                rule_files = (rule_file_or_dir, )
            for rule_file in rule_files:
                try:
                    # Close the file as soon as it is read instead of
                    # leaking the handle until garbage collection.
                    with codecs.open(rule_file, encoding='utf-8') as handle:
                        lines = handle.readlines()
                    # remove comments but preserve the same number of lines
                    # so JSON error positions still match the file
                    jsonrule = ''.join(
                        '\n' if jsoncomment.search(line) else line
                        for line in lines)
                    ruleset_dict = json.loads(jsonrule)
                except ValueError as exc:
                    LOG.error(exc)
                    LOG.error("In file: " + rule_file)
                    continue
                # The ruleset is identified by its path relative to the
                # default rules folder.
                ruleset = RegularExpressionRuleset(
                    ruleset_dict, os.path.relpath(rule_file, rules_folder))
                rulesets.append(ruleset)
        if empty_mask:
            LOG.warning('No files matching "{0}" found'.format(mask))
    return rulesets
 def __init__(self, ruleset, props, data, flags, replace):
     '''
     Initialise the rule from its `data` dict and compile its patterns.

     `props` provides the default 'prefix' and 'suffix', `flags` the
     default regular expression flags and `replace` the default
     substitutions; `data` may override each of them with the keys
     'prefix', 'suffix', 'flags' and 'replace'.
     data['re'] is either a single pattern string or a sequence of
     pattern strings. The `replace` substitutions are applied to every
     pattern with re.sub, then the pattern is wrapped in prefix/suffix
     and compiled. Results are kept in self.patterns as dicts with the
     keys 'compiled' and 'original'.

     Raise TypeError if data['re'] is neither a string nor indexable.

     This variant relies on Python 2 names (`basestring`, `iteritems`).
     '''
     Rule.__init__(self, ruleset,
         data.get('name', ''),
         data.get('comments', []),
         {
             'prefix': data.get('prefix', props['prefix']),
             'suffix': data.get('suffix', props['suffix']),
         }
     )
     # NOTE(review): self.props is presumably set by Rule.__init__ from
     # the dict passed above -- confirm.
     # '\b' inside a Python string is the backspace character, so its
     # presence suggests an unescaped word-boundary marker in the rule.
     for prop, value in self.props.iteritems():
         if '\b' in value:
             LOG.warning(
                 r'\b found in {0}. '
                 r'To match word boundaries use \\b instead.'.format(prop))
     self.data = data
     self.flags = process_flags(data.get('flags', ''), flags)
     self.replace = data.get('replace', replace)
     self.re = data.get('re', [])
     if isinstance(self.re, basestring):
         patterns = [self.re]
     elif hasattr(self.re, '__getitem__'):
         patterns = self.re
     else:
         # Fail with a clear message instead of leaving `patterns`
         # unbound and crashing in the loop below.
         raise TypeError(
             "'re' must be a string or a sequence of strings, "
             "got {0}".format(type(self.re).__name__))
     # Report every suspicious pattern before any of them is compiled.
     for pattern in patterns:
         if '\b' in pattern:
             LOG.warning(
                 r'\b found in pattern {0}. '
                 r'To match word boundaries use \\b instead.'.format(
                     pattern.replace('\b', r'\b')))
     self.patterns = []
     for pattern in patterns:
         original_pattern = pattern
         # Each `replace` key is itself a regular expression applied to
         # the pattern text.
         for search_string, replacement in self.replace.iteritems():
             pattern = re.sub(search_string, replacement, pattern)
         compiled = re.compile(
             self.props['prefix'] + pattern + self.props['suffix'],
             self.flags
         )
         self.patterns.append({'compiled': compiled,
                               'original': original_pattern})