def lxml_soup(self, string):
    '''Parse arbitrary (possibly malformed) markup into an lxml document.

    The input is converted with force_unicode() and handed to the regular
    lxml.html parser first. If that parser rejects it with XMLSyntaxError
    or ParserError, the lxml.html.soupparser parser is used instead.

    Returns whatever the parser that succeeded returns.
    '''
    from lxml import etree, html

    text = force_unicode(string)
    try:
        return html.fromstring(text)
    except (etree.XMLSyntaxError, etree.ParserError):
        # Last resort for "tag soup": imported lazily, only when needed.
        from lxml.html import soupparser
        return soupparser.fromstring(text)
def dispatch(self, msg):
    '''Append reference info for the snort rule sid found in msg.

    msg is returned unchanged when self.regex does not match it, when the
    matched "gid" group is listed in self.gid_ignore, or when the sid has
    no entry in the sid db. Otherwise a "refs" line is appended to it.

    Before the lookup, once self.conf.sid_db_mtime_check_interval seconds
    have passed since self.sid_db_ts, the db file is checked against its
    source files and rebuilt via self.update_sid_db() if it is missing or
    older than any of them.
    '''
    match = self.regex.search(msg)
    if not match:
        log.debug('Failed to match snort rule-sid in msg: {!r}'.format(msg))
        return msg
    sid = match.group('sid')

    if self.gid_ignore:
        try:
            gid = match.group('gid')
        except IndexError:
            pass  # configured regex has no "gid" group
        else:
            if gid in self.gid_ignore:
                return msg

    # NOTE(review): self.sid_db_ts is not updated in this method;
    #  presumably update_sid_db() maintains it - confirm.
    ts = time()
    if self.sid_db_ts < ts - self.conf.sid_db_mtime_check_interval:
        paths = self.conf.paths
        rebuilt = False
        # max() is taken over a list which always includes 0, so it also
        #  works when neither of the source files exists (max(0, *[])
        #  would raise TypeError in that case).
        if not os.path.exists(paths.sid_db)\
                or max([0] + [
                    os.stat(p).st_mtime
                    for p in [paths.sid_src, paths.refs]
                    if os.path.exists(p)
                ]) > os.stat(paths.sid_db).st_mtime:
            self.update_sid_db()
            rebuilt = True
        # (Re)open the db after a rebuild, or if there is no handle yet.
        if rebuilt or getattr(self, 'sid_db', None) is None:
            self.sid_db = anydbm.open(paths.sid_db)

    try:
        ref = force_unicode(self.sid_db[force_bytes(sid)])
    except KeyError:
        log.info('Failed to find refs for sid: {!r} (msg: {!r})'.format(sid, msg))
    else:
        msg += u'\n refs: {}'.format(ref)
    return msg
def shorten(self, url):
    '''Shorten url via the configured API and strip its scheme prefix.

    This is a generator which yields a deferred and passes its result via
    defer.returnValue(), so it is presumably wrapped with
    defer.inlineCallbacks outside of this block - confirm.

    URLs shorter than self.conf.length_min bytes are not sent to the
    shortener, but still get their http/https/spdy scheme stripped.

    Result is the force_unicode()-converted URL without the scheme.
    Raises ValueError if there is no shorten_<self.conf.api.type> method.
    '''
    url = force_bytes(url)
    if len(url) >= self.conf.length_min:
        api_type = None
        try:
            api_type = self.conf.api.type
            func = getattr(self, 'shorten_{}'.format(api_type))
        except AttributeError:
            # Name the offending API type in the error message.
            raise ValueError(
                'URL shortener "{}" is not supported'.format(api_type))
        url = yield defer.maybeDeferred(func, url, self.conf.api.parameters)
    # Global inline flag has to be at the very start of the pattern.
    defer.returnValue(force_unicode(re.sub(r'(?i)^(https?|spdy)://', '', url)))
def __init__(self, *argz, **kwz):
    '''Initialize the parent class and pre-process warning templates.

    If both self.conf.warn and self.conf.warn.has_keys are set, has_keys
    is normalized to a list of "{...}" format templates (a single string
    is treated as a one-element list) and warn.template is passed through
    force_unicode(). Otherwise self.conf.warn is reset to None.
    '''
    super(AtomOStatusLink, self).__init__(*argz, **kwz)

    warn = self.conf.warn
    if not (warn and warn.has_keys):
        self.conf.warn = None
        return

    keys = warn.has_keys
    if isinstance(keys, types.StringTypes):
        keys = [keys]
    # Bare key names get wrapped into braces, to be used with str.format().
    self.conf.warn.has_keys = [
        key if key.startswith('{') and key.endswith('}')
        else '{{{}}}'.format(key)
        for key in keys
    ]
    self.conf.warn.template = force_unicode(self.conf.warn.template)
def dispatch(self, msg):
    '''Format a feed entry into a list of output lines.

    Returns None (entry is skipped) for poll-response activities when
    self.conf.skip.poll_response is set, and for entries of the "other"
    type with content starting with "RT @" when self.conf.skip.rts is set.

    Otherwise returns a list with the formatted entry, followed by one
    warning line for each self.conf.warn.has_keys template which can be
    formatted from the message data.

    Raises ValueError if the warning template itself fails to format.
    '''
    conf, post = self.conf, msg.data.post

    # Message id: truncated base64 of sha1 of the conversation link
    convo_id = 'none?'
    for link in post.links:
        if link.rel != 'ostatus:conversation':
            continue
        digest = hashlib.sha1(link.href).digest()
        convo_id = digest.encode('base64').replace('/', '-')[:conf.id_length]
        break

    # Template is picked by activity object type, "other" being a fallback
    atype, tpl = 'other', conf.template.other
    obj_type = post['activity_object-type']
    if conf.skip.poll_response and re.search(r'/poll-response$', obj_type):
        return
    for kind, pattern in ('note', r'/note$'), ('comment', r'/comment$'):
        if re.search(pattern, obj_type):
            atype, tpl = kind, conf.template[kind]
            break

    # RTs
    if conf.skip.rts:
        try:
            content = post.content[0].value
        except self._lookup_error:
            pass
        else:
            if atype == 'other' and content.startswith('RT @'):
                return

    lines = [force_unicode(tpl).format(msg=msg, id=convo_id)]
    if not conf.warn:
        return lines

    # Warnings: one line per key template which resolves for this message
    fields = msg.data._asdict()
    for key_tpl in conf.warn.has_keys:
        try:
            value = key_tpl.format(**fields)
        except self._lookup_error:
            continue
        warn_data = dict(id=convo_id, key=key_tpl.strip('{}'), value=value)
        try:
            warning = conf.warn.template.format(**warn_data)
        except self._lookup_error as err:
            raise ValueError(
                'Failed to format template'
                ' {!r} (data: {}): {}'.format(conf.warn.template, warn_data, err)
            )
        lines.append(warning)
    return lines
def handle_line(self, line, path):
    '''Wrap a line into RelayedEvent and schedule its dispatch.

    line is passed through force_unicode() and event gets the path.path
    value attached as data.path. Dispatch is not done directly, but is
    scheduled via reactor.callLater() with zero delay, passing this
    object as the source.
    '''
    log.noise('New line: {!r}'.format(line))
    relayed = RelayedEvent(force_unicode(line))
    relayed.data = AttrDict(path=path.path)
    reactor.callLater(0, self.interface.dispatch, relayed, source=self)
def dispatch(self, msg):
    '''Collect audit event lines and format complete watched events.

    Lines are matched with self._re_base and cached in self._ev_cache by
    (node, ev_id) until a line of the "EOE" type is seen for that key.
    The event is then removed from the cache and, if its single SYSCALL
    line carries a key listed in self.conf.events.watches.ev_keys,
    formatted using the configured templates.

    Returns RelayedEvent (with extracted values as its data attribute)
    for such an event, and None for blank or unmatched lines, for lines
    of an incomplete event and for unhandled events.

    Raises ValueError if either of the templates fails to format.
    '''
    if not msg.strip():
        return

    ## Event lines are cached until EOE msg is encountered
    match = self._re_base.search(msg)
    if not match:
        log.warn('Failed to match audit event spec: {!r}'.format(msg))
        return
    node, ev_id, ev_type, msg = (
        match.group(k) for k in ['node', 'ev_id', 'type', 'msg'])
    ev_key = node, ev_id
    if ev_key not in self._ev_cache:
        self._ev_cache[ev_key] = defaultdict(list)
        self._ev_cache[ev_key].update(ts=time.time(), node=node, ev_id=ev_id)
        self._ev_cache_gc()
    ev = self._ev_cache[ev_key]
    if ev_type != 'EOE':  # cache event data
        ev[ev_type].append(msg)
        return
    del self._ev_cache[ev_key]

    ## Get "key" value for event, if present
    ev_key = None
    try:
        syscall, = ev['SYSCALL']  # currently handled events always have it
    except ValueError:
        pass  # no SYSCALL line, or more than one
    else:
        try:
            ev_key = self.get_msg_val(syscall, 'key', u'"(?P<val>[^"]+)"')
        except KeyError as err:
            log.noise('Failed to get ev_key from syscall: {}'.format(err))
    if not ev_key:
        log.noise('Unhandled event: {!r}'.format(ev))
        return

    ## Processing
    if ev_key not in self.conf.events.watches.ev_keys:
        return

    # Extract all necessary attributes
    ev_vals = dict(node=ev['node'], ev_id=ev['ev_id'], key=ev_key)
    for prefix, suffix in it.product(['', 'e', 's', 'fs'], ['uid', 'gid']):
        k = prefix + suffix  # uid, gid, euid, egid, suid, ...
        ev_vals[k] = self.get_msg_val(syscall, k)
    for k in 'comm', 'exe':
        ev_vals[k] = self.get_msg_val(syscall, k, u'"(?P<val>[^"]+)"')
    ev_vals['tty'] = self.get_msg_val(syscall, 'tty', r'(?P<val>\S+)')
    paths = list()
    for path_msg in ev['PATH']:
        paths.append(dict(
            path=self.get_msg_val(
                path_msg, 'name', u'(?P<val>"[^"]+"|\\(null\\)|[0-9A-F]+)'),
            inode=self.get_msg_val(path_msg, 'inode', fallback='nil'),
            dev=self.get_msg_val(
                path_msg, 'dev',
                r'(?P<val>[a-f\d]{2}:[a-f\d]{2})', fallback='nil')
        ))

    # Formatting
    # ValueError is raised right where formatting fails, so that a failure
    #  of the main template can't fall through to use of an unset result.
    err_msg = 'Failed to format template {!r} (data: {}): {}'
    tpl = force_unicode(self.conf.events.watches.template_path)
    paths_fmt = list()
    for val in paths:
        try:
            paths_fmt.append(tpl.format(**val))
        except self._lookup_error as err:
            raise ValueError(err_msg.format(tpl, val, err))
    ev_vals['paths'] = ', '.join(paths_fmt)

    tpl = force_unicode(self.conf.events.watches.template)
    try:
        text = tpl.format(**ev_vals)
    except self._lookup_error as err:
        raise ValueError(err_msg.format(tpl, ev_vals, err))
    event = RelayedEvent(text)
    event.data = ev_vals
    return event