def _poll_files(self):
    # Claim files from a maildir-style input directory and emit
    # (input path, output path) pairs for downstream processing.
    in_progress = os.path.join(self.work_dir, "in-progress")
    done = os.path.join(self.work_dir, "done")
    makedirs(in_progress)
    makedirs(done)

    # Re-emit files left in "in-progress" by a previous run so that
    # interrupted work gets finished first.
    for dirname, filename in iter_dir(in_progress):
        input_name = os.path.join(dirname, filename)
        output_name = os.path.join(done, filename)
        yield idiokit.send(input_name, output_name)

    while True:
        # "new" and "cur" follow the maildir naming convention.
        paths = itertools.chain(
            iter_dir(os.path.join(self.input_dir, "new")),
            iter_dir(os.path.join(self.input_dir, "cur")))

        for dirname, filename in paths:
            # A UUID prefix keeps concurrently claimed files from colliding.
            uuid_name = uuid.uuid4().hex + "." + filename
            input_name = os.path.join(in_progress, uuid_name)
            output_name = os.path.join(done, uuid_name)

            # The rename is the atomic claim; losing the race just skips.
            if try_rename(os.path.join(dirname, filename), input_name):
                yield idiokit.send(input_name, output_name)

        yield idiokit.sleep(self.poll_interval)
def follow_config(path, poll_interval=1.0, force_interval=30.0):
    """
    Poll a config file and send (True, configs) after each successful
    (re)load, or (False, error message) when loading fails.

    Reload happens when the file's mtime changes and unconditionally
    every force_interval seconds; the file is polled every
    poll_interval seconds.
    """
    last_reload = -float("inf")
    last_mtime = None
    last_error_msg = None
    abspath = os.path.abspath(path)

    while True:
        try:
            now = time.time()
            # Guard against the system clock jumping backwards.
            if now < last_reload:
                last_reload = now

            mtime = os.path.getmtime(abspath)
            if now > last_reload + force_interval or last_mtime != mtime:
                configs = load_configs(abspath)
                yield idiokit.send(True, tuple(flatten(configs)))
                last_error_msg = None
                last_mtime = mtime
                last_reload = now
        except Exception as exc:
            error_msg = "Could not load module {0!r}: {1!r}".format(abspath, exc)
            # Report each distinct error only once to avoid log spam.
            if error_msg != last_error_msg:
                yield idiokit.send(False, error_msg)
                last_error_msg = error_msg
            # Force a retry on the next poll round.
            last_mtime = None

        yield idiokit.sleep(poll_interval)
def _poll_files(self):
    # Poll a maildir-style input directory, claiming files into
    # work_dir/in-progress and emitting (input, output) path pairs.
    in_progress = os.path.join(self.work_dir, "in-progress")
    done = os.path.join(self.work_dir, "done")
    makedirs(in_progress)
    makedirs(done)

    # First pass: hand out anything a previous run left unfinished.
    for dirname, filename in iter_dir(in_progress):
        input_name = os.path.join(dirname, filename)
        output_name = os.path.join(done, filename)
        yield idiokit.send(input_name, output_name)

    while True:
        paths = itertools.chain(
            iter_dir(os.path.join(self.input_dir, "new")),
            iter_dir(os.path.join(self.input_dir, "cur"))
        )

        for dirname, filename in paths:
            # Unique name so parallel claimers can never clash.
            uuid_name = uuid.uuid4().hex + "." + filename
            input_name = os.path.join(in_progress, uuid_name)
            output_name = os.path.join(done, uuid_name)

            # try_rename atomically claims the file; a failed rename
            # means someone else got it first.
            if try_rename(os.path.join(dirname, filename), input_name):
                yield idiokit.send(input_name, output_name)

        yield idiokit.sleep(self.poll_interval)
def feed(self, query, feed_all):
    # Incrementally mirror page metadata from a GraphingWiki collab,
    # emitting "id:open" events for live pages and "id:close" events
    # for removed ones.
    collab = wiki.GraphingWiki(self.collab_url,
                               ssl_verify_cert=not self.collab_ignore_cert,
                               ssl_ca_certs=self.collab_extra_ca_certs)
    yield idiokit.thread(collab.authenticate, self.collab_user, self.collab_password)
    yield idiokit.sleep(5)

    # token tracks the server-side sync cursor; current maps page name
    # to its accumulated event state.
    token = None
    current = dict()

    while True:
        try:
            result = yield idiokit.thread(collab.request, "IncGetMeta", query, token)
        except wiki.WikiFailure as fail:
            self.log.error("IncGetMeta failed: {0!r}".format(fail))
        else:
            incremental, token, (removed, updates) = result
            removed = set(removed)

            # A non-incremental answer replaces the whole known state.
            if not incremental:
                removed.update(current)
                current.clear()

            for page, keys in updates.iteritems():
                event = current.setdefault(page, events.Event())
                event.add("id:open", self.page_id(page))
                event.add("gwikipagename", page)
                event.add(
                    "collab url",
                    self.collab_url + urllib.quote(page.encode("utf8")))
                removed.discard(page)

                # Apply per-key diffs: discard removed values, add new ones.
                for key, (discarded, added) in keys.iteritems():
                    for value in map(normalize, discarded):
                        event.discard(key, value)
                    for value in map(normalize, added):
                        event.add(key, value)

                if not feed_all:
                    yield idiokit.send(event)

            for page in removed:
                current.pop(page, None)
                event = events.Event()
                event.add("id:close", self.page_id(page))
                event.add("gwikipagename", page)
                event.add("collab url", self.collab_url + page)
                yield idiokit.send(event)

            # feed_all mode re-sends the full current state every round.
            if feed_all:
                for page in current:
                    yield idiokit.send(current[page])

        yield idiokit.sleep(self.poll_interval)
def feed(self, query, feed_all):
    # Sync page metadata from a GraphingWiki collab via IncGetMeta and
    # emit open/close events as pages change.
    collab = wiki.GraphingWiki(
        self.collab_url,
        ssl_verify_cert=not self.collab_ignore_cert,
        ssl_ca_certs=self.collab_extra_ca_certs)
    yield idiokit.thread(collab.authenticate, self.collab_user, self.collab_password)
    yield idiokit.sleep(5)

    # token is the incremental sync cursor; current holds per-page state.
    token = None
    current = dict()

    while True:
        try:
            result = yield idiokit.thread(collab.request, "IncGetMeta", query, token)
        except wiki.WikiFailure as fail:
            self.log.error("IncGetMeta failed: {0!r}".format(fail))
        else:
            incremental, token, (removed, updates) = result
            removed = set(removed)

            # Non-incremental result: everything not updated below is gone.
            if not incremental:
                removed.update(current)
                current.clear()

            for page, keys in updates.iteritems():
                event = current.setdefault(page, events.Event())
                event.add("id:open", self.page_id(page))
                event.add("gwikipagename", page)
                event.add("collab url",
                          self.collab_url + urllib.quote(page.encode("utf8")))
                removed.discard(page)

                # keys maps each metadata key to (discarded, added) values.
                for key, (discarded, added) in keys.iteritems():
                    for value in map(normalize, discarded):
                        event.discard(key, value)
                    for value in map(normalize, added):
                        event.add(key, value)

                if not feed_all:
                    yield idiokit.send(event)

            for page in removed:
                current.pop(page, None)
                event = events.Event()
                event.add("id:close", self.page_id(page))
                event.add("gwikipagename", page)
                event.add("collab url", self.collab_url + page)
                yield idiokit.send(event)

            # In feed_all mode the whole state is re-sent each round.
            if feed_all:
                for page in current:
                    yield idiokit.send(current[page])

        yield idiokit.sleep(self.poll_interval)
def _geoip_lookup(self, event):
    # Augment the event with geo data for its IP value, then forward it.
    # Fix: the original yielded idiokit.send(event) twice, emitting the
    # same event twice downstream; it is now sent exactly once.
    self.ip_key = "ip"
    if event.contains(self.ip_key):
        ip = event.value(self.ip_key)
        geoip_result = self.geoip.geomap(ip)
        result = geoip_result.next()
        if result:
            # Each mapped value is a sequence; only its first element
            # is recorded on the event.
            for key, value in result.items():
                event.add(key, str(value[0]))
        else:
            self.log.info("No geomap info for ip: %s" % ip)
    yield idiokit.send(event)
class OpenBLBot(bot.PollingBot):
    """Polls the OpenBL feed and emits one brute-force event per entry."""

    feed_url = bot.Param(default=OPENBL_FEED_URL)
    use_cymru_whois = bot.BoolParam()

    def poll(self):
        # Optionally pipe the raw feed through Cymru whois augmentation.
        pipe = self._poll(url=self.feed_url)
        if self.use_cymru_whois:
            pipe = pipe | cymruwhois.augment("ip")
        return pipe

    @idiokit.stream
    def _poll(self, url):
        self.log.info("Downloading %s" % url)
        try:
            info, fileobj = yield utils.fetch_url(url)
        # Fix: use modern "except ... as ..." syntax instead of the
        # legacy comma form, consistent with the rest of the file.
        except utils.FetchUrlFailed as fuf:
            raise bot.PollSkipped("failed to download {0!r} ({1})".format(url, fuf))
        self.log.info("Downloaded")

        for line in fileobj:
            event = _parse_line(line)
            if event is None:
                continue

            event.add("feeder", "openbl.org")
            event.add("feed", "openbl")
            event.add("description url", self.feed_url)
            event.add("type", "brute-force")
            event.add(
                "description",
                "This host has most likely been performing brute-force " +
                "attacks on one of the following services: FTP, SSH, POP3, " +
                "IMAP, IMAPS or POP3S."
            )
            yield idiokit.send(event)
def _poll(self, url):
    # Download an XML feed and emit the events parsed from it.
    request = urllib2.Request(url)
    for key, value in self.http_headers:
        request.add_header(key, value)

    try:
        self.log.info('Downloading feed from: "%s"', url)
        _, fileobj = yield utils.fetch_url(request)
    except utils.FetchUrlFailed as e:
        self.log.error('Failed to download feed "%s": %r', url, e)
        idiokit.stop(False)

    self.log.info("Finished downloading the feed.")

    # Skip any leading junk before the first XML tag.
    byte = fileobj.read(1)
    while byte and byte != "<":
        byte = fileobj.read(1)
    if byte == "<":
        # Rewind one byte so the parser also sees the "<" itself.
        fileobj.seek(-1, 1)

    try:
        for _, elem in etree.iterparse(fileobj):
            for event in self._parse(elem, url):
                if event:
                    yield idiokit.send(event)
    except ParseError as e:
        self.log.error('Invalid format on feed: "%s", "%r"', url, e)
def normalize(self, name):
    """
    Rename raw event keys, strip/unescape values and tag events with
    the feed name.

    Fix: the key-mapping dict was rebuilt on every received event even
    though it never changes; it is now built once before the loop.
    """
    # How raw event keys are renamed. A key missing from the mapping
    # passes through unchanged; mapping a key to None removes it.
    key_mappings = {
        "time": "source time",
        "id": "cleanmx id",
        "phishtank": "phishtank id",
        "line": None,
        "firsttime": "first seen",
        "lasttime": "last seen"
    }

    while True:
        event = yield idiokit.next()

        new = events.Event()
        for key, value in event.items():
            key = key_mappings.get(key, key)
            if key is None:
                continue

            # Drop values that are empty after unescaping/stripping.
            value = unescape(value).strip()
            if not value:
                continue

            new.add(key, value)

        if name:
            new.add("feed", name)

        yield idiokit.send(new)
def _follow_config(self):
    """Relay config updates as sets, logging and skipping failed loads."""
    while True:
        ok, payload = yield idiokit.next()
        if ok:
            yield idiokit.send(set(payload))
        else:
            self.log.error(payload)
def _handle(self):
    """Parse messages that pass the filter into events and forward them."""
    while True:
        prefix, command, params = yield idiokit.next()
        if not self.filter(prefix, command, params):
            continue
        event = self.parse(prefix, command, params)
        if event is None:
            continue
        yield idiokit.send(event)
def poll(self):
    """Download the VX Vault feed and emit one malware-URL event per line."""
    self.log.info("Downloading {0}".format(self.feed_url))
    try:
        info, fileobj = yield utils.fetch_url(self.feed_url)
    except utils.FetchUrlFailed as fuf:
        raise bot.PollSkipped("failed to download {0} ({1})".format(
            self.feed_url, fuf))
    self.log.info("Downloaded")

    for line in fileobj:
        url, netloc = parseURL(line)
        if url is None:
            continue

        event = events.Event()
        event.add("url", url)

        # Hostnames and raw IP addresses go into different keys.
        if i_am_a_name(netloc):
            event.add("domain name", netloc)
        else:
            event.add("ip", netloc)

        event.add("feeder", "siri urz")
        event.add("feed", "vxvault")
        event.add("feed url", self.feed_url)
        event.add("type", "malware url")
        event.add("description", "This host is most likely hosting a malware URL.")
        yield idiokit.send(event)
def alert(*times):
    # With no time specifications there is nothing to schedule; park on
    # a fresh Event that never fires.
    if not times:
        yield idiokit.Event()
        return

    while True:
        # Sleep until the nearest upcoming time across all specs, then
        # emit an empty signal.
        yield idiokit.sleep(min(map(next_time, times)))
        yield idiokit.send()
def configs(self):
    """Stream config sets from the followed config file, logging failures."""
    follow = config.follow_config(self.config)
    while True:
        ok, payload = yield follow.next()
        if ok:
            yield idiokit.send(set(payload))
        else:
            self.log.error(payload)
def decode(sock):
    # Length-prefixed pickle protocol: a 4-byte big-endian length
    # followed by the pickled payload.
    while True:
        length_bytes = yield recvall(sock, 4)
        length, = struct.unpack("!I", length_bytes)
        msg_bytes = yield recvall(sock, length)
        # NOTE(review): cPickle.loads on socket data is unsafe if the
        # peer is untrusted - this assumes a trusted local peer; verify.
        msg = cPickle.loads(msg_bytes)
        yield idiokit.send(msg)
def _add_filename_info(groupdict):
    """Augment passing events with key/value pairs captured from a filename."""
    while True:
        event = yield idiokit.next()
        for key, value in groupdict.items():
            # Skip pairs where either side is missing.
            if key is not None and value is not None:
                event.add(key, value)
        yield idiokit.send(event)
def aggregate(self, group_keys, window_info):
    """
    Create aggregated events and ids for them.

    Events are bucketed by their values for group_keys; each bucket
    maintains the windows described by window_info, and an aggregate
    event is (re)emitted whenever a bucket is updated or a window
    expires entries.
    """
    group_keys = tuple(set(group_keys))
    # Maps a bucket key to its list of (window, output_key) pairs.
    key_groups = dict()

    while True:
        yield timer.sleep(1.0)
        event = yield idiokit.next()
        current_time = time.time()

        updated = set()
        # The bucket key: sorted value tuples for each grouping key.
        key = tuple(tuple(sorted(event.values(x))) for x in group_keys)
        updated.add(key)

        # Lazily create the windows for a bucket seen for the first time.
        if key not in key_groups:
            windows = []
            for constructor, keys, output_key in window_info:
                windows.append((constructor(**keys), output_key))
            key_groups[key] = windows

        for window, output_key in key_groups[key]:
            window.push(current_time, event)

        # Re-evaluate every bucket that was updated or expired something.
        for key, windows in list(key_groups.iteritems()):
            any_expired = False
            for window, _ in windows:
                any_expired = window.expire(current_time) or any_expired
            if not (any_expired or key in updated):
                continue

            output = None
            for window, output_key in windows:
                value = window.value()
                if value is None:
                    continue
                if output is None:
                    output = events.Event()
                output.add(output_key, unicode(value))

            # The aggregate id is derived deterministically from the key.
            id = hashlib.md5(repr(key)).hexdigest()
            if output is not None:
                for group_key, group_values in zip(group_keys, key):
                    output.update(group_key, group_values)
                yield idiokit.send(id, output)
            # No window produced a value: the bucket is empty, drop it.
            if output is None:
                del key_groups[key]
def _normalize(subject):
    """Tag events with the report subject and drop placeholder values."""
    while True:
        event = yield idiokit.next()

        if subject is not None:
            event.add("report_subject", subject)

        # Remove empty-string and "-" placeholder values left by feeds.
        for key in event.keys():
            for placeholder in ("", "-"):
                event.discard(key, placeholder)

        yield idiokit.send(event)
def _asn_lookup(self, event, resolver=None, cache_time=0):
    # Augment one received event with origin-AS data for its first IP
    # value, then forward it.
    # Fix: removed the unused local asn_dict.
    # NOTE(review): the event parameter is immediately shadowed by the
    # value received from idiokit.next(); it is kept only for interface
    # compatibility - confirm against callers.
    lookup = cymruwhois.OriginLookup(resolver, cache_time=cache_time)

    event = yield idiokit.next()
    if event.contains(self.ip_key):
        ip = event.values(self.ip_key)
        # Only the first IP value is looked up.
        asns = yield lookup.lookup(ip[0])
        for asn in asns:
            for key, value in asn:
                event.add(key, value)
    yield idiokit.send(event)
def _read_pipe(self):
    """Relay lines read from self._pipe to the IRC channel, polling politely."""
    while True:
        yield idiokit.select.select((self._pipe,), (), ())

        try:
            line = self._pipe.readline()
        except IOError:
            # Transient read failure: back off and retry.
            yield idiokit.sleep(1)
            continue

        if line:
            yield idiokit.send("PRIVMSG", self._channel, "{0}".format(line))
        else:
            # Nothing available right now.
            yield idiokit.sleep(0.5)
def _asn_names_lookup(self, resolver=None, cache_time=4 * 60 * 60):
    # Augment each passing event with AS-name data for its "asn" value.
    # Fix: removed the unused local asn_dict that was rebuilt on every
    # iteration but never read.
    lookup = cymruwhois.ASNameLookup(resolver, cache_time=cache_time)

    while True:
        event = yield idiokit.next()
        if event.contains("asn"):
            asn_value = event.value("asn")
            asns = yield lookup.lookup(asn_value)
            for asn in asns:
                for key, value in asn:
                    event.add(key, value)
        yield idiokit.send(event)
def _read_stdin(self):
    """Feed JSON lines from stdin as events; stop at EOF."""
    while True:
        yield select.select([sys.stdin], [], [])

        line = sys.stdin.readline()
        if not line:
            break  # EOF
        if not line.strip():
            continue  # skip blank lines

        yield idiokit.send(events.Event(json.loads(line)))
def _output_rate_limiter(self):
    # NOTE(review): a non-positive xmpp_rate_limit keeps this first loop
    # sleeping forever, so no messages are ever forwarded - confirm that
    # blocking output entirely is the intended behavior.
    while self.xmpp_rate_limit <= 0.0:
        yield idiokit.sleep(60.0)

    while True:
        # Space outputs at least 1 / rate_limit seconds apart.
        delta = max(time.time() - self._last_output, 0)
        delay = 1.0 / self.xmpp_rate_limit - delta
        if delay > 0.0:
            yield idiokit.sleep(delay)
        self._last_output = time.time()

        msg = yield idiokit.next()
        yield idiokit.send(msg)
def csv_to_events(fileobj, delimiter=",", columns=None, charset=None):
    """
    Emit one event per CSV row; when no column names are given, the
    first row supplies them and produces no event.
    """
    reader = _CSVReader(fileobj, charset=charset, delimiter=delimiter)
    for row in reader:
        if columns is None:
            # Header row: remember the column names, emit nothing.
            columns = row
            continue

        event = events.Event()
        for key, value in zip(columns, row):
            # Skip unnamed columns and empty cells.
            if key is not None and value:
                event.add(key, value)
        yield idiokit.send(event)
def _rate_limiter(rate_limit):
    """Forward messages, enforcing at most rate_limit messages per second."""
    last_output = time.time()
    while True:
        if rate_limit is not None:
            elapsed = max(time.time() - last_output, 0)
            wait = 1.0 / rate_limit - elapsed
            if wait > 0.0:
                yield idiokit.sleep(wait)
            last_output = time.time()

        msg = yield idiokit.next()
        yield idiokit.send(msg)
def augment(self, ip_key, prefix):
    """Emit prefixed whois augmentations for every IP found under ip_key."""
    while True:
        eid, event = yield idiokit.next()

        for ip in event.values(ip_key):
            items = yield cymruwhois.lookup(ip)
            if not items:
                continue

            augmentation = events.Event()
            for key, value in items:
                augmentation.add(prefix + key, value)
            yield idiokit.send(eid, augmentation)
def _read_stdin(self):
    """Feed JSON lines from stdin as events, keeping numbers as unicode."""
    # Parse numbers as unicode strings so values round-trip unchanged.
    decode = json.JSONDecoder(parse_float=unicode, parse_int=unicode).decode

    while True:
        yield select.select([sys.stdin], [], [])

        line = sys.stdin.readline()
        if not line:
            break  # EOF
        if not line.strip():
            continue  # skip blank lines

        yield idiokit.send(events.Event(decode(line)))
def augment(self, *ip_keys):
    """Add whois data for each IP in the event; with no keys, scan all values."""
    while True:
        event = yield idiokit.next()

        if ip_keys:
            candidates = self._ip_values(event, ip_keys)
        else:
            # No explicit keys: take every value that parses as an IP.
            candidates = event.values(parser=_parse_ip)

        for ip in candidates:
            items = yield self.lookup(ip)
            for key, value in items:
                event.add(key, value)

        yield idiokit.send(event)
def walk_mail(self, uid, path=(), headers=None):
    # Recursively walk a MIME message's parts, emitting
    # (part path, header chain) pairs for every non-multipart part.
    # Fix: replaced the mutable default argument headers=[] with a
    # None sentinel (the classic shared-default pitfall); callers that
    # pass a list explicitly are unaffected.
    if headers is None:
        headers = []

    if not path:
        # Top level: inspect the overall message header first.
        header = yield self.get_header(uid, "HEADER")
        if header is None:
            return

        if header.get_content_maintype() != "multipart":
            # Single-part message: the whole body is the text part.
            yield idiokit.send("TEXT", tuple(headers + [header]))
            return

        headers = headers + [header]

    # IMAP part numbering starts at 1; the counter is bumped before use.
    path = list(path) + [0]

    while True:
        path[-1] += 1
        path_str = ".".join(map(str, path))

        header = yield self.get_header(uid, path_str + ".MIME")
        if header is None:
            # No such part: we've walked past the last sibling.
            return

        if header.get_content_maintype() == "multipart":
            # Recurse into nested multipart containers.
            yield self.walk_mail(uid, path, headers + [header])
        else:
            yield idiokit.send(path_str, tuple(headers + [header]))
def process(self, ids, queue, window_time):
    """Track event ids inside a time window; emit id:open on first sighting."""
    while True:
        event = yield idiokit.next()
        now = time.time()

        eid = events.hexdigest(event)
        count, items = ids.get(eid, (0, event.items()))
        ids[eid] = count + 1, items

        # Announce only the first sighting of this id.
        if not count:
            yield idiokit.send(event.union({"id:open": eid}))
        queue.append((now + window_time, eid))
def follow_config(path, poll_interval=1.0, force_interval=30.0):
    # Poll a config file, sending (True, configs) on successful (re)load
    # and (False, formatted traceback message) on failure. Reloads on
    # mtime change and unconditionally every force_interval seconds.
    last_reload = -float("inf")
    last_mtime = None
    last_error_msg = None
    abspath = os.path.abspath(path)

    while True:
        now = time.time()
        # Guard against the system clock jumping backwards.
        if now < last_reload:
            last_reload = now

        # NOTE(review): os.path.getmtime can raise (e.g. file removed)
        # outside any try here - confirm the caller handles that.
        mtime = os.path.getmtime(abspath)
        if now > last_reload + force_interval or last_mtime != mtime:
            try:
                configs = load_configs(abspath)
            except Exception:
                _, exc_value, exc_tb = sys.exc_info()
                stack = traceback.extract_tb(exc_tb)
                stack = stack[1:]  # Make the traceback flatter by discarding the current stack frame
                error_msg = "Could not load {path!r} (most recent call last):\n{stack}\n{exception}".format(
                    path=abspath,
                    stack="".join(traceback.format_list(stack)).rstrip(),
                    exception=utils.format_exception(exc_value)
                )
                # Report each distinct error only once to avoid log spam.
                if error_msg != last_error_msg:
                    yield idiokit.send(False, error_msg)
                    last_error_msg = error_msg
                # Force a retry on the next poll round.
                last_mtime = None
            else:
                yield idiokit.send(True, configs)
                last_error_msg = None
                last_mtime = mtime
                last_reload = now

        yield idiokit.sleep(poll_interval)
def feed(self):
    """Tail the log file and emit one event per successfully parsed line."""
    for result in tail_file(self.path, self.offset):
        if result is None:
            # No new data yet: wait before the next read.
            yield idiokit.sleep(2.0)
            continue

        mtime, line = result
        parsed = self.parse(line, mtime)
        if parsed is None:
            continue

        event = events.Event()
        for key, value in parsed.items():
            event.add(key, value)
        yield idiokit.send(event)
def collect(self, state, **keys):
    # Accumulate events into a compressed batch; flush the batch
    # downstream whenever the REPORT_NOW sentinel arrives.
    # state carries the unflushed batch across service restarts.
    if state is None:
        state = utils.CompressedCollection()

    try:
        while True:
            event = yield idiokit.next()
            if event is self.REPORT_NOW:
                # Hand the current batch downstream and start a new one.
                yield idiokit.send(state)
                state = utils.CompressedCollection()
            else:
                state.append(event)
    except services.Stop:
        # On shutdown return the unflushed state so it can be persisted.
        idiokit.stop(state)
def collect_decode(socks):
    # Round-robin over readable sockets, decoding one length-prefixed
    # pickle message at a time from each.
    readable = []
    while True:
        # Refill the ready list only once it has been drained, so every
        # readable socket gets served before selecting again.
        while not readable:
            readable, _, _ = yield select.select(socks, (), ())
            readable = list(readable)

        sock = readable.pop()
        length_bytes = yield _recvall_stream(sock, 4)
        length, = struct.unpack("!I", length_bytes)
        msg_bytes = yield _recvall_stream(sock, length)
        # NOTE(review): pickle over a socket is only safe with trusted
        # peers - assumes local trusted endpoints; verify.
        yield idiokit.send(cPickle.loads(msg_bytes))
def purge(self, ids, queue):
    """Once a second, expire queued ids; emit id:close when the last ref drops."""
    while True:
        yield idiokit.sleep(1.0)
        now = time.time()

        while queue:
            expire_time, eid = queue[0]
            if expire_time > now:
                break
            queue.popleft()

            count, items = ids.pop(eid)
            if count > 1:
                # Other sightings still hold this id open.
                ids[eid] = count - 1, items
            else:
                yield idiokit.send(
                    events.Event(items).union({"id:close": eid}))
def _follow_config(self, abspath):
    """Turn config objects into startup configs rooted at the config's directory."""
    workdir = os.path.dirname(abspath)
    while True:
        ok, obj = yield idiokit.next()
        if not ok:
            self.log.error(obj)
            continue

        startups = set()
        for conf in iter_startups(obj):
            # Honor explicit disable/enable name lists when given.
            if self.disable is not None and conf.name in self.disable:
                continue
            if self.enable is not None and conf.name not in self.enable:
                continue
            startups.add(conf.with_workdir(workdir))
        yield idiokit.send(startups)
def purge(self, ids, queue):
    """Expire ids whose window has passed, emitting id:close for the last one."""
    while True:
        yield idiokit.sleep(1.0)
        cutoff = time.time()

        while queue and queue[0][0] <= cutoff:
            _, eid = queue.popleft()

            remaining, items = ids.pop(eid)
            if remaining > 1:
                # Still referenced by other sightings; put it back.
                ids[eid] = remaining - 1, items
            else:
                closing = events.Event(items).union({
                    "id:close": eid
                })
                yield idiokit.send(closing)
def process(self, ids, queue, window_time):
    """Count sightings of each event id; announce id:open for fresh ids."""
    while True:
        event = yield idiokit.next()
        expire_time = time.time() + window_time

        eid = events.hexdigest(event)
        seen, items = ids.get(eid, (0, event.items()))
        ids[eid] = seen + 1, items

        # Only the very first sighting announces the id.
        if seen == 0:
            yield idiokit.send(event.union({
                "id:open": eid
            }))
        queue.append((expire_time, eid))
def normalize(self, subject, groupdict):
    """Strip placeholder values, tag events with the subject and filename keys."""
    while True:
        event = yield idiokit.next()

        if subject is not None:
            event.add("report_subject", subject)

        # Remove empty-string and "-" placeholder values.
        for key in event.keys():
            event.discard(key, "")
            event.discard(key, "-")

        for key, value in groupdict.items():
            if key is not None and value is not None:
                event.add(key, value)

        yield idiokit.send(event)
def _cutoff(self):
    # Drop events whose newest parseable "source time" is older than
    # the configured cutoff; events with no parseable time pass through.
    while True:
        event = yield idiokit.next()

        latest = None
        for value in event.values("source time"):
            try:
                source_time = time.strptime(value, "%Y-%m-%d %H:%M:%SZ")
            except ValueError:
                continue
            # Python 2: None orders below any struct_time, so the first
            # parsed value always replaces the initial None.
            latest = max(latest, source_time)

        cutoff = time.gmtime(time.time() - self.drop_older_than)
        if latest and latest < cutoff:
            continue
        yield idiokit.send(event)
def cleanup(self, ids, queue):
    """Every second, flush expired pending events and drop emptied id entries."""
    while True:
        yield idiokit.sleep(1.0)
        now = time.time()

        while queue and queue[0][0] <= now:
            _, eid, unique = queue.popleft()

            event_set, augment_set = ids[eid]
            augment_set.pop(unique, None)

            pending = event_set.pop(unique, None)
            if pending is not None:
                yield idiokit.send(pending)

            # Nothing left under this id: forget it entirely.
            if not (event_set or augment_set):
                del ids[eid]