def feed(self, query, feed_all):
    collab = wiki.GraphingWiki(
        self.collab_url,
        ssl_verify_cert=not self.collab_ignore_cert,
        ssl_ca_certs=self.collab_extra_ca_certs)

    yield idiokit.thread(collab.authenticate, self.collab_user, self.collab_password)
    yield idiokit.sleep(5)

    token = None
    current = dict()

    while True:
        try:
            result = yield idiokit.thread(collab.request, "IncGetMeta", query, token)
        except wiki.WikiFailure as fail:
            self.log.error("IncGetMeta failed: {0!r}".format(fail))
        else:
            incremental, token, (removed, updates) = result
            removed = set(removed)
            if not incremental:
                removed.update(current)
                current.clear()

            for page, keys in updates.iteritems():
                event = current.setdefault(page, events.Event())
                event.add("id:open", self.page_id(page))
                event.add("gwikipagename", page)
                event.add(
                    "collab url",
                    self.collab_url + urllib.quote(page.encode("utf8")))
                removed.discard(page)

                for key, (discarded, added) in keys.iteritems():
                    for value in map(normalize, discarded):
                        event.discard(key, value)
                    for value in map(normalize, added):
                        event.add(key, value)

                if not feed_all:
                    yield idiokit.send(event)

            for page in removed:
                current.pop(page, None)

                event = events.Event()
                event.add("id:close", self.page_id(page))
                event.add("gwikipagename", page)
                event.add("collab url", self.collab_url + page)
                yield idiokit.send(event)

            if feed_all:
                for page in current:
                    yield idiokit.send(current[page])

        yield idiokit.sleep(self.poll_interval)

def parse(inner, self, txt):
    def _add(event, k, v):
        decode = util.guess_encoding
        key = decode(k.lower().strip())
        value = decode(v.strip())
        event.add(key, value)
        return event

    def _lineparser(txt):
        rows = txt.split('\n')
        for row in rows:
            row = row.strip()
            match = re.findall("^(.*?)=(.*?)$", row)
            if match:
                for k, v in match:
                    k = k.strip()
                    v = v.strip()
                    yield k, v

    yield

    decode = util.guess_encoding
    event = events.Event()
    event.add(
        'source',
        'imap://%s/%s %s' % (self.mail_server, self.mail_box, self.filter))

    for k, v in _lineparser(txt):
        if k.strip() == 'Affected CI-s':
            for ci in v.split(","):
                event = _add(event, k, ci)
        elif k.strip() == 'SR':
            _add(event, 'id', v)
            _add(event, 'sr', v)
        else:
            event = _add(event, k, v)

    # Keys and values collected, now deal with the event and its state.
    id = event.value('id', None)
    if u'rikke l\xf5pp' in event.keys():
        # Case closed: send a clear event and move the id to the closed set.
        if id in open_ids:
            clearevent = events.Event()
            clearevent.add('id', id)
            inner.send(clearevent)
            open_ids.remove(id)
            closed_ids.add(id)
            inner.finish(id)

    # If the event actually has some contents, send it forward.
    if len(event.keys()) > 0 and id not in closed_ids:
        open_ids.add(id)
        inner.send(event)
        inner.finish(id)

    inner.finish(True)

def poll(self):
    self.log.info("Downloading {0}".format(self.feed_url))
    try:
        info, fileobj = yield utils.fetch_url(self.feed_url)
    except utils.FetchUrlFailed as fuf:
        raise bot.PollSkipped("failed to download {0} ({1})".format(
            self.feed_url, fuf))
    self.log.info("Downloaded")

    for line in fileobj:
        url, netloc = parseURL(line)
        if url is None:
            continue

        event = events.Event()
        event.add("url", url)
        if i_am_a_name(netloc):
            event.add("domain name", netloc)
        else:
            event.add("ip", netloc)
        event.add("feeder", "siri urz")
        event.add("feed", "vxvault")
        event.add("feed url", self.feed_url)
        event.add("type", "malware url")
        event.add("description", "This host is most likely hosting a malware URL.")
        yield idiokit.send(event)

def _normalize(self, event):
    yield events.Event({
        "feeder": "malware domain list",
        "feed": "mdl",
        "feed url": self.url,
        "source time": event.values("timestamp", parse_timestamp),
        "url": event.values("url", parse_url),
        "domain name": event.values("url", parse_host),
        "reverse dns": event.values("reverse", parse_valid),
        "description url": event.values("url", parse_description_url),
        "ip": event.values("ip", parse_ip),
        "asn": event.values("asn", parse_valid),
        "registrant": event.values("registrant", parse_valid),
        "description": event.values("description", parse_valid)
    })

def poll(inner, self, something):
    yield timer.sleep(2)

    depurl = "http://xtra.tllapt.ee/taru/dep_en.html"
    arrurl = "http://xtra.tllapt.ee/taru/arr_en.html"

    for (subtype, url) in [('departures', depurl), ('arrivals', arrurl)]:
        self.log.info("Downloading %r", url)
        try:
            info, fileobj = yield inner.sub(utils.fetch_url(url))
        except utils.FetchUrlFailed as fuf:
            self.log.error("Downloading failed: %r", fuf)
            return
        self.log.info("Downloaded")

        utilization = get_metrics(fileobj)

        event = events.Event()
        event.add('source', url)
        event.add('utilization', utilization)
        event.add('service', 'airports')
        event.add('subtype', subtype)
        event.add('id', create_id(event, 'tallinnairpoirt', 'subtype'))
        event.add('longitude', '24.799303')
        event.add('latitude', '59.4165212')
        yield inner.send(event)

def normalize(self, name):
    while True:
        event = yield idiokit.next()

        # A dict telling how to rename raw event keys.
        # A key is not renamed by default.
        # Mapping a key to None removes the key.
        key_mappings = {
            "time": "source time",
            "id": "cleanmx id",
            "phishtank": "phishtank id",
            "line": None,
            "firsttime": "first seen",
            "lasttime": "last seen"
        }

        new = events.Event()
        for key, value in event.items():
            key = key_mappings.get(key, key)
            if key is None:
                continue

            value = unescape(value).strip()
            if not value:
                continue

            new.add(key, value)

        if name:
            new.add("feed", name)

        yield idiokit.send(new)

def iter_metas(request, rule, keys=None, checkAccess=True):
    from abusehelper.core import rules, events

    if type(rule) != rules.rules.Match:
        rule = rules.parse(unicode(rule))

    for page, _meta in request.graphdata.items():
        _page = {u"gwikipagename": page}
        metas = _meta.get(u"meta", None)

        if checkAccess and not request.user.may.read(page):
            continue
        if not metas:
            continue

        _out = request.graphdata.get_out(page)
        if 'gwikicategory' in _out:
            metas.setdefault(u'gwikicategory', []).extend(_out.get("gwikicategory"))

        data = events.Event(dict(metas.items() + _page.items()))
        if rule.match(data):
            if keys:
                metas = dict((key, metas.get(key, list())) for key in keys)
            yield page, metas

def events_to_elements_with_delay_element(inner):
    while True:
        event = yield inner
        stamp = event.value('start')
        eid = event.value('id')

        body = Element("body")
        body.text = events._escape(unicode(event))

        # Set a delay element to fake the event arrival time.
        delay = Element("delay")
        delay.text = "Greetings earthlings"
        delay.set_attr("xmlns", 'urn:xmpp:delay')
        strstamp = time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                 time.strptime(stamp, "%Y-%m-%d %H:%M:%S"))
        delay.set_attr("stamp", strstamp)
        inner.send(body, event.to_element(), delay)

        stamp = event.value('end')

        body = Element("body")
        close_event = events.Event()
        close_event.add('id', eid)
        body.text = events._escape(unicode(close_event))

        # Set a delay element to fake the event arrival time.
        delay = Element("delay")
        delay.text = "Greetings earthlings"
        delay.set_attr("xmlns", 'urn:xmpp:delay')
        strstamp = time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                 time.strptime(stamp, "%Y-%m-%d %H:%M:%S"))
        delay.set_attr("stamp", strstamp)
        inner.send(body, close_event.to_element(), delay)

def parse(inner, tree):
    def _add(event, k, v):
        decode = util.guess_encoding
        key = decode(k.lower().strip())
        value = decode(v.strip())
        if key == 'tempmax' or key == 'tempmin':
            key = 'temp'
        event.add(key, value)
        event.add('time', date + " " + nightday)
        return event

    def _walk(tree, depth=0, path="", date=""):
        for node in tree:
            key = node.tag
            value = "%s" % (node.text)
            value = value.strip()
            if value != "":
                yield key, value

    for forecast in tree:
        if 'date' in forecast.attrib:
            date = forecast.attrib['date']

        for nightday in 'night', 'day':
            for generic in forecast.findall('*'):
                event = events.Event()
                for k, v in _walk(generic.findall('*')):
                    event = _add(event, k, v)
                event = _add(event, 'time', date + " " + nightday)
                inner.send(event)

            for place in forecast.findall(nightday + '/place'):
                event = events.Event()
                for k, v in _walk(place.findall('*')):
                    event = _add(event, k, v)
                event = _add(event, 'time', date + " " + nightday)
                event.add('id', id.create_id(event, 'name', 'time'))
                inner.send(event)

            for place in forecast.findall(nightday + '/wind'):
                event = events.Event()
                for k, v in _walk(place.findall('*')):
                    event = _add(event, k, v)
                event.add('id', id.create_id(event, 'name', 'time'))
                inner.send(event)

def throttle(self, throttle_time):
    """
    Ensure that updates for a given event id are sent at least
    throttle_time apart.
    """
    sleeper = timer.sleep(1.0)
    ids = dict()
    queue = list()

    while True:
        yield timer.sleep(1.0)
        item = yield idiokit.next()
        current_time = time.time()

        id, event = item
        previous, _, update_time = ids.get(id, (None, None, None))
        if update_time is None:
            update_time = current_time + 1.0
            heapq.heappush(queue, (update_time, id))
        ids[id] = previous, event, update_time

        while queue and queue[0][0] <= current_time:
            _, id = heapq.heappop(queue)
            previous, next, _ = ids[id]

            if previous == next == None:
                del ids[id]
                continue

            if previous != next:
                if next is None:
                    event = events.Event()
                else:
                    event = events.Event(next)
                event.add("id", id)
                yield idiokit.send(event)

            if previous != next:
                update_time = current_time + throttle_time
                heapq.heappush(queue, (update_time, id))
            else:
                update_time = None

            ids[id] = next, next, update_time

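# A standalone sketch of the same heap-scheduled throttling idea as
# throttle() above, written against a plain iterable instead of an idiokit
# stream so it can be run and tested in isolation. The input format
# (an iterable of (timestamp, id, state) tuples ordered by timestamp) and
# all names below are illustrative assumptions, not part of the original bot.
import heapq


def throttled_updates(updates, throttle_time):
    ids = {}    # id -> [last sent state, latest seen state, next send time or None]
    queue = []  # heap of (next allowed send time, id)

    for now, uid, state in updates:
        sent, _, when = ids.get(uid, (None, None, None))
        if when is None:
            # Not currently scheduled: schedule an immediate send.
            when = now
            heapq.heappush(queue, (when, uid))
        ids[uid] = [sent, state, when]

        # Flush every id whose throttle window has passed and whose state
        # changed since the last send.
        while queue and queue[0][0] <= now:
            _, qid = heapq.heappop(queue)
            sent, latest, _ = ids[qid]
            if sent != latest:
                yield qid, latest
                heapq.heappush(queue, (now + throttle_time, qid))
                ids[qid] = [latest, latest, now + throttle_time]
            else:
                ids[qid] = [latest, latest, None]
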
def parse_sw1(self, fileobject, baseurl):
    txt = fileobject.read()
    subpages = re.findall('A HREF="(.*?.html)"', txt)

    for page in subpages:
        self.log.info('Downloading %r.' % (page))
        fileobject = urllib2.urlopen(baseurl + page)
        txt = fileobject.read()

        descr = re.findall('<td>Description:</td> <td>(.*) </td>', txt)[0]
        port = descr.split(" ")[0]
        customer = " ".join(descr.split(" ")[1:])

        # find the daily stats part
        reg = '<!-- Begin `Daily\'.*?'
        reg += '<table>.*?'
        # find the 'in' statistics
        reg += '<tr class="in">.*?'
        # pick the statistics
        reg += '<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?'
        # find the 'out' statistics
        reg += '<tr class="out">.*?'
        # pick the statistics
        reg += '<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?'
        # stop
        reg += '<!-- End `Daily'

        stats = re.findall(reg, txt, re.S)[0]
        inmax = stats[0]
        inavg = stats[1]
        incur = stats[2]
        outmax = stats[3]
        outavg = stats[4]
        outcur = stats[5]

        inmbs = re.search('^(\S+)', incur).group(1)
        inutilization = re.search('\((.*?)%\)', incur).group(1)
        outmbs = re.search('^(\S+)', outcur).group(1)
        oututilization = re.search('\((.*?)%\)', outcur).group(1)

        event = events.Event()
        now = str(int(time.time()))
        event.add('start', now)
        event.add('end', now)
        event.add('source', self.baseurl + page)
        event.add('organization', 'TIX')
        event.add('port', port)
        event.add('subtype', 'exchange')
        event.add('customer', customer)
        event.add('area', 'Tallinn')
        event.add('latitude', '59.4388619')
        event.add('longitude', '24.7544715')
        event.add('inmbs', str(int(float(inmbs))))
        event.add('inutilization', str(int(round(float(inutilization)))))
        event.add('outmbs', str(int(float(outmbs))))
        event.add('oututilization', str(int(round(float(oututilization)))))
        event.add('id', create_id(event, 'port', 'customer'))
        yield event

def aggregate(self, group_keys, window_info):
    """
    Create aggregated events and ids for them.
    """
    group_keys = tuple(set(group_keys))
    key_groups = dict()

    while True:
        yield timer.sleep(1.0)
        event = yield idiokit.next()
        current_time = time.time()
        updated = set()

        key = tuple(tuple(sorted(event.values(x))) for x in group_keys)
        updated.add(key)

        if key not in key_groups:
            windows = []
            for constructor, keys, output_key in window_info:
                windows.append((constructor(**keys), output_key))
            key_groups[key] = windows

        for window, output_key in key_groups[key]:
            window.push(current_time, event)

        for key, windows in list(key_groups.iteritems()):
            any_expired = False
            for window, _ in windows:
                any_expired = window.expire(current_time) or any_expired

            if not (any_expired or key in updated):
                continue

            output = None
            for window, output_key in windows:
                value = window.value()
                if value is None:
                    continue
                if output is None:
                    output = events.Event()
                output.add(output_key, unicode(value))

            id = hashlib.md5(repr(key)).hexdigest()

            if output is not None:
                for group_key, group_values in zip(group_keys, key):
                    output.update(group_key, group_values)
                yield idiokit.send(id, output)

            if output is None:
                del key_groups[key]

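# A minimal sketch of the window interface that aggregate() above appears to
# expect from the constructors listed in window_info: push(time, event),
# expire(time) and value(). CountWindow and the window_info entry below are
# hypothetical examples for illustration, not the bot's actual window classes.
import collections


class CountWindow(object):
    def __init__(self, window_time=300.0):
        self.window_time = window_time
        self._stamps = collections.deque()

    def push(self, current_time, event):
        # Record the arrival time of each aggregated event.
        self._stamps.append(current_time)

    def expire(self, current_time):
        # Drop entries older than window_time; return True if any expired.
        expired = False
        cutoff = current_time - self.window_time
        while self._stamps and self._stamps[0] <= cutoff:
            self._stamps.popleft()
            expired = True
        return expired

    def value(self):
        # None means "nothing to report", which aggregate() skips.
        return len(self._stamps) or None


# One window_info entry as aggregate() seems to consume it:
# (constructor, keyword arguments for it, output key for the aggregate value).
window_info = [(CountWindow, {"window_time": 600.0}, "event count")]
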
def assign_affected_targets(event):
    event_list = list()
    subtype = event.value('user', None)

    if ('user', subtype) in relations:
        for add in relations[('user', subtype)]:
            event2 = events.Event(event)
            for k, v in add.iteritems():
                event2.clear(k)
                event2.update(k, [v])
            event_list.append(event2)
    else:
        return [event]

    return event_list

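# A hypothetical sketch of the shape of the module-level `relations` mapping
# that assign_affected_targets() consults: keys are ('user', <subtype>) tuples
# and each value is a list of {key: value} overrides applied to copies of the
# original event, one copy per override dict. The entries below are
# illustrative placeholders, not real configuration.
relations = {
    ("user", "webmail"): [
        {"target": "mail service", "sector": "communications"},
        {"target": "customer portal", "sector": "communications"},
    ],
}
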
def _handle_room(self, name):
    msg = "room {0!r}".format(name)
    attrs = events.Event(type="room", service=self.bot_name, room=name)

    with self.log.stateful(repr(self.xmpp.jid), "room", repr(name)) as log:
        log.open("Joining " + msg, attrs, status="joining")
        room = yield self.xmpp.muc.join(name, self.bot_name)

        log.open("Joined " + msg, attrs, status="joined")
        try:
            yield room
        finally:
            log.close("Left " + msg, attrs, status="left")

def parse(self, line, _):
    line = line.strip()
    if not line:
        return

    facts = dict(parse_log_line(line))

    if "timestamp" in facts:
        facts["timestamp"] = convert_date(facts["timestamp"])

    if "request" in facts:
        facts.update(parse_request(facts["request"]))

    if "user_agent" in facts:
        facts.update(parse_user_agent(facts["user_agent"]))

    return events.Event(facts)

def _read_stdin(self):
    loads = json.JSONDecoder(parse_float=unicode, parse_int=unicode).decode

    while True:
        yield select.select([sys.stdin], [], [])
        line = sys.stdin.readline()
        if not line:
            break
        if not line.strip():
            continue

        in_dict = loads(line)
        yield idiokit.send(events.Event(in_dict))

def logger():
    while True:
        try:
            yield idiokit.sleep(interval)
        finally:
            if counter.count > 0:
                self.log.info(
                    "Sent {0} events to room {1!r}".format(counter.count, name),
                    event=events.Event({
                        "type": "room",
                        "service": self.bot_name,
                        "sent events": unicode(counter.count),
                        "room": name}))
                counter.count = 0

def feed(self):
    for result in tail_file(self.path, self.offset):
        if result is None:
            yield idiokit.sleep(2.0)
            continue

        mtime, line = result
        keys = self.parse(line, mtime)
        if keys is None:
            continue

        event = events.Event()
        for key, value in keys.items():
            event.add(key, value)
        yield idiokit.send(event)

def purge(self, ids, queue):
    while True:
        yield idiokit.sleep(1.0)

        current_time = time.time()
        while queue and queue[0][0] <= current_time:
            expire_time, eid = queue.popleft()
            count, items = ids.pop(eid)
            if count > 1:
                ids[eid] = count - 1, items
            else:
                yield idiokit.send(
                    events.Event(items).union({"id:close": eid}))

def _parse_line(line):
    if line.startswith("#"):
        return None

    try:
        ip, time = line.split()
    except ValueError:
        return None

    time = _normalize_time(time)

    event = events.Event()
    event.add("ip", ip)
    event.add("source time", time)
    return event

def create_a_event(
        n_timespan=60 * 60,  # default: last hour only
        n_sector=None,
        n_service=None,
        n_type=None,
        n_reason=None,
        lae=57.8,  # default bounding box is EE
        law=59.39,
        lon=23.5,
        los=28):
    event = events.Event()

    # Set time and place.
    event.add('latitude', str(random.uniform(lae, law)))
    event.add('longitude', str(random.uniform(lon, los)))
    event.add('time', gettime(random.randint(1, n_timespan)))
    # something wrong with this...vsr does not take start end 'in' ;(
    event.add('start', gettime(random.randint(1, n_timespan)))
    event.add('end', gettime(-(random.randint(1, n_timespan))))

    # Create some damage numbers.
    i = random.randint(1, 10)
    v = random.randint(1, 10)
    event.add('impact', str(i))  # humans
    event.add('value', str(v))  # money
    event.add('damage', str((i * v / 10)))  # humans * money

    # ..sector, service..
    sector = getsector(n_sector)
    event.add('sector', sector)
    service = getservice(sector, n_service)
    event.add('service', service)

    # What the problem is.
    type = gettype(n_type)
    event.add('type', type)
    reason = getreason(type, n_reason)
    event.add('reason', reason)

    # Who had (reported) the problem.
    provider = getprovider(service)
    customer = getcustomer(provider)
    event.add('provider', provider)

    # Make customers report more often than providers: providers are busy
    # with their problem anyway, while customers are just waiting...
    g = random.randint(0, 9)
    if g < 7:
        event.add('customer', customer)

    myid = '' + sector + service + type + reason + provider
    event.add('id', myid)
    return event

def fetch_mails(inner, self, filter):
    result, data = yield inner.sub(self.call("uid", "SEARCH", None, filter))
    if not data or not data[0]:
        return

    for uid in data[0].split():
        collected = yield inner.sub(self.walk_mail(uid) | collect())

        parts = list()
        for path, headers in collected:
            parts.append((headers, self.fetcher(uid, path)))

        if parts:
            top_header = parts[0][0][0]
            subject = top_header["Subject"] or "<no subject>"
            subject = base64.b64decode(subject[10:])
            sender = top_header["From"] or "<unknown sender>"
            match = re.search('(<.*?>)$', sender)
            sender = base64.b64decode(sender[10:])
            if match:
                sender = sender + " " + match.group(1)

            seire_id = yield inner.sub(self.handle(parts))
            self.mailbox_ids.add(seire_id)
            self.log.info("Done with mail %r from %r, id %r",
                          subject, sender, seire_id)

        # UID STORE command flags have to be in parentheses, otherwise
        # imaplib quotes them, which is not allowed.
        yield inner.sub(
            self.call("uid", "STORE", uid, "+FLAGS", "(\\Seen)"))

    # Track which mails have been removed from the mailbox since the last
    # poll and send a clear event for each corresponding event.
    changes = self.old_mailbox_ids.difference(self.mailbox_ids)
    if len(changes) > 0:
        self.log.info(
            "Mails with following id's were removed from the mailbox: %r",
            ",".join(changes))
        for removed_id in changes:
            event = events.Event()
            event.add('id', removed_id)
            inner.send(event)

    self.old_mailbox_ids = self.mailbox_ids
    self.mailbox_ids = set()

def create_event(self, source, **keys):
    event = events.Event({
        "feed": self.feed_name,
        "malware": self.feed_malware,
        "type": self.feed_type,
        "feed url": source
    })

    for input_key, input_value in keys.iteritems():
        for output_key, output_value in self.parse(input_key, input_value):
            if isinstance(output_value, basestring):
                event.add(output_key, output_value)
            else:
                event.update(output_key, output_value)

    if not event.contains("description"):
        event = event.union(description=self.create_descriptions(event))

    return event

def add_event(self, jid, event):
    timestamp = current_time()

    ids = event.values("id")
    if not ids:
        obj = dict((x, list(event.values(x))) for x in event.keys())
        self.db.append_obj(timestamp, timestamp, marshal.dumps(obj))
        return

    for id in ids:
        copy = events.Event(event)
        copy.clear("id")
        copy.add("id", id)

        if is_valid(copy):
            obj = dict((x, list(copy.values(x))) for x in copy.keys())
            self._open(timestamp, id, jid, marshal.dumps(obj))
        else:
            self._close(timestamp, id)

def poll(self, url="https://www.spamhaus.org/drop/drop.lasso"):
    request = urllib2.Request(url)
    for key, value in self.http_headers:
        request.add_header(key, value)

    self.log.info("Downloading %s" % url)
    try:
        info, fileobj = yield utils.fetch_url(request)
    except utils.FetchUrlFailed as fuf:
        self.log.error("Download failed: %r", fuf)
        idiokit.stop(False)
    self.log.info("Downloaded")

    for line in fileobj.readlines():
        if line.startswith(';'):
            continue

        data = line.split(';')
        if not data:
            continue

        netblock_sbl = [x.strip() for x in data]
        if len(netblock_sbl) != 2:
            continue

        netblock, sbl = netblock_sbl
        if not len(netblock.split('/')) == 2:
            continue

        new = events.Event()
        new.add('netblock', netblock)
        new.add('description url', "http://www.spamhaus.org/sbl/query/" + sbl)
        new.add('feeder', 'spamhaus')
        new.add('feed', 'spamhaus drop list')
        new.add('type', 'hijacked network')

        if self.use_cymru_whois:
            values = yield cymruwhois.lookup(netblock.split('/')[0])
            for key, value in values:
                new.add(key, value)

        yield idiokit.send(new)

def manage_room(self, name):
    msg = "room {0!r}".format(name)
    attrs = events.Event({
        "type": "room",
        "service": self.bot_name,
        "sent events": "0",
        "room": name
    })

    with self.log.stateful(repr(self.xmpp.jid), "room", repr(name)) as log:
        log.open("Joining " + msg, attrs, status="joining")
        room = yield self.xmpp.muc.join(name, self.bot_name)

        log.open("Joined " + msg, attrs, status="joined")
        try:
            tail = self._stats(name) | room | idiokit.consume()
            if self.xmpp_rate_limit is not None:
                tail = self._output_rate_limiter() | tail
            yield self.augment() | events.events_to_elements() | tail
        finally:
            log.close("Left " + msg, attrs, status="left")

def get_event(self, user):
    status = user.GetStatus()
    update = status.GetCreatedAtInSeconds()
    name = user.name.lower()
    # Note: this reads the friends count, and it is stored under 'followers' below.
    followers_count = user.GetFriendsCount()
    statuses_count = user.GetStatusesCount()

    event = events.Event()
    event.add('user', user.name.lower())
    now = timeconversion.local_iso()
    event = blogparse(event, status.text)
    event.add('description', "%s - %s - %s" % (now, name, status.text.lower()))
    event.add('type', 'microblog')
    event.add('subtype', 'twitter')
    event.add('followers', unicode(followers_count))
    event.add('statuses_count', unicode(statuses_count))
    event.add('src', user.name.lower())
    event.add('dst', 'followers')
    event.add('id', id.create_id(event, 'user'))
    event.add('start', timeconversion.seconds2iso(update))
    return event

def create_event(self, **keys):
    description = keys.get("description", None)
    if description is None:
        return None

    event = events.Event()
    event.add("feeder", "malc0de.com")
    event.add("feed", "malc0de")
    event.add("type", "malware url")

    link = keys.get("link", None)
    if link:
        event.add("description url", link)

    for part in description.split(","):
        pair = part.split(":", 1)
        if len(pair) < 2:
            continue

        key = pair[0].strip()
        value = pair[1].strip()
        if not key or not value:
            continue

        if key in ["URL", "MD5"]:
            if key == "URL":
                value = "hxxp://" + value
            event.add(key.lower(), value)
        elif key == "IP Address":
            event.add("ip", value)

    host = keys.get("title", None)
    if not self.is_ip(host):
        event.add("domain name", host)

    event.add("description", "This host is most likely hosting a malware URL.")
    return event

def _handle_room(self, name):
    msg = "room {0!r}".format(name)
    attrs = events.Event({
        "type": "room",
        "service": self.bot_name,
        "room": unicode(name)
    })

    with self.log.stateful(repr(self.xmpp.jid), "room", repr(name)) as log:
        log.open("Joining " + msg, attrs, status="joining")
        room = yield self.xmpp.muc.join(name, self.bot_name)

        log.open("Joined " + msg, attrs, status="joined")
        try:
            yield idiokit.pipe(
                room,
                events.stanzas_to_events(),
                self._archive(room.jid.bare())
            )
        finally:
            log.close("Left " + msg, attrs, status="left")

class OpenBLBot(bot.PollingBot):
    feed_url = bot.Param(default=OPENBL_FEED_URL)
    use_cymru_whois = bot.BoolParam()

    def poll(self):
        pipe = self._poll(url=self.feed_url)
        if self.use_cymru_whois:
            pipe = pipe | cymruwhois.augment("ip")
        return pipe

    @idiokit.stream
    def _poll(self, url):
        self.log.info("Downloading %s" % url)
        try:
            info, fileobj = yield utils.fetch_url(url)
        except utils.FetchUrlFailed as fuf:
            raise bot.PollSkipped("failed to download {0!r} ({1})".format(
                url, fuf))
        self.log.info("Downloaded")

        for line in fileobj:
            if line.startswith("#"):
                continue

            ip, time = line.split()
            time = normalize_time(time)

            event = events.Event()
            event.add("ip", ip)
            event.add("source time", time)
            event.add("feed", "openbl")
            event.add("description url", self.feed_url)
            event.add("type", "brute-force")
            event.add(
                "description",
                "This host has most likely been performing brute-force " +
                "attacks on one of the following services: FTP, SSH, POP3, " +
                "IMAP, IMAPS or POP3S.")
            yield idiokit.send(event)