def _poll(self):
    self.log.info("Downloading %s" % self.url)
    try:
        info, fileobj = yield utils.fetch_url(self.url)
    except utils.FetchUrlFailed as fuf:
        self.log.error("Download failed: %r", fuf)
        return

def _poll(self, url):
    self.log.info("Downloading %s" % url)
    try:
        info, fileobj = yield utils.fetch_url(url)
    except utils.FetchUrlFailed as fuf:
        self.log.error("Download failed: %r", fuf)
        idiokit.stop()

def _poll(self, url):
    request = urllib2.Request(url)
    for key, value in self.http_headers:
        request.add_header(key, value)

    try:
        self.log.info('Downloading feed from: "%s"', url)
        _, fileobj = yield utils.fetch_url(request)
    except utils.FetchUrlFailed as e:
        self.log.error('Failed to download feed "%s": %r', url, e)
        idiokit.stop(False)
    self.log.info("Finished downloading the feed.")

    # Skip any leading junk before the first "<" so iterparse starts
    # at the XML document proper.
    byte = fileobj.read(1)
    while byte and byte != "<":
        byte = fileobj.read(1)
    if byte == "<":
        fileobj.seek(-1, 1)

    try:
        for _, elem in etree.iterparse(fileobj):
            for event in self._parse(elem, url):
                if event:
                    yield idiokit.send(event)
    except ParseError as e:
        self.log.error('Invalid format on feed: "%s", "%r"', url, e)

def poll(self, url, name):
    try:
        self.log.info("Downloading page from: %r", url)
        info, fileobj = yield utils.fetch_url(url)
    except utils.FetchUrlFailed as e:
        self.log.error("Failed to download page %r: %r", url, e)
        return

def _poll(self, url="http://danger.rulez.sk/projects/bruteforceblocker/blist.php"): self.log.info("Downloading %s" % url) try: info, fileobj = yield utils.fetch_url(url) except utils.FetchUrlFailed, fuf: self.log.error("Download failed: %r", fuf) idiokit.stop(False)
def _poll(self, url):
    self.log.info("Downloading %s" % url)
    try:
        info, fileobj = yield utils.fetch_url(url)
    except utils.FetchUrlFailed as fuf:
        raise bot.PollSkipped("failed to download {0!r} ({1})".format(url, fuf))

def poll(inner, self, something):
    yield timer.sleep(2)

    depurl = "http://xtra.tllapt.ee/taru/dep_en.html"
    arrurl = "http://xtra.tllapt.ee/taru/arr_en.html"

    for (subtype, url) in [('departures', depurl), ('arrivals', arrurl)]:
        self.log.info("Downloading %r", url)
        try:
            info, fileobj = yield inner.sub(utils.fetch_url(url))
        except utils.FetchUrlFailed as fuf:
            self.log.error("Downloading failed: %r", fuf)
            return
        self.log.info("Downloaded")

        utilization = get_metrics(fileobj)

        event = events.Event()
        event.add('source', url)
        event.add('utilization', utilization)
        event.add('service', 'airports')
        event.add('subtype', subtype)
        event.add('id', create_id(event, 'tallinnairpoirt', 'subtype'))
        event.add('longitude', '24.799303')
        event.add('latitude', '59.4165212')
        yield inner.send(event)

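# The snippet above relies on a create_id helper that is not shown. A
# minimal sketch of what it could look like, assuming it derives a stable
# identifier by hashing a namespace string together with the event's
# values for the named keys (the hash and the field scheme are
# assumptions, not the original bot's code):
import hashlib

def create_id(event, namespace, *keys):
    digest = hashlib.sha1(namespace)
    for key in keys:
        # Sort for a deterministic digest regardless of value order.
        for value in sorted(event.values(key)):
            digest.update(value)
    return digest.hexdigest()
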
def poll(self): self.log.info("Downloading {0}".format(self.feed_url)) try: info, fileobj = yield utils.fetch_url(self.feed_url) except utils.FetchUrlFailed as fuf: raise bot.PollSkipped("failed to download {0} ({1})".format( self.feed_url, fuf)) self.log.info("Downloaded") for line in fileobj: url, netloc = parseURL(line) if url is None: continue event = events.Event() event.add("url", url) if i_am_a_name(netloc): event.add("domain name", netloc) else: event.add("ip", netloc) event.add("feeder", "siri urz") event.add("feed", "vxvault") event.add("feed url", self.feed_url) event.add("type", "malware url") event.add("description", "This host is most likely hosting a malware URL.") yield idiokit.send(event)
def poll(inner, self, something):
    yield timer.sleep(5)

    self.log.info("Downloading %r", self.url)
    try:
        info, fileobj = yield inner.sub(utils.fetch_url(self.url))
    except utils.FetchUrlFailed as fuf:
        self.log.error("Downloading failed: %r", fuf)
        return

def poll(inner, self, _):
    self.log.info("Downloading the report")
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    try:
        _, fileobj = yield inner.sub(utils.fetch_url(self.feed_url, opener))
    except utils.FetchUrlFailed as fuf:
        self.log.error("Failed to download the report: %r", fuf)
        return

def poll(inner, self, asn, url="http://dshield.org/asdetailsascii.html"):
    url += "?as=%s" % asn
    self.log.info("ASN%s: downloading", asn)
    try:
        info, fileobj = yield inner.sub(utils.fetch_url(url))
    except utils.FetchUrlFailed as fuf:
        self.log.error("ASN%s: downloading failed: %r", asn, fuf)
        return

def poll(inner, self, path):
    yield timer.sleep(1)
    self.log.info('Fetching population data from %s.' % path)
    try:
        info, fileobj = yield inner.sub(utils.fetch_url(path))
    except utils.FetchUrlFailed:
        self.log.error("Failed to fetch %s." % path)
        return

def poll(self): self.log.info("Downloading updates from {0!r}".format(self.url)) try: info, fileobj = yield utils.fetch_url(self.url) except utils.FetchUrlFailed as fuf: raise bot.PollSkipped("Downloading {0!r} failed ({1})".format(self.url, fuf)) self.log.info("Updates downloaded from {0!r}".format(self.url)) yield idiokit.pipe( utils.csv_to_events(fileobj, columns=self._columns), idiokit.map(self._normalize))
def poll(self):
    url = self.feed_url % self.application_key
    try:
        self.log.info("Checking if {0!r} has new data".format(url))
        info, _ = yield utils.fetch_url(HeadRequest(url))

        etag = info.get("etag", None)
        if etag is not None and self._etag == etag:
            raise bot.PollSkipped("no new data detected (ETag stayed the same)")

        self.log.info("Downloading data from {0!r}".format(url))
        _, fileobj = yield utils.fetch_url(url)
    except utils.FetchUrlFailed as error:
        raise bot.PollSkipped("failed to download {0!r} ({1})".format(url, error))
    self.log.info("Downloaded data from {0!r}".format(url))

    reader = BZ2Reader(fileobj)
    try:
        depth = 0
        sites = dict()

        for event, element in etree.iterparse(reader, events=("start", "end")):
            if event == "start" and element.tag == "entry":
                depth += 1
            if event == "end" and element.tag == "entry":
                yield self._handle_entry(element, sites)
                depth -= 1
            if event == "end" and depth == 0:
                element.clear()
    except SyntaxError as error:
        raise bot.PollSkipped("syntax error in report {0!r} ({1})".format(url, error))
    else:
        self._etag = etag

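# HeadRequest and BZ2Reader are small helpers assumed by the snippet
# above: one makes utils.fetch_url issue an HTTP HEAD request (so only
# the headers, including the ETag, are fetched), the other decompresses
# the bzip2 report incrementally for etree.iterparse. These are sketches,
# not necessarily the original definitions.
import bz2
import urllib2

class HeadRequest(urllib2.Request):
    def get_method(self):
        return "HEAD"

class BZ2Reader(object):
    # File-like wrapper that decompresses bzip2 data on the fly, so the
    # whole report never sits decompressed in memory. The size hint is
    # ignored, which etree.iterparse tolerates: it only stops on "".
    def __init__(self, fileobj):
        self._fileobj = fileobj
        self._decompressor = bz2.BZ2Decompressor()

    def read(self, amount=-1):
        while True:
            data = self._fileobj.read(65536)
            if not data:
                return ""
            chunk = self._decompressor.decompress(data)
            if chunk:
                return chunk
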
def main(inner, self):
    # Join the XMPP network using credentials given from the command line.
    conn = yield self.xmpp_connect()

    # Join the XMPP room.
    room = yield conn.muc.join(self.xmpp_room, self.bot_name)
    self.log.info("Joined room %r", self.xmpp_room)

    # Fetch the URL info and data as a file-like object. The info
    # contains e.g. the HTTP(S) headers, ignored for now.
    info, fileobj = yield utils.fetch_url(self.url)
    self.log.info("Opened URL %r", self.url)

    yield self.parse(fileobj) | events.events_to_elements() | room | threado.dev_null()

def fetch_extras(inner, opener, url):
    try:
        _, fileobj = yield inner.sub(utils.fetch_url(url, opener))
    except utils.FetchUrlFailed:
        inner.finish(list())

    data = yield inner.thread(fileobj.read)
    match = TABLE_REX.search(data)
    if match is None:
        inner.finish(list())

    table = etree.XML(match.group(1))
    keys = [th.text or "" for th in table.findall("thead/tr/th")]
    keys = map(str.strip, keys)
    values = [td.text or "" for td in table.findall("tbody/tr/td")]
    values = map(str.strip, values)
    items = [item for item in zip(keys, values) if all(item)]
    inner.finish(items)

def poll(self): self.log.info("Downloading {0}".format(self.feed_url)) try: info, fileobj = yield utils.fetch_url(self.feed_url) except utils.FetchUrlFailed as fuf: raise bot.PollSkipped("Download failed: {0}".format(fuf)) lines = [] for line in fileobj: line = line.strip() if line and not line.startswith("#"): lines.append(line) yield idiokit.pipe( utils.csv_to_events(tuple(lines), columns=COLUMNS, charset=info.get_param("charset", None)), _parse())
def poll(self, url="https://www.spamhaus.org/drop/drop.lasso"): request = urllib2.Request(url) for key, value in self.http_headers: request.add_header(key, value) self.log.info("Downloading %s" % url) try: info, fileobj = yield utils.fetch_url(request) except utils.FetchUrlFailed as fuf: self.log.error("Download failed: %r", fuf) idiokit.stop(False) self.log.info("Downloaded") for line in fileobj.readlines(): if line.startswith(';'): continue data = line.split(';') if not data: continue netblock_sbl = [x.strip() for x in data] if len(netblock_sbl) != 2: continue netblock, sbl = netblock_sbl if not len(netblock.split('/')) == 2: continue new = events.Event() new.add('netblock', netblock) new.add('description url', "http://www.spamhaus.org/sbl/query/" + sbl) new.add('feeder', 'spamhaus') new.add('feed', 'spamhaus drop list') new.add('type', 'hijacked network') if self.use_cymru_whois: values = yield cymruwhois.lookup(netblock.split('/')[0]) for key, value in values: new.add(key, value) yield idiokit.send(new)
def main(inner, self):
    # Join the XMPP network using credentials given from the command line.
    conn = yield self.xmpp_connect()

    # Join the XMPP room.
    room = yield conn.muc.join(self.xmpp_room, self.bot_name)
    self.log.info("Joined room %r", self.xmpp_room)

    # Fetch the URL info and data as a file-like object. The info
    # contains e.g. the HTTP(S) headers, ignored for now.
    info, fileobj = yield utils.fetch_url(self.csv_url)
    self.log.info("Opened URL %r", self.csv_url)

    # csv_to_events feeds out abusehelper.core.events.Event objects,
    # so convert them to XML elements before sending them to the room.
    csv_feed = utils.csv_to_events(fileobj,
                                   delimiter=self.csv_delimiter,
                                   columns=self.csv_columns)
    yield csv_feed | events.events_to_elements() | room | threado.dev_null()

def poll(self):
    self.log.info("Downloading {0}".format(self.feed_url))
    try:
        info, fileobj = yield utils.fetch_url(self.feed_url)
    except utils.FetchUrlFailed as fuf:
        raise bot.PollSkipped("failed to download {0} ({1})".format(self.feed_url, fuf))
    self.log.info("Downloaded")

    for line in fileobj:
        url, netloc = parseURL(line)
        if url is None:
            continue

        event = events.Event()
        event.add("url", url)
        if i_am_a_name(netloc):
            event.add("domain name", netloc)
        else:
            event.add("ip", netloc)
        event.add("feed", "vxvault")
        event.add("feed url", self.feed_url)
        event.add("type", "malware")
        event.add("description", "This host is most likely hosting a malware URL.")
        yield idiokit.send(event)

def handle_text_plain(inner, self, headers, fileobj):
    filename = headers[-1].get_filename(None)
    if filename is not None:
        self.log.info("Parsing CSV data from an attachment")
        result = yield inner.sub(self.parse_csv(filename, fileobj))
        inner.finish(result)

    for match in re.findall(self.url_rex, fileobj.read()):
        self.log.info("Fetching URL %r", match)
        try:
            info, fileobj = yield inner.sub(utils.fetch_url(match))
        except utils.FetchUrlFailed as fail:
            self.log.error("Fetching URL %r failed: %r", match, fail)
            return

        filename = info.get_filename(None)
        if filename is None:
            self.log.error("No filename given for the data")
            continue

        self.log.info("Parsing CSV data from the URL")
        result = yield inner.sub(self.parse_csv(filename, fileobj))
        inner.finish(result)

def handle_text_plain(inner, self, headers, fileobj):
    for match in re.findall(self.url_rex, fileobj.read()):
        self.log.info("Fetching URL %r", match)
        try:
            info, fileobj = yield inner.sub(utils.fetch_url(match))
        except utils.FetchUrlFailed as fail:
            self.log.error("Fetching URL %r failed: %r", match, fail)
            return

        self.log.info("Parsing IODEF data from the URL")

        first_iteration = True
        event = events.Event()
        for k, v in parse_iodef(fileobj):
            yield
            # Each 'incident' key starts a new event, so flush the one
            # collected so far (except on the very first iteration).
            if k == 'incident' and not first_iteration:
                inner.send(event)
            if k == 'incident':
                event = events.Event()
            first_iteration = False
            if v != '':
                event.add(k, v)
        # Flush the final event after the loop.
        inner.send(event)

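# parse_iodef is not shown above. The consumer treats each 'incident' key
# as the start of a new event, so a sketch compatible with that contract
# could yield an empty-valued 'incident' marker on each Incident start
# tag and flattened (tag, text) pairs otherwise. This is an assumption,
# not the original parser.
from xml.etree import cElementTree as etree

def parse_iodef(fileobj):
    for event, element in etree.iterparse(fileobj, events=("start", "end")):
        tag = element.tag.split("}")[-1].lower()
        if event == "start" and tag == "incident":
            yield "incident", ""
        elif event == "end" and tag != "incident":
            yield tag, (element.text or "").strip()
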