def __init__(self, trend=None):
    """Set up the trend on the local streamdrill server and open a stream.

    Args:
        trend: Mapping with a ``'name'`` key (the trend name) and a
            ``'columns'`` key (passed straight through to
            ``client.create``). When omitted, a fresh
            ``{'name': 'my_trend', 'columns': 'hashtag'}`` dict is used.

    The constructor calls ``client.delete`` and then ``client.create`` for
    the trend name, and keeps the object returned by ``client.stream()``
    on ``self.stream``.
    """
    # A dict literal as the default would be shared by every instance
    # created without an argument; build a new one per call instead.
    if trend is None:
        trend = {'name': 'my_trend', 'columns': 'hashtag'}
    self.trend = trend
    # NOTE(review): an earlier comment showed 'columns' as a list
    # (['hashtag']) while the default is a plain string - confirm which
    # form StreamDrillClient.create expects.

    client = StreamDrillClient("http://localhost:9669")
    trend_name = self.trend['name']
    client.delete(trend_name)
    # 1000 is presumably the trend size and the tuple its timescales -
    # TODO confirm against the StreamDrillClient.create signature.
    client.create(trend_name, self.trend['columns'], 1000, ("hour", "minute", "second"))
    self.stream = client.stream()
referers = "referers"
visitors = "visitors"

# Sample access-log line matched by `logline`:
#   65.55.215.69 - - [01/Aug/2013:00:02:07 +0200] "GET /robots.txt HTTP/1.1" 410 1129 "-" "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)"
# Capture groups, in order of appearance in the sample:
#   1 = 65.55.215.69 (address), 2 = "-", 3 = "-", 4 = bracketed timestamp,
#   5 = request path, 6 = 410, 7 = 1129, 8 = referer, 9 = last quoted field.
# Only GET requests over HTTP/1.1 match; every other line is skipped.
logline = re.compile(r"([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) (\S+) (\S+) \[([^\]]+)\] \"GET ([^\"]+) HTTP/1\.1\" ([0-9]+) ([0-9]+) \"([^\"]+)\" \"([^\"]+)\"")

client = StreamDrillClient("http://localhost:9669")

# Drop and re-create the three trends before feeding them.
# `pageViews` is defined earlier in the script (outside this section).
timescales = ("hour", "minute", "second")
client.delete(pageViews)
client.delete(referers)
client.delete(visitors)
client.create(pageViews, "path", 1000, timescales)
client.create(referers, "path:referer", 1000, timescales)
client.create(visitors, "path:addr", 1000, timescales)

stream = client.stream()

# Use a context manager so the log file is closed even if an update fails.
with open(sys.argv[1]) as log_file:
    for line in log_file:
        result = logline.match(line)
        if not result:
            continue

        addr = result.group(1)
        path = result.group(5)
        referer = result.group(8)

        # Only .html requests are counted.
        if not path.endswith(".html"):
            continue

        print(addr, path, referer)
        stream.update(pageViews, [path])
        # Skip empty referers ("-") and referers starting with `site`
        # (`site` is defined earlier in the script).
        if referer != "-" and not referer.startswith(site):
            stream.update(referers, [path, referer])
        stream.update(visitors, [path, addr])
# Sample access-log line matched by `logline`:
#   65.55.215.69 - - [01/Aug/2013:00:02:07 +0200] "GET /robots.txt HTTP/1.1" 410 1129 "-" "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)"
# Capture groups, in order of appearance in the sample:
#   1 = 65.55.215.69 (address), 2 = "-", 3 = "-", 4 = bracketed timestamp,
#   5 = request path, 6 = 410, 7 = 1129, 8 = referer, 9 = last quoted field.
# Only GET requests over HTTP/1.1 match; every other line is skipped.
logline = re.compile(
    r"([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) (\S+) (\S+) \[([^\]]+)\] \"GET ([^\"]+) HTTP/1\.1\" ([0-9]+) ([0-9]+) \"([^\"]+)\" \"([^\"]+)\""
)

client = StreamDrillClient("http://localhost:9669")

# Drop and re-create the three trends before feeding them.
# `pageViews`, `referers` and `visitors` are defined earlier in the script
# (outside this section).
timescales = ("hour", "minute", "second")
client.delete(pageViews)
client.delete(referers)
client.delete(visitors)
client.create(pageViews, "path", 1000, timescales)
client.create(referers, "path:referer", 1000, timescales)
client.create(visitors, "path:addr", 1000, timescales)

stream = client.stream()

# Use a context manager so the log file is closed even if an update fails.
with open(sys.argv[1]) as log_file:
    for line in log_file:
        result = logline.match(line)
        if not result:
            continue

        addr = result.group(1)
        path = result.group(5)
        referer = result.group(8)

        # Only .html requests are counted.
        if not path.endswith(".html"):
            continue

        print(addr, path, referer)
        stream.update(pageViews, [path])
        # Skip empty referers ("-") and referers starting with `site`
        # (`site` is defined earlier in the script).
        if referer != "-" and not referer.startswith(site):
            stream.update(referers, [path, referer])
        stream.update(visitors, [path, addr])