def top_crashes(top=10, start="", end="", *args):
    """Print the `top` largest entries of the top-crashes hash for a date range.

    The hash stored under ``top_crashes_key(start, end)`` is rebuilt first
    when it does not exist yet, or when the literal string ``"force"`` is
    among the extra positional arguments.

    Args:
        top: Number of entries to print; coerced with ``int``.
        start: Range start, normalised by ``parse_start_end`` and then parsed
            as ``%Y%m%d``.
        end: Range end, handled the same way as ``start``.
        *args: Optional flags; only ``"force"`` is inspected.
    """
    start, end = parse_start_end(start, end)
    top = int(top)
    cache_key = top_crashes_key(start, end)

    must_rebuild = "force" in args or not r.exists(cache_key)
    if must_rebuild:
        # Drop whatever is stored under the key before recomputing it.
        r.delete(cache_key)
        with timer():
            start_dt = datetime.strptime(start, "%Y%m%d")
            end_dt = datetime.strptime(end, "%Y%m%d")
            map_data(_top_crashes, start, end, start=start_dt, end=end_dt)

    def _count(entry):
        # Hash values are compared numerically, not as strings.
        return int(entry[1])

    with timer():
        ranked = sorted(r.hgetall(cache_key).items(), key=_count, reverse=True)
        for entry in ranked[:top]:
            print(entry)
def aggregate(window=24, *args):
    """Run the ``_aggregate`` mapping step for the given window.

    Args:
        window: Window size handed to ``_aggregate``; coerced with ``int``.
        *args: Optional flags. When ``"force"`` is present, ``nuke_it()`` is
            called before aggregating.
    """
    forced = "force" in args
    if forced:
        nuke_it()

    window_size = int(window)
    with timer():
        map_data(_aggregate, window_size)
def find_explosives_everywhere(model, start="", end="", training_data_length=14, window=24):
    """Scan every stored crash signature and print the explosive events found.

    For each key matching ``dt-*-crashes-*`` the crash counts are loaded via
    ``CrashCounts``. Every count whose index is greater than
    ``training_data_length`` is tested against a model built from the
    ``training_data_length`` counts that precede it.

    Args:
        model: Identifier passed to ``get_model`` to obtain the model class.
        start: Range start, normalised by ``parse_start_end``.
        end: Range end, normalised by ``parse_start_end``.
        training_data_length: Number of preceding counts given to the model;
            coerced with ``int``.
        window: Window size passed to ``CrashCounts`` and ``unbuckify``;
            coerced with ``int``.
    """
    training_data_length = int(training_data_length)
    window = int(window)
    modelclass = get_model(model)
    start, end = parse_start_end(start, end)

    processed = 0
    explosive = []
    with timer():
        print("Starting..")
        for key in r.keys("dt-*-crashes-*"):
            # Progress report every 10000 signatures (including the first).
            if processed % 10000 == 0:
                print("{} signatures processed".format(processed))

            # The signature is the second dash-separated field of the key.
            signature = key.split("-")[1]
            x, counts = CrashCounts(signature, window).crash_counts(start=start, end=end)

            for idx, count in enumerate(counts):
                if idx > training_data_length:
                    history = counts[idx - training_data_length:idx]
                    if modelclass(history).is_explosive(count):
                        explosive.append((signature, unbuckify(window, x[idx])))

            processed += 1

    for event in explosive:
        print(event)
    print("Found {} explosive events.".format(len(explosive)))
def nuke_it():
    """Delete every ``dt-*`` key and ``PROCESSED_KEY`` after confirmation.

    Prompts on stdin; anything other than ``y``/``Y`` aborts without touching
    the store. The deletions are queued on a single pipeline and executed
    together.
    """
    answer = raw_input("Are you sure you wanna nuke it? [yN] ")
    if answer.lower() != "y":
        return

    pipe = r.pipeline()
    print("\033[91m\033[1m[NUCLEAR LAUNCH DETECTED]\033[0m")
    with timer():
        for key in r.keys("dt-*"):
            pipe.delete(key)
        pipe.delete(PROCESSED_KEY)
        pipe.execute()
def fetch_data(start, end):
    """Fetch data from a date to another. The date format is YYYYMMDD.

    For every day in the inclusive range ``start``..``end`` the public crash
    data archive is downloaded to ``data/<YYYYMMDD>.csv.gz`` and then
    decompressed in place with ``gunzip``.

    Args:
        start: First day to download, as a ``YYYYMMDD`` string.
        end: Last day to download (inclusive), as a ``YYYYMMDD`` string.

    Raises:
        ValueError: If either date string does not describe a valid date.
    """
    # Local imports keep this block self-contained.
    import shutil
    from contextlib import closing

    start = datetime(int(start[:4], 10), int(start[4:6], 10), int(start[6:8], 10))
    end = datetime(int(end[:4], 10), int(end[4:6], 10), int(end[6:8], 10))
    with timer():
        while start <= end:
            print("Downloading data from {}-{}-{}".format(start.year, start.month, start.day))
            s = start.strftime("%Y%m%d")
            url = "https://crash-analysis.mozilla.com/crash_analysis/{date}/{date}-pub-crashdata.csv.gz".format(date=s)
            archive_path = "data/{}.csv.gz".format(s)
            # closing() guarantees the HTTP response is released even if the
            # write fails; urllib2 responses are not context managers.
            with closing(urllib2.urlopen(url)) as response:
                # The payload is gzip data, so the file must be opened in
                # binary mode; text mode can corrupt it on some platforms.
                with open(archive_path, "wb") as f:
                    # Stream in chunks instead of holding the whole archive
                    # in memory.
                    shutil.copyfileobj(response, f)
            # `s` comes from strftime("%Y%m%d"), so the shell command only
            # ever contains digits in the variable part.
            os.system("gunzip {}".format(archive_path))
            start += timedelta(1)