Пример #1
0
def top_crashes(top=10, start="", end="", *args):
  """Print the `top` highest-valued entries of the top-crashes hash.

  top: number of entries to print; converted with int().
  start, end: passed through parse_start_end(); the resulting values are
    parsed with the "%Y%m%d" format when the hash has to be rebuilt.
  *args: if "force" is present the hash is rebuilt even when it exists.

  The hash is read from the key given by top_crashes_key(start, end) and
  its items are ordered by their value (as an int), largest first.
  """
  start, end = parse_start_end(start, end)
  top = int(top)

  needs_rebuild = "force" in args or not r.exists(top_crashes_key(start, end))
  if needs_rebuild:
    # Drop whatever is stored under the key before recomputing it.
    r.delete(top_crashes_key(start, end))
    with timer():
      map_data(
        _top_crashes,
        start,
        end,
        start=datetime.strptime(start, "%Y%m%d"),
        end=datetime.strptime(end, "%Y%m%d"),
      )

  with timer():
    entries = r.hgetall(top_crashes_key(start, end)).items()
    ranked = sorted(entries, key=lambda pair: int(pair[1]), reverse=True)
    for entry in ranked[:top]:
      print(entry)
Пример #2
0
def aggregate(window=24, *args):
  """Run the `_aggregate` job through map_data() for the given window.

  window: any value accepted by int(); the converted value is handed to
    map_data().
  *args: if "force" is present, nuke_it() is called before aggregating.

  Raises ValueError/TypeError when `window` cannot be converted to int.
  """
  # Validate the window first: converting after nuke_it() meant a malformed
  # value wiped the existing data and only then crashed.
  window = int(window)

  if "force" in args:
    nuke_it()

  with timer():
    map_data(_aggregate, window)
Пример #3
0
def find_explosives_everywhere(model, start="", end="", training_data_length=14, window=24):
  """Scan every "dt-*-crashes-*" key and print the events a model flags.

  model: name handed to get_model() to obtain the model class.
  start, end: passed through parse_start_end() and then on to
    CrashCounts.crash_counts().
  training_data_length: how many preceding counts are fed to the model for
    each candidate; converted with int().
  window: handed to CrashCounts and unbuckify(); converted with int().

  For each key, the second "-"-separated field is used as the signature.
  Every count whose index is greater than `training_data_length` is tested
  with `modelclass(previous counts).is_explosive(count)`; hits are collected
  as (signature, unbuckify(window, x[i])) tuples and printed at the end,
  followed by their total number.
  """
  training_data_length = int(training_data_length)
  window = int(window)
  modelclass = get_model(model)
  start, end = parse_start_end(start, end)

  explosive = []

  with timer():
    print("Starting..")
    for processed, key in enumerate(r.keys("dt-*-crashes-*")):
      # Progress report every 10000 keys (including the very first one).
      if processed % 10000 == 0:
        print("{} signatures processed".format(processed))

      signature = key.split("-")[1]
      buckets, original = CrashCounts(signature, window).crash_counts(start=start, end=end)

      for i, count in enumerate(original):
        # NOTE(review): index == training_data_length is skipped as well,
        # although a full training slice exists there -- confirm intent.
        if i > training_data_length:
          history = original[i - training_data_length:i]
          if modelclass(history).is_explosive(count):
            explosive.append((signature, unbuckify(window, buckets[i])))

  for event in explosive:
    print(event)

  print("Found {} explosive events.".format(len(explosive)))
Пример #4
0
def nuke_it():
  """Ask for confirmation, then delete every "dt-*" key and PROCESSED_KEY.

  Nothing happens unless the answer, lower-cased, is exactly "y". The
  deletions are queued on a single pipeline and sent with one execute().
  """
  answer = raw_input("Are you sure you wanna nuke it? [yN] ")
  if answer.lower() != "y":
    return

  pipe = r.pipeline()
  print("\033[91m\033[1m[NUCLEAR LAUNCH DETECTED]\033[0m")
  with timer():
    doomed = r.keys("dt-*")
    for key in doomed:
      pipe.delete(key)

    pipe.delete(PROCESSED_KEY)
    pipe.execute()
Пример #5
0
def fetch_data(start, end):
  """Fetch data from a date to another. The date format is YYYYMMDD.

  start, end: date strings; both days are included in the range.

  For every day in the range the public crash-data archive is downloaded to
  data/<YYYYMMDD>.csv.gz and then decompressed in place with `gunzip`.
  Errors raised by urllib2 or by opening/writing the file propagate to the
  caller; the exit status of `gunzip` is not checked.
  """
  start = datetime(int(start[:4], 10), int(start[4:6], 10), int(start[6:8], 10))
  end = datetime(int(end[:4], 10), int(end[4:6], 10), int(end[6:8], 10))

  with timer():
    while start <= end:
      print("Downloading data from {}-{}-{}".format(start.year, start.month, start.day))
      s = start.strftime("%Y%m%d")
      url = "https://crash-analysis.mozilla.com/crash_analysis/{date}/{date}-pub-crashdata.csv.gz".format(date=s)
      req = urllib2.urlopen(url)
      try:
        # The payload is gzip data, so it must be written in binary mode;
        # a text-mode handle can corrupt it through newline translation.
        with open("data/{}.csv.gz".format(s), "wb") as f:
          f.write(req.read())
      finally:
        # Release the HTTP connection even when the read or write fails.
        req.close()

      os.system("gunzip data/{}.csv.gz".format(s))

      start += timedelta(1)