def GetNumAccessesStat():
    """Generate (or reuse) the stat file of per-record access counts.

    The input file named by Conf.GetFn("video_accesses_by_COs") is read as a
    sequence of records: a 4-field header line whose last field is an integer
    count, followed by that many detail lines. Only the counts are collected;
    the detail lines are skipped. Blank lines and lines starting with "#" are
    ignored where a header is expected.

    Returns:
        Path of the output file. If it already exists it is returned as-is
        without re-reading the input.

    Raises:
        RuntimeError: if a header line does not have exactly 4
            space-separated fields, or the file ends before all the detail
            lines announced by a header have been read.
    """
    fn_out = "%s/cdf-youtube-accesses-per-co" % Conf.DnOut()
    if os.path.exists(fn_out):
        return fn_out

    fn_in = Conf.GetFn("video_accesses_by_COs")
    counts = []
    with open(fn_in) as fo:
        # readline() returns "" only at EOF, so it serves as the sentinel.
        # readline is used (not file iteration) because the detail lines
        # are consumed from the same handle inside the loop.
        for raw in iter(fo.readline, ""):
            header = raw.strip()
            if not header or header[0] == "#":
                continue

            # Example header: 4 34.3305 -111.091 13
            # (presumably id, lat, lon, count -- confirm against the
            # producer of this file)
            fields = header.split(" ")
            if len(fields) != 4:
                raise RuntimeError("Unexpected: [%s]" % header)

            count = int(fields[3])
            counts.append(count)

            # Skip over the detail lines that belong to this header.
            for _ in range(count):
                if not fo.readline():
                    raise RuntimeError("Unexpected")

    Stat.Gen(counts, fn_out)
    return fn_out
def GetTemporalDist():
    """Return the weekly temporal-distribution data file and its weekly max.

    The output file name is derived from the configured "youtube_workload"
    file name. When the output file does not exist yet, _gen-plot-data.sh
    (located next to this module) is run to produce it.

    Returns:
        A tuple (fn_out, _GetWeeklyMax(fn_out)).
    """
    fn_in = Conf.GetFn("youtube_workload")
    fn_out = "%s/%s-temporal-dist-weekly" % (Conf.DnOut(), os.path.basename(fn_in))

    # Generate the data only when there is no cached output.
    if not os.path.isfile(fn_out):
        script_dir = os.path.dirname(__file__)
        Util.RunSubp(
            "%s/_gen-plot-data.sh --youtube_workload=%s --out_fn=%s"
            % (script_dir, fn_in, fn_out))

    return (fn_out, _GetWeeklyMax(fn_out))
def GetObjPopDist():
    """Return the path of the object-popularity distribution data file.

    The output file name is derived from the configured "youtube_workload"
    file name. When the file does not exist yet, _gen-plot-data.sh (located
    next to this module) is run with the workload file as its only argument.

    Returns:
        Path of the output file.
    """
    fn_in = Conf.GetFn("youtube_workload")
    workload_name = os.path.basename(fn_in)
    fn_out = "%s/%s-obj-pop-dist" % (Conf.DnOut(), workload_name)

    if not os.path.isfile(fn_out):
        # NOTE(review): fn_out is not passed to the script; presumably the
        # script derives the same output path on its own -- confirm.
        script_dir = os.path.dirname(__file__)
        Util.RunSubp(
            "%s/_gen-plot-data.sh --youtube_workload=%s" % (script_dir, fn_in))

    return fn_out
def Plot():
    """Plot the CloudFront locations to a PDF using gnuplot.

    Runs cf-locs.gnuplot (located next to this module) with the input and
    output file names handed over through the FN_IN and FN_OUT environment
    variables, then reports the created file and its size.
    """
    fn_in = "data-cloudfront-locs"
    fn_out = "%s/cloudfront-locs.pdf" % Conf.DnOut()

    with Cons.MT("Plotting CF locations ..."):
        # Current environment plus the two variables the gnuplot script reads.
        env = dict(os.environ, FN_IN=fn_in, FN_OUT=fn_out)
        script_dir = os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s/cf-locs.gnuplot" % script_dir, env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def GetClusteredPoints():
    """Return the path of the clustered-points file for the workload.

    The output file name encodes both the configured "youtube_workload" file
    name and the configured "dist_sq_threshold". When the file does not exist
    yet, _cluster.sh (located next to this module) is run with those two
    settings.

    Returns:
        Path of the output file.
    """
    threshold = Conf.Get("dist_sq_threshold")
    fn_in = Conf.GetFn("youtube_workload")
    fn_out = "%s/%s-clustered-with-dist-sq-%s" % (
        Conf.DnOut(), os.path.basename(fn_in), threshold)

    if not os.path.isfile(fn_out):
        script_dir = os.path.dirname(__file__)
        Util.RunSubp(
            "%s/_cluster.sh --youtube_workload=%s --dist_sq_threshold=%s"
            % (script_dir, fn_in, threshold))

    return fn_out
def Plot():
    """Plot CloudFront and Whole Foods locations to a PDF using gnuplot.

    Runs edge-server-locs.gnuplot (located next to this module) with the two
    input files and the output file handed over through the FN_CF, FN_WF and
    FN_OUT environment variables, then reports the created file and its size.
    """
    fn_cf = Conf.Get("cf_locs")
    fn_wf = GetWfLocFile()
    fn_out = "%s/cloudfront-wholefoods-locations.pdf" % Conf.DnOut()

    with Cons.MT("Plotting Whole Foods store locations ..."):
        # Current environment plus the variables the gnuplot script reads.
        env = dict(os.environ, FN_CF=fn_cf, FN_WF=fn_wf, FN_OUT=fn_out)
        script_dir = os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s/edge-server-locs.gnuplot" % script_dir, env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def GetPlotData():
    """Build (or reuse) the Whole Foods store-location plot data file.

    Steps, when the output file does not exist yet:
      1. Read the address list named by Conf.Get("addrs"), dropping blank
         lines, "#" comment lines and duplicates.
      2. Geocode every address not yet in WfLocDb with Nominatim and store
         the result, sleeping 1 second after each request.
      3. Read the file named by Conf.Get("addrs_googlemaps"), whose lines
         have the form "<address words> <lat>,<lon>", and store every
         address not yet in the DB.
      4. Dump all DB rows to the output file as: "addr" lat lon.

    Returns:
        Path of the output file. If it already exists it is returned as-is.

    Raises:
        RuntimeError: if an address cannot be geocoded, or a line of the
            "addrs_googlemaps" file has no valid "<lat>,<lon>" token.

    Exits the process with status 1 when the geocoder times out.
    """
    fn_out = "%s/whole-foods-locations" % Conf.DnOut()
    if os.path.exists(fn_out):
        return fn_out

    fn_in = Conf.Get("addrs")
    with Cons.MT("Adding store locations from %s" % fn_in):
        addrs = set()
        num_dups = 0
        with open(fn_in) as fo:
            for line in fo:
                addr = line.strip()
                if not addr or addr[0] == "#":
                    continue
                if addr in addrs:
                    num_dups += 1
                else:
                    addrs.add(addr)
        Cons.P("Filtered out %d duplicate addresses" % num_dups)

        db = WfLocDb()
        geolocator = geopy.Nominatim()
        # Sorted so that the geocoding order is deterministic across runs.
        for addr in sorted(addrs):
            if db.Exist(addr):
                continue
            try:
                loc = geolocator.geocode(addr)
            except geopy.exc.GeocoderTimedOut as e:
                # Give up on a timeout; addresses inserted so far are
                # skipped by the db.Exist() check on the next run.
                Cons.P("%s [%s]" % (e, addr))
                sys.exit(1)
            if loc is None:
                raise RuntimeError("Unexpected: [%s]" % addr)

            Cons.P("\"%s\" %f %f\n" % (addr, loc.latitude, loc.longitude))
            db.Insert(addr, loc.latitude, loc.longitude)

            # At most 1 req per second
            #   https://operations.osmfoundation.org/policies/nominatim
            time.sleep(1)

    fn_in = Conf.Get("addrs_googlemaps")
    with Cons.MT("Adding store locations from %s" % fn_in):
        with open(fn_in) as fo:
            for line in fo:
                entry = line.strip()
                # Skip blank and comment lines, consistent with the parsing
                # of the first address file. Previously a blank line raised
                # an opaque IndexError.
                if not entry or entry[0] == "#":
                    continue

                # The last space-separated token is "<lat>,<lon>"; everything
                # before it is the address.
                tokens = entry.split(" ")
                addr = " ".join(tokens[:-1])
                coords = tokens[-1].split(",")
                if len(coords) < 2:
                    raise RuntimeError("Unexpected: [%s]" % entry)

                if db.Exist(addr):
                    continue

                # Store numbers, like the geocoder path does, since the
                # writer below formats the coordinates with %f.
                try:
                    lat = float(coords[0])
                    lon = float(coords[1])
                except ValueError:
                    raise RuntimeError("Unexpected: [%s]" % entry)
                db.Insert(addr, lat, lon)

    with open(fn_out, "w") as fo:
        # Header matches the actual column order of the rows written below.
        fo.write("# addr lat lon\n")
        for r in db.GetAll():
            fo.write("\"%s\" %f %f\n" % (r["addr"], r["lat"], r["lon"]))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out