def main():
    userhash = user_mapping()
    engine = create_engine(
        "sqlite:////redacted_path_for_test/sumcoresg/queue_data.db",
        echo=True)
    jobs = engine.execute(
        "select * from jobs where cluster = 'Orca'").fetchall()
    # jobs = engine.execute("select * from jobs").fetchmany(6)
    jobs.sort(key=lambda x: x[1])
    fmt = '%Y-%m-%d %H:%M:%S.%f'
    # if two commits are less than delta_t apart, assume they were parsed
    # from the same xml data
    delta_t = datetime.timedelta(microseconds=100000)  # 0.1 seconds
    # ref_created is not necessarily the exact real timestamp
    ref_created = None
    rcu, qcu = {}, {}
    for j in jobs:
        # cn: clustername; un: username
        cn = j[0].lower()
        created = datetime.datetime.strptime(j[1], fmt)
        un = userhash[j[2]]  # converted to realname; realname equals username here
        cores = int(j[3]) * int(j[4])
        status = j[5]
        if not ref_created:  # first time through the loop
            ref_created = created
        elif (created - ref_created) > delta_t:
            ref_created = created
        rcu, qcu = update_vars(cn, un, cores, status, ref_created, rcu, qcu)

    # fold every user's usage into a single per-cluster 'testlab' entry
    for cu in [rcu, qcu]:
        new_cu = {}
        for key in cu:
            # note: a key here is a (clustername, username, ref_created) tuple
            clustername, username, ref_created = key[0], key[1], key[2]
            pomeslab_key = (clustername, 'testlab', ref_created)
            if pomeslab_key in new_cu:
                new_cu[pomeslab_key] += cu[key]
            else:
                new_cu[pomeslab_key] = cu[key]
        cu.update(new_cu)
    return rcu, qcu
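# update_vars above is defined elsewhere in the module. Below is a minimal
# sketch of what it appears to do, for reference only: accumulate core
# counts into the running (rcu) or queuing (qcu) dict, keyed by
# (clustername, username, ref_created). The PBS-style status letters
# 'R' and 'Q' are an assumption, not confirmed by this file.
def update_vars_sketch(cn, un, cores, status, ref_created, rcu, qcu):
    key = (cn, un, ref_created)
    if status == 'R':    # assumed: running job
        rcu[key] = rcu.get(key, 0) + cores
    elif status == 'Q':  # assumed: queued job
        qcu[key] = qcu.get(key, 0) + cores
    return rcu, qcu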
def collect_data(db):
    """should ideally be run in the background"""
    # the names in interested_clusters are not arbitrary; they have to match
    # the clusternames in static/xml/clusters.xml, e.g. scinet is not the
    # same as Scinet, SciNet, or Sci Net
    # be sure to use the config var INTERESTED_CLUSTERS on heroku;
    # here, scinet alone is used just for local testing
    interested_clusters = os.getenv(
        "INTERESTED_CLUSTERS",
        "scinet mp2 colosse guillimin lattice nestor parallel orcinus orca"
    ).split()
    # interested_clusters = ["orcinus"]

    # variable initialization
    figs_data_dict, usage_frac_dict = {}, {}
    usermap = util.user_mapping()
    delta_ts, resolutions = get_delta_ts_and_resolutions()
    durations = DURATIONS
    while True:
        for ic in interested_clusters:  # ic: interested cluster
            ic_obj = util.gen_cluster_obj_from_clustername(ic)
            raw_xml = ic_obj.fetch_raw_xml()
            created = datetime.datetime.now()
            if raw_xml:
                global RAW_XML
                RAW_XML[ic] = raw_xml
                # got this error for scinet and nestor:
                #     MemcachedError: error 37 from memcached_set: SUCCESS
                # guess those xml data may be too big for memcached;
                # using system memory instead for now (2012-06-12)
                # MEMC.set("RAW_XML", raw_xml_cache)

                # rcu, qcu: running & queuing core usages
                rcu, qcu = ic_obj.process_raw_xml(usermap, raw_xml)

                # 1. generate reports and cache them
                reports = MEMC.get("REPORTS")
                if not reports:  # meaning: first time
                    reports = {}
                report = ic_obj.gen_report(rcu, qcu, usermap, created)
                reports[ic_obj.clustername] = report
                MEMC.set("REPORTS", reports)

                # 2. insert into the database
                insert2db(rcu, qcu, ic_obj, created, db)

                # 3. cache usage data for later plotting
                # dur_queries = [last_day_data, last_week_data,
                #                last_month_data, last_year_data]
                dur_queries = prepare_data_for_plotting(ic, created, db)
                # this is for /.json kind of url
                figs_data_dict[ic] = dict(zip(durations, dur_queries))
                MEMC.set("FIGS_DATA", figs_data_dict)

                # ldd: last_day_data;   lwd: last_week_data
                # lmd: last_month_data; lyd: last_year_data
                ldd, lwd, lmd, lyd = dur_queries
                total_sec_to_now = (
                    lyd[0][-1] - THE_VERY_BEGINNING).total_seconds()
                # inte_coresec: integrate core seconds
                usage_frac_dict[ic] = {
                    'day': inte_coresec(*ldd) / (ic_obj.quota * 24 * 3600),
                    'week': inte_coresec(*lwd) / (ic_obj.quota * 7 * 24 * 3600),
                    'month': inte_coresec(*lmd) / (ic_obj.quota * 30 * 24 * 3600),
                    'year': inte_coresec(*lyd) / (ic_obj.quota * total_sec_to_now),
                }
                MEMC.set("USAGE_FRAC", usage_frac_dict)

        # 4. now do the real plotting: first usage vs. time, then histograms
        # 1). usage vs. time
        keys = sorted(figs_data_dict.keys())
        for index, key_group in enumerate(util.split_list(keys, step=4)):
            figs, axes = {}, {}
            for dur in durations:
                figs[dur] = plt.figure(figsize=(24, 13.5))
                axes[dur] = figs[dur].add_subplot(111)
                fig, ax = figs[dur], axes[dur]
                fig = do_fig_plotting(fig, ax, key_group, dur,
                                      figs_data_dict, usage_frac_dict)
                canvas = FigureCanvas(fig)
                png_output = StringIO.StringIO()
                canvas.print_png(png_output)
                plt.close(fig)  # free the memory
                # the figure naming pattern should be systematically
                # redesigned when the number of figures gets large
                ident = '_'.join([dur, str(index)])
                fig_content = png_output.getvalue()
                db = update_the_figure(db, Figure, ident, fig_content, created)
                db.session.commit()

        # 2). histogram plotting
        usage_frac_dict_by_dur = {}
        for dur in durations:
            usage_frac_dict_by_dur[dur] = {}
        for ic in usage_frac_dict:
            for dur in usage_frac_dict[ic]:
                usage_frac_dict_by_dur[dur][ic] = usage_frac_dict[ic][dur]

        for dur in usage_frac_dict_by_dur:
            N = len(usage_frac_dict_by_dur[dur])
            width = 1.  # the width of the bars
            ind = np.arange(0, N, width)  # the x locations for the groups
            keys = usage_frac_dict_by_dur[dur].keys()
            # sort so the most heavily used clusters come first
            keys.sort(key=lambda k: usage_frac_dict_by_dur[dur][k],
                      reverse=True)
            durMeans = [usage_frac_dict_by_dur[dur][k] for k in keys]
            fig = plt.figure(figsize=(16, 10))
            # otherwise, xticklabels cannot be shown fully
            fig.subplots_adjust(bottom=0.2)
            ax = fig.add_subplot(111)
            for i, d in zip(ind, durMeans):
                # 'g': green; 'r': red
                col = 'g' if d > 1 else 'r'
                ax.bar(i, d, width, color=col)
            ylim = list(ax.get_ylim())
            ylim[1] = ylim[1] * 1.1 if ylim[1] > 1 else 1.05
            ax.plot([0, 100], [1, 1], 'k--')
            ax.set_xlim([0, N * width])
            ax.set_ylim(ylim)
            ax.set_ylabel('Usage', labelpad=40)
            ax.set_title(dur, size=40, family="monospace",
                         bbox={'facecolor': 'red', 'alpha': 0.5})
            ax.title.set_y(1.02)  # offset the title position
            ax.set_xticks(ind + width / 2.)
            ax.set_xticklabels(keys, size=25, rotation=45)
            ax.grid(b=True, which="both")
            canvas = FigureCanvas(fig)
            png_output = StringIO.StringIO()
            canvas.print_png(png_output)
            plt.close(fig)
            ident = "histo_{0}".format(dur)
            fig_content = png_output.getvalue()
            db = update_the_figure(db, Figure, ident, fig_content, created)
            db.session.commit()

        # maybe 10 min is too frequent; consider 30 min
        dt = os.environ.get('DELTAT')
        if not dt:
            time.sleep(600)  # sleep 10 min
        else:
            time.sleep(float(dt))
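# inte_coresec above is defined elsewhere. Below is a minimal sketch of the
# trapezoidal core-second integration it appears to perform, assuming each
# duration query unpacks to a (timestamps, core_counts) pair -- that shape
# is inferred from the lyd[0][-1] access above, not confirmed:
def inte_coresec_sketch(times, cores):
    total = 0.0
    for t0, t1, c0, c1 in zip(times[:-1], times[1:], cores[:-1], cores[1:]):
        dt = (t1 - t0).total_seconds()
        total += 0.5 * (c0 + c1) * dt  # trapezoid: average cores * interval
    return total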
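# The docstring of collect_data says it should ideally be run in the
# background. One possible way to launch it, sketched with a daemon thread;
# how the app actually starts it is not shown in this file:
import threading

def start_collector(db):
    t = threading.Thread(target=collect_data, args=(db,))
    t.daemon = True  # let the interpreter exit without joining the loop
    t.start()
    return t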