# write one CSV line per unit: interrupted units get their 'Terminated'
# timestamp, finished units their 'Done' timestamp
for pid, vals in unit_info.iteritems():
    if "Terminated" in vals.keys():
        line = "{0}, {1}, {2:0.5f}, {3:0.5f}, None\n".format(
            pid, vals["Interrupt"], vals["Started"], vals["Terminated"])
        f1.write(line)
    elif "Done" in vals.keys():
        line = "{0}, None, {1:0.5f}, None, {2:0.5f}\n".format(
            pid, vals["Started"], vals["Done"])
        f1.write(line)
f1.close()

import radical.pilot.utils as rpu

# we have a session
sid      = session.uid
profiles = rpu.fetch_profiles(sid=sid, tgt='/tmp/')
profile  = rpu.combine_profiles(profiles)
frame    = rpu.prof2frame(profile)
sf, pf, uf = rpu.split_frame(frame)

rpu.add_info(uf)
rpu.add_states(uf)

adv = uf[uf['event'].isin(['advance'])]
rpu.add_frequency(adv, 'f_exe', 0.5, {'state': 'Executing', 'event': 'advance'})

s_frame, p_frame, u_frame = rpu.get_session_frames(sid)
print str(u_frame)
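
# ------------------------------------------------------------------------------
# A minimal sketch (not part of the original script) of reading the CSV written
# above back into pandas.  The file name 'unit_timings.csv' and the column names
# are assumptions derived from the write format (pid, interrupt, started,
# terminated, done); 'None' fields are parsed as NaN.
import pandas as pd

def load_unit_timings(path='unit_timings.csv'):
    cols = ['pid', 'interrupt', 'started', 'terminated', 'done']
    df   = pd.read_csv(path, names=cols, na_values=['None'],
                       skipinitialspace=True)
    # runtime until termination (interrupted units) or completion (done units)
    df['runtime'] = df['terminated'].fillna(df['done']) - df['started']
    return df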
def preprocess(sid):

    session_dir = os.path.join(PICKLE_DIR, sid)

    if os.path.isdir(session_dir):
        report.warn("Session dir '%s' already exists, skipping session." % session_dir)
        return

    try:
        sid_profiles = find_profiles(sid)
        print sid_profiles

        report.info("Combining profiles for session: %s.\n" % sid)
        combined_profiles = rpu.combine_profiles(sid_profiles)
        uids = set()
        for p in combined_profiles:
            uids.add(p['uid'])

        report.info("Converting profiles to frames for session: %s.\n" % sid)
        frames = rpu.prof2frame(combined_profiles)
        print frames

        report.info("Head of Combined DF for session %s:\n" % sid)
        print frames.entity.unique()

        ses_prof_fr, pilot_prof_fr, cu_prof_fr = rpu.split_frame(frames)

        report.info("Head of CU Profiling DF for session %s:\n" % sid)
        print cu_prof_fr.head()

        report.info("Head of Session DF for session %s:\n" % sid)
        ses_prof_fr.insert(0, 'sid', sid)
        print ses_prof_fr.head()

        report.info("Head of Pilot DF for session %s:\n" % sid)
        pilot_prof_fr.insert(0, 'sid', sid)
        print pilot_prof_fr.head()

        report.info("Head of CU DF for session %s:\n" % sid)
        rpu.add_states(cu_prof_fr)

        report.info("Head of CU DF for session %s (after states added):\n" % sid)
        print cu_prof_fr.head()

        report.info("Head of CU DF for session %s (after info added):\n" % sid)
        rpu.add_info(cu_prof_fr)
        print cu_prof_fr.head()

        report.info("Head of CU DF for session %s (after concurrency added):\n" % sid)

        # Add a column with the number of units concurrently populating the database
        spec = {'in' : [{'state': rps.STAGING_INPUT,               'event': 'advance'}],
                'out': [{'state': rps.AGENT_STAGING_INPUT_PENDING, 'event': 'advance'},
                        {'state': rps.FAILED,                      'event': 'advance'},
                        {'state': rps.CANCELED,                    'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_populating', spec)

        # Add a column with the number of concurrently staging-in units
        spec = {'in' : [{'state': rps.AGENT_STAGING_INPUT, 'event': 'advance'}],
                'out': [{'state': rps.ALLOCATING_PENDING,  'event': 'advance'},
                        {'state': rps.FAILED,              'event': 'advance'},
                        {'state': rps.CANCELED,            'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_stage_in', spec)

        # Add a column with the number of units being scheduled concurrently
        spec = {'in' : [{'state': rps.ALLOCATING,        'event': 'advance'}],
                'out': [{'state': rps.EXECUTING_PENDING, 'event': 'advance'},
                        {'state': rps.FAILED,            'event': 'advance'},
                        {'state': rps.CANCELED,          'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_sched', spec)

        # Add a column with the number of concurrently executing units
        spec = {'in' : [{'state': rps.EXECUTING,                    'event': 'advance'}],
                'out': [{'state': rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance'},
                        {'state': rps.FAILED,                       'event': 'advance'},
                        {'state': rps.CANCELED,                     'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_exec', spec)

        # Add a column with the number of concurrently staging-out units
        spec = {'in' : [{'state': rps.AGENT_STAGING_OUTPUT,   'event': 'advance'}],
                'out': [{'state': rps.PENDING_OUTPUT_STAGING, 'event': 'advance'},
                        {'state': rps.FAILED,                 'event': 'advance'},
                        {'state': rps.CANCELED,               'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_stage_out', spec)

        print cu_prof_fr.head()

        report.info("Head of CU DF for session %s (after sid added):\n" % sid)
        cu_prof_fr.insert(0, 'sid', sid)
        print cu_prof_fr.head()

        report.info("CU DF columns for session %s:\n" % sid)
        print cu_prof_fr['info'].unique()

        # transpose
        report.info("Head of Transposed CU DF for session %s:\n" % sid)
        tr_cu_prof_fr = rpu.get_info_df(cu_prof_fr)
        tr_cu_prof_fr.insert(0, 'sid', sid)
        print tr_cu_prof_fr.head()

        report.info("Head of json Docs for session %s:\n" % sid)
        ses_info_fr, pilot_info_fr, unit_info_fr = json2frame(db=None, sid=sid)
        print ses_info_fr.head()

    except Exception as e:
        report.error("Failed to pre-process data for session %s (%s)" % (sid, e))
        return

    report.header("Writing dataframes to disk.\n")
    try:
        os.mkdir(session_dir)

        tr_cu_prof_fr.to_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
        ses_info_fr.to_pickle(os.path.join(session_dir, 'session_info.pkl'))
        pilot_info_fr.to_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_info_fr.to_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        ses_prof_fr.to_pickle(os.path.join(session_dir, 'session_prof.pkl'))
        pilot_prof_fr.to_pickle(os.path.join(session_dir, 'pilot_prof.pkl'))
        cu_prof_fr.to_pickle(os.path.join(session_dir, 'unit_prof.pkl'))

    except Exception as e:
        report.error("Failed to write data: %s" % e)
        return
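
# ------------------------------------------------------------------------------
# A minimal usage sketch (not part of the original module): load the pickles
# written by preprocess() back into a dict of DataFrames.  PICKLE_DIR and os are
# assumed to be available as in the module above; the helper name is illustrative.
def load_preprocessed(sid):
    import pandas as pd
    session_dir = os.path.join(PICKLE_DIR, sid)
    names = ['session_info', 'pilot_info', 'unit_info',
             'session_prof', 'pilot_prof', 'unit_prof', 'tr_unit_prof']
    return dict((n, pd.read_pickle(os.path.join(session_dir, '%s.pkl' % n)))
                for n in names)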
# Wait for processes to finish
uids = [cu.uid for cu in units]
umgr.wait_units(uids)

session.close(cleanup=False)

import radical.pilot.utils as rpu

# we have a session
sid      = session.uid
profiles = rpu.fetch_profiles(sid=sid, tgt='/tmp/')
profile  = rpu.combine_profiles(profiles)
frame    = rpu.prof2frame(profile)
sf, pf, uf = rpu.split_frame(frame)

rpu.add_info(uf)
rpu.add_states(uf)

adv = uf[uf['event'].isin(['advance'])]
rpu.add_frequency(adv, 'f_exe', 0.5, {'state': 'Executing', 'event': 'advance'})

s_frame, p_frame, u_frame = rpu.get_session_frames(sid)
# print str(u_frame)

info = ["uid",
        "Unscheduled",
        "StagingInput",
        "AgentStagingInputPending",
        "AgentStagingInput",
        "AllocatingPending",
        "Allocating",
        "ExecutingPending",
        "Executing",
        "AgentStagingOutputPending",
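
# ------------------------------------------------------------------------------
# Hedged sketch (an assumption, not part of the original script): the
# rpu.add_frequency() call above adds an 'f_exe' column holding the rate (1/s)
# of 'advance to Executing' events; the helper below summarizes that column.
def summarize_exec_rate(adv):
    rates = adv['f_exe'].dropna()
    if len(rates):
        print "exec-rate samples : %d"     % len(rates)
        print "mean exec rate    : %.2f/s" % rates.mean()
        print "peak exec rate    : %.2f/s" % rates.max()

# e.g.: summarize_exec_rate(adv)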
def preprocess(sid):

    session_dir = os.path.join(PICKLE_DIR, sid)

    if os.path.isdir(session_dir):
        report.warn("Session dir '%s' already exists, skipping session." % session_dir)
        return

    try:
        sid_profiles = find_profiles(sid)
        print sid_profiles

        report.info("Combining profiles for session: %s.\n" % sid)
        combined_profiles = rpu.combine_profiles(sid_profiles)
        uids = set()
        for p in combined_profiles:
            uids.add(p['uid'])

        report.info("Converting profiles to frames for session: %s.\n" % sid)
        frames = rpu.prof2frame(combined_profiles)

        report.info("Head of Combined DF for session %s:\n" % sid)
        print frames.entity.unique()

        ses_prof_fr, pilot_prof_fr, cu_prof_fr = rpu.split_frame(frames)

        report.info("Head of Session DF for session %s:\n" % sid)
        ses_prof_fr.insert(0, 'sid', sid)
        print ses_prof_fr.head()

        report.info("Head of Pilot DF for session %s:\n" % sid)
        pilot_prof_fr.insert(0, 'sid', sid)
        print pilot_prof_fr.head()

        report.info("Head of CU DF for session %s:\n" % sid)
        rpu.add_states(cu_prof_fr)
        print cu_prof_fr.head()

        report.info("Head of CU DF for session %s (after states added):\n" % sid)
        rpu.add_info(cu_prof_fr)
        print cu_prof_fr.head()

        report.info("Head of CU DF for session %s (after info added):\n" % sid)
        print cu_prof_fr.head()

        report.info("Head of CU DF for session %s (after concurrency added):\n" % sid)

        # Add a column with the number of units concurrently populating the database
        spec = {'in' : [{'state': rps.STAGING_INPUT,               'event': 'advance'}],
                'out': [{'state': rps.AGENT_STAGING_INPUT_PENDING, 'event': 'advance'},
                        {'state': rps.FAILED,                      'event': 'advance'},
                        {'state': rps.CANCELED,                    'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_populating', spec)

        # Add a column with the number of concurrently staging-in units
        spec = {'in' : [{'state': rps.AGENT_STAGING_INPUT, 'event': 'advance'}],
                'out': [{'state': rps.ALLOCATING_PENDING,  'event': 'advance'},
                        {'state': rps.FAILED,              'event': 'advance'},
                        {'state': rps.CANCELED,            'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_stage_in', spec)

        # Add a column with the number of units being scheduled concurrently
        spec = {'in' : [{'state': rps.ALLOCATING,        'event': 'advance'}],
                'out': [{'state': rps.EXECUTING_PENDING, 'event': 'advance'},
                        {'state': rps.FAILED,            'event': 'advance'},
                        {'state': rps.CANCELED,          'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_sched', spec)

        # Add a column with the number of concurrently executing units
        spec = {'in' : [{'state': rps.EXECUTING,                    'event': 'advance'}],
                'out': [{'state': rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance'},
                        {'state': rps.FAILED,                       'event': 'advance'},
                        {'state': rps.CANCELED,                     'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_exec', spec)

        # Add a column with the number of concurrently staging-out units
        spec = {'in' : [{'state': rps.AGENT_STAGING_OUTPUT,   'event': 'advance'}],
                'out': [{'state': rps.PENDING_OUTPUT_STAGING, 'event': 'advance'},
                        {'state': rps.FAILED,                 'event': 'advance'},
                        {'state': rps.CANCELED,               'event': 'advance'}]}
        rpu.add_concurrency(cu_prof_fr, 'cc_stage_out', spec)

        print cu_prof_fr.head()

        report.info("Head of CU DF for session %s (after sid added):\n" % sid)
        cu_prof_fr.insert(0, 'sid', sid)
        print cu_prof_fr.head()

        report.info("CU DF columns for session %s:\n" % sid)
        print cu_prof_fr['info'].unique()

        # transpose
        tr_cu_prof_fr = rpu.get_info_df(cu_prof_fr)
        tr_cu_prof_fr.insert(0, 'sid', sid)
        report.info("Head of Transposed CU DF for session %s:\n" % sid)
        print tr_cu_prof_fr.head()

        ses_info_fr, pilot_info_fr, unit_info_fr = json2frame(db=None, sid=sid)
        report.info("Head of json Docs for session %s:\n" % sid)
        print ses_info_fr.head()

    except Exception as e:
        report.error("Failed to pre-process data for session %s (%s)" % (sid, e))
        return

    report.header("Writing dataframes to disk.\n")
    try:
        os.mkdir(session_dir)

        ses_info_fr.to_pickle(os.path.join(session_dir, 'session_info.pkl'))
        pilot_info_fr.to_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_info_fr.to_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        ses_prof_fr.to_pickle(os.path.join(session_dir, 'session_prof.pkl'))
        pilot_prof_fr.to_pickle(os.path.join(session_dir, 'pilot_prof.pkl'))
        cu_prof_fr.to_pickle(os.path.join(session_dir, 'unit_prof.pkl'))
        tr_cu_prof_fr.to_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))

    except Exception as e:
        report.error("Failed to write data: %s" % e)
        return
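
# ------------------------------------------------------------------------------
# Hedged follow-up sketch (not part of the original module): plot the 'cc_exec'
# concurrency column added by preprocess(), read back from the unit_prof pickle
# written above.  The output file name is a placeholder; os, rpu and PICKLE_DIR
# come from the module above.
def plot_exec_concurrency(sid):
    import pandas as pd
    df = pd.read_pickle(os.path.join(PICKLE_DIR, sid, 'unit_prof.pkl'))
    fig, plot = rpu.create_plot()
    df[['time', 'cc_exec']].dropna().plot(ax=plot, x='time', y='cc_exec',
                                          drawstyle='steps', legend=False)
    plot.set_title('concurrently executing units')
    plot.set_xlabel('time (s)')
    plot.set_ylabel('# units')
    fig.savefig('%s_cc_exec.png' % sid, bbox_inches='tight')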
def profile_analysis(sid):

    import radical.pilot.utils as rpu

    report.header('profile analysis')

    # fetch profiles for all pilots
    profiles = rpu.fetch_profiles(sid=sid, tgt='/tmp/')
    print(profiles)

    # combine into a single profile
    profile = rpu.combine_profiles(profiles)

    # derive a global data frame
    frame = rpu.prof2frame(profile)

    # split into session / pilot / unit frames
    sf, pf, uf = rpu.split_frame(frame)
    print(len(sf))
    print(len(pf))
    print(len(uf))

    print(sf[0:10])
    print(pf[0:10])
    print(uf[0:10])

    # derive an additional 'info' column which contains some commonly used tags
    rpu.add_info(uf)
    for index, row in uf.iterrows():
        if str(row['info']) != 'nan':
            print("%-20s : %-10s : %-25s : %-20s"
                  % (row['time'], row['uid'], row['state'], row['info']))

    # add a 'state_from' column which signals a state transition
    rpu.add_states(uf)
    adv = uf[uf['event'].isin(['advance'])]

    print('---------------')
    print(len(adv))
    print(uf[uf['uid'] == 'unit.000001'])
    print(list(pf['event']))

    tmp = uf[uf['uid'] == 'unit.000001'].dropna()
    print(tmp[['time', 'uid', 'state', 'state_from']])

    # add a column 'rate_out' which contains the rate (1/s) of the event
    # 'advance' to state 'StagingOutput'
    print('---------------')
    rpu.add_frequency(adv, 'rate_out', 0.5,
                      {'state': 'StagingOutput', 'event': 'advance'})
    print(adv[['time', 'rate_out']].dropna(subset=['rate_out']))
    print('---------------')

    fig, plot = rpu.create_plot()

    plot.set_title('rate of output staging transitions', y=1.05, fontsize=18)
    plot.set_xlabel('time (s)', fontsize=14)
    plot.set_ylabel('rate (1/s)', fontsize=14)
    plot.set_frame_on(True)

    adv[['time', 'rate_out']].dropna().plot(ax=plot, logx=False, logy=False,
                                            x='time', y='rate_out',
                                            drawstyle='steps',
                                            label='output rate', legend=False)

    plot.legend(labels=['output rate'], loc='best', fontsize=14, frameon=True)
    fig.savefig('profile.png', bbox_inches='tight')
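
# ------------------------------------------------------------------------------
# Hedged usage sketch (assumption): profile_analysis() only needs a session id
# whose profiles can be fetched; a minimal command line driver could look like
# this.  The argument handling is illustrative, not part of the original code.
if __name__ == '__main__':
    import sys
    if len(sys.argv) != 2:
        print('usage: %s <session_id>' % sys.argv[0])
        sys.exit(1)
    profile_analysis(sys.argv[1])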