def plot(sids, value, label='', paper=False):

    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]

    labels = []

    first = True

    for sid in sids:

        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = list(resources.values())[0]
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        cores = info['metadata.effective_cores']
        #cores = 32

        if value == 'cc_fork':
            spec = {
                'in': [
                    {'info' : 'aec_start_script'}
                ],
                'out' : [
                    {'info' : 'aec_after_exec'}
                ]
            }
            rpu.add_concurrency (unit_prof_df, 'cc_fork', spec)

        elif value == 'cc_exit':
            spec = {
                'in': [
                    {'info' : 'aec_after_exec'}
                ],
                'out' : [
                    {'state': rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance'},
                ]
            }
            rpu.add_concurrency (unit_prof_df, 'cc_exit', spec)

        df = unit_prof_df[
            (unit_prof_df[value] >= 0) &
            #(unit_prof_df.event == 'advance') &
            (unit_prof_df.sid == sid)
            ][['time', value]]

        df.columns = ['time', cores]
        df['time'] -= df['time'].min()

        if first:
            df_all = df
        else:
            df_all = pd.merge(df_all, df,  on='time', how='outer')

        #labels.append("Cores: %d" % cores)
        # labels.append("%d" % cores)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])
        labels.append("%d" % info['metadata.cu_runtime'])

        first = False

    df_all.set_index('time', inplace=True)
    print(df_all.head())
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    #ax = df_all.plot(drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)
    ax = df_all.plot(fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, color=colors)

    # Vertical reference
    #x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    #mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    #labels.append("Optimal")

    location = 'upper right'
    legend = mp.pyplot.legend(labels, loc=location, fontsize=LEGEND_FONTSIZE, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        mp.pyplot.title("Concurrent number of CUs in stage '%s'.\n"
                "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                "Constant number of %d sub-agent(s) with %d ExecWorker(s) each.\n"
                "RP: %s - RS: %s - RU: %s"
               % (value,
                  info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
                  info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                  info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                  ), fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel(r"\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    # mp.pyplot.ylim(0, 200)
    mp.pyplot.ylim(-50,)
    mp.pyplot.xlim(0, 600)
    #ax.get_xaxis().set_ticks([])
    print(dir(ax))

    for spine in ax.spines.values():
        spine.set_linewidth(BORDERWIDTH)
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()
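

# The cc_* columns used above come from rpu.add_concurrency(), which counts
# how many units sit between an 'in' event and an 'out' event at any point in
# time.  The helper below is a minimal, self-contained sketch of that idea
# using plain pandas; 'concurrency_sketch', its column names ('time', 'event')
# and the event names are hypothetical placeholders, not part of the RP API.
import pandas as pd


def concurrency_sketch(df, in_event, out_event, tgt='concurrency'):
    """Add a running count of units that are between in_event and out_event."""
    # +1 at every 'in' event, -1 at every 'out' event, 0 otherwise
    delta = df['event'].map({in_event: 1, out_event: -1}).fillna(0)
    # the cumulative sum of the deltas is the instantaneous concurrency
    df[tgt] = delta.cumsum()
    return df

# Example (hypothetical data):
#   demo = pd.DataFrame({'time' : [0.0, 1.0, 1.5, 2.0],
#                        'event': ['exec_start', 'exec_start',
#                                  'exec_stop', 'exec_stop']})
#   print(concurrency_sketch(demo, 'exec_start', 'exec_stop'))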

# Example 2

def preprocess(sid):

    session_dir = os.path.join(PICKLE_DIR, sid)

    if os.path.isdir(session_dir):
        report.warn("Session dir '%s' already exists, skipping session." %
                    session_dir)
        return

    try:
        sid_profiles = find_profiles(sid)
        print(sid_profiles)
        report.info("Combining profiles for session: %s.\n" % sid)
        combined_profiles = rpu.combine_profiles(sid_profiles)
        uids = set()
        for p in combined_profiles:
            uids.add(p['uid'])

        report.info("Converting profiles to frames for session: %s.\n" % sid)
        frames = rpu.prof2frame(combined_profiles)

        print(frames)

        report.info("Entities in Combined DF for session %s:\n" % sid)
        print(frames.entity.unique())

        ses_prof_fr, pilot_prof_fr, cu_prof_fr = rpu.split_frame(frames)
        report.info("Head of CU Profiling DF for session %s:\n" % sid)
        print(cu_prof_fr.head())

        report.info("Head of Session DF for session %s:\n" % sid)
        ses_prof_fr.insert(0, 'sid', sid)
        print(ses_prof_fr.head())

        report.info("Head of Pilot DF for session %s:\n" % sid)
        pilot_prof_fr.insert(0, 'sid', sid)
        print(pilot_prof_fr.head())

        report.info("Head of CU DF for session %s:\n" % sid)
        rpu.add_states(cu_prof_fr)
        report.info("Head of CU DF for session %s (after states added):\n" %
                    sid)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after info added):\n" % sid)
        rpu.add_info(cu_prof_fr)
        print(cu_prof_fr.head())

        report.info(
            "Head of CU DF for session %s (after concurrency added):\n" % sid)

        # Add a column with the number of concurrent units populating the database
        spec = {
            'in': [{
                'state': rps.STAGING_INPUT,
                'event': 'advance'
            }],
            'out': [{
                'state': rps.AGENT_STAGING_INPUT_PENDING,
                'event': 'advance'
            }, {
                'state': rps.FAILED,
                'event': 'advance'
            }, {
                'state': rps.CANCELED,
                'event': 'advance'
            }]
        }
        rpu.add_concurrency(cu_prof_fr, 'cc_populating', spec)

        # Add a column with the number of concurrent staging in units
        spec = {
            'in': [{
                'state': rps.AGENT_STAGING_INPUT,
                'event': 'advance'
            }],
            'out': [{
                'state': rps.ALLOCATING_PENDING,
                'event': 'advance'
            }, {
                'state': rps.FAILED,
                'event': 'advance'
            }, {
                'state': rps.CANCELED,
                'event': 'advance'
            }]
        }
        rpu.add_concurrency(cu_prof_fr, 'cc_stage_in', spec)

        # Add a column with the number of concurrent scheduling units
        spec = {
            'in': [{
                'state': rps.ALLOCATING,
                'event': 'advance'
            }],
            'out': [{
                'state': rps.EXECUTING_PENDING,
                'event': 'advance'
            }, {
                'state': rps.FAILED,
                'event': 'advance'
            }, {
                'state': rps.CANCELED,
                'event': 'advance'
            }]
        }
        rpu.add_concurrency(cu_prof_fr, 'cc_sched', spec)

        # Add a column with the number of concurrent Executing units
        spec = {
            'in': [{
                'state': rps.EXECUTING,
                'event': 'advance'
            }],
            'out': [{
                'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                'event': 'advance'
            }, {
                'state': rps.FAILED,
                'event': 'advance'
            }, {
                'state': rps.CANCELED,
                'event': 'advance'
            }]
        }
        rpu.add_concurrency(cu_prof_fr, 'cc_exec', spec)

        # Add a column with the number of concurrent staging-out units
        spec = {
            'in': [{
                'state': rps.AGENT_STAGING_OUTPUT,
                'event': 'advance'
            }],
            'out': [{
                'state': rps.PENDING_OUTPUT_STAGING,
                'event': 'advance'
            }, {
                'state': rps.FAILED,
                'event': 'advance'
            }, {
                'state': rps.CANCELED,
                'event': 'advance'
            }]
        }
        rpu.add_concurrency(cu_prof_fr, 'cc_stage_out', spec)

        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after sid added):\n" % sid)
        cu_prof_fr.insert(0, 'sid', sid)
        print(cu_prof_fr.head())

        report.info("Unique 'info' values in CU DF for session %s:\n" % sid)
        print(cu_prof_fr['info'].unique())

        # transpose
        report.info("Head of Transposed CU DF for session %s:\n" % sid)
        tr_cu_prof_fr = rpu.get_info_df(cu_prof_fr)
        tr_cu_prof_fr.insert(0, 'sid', sid)
        print(tr_cu_prof_fr.head())

        report.info("Head of json Docs for session %s:\n" % sid)
        ses_info_fr, pilot_info_fr, unit_info_fr = json2frame(db=None, sid=sid)
        print(ses_info_fr.head())

    except Exception as e:
        report.error("Failed to pre-process data for session %s (%s)" %
                     (sid, e))
        return

    report.header("Writing dataframes to disk.\n")
    try:

        os.mkdir(session_dir)

        tr_cu_prof_fr.to_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
        ses_info_fr.to_pickle(os.path.join(session_dir, 'session_info.pkl'))
        pilot_info_fr.to_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_info_fr.to_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        ses_prof_fr.to_pickle(os.path.join(session_dir, 'session_prof.pkl'))
        pilot_prof_fr.to_pickle(os.path.join(session_dir, 'pilot_prof.pkl'))
        cu_prof_fr.to_pickle(os.path.join(session_dir, 'unit_prof.pkl'))

    except Exception as e:
        report.error("Failed to write data: %s" % e)
        return
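

# A small, hypothetical convenience wrapper (not part of the original script)
# showing how preprocess() above might be driven for a batch of sessions; the
# wrapper name and the example session IDs are placeholders.
def preprocess_all(sids):
    """Pre-process a list of session IDs, skipping ones already on disk."""
    for one_sid in sids:
        preprocess(one_sid)

# Example:
#   preprocess_all(['rp.session.example.0000', 'rp.session.example.0001'])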



for exp in sorted(exp_frames.keys()):
    print("plotting '%s'" % exp)

    # these are the frames we want to plot for this experiment
    plot_frames = list()
    
    for frame, label in exp_frames[exp]:
        # we add a data frame column for CU concurrency for the component of
        # interest.  Also, we calibrate t0 to when the first unit enters that
        # component.
        rpu.add_concurrency (frame, tgt='cu_num', spec=event_filter[exp])
        rpu.calibrate_frame (frame, spec=calib_filter[exp])
      # plot_frames.append([frame, label])
        plot_frames.append([frame, None])
        
    # create the plots for CU concurrency over time, and also show the plots in the notebook
    fig, _ = rpu.frame_plot(plot_frames, logx=False, logy=False, 
                            title=exp, legend=False, figdir=figdir,
                            axis=[['time',   'time (s)'], 
                                  ['cu_num', "number of concurrent CUs in '%s'" % exp]])
  # fig.show()
  # 
  # # inverse axis, use logar. scale for time
  # fig, _ = rpu.frame_plot(plot_frames, logx=False, logy=True, title=exp,
  #                         title=exp, legend=False, figdir=figdir,
  #                         axis=[['cu_num', 'number of concurrent CUs'],
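

# The loop above assumes event_filter and calib_filter dicts that are defined
# elsewhere and not included in this snippet.  Purely as an illustration of the
# expected shape, event_filter[exp] follows the same 'in'/'out' spec format that
# rpu.add_concurrency() uses throughout this file; the entry below is a
# hypothetical example for exp == 'exec', and the spec format expected by
# rpu.calibrate_frame() is not shown here.
event_filter_example = {
    'exec': {
        'in' : [{'state': rps.EXECUTING,                    'event': 'advance'}],
        'out': [{'state': rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance'},
                {'state': rps.FAILED,                       'event': 'advance'},
                {'state': rps.CANCELED,                     'event': 'advance'}]
    }
}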
def preprocess(sid):

    session_dir = os.path.join(PICKLE_DIR, sid)

    if os.path.isdir(session_dir):
        report.warn("Session dir '%s' already exists, skipping session." % session_dir)
        return

    try:
        sid_profiles = find_profiles(sid)
        print(sid_profiles)
        report.info("Combining profiles for session: %s.\n" % sid)
        combined_profiles = rpu.combine_profiles(sid_profiles)
        uids = set()
        for p in combined_profiles:
            uids.add(p['uid'])

        report.info("Converting profiles to frames for session: %s.\n" % sid)
        frames = rpu.prof2frame(combined_profiles)

        report.info("Entities in Combined DF for session %s:\n" % sid)
        print(frames.entity.unique())

        ses_prof_fr, pilot_prof_fr, cu_prof_fr = rpu.split_frame(frames)

        report.info("Head of Session DF for session %s:\n" % sid)
        ses_prof_fr.insert(0, 'sid', sid)
        print(ses_prof_fr.head())

        report.info("Head of Pilot DF for session %s:\n" % sid)
        pilot_prof_fr.insert(0, 'sid', sid)
        print(pilot_prof_fr.head())

        report.info("Head of CU DF for session %s:\n" % sid)
        print(cu_prof_fr.head())

        rpu.add_states(cu_prof_fr)
        report.info("Head of CU DF for session %s (after states added):\n" % sid)
        print(cu_prof_fr.head())

        rpu.add_info(cu_prof_fr)
        report.info("Head of CU DF for session %s (after info added):\n" % sid)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after concurrency added):\n" % sid)

        # Add a column with the number of concurrent units populating the database
        spec = {
            'in': [
                {'state': rps.STAGING_INPUT, 'event': 'advance'}
            ],
            'out' : [
                {'state':rps.AGENT_STAGING_INPUT_PENDING, 'event': 'advance'},
                {'state':rps.FAILED, 'event': 'advance'},
                {'state':rps.CANCELED, 'event': 'advance'}
            ]
        }
        rpu.add_concurrency (cu_prof_fr, 'cc_populating', spec)

        # Add a column with the number of concurrent staging in units
        spec = {
            'in': [
                {'state': rps.AGENT_STAGING_INPUT, 'event': 'advance'}
            ],
            'out' : [
                {'state':rps.ALLOCATING_PENDING, 'event': 'advance'},
                {'state':rps.FAILED, 'event': 'advance'},
                {'state':rps.CANCELED, 'event': 'advance'}
            ]
        }
        rpu.add_concurrency (cu_prof_fr, 'cc_stage_in', spec)

        # Add a column with the number of concurrent scheduling units
        spec = {
            'in': [
                {'state': rps.ALLOCATING, 'event': 'advance'}
            ],
            'out' : [
                {'state':rps.EXECUTING_PENDING, 'event': 'advance'},
                {'state':rps.FAILED, 'event': 'advance'},
                {'state':rps.CANCELED, 'event': 'advance'}
            ]
        }
        rpu.add_concurrency (cu_prof_fr, 'cc_sched', spec)

        # Add a column with the number of concurrent Executing units
        spec = {
            'in': [
                {'state': rps.EXECUTING, 'event': 'advance'}
            ],
            'out' : [
                {'state':rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance'},
                {'state':rps.FAILED, 'event': 'advance'},
                {'state':rps.CANCELED, 'event': 'advance'}
            ]
        }
        rpu.add_concurrency (cu_prof_fr, 'cc_exec', spec)

        # Add a column with the number of concurrent staging-out units
        spec = {
            'in': [
                {'state': rps.AGENT_STAGING_OUTPUT, 'event': 'advance'}
            ],
            'out' : [
                {'state':rps.PENDING_OUTPUT_STAGING, 'event': 'advance'},
                {'state':rps.FAILED, 'event': 'advance'},
                {'state':rps.CANCELED, 'event': 'advance'}
            ]
        }
        rpu.add_concurrency (cu_prof_fr, 'cc_stage_out', spec)

        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after sid added):\n" % sid)
        cu_prof_fr.insert(0, 'sid', sid)
        print(cu_prof_fr.head())

        report.info("Unique 'info' values in CU DF for session %s:\n" % sid)
        print(cu_prof_fr['info'].unique())

        # transpose
        tr_cu_prof_fr = rpu.get_info_df(cu_prof_fr)
        tr_cu_prof_fr.insert(0, 'sid', sid)
        report.info("Head of Transposed CU DF for session %s:\n" % sid)
        print(tr_cu_prof_fr.head())

        ses_info_fr, pilot_info_fr, unit_info_fr = json2frame(db=None, sid=sid)
        report.info("Head of json Docs for session %s:\n" % sid)
        print(ses_info_fr.head())

    except Exception as e:
        report.error("Failed to pre-process data for session %s (%s)" % (sid, e))
        return

    report.header("Writing dataframes to disk.\n")
    try:

        os.mkdir(session_dir)

        ses_info_fr.to_pickle(os.path.join(session_dir, 'session_info.pkl'))
        pilot_info_fr.to_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_info_fr.to_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        ses_prof_fr.to_pickle(os.path.join(session_dir, 'session_prof.pkl'))
        pilot_prof_fr.to_pickle(os.path.join(session_dir, 'pilot_prof.pkl'))
        cu_prof_fr.to_pickle(os.path.join(session_dir, 'unit_prof.pkl'))
        tr_cu_prof_fr.to_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
    except Exception as e:
        report.error("Failed to write data: %s" % e)
        return
def plot(sids, value, label='', paper=False):

    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]

    labels = []

    first = True

    for sid in sids:

        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = list(resources.values())[0]
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        cores = info['metadata.effective_cores']
        #cores = 32

        if value == 'cc_fork':
            spec = {
                'in': [{
                    'info': 'aec_start_script'
                }],
                'out': [{
                    'info': 'aec_after_exec'
                }]
            }
            rpu.add_concurrency(unit_prof_df, 'cc_fork', spec)

        elif value == 'cc_exit':
            spec = {
                'in': [{
                    'info': 'aec_after_exec'
                }],
                'out': [
                    {
                        'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                        'event': 'advance'
                    },
                ]
            }
            rpu.add_concurrency(unit_prof_df, 'cc_exit', spec)

        df = unit_prof_df[(unit_prof_df[value] >= 0) &
                          #(unit_prof_df.event == 'advance') &
                          (unit_prof_df.sid == sid)][['time', value]]

        df.columns = ['time', cores]
        df['time'] -= df['time'].min()

        if first:
            df_all = df
        else:
            df_all = pd.merge(df_all, df, on='time', how='outer')

        #labels.append("Cores: %d" % cores)
        # labels.append("%d" % cores)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])
        labels.append("%d" % info['metadata.cu_runtime'])

        first = False

    df_all.set_index('time', inplace=True)
    print(df_all.head())
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    #ax = df_all.plot(drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)
    ax = df_all.plot(fontsize=TICK_FONTSIZE,
                     linewidth=LINEWIDTH,
                     color=colors)

    # Vertical reference
    #x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    #mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    #labels.append("Optimal")

    location = 'upper right'
    legend = mp.pyplot.legend(labels,
                              loc=location,
                              fontsize=LEGEND_FONTSIZE,
                              labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        mp.pyplot.title(
            "Concurrent number of CUs in stage '%s'.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent(s) with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (value, info['metadata.generations'], info['metadata.cu_cores'],
             info['metadata.cu_runtime'], resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel(r"\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    # mp.pyplot.ylim(0, 200)
    mp.pyplot.ylim(-50, )
    mp.pyplot.xlim(0, 600)
    #ax.get_xaxis().set_ticks([])
    print(dir(ax))

    for spine in ax.spines.values():
        spine.set_linewidth(BORDERWIDTH)
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()
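

# A minimal, hypothetical driver for the plot() function above.  The session ID
# and the concurrency column name are placeholders, not values from an actual
# run; plot() expects the per-session pickles produced by preprocess() to exist
# under PICKLE_DIR.
if __name__ == '__main__':
    example_sids = ['rp.session.example.0000']   # placeholder session ID
    plot(example_sids, value='cc_exec', paper=False)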