Example #1
0
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sids, value, label=''):
    """Plot one per-CU timing metric as a line, one line per session.

    For every session id in *sids* the requested *value* ('overhead', 'orte',
    'popen', 'scheduler', 'execworker' or 'stageout') is computed as a
    difference of profiling timestamp columns and plotted against the CUs,
    ordered by their arrival at the agent.

    :param tr_unit_prof_df: transposed per-unit profiling dataframe.
    :param info_df: per-session metadata, indexed by session id.
    :param unit_info_df: per-unit info (used for resource lookup).
    :param pilot_info_df: per-pilot info (used for resource lookup).
    :param sids: iterable of session ids to plot.
    :param value: which timing metric to plot.
    :param label: suffix appended to the output filename.
    :raises Exception: for an unknown *value*.

    The figure is written to 'plot3_<value><label>.pdf'.
    """
    labels = []

    for sid in sids:

        # Legend info
        info = info_df.loc[sid]

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        tufs = tuf.sort('awo_get_u_pend')

        if value == 'overhead':
            # Time in the agent minus the payload runtime
            ax = (tufs['asc_released'] - tufs['asc_allocated'] - info['metadata.cu_runtime']).plot(kind='line')

        elif value == 'orte':
            ax = (tufs['aew_after_exec'] - tufs['aew_after_cd'] - info['metadata.cu_runtime']).plot(kind='line')

        elif value == 'popen':
            ax = (tufs['aew_start_script'] - tufs['aec_handover']).plot(kind='line')

        elif value == 'scheduler':
            ax = (tufs['asc_get_u_pend'] - tufs['asic_put_u_pend']).plot(kind='line')

        elif value == 'execworker':
            ax = (tufs['aew_work_u_pend'] - tufs['asc_put_u_pend']).plot(kind='line')

        elif value == 'stageout':
            ax = (tufs['asoc_get_u_pend'] - tufs['aew_put_u_pend']).plot(kind='line')

        else:
            raise Exception("Value %s unknown" % value)

        labels.append("Sub-Agents: %d" % info['metadata.num_sub_agents'])

    mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    # NOTE: the title uses the metadata of the *last* session in the loop.
    mp.pyplot.title("'%s' per CU for varying number of Sub-Agents.\n"
                    "%d CUs of %d core(s) with a %ss payload on a %d(+N*16) core pilot on %s.\n"
                    "Varying number of sub-agent with %d ExecWorker(s). All times are per CU.\n"
                    "RP: %s - RS: %s - RU: %s"
                   % (value,
                      info['metadata.cu_count'], info['metadata.cu_cores'], info['metadata.cu_runtime'],
                      # BUG FIX: the pilot core-count slot previously repeated
                      # 'metadata.cu_count'; use the pilot size as the sibling
                      # plots in this file do.
                      info['metadata.pilot_cores'], resource_label,
                      info['metadata.num_exec_instances_per_sub_agent'],
                      info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                      ), fontsize=8)
    mp.pyplot.xlabel("Compute Units (ordered by agent arrival)")
    mp.pyplot.ylabel("Time (s)")
    mp.pyplot.ylim(0)
    ax.get_xaxis().set_ticks([])

    mp.pyplot.savefig('plot3_%s%s.pdf' % (value, label))
    mp.pyplot.close()
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sids):

    labels = []

    orte_ttc = {}

    for sid in sids:


        # Legend info
        info = info_df.loc[sid]

        #num_sas = info['metadata.num_sub_agents']
        num_sas = info['metadata.num_exec_instances_per_sub_agent']

        if num_sas not in orte_ttc:
            orte_ttc[num_sas] = []

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # Only take completed CUs into account
        tuf = tuf[tuf['Done'].notnull()]

        # We sort the units based on the order they arrived at the agent
        tufs = tuf.sort('awo_get_u_pend')

        orte_ttc[num_sas].append((tufs['aec_after_exec'].max() - tufs['awo_get_u_pend'].min()))

        labels.append("ExecWorkers: %d" % num_sas)

    orte_df = pd.DataFrame(orte_ttc)

    ax = orte_df.mean().plot(kind='bar', colormap='Paired')

    print 'labels: %s' % labels
    #mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    mp.pyplot.title("TTC for varying ExecWorkers.\n"
                    "%d CUs of %d core(s) with a %ss payload on a %d core pilot on %s.\n"
                    "%d sub-agent with varying ExecWorker(s).\n"
                    "RP: %s - RS: %s - RU: %s"
                   % (info['metadata.cu_count'], info['metadata.cu_cores'], info['metadata.cu_runtime'], info['metadata.pilot_cores'], resource_label,
                      info['metadata.num_sub_agents'],
                      info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                      ), fontsize=8)
    mp.pyplot.xlabel("Number of Execution Workers")
    mp.pyplot.ylabel("Time to Completion (s)")
    #mp.pyplot.ylim(0)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    mp.pyplot.savefig('plot_ttc_ew.pdf')
    mp.pyplot.close()
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):
    """Plot a pie chart of the unit-state distribution for one session.

    Counts the 'state' column of the session's units and saves the chart as
    '<sid>_plot_states.pdf'.  tr_unit_prof_df is not referenced here; it is
    kept for a uniform plot() signature across this module.
    """

    print "Plotting %s ..." % sid

    labels = []

    # Legend info
    info = info_df.loc[sid]

    mpi = get_mpi(unit_info_df, sid)
    #mpi = True
    # For this call assume that there is only one pilot per session
    lms = get_lm(unit_info_df, pilot_info_df, sid, mpi)
    assert len(lms) == 1
    launch_method = lms.values()[0]

    # For this call assume that there is only one pilot per session
    spawners = get_spawners(unit_info_df, pilot_info_df, sid)
    assert len(spawners) == 1
    spawner = spawners.values()[0]

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    # Get only the entries for this session
    uf = unit_info_df[unit_info_df['sid'] == sid]

    # How many units ended up in each state
    result = pd.value_counts(uf['state'].values, sort=False)
    print result

    ax = result.plot(kind='pie', autopct='%.2f%%')
    ax.set_aspect('equal')

    print info
    # NOTE(review): labels, launch_method and spawner are computed above but
    # never used in this plot.
    #mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    mp.pyplot.title("%s (%s)\n"
                    "%d CUs of %d core(s) with a %ds payload on a %d core pilot on %s.\n"
                    "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
                    "RP: %s - RS: %s - RU: %s"
                   % (sid, time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(info['created'])),
                      info['metadata.cu_count'], info['metadata.cu_cores'], info['metadata.cu_runtime'], info['metadata.pilot_cores'], resource_label,
                      info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                      info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                      ), fontsize=8)

    mp.pyplot.savefig('%s_plot_states.pdf' % sid)
    mp.pyplot.close()
def plot(sid, values, label='', paper=False, window=1.0, plot_mean=False):
    """Plot the rate (units/s) of several agent components over time.

    Profiling dataframes are read from PICKLE_DIR/<sid>.  For every entry in
    *values* a frequency column is derived via add_frequency() over a
    sampling *window* (seconds), resampled onto a common time axis, and drawn
    as a step plot.  The figure is saved as 'plot_more_rates-<sid>.pdf'.

    :param sid: session id; also selects the pickle directory.
    :param values: list of rate names, e.g. 'exec_freq', 'sched_freq'.
    :param label: currently unused here; kept for signature compatibility.
    :param paper: when True, the verbose title is suppressed.
    :param window: sampling window in seconds for the rate computation.
    :param plot_mean: when True the color/style cycles are doubled; note the
        mean series itself is commented out below, so no extra lines appear.
    """

    labels = []
    means = {}

    # One distinct colormap color per plotted value
    colors = [cmap(i) for i in np.linspace(0, 1, len(values))]
    c = 0

    first = True

    # RADICAL-Pilot sessions carry resource info; others get a placeholder
    if sid.startswith('rp.session'):
        rp = True
    else:
        rp = False

    session_dir = os.path.join(PICKLE_DIR, sid)

    pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
    session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
    unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
    unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))

    # Legend info
    info = session_info_df.loc[sid]

    if rp:
        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]
    else:
        resource_label = "bogus"

    # Get only the entries for this session
    #uf = unit_prof_df[unit_prof_df['sid'] == sid]

    # We sort the units based on the order they arrived at the agent
    #ufs = uf.sort('awo_get_u_pend')

    cores = info['metadata.effective_cores']

    for value in values:

        # Map the requested rate name onto a profile-event filter spec
        if 'stagein_freq' == value:
            spec = {'state': rps.AGENT_STAGING_INPUT, 'event': 'advance'}

        elif 'sched_freq' == value:
            spec = {'state': rps.EXECUTING_PENDING, 'event': 'advance'}

        elif 'exec_freq' == value:
            spec = {'state' : rps.EXECUTING, 'event' : 'advance'}

        elif 'fork_freq' == value:
            spec = {'info' : 'aec_start_script'}

        elif 'exit_freq' == value:
            spec = {'info' : 'aec_after_exec'}

        elif 'stageout_pend_freq' == value:
            spec = {'state' : rps.AGENT_STAGING_OUTPUT_PENDING, 'event' : 'advance'}

        elif 'stageout_freq' == value:
            spec = {'state': rps.AGENT_STAGING_OUTPUT, 'event': 'advance'}

        else:
            raise Exception("Value %s unknown" % value)

        #print unit_prof_df.head()

        # add_frequency() adds a column named <value> to unit_prof_df
        add_frequency(unit_prof_df, value, window, spec)
        df = unit_prof_df[
            (unit_prof_df[value] >= 0) &
            #(unit_prof_df.event == 'advance') &
            (unit_prof_df.sid == sid)
            ][['time', value]]
        means[value] = df[value].mean()

        #df.columns = ['time', value]
        #df['time'] -= df['time'].min()
        df.time = pd.to_datetime(df.time, unit='s')
        df.set_index('time', inplace=True, drop=True, append=False)

        #print ("Head of %s before resample" % value)
        #print df.head()

        # Per-window mean, normalized to a per-second rate
        def _mean(array_like):
            return np.mean(array_like)/window
        df = df.resample('%dL' % int(1000.0*window), how=_mean)[value]
        df = df.fillna(0)

        #print ("Head of %s after resample" % value)
        #print df.head()
        # Accumulate all per-value series into one frame on a shared index
        if first:
            df_all = df
        else:

            #df_all = pd.merge(df_all, df,  on='time', how='outer')
            #df_all = pd.merge(df_all, df,  on='time')
            #df_all = pd.merge(df_all, df)
            df_all = pd.concat([df_all, df], axis=1)
            #df_all.append(df)

        #print ("Head of df_all")
        #print df_all.head()

        # Human-readable legend names for the well-known rates
        if value == 'exec_freq':
            labels.append("Launching")
        elif value == 'sched_freq':
            labels.append("Scheduling")
        elif value == 'fork_freq':
            labels.append("Forking")
        elif value == 'stageout_pend_freq':
            labels.append("Completing")
        else:
            labels.append("%s" % value)

        first = False

        # df.plot(drawstyle='steps-pre')

    c = 0
    for value in values:
        mean = df_all[value].mean()
        print "%s mean: %f" % (value, mean)
        # df_all['mean_%s' % value] = mean
        #labels.append("Mean %s" % value)
    print 'means:', means

    my_colors = colors
    if plot_mean:
        my_colors *= 2

    # Solid lines for the rates; dashed reserved for (disabled) mean lines
    my_styles = []
    for x in range(len(values)):
        my_styles.append('-')
    if plot_mean:
        for x in range(len(values)):
            my_styles.append('--')

    #df_all.set_index('time', inplace=True)
    #print df_all.head(500)
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    ax = df_all.plot(drawstyle='steps-pre', color=my_colors, style=my_styles, linewidth=LINEWIDTH, fontsize=TICK_FONTSIZE)
    # df_all.plot(drawstyle='steps')
    #df_all.plot()

    # Vertial reference
    # x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    # mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    # labels.append("Optimal")

    mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0)
    if not paper:
        mp.pyplot.title("Rate of various components: %s'.\n"
                "%d generations of %d 'concurrent' units of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                "RP: %s - RS: %s - RU: %s"
               % (values,
                  info['metadata.generations'], cores, info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
                  info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                  info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                  ), fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Rate (Unit/s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(-1, 400)
    #mp.pyplot.xlim(-1,)
    #mp.pyplot.xlim(['1/1/2000', '1/1/2000'])
    #mp.pyplot.xlim('03:00', '04:00')
    #mp.pyplot.xlim(380, 400)
    #mp.pyplot.xlim(675, 680)
    #ax.get_xaxis().set_ticks([])
    # ax.set_yscale('log', basey=10)

    #mp.pyplot.xlim((291500.0, 1185200.0))
    #mp.pyplot.xlim((474000.0, 2367600.0))

    print "xlim:", ax.get_xlim()

    # Apply consistent border/tick widths for publication-quality output
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Single-column figure size (inches), golden-ratio height
    #width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)
    #fig.tight_layout()

    mp.pyplot.savefig('plot_more_rates-%s.pdf' % sid)
    mp.pyplot.close()
def migrate(options):
    from ckan import model
    from ckanext.archiver.model import Archival
    from ckanext.qa.model import QA

    resources = common.get_resources(state='active',
                                     publisher_ref=options.publisher,
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        # Gather the details of QA from TaskStatus
        # to fill all properties of QA apart from:
        # * package_id
        # * resource_id
        fields = {}
        qa_task_status = model.Session.query(model.TaskStatus)\
                                    .filter_by(entity_id=res.id)\
                                    .filter_by(task_type='qa')\
                                    .filter_by(key='status')\
                                    .first()
        if not qa_task_status:
            add_stat('No QA data', res, stats)
            continue
        qa_error = json.loads(qa_task_status.error)
        fields['openness_score'] = int(qa_task_status.value)
        fields['openness_score_reason'] = qa_error['reason']
        fields['format'] = qa_error['format']
        qa_date = qa_task_status.last_updated
        # NB qa_task_status.last_updated appears to be 1hr ahead of the revision
        # time, so some timezone nonesense going on. Can't do much.
        archival = Archival.get_for_resource(res.id)
        if not archival:
            print add_stat('QA but no Archival data', res, stats)
            continue
        archival_date = archival.updated
        # the state of the resource was as it was archived on the date of
        # the QA update but we only know when the latest archival was. So
        # if it was archived before the QA update thenwe know that was the
        # archival, otherwise we don't know when the relevant archival was.
        if archival_date and qa_date >= archival_date:
            fields['archival_timestamp'] = archival_date
            fields['updated'] = archival_date
            fields['created'] = archival_date
            # Assume the resource URL archived was the one when the
            # archival was done (it may not be if the URL was queued and
            # there was significant delay before it was archived)
            get_resource_as_at = archival_date
        else:
            # This is common for when a resource is created and qa runs just
            # before archiver and you get:
            # "This file had not been downloaded at the time of scoring it."
            # Just put sensible datetimes since we don't really know the exact
            # ones
            fields['archival_timestamp'] = qa_date
            fields['updated'] = qa_date
            fields['created'] = qa_date
            get_resource_as_at = qa_date
        res_rev = model.Session.query(model.ResourceRevision).\
            filter_by(id=res.id).\
            filter(model.ResourceRevision.revision_timestamp < get_resource_as_at).\
            order_by(model.ResourceRevision.revision_timestamp.desc()).\
            first()
        fields['resource_timestamp'] = res_rev.revision_timestamp

        # Compare with any existing data in the Archival table
        qa = QA.get_for_resource(res.id)
        if qa:
            changed = None
            for field, value in fields.items():
                if getattr(qa, field) != value:
                    if options.write:
                        setattr(qa, field, value)
                    changed = True
            if not changed:
                add_stat('Already exists correctly in QA table', res, stats)
                continue
            add_stat('Updated in QA table', res, stats)
        else:
            qa = QA.create(res.id)
            if options.write:
                for field, value in fields.items():
                    setattr(qa, field, value)
                model.Session.add(qa)
            add_stat('Added to QA table', res, stats)

    print 'Summary\n', stats.report()
    if options.write:
        model.repo.commit_and_remove()
        print 'Written'
def plot(sids, paper=False):
    """Plot mean time-to-completion versus node count for full-node CU runs.

    :param sids: dict mapping a resource key to a list of session ids.
    :param paper: when True, the verbose title is suppressed.

    For each key the sessions' TTCs are grouped by node count
    (effective_cores / 32), averaged and drawn as one line per resource; an
    'Optimal' horizontal reference line is added.  The figure is saved as
    'plot_ttc_full_node.pdf'.
    """

    labels = []

    for key in sids:

        orte_ttc = {}

        for sid in sids[key]:

            # RADICAL-Pilot sessions carry resource info; others get a
            # placeholder label
            if sid.startswith('rp.session'):
                rp = True
            else:
                rp = False

            session_dir = os.path.join(PICKLE_DIR, sid)

            unit_info_df = pd.read_pickle(
                os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(
                os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(
                os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(
                os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]

            # NOTE(review): assumes 32 cores per node — confirm for the
            # resource being plotted
            cores = info['metadata.effective_cores']
            nodes = cores / 32

            if nodes not in orte_ttc:
                orte_ttc[nodes] = pd.Series()

            if rp:
                # For this call assume that there is only one pilot per session
                resources = get_resources(unit_info_df, pilot_info_df, sid)
                assert len(resources) == 1
                resource_label = resources.values()[0]
            else:
                resource_label = 'bogus'

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            #tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            #tufs = tuf.sort('awo_get_u_pend')
            #tufs = tuf.sort('awo_adv_u')
            #tufs = tuf.sort('asic_get_u_pend')
            # NOTE(review): sort() without a column argument — presumably
            # sorts by index here; confirm this is the intended ordering
            tufs = tuf.sort()

            # TTC: last execution end minus first agent-side arrival
            orte_ttc[nodes] = orte_ttc[nodes].append(
                pd.Series((tufs['aec_after_exec'].max() -
                           tufs['asic_get_u_pend'].min())))

        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df

        labels.append("%s" % resource_legend[key])
        ax = orte_df.mean().plot(kind='line',
                                 color=resource_colors[key],
                                 marker=resource_marker[key],
                                 fontsize=TICK_FONTSIZE,
                                 linewidth=LINEWIDTH)

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")

    # Horizontal reference: the theoretically optimal TTC (uses metadata of
    # the last session processed)
    y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    labels.append("Optimal")

    print 'labels: %s' % labels
    location = 'upper left'
    mp.pyplot.legend(labels,
                     loc=location,
                     fontsize=LEGEND_FONTSIZE,
                     markerscale=0)
    if not paper:
        mp.pyplot.title(
            "TTC for a varying number of 'concurrent' Full-Node CUs.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (info['metadata.generations'], info['metadata.cu_cores'],
             info['metadata.cu_runtime'], resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("\# Nodes", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10) #ax.get_xaxis().set_ticks([])
    # #ax.get_xaxis.set

    # Single-column figure size (inches), golden-ratio height
    #width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_ttc_full_node.pdf')

    mp.pyplot.close()
Example #7
0
def plot(sids, paper=False):
    """Plot mean time-to-completion versus core count, one line per key.

    :param sids: dict mapping a legend key to a list of session ids.
    :param paper: when True, the verbose title is suppressed.

    Sessions are grouped by effective core count, their TTCs averaged and
    drawn as one line per key; an 'Optimal' horizontal reference line is
    added.  The figure is saved as 'plot_worker_effect.pdf'.
    """

    labels = []

    # One distinct colormap color per key
    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]
    c = 0

    for key in sids:

        orte_ttc = {}

        for sid in sids[key]:

            session_dir = os.path.join(PICKLE_DIR, sid)

            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]

            cores = info['metadata.effective_cores']

            if cores not in orte_ttc:
                orte_ttc[cores] = pd.Series()

            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            #tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            #tufs = tuf.sort('awo_get_u_pend')
            #tufs = tuf.sort('awo_adv_u')
            tufs = tuf.sort('asic_get_u_pend')

            # TTC: last execution end minus first agent-side arrival
            orte_ttc[cores] = orte_ttc[cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))

        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=BARRIER_FONTSIZE, linewidth=BARRIER_LINEWIDTH)
        ax = orte_df.mean().plot(kind='line', color=colors[c])
        c += 1

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    # mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    # labels.append("ORTE-only (C)")


    # Horizontal reference: theoretically optimal TTC (uses metadata of the
    # last session processed)
    y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--')
    labels.append("Optimal")

    #print 'labels: %s' % labels
    mp.pyplot.legend(labels, loc='upper left', fontsize=BARRIER_FONTSIZE)
    if not paper:
        mp.pyplot.title("TTC for a varying number of 'concurrent' CUs.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
           % (info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
              info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
              info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
              ), fontsize=8)
    mp.pyplot.xlabel("# Cores", fontsize=BARRIER_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=BARRIER_FONTSIZE)
    #mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    mp.pyplot.savefig('plot_worker_effect.pdf')

    mp.pyplot.close()
def migrate(options):
    """Migrate archival results from the legacy TaskStatus storage into the
    Archival table.

    For every active resource (optionally filtered by options.publisher,
    options.resource and options.dataset) the 'archiver' TaskStatus row is
    converted into Archival fields; resources with no archive evidence at
    all are skipped.  Changes are only persisted when options.write is set;
    a summary of per-resource outcomes is printed at the end.
    """
    from ckan import model
    from ckanext.archiver.model import Archival, Status

    resources = common.get_resources(state='active',
                                     publisher_ref=options.publisher,
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        # Gather the details of archivals from TaskStatus and Resource
        # to fill all properties of Archival apart from:
        # * package_id
        # * resource_id
        fields = {}
        archiver_task_status = model.Session.query(model.TaskStatus)\
                                    .filter_by(entity_id=res.id)\
                                    .filter_by(task_type='archiver')\
                                    .filter_by(key='status')\
                                    .first()
        if archiver_task_status:
            ats_error = json.loads(archiver_task_status.error)
            fields['status_id'] = Status.by_text(archiver_task_status.value)
            fields['is_broken'] = Status.is_status_broken(fields['status_id'])
            fields['reason'] = ats_error['reason']
            fields['last_success'] = date_str_to_datetime_or_none(ats_error['last_success'])
            fields['first_failure'] = date_str_to_datetime_or_none(ats_error['first_failure'])
            fields['failure_count'] = int(ats_error['failure_count'])
            fields['url_redirected_to'] = ats_error['url_redirected_to']
            fields['updated'] = archiver_task_status.last_updated
        else:
            # No TaskStatus row - only migrate if the Resource itself shows
            # some archive evidence; otherwise there is nothing to record
            if not (res.cache_url
                    or res.extras.get('cache_filepath')
                    or res.hash
                    or res.size
                    or res.mimetype):
                add_stat('No archive data', res, stats)
                continue
            for field_name in ('status_id', 'is_broken', 'reason',
                               'last_success', 'first_failure',
                               'failure_count', 'url_redirected_to',
                               'updated', 'created'):
                fields[field_name] = None

        fields['cache_filepath'] = res.extras.get('cache_filepath')
        fields['cache_url'] = res.cache_url
        fields['hash'] = res.hash
        fields['size'] = res.size
        fields['mimetype'] = res.mimetype

        revisions_with_hash = model.Session.query(model.ResourceRevision)\
                .filter_by(id=res.id)\
                .order_by(model.ResourceRevision.revision_timestamp)\
                .filter(model.ResourceRevision.hash != '').all()
        if revisions_with_hash:
            # these are not perfect but not far off
            fields['created'] = revisions_with_hash[0].revision_timestamp
            fields['resource_timestamp'] = revisions_with_hash[-1].revision_timestamp
        else:
            # No hashed revisions - approximate with the earliest/latest
            # known TaskStatus dates
            fields['created'] = min(fields['updated'] or END_OF_TIME,
                                    fields['first_failure'] or END_OF_TIME,
                                    fields['last_success'] or END_OF_TIME)
            fields['resource_timestamp'] = max(
                fields['updated'] or START_OF_TIME,
                fields['first_failure'] or START_OF_TIME,
                fields['last_success'] or START_OF_TIME)

        # Compare with any existing data in the Archival table
        archival = Archival.get_for_resource(res.id)
        if archival:
            changed = None
            for field, value in fields.items():
                if getattr(archival, field) != value:
                    if options.write:
                        setattr(archival, field, value)
                    changed = True
            if not changed:
                add_stat('Already exists correctly in archival table', res, stats)
                continue
            add_stat('Updated in archival table', res, stats)
        else:
            archival = Archival.create(res.id)
            if options.write:
                for field, value in fields.items():
                    setattr(archival, field, value)
                model.Session.add(archival)
            add_stat('Added to archival table', res, stats)

    print 'Summary\n', stats.report()
    if options.write:
        model.repo.commit_and_remove()
        print 'Written'
Example #9
0
def plot(unit_prof_df, tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):
    """Plot the number of concurrent compute units per agent component over
    time for one session.

    Five concurrency series (populating, staging in, scheduling, executing,
    staging out) are filtered from unit_prof_df, outer-merged on 'time' and
    drawn as a step plot.  The figure is saved as '<sid>_plot4.pdf'.
    tr_unit_prof_df is not used here; kept for a uniform plot() signature.
    """

    print "Plotting %s ..." % sid

    labels = []

    # # Legend info
    info = info_df.loc[sid]

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    df = pd.DataFrame()

    #
    # Pulling in
    #
    populating_df = unit_prof_df[
        (unit_prof_df.cc_populating >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_populating']]

    #
    # Staging in
    #
    stage_in_df = unit_prof_df[
        (unit_prof_df.cc_stage_in >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_stage_in']]

    #
    # Scheduling
    #
    sched_df = unit_prof_df[
        (unit_prof_df.cc_sched >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_sched']]

    #
    # Executing
    #
    exec_df = unit_prof_df[
        (unit_prof_df.cc_exec >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_exec']]

    #
    # Staging out
    #
    stage_out_df = unit_prof_df[
        (unit_prof_df.cc_stage_out >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_stage_out']]

    print sched_df.head()

    # Merge all series onto a common time axis; legend labels are appended
    # in the same order as the merged columns
    df = populating_df
    labels.append("Populating MongoDB")
    df = pd.merge(df, stage_in_df,  on='time', how='outer')
    labels.append("Staging Input Data")
    df = pd.merge(df, sched_df,     on='time', how='outer')
    labels.append("Scheduling")
    df = pd.merge(df, exec_df,      on='time', how='outer')
    labels.append("Executing")
    df = pd.merge(df, stage_out_df, on='time', how='outer')
    labels.append("Staging Output Data")

    df.set_index('time', inplace=True)
    print df.head()

    df.plot(colormap='Paired', drawstyle='steps-post')

    mp.pyplot.legend(labels, loc='upper left', fontsize=5)
#    mp.pyplot.title("Concurrent Compute Units per Component.\n"
#                    "%d CUs of %d core(s) with a %ss payload on a %d core pilot on %s.\n"
#                    "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
#                    "RP: %s - RS: %s - RU: %s"
#                   % (info['metadata.cu_count'], info['metadata.cu_cores'], info['metadata.cu_runtime'], info['metadata.pilot_cores'], resource_label,
#                      info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
#                      info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
#                      ), fontsize=8)
    mp.pyplot.xlabel("Time (s)")
    mp.pyplot.ylabel("Concurrent Compute Units")

    #mp.pyplot.ylim(0,100)
    #mp.pyplot.xlim(1200, 1500)

    mp.pyplot.savefig('%s_plot4.pdf' % sid)
    mp.pyplot.close()
Example #10
0
def plot(sid, values, label='', paper=False, window=1.0, plot_mean=False):
    """Plot per-component event rates (units/second) for a single session.

    sid       -- session id; pickled profile frames are loaded from
                 PICKLE_DIR/<sid>/.
    values    -- rate names to plot ('stagein_freq', 'sched_freq',
                 'exec_freq', 'fork_freq', 'exit_freq',
                 'stageout_pend_freq', 'stageout_freq'); any other name
                 raises an Exception.
    label     -- unused; kept for signature compatibility with the other
                 plot() variants in this file.
    paper     -- if True, omit the verbose multi-line title.
    window    -- sampling window in seconds; passed to add_frequency() and
                 used as the resample bucket size.
    plot_mean -- if True, double the color/style cycles for dashed mean
                 lines.  NOTE(review): the statements that would add the
                 'mean_<value>' columns are commented out below, so this
                 flag currently only duplicates colors/styles — confirm
                 whether that is intended.

    Side effect: writes 'plot_more_rates-<sid>.pdf' and closes the figure.
    """

    labels = []
    # Raw (pre-resample) mean rate per value; printed at the end.
    means = {}

    # One color per plotted value, drawn from the module-level colormap.
    colors = [cmap(i) for i in np.linspace(0, 1, len(values))]
    c = 0

    first = True

    # Only radical.pilot sessions carry the metadata needed for a real
    # resource label.
    if sid.startswith('rp.session'):
        rp = True
    else:
        rp = False

    session_dir = os.path.join(PICKLE_DIR, sid)

    pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
    session_info_df = pd.read_pickle(
        os.path.join(session_dir, 'session_info.pkl'))
    unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
    unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))

    # Legend info
    info = session_info_df.loc[sid]

    if rp:
        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]
    else:
        resource_label = "bogus"

    # Get only the entries for this session
    #uf = unit_prof_df[unit_prof_df['sid'] == sid]

    # We sort the units based on the order they arrived at the agent
    #ufs = uf.sort('awo_get_u_pend')

    cores = info['metadata.effective_cores']

    for value in values:

        # Translate the requested rate name into the profile-event spec
        # that add_frequency() filters on.
        if 'stagein_freq' == value:
            spec = {'state': rps.AGENT_STAGING_INPUT, 'event': 'advance'}

        elif 'sched_freq' == value:
            spec = {'state': rps.EXECUTING_PENDING, 'event': 'advance'}

        elif 'exec_freq' == value:
            spec = {'state': rps.EXECUTING, 'event': 'advance'}

        elif 'fork_freq' == value:
            spec = {'info': 'aec_start_script'}

        elif 'exit_freq' == value:
            spec = {'info': 'aec_after_exec'}

        elif 'stageout_pend_freq' == value:
            spec = {
                'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                'event': 'advance'
            }

        elif 'stageout_freq' == value:
            spec = {'state': rps.AGENT_STAGING_OUTPUT, 'event': 'advance'}

        else:
            raise Exception("Value %s unknown" % value)

        #print unit_prof_df.head()

        # Adds a '<value>' column to unit_prof_df holding the event rate
        # over 'window' seconds.
        add_frequency(unit_prof_df, value, window, spec)
        df = unit_prof_df[(unit_prof_df[value] >= 0) &
                          #(unit_prof_df.event == 'advance') &
                          (unit_prof_df.sid == sid)][['time', value]]
        means[value] = df[value].mean()

        #df.columns = ['time', value]
        #df['time'] -= df['time'].min()
        df.time = pd.to_datetime(df.time, unit='s')
        df.set_index('time', inplace=True, drop=True, append=False)

        #print ("Head of %s before resample" % value)
        #print df.head()

        def _mean(array_like):
            return np.mean(array_like) / window

        # Resample onto fixed window-sized buckets ('L' = milliseconds);
        # empty buckets become 0 so the step plot drops to zero.
        # NOTE(review): resample(..., how=) is the legacy pandas API.
        df = df.resample('%dL' % int(1000.0 * window), how=_mean)[value]
        df = df.fillna(0)

        #print ("Head of %s after resample" % value)
        #print df.head()
        if first:
            df_all = df
        else:

            #df_all = pd.merge(df_all, df,  on='time', how='outer')
            #df_all = pd.merge(df_all, df,  on='time')
            #df_all = pd.merge(df_all, df)
            df_all = pd.concat([df_all, df], axis=1)
            #df_all.append(df)

        #print ("Head of df_all")
        #print df_all.head()

        # Human-friendly legend entries for the known rates; fall back to
        # the raw rate name otherwise.
        if value == 'exec_freq':
            labels.append("Launching")
        elif value == 'sched_freq':
            labels.append("Scheduling")
        elif value == 'fork_freq':
            labels.append("Forking")
        elif value == 'stageout_pend_freq':
            labels.append("Completing")
        else:
            labels.append("%s" % value)

        first = False

        # df.plot(drawstyle='steps-pre')

    # Report the post-resample mean of each rate column.
    c = 0
    for value in values:
        mean = df_all[value].mean()
        print "%s mean: %f" % (value, mean)
        # df_all['mean_%s' % value] = mean
        #labels.append("Mean %s" % value)
    print 'means:', means

    # NOTE(review): 'my_colors *= 2' extends the aliased 'colors' list in
    # place (list *= mutates), not just the local.
    my_colors = colors
    if plot_mean:
        my_colors *= 2

    my_styles = []
    for x in range(len(values)):
        my_styles.append('-')
    if plot_mean:
        for x in range(len(values)):
            my_styles.append('--')

    #df_all.set_index('time', inplace=True)
    #print df_all.head(500)
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    ax = df_all.plot(drawstyle='steps-pre',
                     color=my_colors,
                     style=my_styles,
                     linewidth=LINEWIDTH,
                     fontsize=TICK_FONTSIZE)
    # df_all.plot(drawstyle='steps')
    #df_all.plot()

    # Vertial reference
    # x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    # mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    # labels.append("Optimal")

    mp.pyplot.legend(labels,
                     loc='upper right',
                     fontsize=LEGEND_FONTSIZE,
                     labelspacing=0)
    if not paper:
        mp.pyplot.title(
            "Rate of various components: %s'.\n"
            "%d generations of %d 'concurrent' units of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (values, info['metadata.generations'], cores,
             info['metadata.cu_cores'], info['metadata.cu_runtime'],
             resource_label, info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Rate (Unit/s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(-1, 400)
    #mp.pyplot.xlim(-1,)
    #mp.pyplot.xlim(['1/1/2000', '1/1/2000'])
    #mp.pyplot.xlim('03:00', '04:00')
    #mp.pyplot.xlim(380, 400)
    #mp.pyplot.xlim(675, 680)
    #ax.get_xaxis().set_ticks([])
    # ax.set_yscale('log', basey=10)

    #mp.pyplot.xlim((291500.0, 1185200.0))
    #mp.pyplot.xlim((474000.0, 2367600.0))

    print "xlim:", ax.get_xlim()

    # Thicken plot borders and tick marks to the configured width.
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Publication-style figure geometry (golden-ratio height).
    #width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)
    #fig.tight_layout()

    mp.pyplot.savefig('plot_more_rates-%s.pdf' % sid)
    mp.pyplot.close()
def plot(sids, value, label='', paper=False, window=1.0, plot_mean=False, compare=None, micro=False):
    """Overlay one event rate across several sessions, one line per session.

    sids    -- session ids to overlay.
    value   -- which rate to plot: 'sched_freq', 'exec_freq', 'fork_freq'
               or 'done_freq'; any other name raises an Exception.
    label   -- unused; kept for signature compatibility.
    paper   -- if True, omit the verbose multi-line title.
    window  -- sampling window in seconds (rate computation and resample
               bucket size); note 'done_freq' hard-codes a window of 1.
    plot_mean -- also add 'mean_<metric>' columns with dashed style.
    compare -- required; the per-session legend/column key is taken from
               info['metadata.<compare>'], falling back to a running
               counter when the key is absent.
    micro   -- only affects the output file name prefix.

    NOTE(review): plot_label is assigned only in the 'sched_freq' and
    'exec_freq' branches; for 'fork_freq'/'done_freq' the ylabel call at
    the bottom raises NameError — confirm intended usage.

    Side effect: writes 'plot_rate_[micro_]<value>_<resource>_<N>gen.pdf'.
    """

    labels = []
    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]

    first = True

    # Per-session comparison metrics; also used as df_all column names.
    values = []

    counter = 0

    for sid in sids:

        print "sid: %s" % sid

        # Only radical.pilot sessions carry real resource metadata.
        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
        unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]
        else:
            resource_label = "bogus"

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        # Pick the comparison metric for this session.
        if not compare:
            raise Exception("Need to specify 'compare' parameter!")
        elif 'metadata.%s' % compare in info:
             metric = info['metadata.%s' % compare]
        else:
            #raise Exception("'%s' not found in info!" % compare)
            metric = counter
            counter += 1
        values.append(metric)

        if value == 'sched_freq':
            plot_type = 'sched'
            plot_label = 'Scheduling'

            spec = {'state': rps.ALLOCATING, 'event' : 'advance'}
            add_frequency(unit_prof_df, 'sched_freq', window, spec)
            print unit_prof_df.state.unique()

            #
            # scheduling frequency
            #
            df = unit_prof_df[
                (unit_prof_df.sched_freq >= 0) &
                (unit_prof_df.event == 'advance') &
                (unit_prof_df.sid == sid)
                ][['time', 'sched_freq']]

        elif value == 'exec_freq':
            plot_type = 'exec'
            plot_label = 'Executing'

            spec = {'state': 'Executing', 'event' : 'advance'}
            add_frequency(unit_prof_df, 'exec_freq', window, spec)

            #
            # feq
            #
            df = unit_prof_df[
                (unit_prof_df.exec_freq >= 0) &
                (unit_prof_df.event == 'advance') &
                (unit_prof_df.sid == sid)
                ][['time', 'exec_freq']]

        elif 'fork_freq' == value:
            spec = {'info': 'aec_start_script'}
            add_frequency(unit_prof_df, value, window, spec)

            #
            # fork - start_script
            #
            df = unit_prof_df[
                (unit_prof_df[value] >= 0) &
                #(unit_prof_df.event == 'advance') &
                (unit_prof_df.sid == sid)
                ][['time', value]]

        elif value == 'done_freq':

            spec = {'state' : rps.AGENT_STAGING_OUTPUT_PENDING, 'event' : 'advance'}
            add_frequency(unit_prof_df, 'done_freq', 1, spec)

            #
            # feq
            #
            df = unit_prof_df[
                (unit_prof_df.done_freq >= 0) &
                (unit_prof_df.event == 'advance') &
                (unit_prof_df.sid == sid)
                ][['time', 'done_freq']]

        else:
            raise Exception("Value %s unknown" % value)

        # Rename the rate column to this session's metric and rebase time
        # to zero so all sessions align at t=0.
        df.columns = ['time', metric]
        df['time'] -= df['time'].min()
        df.time = pd.to_datetime(df.time, unit='s')
        df.set_index('time', inplace=True)


        def _mean(array_like):
            return np.mean(array_like)/window
        # NOTE(review): resample(..., how=) is the legacy pandas API.
        df = df.resample('%dL' % int(1000.0*window), how=_mean)
        df = df.fillna(0)

        print df.head()

        if first:
            df_all = df
        else:
            #df_all = pd.merge(df_all, df,  on='time', how='outer')
            #df_all = pd.merge(df_all, df, how='outer')
            df_all = pd.concat([df_all, df], axis=1)

        labels.append("%d" % metric)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])

        first = False


    # Print per-metric statistics; optionally add constant mean columns.
    c = 0
    for value in values:
        mean = df_all[value].mean()
        stddev = df_all[value].std(ddof=0)
        print "Mean value for %d: %f (%f)" % (value, mean, stddev)
        if plot_mean:
            df_all['mean_%s' % value] = mean
        # labels.append("Mean %s" % value)
    # NOTE(review): 'my_colors *= 2' also extends the aliased 'colors'
    # list in place.
    my_colors = colors
    my_styles = []
    for x in range(len(values)):
        my_styles.append('-')
    if plot_mean:
        my_colors *= 2
        for x in range(len(values)):
            my_styles.append('--')


    #df_all.set_index('time', inplace=True)
    # print df_all.head(500)
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    ax = df_all.plot(color=my_colors, style=my_styles, drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
    # df_all.plot(drawstyle='steps')
    # df_all.plot()


    mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0)
    if not paper:
        mp.pyplot.title("Rate of CUs transitioning in stage '%s'.\n"
                "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                "RP: %s - RS: %s - RU: %s"
               % (value,
                  info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
                  info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                  info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                  ), fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    # NOTE(review): plot_label may be unbound here (see docstring).
    mp.pyplot.ylabel("%s Rate (Unit/s)" % plot_label, fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(0,)
    #mp.pyplot.xlim('0:00', '0:40')
    #mp.pyplot.xlim(380, 400)
    #mp.pyplot.xlim(675, 680)
    #ax.get_xaxis().set_ticks([])

    from matplotlib.dates import YearLocator, MonthLocator, DateFormatter, SecondLocator

    #second_fmt = DateFormatter('%S')
    # second_loc = SecondLocator(bysecond=range(0, 300, 10))
    # ax.xaxis.set_minor_formatter(second_fmt)
    # ax.xaxis.set_minor_locator(second_loc)
    # ax.xaxis.set_major_formatter(second_fmt)
    # ax.xaxis.set_major_locator(second_loc)
    # second_loc.set_axis(ax.xaxis)  # Have to manually make this call and the one below.
    # second_loc.refresh()

    # secondsFmt = DateFormatter('%s')
    # Place a major tick every second on the datetime x-axis.
    s = SecondLocator()
    ax.xaxis.set_major_locator(s)
    # ax.xaxis.set_minor_locator(SecondLocator())
    # ax.xaxis.set_major_formatter(secondsFmt)

    #ax.xaxis.set_major_locator(years)
    #ax.xaxis.set_major_formatter(yearsFmt)
    #ax.xaxis.set_minor_locator(months)

    ax.autoscale_view()

    # Thicken plot borders and tick marks to the configured width.
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Publication-style figure geometry (golden-ratio height).
    #width = 3.487
    width = 3.3
    height = width / 1.618
    #height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    fig.tight_layout(pad=0.1)


    mp.pyplot.savefig('plot_rate_%s%s_%s_%dgen.pdf' % ('micro_' if micro else '', value, resource_label, info['metadata.generations']))
    mp.pyplot.close()
Пример #12
0
def wms_revisions(options):
    '''
    These revisions look like this:

    # select url from resource_revision where id='3b157e17-cef2-43dc-b0ce-76de18549852' order by revision_timestamp;
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.1.1
    http://www.acas.org.uk/CHttpHandler.ashx?service=WFS&request=GetCapabilities&version=2.0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3

    The bad ones have been changed to "?service=" params. These revisions need removing.

    # Typical revision:
                     id                  |         timestamp          |           author           |                         message                          | state  | approved_timestamp
    a2370bd1-b1b8-41b4-9fc1-d38b46d2fbda | 2014-02-22 04:34:56.634442 | co-prod3.dh.bytemark.co.uk | REST API: Update object financial-transactions-data-acas | active |
    # i.e. author='co-prod3...' (site-user, via API)

    Options used: options.resource / options.dataset select the resources,
    options.write enables the destructive SQL; without it the function is a
    dry run that only reports statistics.
    '''
    resources = common.get_resources(state='active',
            resource_id=options.resource, dataset_name=options.dataset)
    stats = StatsList()
    stats.report_value_limit = 1000
    total_bad_revisions = 0
    # NOTE(review): need_to_commit is never set to True anywhere in this
    # function, so the final commit guarded by it below is dead code —
    # confirm whether a 'need_to_commit = True' assignment went missing.
    need_to_commit = False
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        res = model.Resource.get(res.id)  # as the session gets flushed during the loop
        # All revisions of this resource, oldest first.
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        first_res_rev = res_revs[0]
        if 'request=GetCapabilities&version=' in first_res_rev.url:
            print add_stat('First revision already was WMS', res, stats)
            continue

        # Identify bad revisions by the WMS URL parameters and author
        bad_res_revs = res_rev_q.filter(model.ResourceRevision.url.ilike('%?service=W%S&request=GetCapabilities&version=%')).all()
        if bad_res_revs and \
           bad_res_revs[0].revision.author not in ('co-prod3.dh.bytemark.co.uk', 'current_revision_fixer2'):
            print add_stat('Misidentified', res, stats, 'author=%r' % bad_res_revs[0].revision.author)
            continue
        if not bad_res_revs:
            add_stat('Resource ok', res, stats)
            continue
        print ' ' # don't overwrite progress bar
        print add_stat('Bad revisions', res, stats, '(%d/%d)' % (len(bad_res_revs), len(res_revs)))
        total_bad_revisions += len(bad_res_revs)

        # Find the new latest (good) revision
        # (for/else: the else branch runs only when no good revision exists)
        bad_res_revs_set = set(bad_res_revs)
        for res_rev_index in reversed(xrange(len(res_revs))):
            if res_revs[res_rev_index] not in bad_res_revs_set:
                latest_good_res_rev = res_revs[res_rev_index]
                break
        else:
            print add_stat('No good revisions', res, stats)
            continue
        if not options.write:
            continue

        # Delete the revisions and resource_revisions
        print '  Deleting bad revisions...'
        def delete_bad_revisions(res_revs):
            # Build the sql as a list, as it is faster when you have 1000 strings to append
            # NOTE(review): the statement is assembled by string
            # interpolation; the ids come from the database rather than
            # user input, but parameterized execution would still be safer.
            sql = ['''BEGIN;
            ALTER TABLE package_tag DROP CONSTRAINT package_tag_revision_id_fkey;
            ALTER TABLE package_extra DROP CONSTRAINT package_extra_revision_id_fkey;
            ALTER TABLE resource DROP CONSTRAINT resource_revision_id_fkey;
            ''']
            for res_rev in res_revs:
                sql.append("DELETE from resource_revision where id='%s' and revision_id='%s';\n" % (res.id, res_rev.revision_id))
                # a revision created (e.g. over the API) can be connect to other
                # resources or a dataset, so only delete the revision if only
                # connected to this one.
                if model.Session.query(model.ResourceRevision).\
                        filter_by(revision_id=res_rev.revision_id).\
                        count() == 1 and \
                        model.Session.query(model.PackageRevision).\
                        filter_by(revision_id=res_rev.revision_id).count() == 0:
                    sql.append("DELETE from revision where id='%s';\n" % res_rev.revision_id)
            sql.append("UPDATE resource SET revision_id='%s' WHERE id='%s';\n" % \
                (latest_good_res_rev.revision_id, res.id))
            sql.append('''
            ALTER TABLE package_tag ADD CONSTRAINT package_tag_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
            ALTER TABLE package_extra ADD CONSTRAINT package_extra_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
            ALTER TABLE resource ADD CONSTRAINT resource_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
            COMMIT;''')
            print '  sql..',
            model.Session.execute(''.join(sql))
            print '.committed'
            model.Session.remove()
        def chunks(l, n):
            '''Yield successive n-sized chunks from l.'''
            for i in xrange(0, len(l), n):
                yield l[i:i+n]
        # chunk revisions in chunks to cope when there are so many
        widgets = ['Creating SQL: ', Counter(),
                   'k/%sk ' % int(float(len(bad_res_revs))/1000.0), Bar(),
                   ' ', ETA()]
        progress2 = ProgressBar(widgets=widgets, maxval=int(float(len(bad_res_revs))/1000.0) or 1)
        for chunk_of_bad_res_revs in progress2(chunks(bad_res_revs, 1000)):
            delete_bad_revisions(chunk_of_bad_res_revs)

        # Knit together the remaining revisions again
        # (re-query after the deletes, then repair the expired_timestamp /
        # expired_id chain so each revision points at its successor)
        print '  Knitting existing revisions back together...'
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        latest_res_rev = res_revs[-1]
        if not latest_res_rev.current:
            latest_res_rev.current = True
        for i, res_rev in enumerate(res_revs[:-1]):
            if res_rev.expired_timestamp != res_revs[i+1].revision_timestamp:
                res_rev.expired_timestamp = res_revs[i+1].revision_timestamp
                res_rev.expired_id = res_revs[i+1].revision_id
        if latest_res_rev.expired_timestamp != END_OF_TIME:
            latest_res_rev.expired_timestamp = END_OF_TIME
        if latest_res_rev.expired_id is not None:
            latest_res_rev.expired_id = None

        # Correct the URL on the resource
        model.Session.query(model.Resource).filter_by(id=res.id).update({'url': latest_res_rev.url})
        model.repo.commit_and_remove()
        print '  ...done'


    print 'Summary\n', stats.report()
    print 'Total bad revs: %d' % total_bad_revisions
    if options.write and need_to_commit:
        model.repo.commit_and_remove()
        print 'Written'
def plot(sids, paper=False):

    labels = []

    colors = [cmap(i) for i in np.linspace(0, 1, 3)]
    c = 0

    for key in sids:

        orte_ttc = {}

        for sid in sids[key]:

            session_dir = os.path.join(PICKLE_DIR, sid)

            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]

            cores = info['metadata.effective_cores']

            if cores not in orte_ttc:
                orte_ttc[cores] = []

            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            tufs = tuf.sort('awo_get_u_pend')

            orte_ttc[cores].append((tufs['aec_after_exec'].max() - tufs['awo_get_u_pend'].min()))


        orte_df = pd.DataFrame(orte_ttc)

        labels.append("%s" % barrier_legend[key])
        #ax = orte_df.mean().plot(kind='line', color=colors[c], marker=barrier_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        ax = orte_df.mean().plot(kind='line', color=colors[c], marker='+', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)

        c += 1

    print 'labels: %s' % labels
    legend = mp.pyplot.legend(labels, loc='upper left', fontsize=LEGEND_FONTSIZE, markerscale=0, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)
    if not paper:
        mp.pyplot.title("TTC for a varying number of 'concurrent' CUs.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
           % (info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
              info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
              info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
              ), fontsize=8)
    mp.pyplot.xlabel("Pilot Cores", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylabel("Time to Completion (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("$ttc_{a}$", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(290, 550)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    if paper:
        # width = 3.487
        width = 3.3
        # height = width / 1.618
        height = 1.3
        fig = mp.pyplot.gcf()
        fig.set_size_inches(width, height)
        # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

        # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
        fig.tight_layout(pad=0.1)
        mp.pyplot.savefig('plot_ttc_cores_barriers.pdf')
    else:
        mp.pyplot.savefig('plot_ttc_cores_many.pdf')

    mp.pyplot.close()
Пример #14
0
def plot(sids,
         value,
         label='',
         paper=False,
         window=1.0,
         plot_mean=False,
         compare=None,
         micro=False):
    """Overlay one event rate across several sessions, one line per session.

    (Reformatted variant of the same plot routine used elsewhere in this
    file; the logic is identical.)

    sids    -- session ids to overlay.
    value   -- which rate to plot: 'sched_freq', 'exec_freq', 'fork_freq'
               or 'done_freq'; any other name raises an Exception.
    label   -- unused; kept for signature compatibility.
    paper   -- if True, omit the verbose multi-line title.
    window  -- sampling window in seconds; note 'done_freq' hard-codes 1.
    plot_mean -- also add 'mean_<metric>' columns with dashed style.
    compare -- required; info['metadata.<compare>'] is the per-session
               legend/column key, falling back to a running counter when
               the key is absent.
    micro   -- only affects the output file name prefix.

    NOTE(review): plot_label is assigned only in the 'sched_freq' and
    'exec_freq' branches; for 'fork_freq'/'done_freq' the ylabel call at
    the bottom raises NameError — confirm intended usage.

    Side effect: writes 'plot_rate_[micro_]<value>_<resource>_<N>gen.pdf'.
    """

    labels = []
    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]

    first = True

    # Per-session comparison metrics; also used as df_all column names.
    values = []

    counter = 0

    for sid in sids:

        print "sid: %s" % sid

        # Only radical.pilot sessions carry real resource metadata.
        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))
        unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_prof.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]
        else:
            resource_label = "bogus"

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        # Pick the comparison metric for this session.
        if not compare:
            raise Exception("Need to specify 'compare' parameter!")
        elif 'metadata.%s' % compare in info:
            metric = info['metadata.%s' % compare]
        else:
            #raise Exception("'%s' not found in info!" % compare)
            metric = counter
            counter += 1
        values.append(metric)

        if value == 'sched_freq':
            plot_type = 'sched'
            plot_label = 'Scheduling'

            spec = {'state': rps.ALLOCATING, 'event': 'advance'}
            add_frequency(unit_prof_df, 'sched_freq', window, spec)
            print unit_prof_df.state.unique()

            #
            # scheduling frequency
            #
            df = unit_prof_df[(unit_prof_df.sched_freq >= 0)
                              & (unit_prof_df.event == 'advance') &
                              (unit_prof_df.sid == sid)][[
                                  'time', 'sched_freq'
                              ]]

        elif value == 'exec_freq':
            plot_type = 'exec'
            plot_label = 'Executing'

            spec = {'state': 'Executing', 'event': 'advance'}
            add_frequency(unit_prof_df, 'exec_freq', window, spec)

            #
            # feq
            #
            df = unit_prof_df[(unit_prof_df.exec_freq >= 0)
                              & (unit_prof_df.event == 'advance') &
                              (unit_prof_df.sid == sid)][['time', 'exec_freq']]

        elif 'fork_freq' == value:
            spec = {'info': 'aec_start_script'}
            add_frequency(unit_prof_df, value, window, spec)

            #
            # fork - start_script
            #
            df = unit_prof_df[(unit_prof_df[value] >= 0) &
                              #(unit_prof_df.event == 'advance') &
                              (unit_prof_df.sid == sid)][['time', value]]

        elif value == 'done_freq':

            spec = {
                'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                'event': 'advance'
            }
            add_frequency(unit_prof_df, 'done_freq', 1, spec)

            #
            # feq
            #
            df = unit_prof_df[(unit_prof_df.done_freq >= 0)
                              & (unit_prof_df.event == 'advance') &
                              (unit_prof_df.sid == sid)][['time', 'done_freq']]

        else:
            raise Exception("Value %s unknown" % value)

        # Rename the rate column to this session's metric and rebase time
        # to zero so all sessions align at t=0.
        df.columns = ['time', metric]
        df['time'] -= df['time'].min()
        df.time = pd.to_datetime(df.time, unit='s')
        df.set_index('time', inplace=True)

        def _mean(array_like):
            return np.mean(array_like) / window

        # NOTE(review): resample(..., how=) is the legacy pandas API.
        df = df.resample('%dL' % int(1000.0 * window), how=_mean)
        df = df.fillna(0)

        print df.head()

        if first:
            df_all = df
        else:
            #df_all = pd.merge(df_all, df,  on='time', how='outer')
            #df_all = pd.merge(df_all, df, how='outer')
            df_all = pd.concat([df_all, df], axis=1)

        labels.append("%d" % metric)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])

        first = False

    # Print per-metric statistics; optionally add constant mean columns.
    c = 0
    for value in values:
        mean = df_all[value].mean()
        stddev = df_all[value].std(ddof=0)
        print "Mean value for %d: %f (%f)" % (value, mean, stddev)
        if plot_mean:
            df_all['mean_%s' % value] = mean
        # labels.append("Mean %s" % value)
    # NOTE(review): 'my_colors *= 2' also extends the aliased 'colors'
    # list in place.
    my_colors = colors
    my_styles = []
    for x in range(len(values)):
        my_styles.append('-')
    if plot_mean:
        my_colors *= 2
        for x in range(len(values)):
            my_styles.append('--')

    #df_all.set_index('time', inplace=True)
    # print df_all.head(500)
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    ax = df_all.plot(color=my_colors,
                     style=my_styles,
                     drawstyle='steps-pre',
                     fontsize=TICK_FONTSIZE,
                     linewidth=LINEWIDTH)
    # df_all.plot(drawstyle='steps')
    # df_all.plot()

    mp.pyplot.legend(labels,
                     loc='upper right',
                     fontsize=LEGEND_FONTSIZE,
                     labelspacing=0)
    if not paper:
        mp.pyplot.title(
            "Rate of CUs transitioning in stage '%s'.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (value, info['metadata.generations'], info['metadata.cu_cores'],
             info['metadata.cu_runtime'], resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    # NOTE(review): plot_label may be unbound here (see docstring).
    mp.pyplot.ylabel("%s Rate (Unit/s)" % plot_label, fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(0, )
    #mp.pyplot.xlim('0:00', '0:40')
    #mp.pyplot.xlim(380, 400)
    #mp.pyplot.xlim(675, 680)
    #ax.get_xaxis().set_ticks([])

    from matplotlib.dates import YearLocator, MonthLocator, DateFormatter, SecondLocator

    #second_fmt = DateFormatter('%S')
    # second_loc = SecondLocator(bysecond=range(0, 300, 10))
    # ax.xaxis.set_minor_formatter(second_fmt)
    # ax.xaxis.set_minor_locator(second_loc)
    # ax.xaxis.set_major_formatter(second_fmt)
    # ax.xaxis.set_major_locator(second_loc)
    # second_loc.set_axis(ax.xaxis)  # Have to manually make this call and the one below.
    # second_loc.refresh()

    # secondsFmt = DateFormatter('%s')
    # Place a major tick every second on the datetime x-axis.
    s = SecondLocator()
    ax.xaxis.set_major_locator(s)
    # ax.xaxis.set_minor_locator(SecondLocator())
    # ax.xaxis.set_major_formatter(secondsFmt)

    #ax.xaxis.set_major_locator(years)
    #ax.xaxis.set_major_formatter(yearsFmt)
    #ax.xaxis.set_minor_locator(months)

    ax.autoscale_view()

    # Thicken plot borders and tick marks to the configured width.
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Publication-style figure geometry (golden-ratio height).
    #width = 3.487
    width = 3.3
    height = width / 1.618
    #height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_rate_%s%s_%s_%dgen.pdf' %
                      ('micro_' if micro else '', value, resource_label,
                       info['metadata.generations']))
    mp.pyplot.close()
def plot(sids, key, paper=False):
    """Plot the delta between measured and projected core utilisation.

    For every session in *sids*, load the pickled profiling DataFrames from
    PICKLE_DIR, derive the achieved efficiency from agent timestamps, compute
    a projected efficiency from an analytical model, and plot the difference
    between the two (one line per *key* value, x-axis = CU runtime).  The
    figure is written to 'plot_formula.pdf'.

    Python 2 code (print statements) using deprecated pandas APIs
    (``DataFrame.sort``, ``Series.append``).

    :param sids: iterable of session ids (sub-directories of PICKLE_DIR)
    :param key: grouping key; only 'pilot_cores' is supported
    :param paper: when True, suppress the verbose title (camera-ready layout)
    """

    # keys = []
    # for sid in sids:
    #     print ("sid: %s") % sid
    #     session_dir = os.path.join(PICKLE_DIR, sid)
    #
    #     session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
    #
    #     # Legend info
    #     info = session_info_df.loc[sid]
    #
    #
    #     keys.append(val)
    #     orte_dfs[val] = {}
    #


    # Nested mapping: {key value (e.g. pilot cores): {cu_runtime: delta}}
    all_kv_dict = {}

    for sid in sids:

        print "Sid: %s" % sid

        session_dir = os.path.join(PICKLE_DIR, sid)

        # Per-session pickles produced by an earlier profiling/ETL step.
        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if key == 'pilot_cores':
            keyval = info['metadata.pilot_cores']
        else:
            print 'Unknown key: %s' % key
            exit(-1)

        if keyval not in all_kv_dict:
            print "First time I see this number of cu_cores: %d" % keyval
            all_kv_dict[keyval] = {}
        else:
            print "Already saw this number of cu_cores: %d" % keyval

        cu_runtime = info['metadata.cu_runtime']
        generations = info['metadata.generations']


        if cu_runtime not in all_kv_dict[keyval]:
            print "First time I see this value of cu_runtime: %d" % cu_runtime
            all_kv_dict[keyval][cu_runtime] = pd.Series()
        else:
            print "Already saw this value of cu_runtime: %d" % cu_runtime

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #tufs = tuf.sort('awo_get_u_pend')
        #tufs = tuf.sort('awo_adv_u')
        tufs = tuf.sort('asic_get_u_pend')

        # Batch TTC: first unit picked up by the scheduler until the last unit
        # finished executing; 'startup' is first-pickup to first spawn.
        # NOTE(review): Series.append returns a NEW series; the stored series
        # is not extended in place, and 'startup' appends to the same base
        # series as 'val' rather than to 'val'.  Presumably intentional since
        # only 'delta' is stored back below -- TODO confirm.
        val = all_kv_dict[keyval][cu_runtime].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))
        startup = all_kv_dict[keyval][cu_runtime].append(pd.Series((tufs['aec_start_script'].min() - tufs['asic_get_u_pend'].min())))

        # 'Optimal' TTC: all generations run back to back with zero overhead.
        ttc_o = generations * cu_runtime * 1.0
        ttc_a = val
        cores = info['metadata.pilot_cores']
        freq = 60.0  # assumed launch frequency used by the model -- TODO confirm units

        # $$ \frac {generations * unit runtime}{cores / freq + (generations * unit % runtime)}$$
        proj_eff = ttc_o / ( (cores / freq) + ttc_o) * 100


        if val[0] < ttc_o:
            # This likely means the pilot runtime was too short and we didn't complete all cu's
            print ("Einstein was wrong!?!")
            val = val/val
        else:
            # Real efficiency (%) = optimal TTC / achieved TTC * 100.
            val /= ttc_o
            val = 1 / val
            val *= 100

        real_eff = val
        delta = real_eff - proj_eff
        print "startup: %f, ttc_a: %f, ttc_o: %f, freq: %f, proj_eff: %f, real_eff: %f, delta: %f" % (
            startup, val, ttc_o, freq, proj_eff, real_eff, delta)


        # Overwrite the accumulating series with the final delta for this run.
        all_kv_dict[keyval][cu_runtime] = delta

    colors = [cmap(i) for i in np.linspace(0, 1, len(all_kv_dict))]
    c = 0

    labels = []
    # NOTE(review): the loop variable shadows the 'key' parameter from here on.
    for key in sorted(all_kv_dict, key=int, reverse=False):

        # Skip small experiments
        if key <= 128:
            continue

        print 'orte_ttc raw:', all_kv_dict[key]
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(all_kv_dict[key])
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=BARRIER_FONTSIZE, linewidth=BARRIER_LINEWIDTH)
        ax = orte_df.mean().plot(kind='line', marker='+', color=colors[c], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        c += 1

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")

    # Horizontal reference
    # y_ref = 100
    # mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    # labels.append("Optimal")

    print 'labels: %s' % labels
    position = 'lower right'
    mp.pyplot.legend(labels, loc=position, fontsize=LEGEND_FONTSIZE, markerscale=0, labelspacing=0.2)
    if not paper:
        # 'info' / 'resource_label' hold whatever the LAST session set them to.
        mp.pyplot.title("Resource efficiency for varying CU runtime.\n"
            "%d generations of a variable number of 'concurrent' CUs with a variable payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
           % (info['metadata.generations'], resource_label,
              info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
              info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
              ), fontsize=8)
    mp.pyplot.xlabel("Unit Duration (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Core Utilisation (\%)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0, 105)
    #mp.pyplot.xlim(0, 4096)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(0, 2000)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set
    #ax.set_yscale('log', basey=10)
    ax.set_xscale('log', basex=2)


    # Size the figure for a single-column paper layout.
    #width = 3.487
    width = 3.3
    #height = width / 1.618
    height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_formula.pdf')

    mp.pyplot.close()
Example #16
0
def plot(sids, key, paper=False):
    """Plot the delta between measured and projected core utilisation.

    Reformatted duplicate of the efficiency-formula plot: for every session
    in *sids* the pickled profiling DataFrames are loaded from PICKLE_DIR,
    real and model-projected efficiencies are computed, and their difference
    is plotted (one line per *key* value) into 'plot_formula.pdf'.

    Python 2 code using deprecated pandas APIs (``DataFrame.sort``,
    ``Series.append``).

    :param sids: iterable of session ids (sub-directories of PICKLE_DIR)
    :param key: grouping key; only 'pilot_cores' is supported
    :param paper: when True, suppress the verbose title
    """

    # keys = []
    # for sid in sids:
    #     print ("sid: %s") % sid
    #     session_dir = os.path.join(PICKLE_DIR, sid)
    #
    #     session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
    #
    #     # Legend info
    #     info = session_info_df.loc[sid]
    #
    #
    #     keys.append(val)
    #     orte_dfs[val] = {}
    #

    # Nested mapping: {key value (e.g. pilot cores): {cu_runtime: delta}}
    all_kv_dict = {}

    for sid in sids:

        print "Sid: %s" % sid

        session_dir = os.path.join(PICKLE_DIR, sid)

        # Per-session pickles produced by an earlier profiling/ETL step.
        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        tr_unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'tr_unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if key == 'pilot_cores':
            keyval = info['metadata.pilot_cores']
        else:
            print 'Unknown key: %s' % key
            exit(-1)

        if keyval not in all_kv_dict:
            print "First time I see this number of cu_cores: %d" % keyval
            all_kv_dict[keyval] = {}
        else:
            print "Already saw this number of cu_cores: %d" % keyval

        cu_runtime = info['metadata.cu_runtime']
        generations = info['metadata.generations']

        if cu_runtime not in all_kv_dict[keyval]:
            print "First time I see this value of cu_runtime: %d" % cu_runtime
            all_kv_dict[keyval][cu_runtime] = pd.Series()
        else:
            print "Already saw this value of cu_runtime: %d" % cu_runtime

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #tufs = tuf.sort('awo_get_u_pend')
        #tufs = tuf.sort('awo_adv_u')
        tufs = tuf.sort('asic_get_u_pend')

        # Batch TTC and startup latency derived from agent timestamps.
        # NOTE(review): Series.append returns a NEW series; 'startup' appends
        # to the same base series as 'val', not to 'val' -- TODO confirm
        # this is intentional (only 'delta' is stored back below).
        val = all_kv_dict[keyval][cu_runtime].append(
            pd.Series((tufs['aec_after_exec'].max() -
                       tufs['asic_get_u_pend'].min())))
        startup = all_kv_dict[keyval][cu_runtime].append(
            pd.Series((tufs['aec_start_script'].min() -
                       tufs['asic_get_u_pend'].min())))

        # 'Optimal' TTC: all generations back to back with zero overhead.
        ttc_o = generations * cu_runtime * 1.0
        ttc_a = val
        cores = info['metadata.pilot_cores']
        freq = 60.0  # model launch frequency -- TODO confirm units

        # $$ \frac {generations * unit runtime}{cores / freq + (generations * unit % runtime)}$$
        proj_eff = ttc_o / ((cores / freq) + ttc_o) * 100

        if val[0] < ttc_o:
            # This likely means the pilot runtime was too short and we didn't complete all cu's
            print("Einstein was wrong!?!")
            val = val / val
        else:
            # Real efficiency (%) = optimal TTC / achieved TTC * 100.
            val /= ttc_o
            val = 1 / val
            val *= 100

        real_eff = val
        delta = real_eff - proj_eff
        print "startup: %f, ttc_a: %f, ttc_o: %f, freq: %f, proj_eff: %f, real_eff: %f, delta: %f" % (
            startup, val, ttc_o, freq, proj_eff, real_eff, delta)

        # Overwrite the accumulating series with the final delta for this run.
        all_kv_dict[keyval][cu_runtime] = delta

    colors = [cmap(i) for i in np.linspace(0, 1, len(all_kv_dict))]
    c = 0

    labels = []
    # NOTE(review): the loop variable shadows the 'key' parameter from here on.
    for key in sorted(all_kv_dict, key=int, reverse=False):

        # Skip small experiments
        if key <= 128:
            continue

        print 'orte_ttc raw:', all_kv_dict[key]
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(all_kv_dict[key])
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=BARRIER_FONTSIZE, linewidth=BARRIER_LINEWIDTH)
        ax = orte_df.mean().plot(kind='line',
                                 marker='+',
                                 color=colors[c],
                                 fontsize=TICK_FONTSIZE,
                                 linewidth=LINEWIDTH)
        c += 1

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")

    # Horizontal reference
    # y_ref = 100
    # mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    # labels.append("Optimal")

    print 'labels: %s' % labels
    position = 'lower right'
    mp.pyplot.legend(labels,
                     loc=position,
                     fontsize=LEGEND_FONTSIZE,
                     markerscale=0,
                     labelspacing=0.2)
    if not paper:
        # 'info' / 'resource_label' hold whatever the LAST session set them to.
        mp.pyplot.title(
            "Resource efficiency for varying CU runtime.\n"
            "%d generations of a variable number of 'concurrent' CUs with a variable payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (info['metadata.generations'], resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=8)
    mp.pyplot.xlabel("Unit Duration (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Core Utilisation (\%)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0, 105)
    #mp.pyplot.xlim(0, 4096)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(0, 2000)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set
    #ax.set_yscale('log', basey=10)
    ax.set_xscale('log', basex=2)

    # Size the figure for a single-column paper layout.
    #width = 3.487
    width = 3.3
    #height = width / 1.618
    height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_formula.pdf')

    mp.pyplot.close()
def plot(sids, value, label='', paper=False):
    """Plot the number of concurrently executing CUs over time.

    For every session in *sids* the pickled profiling DataFrames are loaded
    from PICKLE_DIR, a concurrency column named *value* ('cc_fork' or
    'cc_exit') is added to the unit profile via ``rpu.add_concurrency``, and
    all sessions are merged on the time axis into one figure saved as
    'plot_concurrency.pdf'.

    Python 2 code (print statements, ``itervalues``).

    :param sids: iterable of session ids; RP sessions are recognised by the
        'rp.session' prefix, anything else is treated as non-RP data
    :param value: concurrency metric to plot ('cc_fork' or 'cc_exit')
    :param label: unused -- kept for call-site compatibility
    :param paper: when True, suppress the verbose title
    """

    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]

    labels = []

    # Tracks whether df_all has been seeded with the first session's data.
    first = True

    for sid in sids:

        # RP sessions carry full pilot metadata; others get a dummy label.
        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        # Per-session pickles produced by an earlier profiling/ETL step.
        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        cores = info['metadata.effective_cores']
        #cores = 32

        # The 'in'/'out' specs define which profile events open and close a
        # unit's concurrency interval; add_concurrency presumably adds the
        # named column to unit_prof_df in place -- TODO confirm.
        if value == 'cc_fork':
            # Concurrency between spawn script start and end of execution.
            spec = {
                'in': [{
                    'info': 'aec_start_script'
                }],
                'out': [{
                    'info': 'aec_after_exec'
                }]
            }
            rpu.add_concurrency(unit_prof_df, 'cc_fork', spec)

        elif value == 'cc_exit':
            # Concurrency between end of execution and staging-out advance.
            spec = {
                'in': [{
                    'info': 'aec_after_exec'
                }],
                'out': [
                    {
                        'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                        'event': 'advance'
                    },
                ]
            }
            rpu.add_concurrency(unit_prof_df, 'cc_exit', spec)

        df = unit_prof_df[(unit_prof_df[value] >= 0) &
                          #(unit_prof_df.event == 'advance') &
                          (unit_prof_df.sid == sid)][['time', value]]

        # Rename the metric column to the core count so each session becomes
        # its own column after the merge; rebase time to start at zero.
        df.columns = ['time', cores]
        df['time'] -= df['time'].min()

        # Outer-merge all sessions on the (rebased) time axis.
        if first:
            df_all = df
        else:
            df_all = pd.merge(df_all, df, on='time', how='outer')

        #labels.append("Cores: %d" % cores)
        # labels.append("%d" % cores)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])
        labels.append("%d" % info['metadata.cu_runtime'])

        first = False

    df_all.set_index('time', inplace=True)
    print df_all.head()
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    #ax = df_all.plot(drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)
    ax = df_all.plot(fontsize=TICK_FONTSIZE,
                     linewidth=LINEWIDTH,
                     colors=colors)

    # Vertical reference
    #x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    #mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    #labels.append("Optimal")

    location = 'upper right'
    legend = mp.pyplot.legend(labels,
                              loc=location,
                              fontsize=LEGEND_FONTSIZE,
                              labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        # 'info' / 'resource_label' hold whatever the LAST session set them to.
        mp.pyplot.title(
            "Concurrent number of CUs in stage '%s'.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (value, info['metadata.generations'], info['metadata.cu_cores'],
             info['metadata.cu_runtime'], resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    # mp.pyplot.ylim(0, 200)
    mp.pyplot.ylim(-50, )
    mp.pyplot.xlim(0, 600)
    #ax.get_xaxis().set_ticks([])
    print dir(ax)

    # Apply uniform border/tick line widths to the axes.
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Size the figure for a single-column paper layout (golden ratio).
    # width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()
def wms_revisions(options):
    '''
    Remove bogus WMS/WFS GetCapabilities resource revisions and re-knit the
    remaining revision history.

    These revisions look like this:

    # select url from resource_revision where id='3b157e17-cef2-43dc-b0ce-76de18549852' order by revision_timestamp;
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.1.1
    http://www.acas.org.uk/CHttpHandler.ashx?service=WFS&request=GetCapabilities&version=2.0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3

    The bad ones have been changed to "?service=" params. These revisions need removing.

    # Typical revision:
                     id                  |         timestamp          |           author           |                         message                          | state  | approved_timestamp
    a2370bd1-b1b8-41b4-9fc1-d38b46d2fbda | 2014-02-22 04:34:56.634442 | co-prod3.dh.bytemark.co.uk | REST API: Update object financial-transactions-data-acas | active |
    # i.e. author='co-prod3...' (site-user, via API)

    Destructive changes are only executed when options.write is set.
    '''
    resources = common.get_resources(state='active',
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    stats.report_value_limit = 1000
    total_bad_revisions = 0
    # NOTE(review): never set to True anywhere below, so the final
    # commit_and_remove branch is dead code -- TODO confirm intent.
    need_to_commit = False
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        res = model.Resource.get(
            res.id)  # as the session gets flushed during the loop
        # All revisions of this resource, oldest first.
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(
            id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        first_res_rev = res_revs[0]
        if 'request=GetCapabilities&version=' in first_res_rev.url:
            # The resource started life as a WMS URL; nothing to repair.
            print add_stat('First revision already was WMS', res, stats)
            continue

        # Identify bad revisions by the WMS URL parameters and author
        bad_res_revs = res_rev_q.filter(
            model.ResourceRevision.url.ilike(
                '%?service=W%S&request=GetCapabilities&version=%')).all()
        # Only delete revisions authored by the known automated culprits;
        # anything else is treated as a false positive.
        if bad_res_revs and \
           bad_res_revs[0].revision.author not in ('co-prod3.dh.bytemark.co.uk', 'current_revision_fixer2'):
            print add_stat('Misidentified', res, stats,
                           'author=%r' % bad_res_revs[0].revision.author)
            continue
        if not bad_res_revs:
            add_stat('Resource ok', res, stats)
            continue
        print ' '  # don't overwrite progress bar
        print add_stat('Bad revisions', res, stats,
                       '(%d/%d)' % (len(bad_res_revs), len(res_revs)))
        total_bad_revisions += len(bad_res_revs)

        # Find the new latest (good) revision
        bad_res_revs_set = set(bad_res_revs)
        for res_rev_index in reversed(xrange(len(res_revs))):
            if res_revs[res_rev_index] not in bad_res_revs_set:
                latest_good_res_rev = res_revs[res_rev_index]
                break
        else:
            print add_stat('No good revisions', res, stats)
            continue
        if not options.write:
            # Dry-run mode: report only, change nothing.
            continue

        # Delete the revisions and resource_revisions
        print '  Deleting bad revisions...'

        def delete_bad_revisions(res_revs):
            # Build the sql as a list, as it is faster when you have 1000 strings to append
            # NOTE(review): SQL is assembled with % string formatting.  The
            # interpolated ids come from the database, not user input, but
            # parameterized queries would still be safer.
            # FK constraints are dropped for the duration of the batch so the
            # revision rows can be deleted, then restored before COMMIT.
            sql = [
                '''BEGIN;
            ALTER TABLE package_tag DROP CONSTRAINT package_tag_revision_id_fkey;
            ALTER TABLE package_extra DROP CONSTRAINT package_extra_revision_id_fkey;
            ALTER TABLE resource DROP CONSTRAINT resource_revision_id_fkey;
            '''
            ]
            for res_rev in res_revs:
                sql.append(
                    "DELETE from resource_revision where id='%s' and revision_id='%s';\n"
                    % (res.id, res_rev.revision_id))
                # a revision created (e.g. over the API) can be connect to other
                # resources or a dataset, so only delete the revision if only
                # connected to this one.
                if model.Session.query(model.ResourceRevision).\
                        filter_by(revision_id=res_rev.revision_id).\
                        count() == 1 and \
                        model.Session.query(model.PackageRevision).\
                        filter_by(revision_id=res_rev.revision_id).count() == 0:
                    sql.append("DELETE from revision where id='%s';\n" %
                               res_rev.revision_id)
            sql.append("UPDATE resource SET revision_id='%s' WHERE id='%s';\n" % \
                (latest_good_res_rev.revision_id, res.id))
            sql.append('''
            ALTER TABLE package_tag ADD CONSTRAINT package_tag_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
            ALTER TABLE package_extra ADD CONSTRAINT package_extra_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
            ALTER TABLE resource ADD CONSTRAINT resource_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
            COMMIT;''')
            print '  sql..',
            model.Session.execute(''.join(sql))
            print '.committed'
            model.Session.remove()

        def chunks(l, n):
            '''Yield successive n-sized chunks from l.'''
            for i in xrange(0, len(l), n):
                yield l[i:i + n]

        # chunk revisions in chunks to cope when there are so many
        widgets = [
            'Creating SQL: ',
            Counter(),
            'k/%sk ' % int(float(len(bad_res_revs)) / 1000.0),
            Bar(), ' ',
            ETA()
        ]
        progress2 = ProgressBar(widgets=widgets,
                                maxval=int(float(len(bad_res_revs)) / 1000.0)
                                or 1)
        for chunk_of_bad_res_revs in progress2(chunks(bad_res_revs, 1000)):
            delete_bad_revisions(chunk_of_bad_res_revs)

        # Knit together the remaining revisions again
        print '  Knitting existing revisions back together...'
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(
            id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        latest_res_rev = res_revs[-1]
        if not latest_res_rev.current:
            latest_res_rev.current = True
        # Re-chain expired_timestamp/expired_id so each revision points at
        # its (new) successor after the deletions.
        for i, res_rev in enumerate(res_revs[:-1]):
            if res_rev.expired_timestamp != res_revs[i + 1].revision_timestamp:
                res_rev.expired_timestamp = res_revs[i + 1].revision_timestamp
                res_rev.expired_id = res_revs[i + 1].revision_id
        if latest_res_rev.expired_timestamp != END_OF_TIME:
            latest_res_rev.expired_timestamp = END_OF_TIME
        if latest_res_rev.expired_id is not None:
            latest_res_rev.expired_id = None

        # Correct the URL on the resource
        model.Session.query(model.Resource).filter_by(id=res.id).update(
            {'url': latest_res_rev.url})
        model.repo.commit_and_remove()
        print '  ...done'

    print 'Summary\n', stats.report()
    print 'Total bad revs: %d' % total_bad_revisions
    if options.write and need_to_commit:
        model.repo.commit_and_remove()
        print 'Written'
Example #19
0
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):
    """Plot per-CU component overheads for a single session.

    Draws one line per overhead component (core occupation, launch method,
    spawner, scheduler queue, ExecWorker queue, post-exec) across all
    completed CUs of session *sid*, and saves the figure as
    '<sid>_plot1.pdf'.

    Python 2 code using the deprecated ``DataFrame.sort``.

    :param tr_unit_prof_df: transposed unit-profile DataFrame (one row per
        CU, timestamp columns)
    :param info_df: session info DataFrame, indexed by session id
    :param unit_info_df: unit info DataFrame (used to derive MPI/LM/spawner)
    :param pilot_info_df: pilot info DataFrame
    :param sid: session id to plot
    """

    print "Plotting %s ..." % sid

    labels = []

    # Legend info
    info = info_df.loc[sid]

    mpi = get_mpi(unit_info_df, sid)
    #mpi = True
    # For this call assume that there is only one pilot per session
    lms = get_lm(unit_info_df, pilot_info_df, sid, mpi)
    assert len(lms) == 1
    launch_method = lms.values()[0]

    # For this call assume that there is only one pilot per session
    spawners = get_spawners(unit_info_df, pilot_info_df, sid)
    assert len(spawners) == 1
    spawner = spawners.values()[0]

    #exit()

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    # Get only the entries for this session
    tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

    # Only consider CUs that reached the 'Done' state.
    tuf = tuf[tuf['Done'].notnull()]

    # We sort the units based on the order ...
    #tufs = tuf.sort('awo_get_u_pend') # they arrived at the agent
    #tufs = tuf.sort('aec_work_u_pend') # they are picked up by an EW
    tufs = tuf.sort('asc_put_u_pend') # they are scheduled
    #tufs = tuf.sort('asc_get_u_pend') # the are picked up by the scheduler

    # Each difference below subtracts the nominal CU runtime (or a pair of
    # timestamps) to isolate one overhead component per CU.
    ax = (tufs['asc_released'] - tufs['asc_allocated'] - info['metadata.cu_runtime']).plot(kind='line', color='red')
    labels.append("Core Occupation overhead")

    ax = (tufs['aec_after_exec'] - tufs['aec_after_cd'] - info['metadata.cu_runtime']).plot(kind='line', color='orange')
    labels.append('%s LaunchMethod (%s)' % ('MPI' if mpi else 'Task', launch_method))

    ax = (tufs['aec_start_script'] - tufs['aec_handover']).plot(kind='line', color='black')
    labels.append("Spawner (%s)" % spawner)

    (tufs['asc_get_u_pend'] - tufs['asic_put_u_pend']).plot(kind='line', color='blue')
    labels.append("Scheduler Queue")

    ax = (tufs['aec_work_u_pend'] - tufs['asc_put_u_pend']).plot(kind='line', color='green')
    labels.append("ExecWorker Queue")

    ax = (tufs['asc_released'] - tufs['aec_complete']).plot(kind='line', color='magenta')
    labels.append("Postexec")

    mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    mp.pyplot.title("%s (%s)\n"
                    "%d CUs of %d core(s) with a %ds payload on a %d core pilot on %s.\n"
                    "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
                    "RP: %s - RS: %s - RU: %s"
                   % (sid, time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(info['created'])),
                      info['metadata.cu_count'], info['metadata.cu_cores'], info['metadata.cu_runtime'], info['metadata.pilot_cores'], resource_label,
                      info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                      info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                      ), fontsize=8)
    mp.pyplot.xlabel("Compute Units (ordered by agent arrival)")
    mp.pyplot.ylabel("Time (s)")
    mp.pyplot.ylim(-0.01)
    ax.get_xaxis().set_ticks([])

    mp.pyplot.savefig('%s_plot1.pdf' % sid)
    mp.pyplot.close()
def plot(sids, paper=False):
    """Plot mean time-to-completion against core count, per resource.

    *sids* maps a resource key to a list of session ids.  For each session
    the TTC (first unit arriving at the agent until the last unit finished
    executing) is accumulated per effective core count, and the mean TTC per
    core count is plotted as one line per resource.  The figure is saved as
    'plot_ttc_cores_resources.pdf' (paper mode) or 'plot_ttc_cores_many.pdf'.

    Python 2 code using deprecated pandas APIs (``DataFrame.sort``,
    ``Series.append``).

    :param sids: dict mapping resource key -> iterable of session ids
    :param paper: when True, suppress the title and use the paper filename
    """

    labels = []

    for key in sids:

        # {effective core count: Series of per-session TTC values}
        orte_ttc = {}

        for sid in sids[key]:

            session_dir = os.path.join(PICKLE_DIR, sid)

            # Per-session pickles produced by an earlier profiling/ETL step.
            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]

            cores = info['metadata.effective_cores']

            if cores not in orte_ttc:
                orte_ttc[cores] = pd.Series()

            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            tufs = tuf.sort('awo_get_u_pend')

            # TTC = last execution end minus first arrival at the agent.
            orte_ttc[cores] = orte_ttc[cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['awo_get_u_pend'].min())))

        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df

        labels.append("%s" % resource_legend[key])
        ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=BARRIER_FONTSIZE, linewidth=BARRIER_LINEWIDTH)

    print 'labels: %s' % labels
    mp.pyplot.legend(labels, loc='lower left', fontsize=BARRIER_FONTSIZE)
    if not paper:
        # 'info' / 'resource_label' hold whatever the LAST session set them to.
        mp.pyplot.title("TTC for a varying number of 'concurrent' CUs.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
           % (info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
              info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
              info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
              ), fontsize=8)
    mp.pyplot.xlabel("# Cores", fontsize=BARRIER_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=BARRIER_FONTSIZE)
    mp.pyplot.ylim(0)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    if paper:
        mp.pyplot.savefig('plot_ttc_cores_resources.pdf')
    else:
        mp.pyplot.savefig('plot_ttc_cores_many.pdf')

    mp.pyplot.close()
Example #21
0
def migrate(options):
    """Back-fill the ckanext-archiver ``Archival`` table from legacy data.

    Iterates over active resources (optionally filtered by publisher,
    resource id or dataset name via *options*) and reconstructs one
    Archival row per resource from the old TaskStatus entries plus the
    resource's own cache/hash fields.  Existing Archival rows are
    compared field-by-field and only touched when they differ.

    Database changes are committed only when ``options.write`` is set;
    otherwise this is a dry run that just prints the stats summary.
    NOTE(review): even in a dry run the stats still count rows as
    'Updated'/'Added' — presumably intentional, to preview what a real
    write would do; confirm before relying on the counts.
    """
    from ckan import model
    from ckanext.archiver.model import Archival, Status

    # Candidate resources, narrowed by whatever CLI filters were given.
    resources = common.get_resources(state='active',
                                     publisher_ref=options.publisher,
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        # Gather the details of archivals from TaskStatus and Resource
        # to fill all properties of Archival apart from:
        # * package_id
        # * resource_id
        fields = {}
        archiver_task_status = model.Session.query(model.TaskStatus)\
                                    .filter_by(entity_id=res.id)\
                                    .filter_by(task_type='archiver')\
                                    .filter_by(key='status')\
                                    .first()
        if archiver_task_status:
            # The legacy archiver stored a JSON blob in TaskStatus.error;
            # unpack it into the individual Archival columns.
            ats_error = json.loads(archiver_task_status.error)
            fields['status_id'] = Status.by_text(archiver_task_status.value)
            fields['is_broken'] = Status.is_status_broken(fields['status_id'])
            fields['reason'] = ats_error['reason']
            fields['last_success'] = date_str_to_datetime_or_none(
                ats_error['last_success'])
            fields['first_failure'] = date_str_to_datetime_or_none(
                ats_error['first_failure'])
            fields['failure_count'] = int(ats_error['failure_count'])
            fields['url_redirected_to'] = ats_error['url_redirected_to']
            fields['updated'] = archiver_task_status.last_updated
        else:
            # No TaskStatus row: skip resources that carry no archive
            # traces at all, otherwise continue with blank fields.
            if not (res.cache_url or res.extras.get('cache_filepath')
                    or res.hash or res.size or res.mimetype):
                add_stat('No archive data', res, stats)
                continue
            for field_name in ('status_id', 'is_broken', 'reason',
                               'last_success', 'first_failure',
                               'failure_count', 'url_redirected_to', 'updated',
                               'created'):
                fields[field_name] = None

        fields['cache_filepath'] = res.extras.get('cache_filepath')
        fields['cache_url'] = res.cache_url
        fields['hash'] = res.hash
        fields['size'] = res.size
        fields['mimetype'] = res.mimetype

        # Derive created/resource_timestamp from the revision history
        # when available; otherwise fall back to the archival dates
        # gathered above.
        revisions_with_hash = model.Session.query(model.ResourceRevision)\
                .filter_by(id=res.id)\
                .order_by(model.ResourceRevision.revision_timestamp)\
                .filter(model.ResourceRevision.hash != '').all()
        if revisions_with_hash:
            # these are not perfect by not far off
            fields['created'] = revisions_with_hash[0].revision_timestamp
            fields['resource_timestamp'] = revisions_with_hash[
                -1].revision_timestamp
        else:
            # The END_OF_TIME/START_OF_TIME sentinels make min()/max()
            # ignore missing (None) dates.
            fields['created'] = min(fields['updated'] or END_OF_TIME,
                                    fields['first_failure'] or END_OF_TIME,
                                    fields['last_success'] or END_OF_TIME)
            fields['resource_timestamp'] = max(
                fields['updated'] or START_OF_TIME, fields['first_failure']
                or START_OF_TIME, fields['last_success'] or START_OF_TIME)

        # Compare with any existing data in the Archival table
        archival = Archival.get_for_resource(res.id)
        if archival:
            changed = None
            for field, value in fields.items():
                if getattr(archival, field) != value:
                    # Only mutate the ORM object when actually writing.
                    if options.write:
                        setattr(archival, field, value)
                    changed = True
            if not changed:
                add_stat('Already exists correctly in archival table', res,
                         stats)
                continue
            add_stat('Updated in archival table', res, stats)
        else:
            archival = Archival.create(res.id)
            if options.write:
                for field, value in fields.items():
                    setattr(archival, field, value)
                model.Session.add(archival)
            add_stat('Added to archival table', res, stats)

    print 'Summary\n', stats.report()
    if options.write:
        model.repo.commit_and_remove()
        print 'Written'
def plot(sids, value, label='', paper=False):

    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]

    labels = []

    first = True

    for sid in sids:

        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        cores = info['metadata.effective_cores']
        #cores = 32

        if value == 'cc_fork':
            spec = {
                'in': [
                    {'info' : 'aec_start_script'}
                ],
                'out' : [
                    {'info' : 'aec_after_exec'}
                ]
            }
            rpu.add_concurrency (unit_prof_df, 'cc_fork', spec)

        elif value == 'cc_exit':
            spec = {
                'in': [
                    {'info' : 'aec_after_exec'}
                ],
                'out' : [
                    {'state': rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance'},
                ]
            }
            rpu.add_concurrency (unit_prof_df, 'cc_exit', spec)

        df = unit_prof_df[
            (unit_prof_df[value] >= 0) &
            #(unit_prof_df.event == 'advance') &
            (unit_prof_df.sid == sid)
            ][['time', value]]

        df.columns = ['time', cores]
        df['time'] -= df['time'].min()

        if first:
            df_all = df
        else:
            df_all = pd.merge(df_all, df,  on='time', how='outer')

        #labels.append("Cores: %d" % cores)
        # labels.append("%d" % cores)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])
        labels.append("%d" % info['metadata.cu_runtime'])

        first = False

    df_all.set_index('time', inplace=True)
    print df_all.head()
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    #ax = df_all.plot(drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)
    ax = df_all.plot(fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)

    # Vertial reference
    #x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    #mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    #labels.append("Optimal")

    location = 'upper right'
    legend = mp.pyplot.legend(labels, loc=location, fontsize=LEGEND_FONTSIZE, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        mp.pyplot.title("Concurrent number of CUs in stage '%s'.\n"
                "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                "RP: %s - RS: %s - RU: %s"
               % (value,
                  info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
                  info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                  info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
                  ), fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    # mp.pyplot.ylim(0, 200)
    mp.pyplot.ylim(-50,)
    mp.pyplot.xlim(0, 600)
    #ax.get_xaxis().set_ticks([])
    print dir(ax)

    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()
def plot(sids, paper=False):

    labels = []

    all_dict = {}

    for sid in sids:

        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        # cu_cores = info['metadata.cu_cores']
        # cu_count = info['metadata.cu_count']
        cu_count = info['metadata.cu_cores']
        cu_cores = info['metadata.cu_count']
        cu_runtime = info['metadata.cu_runtime']

        if cu_count not in all_dict:
            all_dict[cu_count] = {}

        if cu_cores not in all_dict[cu_count]:
            all_dict[cu_count][cu_cores] = pd.Series()

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0].replace('_', '\_')
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # Only take completed CUs into account
        #tuf = tuf[tuf['Done'].notnull()]

        # We sort the units based on the order they arrived at the agent
        #tufs = tuf.sort('awo_get_u_pend')
        #tufs = tuf.sort('awo_adv_u')
        #tufs = tuf.sort('asic_get_u_pend')
        tufs = tuf.sort()

        try:
            all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min() - cu_runtime)))
            #all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))
        except:
            print "Plotting failed for session: %s" % sid
            continue

    for key in all_dict:
        # print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(all_dict[key])
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        ax = orte_df.mean().plot(kind='line', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        ax.set_xscale('log', basex=10)
        ax.set_yscale('log', basey=10)

    # Horizontal reference
    # y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    # ax = mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--')
    # labels.append("Optimal")

    print 'labels: %s' % labels
    mp.pyplot.legend(labels, loc='upper left', fontsize=LEGEND_FONTSIZE)
    if not paper:
        mp.pyplot.title("TTC overhead for variable size CU.\n"
            "%d generations of a variable number of 'concurrent' CUs with variable number of cores with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
           % (info['metadata.generations'], info['metadata.cu_runtime'], resource_label,
              info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
              info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
              ), fontsize=8)
    mp.pyplot.xlabel("\# CUs", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("$TTC_{overhead}$ (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10) #ax.get_xaxis().set_ticks([])
    # #ax.get_xaxis.set

    # [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    # plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    # plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    #width = 3.487
    #width = 3.3
    #height = width / 1.618
    # height = 2.7
    #fig = mp.pyplot.gcf()
    #fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    #fig.tight_layout(pad=0.1)
    #fig.tight_layout()

    mp.pyplot.savefig('plot_ttc_cu_cores.pdf')

    mp.pyplot.close()
Пример #24
0
def plot(sids, value, label='', paper=False):
    """Plot unit concurrency over time for the scheduler or exec stage.

    One line per session, labelled by the session's barrier mode
    (generation / client-submit / agent-launch).  *value* selects the
    pre-computed concurrency column: 'sched' -> cc_sched, 'exec' ->
    cc_exec; anything else raises.  Saves 'plot_cc_ew_barriers.pdf'
    (paper mode) or 'plot6_<value><label>.pdf'.
    """

    labels = []
    #colors = []
    # One distinct color per session, evenly spaced over the colormap.
    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]
    #c = 0

    first = True

    for sid in sids:

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        cores = info['metadata.effective_cores']

        # Select the pre-computed concurrency column for the requested
        # stage, restricted to 'advance' events of this session.
        if value == 'sched':
            #
            # Scheduling
            #
            df = unit_prof_df[
                (unit_prof_df.cc_sched >= 0) &
                (unit_prof_df.event == 'advance') &
                (unit_prof_df.sid == sid)
                ][['time', 'cc_sched']]

        elif value == 'exec':
            #
            # Scheduling
            #
            df = unit_prof_df[
                (unit_prof_df.cc_exec >= 0) &
                (unit_prof_df.event == 'advance') &
                (unit_prof_df.sid == sid)
                ][['time', 'cc_exec']]

        else:
            raise Exception("Value %s unknown" % value)

        # Map the session's barrier metadata onto one of the three
        # barrier constants; used both as column name and legend key.
        if 'metadata.barriers' in info and 'barrier_generation' in info['metadata.barriers']:
            barrier = BARRIER_GENERATION
        elif 'metadata.barriers' in info and 'barrier_client_submit' in info['metadata.barriers']:
            barrier = BARRIER_CLIENT_SUBMIT
        elif 'metadata.barriers' in info and 'barrier_agent_launch' in info['metadata.barriers']:
            barrier = BARRIER_AGENT_LAUNCH
        else:
            raise Exception("No barrier info found")

        # Rename the value column to the barrier constant and make the
        # time axis session-relative (t=0 at first sample).
        df.columns = ['time', barrier]
        df['time'] -= df['time'].min()

        # Outer-join all sessions on the shared time axis.
        if first:
            df_all = df
        else:
            df_all = pd.merge(df_all, df,  on='time', how='outer')

        labels.append(barrier_legend[barrier])
        #colors.append(barrier_colors[barrier])

        first = False

    df_all.set_index('time', inplace=True)
    print df_all.head()
    #df_all.plot(colormap='Paired')
    ax = df_all.plot(color=colors, fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)

    # For this call assume that there is only one pilot per session
    # NOTE(review): 'ppn' is never used below — the call is apparently
    # kept for its single-pilot assertion side effect; confirm before
    # removing.
    ppn_values = get_ppn(unit_info_df, pilot_info_df, sid)
    assert len(ppn_values) == 1
    ppn = ppn_values.values()[0]

    legend = mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)
    if not paper:
        # 'info'/'resource_label' belong to the last session processed.
        mp.pyplot.title("Concurrent number of CUs in stage '%s'.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
           % (value,
              info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
              info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
              info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
              ), fontsize=8)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Concurrent Units", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(-10, 1500)
    # Pad the x range by 1% on each side.
    mi = df_all.index.min()
    ma = df_all.index.max()
    mp.pyplot.xlim(mi - 0.01 * ma, ma * 1.01)
    #ax.get_xaxis().set_ticks([])

    # Heavier frame/tick lines for print legibility.
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    if paper:
        # width = 3.487
        # Fixed figure size matching the paper column width (golden ratio).
        width = 3.3
        height = width / 1.618
        #height = 2.7
        fig = mp.pyplot.gcf()
        fig.set_size_inches(width, height)
        # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

        # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
        fig.tight_layout(pad=0.1)
        mp.pyplot.savefig('plot_cc_ew_barriers.pdf')
    else:
        mp.pyplot.savefig('plot6_%s%s.pdf' % (value, label))
    mp.pyplot.close()
Пример #25
0
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):

    print "Plotting %s ..." % sid

    labels = []

    # Legend info
    info = info_df.loc[sid]

    mpi = get_mpi(unit_info_df, sid)
    #mpi = True
    # For this call assume that there is only one pilot per session
    lms = get_lm(unit_info_df, pilot_info_df, sid, mpi)
    assert len(lms) == 1
    launch_method = lms.values()[0]

    # For this call assume that there is only one pilot per session
    spawners = get_spawners(unit_info_df, pilot_info_df, sid)
    assert len(spawners) == 1
    spawner = spawners.values()[0]

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    # Get only the entries for this session
    uf = unit_info_df[unit_info_df['sid'] == sid]

    # Get only the entries for this session
    tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

    result = pd.value_counts(tuf['aec_complete'].notnull(), sort=False)

    # result = pd.value_counts(uf['state'].values, sort=False)
    print result

    # Only take completed CUs into account
    #tuf = tuf[tuf['Done'].notnull()]

    #print tuf['aec_after_exec']
    #print tuf['aec_complete']

    #print 'c:', c

    ax = result.plot(kind='pie', autopct='%.2f%%')
    ax.set_aspect('equal')

    print info
    #mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    mp.pyplot.title(
        "%s (%s)\n"
        "%d CUs of %d core(s) with a %ds payload on a %d core pilot on %s.\n"
        "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
        "RP: %s - RS: %s - RU: %s" %
        (sid,
         time.strftime("%a, %d %b %Y %H:%M:%S +0000",
                       time.gmtime(info['created'])),
         info['metadata.cu_count'], info['metadata.cu_cores'],
         info['metadata.cu_runtime'], info['metadata.pilot_cores'],
         resource_label, info['metadata.num_sub_agents'],
         info['metadata.num_exec_instances_per_sub_agent'],
         info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
         info['metadata.radical_stack.ru']),
        fontsize=8)

    mp.pyplot.savefig('%s_plot_states.pdf' % sid)
    mp.pyplot.close()
Пример #26
0
def plot(sids, paper=False):

    labels = []

    all_dict = {}

    for sid in sids:

        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        tr_unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'tr_unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        # cu_cores = info['metadata.cu_cores']
        # cu_count = info['metadata.cu_count']
        cu_count = info['metadata.cu_cores']
        cu_cores = info['metadata.cu_count']
        cu_runtime = info['metadata.cu_runtime']

        if cu_count not in all_dict:
            all_dict[cu_count] = {}

        if cu_cores not in all_dict[cu_count]:
            all_dict[cu_count][cu_cores] = pd.Series()

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0].replace('_', '\_')
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # Only take completed CUs into account
        #tuf = tuf[tuf['Done'].notnull()]

        # We sort the units based on the order they arrived at the agent
        #tufs = tuf.sort('awo_get_u_pend')
        #tufs = tuf.sort('awo_adv_u')
        #tufs = tuf.sort('asic_get_u_pend')
        tufs = tuf.sort()

        try:
            all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append(
                pd.Series((tufs['aec_after_exec'].max() -
                           tufs['asic_get_u_pend'].min() - cu_runtime)))
            #all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))
        except:
            print "Plotting failed for session: %s" % sid
            continue

    for key in all_dict:
        # print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(all_dict[key])
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        ax = orte_df.mean().plot(kind='line',
                                 fontsize=TICK_FONTSIZE,
                                 linewidth=LINEWIDTH)
        ax.set_xscale('log', basex=10)
        ax.set_yscale('log', basey=10)

    # Horizontal reference
    # y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    # ax = mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--')
    # labels.append("Optimal")

    print 'labels: %s' % labels
    mp.pyplot.legend(labels, loc='upper left', fontsize=LEGEND_FONTSIZE)
    if not paper:
        mp.pyplot.title(
            "TTC overhead for variable size CU.\n"
            "%d generations of a variable number of 'concurrent' CUs with variable number of cores with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (info['metadata.generations'], info['metadata.cu_runtime'],
             resource_label, info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=8)
    mp.pyplot.xlabel("\# CUs", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("$TTC_{overhead}$ (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10) #ax.get_xaxis().set_ticks([])
    # #ax.get_xaxis.set

    # [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    # plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    # plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    #width = 3.487
    #width = 3.3
    #height = width / 1.618
    # height = 2.7
    #fig = mp.pyplot.gcf()
    #fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    #fig.tight_layout(pad=0.1)
    #fig.tight_layout()

    mp.pyplot.savefig('plot_ttc_cu_cores.pdf')

    mp.pyplot.close()
def plot(sids, paper=False):
    """Plot mean TTC versus node count for full-node CU runs.

    *sids* maps a resource key (indexing resource_legend/colors/marker)
    to a list of session ids; one line is drawn per resource, plus a
    dashed 'Optimal' reference at generations * cu_runtime.  Saves
    'plot_ttc_full_node.pdf'.
    """

    labels = []

    for key in sids:

        # Per-node-count series of TTC samples for this resource.
        orte_ttc = {}

        for sid in sids[key]:

            # Non-RP sessions (e.g. ORTE-only runs) have no pilot info.
            if sid.startswith('rp.session'):
                rp = True
            else:
                rp = False

            session_dir = os.path.join(PICKLE_DIR, sid)

            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]

            cores = info['metadata.effective_cores']
            # Integer division — assumes 32 cores per node; TODO confirm
            # this holds for every plotted machine.
            nodes = cores / 32

            if nodes not in orte_ttc:
                orte_ttc[nodes] = pd.Series()

            if rp:
                # For this call assume that there is only one pilot per session
                resources = get_resources(unit_info_df, pilot_info_df, sid)
                assert len(resources) == 1
                resource_label = resources.values()[0]
            else:
                resource_label = 'bogus'

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            #tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            #tufs = tuf.sort('awo_get_u_pend')
            #tufs = tuf.sort('awo_adv_u')
            #tufs = tuf.sort('asic_get_u_pend')
            tufs = tuf.sort()

            # TTC sample: last execution end minus first scheduler pickup.
            orte_ttc[nodes] = orte_ttc[nodes].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))

        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df

        labels.append("%s" % resource_legend[key])
        # Plot the mean TTC per node count for this resource.
        ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")

    # Horizontal reference
    # NOTE: 'info' is the metadata of the last session processed above.
    y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    labels.append("Optimal")

    print 'labels: %s' % labels
    location = 'upper left'
    mp.pyplot.legend(labels, loc=location, fontsize=LEGEND_FONTSIZE, markerscale=0)
    if not paper:
        mp.pyplot.title("TTC for a varying number of 'concurrent' Full-Node CUs.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
           % (info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label,
              info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
              info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
              ), fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("\# Nodes", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10) #ax.get_xaxis().set_ticks([])
    # #ax.get_xaxis.set

    #width = 3.487
    # Fixed figure size matching the paper column width (golden ratio).
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)

    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_ttc_full_node.pdf')

    mp.pyplot.close()