# Example 1
def getMetaData(explist):
    """Collect per-run metadata for the given Cori experiments.

    For each run in *explist* whose rabbit workload is disabled
    (``rabbit_workload == 0``), read its bsp-trace artifact, compute the
    total runtime from rank 0, and record one row of metadata.

    Parameters
    ----------
    explist : iterable
        Experiment ids to select from ``analysis.getAllExperiments()``.

    Returns
    -------
    pandas.DataFrame
        Columns ``['Workload', 'Ranks', 'Stencil', 'Runtime']``, one row
        per qualifying run; runtime is in seconds.
    """
    # DataFrame of all experiments, filtered down to the requested ones.
    df_All = analysis.getAllExperiments()
    df_Cori = df_All.loc[df_All['Experiment'].isin(explist)]

    # Accumulator for the per-run metadata rows.
    df_Cori_Meta = pd.DataFrame(
        columns=['Workload', 'Ranks', 'Stencil', 'Runtime'])

    for _, currentRun in df_Cori.iterrows():
        eid = currentRun['Experiment']
        workload = currentRun['workload']
        ranks = currentRun['processors']
        stencil = currentRun['stencil_size']
        rab_work = currentRun['rabbit_workload']
        rid = currentRun['expid']

        # MLflow artifact layout: ./mlruns/<experiment>/<run>/artifacts/...
        currentPath = './mlruns/' + str(eid) + '/' + str(
            rid) + '/artifacts/bsp-trace.json'

        # Print progress
        print(eid, currentPath)

        # Only include runs where the rabbit workload is off.
        if rab_work == 0:
            # Rank 0's trace is representative for total runtime.
            currentData = analysis.getData(currentPath)
            currentData = currentData[currentData['rank'] == 0]

            # Sum of per-interval maxima, converted from usec to seconds.
            currentRunTime = currentData['interval_max_usec'].sum() / 1000000

            # Append one metadata row.
            df_Cori_Meta.loc[len(df_Cori_Meta)] = [
                workload,
                int(ranks),
                int(stencil), currentRunTime
            ]

    return df_Cori_Meta
def get_formated_data(df_platform_NoStencil, workload, processors, fname):
    """Load (or build and cache) a 3-D array of per-iteration workload times.

    The result has shape ``(nodes, ppn, iterations)`` where
    ``nodes = processors / ppn`` and ``ppn`` is fixed at 32 ranks per node.
    ``data[i, j, k]`` is the ``workload_usec`` of the j-th rank seen on
    node i at iteration k. The array is cached to *fname* as a ``.npy``
    file and loaded from there on subsequent calls.

    Parameters
    ----------
    df_platform_NoStencil :
        Input passed to ``analysis.getData`` (trace source for the runs).
    workload :
        Unused; kept for interface compatibility with callers.
    processors : int
        Total rank count; must be a multiple of 32 (ppn).
    fname : str
        Path of the ``.npy`` cache file.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(processors // 32, 32, iterations)``.
    """
    # Fast path: reuse the cached array if it exists.
    if path.isfile(fname):
        return np.load(fname)

    ppn = 32  # ranks per node

    data_platform = analysis.getData(df_platform_NoStencil)
    data_platform = data_platform[[
        'workload', 'node', 'rank', 'iteration', 'iterations',
        'workload_usec'
    ]]
    # One row per rank: iteration 0 enumerates every rank exactly once.
    data_platform_Iteration0 = data_platform[(
        data_platform['iteration'] == 0)]

    print(data_platform.head())

    nodes = data_platform['node'].unique()
    # How many ranks have been assigned a slot on each node so far.
    node_counter = np.zeros(len(nodes), dtype=int)

    # assumes 'iterations' is constant across rows — TODO confirm
    iterations = data_platform['iterations'].iloc[0]
    data = np.zeros((int(processors / ppn), ppn, iterations))

    for item in range(0, len(data_platform_Iteration0)):
        cur = data_platform_Iteration0.iloc[item]

        # Node slot: i = index of this rank's node, j = next free rank
        # slot on that node. (Direct lookup replaces the original linear
        # scan over `nodes`, which also left i/j unset on a miss.)
        i = int(np.where(nodes == cur['node'])[0][0])
        j = int(node_counter[i])
        node_counter[i] += 1

        # All iterations recorded for this rank.
        curData = data_platform[data_platform['rank'] == cur['rank']]

        for k in range(0, iterations):
            temp = curData.iloc[k]
            data[i, j, k] = temp['workload_usec']

    # Cache the array for future calls.
    np.save(fname, data)

    return data
# Example 3
def generatePredictionFigure( df_All, workload, baseRanks, CI, fname ):
    """Generate runtime-prediction figures for *workload* on Attaway.

    For the base rank count, bootstrap-resamples each selected run's
    per-interval trace at scale factors k in {1, 2, 4, 8, 16}, builds
    confidence intervals on the projected runtimes, plots the CI band
    plus the measured runtimes read from *fname* (a CSV with columns
    Workload/Ranks/Stencil/Runtime), and saves the figures under
    ``Figures/``.  For 'hpcg' and 'lammps' only the no-stencil figure is
    produced; other workloads also get a stencil variant.

    Parameters: df_All is the DataFrame of all experiments; workload is
    the workload name; baseRanks the sample rank count; CI the
    confidence level passed to scipy's ``norm.interval``; fname the CSV
    of measured runtimes.  Returns None (side effects: prints progress,
    writes figure files, uses/resets global pyplot figure state).
    """
    print( 'Generating Prediction for: ' + workload )

    # Output file stems (matplotlib appends the extension on savefig).
    outfile1 = 'Figures/Attaway_' + workload + '_prediction'
    outfile2 = 'Figures/Attaway_' + workload + '_prediction_stencil'

    # Measured runtimes for the base rank count (both stencil variants).
    coriData = pd.read_csv( fname )
    workloadData = coriData[ ( coriData['Workload'] == workload ) & ( coriData['Ranks'] == baseRanks )]

    # Scale factors to project to, and bootstrap iterations per run.
    kvals = np.array( [1, 2, 4, 8, 16] )
    iterations = 50

    MILLION = 1000000
    # Tolerance for matching a run's runtime against min/median/max.
    eps = 10 ** -5
    yMin = 0
    yMax = 0
    count = 0

    # Figure 1: no-stencil prediction; figure 2: stencil prediction.
    fig1 = plt.figure( 1, figsize=( 7, 3 ) )
    fig2 = plt.figure( 2, figsize=( 7, 3 ) )

    count = 0
    labels = ['No Stencil', 'Stencil']
    # hpcg/lammps have no stencil runs, so only one pass is made.
    if workload == 'hpcg' or workload == 'lammps':
        df_Attaway_NoStencil = df_All[ ( df_All['machine'] == 'Attaway' ) & ( df_All['processors'] == baseRanks ) & ( df_All['workload'] == workload ) & ( df_All['stencil_size'] == 0 ) ]
        df_List = [df_Attaway_NoStencil]
    else:
        df_Attaway_NoStencil = df_All[ ( df_All['machine'] == 'Attaway' ) & ( df_All['processors'] == baseRanks ) & ( df_All['workload'] == workload ) & ( df_All['stencil_size'] == 0 ) ]
        df_Attaway_Stencil = df_All [ ( df_All['machine'] == 'Attaway' ) & ( df_All['processors'] == baseRanks ) & ( df_All['workload'] == workload ) & ( df_All['stencil_size'] != 0 ) ]
        df_List = [df_Attaway_NoStencil, df_Attaway_Stencil]

    # One pass per variant: count == 0 -> no stencil, count == 1 -> stencil.
    for df in df_List:

        print( labels[count] )

        # CI band endpoints per k, built from the min/median/max runs.
        lowerBound = []
        middle = []
        upperBound = []

        if ( count == 0 ):
            runtimeData = workloadData[workloadData['Stencil'] == 0]['Runtime']
        else:
            runtimeData = workloadData[workloadData['Stencil'] != 0]['Runtime']

        runtimeData = runtimeData.sort_values()

        # Reference runtimes used to pick which runs get bootstrapped.
        minRuntime = min( runtimeData )
        medianRuntime = runtimeData.iloc[ int( len( runtimeData ) / 2  ) ]
        maxRuntime = max( runtimeData )

        print( workload, minRuntime, medianRuntime, maxRuntime )

        for k in kvals:
            # Projected runtimes from the min/median/max run at this k.
            expListMin = []
            expListMed = []
            expListMax = []

            for run in range( 0, len( df ) ):
                currentRun = df.iloc[run]
                eid = currentRun['Experiment']
                rid = currentRun['expid']
                # MLflow artifact layout: ./mlruns/<exp>/<run>/artifacts/...
                currentPath = './mlruns/' + str( eid ) + '/' + str( rid ) + '/artifacts/bsp-trace.json'
                currentData = analysis.getData( currentPath )
                currentData = currentData[currentData['rank'] == 0]
                currentRuntime = sum( currentData['interval_max_usec'] ) / MILLION

                # Only bootstrap the runs whose measured runtime matches
                # one of the three reference runtimes (within eps).
                if ( abs( currentRuntime - maxRuntime ) < eps ) or ( abs( currentRuntime - medianRuntime ) < eps ) or ( abs( currentRuntime - minRuntime ) < eps ):
                    for i in range( iterations ):
                        # Resample the trace projected to k-times the ranks.
                        data = analysis.resample_project( currentData, len( currentData ), k, col='interval_max_usec' )
                        projectedRunTime = sum( data ) / MILLION
                        print( 'Run:', run + 1, '/', len( df ), '\tIteration:', i + 1, '/', iterations, '\tWorkload:', workload, '\tk:', k, '\tProjected Runtime:', projectedRunTime )

                        # A run may match more than one reference (not elif).
                        if ( abs( currentRuntime - minRuntime ) < eps ):
                            expListMin.append( projectedRunTime )
                        if ( abs( currentRuntime - medianRuntime ) < eps ):
                            expListMed.append( projectedRunTime )
                        if ( abs( currentRuntime - maxRuntime ) < eps ):
                            expListMax.append( projectedRunTime )

            expListMin.sort()
            expListMed.sort()
            expListMax.sort()

            # Mean and standard error of each bootstrap distribution.
            meanMin = np.mean( np.array( expListMin ) )
            stvMin = stats.sem( np.array( expListMin ) )

            meanMed = np.mean( np.array( expListMed ) )
            stvMed = stats.sem( np.array( expListMed ) )

            meanMax = np.mean( np.array( expListMax ) )
            stvMax = stats.sem( np.array( expListMax ) )

            # Normal-approximation confidence intervals at level CI.
            intervalMin = stats.norm.interval( CI, loc=meanMin, scale=stvMin )
            intervalMed = stats.norm.interval( CI, loc=meanMed, scale=stvMed )
            intervalMax = stats.norm.interval( CI, loc=meanMax, scale=stvMax )

            print( CI, '$ Confidence Interval:', intervalMin[0], intervalMax[1], '\tMedian:', ( intervalMed[0] + intervalMed[1] ) / 2 )

            # Band: lower edge from the min run's CI, upper edge from the
            # max run's CI, center from the median run's CI midpoint.
            lowerBound.append( intervalMin[0] )
            middle.append( ( intervalMed[0] + intervalMed[1] ) / 2 )
            upperBound.append( intervalMax[1] )

        # Plot the prediction band on the figure for this variant.
        if count == 0:
            plt.figure( 1 )
            _ = plt.plot( baseRanks * kvals, middle, color='black' )
            _ = plt.fill_between( baseRanks * kvals, lowerBound, upperBound, color='blue', alpha=0.9 )
        if count == 1:
            plt.figure( 2 )
            _ = plt.plot( baseRanks * kvals, middle, color='black' )
            _ = plt.fill_between( baseRanks * kvals, lowerBound, upperBound, color='blue', alpha=0.9 )

        # Track global y-limits across both variants.
        if count == 0:
            yMin = min( lowerBound)
            yMax = max( upperBound)
        else:
            if yMin > min( lowerBound ):
                yMin = min( lowerBound )
            if yMax < max( upperBound ):
                yMax = max( upperBound )

        count = count + 1

    # Re-read the measured runtimes to scatter them over the bands.
    attawayData = pd.read_csv( fname )

    # Suffix 0 = no-stencil points, suffix 1 = stencil points;
    # base* holds the base-rank samples, the rest the scaled-up runs.
    baseranks0 = list()
    baseRuntime0 = list()

    ranks0 = list()
    currentRuntime0 = list()

    baseranks1 = list()
    baseRuntime1 = list()

    ranks1 = list()
    currentRuntime1 = list()

    for run in range( 0, len( attawayData ) ):
        label = ''
        myworkload = attawayData['Workload'][run]
        ranks = attawayData['Ranks'][run]
        stencil = attawayData['Stencil'][run]

        currentRunTime = attawayData['Runtime'][run]

        if int( stencil ) > 0:
            label = label + 'Stencil'

        # No-stencil rows (empty label) for this workload.
        if label == '':
            if myworkload == workload:
                if ranks == baseRanks:
                    baseranks0.append( int( ranks ) )
                    baseRuntime0.append( currentRunTime )
                    print( 'No Stencil - BaseRanks', workload, ranks, currentRunTime )
                else:
                    ranks0.append( int( ranks ) )
                    currentRuntime0.append( currentRunTime )
                    print( 'No Stencil - Other', workload, ranks, currentRunTime )

        # Stencil rows, only for workloads that have a stencil variant.
        if not ( workload == 'hpcg' or workload == 'lammps' ):
            if label == 'Stencil':
                if myworkload == workload:
                    if ranks == baseRanks:
                        baseranks1.append( int( ranks ) )
                        baseRuntime1.append( currentRunTime )
                        print( 'Stencil - BaseRanks', workload, ranks, currentRunTime )
                    else:
                        ranks1.append( int( ranks ) )
                        currentRuntime1.append( currentRunTime )
                        print( 'Stencil - Other', workload, ranks, currentRunTime )

    print( baseranks0 )
    print( baseRuntime0 )
    print( ranks0 )
    print( currentRuntime0 )
    print( baseranks1 )
    print( baseRuntime1 )
    print( ranks1 )
    print( currentRuntime1 )

    # Display name: the 'sleep' workload is labelled 'ftq' in figures.
    if ( workload == 'sleep' ):
        w = 'ftq'
    else:
        w = workload

    # Finalize and save the no-stencil figure.
    plt.figure( 1 )
    _ = plt.plot( baseranks0, baseRuntime0, 'o', color='black', label='Sample workload' )
    _ = plt.plot( ranks0, currentRuntime0, 'o', color='red', label='Scaled-up workload' )
    _ = plt.ylim( 0.95 * yMin, 1.05 * yMax )
    _ = plt.title( 'Attaway ' + w + ' - per run bootstrap, global CIs' )
    _ = plt.xlabel( 'Number of Ranks' )
    _ = plt.ylabel( 'Runtime (s)' )
    _ = plt.legend( loc='lower right', borderaxespad=0.5 )
    plt.tight_layout()
    fig1.savefig( outfile1 )
    plt.close( fig1 )

    # Finalize and save the stencil figure (skipped for hpcg/lammps,
    # but fig2 is always closed to release the pyplot slot).
    if ( workload != 'hpcg' and workload != 'lammps' ):
        plt.figure( 2 )
        _ = plt.plot( baseranks1, baseRuntime1, 'o', color='black', label='Sample workload' )
        _ = plt.plot( ranks1, currentRuntime1, 'o', color='red', label='Scaled-up workload' )
        _ = plt.ylim( 0.95 * yMin, 1.05 * yMax )
        _ = plt.title( 'Attaway ' + w + ' - per run bootstrap, global CIs - Stencil' )
        _ = plt.xlabel( 'Number of Ranks' )
        _ = plt.ylabel( 'Runtime (s)' )
        _ = plt.legend( loc='lower right', borderaxespad=0.5 )

        plt.tight_layout()
        fig2.savefig( outfile2 )
    plt.close( fig2 )