示例#1
0
def F_networkStatistics(D_mov,
                        terminalfieldFrom='LOADING_NODE',
                        terminalfieldto='DISCHARGING_NODE',
                        capacityField='QUANTITY',
                        actual=False,
                        timeColumns=None):
    """Compute network flow statistics (sailing times per arc) and plot the
    resulting network graph.

    Parameters
    ----------
    D_mov : movements DataFrame (a 'sailingTime' column is added in place).
    terminalfieldFrom / terminalfieldto : origin / destination node columns.
    capacityField : column used to weight coverage statistics.
    actual : 'PROVISIONAL' to use planned timestamps, 'ACTUAL' for actuals.
    timeColumns : dict mapping logical keys ('dischargingpta', 'loadingptd',
        'dischargingata', 'loadingatd') to column names of D_mov.

    Returns
    -------
    (outputFigure, sailingTime) : dict of figures and a DataFrame with the
    mean/std/size of the sailing time per (from, to) arc plus an 'accuracy'
    column.
    """
    # avoid the shared mutable default argument
    if timeColumns is None:
        timeColumns = {}

    outputFigure = {}

    # pick the timestamp pair for the requested scenario
    if actual == 'PROVISIONAL':
        arrivalCol = timeColumns['dischargingpta']
        departureCol = timeColumns['loadingptd']
    elif actual == 'ACTUAL':
        arrivalCol = timeColumns['dischargingata']
        departureCol = timeColumns['loadingatd']
    else:
        # the original code silently fell through here and later crashed
        # with a NameError on 'accuracy'; fail early with a clear message
        raise ValueError("actual must be 'PROVISIONAL' or 'ACTUAL', "
                         f"got {actual!r}")

    accuracy, _ = getCoverageStats(D_mov,
                                   analysisFieldList=[arrivalCol,
                                                      departureCol],
                                   capacityField=capacityField)
    # sailing time in (fractional) days between departure and arrival
    D_mov['sailingTime'] = timeStampToDays(D_mov[arrivalCol] -
                                           D_mov[departureCol])

    D_filterActual = D_mov.dropna(subset=['sailingTime'])

    # mean / std / count of the sailing time per arc; flattened column
    # names become sailingTimemean, sailingTimestd, sailingTimesize
    sailingTime = D_filterActual.groupby(
        [terminalfieldFrom, terminalfieldto]).agg({
            'sailingTime': ['mean', 'std', 'size']
        }).reset_index()
    sailingTime.columns = list(map(''.join, sailingTime.columns.values))

    fig1 = plotGraph(sailingTime,
                     terminalfieldFrom,
                     terminalfieldto,
                     'sailingTimemean',
                     'sailingTimesize',
                     'Network flow',
                     arcLabel=False)
    outputFigure[f"NetworkGraph_{actual}"] = fig1

    sailingTime['accuracy'] = [accuracy] * len(sailingTime)

    return outputFigure, sailingTime
def calculateLoS(D_mov,
                 capacityField='QUANTITY',
                 timeColumns=None):
    """Compute the level of service: the share of movements whose actual
    travel time was shorter than the planned one, shown as a pie chart.

    Parameters
    ----------
    D_mov : movements DataFrame.
    capacityField : column used to weight coverage statistics.
    timeColumns : dict that must provide the keys 'loadingptd',
        'dischargingpta', 'loadingatd', 'dischargingata' mapped to columns
        of D_mov; if any is missing the function returns empty results.

    Returns
    -------
    (output_figure, coverages) : dict with the pie chart and a one-row
    coverage DataFrame (both empty if the required columns are missing).
    """
    # avoid the shared mutable default argument
    if timeColumns is None:
        timeColumns = {}

    output_figure = {}
    coverages = pd.DataFrame()

    requiredKeys = ['loadingptd', 'dischargingpta',
                    'loadingatd', 'dischargingata']
    if all(key in timeColumns.keys() for key in requiredKeys):
        columnsNeeded = [timeColumns[key] for key in requiredKeys]

        accuracy, _ = getCoverageStats(D_mov,
                                       analysisFieldList=columnsNeeded,
                                       capacityField=capacityField)

        # only rows where all four timestamps are present
        D_time = D_mov.dropna(subset=columnsNeeded)

        plannedTime = (D_time[timeColumns['dischargingpta']] -
                       D_time[timeColumns['loadingptd']])
        actualTime = (D_time[timeColumns['dischargingata']] -
                      D_time[timeColumns['loadingatd']])

        # True when the actual trip was strictly faster than planned
        Los = actualTime < plannedTime
        D_res = Los.value_counts()

        fig1 = plt.figure()
        plt.pie(D_res, autopct='%1.1f%%', shadow=True, startangle=90,
                labels=D_res.index)
        plt.title('Level of Service')

        output_figure['level_of_service'] = fig1

        coverages = pd.DataFrame([accuracy])

    return output_figure, coverages
def violinPlantTerminal(D_mov,
                        plantField='LOADING_NODE',
                        clientField='DISCHARGING_NODE',
                        capacityField='QUANTITY'):
    """Draw a violin plot per production node (plant) showing the
    quantities moved towards each client.

    Parameters
    ----------
    D_mov : movements DataFrame.
    plantField / clientField : origin / destination node columns.
    capacityField : quantity column to aggregate and plot.

    Returns
    -------
    (output_figure, output_df) : dict with the violin figure and a dict
    with the coverage statistics DataFrame.
    """
    output_figure = {}
    output_df = {}

    accuracy, _ = getCoverageStats(D_mov, [clientField, plantField],
                                   capacityField=capacityField)
    df_out = pd.DataFrame([accuracy])

    # BUG FIX: select the quantity column before aggregating; the original
    # summed every column ('.sum()[capacityField]'), which warns or fails
    # on non-numeric columns in modern pandas
    D_clientTerminal = D_mov.groupby(
        [plantField, clientField])[capacityField].sum().reset_index()

    fig = plt.figure()
    sns.violinplot(x=plantField,
                   y=capacityField,
                   data=D_clientTerminal,
                   palette="muted")
    output_figure['violin_plant_client'] = fig
    output_df['violin_plant_client_coverages'] = df_out

    return output_figure, output_df
示例#4
0
def itemSharePieGraph(D_mov, itemfield, capacityField='QUANTITY'):
    """Pie chart of the movement share per item family.

    Parameters
    ----------
    D_mov : movements DataFrame.
    itemfield : column holding the product-family code whose share is shown.
    capacityField : column used to weight coverage statistics.

    Returns
    -------
    (fig1, D_movCode) : the pie figure and a DataFrame with the movement
    count per family ('Quantity'), sorted descending, plus an 'accuracy'
    column.
    """
    # coverage stats; BUG FIX: honour the capacityField argument (the
    # original hard-coded 'QUANTITY')
    accuracy, _ = getCoverageStats(D_mov, itemfield,
                                   capacityField=capacityField)

    # movements per item family, computed once and reused for both the
    # chart and the output table (the original ran the same groupby twice)
    counts = D_mov.groupby([itemfield]).size().reset_index()

    # item-family share pie chart
    D_movType = counts.rename(columns={0: 'Percentage'})
    labels = D_movType[itemfield]
    sizes = D_movType.Percentage
    explode = 0.1 * np.ones(len(sizes))
    fig1, ax1 = plt.subplots(figsize=(20, 10))
    plt.pie(sizes,
            explode=explode,
            labels=labels,
            autopct='%1.1f%%',
            shadow=True,
            startangle=90)
    ax1.axis(
        'equal')  # Equal aspect ratio ensures that pie is drawn as a circle

    # per-family table, sorted by movement count
    D_movCode = counts.rename(columns={0: 'Quantity'})
    D_movCode = D_movCode.sort_values(['Quantity'], ascending=False)

    D_movCode['accuracy'] = [accuracy] * len(D_movCode)

    return fig1, D_movCode
示例#5
0
def getAdvanceInPlanning(
        D_mov, loadingptafield='LOADING_TIME_WINDOWS_PROVISIONAL_START'):
    """Compute the distribution of the planning-advance time: the gap
    between the record creation timestamp (``TIMESTAMP_IN``) and the start
    of the loading time window (*loadingptafield*).

    Returns
    -------
    (output_figure, D_global) : a dict of figures (a histogram of the
    advance, in days, when data is available) and a two-column DataFrame
    (VALUE / ACCURACY) indexed by statistic name. If no usable rows exist,
    returns a placeholder message DataFrame instead.
    """

    output_figure = {}
    output_data = {}
    output_coverage = {}

    # drop rows where either timestamp is missing
    D_mov_filtered = D_mov[['TIMESTAMP_IN', loadingptafield]].dropna()

    if len(D_mov_filtered) == 0:
        # no usable data at all: return a placeholder message
        return output_figure, pd.DataFrame(
            ['No PTA fields to perform this analysis'])
    if loadingptafield == 'TIMESTAMP_IN':  # same column would always give 0
        mean_advanceInPlanning = std_advanceInPlanning = 0
        advanceInPlanningDistribution = []
    else:
        advanceInPlanning = D_mov_filtered[loadingptafield] - D_mov_filtered[
            'TIMESTAMP_IN']
        # convert the timedelta into fractional days from its day/hour/
        # minute components (seconds are discarded)
        advanceInPlanningD = advanceInPlanning.dt.components['days']
        advanceInPlanningH = advanceInPlanning.dt.components['hours']
        advanceInPlanningM = advanceInPlanning.dt.components['minutes']
        advanceInPlanning = advanceInPlanningD + advanceInPlanningH / 24 + advanceInPlanningM / (
            60 * 24)
        # keep only records planned in advance (positive gaps)
        advanceInPlanning = advanceInPlanning[advanceInPlanning > 0]
        mean_advanceInPlanning = np.mean(advanceInPlanning)
        std_advanceInPlanning = np.std(advanceInPlanning)
        advanceInPlanningDistribution = advanceInPlanning

    if len(advanceInPlanningDistribution) > 0:
        # histogram of the advance in planning, one bin per day
        # NOTE(review): np.arange(0, max, 1) excludes values beyond the
        # last whole day from the bin range — confirm this is intended
        fig_planningAdvance = plt.figure()
        plt.title('Days of advance in booking')
        plt.hist(advanceInPlanning,
                 color='orange',
                 bins=np.arange(0, max(advanceInPlanningDistribution), 1))
        plt.xlabel('days')
        plt.ylabel('N.ofBooks')

        # collect the figure
        output_figure['ADVANCE_IN_PLANNING'] = fig_planningAdvance

    # collect the scalar statistics and the raw series
    output_data['ADVANCE_PLANNING_MEAN'] = mean_advanceInPlanning
    output_data['ADVANCE_PLANNING_STD'] = std_advanceInPlanning
    output_data['SERIES'] = advanceInPlanningDistribution

    # coverage statistics
    # NOTE(review): this stores the whole getCoverageStats return value
    # (a tuple), unlike the other functions in this file which unpack the
    # accuracy only — confirm this is intended
    output_coverage['ADVANCE_PLANNING_MEAN'] = getCoverageStats(
        D_mov, loadingptafield, capacityField='QUANTITY')
    output_coverage['ADVANCE_PLANNING_STD'] = output_coverage[
        'ADVANCE_PLANNING_MEAN']
    output_coverage['SERIES'] = output_coverage['ADVANCE_PLANNING_MEAN']

    D_global = pd.DataFrame([output_data, output_coverage]).transpose()
    D_global.columns = ['VALUE', 'ACCURACY']

    return output_figure, D_global
def travelTimedistribution(D_mov,
                           capacityField='QUANTITY',
                           loadingTA='PTA_FROM',
                           loadingTD='PTD_FROM',
                           dischargingTA='PTA_TO',
                           dischargingTD='PTD_TO',
                           ):
    """Compute and plot the travel-time distribution per movement, both as
    an upper bound (discharge departure - loading arrival) and a lower
    bound (discharge arrival - loading departure).

    Parameters
    ----------
    D_mov : movements DataFrame.
    capacityField : column used to weight coverage statistics.
    loadingTA/loadingTD/dischargingTA/dischargingTD : timestamp columns.

    Returns
    -------
    (imageResults, df_traveltime) : dict with the histogram figure and a
    DataFrame with one row per bound (mean, std, accuracy).
    """
    df_traveltime = pd.DataFrame(columns=['U_L_BOUND', 'TIME_MEAN',
                                          'TIME_STD'])
    imageResults = {}

    # ---- upper bound ----
    accuracy_ub, _ = getCoverageStats(
        D_mov,
        analysisFieldList=[dischargingTD, loadingTA],
        capacityField=capacityField)

    # expected travel time per container in days (UPPER BOUND)
    ExpectedTravelTime_ub = ts.timeStampToDays(D_mov[dischargingTD] -
                                               D_mov[loadingTA])
    # discard non-positive (inconsistent) durations
    ExpectedTravelTime_ub = ExpectedTravelTime_ub[ExpectedTravelTime_ub > 0]
    mean_ExpectedTravelTime = np.mean(ExpectedTravelTime_ub)
    std_ExpectedTravelTime = np.std(ExpectedTravelTime_ub)

    data = {'U_L_BOUND': 'upperBound',
            'TIME_MEAN': mean_ExpectedTravelTime,
            'TIME_STD': std_ExpectedTravelTime,
            'accuracy': str(accuracy_ub)}
    # BUG FIX: DataFrame.append was removed in pandas 2.0; use pd.concat
    df_traveltime = pd.concat([df_traveltime, pd.DataFrame(data, index=[0])])

    # ---- lower bound ----
    accuracy_lb, _ = getCoverageStats(
        D_mov,
        analysisFieldList=[dischargingTA, loadingTD],
        capacityField=capacityField)

    # expected travel time per container in days (LOWER BOUND)
    ExpectedTravelTime_lb = ts.timeStampToDays(D_mov[dischargingTA] -
                                               D_mov[loadingTD])
    ExpectedTravelTime_lb = ExpectedTravelTime_lb[ExpectedTravelTime_lb > 0]
    mean_ExpectedTravelTime = np.mean(ExpectedTravelTime_lb)
    std_ExpectedTravelTime = np.std(ExpectedTravelTime_lb)

    data = {'U_L_BOUND': 'lowerBound',
            'TIME_MEAN': mean_ExpectedTravelTime,
            'TIME_STD': std_ExpectedTravelTime,
            'accuracy': str(accuracy_lb)}
    df_traveltime = pd.concat([df_traveltime, pd.DataFrame(data, index=[0])])

    # choose the unit of measure for the histogram
    # NOTE(review): as in the original code, the decision is based on the
    # LOWER-bound mean only — confirm this is intended
    udm = 'days'
    value_ub = ExpectedTravelTime_ub
    value_lb = ExpectedTravelTime_lb
    if mean_ExpectedTravelTime < 1 / 24 / 60:
        udm = 'minutes'
        value_ub = ExpectedTravelTime_ub * 24 * 60
        value_lb = ExpectedTravelTime_lb * 24 * 60
    elif mean_ExpectedTravelTime < 1:  # sub-day values: switch to hours
        udm = 'hours'
        value_ub = ExpectedTravelTime_ub * 24
        value_lb = ExpectedTravelTime_lb * 24

    fig1 = plt.figure()
    plt.hist(value_ub, color='orange')
    plt.hist(value_lb, color='blue', alpha=0.6)
    plt.title(f"Travel time ({udm})")
    plt.xlabel(f"{udm}")
    plt.ylabel('Quantity')
    plt.legend(['Upper bound', 'Lower bound'])

    imageResults["travel_time_per_movement"] = fig1

    return imageResults, df_traveltime
def checkPlannedActual(D_mov, locfrom='LOADING_NODE',
                       locto='DISCHARGING_NODE',
                       capacityField='QUANTITY',
                       voyagefield='VOYAGE_CODE',
                       vehiclefield='VEHICLE_CODE',
                       timeColumns=None):
    """Compare planned and actual schedules per voyage.

    For each voyage, checks whether the visiting order by actual arrival
    matches the order by planned arrival; records swapped locations, the
    share of routes executed as planned (pie chart) and lateness /
    tardiness / handling-gap statistics.

    Parameters
    ----------
    D_mov : movements DataFrame.
    locfrom / locto : origin / destination node columns.
    capacityField : quantity column used for aggregation and coverage.
    voyagefield / vehiclefield : voyage and vehicle code columns.
    timeColumns : dict of timestamp column mappings forwarded to
        createTabellaMovimenti.

    Returns
    -------
    (output_figure, df_results) : figures and result DataFrames keyed by
    'routeExchange', 'routeExecutedAsPlanned' and 'schedule_results'.
    """
    # avoid the shared mutable default argument
    if timeColumns is None:
        timeColumns = {}

    df_results = {}
    output_figure = {}

    D = createTabellaMovimenti(D_mov,
                               locfrom=locfrom,
                               locto=locto,
                               capacityField=capacityField,
                               timeColumns=timeColumns
                               )
    if any(column not in D.columns for column in ['PTA', 'PTD', 'ATA', 'ATD']):
        print("WARNING: no actual and provisional columns in D_mov")
        return output_figure, df_results

    # BUG FIX: honour the capacityField argument (was hard-coded 'QUANTITY')
    accuracy, _ = getCoverageStats(
        D_mov,
        analysisFieldList=[locfrom, locto, voyagefield, vehiclefield,
                           *list(timeColumns.values())],
        capacityField=capacityField)

    D_movimenti = D.groupby([vehiclefield, 'Location', 'PTA', 'PTD', 'ATA',
                             'ATD', voyagefield])['Movementquantity'].sum().reset_index()
    # also flag, per movement, whether the planned route was respected
    D_movimenti['AsPlanned'] = True

    colsCheckRoute = ['VoyageCode', 'PlanPerformed']
    colsCheckArcs = ['VoyageCode', 'plannedLocation', 'actualLocation']
    # collect rows in plain lists and build the DataFrames once at the end
    # (DataFrame.append was removed in pandas 2.0 and was quadratic anyway)
    route_rows = []
    arc_rows = []

    # iterate over the distinct voyages
    routeCode = np.unique(
        D_movimenti[voyagefield][~D_movimenti[voyagefield].isna()])
    for codiceRoute in routeCode:
        dataRoute = D_movimenti[D_movimenti[voyagefield] == codiceRoute]

        # order by planned arrival
        sortpl = dataRoute.sort_values(by='PTA')
        ordinePlanned = sortpl.index.values

        # order by actual arrival
        sortact = dataRoute.sort_values(by='ATA')
        ordineActual = sortact.index.values

        # the route went as planned iff the two orderings coincide
        check = all(ordineActual == ordinePlanned)
        route_rows.append([codiceRoute, check])

        if not check:  # the route deviated from the plan
            # locate the positions where the orderings disagree
            mismatch = ~(ordineActual == ordinePlanned)
            indexFrom = sortpl[mismatch].index.values
            indexTo = sortact[mismatch].index.values

            locFrom = dataRoute.Location[indexFrom]
            locTo = dataRoute.Location[indexTo]
            for j in range(0, len(locFrom)):
                arc_rows.append([codiceRoute, locFrom.iloc[j],
                                 locTo.iloc[j]])

            # mark the affected movements as not executed as planned
            D_movimenti.loc[(D_movimenti[voyagefield] == codiceRoute) &
                            (D_movimenti.Location.isin(locFrom)),
                            'AsPlanned'] = False

    results_route = pd.DataFrame(route_rows, columns=colsCheckRoute)
    results_arcExchange = pd.DataFrame(arc_rows, columns=colsCheckArcs)

    # statistics on the swapped arcs
    stat_exchange = results_arcExchange.groupby(
        ['plannedLocation', 'actualLocation']).size().reset_index()
    stat_exchange.rename(columns={0: 'count'}, inplace=True)
    stat_exchange = stat_exchange.sort_values(by='count', ascending=False)

    stat_exchange['accuracy'] = [accuracy] * len(stat_exchange)
    results_route['accuracy'] = [accuracy] * len(results_route)

    df_results['routeExchange'] = stat_exchange
    df_results['routeExecutedAsPlanned'] = results_route

    # pie chart with the share of routes executed as planned
    sizes = results_route.groupby(['PlanPerformed']).size()
    labels = sizes.index.values
    explode = 0.1 * np.ones(len(sizes))

    fig1, ax1 = plt.subplots(figsize=(20, 10))
    plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
            shadow=True, startangle=90)
    ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
    plt.title('Route as planned')
    output_figure['routeAsPlannedPie'] = fig1

    # planned vs actual differentials (in days)
    D_movimenti['latenessTD'] = lateness_TD = ts.timeStampToDays(
        D_movimenti.ATD - D_movimenti.PTD)
    # tardiness clips negative values (early departures) to zero
    D_movimenti['tardinessTD'] = tardiness_TD = lateness_TD.clip(0, None)
    lateness_TD_mean = np.mean(lateness_TD)
    tardiness_TD_mean = np.mean(tardiness_TD)

    lateness_TA = ts.timeStampToDays(D_movimenti.ATA - D_movimenti.PTA)
    tardiness_TA = lateness_TA.clip(0, None)
    lateness_TA_mean = np.mean(lateness_TA)
    tardiness_TA_mean = np.mean(tardiness_TA)

    # gap between actual and planned handling time at the node
    gap_handling = ts.timeStampToDays((D_movimenti.ATD - D_movimenti.ATA) -
                                      (D_movimenti.PTD - D_movimenti.PTA))
    handling_gap_mean = np.mean(gap_handling)

    cols = ['mean lateness - dep.', 'mean lateness - arr.',
            'mean tardiness - dep.', 'mean tardiness - arr.',
            'mean handling gap']
    schedule_results = pd.DataFrame(
        [[lateness_TD_mean, lateness_TA_mean, tardiness_TD_mean,
          tardiness_TA_mean, handling_gap_mean]], columns=cols)
    schedule_results['accuracy'] = [accuracy] * len(schedule_results)

    df_results['schedule_results'] = schedule_results

    return output_figure, df_results
def paretoNodeClient(D_mov,
                     clientfield='KLANT',
                     locationfromfield='LOADING_NODE',
                     locationtofield='DISCHARGING_NODE',
                     vehiclefield='VEHICLE_CODE',
                     capacityField='QUANTITY'):
    """Build, for each vehicle, a Pareto chart of the number of distinct
    locations touched per client.

    Parameters
    ----------
    D_mov : movements DataFrame.
    clientfield : client column (must differ from the location columns).
    locationfromfield / locationtofield : origin / destination columns.
    vehiclefield : vehicle code column (one chart per vehicle).
    capacityField : column used to weight coverage statistics.

    Returns
    -------
    (outputfigure, output_df) : dicts keyed by 'pareto_vehicle_<code>'.
    """
    outputfigure = {}
    output_df = {}

    # with identical client/location fields nothing can be cumulated
    if (clientfield == locationfromfield) | (clientfield == locationtofield):
        print("Same field for client and location from/to. Cannot proceed")
        return outputfigure, output_df
    for barge in set(D_mov[vehiclefield]):
        # restrict to the current vehicle
        D_clNode = D_mov[D_mov[vehiclefield] == barge]
        if len(D_clNode) > 0:
            # coverage of the fields used below
            accuracy, _ = getCoverageStats(D_clNode, [
                clientfield, locationfromfield, locationtofield, vehiclefield
            ],
                                           capacityField=capacityField)

            # movement counts per client towards each destination...
            D_clNode_from = pd.DataFrame(
                D_clNode.groupby([clientfield,
                                  locationtofield]).size()).reset_index()
            D_clNode_from = D_clNode_from.rename(
                columns={locationtofield: 'Location'})

            # ...and from each origin
            D_clNode_to = pd.DataFrame(
                D_clNode.groupby([clientfield,
                                  locationfromfield]).size()).reset_index()
            D_clNode_to = D_clNode_to.rename(
                columns={locationfromfield: 'Location'})

            D_clNode_all = pd.concat([D_clNode_from, D_clNode_to], axis=0)
            D_clNode_all = D_clNode_all.sort_values(by=0, ascending=False)
            D_clNode_all = D_clNode_all.dropna()
            D_clNode_all = D_clNode_all.reset_index(drop=True)

            # keep only the first occurrence of each location, matching
            # case/whitespace-insensitively; replaces the original
            # drop-rows-while-iterating loop with the vectorized equivalent
            normalizedLocation = (D_clNode_all['Location'].astype(str)
                                  .str.lower().str.strip())
            D_clNode_all = D_clNode_all[~normalizedLocation.duplicated()]

            # distinct locations per client; then add clients that cumulate
            # nothing so they still appear in the Pareto chart
            D_clNode_all = D_clNode_all.groupby(
                [clientfield])['Location'].nunique()
            D_clNode_all = pd.DataFrame(D_clNode_all)
            for client in set(D_clNode[clientfield]):
                if client not in D_clNode_all.index.values:
                    temp = pd.DataFrame([0],
                                        index=[client],
                                        columns=['Location'])
                    D_clNode_all = pd.concat([D_clNode_all, temp])

            D_clNode_all = pd.DataFrame(D_clNode_all)
            D_clNode_all['Client'] = D_clNode_all.index.values
            D_clNode_all['accuracy'] = [accuracy] * len(D_clNode_all)

            titolo = f"BargeCode: {barge}"
            fig = paretoChart(D_clNode_all, 'Client', 'Location', titolo)
            outputfigure[f"pareto_vehicle_{barge}"] = fig
            output_df[f"pareto_vehicle_{barge}"] = D_clNode_all
    return outputfigure, output_df
def clientStatistics(D_mov,
                     clientfield='KLANT',
                     itemfamily='ContainerSize',
                     capacityfield='QUANTITY'):
    """Per-client statistics: pie chart of orders, order counts per item
    family, and a Pareto chart of booked capacity.

    Parameters
    ----------
    D_mov : movements DataFrame.
    clientfield : client code column.
    itemfamily : item-family column (e.g. container size).
    capacityfield : quantity column for the capacity Pareto and coverage.

    Returns
    -------
    (imageResult, df_results) : dict of figures ('clients_pie',
    'paretoClient') and a DataFrame with per-client counts per family,
    total orders and an 'accuracy' column.
    """
    imageResult = {}

    # BUG FIX: honour the capacityfield argument (was hard-coded 'QUANTITY')
    accuracy, _ = getCoverageStats(D_mov,
                                   clientfield,
                                   capacityField=capacityfield)
    D_OrderPerClient = D_mov.groupby([clientfield]).size().reset_index()
    D_OrderPerClient = D_OrderPerClient.rename(columns={0: 'TotalOrders'})
    D_OrderPerClient = D_OrderPerClient.sort_values([clientfield])

    # pie chart of the order share per client
    labels = D_OrderPerClient[clientfield]
    sizes = D_OrderPerClient.TotalOrders
    explode = 0.1 * np.ones(len(sizes))

    fig1, ax1 = plt.subplots(figsize=(20, 10))
    plt.pie(sizes,
            explode=explode,
            labels=labels,
            autopct='%1.1f%%',
            shadow=True,
            startangle=90)
    ax1.axis(
        'equal')  # Equal aspect ratio ensures that pie is drawn as a circle
    imageResult['clients_pie'] = fig1

    # order counts per client and item family, pivoted wide
    D_movTypePerClient = D_mov.groupby([clientfield,
                                        itemfamily]).size().reset_index()
    D_movTypePerClient = D_movTypePerClient.rename(
        columns={0: 'TotalContainer'})
    D_movTypePerClient = D_movTypePerClient.pivot(index=clientfield,
                                                  columns=itemfamily,
                                                  values='TotalContainer')

    # join the family counts with the order totals
    D = pd.merge(D_movTypePerClient,
                 D_OrderPerClient,
                 left_on=[clientfield],
                 right_on=[clientfield])
    D = D.fillna(0)

    # final per-client table, with coverage accuracy attached
    df_results = D
    df_results['accuracy'] = [accuracy] * len(df_results)

    # Pareto of the capacity booked per client
    D_capacityPerClient = D_mov.groupby(
        [clientfield])[capacityfield].sum().reset_index()
    fig2 = paretoChart(D_capacityPerClient, clientfield, capacityfield,
                       'Pareto clients')

    imageResult['paretoClient'] = fig2
    return imageResult, df_results
示例#10
0
def itemLifeCycle(D_mov,
                  itemfield='CONTAINER',
                  locationfrom='LOADING_NODE',
                  locationto='DISCHARGING_NODE',
                  capacityField='QUANTITY',
                  timeColumns=None,
                  sortTimefield='PTA_FROM',
                  numItemTosave=1):
    """Build the loading/unloading life cycle of the most travelled items.

    Each *itemfield* value must identify a physically distinct product or
    loading unit. For the top ``numItemTosave`` items by movement count the
    function produces a status step chart (0.5 = handling, 1 after an IN,
    0 after an OUT) and a time-distance chart.

    Parameters
    ----------
    D_mov : movements DataFrame.
    itemfield : column identifying the physical item.
    locationfrom / locationto : origin / destination columns.
    capacityField : column used for coverage statistics.
    timeColumns : dict that must provide the keys 'loadingpta',
        'loadingptd', 'dischargingpta', 'dischargingptd'; otherwise a
        warning is printed and empty results are returned.
    sortTimefield : column used to time-order the item's movements.
    numItemTosave : number of top items to analyse.

    Returns
    -------
    (figureOutput, df_lifeCycle) : dicts of figures and DataFrames keyed
    by the item name.
    """
    # avoid the shared mutable default argument
    if timeColumns is None:
        timeColumns = {}

    df_lifeCycle = {}
    figureOutput = {}

    # all four planned-time columns are required
    if all(column in timeColumns.keys() for column in
           ['loadingpta', 'loadingptd', 'dischargingpta', 'dischargingptd']):

        # rank the items by number of movements
        D_movLifeCycle = D_mov.groupby([itemfield]).size().reset_index()
        D_movLifeCycle = D_movLifeCycle.rename(columns={0: 'Movements'})
        D_movLifeCycle = D_movLifeCycle.sort_values(
            ['Movements'], ascending=False).reset_index()
        for j in range(0, min(numItemTosave, len(D_movLifeCycle))):

            itemName = D_movLifeCycle[itemfield].iloc[j]
            mostTravelled = D_movLifeCycle[itemfield][j]
            MostTravelledMovements = D_mov[D_mov[itemfield] == mostTravelled]
            MostTravelledMovements = MostTravelledMovements.sort_values(
                [sortTimefield]).reset_index()

            # coverage of the fields used below
            allcolumns = [
                itemfield, timeColumns['loadingpta'],
                timeColumns['loadingptd'], timeColumns['dischargingpta'],
                timeColumns['dischargingptd']
            ]
            accuracy, _ = getCoverageStats(MostTravelledMovements,
                                           analysisFieldList=allcolumns,
                                           capacityField=capacityField)
            MostTravelledMovements['accuracy'] = (
                [accuracy] * len(MostTravelledMovements))
            df_lifeCycle[f"lifeCycle_{itemName}"] = MostTravelledMovements

            # explode into single IN/OUT movements (as for capacity analyses)
            D_movimentiPerContainer = createTabellaMovimenti(
                MostTravelledMovements,
                locfrom=locationfrom,
                locto=locationto,
                capacityField=capacityField,
                timeColumns=timeColumns)

            D_movimentiPerContainer = D_movimentiPerContainer.sort_values(
                ['PTA'])

            cols = ['DateTime', 'Location', 'value']
            # collect the step-chart points in a list and build the frame
            # once (DataFrame.append was removed in pandas 2.0 and was
            # quadratic); IN and OUT differ only in the final status value
            rows = []
            for i in range(0, len(D_movimentiPerContainer)):
                movimento = D_movimentiPerContainer.iloc[i, :]
                if movimento.InOut in ('IN', 'OUT'):
                    # handling phase at 0.5, then jump to the final status:
                    # 1 after an IN (item on board), 0 after an OUT
                    finalStatus = 1 if movimento.InOut == 'IN' else 0
                    rows.append([movimento.PTA, movimento.Location, 0.5])
                    rows.append([movimento.PTD, movimento.Location, 0.5])
                    rows.append([
                        movimento.PTD + pd.to_timedelta(1, unit='s'),
                        movimento.Location, finalStatus
                    ])
            graficoLifeCycle = pd.DataFrame(rows, columns=cols)

            fig1 = plt.figure(figsize=(20, 10))
            plt.step(graficoLifeCycle.DateTime,
                     graficoLifeCycle.value,
                     where='post',
                     color='orange')
            plt.xticks(rotation=30)
            plt.xlabel('timeline')
            plt.ylabel('status')
            plt.title('Itemfield: ' + str(mostTravelled) + ' life cycle')
            figureOutput[f"loadingUnloading_itemfield_{itemName}"] = fig1

            # space-time chart: distance grows by 1 each time the location
            # changes w.r.t. the previous row (vectorized version of the
            # original row loop; NaN != NaN also counts as a change, which
            # matches the original '==' comparison semantics)
            graficoLifeCycle['distance'] = (
                graficoLifeCycle.Location != graficoLifeCycle.Location.shift()
            ).cumsum() - 1

            fig1 = plt.figure(figsize=(20, 10))
            plt.plot(graficoLifeCycle.distance,
                     graficoLifeCycle.DateTime,
                     color='orange')
            plt.xlabel('distance')
            plt.ylabel('timeline')
            plt.title('Container: ' + str(mostTravelled) +
                      ' time-distance graph')
            figureOutput[f"spaceTime_itemfield_{itemName}"] = fig1
    else:
        print("WARNING: NO PTA AND PTD")
    return figureOutput, df_lifeCycle
示例#11
0
def calculateMultipleOptimalLocation(D_table,
                                     timeColumns,
                                     distanceType,
                                     latCol,
                                     lonCol,
                                     codeCol_node,
                                     descrCol_node,
                                     cleanOutliers=False,
                                     k=1,
                                     method='kmeans'):
    '''
    Define k facility locations using a clustering method, then compute an
    optimal location for each cluster and time period.

    Parameters
    ----------
    D_table : DataFrame with one row per network node.
    timeColumns : list of columns (one per time period) holding flow data.
    distanceType : 'rectangular', 'gravity' or 'euclidean'.
    latCol / lonCol : latitude / longitude columns of the node.
    codeCol_node / descrCol_node : node code / description columns.
    cleanOutliers : if True, remove latitude/longitude outliers via IQR.
    k : number of optimal points to define.
    method : clustering method, 'kmeans' or 'gmm'.

    Returns
    -------
    D_res : per-node DataFrame with YEAR, CLUSTER, FLOW, COST (renamed to
        COST_TOBE) and FLOW_norm (flows scaled between 0 and 100).
    D_res_optimal : one row per (cluster, period) with the optimal latitude
        and longitude plus the total COST and FLOW for that cluster/period.
    output_coverages : dict with the coverage statistics DataFrame.
    '''
    # data cleaning and coverage statistics
    output_coverages = {}

    analysisFieldList = [latCol, lonCol]
    outputCoverages, _ = getCoverageStats(D_table, analysisFieldList,
                                          capacityField=timeColumns[0])
    D_table = D_table.dropna(subset=[latCol, lonCol])
    if cleanOutliers:
        D_table, coverages, = cleanUsingIQR(D_table, [latCol, lonCol])
        outputCoverages = (coverages[0] * outputCoverages[0],
                           coverages[1] * outputCoverages[1])
    output_coverages['coverages'] = pd.DataFrame(outputCoverages)

    # replace remaining NaNs with zeros
    D_table = D_table.fillna(0)

    # the time-horizon columns act as "years"
    yearsColumns = timeColumns

    # validate the distance type up front (the original crashed with a
    # NameError on the first unknown value, after clustering)
    distance = distanceType.lower()
    if distance not in ('rectangular', 'gravity', 'euclidean'):
        print("No valid distance type")
        return [], [], []

    # cluster the points
    # BUG FIX: assign the raw label array; the original wrapped it in a
    # DataFrame whose fresh RangeIndex did not align with D_table's index
    # after dropna, producing NaN clusters
    if method == 'kmeans':
        km = cluster.KMeans(n_clusters=k).fit(D_table[[latCol, lonCol]])
        D_table['CLUSTER'] = km.labels_
    elif method == 'gmm':
        gmm = GaussianMixture(n_components=k, covariance_type='full').fit(
            D_table[[latCol, lonCol]])
        D_table['CLUSTER'] = gmm.predict(D_table[[latCol, lonCol]])
    else:
        print("No valid clustering method")
        return [], [], []

    # accumulate results in plain lists and build the frames once
    # (DataFrame.append was removed in pandas 2.0)
    res_frames = []
    optimal_rows = []
    optimal_cols = ['PERIOD', latCol, lonCol, 'YEAR', 'COST', 'FLOW',
                    'CLUSTER']

    # analyse each cluster separately
    for cluster_id in set(D_table['CLUSTER']):
        D_table_filtered = D_table[D_table['CLUSTER'] == cluster_id]
        for year in yearsColumns:
            D_filter_columns = [codeCol_node, descrCol_node, latCol, lonCol,
                                year, 'CLUSTER']
            D_filtered = D_table_filtered[D_filter_columns]
            D_filtered = D_filtered.rename(columns={year: 'FLOW'})
            D_filtered['YEAR'] = year

            # optimal location for this cluster and period
            if distance == 'rectangular':
                lat_optimal, lon_optimal = optimalLocationRectangularDistance(
                    D_filtered, latCol, lonCol, 'FLOW')
                D_filtered['COST'] = func_rectangularDistanceCost(
                    D_filtered[lonCol], D_filtered[latCol], lon_optimal,
                    lat_optimal, D_filtered['FLOW'])
            elif distance == 'gravity':
                lat_optimal, lon_optimal = optimalLocationGravityProblem(
                    D_filtered, latCol, lonCol, 'FLOW')
                D_filtered['COST'] = func_gravityDistanceCost(
                    D_filtered[lonCol], D_filtered[latCol], lon_optimal,
                    lat_optimal, D_filtered['FLOW'])
            else:  # euclidean (validated above)
                lat_optimal, lon_optimal = optimalLocationEuclideanDistance(
                    D_filtered, latCol, lonCol, 'FLOW')
                D_filtered['COST'] = func_euclideanDistanceCost(
                    D_filtered[lonCol], D_filtered[latCol], lon_optimal,
                    lat_optimal, D_filtered['FLOW'])
            res_frames.append(D_filtered)

            # BUG FIX: report the total cost/flow of THIS cluster and
            # period; the original summed the whole accumulated D_res,
            # mixing all previously processed clusters and periods
            optimal_rows.append([f"OPTIMAL LOCATION YEAR: {year}",
                                 lat_optimal,
                                 lon_optimal,
                                 year,
                                 D_filtered['COST'].sum(),
                                 D_filtered['FLOW'].sum(),
                                 cluster_id])

    D_res = pd.concat(res_frames)
    D_res_optimal = pd.DataFrame(optimal_rows, columns=optimal_cols)

    # scale flows between 0 and 100
    # NOTE(review): divides by zero when all flows are equal — confirm
    # upstream data makes this impossible
    D_res['FLOW_norm'] = (D_res['FLOW'] - min(D_res['FLOW'])) / (
        max(D_res['FLOW']) - min(D_res['FLOW'])) * 100

    D_res = D_res.rename(columns={'COST': 'COST_TOBE'})

    return D_res, D_res_optimal, output_coverages
示例#12
0
def calculateOptimalLocation(D_table,
                             timeColumns,
                             distanceType,
                             latCol,
                             lonCol,
                             codeCol_node,
                             descrCol_node,
                             cleanOutliers=False):
    '''
    Identify the optimal facility location for each time period of a network.

    Parameters
    ----------
    D_table : pandas.DataFrame
        Table where each row is a node of the network.
    timeColumns : list of str
        Columns containing the flow intensity for each time period
        (e.g. one column per year).
    distanceType : str
        Distance metric: 'rectangular', 'gravity' or 'euclidean'
        (case-insensitive).
    latCol : str
        Column with the latitude of the node.
    lonCol : str
        Column with the longitude of the node.
    codeCol_node : str
        Column with the code of the node.
    descrCol_node : str
        Column with the description of the node.
    cleanOutliers : bool, optional
        If True, latitude/longitude outliers are removed with an IQR filter.

    Returns
    -------
    D_res : pandas.DataFrame
        One row per node and period, with columns COST_TOBE (weighted
        distance travelled), FLOW (flow intensity) and FLOW_norm (flow
        scaled between 0 and 100).
    D_res_optimal : pandas.DataFrame
        One row per period with the optimal latitude/longitude and running
        totals of COST and FLOW.
    output_coverages : dict
        Coverage statistics of the input data.

    Raises
    ------
    ValueError
        If distanceType is not one of the supported metrics.
    '''
    distance_type = distanceType.lower()
    if distance_type not in ('rectangular', 'gravity', 'euclidean'):
        # fail fast: an unknown metric previously fell through every branch
        # and raised NameError on lat_optimal further below
        raise ValueError(f"unsupported distanceType: {distanceType}")

    # clean the data and compute the coverage statistics
    output_coverages = {}
    outputCoverages, _ = getCoverageStats(D_table, [latCol, lonCol],
                                          capacityField=timeColumns[0])
    D_table = D_table.dropna(subset=[latCol, lonCol])
    if cleanOutliers:
        D_table, coverages = cleanUsingIQR(D_table, [latCol, lonCol])
        outputCoverages = (coverages[0] * outputCoverages[0],
                           coverages[1] * outputCoverages[1])
    output_coverages['coverages'] = pd.DataFrame(outputCoverages)

    # replace the remaining nulls with zeros
    D_table = D_table.fillna(0)

    yearly_frames = []
    optimal_rows = []
    cumulative_cost = 0.0
    cumulative_flow = 0.0

    for year in timeColumns:
        D_filtered = D_table[[codeCol_node, descrCol_node, latCol, lonCol,
                              year]].copy()
        D_filtered = D_filtered.rename(columns={year: 'FLOW'})
        D_filtered['YEAR'] = year

        # define the optimal location for this period
        if distance_type == 'rectangular':
            lat_optimal, lon_optimal = optimalLocationRectangularDistance(
                D_filtered, latCol, lonCol, 'FLOW')
            D_filtered['COST'] = func_rectangularDistanceCost(
                D_filtered[lonCol], D_filtered[latCol], lon_optimal,
                lat_optimal, D_filtered['FLOW'])
        elif distance_type == 'gravity':
            lat_optimal, lon_optimal = optimalLocationGravityProblem(
                D_filtered, latCol, lonCol, 'FLOW')
            D_filtered['COST'] = func_gravityDistanceCost(
                D_filtered[lonCol], D_filtered[latCol], lon_optimal,
                lat_optimal, D_filtered['FLOW'])
        else:  # 'euclidean'
            lat_optimal, lon_optimal = optimalLocationEuclideanDistance(
                D_filtered, latCol, lonCol, 'FLOW')
            D_filtered['COST'] = func_euclideanDistanceCost(
                D_filtered[lonCol], D_filtered[latCol], lon_optimal,
                lat_optimal, D_filtered['FLOW'])

        yearly_frames.append(D_filtered)
        cumulative_cost += D_filtered['COST'].sum()
        cumulative_flow += D_filtered['FLOW'].sum()
        # NOTE(review): COST and FLOW are cumulative over all periods
        # processed so far (this preserves the original behaviour) —
        # confirm a per-period total is not intended instead
        optimal_rows.append([f"OPTIMAL LOCATION YEAR: {year}",
                             lat_optimal, lon_optimal, year,
                             cumulative_cost, cumulative_flow])

    # build the frames once: DataFrame.append was removed in pandas 2.0
    if yearly_frames:
        D_res = pd.concat(yearly_frames, ignore_index=True)
    else:
        D_res = pd.DataFrame(columns=[codeCol_node, descrCol_node, latCol,
                                      lonCol, 'YEAR', 'COST', 'FLOW'])
    D_res_optimal = pd.DataFrame(
        optimal_rows,
        columns=['PERIOD', latCol, lonCol, 'YEAR', 'COST', 'FLOW'])

    # scale the flows between 0 and 100
    flow_min = D_res['FLOW'].min()
    flow_max = D_res['FLOW'].max()
    D_res['FLOW_norm'] = (D_res['FLOW'] - flow_min) / (flow_max - flow_min) * 100

    D_res = D_res.rename(columns={'COST': 'COST_TOBE'})

    return D_res, D_res_optimal, output_coverages
示例#13
0
def defineDistanceTableEstimator(D_mov, lonCol_From_mov, latCol_From_mov,
                                 lonCol_To_mov, latCol_To_mov, G,
                                 cleanOutliersCoordinates=False,
                                 capacityField='QUANTITY'):
    '''
    Benchmark distance estimators against real road distances.

    For every distinct origin/destination pair in D_mov the real road
    distance is computed on the graph G; euclidean, rectangular and gravity
    estimates on Mercator-projected coordinates are then compared against it
    with the mean squared error (printed to stdout).

    Parameters
    ----------
    D_mov : pandas.DataFrame
        Dataframe with movements.
    lonCol_From_mov : str
        Column of D_mov with the longitude of the loading node.
    latCol_From_mov : str
        Column of D_mov with the latitude of the loading node.
    lonCol_To_mov : str
        Column of D_mov with the longitude of the discharging node.
    latCol_To_mov : str
        Column of D_mov with the latitude of the discharging node.
    G : graph
        A road graph obtained with osmnx.
    cleanOutliersCoordinates : bool, optional
        True to remove latitude/longitude outliers with an IQR filter.
    capacityField : str, optional
        Capacity field used to measure the coverage statistics.

    Returns
    -------
    D_dist : pandas.DataFrame
        One row per distinct origin/destination pair with the real and the
        estimated distances.
    df_coverages : pandas.DataFrame
        Coverage statistics.
    '''
    # clean data and get coverages
    analysisFieldList = [lonCol_From_mov, latCol_From_mov,
                         lonCol_To_mov, latCol_To_mov]
    coverages, _ = getCoverageStats(D_mov, analysisFieldList,
                                    capacityField=capacityField)
    D_dist = D_mov[analysisFieldList].drop_duplicates().dropna().reset_index()
    if cleanOutliersCoordinates:
        D_dist, coverages_outl = cleanUsingIQR(D_dist, analysisFieldList)
        coverages = (coverages[0] * coverages_outl[0],
                     coverages[1] * coverages_outl[1])

    df_coverages = pd.DataFrame(coverages)

    # accumulate per-row results in lists and assign each column once:
    # the original D_dist['COL'].loc[index] = value is chained assignment
    # and is not guaranteed to write into the frame
    real_distance = []
    merc_x_from, merc_y_from, merc_x_to, merc_y_to = [], [], [], []

    for _, row in D_dist.iterrows():

        # get the coordinates
        lonFrom = row[lonCol_From_mov]
        latFrom = row[latCol_From_mov]
        lonTo = row[lonCol_To_mov]
        latTo = row[latCol_To_mov]

        # snap the pair to the closest graph nodes and take the
        # shortest-path length as the real road distance
        node_from = ox.get_nearest_node(G, (latFrom, lonFrom),
                                        method='euclidean')
        node_to = ox.get_nearest_node(G, (latTo, lonTo), method='euclidean')
        real_distance.append(nx.shortest_path_length(G=G, source=node_from,
                                                     target=node_to,
                                                     weight='length'))

        # convert into mercator coordinates
        x_from, y_from = mercatorProjection(latFrom, lonFrom)
        x_to, y_to = mercatorProjection(latTo, lonTo)
        merc_x_from.append(x_from)
        merc_y_from.append(y_from)
        merc_x_to.append(x_to)
        merc_y_to.append(y_to)

    D_dist['REAL_DISTANCE'] = real_distance
    D_dist['MERCATOR_X_FROM'] = merc_x_from
    D_dist['MERCATOR_Y_FROM'] = merc_y_from
    D_dist['MERCATOR_X_TO'] = merc_x_to
    D_dist['MERCATOR_Y_TO'] = merc_y_to

    # estimated distances; NOTE(review): the 1000 factor presumably converts
    # the estimator output (km) to metres like the graph lengths — confirm
    D_dist['EUCLIDEAN_DISTANCE'] = 1000 * func_euclideanDistanceCost(
        D_dist['MERCATOR_X_FROM'], D_dist['MERCATOR_Y_FROM'],
        D_dist['MERCATOR_X_TO'], D_dist['MERCATOR_Y_TO'], 1)
    D_dist['RECTANGULAR_DISTANCE'] = 1000 * func_rectangularDistanceCost(
        D_dist['MERCATOR_X_FROM'], D_dist['MERCATOR_Y_FROM'],
        D_dist['MERCATOR_X_TO'], D_dist['MERCATOR_Y_TO'], 1)
    D_dist['GRAVITY_DISTANCE'] = 1000 * func_gravityDistanceCost(
        D_dist['MERCATOR_X_FROM'], D_dist['MERCATOR_Y_FROM'],
        D_dist['MERCATOR_X_TO'], D_dist['MERCATOR_Y_TO'], 1)

    error_euclidean = mean_squared_error(D_dist['REAL_DISTANCE'],
                                         D_dist['EUCLIDEAN_DISTANCE'])
    error_rectangular = mean_squared_error(D_dist['REAL_DISTANCE'],
                                           D_dist['RECTANGULAR_DISTANCE'])
    error_gravity = mean_squared_error(D_dist['REAL_DISTANCE'],
                                       D_dist['GRAVITY_DISTANCE'])

    print(f"MSE EUCLIDEAN: {np.round(error_euclidean,2)}")
    print(f"MSE RECTANGULAR: {np.round(error_rectangular,2)}")
    print(f"MSE GRAVITY: {np.round(error_gravity,2)}")
    return D_dist, df_coverages
示例#14
0
def bookingStatistics(D_mov,
                      capacityField='QUANTITY',
                      timeVariable='TIMESTAMP_IN',
                      samplingInterval=['day', 'week', 'month']):
    '''
    Analyse booking trends per day, week, month and day of the week.

    Parameters
    ----------
    D_mov : pandas.DataFrame
        Dataframe with movements.
    capacityField : str, optional
        Capacity field used to compute the coverage statistics.
    timeVariable : str, optional
        Time-based grouping column of D_mov.
    samplingInterval : list of str, optional
        Sampling intervals to analyse; any of 'day', 'week', 'month'.

    Returns
    -------
    imageResults : dict
        Trend and distribution figures keyed by sampling interval.
    dataframeResults : dict
        'trend_df' with mean/std (and their coverage) per interval and
        'weekday_df' with the day-of-the-week distribution.
    '''
    # result containers
    figures = {}
    frames = {}
    trend_values = {}
    trend_accuracy = {}

    # coverage statistics of the grouping column
    accuracy, _ = getCoverageStats(D_mov,
                                   analysisFieldList=timeVariable,
                                   capacityField=capacityField)

    # orders per time period, sorted chronologically
    orders_by_period = D_mov.groupby([timeVariable]).size().reset_index()
    orders_by_period.columns = ['DatePeriod', 'Orders']
    orders_by_period = orders_by_period.sort_values(['DatePeriod'])

    for interval in samplingInterval:
        if interval == 'month':
            series = ts.raggruppaPerMese(orders_by_period, 'DatePeriod',
                                         'Orders', 'sum')
        elif interval == 'week':
            series = ts.raggruppaPerSettimana(orders_by_period, 'DatePeriod',
                                              'Orders', 'sum')
        elif interval == 'day':
            series = orders_by_period.set_index('DatePeriod')['Orders']

        # trend over time
        trend_fig = plt.figure()
        plt.plot(series.index.values, series, color='orange')
        plt.title(f"TREND: {timeVariable} per {interval}")
        plt.xticks(rotation=30)
        figures[f"trend_{interval}"] = trend_fig

        # frequency distribution
        hist_fig = plt.figure()
        plt.hist(series, color='orange')
        plt.title(f"Frequency analysis of {timeVariable} per {interval}")
        plt.xlabel(f"{timeVariable}")
        plt.ylabel(f"{interval}")
        figures[f"pdf_{interval}"] = hist_fig

        # summary statistics and their coverage
        trend_values[f"{timeVariable}_{interval}_MEAN"] = np.mean(series)
        trend_values[f"{timeVariable}_{interval}_STD"] = np.std(series)
        trend_accuracy[f"{timeVariable}_{interval}_MEAN"] = accuracy
        trend_accuracy[f"{timeVariable}_{interval}_STD"] = accuracy

    # dataframe with trend statistics and coverages
    trend_df = pd.DataFrame([trend_values, trend_accuracy]).transpose()
    trend_df.columns = ['VALUE', 'ACCURACY']
    frames['trend_df'] = trend_df

    # distribution per day of the week
    weekday_df = ts.raggruppaPerGiornoDellaSettimana(
        orders_by_period,
        timeVariable='DatePeriod',
        seriesVariable='Orders')
    weekday_df['accuracy'] = [accuracy] * len(weekday_df)
    frames['weekday_df'] = weekday_df

    weekday_fig = plt.figure()
    plt.bar(weekday_df.index.values, weekday_df['mean'], color='orange')
    plt.title(f"N.of {timeVariable} per day of the week")
    plt.xlabel('day of the week')
    plt.ylabel('Frequency')
    figures["pdf_dayOfTheWeek"] = weekday_fig

    return figures, frames
示例#15
0
def E_terminalStatistics(D_mov,
                         timefield='TIMESTAMP_IN',
                         locfrom='LOADING_NODE',
                         locto='DISCHARGING_NODE',
                         voyagefield='VEHICLE_CODE',
                         capacityField='QUANTITY',
                         timeColumns={},
                         censoredData=False,
                         actual='PROVISIONAL',
                         splitInOut=True):
    '''
    Compute terminal productivity statistics and hourly workload profiles.

    Parameters
    ----------
    D_mov : pandas.DataFrame
        Dataframe with movements.
    timefield : str, optional
        Time-based grouping column.
    locfrom, locto : str, optional
        Loading / discharging node columns.
    voyagefield : str, optional
        Vehicle code column.
    capacityField : str, optional
        Capacity field used for the coverage statistics.
    timeColumns : dict, optional
        Maps logical time keys (e.g. 'loadingpta') to D_mov columns.
    censoredData : bool, optional
        Forwarded to createTabellaProductivityAllocationTerminal.
    actual : str, optional
        'PROVISIONAL' to use planned times (PTA/PTD), 'ACTUAL' to use
        actual times (ATA/ATD).
    splitInOut : bool, optional
        Forwarded to createTabellaProductivityAllocationTerminal.

    Returns
    -------
    outputfigure : dict
        Regression, distribution and workload figures per terminal.
    D_terminal : pandas.DataFrame
        Productivity allocation table per terminal.
    D_coverages : pandas.DataFrame
        Coverage statistics, or an error message when timeColumns is
        incomplete.

    Raises
    ------
    ValueError
        If actual is neither 'PROVISIONAL' nor 'ACTUAL'.
    '''
    if actual not in ('PROVISIONAL', 'ACTUAL'):
        # fail fast: any other value previously caused a NameError later on
        raise ValueError(
            f"actual must be 'PROVISIONAL' or 'ACTUAL', got {actual!r}")

    outputfigure = {}

    # required timeColumns keys depend on planned vs actual times
    if actual == 'PROVISIONAL':
        colonneNecessarie = ['loadingpta', 'loadingptd',
                             'dischargingpta', 'dischargingptd']
    else:
        colonneNecessarie = ['loadingata', 'loadingatd',
                             'dischargingata', 'dischargingatd']

    colonneMancanti = [c for c in colonneNecessarie if c not in timeColumns]
    if colonneMancanti:
        # fix: previously this message was unconditionally overwritten below
        # by pd.DataFrame(accuracy), which raised NameError because
        # `accuracy` was never assigned in this branch
        D_coverages = pd.DataFrame(
            [f"NO columns {colonneMancanti} in timeColumns"])
    else:
        allcolumns = [locfrom, locto] + [timeColumns[c]
                                         for c in colonneNecessarie]
        # fix: use the capacityField parameter instead of the hard-coded
        # 'QUANTITY', consistently with the rest of the function
        accuracy, _ = getCoverageStats(D_mov,
                                       analysisFieldList=allcolumns,
                                       capacityField=capacityField)
        D_coverages = pd.DataFrame(accuracy)

    # productivity allocation table per terminal
    D_terminal = createTabellaProductivityAllocationTerminal(
        D_mov,
        timefield=timefield,
        locfrom=locfrom,
        locto=locto,
        capacityField=capacityField,
        voyagefield=voyagefield,
        timeColumns=timeColumns,
        censoredData=censoredData,
        actual=actual,
        splitInOut=splitInOut)

    # one column per hour of the day: '00' .. '23'
    hour_labels = [f"{h:02d}" for h in range(24)]
    BookingTrendcols = ['Terminal'] + hour_labels
    booking_rows = []
    Terminals = np.unique(D_terminal.Location)

    for terminal in Terminals:

        # correlation between handling time and handled quantity
        dataTerminalTemp = D_terminal[D_terminal.Location == terminal]

        for hh in ['IN', 'OUT']:
            dataTerminal = dataTerminalTemp[dataTerminalTemp.InOut == hh]
            dataTerminal = dataTerminal.dropna()
            # remove null handling times
            dataTerminal = dataTerminal[dataTerminal['handlingTime'] > 0]
            if len(dataTerminal) > 1:

                # simple regression analysis (seaborn >= 0.12 requires
                # keyword x/y arguments)
                fig1 = plt.figure()
                sns.regplot(x=dataTerminal['Movementquantity'],
                            y=dataTerminal['handlingTime'],
                            color='orange',
                            marker="o")
                plt.ylabel('Handling Time')
                plt.xlabel('Handled quantity ' + hh)
                plt.title(hh + ' Terminal: ' + str(terminal))
                # fix: key now includes hh, so the OUT figures no longer
                # overwrite the IN ones
                outputfigure[f"productivity_{hh}_regression_{terminal}"] = fig1

                # frequency analysis of the hourly productivity
                fig2 = plt.figure()
                plt.hist(dataTerminal['hourProductivity'], color='orange')
                plt.ylabel('Frequency')
                plt.xlabel(hh + ' Movements per hour')
                plt.title('Productivity ' + hh + ' Terminal : ' +
                          str(terminal))
                outputfigure[f"productivity_{hh}_pdf_{terminal}"] = fig2
                plt.close('all')

                # hourly time-window workload per handling record
                for j in range(len(dataTerminal)):
                    # one counter per hour replaces the 24 H_xx variables
                    # and the 24-branch elif chain
                    hourly = [0] * 24

                    caricoScarico = dataTerminal.iloc[j]
                    if actual == 'PROVISIONAL':
                        istInizio = caricoScarico.PTA
                        istFine = caricoScarico.PTD
                    else:
                        istInizio = caricoScarico.ATA
                        istFine = caricoScarico.ATD
                    qty = caricoScarico.CurrentCapacity
                    oraInizio = istInizio.hour
                    oraFine = istFine.hour

                    # NOTE(review): records ending within the same hour or
                    # crossing midnight (oraFine <= oraInizio) contribute
                    # all zeros, as in the original code — confirm intended
                    if oraFine > oraInizio:
                        for k in range(oraInizio, oraFine + 1):
                            hourly[k] += qty
                    booking_rows.append([terminal] + hourly)

    # build the table once from the accumulated rows: DataFrame.append was
    # removed in pandas 2.0 and per-row append was quadratic anyway
    D_bookingTerminal = pd.DataFrame(booking_rows, columns=BookingTrendcols)

    # overall network workload profile
    DailyWorkloadNetwork = D_bookingTerminal[hour_labels].sum(axis=0,
                                                              skipna=True)

    fig1 = plt.figure()
    plt.stem(DailyWorkloadNetwork)
    plt.title('Network Handling time windows')
    plt.ylabel('Total Container Handled')
    plt.xlabel('Daily timeline')
    outputfigure["productivity_workload_network"] = fig1

    # workload profile for every terminal
    for terminal in Terminals:
        DailyWorkloadTerminal = D_bookingTerminal[
            D_bookingTerminal.Terminal == terminal][hour_labels]
        DailyWorkloadTerminal = DailyWorkloadTerminal.mean(axis=0,
                                                           skipna=True)

        fig1 = plt.figure()
        plt.stem(DailyWorkloadTerminal)
        plt.title('Terminal: ' + str(terminal) + ' Handling time windows')
        plt.ylabel('Average Quantity Handled per hour')
        plt.xlabel('Daily timeline')
        outputfigure[f"productivity_workload_{terminal}"] = fig1
        plt.close('all')
    return outputfigure, D_terminal, D_coverages