def ReplaceWithMedian(self, mylist):
    """Replace the missing values of ``mylist`` with the median value.

    A missing value is an empty string (``''``). The median is computed by
    ``BasicStatisticalMeasures.median`` on the list returned by
    ``self.DeleteMissingValue(mylist)`` (the given list with the missing
    values deleted).

    ``mylist`` is modified in place and the same list object is returned.
    """
    # Median of the data with the missing values deleted.
    list1 = self.DeleteMissingValue(mylist)
    A = BasicStatisticalMeasures()
    median = A.median(list1)

    for i, value in enumerate(mylist):
        # Compare with ``==``, not ``is``: identity against a string literal
        # only works by accident of interning and misses equal-but-distinct
        # empty strings (e.g. unicode or dynamically built ones).
        if value == '':
            mylist[i] = median
    return mylist
def DeleteExtremeOutliers(self, mylist):
    """Return a new list with the extreme outliers of ``mylist`` removed.

    The outer fences are ``Q1 - 3 * IQ`` and ``Q3 + 3 * IQ``, where ``Q1``
    and ``Q3`` are elements 1 and 3 of ``BasicStatisticalMeasures.quantile``
    and ``IQ`` comes from ``BasicStatisticalMeasures.IQR``. A value is kept
    unless it is below the lower or above the upper outer fence.

    ``mylist`` itself is not modified.
    """
    stats = BasicStatisticalMeasures()
    first_quartile = stats.quantile(mylist)[1]
    third_quartile = stats.quantile(mylist)[3]
    inter_quartile = stats.IQR(mylist)

    lower_outer_fence = first_quartile - 3 * inter_quartile
    upper_outer_fence = third_quartile + 3 * inter_quartile

    # Keep every value that is not beyond the outer fence on either side.
    return [
        value for value in mylist
        if not (value < lower_outer_fence or value > upper_outer_fence)
    ]
def DeleteOutliers(self, mylist):
    """Return a new list with mild and extreme outliers of ``mylist`` removed.

    ``Q1`` and ``Q3`` are elements 1 and 3 of
    ``BasicStatisticalMeasures.quantile`` and ``IQ`` comes from
    ``BasicStatisticalMeasures.IQR``. Two pairs of fences are derived:

    * inner fences: ``Q1 - 1.5 * IQ`` and ``Q3 + 1.5 * IQ`` (mild outliers)
    * outer fences: ``Q1 - 3 * IQ`` and ``Q3 + 3 * IQ`` (extreme outliers)

    A value is dropped when it lies beyond either pair of fences.
    ``mylist`` itself is not modified.
    """
    stats = BasicStatisticalMeasures()
    first_quartile = stats.quantile(mylist)[1]
    third_quartile = stats.quantile(mylist)[3]
    inter_quartile = stats.IQR(mylist)

    lower_inner_fence = first_quartile - 1.5 * inter_quartile
    upper_inner_fence = third_quartile + 1.5 * inter_quartile
    lower_outer_fence = first_quartile - 3 * inter_quartile
    upper_outer_fence = third_quartile + 3 * inter_quartile

    # Outer fences are tested first, then the inner ones; a value survives
    # only if none of the four comparisons holds.
    return [
        value for value in mylist
        if not (
            value < lower_outer_fence
            or value > upper_outer_fence
            or value < lower_inner_fence
            or value > upper_inner_fence
        )
    ]
C = HandleOutliers() MA_Proc = C.DeleteOutliers(MA_Proc) M1A_Proc = C.DeleteOutliers(M1A_Proc) M1B_Proc = C.DeleteOutliers(M1B_Proc) M2A_Proc = C.DeleteOutliers(M2A_Proc) M2B_Proc = C.DeleteOutliers(M2B_Proc) M3A_Proc = C.DeleteOutliers(M3A_Proc) M3B_Proc = C.DeleteOutliers(M3B_Proc) MM_Proc = C.DeleteOutliers(MM_Proc) PrA_Proc = C.DeleteOutliers(PrA_Proc) PrB_Proc = C.DeleteOutliers(PrB_Proc) PaA_Proc = C.DeleteOutliers(PaA_Proc) PaB_Proc = C.DeleteOutliers(PaB_Proc) #Call the BasicStatisticalMeasures object and calculate the mean value of the processing times for each station E = BasicStatisticalMeasures() meanMA_Proc = E.mean(MA_Proc) meanM1A_Proc = E.mean(M1A_Proc) meanM2A_Proc = E.mean(M2A_Proc) meanM3A_Proc = E.mean(M3A_Proc) meanM1B_Proc = E.mean(M1B_Proc) meanM2B_Proc = E.mean(M2B_Proc) meanM3B_Proc = E.mean(M3B_Proc) meanMM_Proc = E.mean(MM_Proc) meanPrA_Proc = E.mean(PrA_Proc) meanPrB_Proc = E.mean(PrB_Proc) meanPaA_Proc = E.mean(PaA_Proc) meanPaB_Proc = E.mean(PaB_Proc) stopTime = datetime.datetime( 2014, 3, 27, 8, 40, 00