def variance(data):
    """Return the mean of the squared mean-to-value differences of ``data``.

    ``data`` is first passed through ``check`` and the value it returns is
    used for every later step.  Each term is ``sq(subtraction(mean, value))``
    and the terms are combined with ``mean``, so no ``n - 1`` correction is
    applied inside this function.
    """
    data = check(data)
    centre = mean(data)
    squared_terms = [sq(subtraction(centre, value)) for value in data]
    return mean(squared_terms)
def population_correlation_coefficient(list_x, list_y):
    """Return the population correlation coefficient of two paired lists.

    Computes ``(1 / N) * sum(z_x * z_y)``, where each ``z`` is a value's
    difference from its list's mean divided by ``standard_deviation`` of
    that list and ``N`` is ``len(list_x)``.

    Args:
        list_x: Sequence of numbers.
        list_y: Sequence of numbers paired index-by-index with ``list_x``;
            it is indexed with every index of ``list_x``.

    Returns:
        The coefficient as a float rounded to 4 decimal places.
    """
    # NOTE(review): assumes standard_deviation() is the population form
    # (divides by N) -- confirm against its definition.
    sd_x = standard_deviation(list_x)
    sd_y = standard_deviation(list_y)
    # The means do not change between iterations, so compute them once.
    mean_x = mean(list_x)
    mean_y = mean(list_y)

    total = 0
    for i, value_x in enumerate(list_x):
        diff_x = subtraction(value_x, mean_x)
        diff_y = subtraction(list_y[i], mean_y)
        total = total + multiplication(division(diff_x, sd_x),
                                       division(diff_y, sd_y))

    # Average over the number of (x, y) pairs.  The previous code divided by
    # len(list_x) + len(list_y), which halved the coefficient.
    pair_count = len(list_x)
    return round(float(multiplication(division(1, pair_count), total)), 4)
def sampleCorrelation(dataX, dataY):
    """Return the sample correlation coefficient of two paired data sets.

    Computes ``sum((x - meanX) * (y - meanY)) / ((n - 1) * sX * sY)`` where
    ``sX`` and ``sY`` come from ``standard_deviation`` and ``n`` is
    ``len(dataX)``.

    Args:
        dataX: Sequence of numbers.
        dataY: Sequence of numbers paired index-by-index with ``dataX``.

    Returns:
        Whatever ``division`` returns for the numerator and denominator.
    """
    meanX = mean(dataX)
    meanY = mean(dataY)
    # NOTE(review): assumes standard_deviation() is the sample (n - 1) form;
    # if it is the population form the factor below should be n -- confirm.
    deviationX = standard_deviation(dataX)
    deviationY = standard_deviation(dataY)

    rNumerator = 0.0
    for i, valueX in enumerate(dataX):
        rNumerator += product(subtraction(valueX, meanX),
                              subtraction(dataY[i], meanY))

    # The summed co-deviations must be normalised by (n - 1) as well as by
    # the two standard deviations; without it the result grows with n and is
    # not bounded by [-1, 1].
    rDenominator = product(len(dataX) - 1, product(deviationX, deviationY))
    return division(rNumerator, rDenominator)
def variance(data):
    """Return the sum of squared deviations from the mean over ``len(data) - 1``.

    The squared deviations are accumulated with the ``addition`` helper,
    starting from 0, and the final division is done in floating point.
    """
    centre = mean(data)
    squared_deviations = [(reading - centre) ** 2 for reading in data]

    running = 0
    for term in squared_deviations:
        running = addition(running, term)

    divisor = float(len(data) - 1)
    return running / divisor
def zscore(data, x):
    """Return ``division(stddev(data), subtraction(mean(data), x))``.

    ``data`` is first passed through ``check`` and the returned value is
    used for the mean and the standard deviation.

    NOTE(review): the argument order only yields ``(x - mean) / stddev`` if
    ``subtraction(a, b)`` computes ``b - a`` and ``division(a, b)`` computes
    ``b / a`` -- confirm against the helpers.
    """
    cleaned = check(data)
    centre = mean(cleaned)
    spread = stddev(cleaned)
    return division(spread, subtraction(centre, x))
def confidence_interval(numbers, confidence_level=0.95):
    """Return ``[lower, upper]`` bounds of a z-based confidence interval.

    Args:
        numbers: Sequence of numbers.
        confidence_level: Two-sided confidence level strictly between 0 and
            1.  Defaults to 0.95, the level that was previously hard-coded.

    Returns:
        A two-element list built from ``subtraction(margin, mean)`` and
        ``addition(margin, mean)``, where ``margin`` is
        ``multiplication(division(sqrt_n, sd), z)``.

    NOTE(review): the helper argument order is kept exactly as it was; it
    yields ``mean -/+ z * sd / sqrt(n)`` only if ``division(a, b)`` computes
    ``b / a`` and ``subtraction(a, b)`` computes ``b - a`` -- confirm.
    """
    # Local import: the file's import block is not visible from this block.
    from statistics import NormalDist

    m = mean(numbers)
    # Two-sided critical value of the standard normal distribution (about
    # 1.96 for 95%).  The previous code used the tail area
    # (1 - level) / 2 = 0.025 itself as the multiplier.
    z = NormalDist().inv_cdf(1 - (1 - confidence_level) / 2)
    sd = standard_deviation(numbers)
    n = squareroot(len(numbers))
    margin = multiplication(division(n, sd), z)
    return [subtraction(margin, m), addition(margin, m)]
def confidenceinterval(a, conf):
    """Return ``(mean, mean - h, mean + h)``, each rounded to 3 decimals.

    Args:
        a: Data passed to ``mean`` and ``marginoferror``.
        conf: Passed through unchanged as the second argument of
            ``marginoferror``.

    Returns:
        A 3-tuple: the rounded mean, then the rounded lower and upper
        values obtained by subtracting and adding ``h = marginoferror(a, conf)``.
    """
    # The sample size and sample standard deviation were computed here but
    # never used; the margin comes entirely from marginoferror().
    m = mean(a)
    h = marginoferror(a, conf)
    return round(m, 3), round(m - h, 3), round(m + h, 3)
def skewness(data):
    """Return ``division(len(data), total)`` of the cubed scaled deviations.

    Each value contributes ``division(stddev, subtraction(mean, value)) ** 3``
    and the contributions are summed into ``total``.

    On ``ZeroDivisionError`` or ``ValueError`` a message is printed and the
    function returns ``None``.

    NOTE(review): this is the usual mean cubed z-score only if
    ``subtraction(a, b)`` computes ``b - a`` and ``division(a, b)`` computes
    ``b / a`` -- confirm against the helpers.
    """
    try:
        spread = stddev(data)
        centre = mean(data)
        count = len(data)

        # Stages are kept separate so every subtraction runs before any
        # division, and every division before any cubing.
        deviations = [subtraction(centre, value) for value in data]
        scaled = [division(spread, deviation) for deviation in deviations]
        cubes = [ratio ** 3 for ratio in scaled]

        cube_total = 0
        for cube in cubes:
            cube_total = cube_total + cube

        return division(count, cube_total)
    except ZeroDivisionError:
        print("Error - Cannot divide by 0")
    except ValueError:
        print("Error - Invalid data inputs")
def zscore(data):
    """Return a list with ``(value - mean) / stdev`` for every value in ``data``.

    The mean and the standard deviation are each computed once from the
    whole of ``data``; the output keeps the input order.
    """
    centre = mean(data)
    spread = stdev(data)
    return [(value - centre) / spread for value in data]
def skew(set):
    """Return ``3 * (mean - median) / sd`` for the given data.

    Args:
        set: Iterable of numbers.  The name shadows the builtin but is kept
            because it is part of the function's signature.

    Returns:
        The skewness value computed from ``mean``, ``median`` and ``sd``.
    """
    # The previous code replaced the argument with the literal values 1..10,
    # so every call returned the same number regardless of its input.
    # A list copy is taken so the helpers receive a list, as they did before,
    # and cannot reorder the caller's own data.
    values = list(set)
    me = mean(values)
    med = median(values)
    std = sd(values)
    return 3 * (me - med) / std
def zScore(data):
    """Return the z-score of one element of ``data`` picked at random.

    The element is chosen with ``random.choice``; the result is
    ``division(subtraction(element, mean), standard_deviation)``, so
    repeated calls can return different values.
    """
    picked = random.choice(data)
    centre = mean(data)
    spread = standard_deviation(data)
    return division(subtraction(picked, centre), spread)
def samp_mean(numbers):
    """Return the mean of a random sample of ``numbers``, computed two ways.

    A sample size between 1 and ``len(numbers)`` (inclusive) is drawn with
    ``random.randint`` and passed to ``getSample``.

    Returns:
        A 2-tuple ``(computed, reference)``: the sample mean from ``mean``
        and from ``statistics.mean``, each rounded to 2 decimals.
    """
    sample_size = random.randint(1, len(numbers))
    sample = getSample(numbers, sample_size)
    computed = round(mean(sample), 2)
    # Reference value from the standard library, returned for comparison.
    reference = round(statistics.mean(sample), 2)
    return computed, reference
def population_variance(lst):
    """Return the population variance of ``lst`` rounded to 3 decimals.

    Population variance is the sum of squared deviations from the mean
    divided by the number of values (not ``n - 1``).

    Args:
        lst: Non-empty collection of numbers.

    Returns:
        The variance as a float rounded to 3 decimal places.
    """
    # The mean is the same for every element, so compute it once instead of
    # once per iteration (which made the loop quadratic).
    centre = mean(lst)
    squared_total = sum((value - centre) ** 2 for value in lst)
    return round(float(squared_total / len(lst)), 3)
def median(data):
    """Return the median of ``data`` without reordering the caller's list.

    ``data`` is validated with ``empty_list_check`` and
    ``check_for_valid_numbers`` before anything else happens.

    Returns:
        The middle element for an odd number of values, or ``mean`` of the
        two middle elements for an even number.

    NOTE(review): ``division(2, data_len)`` is kept as written; it gives the
    middle index only if ``division(a, b)`` computes ``b / a`` -- confirm.
    """
    # Validations
    empty_list_check(data)
    check_for_valid_numbers(data)

    # Sort a copy: the previous in-place data.sort() silently reordered the
    # list owned by the caller.
    ordered = sorted(data)
    data_len = len(ordered)
    middle = division(2, data_len)

    if data_len % 2 == 0:
        # Even count: average the two values on either side of the middle.
        mid = math.trunc(middle)
        return mean([ordered[mid - 1], ordered[mid]])

    # Odd count: the single middle value.
    return ordered[math.floor(middle)]
def standard_deviation(data):
    """Return the standard deviation of ``data`` built from the helper operations.

    Accumulates ``squared(subtraction(avg, value))`` over all values with
    ``addition`` and returns ``squarerooted(division(count, total))``.

    NOTE(review): ``division(num_values, sd1)`` is kept as written; it is
    ``total / count`` only if ``division(a, b)`` computes ``b / a`` -- confirm.
    """
    avg = mean(data)
    num_values = len(data)
    sd1 = 0
    for num in data:
        # Use the computed average.  The previous code passed the ``mean``
        # function object itself to subtraction() and left ``avg`` unused.
        sd1 = addition(sd1, squared(subtraction(avg, num)))
    return squarerooted(division(num_values, sd1))
def StandardDeviationPopulation(data):
    """Return the population standard deviation of ``data``.

    Computed as ``sqrt(sum((x - mean) ** 2) / n)``.

    Args:
        data: Non-empty collection of numbers.

    Returns:
        The standard deviation as a float.
    """
    # The mean is loop-invariant, so compute it once.
    avg = mean(data)
    # Squaring makes the sign irrelevant, so no abs() is needed.
    squared_total = sum((value - avg) ** 2 for value in data)
    # Divide by n *inside* the square root.  The previous code computed
    # sqrt(sum) / n, which is not the standard deviation.
    return math.sqrt(squared_total / len(data))
def confidenceInterval(nums):
    """Return ``[lower, upper]`` bounds around the mean of ``nums``.

    The margin is ``1.96 * stdDev(nums) / sqrt(len(nums))``; 1.96 is the
    multiplier hard-coded in this function.

    Args:
        nums: Non-empty collection of numbers.

    Returns:
        A two-element list ``[mean - margin, mean + margin]``.
    """
    length = len(nums)
    numsMean = mean(nums)
    stanDev = stdDev(nums)
    margin = 1.96 * (stanDev / math.sqrt(length))
    # The lower bound is the mean minus the margin.  The previous code had
    # the signs swapped, returning [upper, lower].
    lowerBound = numsMean - margin
    upperBound = numsMean + margin
    return [lowerBound, upperBound]
def ZScore(num):
    """Return the z-score of every value in ``num``, rounded to 6 decimals.

    Each entry is ``(value - mean(num)) / sd(num)``; the output keeps the
    input order.
    """
    centre = mean(num)
    spread = sd(num)
    return [round((value - centre) / spread, 6) for value in num]
def variance(data):
    """Return the sum of squared deviations from the mean divided by ``len(data)``.

    The divisor is the full count, so no ``n - 1`` correction is applied.
    """
    count = len(data)
    centre = mean(data)

    squared_total = 0
    for value in data:
        deviation = value - centre
        squared_total = squared_total + deviation ** 2

    return squared_total / count
def zscore(numbers, row_value=151):
    """Print and return the z-score of ``row_value`` relative to ``numbers``.

    Computed as ``division(subtraction(row_value, mean), psd(numbers))``.

    Args:
        numbers: Data passed to ``psd`` and ``mean``.
        row_value: The value to score.  Defaults to 151, which was
            previously hard-coded, so existing one-argument calls behave
            as before.

    Returns:
        The z-score; the same value is also printed.
    """
    sd = psd(numbers)
    m = mean(numbers)
    z_score = division(subtraction(row_value, m), sd)
    # The print is kept because existing callers may rely on the output.
    print(z_score)
    return z_score
def StandardDeviationSample(data):
    """Return the sample standard deviation of ``data``.

    Computed as ``sqrt(sum((x - mean) ** 2) / (n - 1))``.

    Args:
        data: Collection of at least two numbers.

    Returns:
        The standard deviation as a float.

    Raises:
        ZeroDivisionError: if ``data`` holds exactly one value.
    """
    # The mean is loop-invariant; computing it once replaces the previous
    # per-element recomputation (quadratic in the data size).
    avg = mean(data)
    # Squaring makes the sign irrelevant, so no abs() is needed.
    squared_total = sum((value - avg) ** 2 for value in data)
    return math.sqrt(squared_total / (len(data) - 1))
def zscore(a):
    """Return ``(x - mean(a)) / stddev(a)`` for each ``x`` in ``a``.

    Every score is rounded to 6 decimal places and the scores are returned
    as a list in input order.
    """
    centre, spread = mean(a), stddev(a)
    return [round((item - centre) / spread, 6) for item in a]
def StdDevPop(data):
    """Return the population standard deviation of ``data``.

    Computed as ``sqrt(sum((x - mean) ** 2) / n)``.

    Args:
        data: Non-empty collection of numbers.

    Returns:
        The standard deviation as a float.
    """
    # The mean is loop-invariant, so compute it once.
    avg = mean(data)
    # Squaring makes the sign irrelevant, so no abs() is needed.
    squared_total = sum((value - avg) ** 2 for value in data)
    # Divide by n *inside* the square root.  The previous code computed
    # sqrt(sum) / n, which is not the standard deviation.
    return math.sqrt(squared_total / len(data))
def StdDevSample(data):
    """Return the sample standard deviation of ``data``.

    Computed as ``sqrt(sum((x - mean) ** 2) / (n - 1))``.

    Args:
        data: Collection of at least two numbers.

    Returns:
        The standard deviation as a float.

    Raises:
        ZeroDivisionError: if ``data`` holds exactly one value.
    """
    # The mean is loop-invariant; computing it once replaces the previous
    # per-element recomputation (quadratic in the data size).
    avg = mean(data)
    # Squaring makes the sign irrelevant, so no abs() is needed.
    squared_total = sum((value - avg) ** 2 for value in data)
    return math.sqrt(squared_total / (len(data) - 1))
def _ss(data, c=None): if c is None: c = mean(data) total = total2 = 0 for x in data: total += (x - c)**2 total2 += (x - c) total -= total2**2 / len(data) return total
def conf_interval(data):
    """Return the ``(upper, lower)`` bounds of a 95% confidence interval.

    The margin is ``1.96 * psd(data) / sqrt(len(data))`` built from the
    helper operations; each bound is rounded to 2 decimals.

    Args:
        data: Non-empty collection of numbers.

    Returns:
        A 2-tuple ``(mean + margin, mean - margin)``; note that the upper
        bound comes first.
    """
    x = mean(data)
    dev = psd(data)
    z = 1.96  # for 95% confidence
    standard_error = division(dev, squareroot(len(data)))
    margin = multiplication(z, standard_error)
    conf_upper_level = round(addition(x, margin), 2)
    # Lower bound is mean - margin.  The previous code passed the arguments
    # as (margin, mean), which yields margin - mean.
    # NOTE(review): assumes subtraction(a, b) computes a - b, matching the
    # a / b order that division() needs for the standard error -- confirm.
    conf_lower_level = round(subtraction(x, margin), 2)
    return conf_upper_level, conf_lower_level
def median(data):
    """Return the median of ``data`` without reordering the caller's list.

    Args:
        data: Non-empty iterable of numbers.

    Returns:
        The middle value for an odd number of values, or ``mean`` of the
        two middle values for an even number.

    Raises:
        IndexError: if ``data`` is empty.
    """
    # Sort a copy: the previous in-place data.sort() silently reordered the
    # list owned by the caller.
    ordered = sorted(data)
    middle = len(ordered) // 2

    if len(ordered) % 2:
        return ordered[middle]

    # Return the average as-is.  The previous code wrapped it in int(),
    # truncating e.g. the median of [1, 2] from 1.5 to 1.
    return mean([ordered[middle - 1], ordered[middle]])
def standard_deviation(numbers):
    """Return the standard deviation of ``numbers`` using an ``n - 1`` divisor.

    Accumulates ``square(subtraction(avg, value))`` over all values with
    ``addition`` and returns ``sqrt(division(n - 1, total))``.

    NOTE(review): ``division((n - 1), t)`` is kept as written; it is
    ``total / (n - 1)`` only if ``division(a, b)`` computes ``b / a`` --
    confirm against the helper.
    """
    n = len(numbers)
    # The mean does not change between iterations; computing it once avoids
    # the previous per-element recomputation (quadratic in n).
    avg = mean(numbers)

    t = 0
    for value in numbers:
        t = addition(square(subtraction(avg, value)), t)

    return sqrt(division((n - 1), t))
def mean_deviation(data):
    """Return the mean of the absolute differences between each value and the mean.

    On ``ZeroDivisionError`` or ``ValueError`` a message is printed and the
    function returns ``None``.
    """
    try:
        # Step 1: the overall mean of the data.
        centre = mean(data)
        # Step 2: the absolute distance of every value from that mean.
        distances = [abs(subtraction(item, centre)) for item in data]
        # Step 3: the average of those distances.
        return mean(distances)
    except ZeroDivisionError:
        print("Error - Cannot divide by 0")
    except ValueError:
        print("Error - Invalid data inputs")
def sample_correlation(data, data1):
    """Return a correlation value for two paired data sets, rounded to 9 decimals.

    The result is ``division(int(root), cross_total)`` where ``cross_total``
    is the sum of the element-wise products of the two deviation lists and
    ``root`` is ``square_root`` of the (truncated) product of the two sums
    of squared deviations.

    On ``ZeroDivisionError`` or ``ValueError`` a message is printed and the
    function returns ``None``.

    NOTE(review): this is ``cross_total / root`` only if ``division(a, b)``
    computes ``b / a`` -- confirm against the helper.
    """
    try:
        first_mean = mean(data)
        second_mean = mean(data1)

        # NOTE(review): the first mean is rounded to a whole number before
        # use while the second is used as-is -- looks unintentional; confirm.
        first_devs = [subtraction(int(round(first_mean, 0)), value)
                      for value in data]
        second_devs = [subtraction(second_mean, value) for value in data1]

        # Sum of the element-wise products of the two deviation lists.
        cross_total = 0
        for term in np.multiply(first_devs, second_devs):
            cross_total = cross_total + term

        first_squares = 0
        second_squares = 0
        for deviation in first_devs:
            first_squares = first_squares + square(deviation)
        for deviation in second_devs:
            second_squares = second_squares + square(deviation)

        # NOTE(review): the int() casts below truncate intermediate values;
        # kept as-is because the helpers' input requirements are not visible.
        squares_product = multiplication(int(first_squares), second_squares)
        root = square_root(int(squares_product))
        ratio = division(int(root), cross_total)

        return round(ratio, 9)
    except ZeroDivisionError:
        print("Error - Cannot divide by 0")
    except ValueError:
        print("Error - Invalid data inputs")