def Pop_correlation_coefficient():
    """Return a correlation coefficient for two built-in demo data sets.

    Each value is standardised with the project helpers (the original inline
    note gives the intent as ``z_i = (x_i - x_bar) / s_x``), the paired
    scores are multiplied, and the sum of products is divided by
    ``len(x_data) - 1``.

    The helper calls keep the original argument order; the operand order of
    ``subtraction`` and ``division`` is defined outside this block.
    """
    x_data = [1, 25, 34, 4, 51]
    y_data = [6, 7, 8, 9, 10]

    x_mean = mean(x_data)
    y_mean = mean(y_data)
    x_sd = st_dev(x_data)
    y_sd = st_dev(y_data)

    # Standardised scores for each series.
    zx = [division(subtraction(x_mean, xi), x_sd) for xi in x_data]
    zy = [division(subtraction(y_mean, yi), y_sd) for yi in y_data]

    tot_sum = sum(p * q for p, q in zip(zx, zy))
    # Previously a literal 4, which is len(x_data) - 1 for the demo data.
    return tot_sum / (len(x_data) - 1)
def Pop_correlation_coefficient(x_data, y_data):
    """Combine standardised scores of two series into one coefficient.

    Every element of each series is passed through
    ``division(subtraction(series_mean, element), series_st_dev)``; the
    paired results are multiplied and accumulated, and the total is passed
    to ``division`` together with ``subtraction(1, len(x_data))``.

    The project helpers are defined elsewhere; their operand order is not
    visible here, so all calls keep the original argument order.
    """
    mean_x = mean(x_data)
    mean_y = mean(y_data)
    sd_x = st_dev(x_data)
    sd_y = st_dev(y_data)

    scores_x = [division(subtraction(mean_x, value), sd_x) for value in x_data]
    scores_y = [division(subtraction(mean_y, value), sd_y) for value in y_data]

    running = 0
    # Indexed by x_data's length, exactly as the original pairing did.
    for idx in range(len(x_data)):
        running = addition(running, multiplication(scores_x[idx], scores_y[idx]))

    return division(running, subtraction(1, len(x_data)))
def variance_samp_prop(numbers):
    """Compute a proportion-based variance figure on a random sample.

    A sample size between 1 and ``len(numbers)`` (inclusive) is drawn with
    ``random.randint``; ``getSample`` produces the sample and ``proportion``
    its proportion ``p``. The result is
    ``division(subtraction(len(sample), 1), multiplication(p, subtraction(p, 1)))``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    sample_size = random.randint(1, len(numbers))
    sample = getSample(numbers, sample_size)
    p = proportion(sample)

    p_term = multiplication(p, subtraction(p, 1))
    size_term = subtraction(len(sample), 1)
    return division(size_term, p_term)
def v_samp_proportion(lst):
    """Compute a proportion-based variance figure on a random sample of lst.

    The sample size is drawn with ``random.randint(1, len(lst))`` and the
    sample itself with ``getSample``. With ``p = proportion(sample)`` the
    result is
    ``division(multiplication(p, subtraction(p, 1)), subtraction(len(sample), 1))``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    sample_size = random.randint(1, len(lst))
    sample = getSample(lst, sample_size)
    p = proportion(sample)

    p_term = multiplication(p, subtraction(p, 1))
    size_term = subtraction(len(sample), 1)
    return division(p_term, size_term)
def st_dev(lst):
    """Return the root of the scaled sum of squared deviations of lst.

    Squared deviations from ``mean(lst)`` are accumulated with ``addition``;
    the total is passed to ``division`` with ``subtraction(1, len(lst))`` and
    the result to ``root``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    centre = mean(lst)

    squared_total = 0
    for value in lst:
        deviation = subtraction(value, centre)
        squared_total = addition(squared_total, square(deviation))

    scaled = division(squared_total, subtraction(1, len(lst)))
    return root(scaled)
def samp_st_dev(numbers):
    """Return a hand-computed and a library standard deviation of a sample.

    A random sample (size from ``random.randint(1, len(numbers))``) is drawn
    with ``getSample``. Squared deviations from the sample mean are summed,
    scaled with ``division(subtraction(1, n), total)`` and rooted.

    Returns:
        tuple: ``(root(scaled), statistics.stdev(sample))``. The second item
        is the library value, kept for comparison.

    Raises:
        statistics.StatisticsError: if the sample has fewer than two values
        (raised by ``statistics.stdev``).

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    sample_size = random.randint(1, len(numbers))
    sample = getSample(numbers, sample_size)
    n = len(sample)

    total = 0
    if n:
        # The mean does not change inside the loop; compute it once instead
        # of once per element.
        centre = mean(sample)
        for value in sample:
            total = addition(square(subtraction(value, centre)), total)

    scaled = division(subtraction(1, n), total)
    actual_sd = statistics.stdev(sample)  # library value, for comparison
    return root(scaled), actual_sd
def sample_st_deviation(data, sample_size):
    """Return the standard deviation of a sample drawn from data.

    The sample is drawn with ``getSample(data, sample_size)``. Squared
    deviations of every sample value from the sample's own mean are summed,
    scaled with ``division(total, subtraction(len(sample), 1))`` and passed
    to ``squareroot``.

    Fixes over the previous version:
      * each deviation now uses the sample *value*; before, the loop
        overwrote its own variable with ``subtraction(len(sample), x_bar)``;
      * squared deviations are accumulated; before, only the last one was
        kept and doubled;
      * the mean is taken from the drawn sample; before, ``sample_mean()``
        was called with no arguments and so could not see this sample.

    Raises:
        ZeroDivisionError: if the drawn sample is empty.

    NOTE(review): helper operand order is defined elsewhere; the
    ``subtraction``/``division`` calls keep the original argument order.
    """
    sample = getSample(data, sample_size)
    sample_values = len(sample)

    # Plain arithmetic here so the mean does not depend on helper operand order.
    x_bar = sum(sample) / sample_values
    n = subtraction(sample_values, 1)

    total = 0
    for value in sample:
        # The difference is squared, so its sign does not matter.
        total = addition(total, square(subtraction(value, x_bar)))

    divide = division(total, n)
    return squareroot(divide)
def population_correlation_coefficient(list_x, list_y):
    """Return a correlation coefficient of two series, rounded to 4 places.

    For every index of ``list_x`` the deviations of both series from their
    means are each divided by the series' ``standard_deviation`` and the
    two quotients multiplied; the products are summed. The sum is multiplied
    by ``division(1, addition(len(list_x), len(list_y)))``.

    Raises:
        IndexError: if ``list_y`` is shorter than ``list_x``.

    NOTE(review): the scale factor uses the combined length of both lists;
    confirm this is intended rather than a single series length.
    Helper operand order is defined elsewhere and is kept as in the original.
    """
    sd_x = standard_deviation(list_x)
    sd_y = standard_deviation(list_y)
    count = len(list_x)

    total = 0
    if count:
        # Means are loop-invariant; compute each once, not once per element.
        mean_x = mean(list_x)
        mean_y = mean(list_y)
        for i in range(count):
            diff_x = subtraction(list_x[i], mean_x)
            diff_y = subtraction(list_y[i], mean_y)
            total = total + multiplication(division(diff_x, sd_x),
                                           division(diff_y, sd_y))

    scale = division(1, addition(len(list_x), len(list_y)))
    return round(float(multiplication(scale, total)), 4)
def sample_std_dev(data):
    """Return a standard-deviation figure for a random sample of data.

    The sample size comes from ``random.randint(1, len(data))`` and the
    sample from ``get_sample``. Squared deviations from
    ``population_mean(sample)`` are accumulated, passed to ``division``
    together with ``subtraction(1, len(sample))`` and then to ``sq_rt``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    sample_size = random.randint(1, len(data))
    drawn = get_sample(data, sample_size)
    centre = population_mean(drawn)

    running = 0
    for value in drawn:
        running = addition(running, square(subtraction(value, centre)))

    scaled = division(subtraction(1, len(drawn)), running)
    return sq_rt(scaled)
def ssd(data):
    """Return a standard-deviation figure for a random sample of data.

    The sample size comes from ``random.randint(1, len(data))`` and the
    sample from ``getSample``. Squared deviations from ``mean(sample)`` are
    accumulated, passed to ``division`` together with
    ``subtraction(1, len(sample))`` and then to ``squar_rot``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    sample_size = random.randint(1, len(data))
    drawn = getSample(data, sample_size)
    centre = mean(drawn)

    running = 0
    for value in drawn:
        running = addition(running, squaree(subtraction(value, centre)))

    scaled = division(subtraction(1, len(drawn)), running)
    return squar_rot(scaled)
def variance_of_population_proportion(numbers):
    """Return a proportion-based variance figure for numbers.

    With ``p = proportion(numbers)`` the result is
    ``division(multiplication(p, subtraction(p, 1)), len(numbers))``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    size = len(numbers)
    p = proportion(numbers)
    p_term = multiplication(p, subtraction(p, 1))
    return division(p_term, size)
def Sample_Correlation(list1, list2):
    """Return the ratio of summed cross-deviations to the rooted product of
    summed squared deviations for two series.

    Deviations are taken with ``subtraction(element, average(series))`` for
    every index of ``list1``. The result is
    ``sum(product(dx, dy)) / squareRoot(sum(square(dx)) * sum(square(dy)))``.

    Raises:
        IndexError: if ``list2`` is shorter than ``list1``.
    """
    count = len(list1)
    mean_1 = average(list1)
    mean_2 = average(list2)

    cross_total = 0
    sq_total_1 = 0
    sq_total_2 = 0
    for idx in range(count):
        dev_1 = subtraction(list1[idx], mean_1)
        dev_2 = subtraction(list2[idx], mean_2)
        cross_total += product(dev_1, dev_2)
        sq_total_1 += square(dev_1)
        sq_total_2 += square(dev_2)

    return cross_total / squareRoot(sq_total_1 * sq_total_2)
def confidence_interval(numbers, confidence_level=0.95):
    """Return the two bounds of a confidence interval around the mean.

    The margin is ``multiplication(division(squareroot(n), sd), z)`` where
    ``z`` is the two-sided standard-normal critical value for
    ``confidence_level`` (about 1.96 for 0.95). The previous version used
    the tail area ``(1 - confidence_level) / 2`` (0.025) as the multiplier,
    which is a probability, not a critical value.

    Args:
        numbers: the data set.
        confidence_level: coverage in the open interval (0, 1); defaults to
            0.95, the value that was previously hard-coded.

    Returns:
        list: ``[subtraction(margin, m), addition(margin, m)]``.

    Raises:
        statistics.StatisticsError: if ``confidence_level`` is not strictly
        between 0 and 1.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    # Local import keeps this block self-contained (Python 3.8+).
    from statistics import NormalDist

    m = mean(numbers)
    tail = (1 - confidence_level) / 2
    z = NormalDist().inv_cdf(1 - tail)

    sd = standard_deviation(numbers)
    root_n = squareroot(len(numbers))

    # Computed once; the original evaluated the same expression twice.
    margin = multiplication(division(root_n, sd), z)
    return [subtraction(margin, m), addition(margin, m)]
def skewness(data):
    """Return a skewness figure for data, or None after printing an error.

    Each element is turned into ``division(stddev, subtraction(mean, element))``
    and cubed; the cubes are summed and the result is
    ``division(len(data), cube_sum)``.

    ``ZeroDivisionError`` and ``ValueError`` are caught: a message is printed
    and the function falls through, returning ``None``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    try:
        spread = stddev(data)
        centre = mean(data)
        count = len(data)

        # Staged exactly like the original: offsets, then ratios, then cubes.
        offsets = [subtraction(centre, value) for value in data]
        ratios = [division(spread, offset) for offset in offsets]
        cubes = [ratio ** 3 for ratio in ratios]

        return division(count, sum(cubes))
    except ZeroDivisionError:
        print("Error - Cannot divide by 0")
    except ValueError:
        print("Error - Invalid data inputs")
def variance(data):
    """Return the mean of the squared deviations of the checked data.

    ``data`` is first passed through ``check``; each element's deviation
    ``subtraction(mean, element)`` is squared with ``sq`` and the squares
    are averaged with ``mean``.
    """
    cleaned = check(data)
    centre = mean(cleaned)
    squared = [sq(subtraction(centre, value)) for value in cleaned]
    return mean(squared)
def zscore(a, b, c):
    """Return ``division(subtraction(float(a), float(b)), float(c))``.

    Args:
        a: the score; b: the mean; c: the standard deviation (names taken
           from the original locals). Each is converted with ``float``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    value, centre, spread = float(a), float(b), float(c)
    return division(subtraction(value, centre), spread)
def standard_deviation(data):
    """Return the rooted, scaled sum of squared deviations of data.

    Squared deviations from ``mean(data)`` are accumulated with ``addition``
    and the total is passed to ``division`` with ``len(data)``, then to
    ``squarerooted``.

    Fix: deviations are now taken from the computed average ``avg``; the
    previous version passed the *function* ``mean`` to ``subtraction``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    avg = mean(data)
    num_values = len(data)

    total = 0
    for num in data:
        total = addition(total, squared(subtraction(avg, num)))

    return squarerooted(division(num_values, total))
def zscore(data, x):
    """Return ``division(stddev, subtraction(mean, x))`` for the checked data.

    ``data`` is passed through ``check`` before the mean and standard
    deviation are taken.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    cleaned = check(data)
    centre = mean(cleaned)
    spread = stddev(cleaned)
    return division(spread, subtraction(centre, x))
def z_score(numbers, row_value=151):
    """Print and return the z-score of row_value against numbers.

    The result is
    ``division(subtraction(row_value, population_mean), pop_stand_dev)``.

    Args:
        numbers: the population data.
        row_value: the value to score; defaults to 151, which was
            previously hard-coded.

    Side effects:
        Prints the result before returning it (kept for existing callers).

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    std_dev = pop_stand_dev(numbers)
    # Named `centre` so a module-level `mean` helper is not shadowed.
    centre = population_mean(numbers)

    z_score_ = division(subtraction(row_value, centre), std_dev)
    print(z_score_)
    return z_score_
def z_score(data, x=62):
    """Return ``division(sample_sd, subtraction(x, population_mean))``.

    Args:
        data: the data set.
        x: the value to score; defaults to 62, which was previously
            hard-coded.

    NOTE(review): the original author marked this as "may not work".
    ``sample_st_deviation`` is called with ``data`` only; confirm that its
    signature accepts a single argument.
    Helper operand order is defined elsewhere and is kept as in the original.
    """
    u = population_mean(data)
    sample_sd = sample_st_deviation(data)
    return division(sample_sd, subtraction(x, u))
def systemicSample(aLst):
    """Return every step-th element of aLst as a systematic sample.

    The step is ``round(division(num, 4))`` where ``num`` comes from
    ``RandomNumber.random_number_seed(2, len(aLst), seed=len(aLst))``.
    Sampling starts at ``subtraction(step, 1)`` and advances by ``step``
    while the index does not exceed ``subtraction(len(aLst), 1)``.

    Fix: a step below 1 is raised to 1. With a step of 0 the index never
    advanced and the loop did not terminate.

    NOTE(review): the original contained ``if nNum == 1: n = 3``, which
    assigned an unused variable and had no effect; it has been removed.
    Confirm whether ``nNum = 3`` was intended.
    Helper operand order is defined elsewhere and is kept as in the original.
    """
    lenLst = len(aLst)
    num = RandomNumber.random_number_seed(2, lenLst, seed=lenLst)

    step = round(division(num, 4))
    if step < 1:
        # Rounding can produce 0, which would stall the loop below.
        step = 1

    sample = []
    last_index = subtraction(lenLst, 1)
    index = subtraction(step, 1)
    while index <= last_index:
        sample.append(aLst[index])
        index += step
    return sample
def z_score(num):
    """Return a list with one z-score per element of num, rounded to 6 places.

    Each score is ``division(subtraction(element, populationmean), stddev)``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    centre = populationmean(num)
    spread = stddev(num)
    return [round(division(subtraction(value, centre), spread), 6) for value in num]
def cochran(data, lstLen, seed):
    """Return ``(z**2 * p * q) / e**2`` built from project statistics.

    ``z`` comes from ``Z_Score.zscore``, ``p`` from
    ``PopulationProportion.proportion``, ``e`` from ``MarginError.margin``
    and ``q`` is ``subtraction(1, p)``. Powers use ``exponentiation``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    z_value = Z_Score.zscore(data, seed)
    p = PopulationProportion.proportion(data, lstLen, seed)
    margin = MarginError.margin(data, seed)
    q = subtraction(1, p)

    numerator = exponentiation(z_value, 2) * p * q
    return numerator / exponentiation(margin, 2)
def standard_deviation(numbers):
    """Return the rooted, scaled sum of squared deviations of numbers.

    Squared deviations from ``mean(numbers)`` are accumulated with
    ``addition``; the total is passed to ``division`` together with
    ``len(numbers) - 1`` and then to ``root``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    n = len(numbers)

    total = 0
    if n:
        # The mean is loop-invariant; compute it once rather than per element.
        centre = mean(numbers)
        for value in numbers:
            total = addition(square(subtraction(centre, value)), total)

    return root(division((n - 1), total))
def sample_correlation(data, data1):
    """Return a correlation figure rounded to 9 places, or None on error.

    Deviations of ``data`` are taken from its mean rounded to an integer;
    deviations of ``data1`` from its unrounded mean. The element-wise
    products (``np.multiply``) are summed, the squared deviations of each
    series are summed, and the result is
    ``division(int(square_root(int(multiplication(int(sq1), sq2)))), cross)``.

    ``ZeroDivisionError`` and ``ValueError`` are caught: a message is printed
    and the function falls through, returning ``None``.

    NOTE(review): the ``int(...)`` truncations and the rounded first mean
    discard precision; they are reproduced unchanged here. Confirm whether
    they are intentional.
    Helper operand order is defined elsewhere and is kept as in the original.
    """
    try:
        mean1 = mean(data)
        mean2 = mean(data1)

        offsets1 = [subtraction(int(round(mean1, 0)), value) for value in data]
        offsets2 = [subtraction(mean2, value) for value in data1]

        cross_total = sum(np.multiply(offsets1, offsets2))
        squares1 = sum(square(offset) for offset in offsets1)
        squares2 = sum(square(offset) for offset in offsets2)

        scaled = multiplication(int(squares1), squares2)
        rooted = square_root(int(scaled))
        ratio = division(int(rooted), cross_total)
        return round(ratio, 9)
    except ZeroDivisionError:
        print("Error - Cannot divide by 0")
    except ValueError:
        print("Error - Invalid data inputs")
def confidence_interval(data, z_value=1.05):
    """Return the lower and upper bounds of an interval around the mean.

    The margin of error is
    ``multiplication(z_value, division(squareroot(len(data)), sd))`` with
    ``sd = pop_standard_dev(data)``; the bounds are the sample mean minus
    and plus that margin, computed with ``subtraction`` and ``addition``.

    Args:
        data: the data set.
        z_value: multiplier applied to the scaled deviation; defaults to
            1.05, which was previously hard-coded.

    Returns:
        tuple: ``(lower, upper)``.

    NOTE(review): 1.05 is not the usual 95% critical value (about 1.96);
    confirm the intended confidence level.
    Helper operand order is defined elsewhere and is kept as in the original.
    """
    # Named `centre` so a module-level `mean` helper is not shadowed.
    centre = sample_mean(data)
    sd = pop_standard_dev(data)
    size = len(data)

    scaled = division(squareroot(size), sd)
    margin_of_error = multiplication(z_value, scaled)

    lower = subtraction(centre, margin_of_error)
    upper = addition(centre, margin_of_error)
    return lower, upper
def psd(numbers):
    """Return the rooted, scaled sum of squared deviations of numbers.

    Squared deviations from ``mean(numbers)`` are accumulated with
    ``addition``; the total is passed to ``division`` with ``len(numbers)``
    and the result to ``squar_rot``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    count = len(numbers)
    centre = mean(numbers)

    running = 0
    for value in numbers:
        running = addition(running, squaree(subtraction(value, centre)))

    return squar_rot(division(count, running))
def get_z_score(data):
    """Return a list with one z-score per element of data.

    Each score is
    ``division(get_standard_deviation(data), subtraction(get_mean(data), element))``.

    Args:
        data: a sequence of numbers, or a single ``int``/``float`` which is
            wrapped in a one-element list. (Previously only ``float``
            scalars were wrapped.)

    Returns:
        list: the scores, in input order; empty for empty input.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    if isinstance(data, (int, float)):
        data = [data]

    value_mean = get_mean(data)
    if len(data) == 0:
        return []

    # Loop-invariant: compute once instead of once per element.
    spread = get_standard_deviation(data)
    return [division(spread, subtraction(value_mean, value)) for value in data]
def unknown_pop_sample(data, seed, percent):
    """Return ``squaring(division(z, e)) * p * q`` from project statistics.

    ``z`` comes from ``Z_Score.zscore``, ``e`` from ``MarginError.margin``,
    ``p`` is ``percent`` and ``q`` is ``subtraction(1, percent)``.

    Helper operand order is defined elsewhere and is kept as in the original.
    """
    z_value = Z_Score.zscore(data, seed)
    margin = MarginError.margin(data, seed)
    q = subtraction(1, percent)

    ratio = division(z_value, margin)
    return squaring(ratio) * percent * q
def zscore(numbers):
    """Return a list with one z-score per element of numbers, rounded to 3 places.

    Each score is
    ``division(standard_deviation, subtraction(mean, element))``. A leftover
    comment in the original gave the intent as ``(element - mean) / sd``;
    the helper calls keep the original argument order, since the helpers'
    operand order is defined elsewhere.

    The unused ``n`` and the redundant ``z = 0`` reset were removed.
    """
    u = mean(numbers)
    sig = standard_deviation(numbers)
    return [round(division(sig, subtraction(u, value)), 3) for value in numbers]