def main():
    """Report range, variance, standard deviation and quartiles for a data set.

    The raw values come from ``dclean.DataClean().get_data()``; they are
    converted to ``int`` and sorted ascending before being passed to the
    statistics helpers (``get_std_devs``, ``get_mean``, ``count_std_devs``
    and ``get_median``). All results are printed; nothing is returned.
    """
    values = sorted(int(raw) for raw in dclean.DataClean().get_data())
    print("The data set we will use for calculating the range, variance, and standard deviation (std_dev) is:\n\t{}".format(values))

    # get_std_devs yields nine values; the first four (sum of squares, sum,
    # count, average) are not reported by this variant.
    (_, _, _, _, spread,
     variance_pop, variance_samp,
     deviation_pop, deviation_samp) = get_std_devs(values)
    print("For population:\n\tThe range is: {}\n\tThe variance is: {}\n\tThe std_dev is: {}".format(spread, variance_pop, deviation_pop))
    print("For sample:\n\tThe range is: {}\n\tThe variance is: {}\n\tThe std_dev is: {}".format(spread, variance_samp, deviation_samp))

    # Distance of the fixed reference value 173 from the mean, in sample
    # standard deviations.
    center = get_mean(values)
    distance = count_std_devs(center, deviation_samp, 173)
    print("173 is {} standard deviations from the mean".format(distance))

    # Each quartile result is indexed with [0] to obtain the value to print.
    lower, middle, upper, spread_iq = get_median(values)
    print("the median for this data set is {}".format(middle[0]))
    print("quartile 1 = {}, quartile 3 = {}, irq = {}".format(lower[0], upper[0], spread_iq))
def main():
    """Print the sum of squares, sum, count, average, range, variance and std_dev.

    Values are fetched through ``dclean.DataClean().get_data()``, cast to
    ``int`` and sorted ascending, then summarised by ``get_std_devs`` (which
    this variant expects to yield exactly seven values).
    """
    cleaner = dclean.DataClean()
    values = sorted(int(raw) for raw in cleaner.get_data())

    header = "The data set we will use for calculating the range, variance, and standard deviation (std_dev) is:\n\t{}"
    print(header.format(values))

    squares_total, total, size, average, spread, var, dev = get_std_devs(values)

    print(squares_total)
    print("sum: {}\nn: {}\navg: {}".format(total, size, average))
    print(
        "The range is: {}\nThe variance is: {}\nThe std_dev is: {}".format(
            spread, var, dev
        )
    )
def main():
    """Report spread statistics and quartiles for family counts paired with bill ranges.

    Three successive ``get_data()`` calls on one ``dclean.DataClean`` instance
    supply the family counts, the low bill bounds and the high bill bounds.
    Each count is paired with its ``(low, high)`` range, the pairs are ordered
    by the low bound, and the statistics are computed over the counts only.
    """
    cleaner = dclean.DataClean()

    # Counts are converted before the bill bounds are requested, so the order
    # of get_data() calls and of any conversion error stays the same.
    counts = [int(raw) for raw in cleaner.get_data()]
    lows = cleaner.get_data()
    highs = cleaner.get_data()
    ranges = [(int(low), int(high)) for low, high in zip(lows, highs)]

    # Pair every count with its (low, high) range, ordered by the low bound.
    records = sorted(zip(counts, ranges), key=lambda record: record[1][0])
    print("The data set we will use for calculating the range, variance, and standard deviation (std_dev) is:\n\t{}".format(records))

    def counts_in_order():
        # A fresh list on every call, so each helper receives its own copy.
        return [record[0] for record in records]

    (_, _, _, _, spread,
     variance_pop, variance_samp,
     deviation_pop, deviation_samp) = get_std_devs(counts_in_order())
    print("For population:\n\tThe range is: {}\n\tThe variance is: {}\n\tThe std_dev is: {}".format(spread, variance_pop, deviation_pop))
    print("For sample:\n\tThe range is: {}\n\tThe variance is: {}\n\tThe std_dev is: {}".format(spread, variance_samp, deviation_samp))

    center = get_mean(counts_in_order())
    distance = count_std_devs(center, deviation_samp, 173)
    print("173 is {} standard deviations from the mean".format(distance))

    lower, middle, upper, spread_iq = get_median(counts_in_order())
    print("the median for this data set is {}".format(middle[0]))
    print("quartile 1 = {}, quartile 3 = {}, irq = {}".format(lower[0], upper[0], spread_iq))
def main():
    """Compute and print the weighted mean of exam scores.

    Scores and their weights are read with two successive
    ``dclean.DataClean().get_data()`` calls and converted to ``int``. The
    weighted mean is ``sum(score * weight) / sum(weight)``.

    Raises:
        ValueError: if the number of weights differs from the number of
            scores. ``zip`` would otherwise silently drop the unmatched
            entries and print a wrong mean.
        ZeroDivisionError: if the weights sum to zero.
    """
    dcleaner = dclean.DataClean()

    print("first, enter the score values for exam1, exam2, and the final")
    time.sleep(.5)  # short pause between the instruction and the data request
    data = dcleaner.get_data()

    print(
        "Now, enter the associated weights, and sorry but you also need to enter any repeat values"
    )
    time.sleep(.5)
    weights = dcleaner.get_data()

    data = [int(val) for val in data]
    weights = [int(val) for val in weights]
    print("data converted to int values is now:\n\t{}".format(data))
    print("weights converted to int values is now:\n\t{}".format(weights))

    # Every score needs exactly one weight; a mismatch would be truncated by
    # zip() and produce a plausible-looking but incorrect result.
    if len(data) != len(weights):
        raise ValueError(
            "expected one weight per score, got {} scores and {} weights".format(
                len(data), len(weights)))

    total_weight = sum(weights)
    if total_weight == 0:
        # Same exception type the bare division would raise, with a clearer
        # explanation of the cause.
        raise ZeroDivisionError(
            "the weights sum to zero; the weighted mean is undefined")

    wmean = sum(x * w for x, w in zip(data, weights)) / total_weight
    print("the calculated weighted mean is then {}".format(wmean))
def main():
    """Report spread statistics, quartiles, an 82% position and a rank for a data set.

    Values are fetched via ``dclean.DataClean().get_data()``, converted to
    ``int`` and sorted ascending. The function then prints:

    * range, variance and std_dev for population and sample (``get_std_devs``),
    * how many sample standard deviations 173 lies from the mean,
    * the median, quartiles, IQR and their indices (``get_median``),
    * the value found at the 82% index of the sorted data,
    * the index position and percentage rank of the value 32, or a notice
      when 32 does not occur in the data.
    """
    dcleaner = dclean.DataClean()
    data = sorted(int(val) for val in dcleaner.get_data())
    print(
        "The data set we will use for calculating the range, variance, and standard deviation (std_dev) is:\n\t{}"
        .format(data))

    # The first four results (sum of squares, sum, n, avg) are not reported.
    (_, _, _, _, rang,
     pop_variance, samp_variance,
     pop_std_dev, samp_std_dev) = get_std_devs(data)
    print(
        "For population:\n\tThe range is: {}\n\tThe variance is: {}\n\tThe std_dev is: {}"
        .format(rang, pop_variance, pop_std_dev))
    print(
        "For sample:\n\tThe range is: {}\n\tThe variance is: {}\n\tThe std_dev is: {}"
        .format(rang, samp_variance, samp_std_dev))

    mean = get_mean(data)
    count = count_std_devs(mean, samp_std_dev, 173)
    print("173 is {} standard deviations from the mean".format(count))

    q1, med, q3, iqr = get_median(data)
    print("the median for this data set is {}".format(med[0]))
    print("quartile 1 = {}, q2==med = {}, quartile 3 = {}, iqr = {}".format(
        q1[0], med[0], q3[0], iqr))
    print("q1's idx = {}, med's idx = {}, q3's idx = {}, and list len = {}".
          format(q1[1], med[1], q3[1], len(data)))

    # round(len * .82) equals len(data) for very short lists (e.g. 1 or 2
    # items), which would index past the end; clamp to the last valid index.
    pct_idx = min(round(len(data) * .82), len(data) - 1)
    print("the value at the 82% index is: {}".format(data[pct_idx]))

    target = 32
    try:
        # list.index raises ValueError when the target is absent.
        target_idx = data.index(target)
    except ValueError:
        print("{} does not occur in the data set".format(target))
    else:
        print(
            "{} occurs at the index position of {} out of {}, for percentage rank of {}"
            .format(target, target_idx, len(data), target_idx / len(data)))
# NOTE(review): the line below is whitespace-collapsed and cut off at both ends.
# It opens with the last three statements of a helper whose header is not
# visible here (presumably _segment_median -- confirm) and stops partway
# through the __main__ script. TODO: restore the original line breaks; in this
# collapsed form everything after the first '#' on the line is lexically a comment.
#
# Helper tail: lo and hi are the floor and ceiling of (strt + stp) / 2; it
# returns the average of data[lo] and data[hi] together with int((lo + hi) / 2).
#
# get_median(data): calls _segment_median three times -- over (0, len(data)),
# (0, midx) and (midx, len(data)) -- and returns
# ((quart1, q1idx), (med, midx), (quart3, q3idx), iqr) with iqr = quart3 - quart1.
# Whether stp is treated as inclusive or exclusive depends on the unseen part of
# the helper -- verify before relying on the quartile values.
#
# __main__ script (visible part): fetches values via dclean.DataClean().get_data(),
# converts them to int, prints the list, the median and the quartiles/IQR, then
# starts an outlier computation with upper_bound = quart3[0] + iqr * 1.5.
lo = int(np.floor((strt + stp) / 2)) hi = int(np.ceil((strt + stp) / 2)) return (data[lo] + data[hi]) / 2, int((lo + hi) / 2) def get_median(data: list): med, midx = _segment_median(data, 0, len(data)) quart1, q1idx = _segment_median(data, 0, midx) quart3, q3idx = _segment_median(data, midx, len(data)) iqr = quart3 - quart1 return (quart1, q1idx), (med, midx), (quart3, q3idx), iqr if __name__ == "__main__": dcleaner = dclean.DataClean() data_list = dcleaner.get_data() data_list = [int(x) for x in data_list] # print(json.dumps(data_dict, indent=4)) data_list.sort() mean_q2 = 0 print("the cleaned up data_list is as follow:\n\t{}".format(data_list)) mean1 = get_mean(data_list) quart1, med, quart3, iqr = get_median(data_list) print("the median for this data set is {}".format(med[0])) print("quartile 1 = {}, quartile 3 = {}, irq = {}".format( quart1[0], quart3[0], iqr)) outliers = [] upper_bound = quart3[0] + iqr * 1.5