def population_statistics(
    fea_des,
    dt,
    treat,
    tar,
    threshold,
    is_above,
    stat_funcs,
):
    """
    Inputs -
        fea_des - Str(): Description of the data needed for the calculations
            of the stat_funcs. Not used by the current implementation.
        dt - Dict(): All the data
            dt.keys() - Str()
            dt.values() - List() :: Order()
        treat - Str(): Feature whose values are compared to the threshold
            * Must be in dt.keys()
        tar - Str(): Feature whose values are reported
            * Must be in dt.keys()
        threshold - Order(): A value that is compared to the values in
            dt[treat]
            * Must be comparable to dt[treat][i]
        is_above - Bool(): If truthy, keep the members of dt[tar] whose
            matching dt[treat] value is NOT <= threshold;
            otherwise keep those whose matching value is <= threshold.
        stat_funcs - List() :: Obj-functions: functions handed over to
            dpy.print_details.
    Outputs -
        None
    Main functionality -
        Select the values of dt[tar] according to the matching value in
        dt[treat] and hand them to dpy.print_details().
    """
    # Normalise the flag first: a bitwise XOR on the raw argument only works
    # for True/False/0/1 and would select every record for e.g. is_above=2.
    above = bool(is_above)
    new_target_data = [
        x for x, y in zip(dt[tar], dt[treat]) if above != (y <= threshold)
    ]
    dpy.print_details(
        {tar: new_target_data},
        [tar],
        stat_funcs,
    )
def population_statistics(feature_description, data, treatment, target,
                          threshold, is_above, statistic_functions):
    """
    Keeps only the records whose treatment value lies on the requested side
    of the threshold, then prints the description followed by the statistics
    of the target feature for those records.

    :parameters:
    feature_description -- a description of the records in data (printed as is)
    data -- the dictionary (feature -> list of values) to filter from
    treatment -- the feature to filter by
    target -- passed to print_details to select what is reported
    threshold -- the value the treatment values are compared to
    is_above -- truthy: keep values > threshold; falsy: keep values <= threshold
    statistic_functions -- list of statistic functions passed to print_details
    :returns: None
    """
    def is_selected(value):
        # Strictly above when is_above is set, otherwise at or below.
        return value > threshold if is_above else value <= threshold

    kept_rows = [
        row for row, value in enumerate(data[treatment]) if is_selected(value)
    ]
    # Copy every feature of each kept record into a fresh dictionary.
    filtered = {
        feature: [column[row] for row in kept_rows]
        for feature, column in data.items()
    }
    print(feature_description)
    print_details(filtered, target, statistic_functions)
def main(argv):
    """
    Load the requested features and print statistics of the season-1 records.

    :param argv: argv[1] is handed to data.load_data as the source,
                 argv[2] is a ", "-separated list of feature names
    :return: None
    """
    features = argv[2].split(sep=", ")
    the_data = data.load_data(argv[1], features)
    statistic_functions = [statistics.sum, statistics.mean, statistics.median]
    # Filter the loaded records (the_data), not the `data` module itself.
    # NOTE(review): other callers of filter_by_feature pass a collection of
    # values and unpack a pair of dictionaries -- confirm that a bare 1 and a
    # single return value are what this project's filter_by_feature expects.
    summer_data = data.filter_by_feature(the_data, "season", 1)
    data.print_details(summer_data, ["hum", "t1", "cnt"], statistic_functions)
def q1(dic):
    """
    Print the question 1 report: statistics of hum, t1 and cnt for the
    records with season == 1, for the records with is_holiday == 1, and for
    all records.

    :param dic: the data dictionary handed to data.filter_by_feature and
                data.print_details
    :return: None
    """
    print("Question1:")
    measured = ["hum", "t1", "cnt"]
    stat_funcs = [statistics.sum, statistics.mean, statistics.median]
    sections = (("Summer:", "season"), ("Holiday:", "is_holiday"))
    for title, column in sections:
        print(title)
        # Only the matching half of the split is reported.
        selected, _ = data.filter_by_feature(dic, column, {1})
        data.print_details(selected, measured, stat_funcs)
    print("All:")
    data.print_details(dic, measured, stat_funcs)
def population_statistics(feature_description, data1, treatment, target,
                          threshold, is_above, statistic_functions):
    """
    Print the description, then the statistics of the target feature for the
    records of data1 whose treatment value is on the requested side of the
    threshold.

    :param feature_description: text printed before the statistics
    :param data1: dictionary mapping each feature name to its list of values
    :param treatment: feature whose values are compared to the threshold
    :param target: feature reported by data.print_details
    :param threshold: value the treatment values are compared to
    :param is_above: truthy keeps values > threshold, falsy keeps values
                     <= threshold
    :param statistic_functions: functions handed to data.print_details
    :return: None
    """
    print(feature_description)
    dic = {key: [] for key in data1}
    for row, value in enumerate(data1[treatment]):
        keep = value > threshold if is_above else value <= threshold
        if keep:
            # Copy the whole record. The keys come from data1 -- `data` is
            # the module that provides print_details, not the records.
            for key in data1:
                dic[key].append(data1[key][row])
    data.print_details(dic, [target], statistic_functions)
def Q1(data):
    """
    Prints the Question 1 report: the statistics of hum, t1 and cnt for the
    season == 1 records, the is_holiday == 1 records and all records.
    :param data: dict
    :return: none
    """
    print("Question 1:")
    features = ["hum", "t1", "cnt"]
    # Only the first element returned by filter_by_feature is reported.
    summer_records = dt.filter_by_feature(data, "season", {1})[0]
    holiday_records = dt.filter_by_feature(data, "is_holiday", {1})[0]
    sections = (
        ("Summer", summer_records),
        ("Holiday", holiday_records),
        ("All", data),
    )
    for title, records in sections:
        print("%s:" % title)
        dt.print_details(records, features, stats.statistic_functions)
def population_statistics(feature_description, data, treatment, target,
                          threshold, is_above, statistic_functions):
    """
    Print the statistics of the records whose treatment value is on the
    requested side of the threshold.

    feature_description is accepted but not used by this implementation.
    data maps each feature name to its list of values; when is_above is
    truthy the records with treatment value > threshold are kept, otherwise
    those with treatment value <= threshold. The filtered dictionary, target
    and statistic_functions are handed to print_details.
    """
    treatment_values = data[treatment]
    if is_above:
        keep = [value > threshold for value in treatment_values]
    else:
        keep = [value <= threshold for value in treatment_values]

    data1 = {}
    for key in data:
        column = data[key]
        # Index explicitly so a too-short column still fails loudly.
        data1[key] = [column[row] for row, wanted in enumerate(keep) if wanted]
    print_details(data1, target, statistic_functions)
def question1(data):
    """
    Print the Question 1 report: details of hum, t1 and cnt for the
    season == 1 records, the is_holiday == 1 records and all records.

    :param data: the data handed to filter_by_feature and print_details
    :return: None
    """
    features = ['hum', 't1', 'cnt']
    statistic_functions = ['sum', 'mean', 'median']
    print("Question 1:")

    print("Summer:")
    summer_records, _ = filter_by_feature(data, 'season', [1])
    print_details(summer_records, features, statistic_functions)

    print("Holiday:")
    holiday_records, _ = filter_by_feature(data, 'is_holiday', [1])
    print_details(holiday_records, features, statistic_functions)

    print("All:")
    print_details(data, features, statistic_functions)
def main(argv):
    """
    produce answers to Q1, Q2 by user's input
    :argument: argv[1] -- path to csv file
    :parameter: argv[2] -- features in csv file for assignment (", " separated)
    :return: none
    """
    # Q1
    print('Question 1:')
    features = argv[2].split(", ")
    # keep the continuous variables only
    for categorical in ("season", "is_holiday"):
        features.remove(categorical)
    stats = [statistics.sum, statistics.mean, statistics.med]
    data_all = data.load_data(argv[1], argv[2])

    data_summer, _ = data.filter_by_features(data_all, "season", {1})
    print("Summer:")
    data.print_details(data_summer, features, stats)

    data_holiday, _ = data.filter_by_features(data_all, "is_holiday", {1})
    print("Holiday:")
    data.print_details(data_holiday, features, stats)

    print("All:")
    data.print_details(data_all, features, stats)

    # Q2
    lim_temp = 13
    print("\nQuestion 2:")
    data_winter, _ = data.filter_by_features(data_all, 'season', {3})
    data_winter_holiday, data_winter_weekday = data.filter_by_features(
        data_winter, 'is_holiday', {1})
    for dropped in ("hum", "t1"):
        features.remove(dropped)
    stats.remove(statistics.sum)

    winter_groups = (
        ("winter holiday records", data_winter_holiday),
        ("winter weekday records", data_winter_weekday),
    )
    # First the records at or below the limit (flag 0), then those above (1).
    for header, is_above in (("If t1<=13.0, then:", 0),
                             ("If t1>13.0, then:", 1)):
        print(header)
        for description, records in winter_groups:
            statistics.population_statistics(description, records, "t1",
                                             "cnt", lim_temp, is_above, stats)
def main(argv):
    """Main interface for running the program.

    argv[1] is handed to data.load_data as the source and argv[2] is a
    ", "-separated list of feature names, e.g.
    "london_sample.csv" and "season, t1, is_holiday, cnt, hum".
    """
    print(argv)
    records = data.load_data(argv[1], argv[2].split(', '))

    # Q1
    print("Question 1:")
    for title, column in (("Summer:", 'season'), ("Holiday:", 'is_holiday')):
        print(title)
        matching, _ = data.filter_by_feature(records, column, [1])
        data.print_details(
            matching, ['hum', 't1', 'cnt'],
            [statistics.sum, statistics.mean, statistics.median])
    print("All:")
    data.print_details(
        records, ['hum', 't1', 'cnt'],
        [statistics.sum, statistics.mean, statistics.median])

    def cnt_statistics(subset, is_above):
        # One Question 2 report: cnt statistics split by t1 around 13.0.
        statistics.population_statistics(
            'cnt', subset, 't1', 'cnt', 13.0, is_above,
            [statistics.mean, statistics.median])

    # Q2
    print("")
    print("Question 2:")
    print("If t1<=13.0, then:")
    print("Winter holiday records:")
    # The data is loaded a second time here, after the headers are printed.
    reloaded = data.load_data(argv[1], argv[2].split(', '))
    winter, _ = data.filter_by_feature(reloaded, 'season', [3])
    winter_holiday, winter_weekday = data.filter_by_feature(
        winter, 'is_holiday', [1])
    cnt_statistics(winter_holiday, False)
    print("Winter weekday records:")
    cnt_statistics(winter_weekday, False)

    print("If t1>13.0, then:")
    print("Winter holiday records:")
    cnt_statistics(winter_holiday, True)
    print("Winter weekday records:")
    cnt_statistics(winter_weekday, True)
def main(argv):
    """Print the Question 1 and Question 2 reports.

    argv[1] is handed to data.load_data as the source; argv[2] is a
    ", "-separated feature list. The features are addressed by position:
    the first three are reported in Question 1, index 3 and index 4 are the
    features filtered with SUMMER/WINTER and HOLIDAY, and index 1 and
    index 2 are the treatment and target of Question 2.
    """
    # loading the csv, the statistic functions and features for the program
    features = argv[2].split(", ")
    dictionary = data.load_data(argv[1], features)
    statistic_functions = [statistics.sum, statistics.mean, statistics.median]
    continuous_features = features[:3]

    # ----------------------------- QUESTION 1 -----------------------------
    # The second dictionary returned by filter_by_feature is not needed here.
    data_summer, _ = data.filter_by_feature(dictionary, features[3], SUMMER)
    data_holiday, _ = data.filter_by_feature(dictionary, features[4], HOLIDAY)
    print("Question 1:")
    print("Summer:")
    data.print_details(data_summer, continuous_features, statistic_functions)
    print("Holiday:")
    data.print_details(data_holiday, continuous_features, statistic_functions)
    print("All:")
    data.print_details(dictionary, continuous_features, statistic_functions)

    # ----------------------------- QUESTION 2 -----------------------------
    descriptions = ["Winter holiday records:", "Winter weekday records:"]
    data_winter, _ = data.filter_by_feature(dictionary, features[3], WINTER)
    winter_holiday, winter_weekday = data.filter_by_feature(
        data_winter, features[4], HOLIDAY)
    populations = [winter_holiday, winter_weekday]
    new_statistic_functions = [statistics.mean, statistics.median]
    signs_list = ["<=", ">"]
    treatment = features[1]
    target = features[2]
    print("\nQuestion 2:")
    # The section index doubles as the is_above flag (0 -> "<=", 1 -> ">").
    for is_above in range(SECTIONS):
        print(f"If {treatment}{signs_list[is_above]}13.0, then:")
        for group in range(SECTIONS):
            data.population_statistics(
                descriptions[group], populations[group], treatment, target,
                THRESHOLD, is_above, new_statistic_functions)
def main(argv):
    """Print the Question 1 and Question 2 reports.

    argv[1] is the path handed to data_lib.load_data and argv[2] is a
    ", "-separated list of the features to load.
    """
    print("Question 1:")
    csv_path = argv[1]
    features = argv[2].split(", ")
    data = data_lib.load_data(csv_path, features)
    statistic_functions = [statistics.sum, statistics.mean, statistics.median]

    for title, column in (("Summer:", "season"), ("Holiday:", "is_holiday")):
        matching, _ = data_lib.filter_by_feature(data, column, [1])
        print(title)
        data_lib.print_details(matching, ["hum", "t1", "cnt"],
                               statistic_functions)
    print("All:")
    data_lib.print_details(data, ["hum", "t1", "cnt"], statistic_functions)

    print("Question 2:")
    t_threshold = 13
    statistic_func = [statistics.mean, statistics.median]
    print("If t1<=13.0, then:")
    winter, _ = data_lib.filter_by_feature(data, "season", [3])
    winter_holiday, winter_weekday = data_lib.filter_by_feature(
        winter, "is_holiday", [1])
    populations = (
        ("Winter holiday records", winter_holiday),
        ("Winter weekday records", winter_weekday),
    )

    def report(is_above):
        # Print each winter population's cnt statistics for one side of the
        # threshold, preceded by its title.
        for description, records in populations:
            print(f"{description}:")
            statistics.population_statistics(
                description, records, "t1", ["cnt"], t_threshold, is_above,
                statistic_func)

    report(False)
    print("If t1>13.0, then:")
    report(True)
def main(argv):
    """
    Calculates and prints statistic values for features of records from the
    data collected in london.csv file.

    :parameter:
    argv -- list of arguments supplied
            (file to run, csv file address, features to get from the csv)
    :returns: None
    """
    # Question 1
    # Saves the features given in a list
    features = argv[2].split(sep=", ")
    the_data = data.load_data(argv[1], features)
    statistic_functions = [sum, mean, median]
    # Saves the relevant records
    summer_data, _ = data.filter_by_feature(the_data, "season", [1])
    holiday_data, _ = data.filter_by_feature(the_data, "is_holiday", [1])

    print("Question 1:")
    for title, records in (("Summer:", summer_data),
                           ("Holiday:", holiday_data),
                           ("All:", the_data)):
        print(title)
        data.print_details(records, ["hum", "t1", "cnt"], statistic_functions)

    # Question 2
    # The header carries a trailing colon, like every other header printed
    # by this function.
    print("\nQuestion 2:")
    # Saves the relevant records
    winter_data, _ = data.filter_by_feature(the_data, "season", [3])
    w_h_data, not_w_h_data = data.filter_by_feature(winter_data,
                                                    "is_holiday", [1])
    populations = (
        ("Winter holiday records:", w_h_data),
        ("Winter weekday records:", not_w_h_data),
    )
    # NOTE(review): the headers hard-code 13.0 while the filter uses
    # THRESHOLD -- confirm the constant is 13.
    for header, is_above in (("If t1<=13.0, then:", 0),
                             ("If t1>13.0, then:", 1)):
        print(header)
        for description, records in populations:
            # Question 2 reports everything but the first statistic (sum).
            population_statistics(description, records, "t1", ["cnt"],
                                  THRESHOLD, is_above,
                                  statistic_functions[1:])
def main(argv):
    """
    Main function of the program
    :param argv: argv[0] = /home/student/your_path/main.py,
                 argv[1] = /home/student/your_path/london.csv,
                 argv[2] = "hum, t1, cnt, season, is_holiday"
    :return: none
    """
    # question 1
    data_dict = data.load_data(argv[1], argv[2])
    statistic_functions = [
        statistics.sum_f, statistics.mean, statistics.median
    ]
    print("Question 1:")
    feature_list = ["hum", "t1", "cnt"]
    for column, title in (('season', 'Summer'), ('is_holiday', 'Holiday')):
        selected, _ = data.filter_by_value(data_dict, column, {1})
        data.print_details(selected, feature_list, statistic_functions, title)
    data.print_details(data_dict, feature_list, statistic_functions, 'All')
    print()  # blank line before question 2

    # question 2
    print("Question 2:")
    title_list = ["Winter holiday records:", "Winter weekday records:"]
    print("If t1<=13.0, then:")
    data_winter, _ = data.filter_by_value(data_dict, 'season', {3})
    winter_holiday, winter_weekday = data.filter_by_value(
        data_winter, 'is_holiday', {1})
    dict_list = [winter_holiday, winter_weekday]
    # Records at or below the threshold (is_above flag 0).
    for title, records in zip(title_list, dict_list):
        statistics.population_statistics(title, records, 't1', 'cnt', 13.0,
                                         0, statistic_functions)
    print("If t1>13.0, then:")
    # Records above the threshold (is_above flag 1).
    for title, records in zip(title_list, dict_list):
        statistics.population_statistics(title, records, 't1', 'cnt', 13.0,
                                         1, statistic_functions)
def print_details(self, features, statistic_functions):
    """Delegate to data.print_details using this object's dataset records.

    :param features: forwarded unchanged to data.print_details
    :param statistic_functions: forwarded unchanged to data.print_details
    :return: None
    """
    records = self.dataset.data
    data.print_details(records, features, statistic_functions)
def main(argv):
    """Print the Question 1 and Question 2 reports.

    argv[1] and argv[2] are handed to dt.load_data unchanged.
    """
    data = dt.load_data(argv[1], argv[2])
    # Human-readable population name -> (feature, accepted values).
    dc = {
        "spring": ("season", [0]),
        "summer": ("season", [1]),
        "autumn": ("season", [2]),
        "winter": ("season", [3]),
        "holiday": ("is_holiday", [1]),
        "weekend": ("is_weekend", [1]),
    }

    print("Question 1:")
    for mode in ("Summer", "Holiday", "All"):
        print(f"{mode}:")
        if mode != "All":
            feature, values = dc[mode.lower()]
            subset = dt.filter_by_feature(data, feature, values)[0]
        else:
            subset = data
        dt.print_details(
            subset,
            ["hum", "t1", "cnt"],
            [st.sum, st.mean, st.median],
        )

    print("\nQuestion 2:")
    threshold = 13.0
    season_feature, winter_values = dc["winter"]
    winter_data, _ = dt.filter_by_feature(data, season_feature, winter_values)
    holiday_feature, holiday_values = dc["holiday"]
    holiday_data, weekday_data = dt.filter_by_feature(
        winter_data, holiday_feature, holiday_values
    )
    populations = (
        ("Winter holiday records", holiday_data),
        ("Winter weekday records", weekday_data),
    )
    sections = (
        (f"If t1<={threshold}, then:", False),
        (f"If t1>{threshold}, then:", True),
    )
    for header, is_above in sections:
        print(header)
        for description, records in populations:
            print(f"{description}:")
            st.population_statistics(
                description,
                records,
                "t1",
                "cnt",
                threshold,
                is_above,
                [st.mean, st.median],
            )
def population_statistics(features_description, data, treatment, target,
                          threshold, is_above, statistic_functions):
    """
    Print the description, then the statistics of the records whose
    treatment value lies on the requested side of the threshold.

    :param features_description: text printed before the statistics
    :param data: records to filter; assumed to map each feature name to an
                 equal-length list of values -- TODO confirm against
                 load_data
    :param treatment: feature whose values are compared to the threshold
    :param target: forwarded to print_details to select what is reported
    :param threshold: value the treatment values are compared to
    :param is_above: truthy keeps records with treatment value > threshold,
                     falsy keeps records with treatment value <= threshold
    :param statistic_functions: forwarded to print_details
    :return: None
    """
    from data import print_details  # non in-function import will cause circular import

    # Decide once per record whether it is kept, so the statistics reflect
    # the requested side of the threshold instead of the whole data set.
    if is_above:
        keep = [value > threshold for value in data[treatment]]
    else:
        keep = [value <= threshold for value in data[treatment]]
    filtered = {
        feature: [value for value, wanted in zip(values, keep) if wanted]
        for feature, values in data.items()
    }

    print(features_description)
    print_details(filtered, target, statistic_functions)