def resetPass(customCommand,test=False):
    """Generate a new Adverb+Adjective+Noun passphrase, print word-list
    statistics, and (unless *test* is True) register it with the router.

    NOTE(review): relies on module-level ``sys``, ``maybeNumber()`` and
    ``entropy()``, and on Adv/Adj/Noun word-list files beside the script.
    """
    from random import sample as randomize
    from random import random
    from os.path import exists
    # Opens the Adj, Adv, and Noun files as arrays
    av = open(sys.path[0]+"/Adv").read().splitlines()
    aj = open(sys.path[0]+"/Adj").read().splitlines()
    nn = open(sys.path[0]+"/Noun").read().splitlines()
    # Just for fun, some statistics!
    totalCombos = len(av)*len(aj)*len(nn)
    combosFormatted = "{:,}".format(totalCombos)
    avLengths=[]
    for item in av:
        avLengths.append(len(item))
    ajLengths=[]
    for item in aj:
        ajLengths.append(len(item))
    nnLengths=[]
    for item in nn:
        nnLengths.append(len(item))
    from statistics import mean,median,mode
    # The +4/+5 offsets presumably account for the digits a numeric prefix
    # can add (1..10000) -- TODO confirm.
    print("-"*25+"\n"+
          "Total adverbs: "+str(len(av))+"\n"+
          "Total adjectives: "+str(len(aj))+"\n"+
          "Total nouns: "+str(len(nn))+"\n"+
          "Total possible combinations: "+combosFormatted+" (not factoring in numbers)\n"+
          "Shortest possible passphrase length: "+str(min(avLengths)+min(ajLengths)+min(nnLengths))+"\n"+
          "Longest possible passphrase length: "+str(max(avLengths)+max(ajLengths)+max(nnLengths)+5)+"\n"+
          "Mean passphrase length: "+str(int(mean(avLengths)+mean(ajLengths)+mean(nnLengths)+4))+"\n"+
          "Median passphrase length: "+str(int(median(avLengths)+median(ajLengths)+median(nnLengths))+4)+"\n"+
          "Mode passphrase length: "+str(int(mode(avLengths)+mode(ajLengths)+mode(nnLengths))+4)+"\n"+
          "-"*25)
    # Randomize the order of the arrays
    av = randomize(av,len(av))
    aj = randomize(aj,len(aj))
    nn = randomize(nn,len(nn))
    # Pick a random word from each randomized array
    newAdverb = av[int(random()*len(av))].capitalize()
    newAdjective = aj[int(random()*len(aj))].capitalize()
    newNoun = nn[int(random()*len(nn))].capitalize()
    # Possibly add a random number from 1 to 10,000
    if maybeNumber():
        from math import ceil
        number = str(ceil(random()*10000))
    else:
        number = ''
    # Assemble the passphrase
    newPassphrase = number+newAdverb+newAdjective+newNoun
    # NOTE(review): "Needs attention" flag kept from the original author.
    # The sentinel below tests for ' {PASSPHRASE}' (upper case) while the
    # replacements target '{password}'/'{passphrase}' -- confirm intended.
    print("The new passphrase will be: "+newPassphrase)
    print("Total entropy: ~"+str(int(entropy(newPassphrase))))
    if customCommand == ' {PASSPHRASE}':
        print("Password display command not found. Aborting.")
        exit()
    if not test:
        import RouterPasswording
        RouterPasswording.newPassphrase(newPassphrase)
    from os import system as execute
    # NOTE(review): the passphrase is interpolated into a shell command;
    # safety depends on the word lists containing no shell metacharacters.
    execute(customCommand.replace("{password}",newPassphrase).replace("{passphrase}",newPassphrase))
def find_hit_regions(primer, alignment):
    """Locate the best-matching primer window in every aligned sequence.

    For each sequence, every window (frame) of ``len(primer)`` positions is
    scored by the number of bases incompatible with the primer's (possibly
    ambiguous, per ``ambiguous_dna_values``) base at that position; the
    window with the fewest mismatches wins.  Returns ``(starting, ending)``,
    the modal first/last indexes across all sequences.

    NOTE(review): still O(sequences x frames x primer length); fine for
    modest alignments but slow at metagenome scale.  Removed the unused
    ``list_of_indexes``/``first_indexes``/``last_indexes`` accumulators.
    """
    primer_length = len(primer)
    number_of_frames = (len(alignment[0]) - primer_length) + 1

    # Pre-compute the slice boundaries of every candidate frame.
    frame_indexes = {}
    for frame in range(number_of_frames):
        frame_indexes[frame] = {"first": frame, "last": frame + primer_length}

    hit_regions = {}
    for seq in alignment:
        # Mismatch count for every frame of this sequence.
        number_mismatches = {}
        for frame, idx in frame_indexes.items():
            window = seq[idx["first"]:idx["last"]]
            mismatches = 0
            for count, position in enumerate(window.upper()):
                if position not in ambiguous_dna_values[primer[count]]:
                    mismatches += 1
            number_mismatches[frame] = mismatches
        # Keep the boundaries of the best (fewest-mismatch) frame.
        best_frame = min(number_mismatches, key=number_mismatches.get)
        hit_regions[seq.id] = frame_indexes[best_frame]

    starting = mode([hit_regions[key]["first"] for key in hit_regions])
    ending = mode([hit_regions[key]["last"] for key in hit_regions])
    return starting, ending
def classify(self, text):
    """Vote across the wrapped classifiers; return (label, confidence)."""
    feats = self.find_features(text)
    votes = [clf.classify(feats) for clf in self._classifiers]
    winner = mode(votes)
    confidence = votes.count(winner) / float(len(votes))
    return (winner, confidence)
def main():
    """Exercise the statistics module on small integer samples."""
    base = range(6)
    extended = list(range(6)) + [3]
    print(stats.mean(base))
    print(stats.median(base))
    print(stats.median_low(base))
    print(stats.median_high(base))
    print(stats.median_grouped(base))
    # mode() on all-distinct data raises before Python 3.8.
    try:
        print(stats.mode(base))
    except Exception as e:
        print(e)
    print(stats.mode(extended))
    print(stats.pstdev(extended))
    print(stats.stdev(extended))
    print(stats.pvariance(extended))
    print(stats.variance(extended))
def process_file(filename):
    """Compute price/unit statistics for a CSV procurement file.

    *filename* is a UTF-8 CSV with at least ``product_price`` and
    ``OKEI_name`` columns.  Returns
    ``(price_med, unit_mode, med_outliers, mod_outliers)``:

    * price_med    -- median product price
    * unit_mode    -- most common unit (OKEI) name
    * med_outliers -- rows whose price is more than 3x the median
    * mod_outliers -- rows whose unit differs from the modal unit

    Raises statistics.StatisticsError for empty data (as before).
    """
    # Read the file ONCE and keep the parsed rows.  The original opened and
    # parsed the file twice, doubling I/O and risking divergence if the
    # file changed between passes.
    with io.open(filename, "r", encoding="UTF-8") as source_file:
        rows = list(csv.DictReader(source_file))

    pricelist = [float(row["product_price"]) for row in rows]
    unitlist = [row["OKEI_name"] for row in rows]
    price_med = statistics.median(pricelist)
    unit_mode = statistics.mode(unitlist)

    med_outliers = []
    mod_outliers = []
    for row in rows:
        if row["OKEI_name"] != unit_mode:
            mod_outliers.append(row)
        if (float(row["product_price"]) / price_med) > 3:
            med_outliers.append(row)
    return price_med, unit_mode, med_outliers, mod_outliers
def print_posts(posts, post_type, print_num):
    """Print a banner, price statistics, and the first *print_num* posts."""
    prices = []
    for post in posts:
        try:
            prices.append(float(post.price))
        except ValueError:
            pass  # non-numeric price strings are skipped
    print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{}'
          '%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%'.format(post_type))
    if prices:
        print('NUM of POSTS: ', len(posts))
        print('MEAN: ', statistics.mean(prices))
        print('MEDIAN: ', statistics.median(prices))
        try:
            print('MODE: ', statistics.mode(prices))
            print('STDEV: ', statistics.stdev(prices))
        except statistics.StatisticsError:
            pass  # no unique mode, or fewer than two data points
    for post in posts[:print_num]:
        pprint(post.price)
        pprint(post.title)
        pprint(post.carrier)
        pprint(post.description)
        pprint('www.kijiji.ca' + post.link)
def print_stats(l):
    """Print mean, median, mode, max and min of *l*, one per line.

    Each statistic is attempted independently; when one cannot be computed
    its error message is printed in its place instead of aborting.
    """
    # (label, function, exceptions it raises).  max()/min() raise ValueError
    # on an empty sequence -- NOT StatisticsError, which is what the
    # original caught, so an empty list used to crash the report.
    stats_to_print = (
        ("Mean", mean, (StatisticsError,)),
        ("Median", median, (StatisticsError,)),
        ("Mode", mode, (StatisticsError,)),
        ("Max", max, (ValueError,)),
        ("Min", min, (ValueError,)),
    )
    for label, func, exc_types in stats_to_print:
        try:
            print("\t{}: {}".format(label, func(l)))
        except exc_types as e:
            print("\t{}: {}".format(label, str(e)))
def statistics_for_time_points(time_points: list, header: str) -> str:
    """Build a fixed-width text report (mean/median/stdev/mode/min/max)
    for timedelta-like objects (anything with ``.total_seconds()``)."""
    seconds = [tp.total_seconds() for tp in time_points]

    def clock(value):
        # Render seconds-since-midnight as HH:MM.
        return time.strftime("%H:%M", time.gmtime(value))

    try:
        mode_time = clock(st.mode(seconds))
    except st.StatisticsError:
        mode_time = "-"  # no unique mode

    value_width = 5
    key_width = len(header) - value_width
    row_format = "\n{{:<{key_width}}}{{:>{value_width}}}".format(
        key_width=key_width, value_width=value_width)
    delimiter = "\n" + "-" * len(header)

    parts = [
        header,
        delimiter,
        row_format.format("Mean:", clock(st.mean(seconds))),
        row_format.format("Median:", clock(st.median(seconds))),
        row_format.format("Standard deviation:", clock(st.pstdev(seconds))),
        row_format.format("Mode:", mode_time),
        row_format.format("Earliest:", clock(min(seconds))),
        row_format.format("Latest:", clock(max(seconds))),
        delimiter,
        "\n{} values".format(len(seconds)),
    ]
    return "".join(parts)
def mode(RGB_list, count):
    '''Return the most common value found at index *count* of each
    element (e.g. the modal value of one RGB channel).'''
    channel = [pixel[count] for pixel in RGB_list]
    return statistics.mode(channel)
def classify(self, features):
    """Return the majority label across the wrapped classifiers."""
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
def classify(self, features):
    """Majority vote across self._classifiers, returned lower-cased."""
    votes = [clf.classify(features) for clf in self._classifiers]
    winner = mode(votes)
    return winner.lower()
def get_3p_domain_stats(self, num_pages, tld_filter = None):
    """
    determines basic stats for the number of 3p domains contacted per-page

    note this is distinct domain+pubsuffix, not fqdns (e.g. 'sub.example.com'
    and 'sub2.example.com' only count as 'example.com')

    if tracker_domains have been set the stats will reflect only
    third-parties which have crossed the threshold (see get_tracker_domains())
    """
    # each page id corresponds to a list of domains belonging to page elements
    page_id_to_domains_dict = {}

    # run query to get all page id, page domain, and element domain entries
    # there is no third-party filter so each page will have at least one
    # entry for first-party domain
    for row in self.sql_driver.get_page_id_3p_element_domain_pairs(tld_filter):
        page_id = row[0]
        element_domain = row[1]

        # when a tracker_domains list exists, only count domains on it
        # (replaces four duplicated nested branches in the original)
        if self.tracker_domains and element_domain not in self.tracker_domains:
            continue

        # append to this page's list, creating it on first sight
        # (in-place append, not the original O(n^2) list concatenation)
        if page_id not in page_id_to_domains_dict:
            page_id_to_domains_dict[page_id] = []
        page_id_to_domains_dict[page_id].append(element_domain)

    # the number of 3p domains per page is the length of its list
    per_page_3p_element_counts = [len(domains) for domains in page_id_to_domains_dict.values()]

    # pages that have no 3p elements are not yet counted; pad with zeros
    per_page_3p_element_counts.extend([0] * (num_pages - len(per_page_3p_element_counts)))

    # mean and median should always be ok
    mean = statistics.mean(per_page_3p_element_counts)
    median = statistics.median(per_page_3p_element_counts)

    # mode raises StatisticsError when there is no unique mode; the
    # original bare except: also swallowed unrelated errors
    try:
        mode = statistics.mode(per_page_3p_element_counts)
    except statistics.StatisticsError:
        mode = None

    return(mean, median, mode)
def basic_stats(total_data):
    """Return [mean, median, mode, sample standard deviation] of the data."""
    return [
        statistics.mean(total_data),
        statistics.median(total_data),
        statistics.mode(total_data),
        statistics.stdev(total_data),
    ]
def diff1(listy):
    """Predict the next term of an integer sequence via repeated differences.

    The sequence is first divided by its GCD; one difference row per element
    is then built for both the reduced and the original sequence.  If some
    reduced row is constant, the next term is reconstructed by summing the
    trailing entries of the original rows above it; otherwise the most
    common original value is returned.

    NOTE(review): relies on a module-level diff() helper.
    """
    original = listy
    divisor = reduce(gcd, listy)
    reduced = [value / divisor for value in listy]

    # Build successive difference rows (one per input element).
    reduced_rows = [reduced]
    original_rows = [original]
    for _ in reduced:
        reduced_rows.append(diff(reduced_rows[-1]))
    for _ in reduced:
        original_rows.append(diff(original_rows[-1]))
    reduced_rows = reduced_rows[:-1]
    original_rows = original_rows[:-1]

    # Remember the LAST constant row among the reduced difference rows.
    constant_row = None
    for index, row in enumerate(reduced_rows):
        if row.count(row[0]) == len(row):
            constant_row = index

    if constant_row is not None:
        # Sum the last entries of the original rows above the constant one.
        prediction = 0
        for row in reversed(original_rows[:constant_row]):
            prediction = prediction + row[-1]
        return(prediction)
    else:
        return(mode(original))
def validate_array(self, arr):
    '''Return True (recording results on self) when the mean and stdev of
    *arr* fall within the configured tolerances.

    self.target_mean and self.target_stdev are (target, tolerance) pairs.
    '''
    sample_mean = statistics.mean(arr)
    sample_stdev = statistics.stdev(arr)
    try:
        sample_mode = statistics.mode(arr)
    except statistics.StatisticsError:
        sample_mode = 0  # most of the time there is no unique mode

    mean_ok = abs(self.target_mean[0] - sample_mean) < self.target_mean[1]
    stdev_ok = abs(self.target_stdev[0] - sample_stdev) < self.target_stdev[1]
    if not (mean_ok and stdev_ok):
        return False

    self.result_mean = sample_mean
    self.result_stdev = sample_stdev
    self.result_mode = sample_mode
    return True
def classify(self, features):
    """Majority vote; echoes each vote to stdout as it is cast."""
    votes = []
    for clf in self._classifiers:
        vote = clf.classify(features)
        votes.append(vote)
        print(vote)
    return mode(votes)
def vote(self, training_set):
    """Return the most common label the wrapped classifiers assign."""
    ballots = [clf.classify(training_set) for clf in self.classifiers]
    return mode(ballots)
def linear(y):
    """Predict the term at x=6 of sequence *y* with a linear fit.

    A degree-1 polynomial is fitted to (1..len(y), y).  When the fit
    explains at least 99% of the variance (R^2, in percent), the fitted
    value at x=6 is used; otherwise the prediction falls back to the modal
    value of *y* (or its last element when no unique mode exists).  The
    result is rounded to the nearest int.

    Changes vs. original: removed the unused yn/ynn difference rows and the
    dead integer-rounding of the fitted values; hoisted np.mean(y) out of
    the residual loop (it was recomputed every iteration).
    """
    x = list(range(1, len(y) + 1))
    prediction_x = 6  # hard-coded prediction point, as before

    coeffs = np.polyfit(x, y, 1)
    fitted = np.polyval(coeffs, x)

    # Coefficient of determination, expressed in percent.
    y_mean = np.mean(y)
    ss_residual = 0
    ss_total = 0
    for i, value in enumerate(y):
        ss_residual = ss_residual + (value - fitted[i]) ** 2
        ss_total = ss_total + (value - y_mean) ** 2
    r_squared = (1 - ss_residual / ss_total) * 100

    predict = np.polyval(coeffs, prediction_x)
    if r_squared < 99:
        # Poor linear fit: fall back to the most common value.
        try:
            predict = mode(y)
        except statistics.StatisticsError:
            predict = y[-1]
    return(round(float(predict)))
def count_mislabels(labels, true_labels):
    """Count mislabeled points, assuming exactly two cluster values (0/1).

    The longer true cluster is assigned the label it mostly carries; the
    shorter cluster gets the opposite label.  Every point differing from
    its cluster's assigned label counts as one mislabel.
    """
    cluster_a = labels[true_labels == 0]
    cluster_b = labels[true_labels == 1]
    if len(cluster_a) <= len(cluster_b):
        shorter, longer = cluster_a, cluster_b
    else:
        shorter, longer = cluster_b, cluster_a

    longer_label = mode(longer)  # what the longer cluster's label should be
    shorter_label = 1 if longer_label == 0 else 0  # the opposite value

    return (np.count_nonzero(longer != longer_label)
            + np.count_nonzero(shorter != shorter_label))
def stats_helper(list):
    """Return a dict of descriptive statistics for *list*.

    ``mode`` is None when the data has no unique mode.  See
    https://docs.python.org/3/library/statistics.html#statistics.pvariance
    """
    mean = statistics.mean(list)
    try:
        mode = statistics.mode(list)
    except statistics.StatisticsError:
        mode = None  # no unique mode
    return {
        'mean': mean,
        'variance': statistics.pvariance(list, mu=mean),
        'standard_deviation': statistics.pstdev(list, mu=mean),
        'median': statistics.median(list),
        'median_low': statistics.median_low(list),
        'median_high': statistics.median_high(list),
        'median_grouped': statistics.median_grouped(list),
        'mode': mode,
    }
def run(data):
    """Append a cost report for *data* to analyzer.log.

    Relies on module-level helpers costs(), total_cost(), action(),
    costs_action(), max_action_value() and max_action_percentage().

    Fixes: dict.iteritems() is Python 2 only and crashes under Python 3
    (where the statistics functions used here live) -- replaced with
    .items(); the log file is now closed via a context manager even when
    a write raises.
    """
    with open("analyzer.log", 'a+') as f:
        c = costs(data)
        total = total_cost(data)
        f.write("\n############# COST #############\n")
        f.write("Total Cost : {0}\n".format(total))
        f.write("Total Cost Mean: {0}\n".format(mean(c)))
        f.write("Total Cost Median: {0}\n".format(median(c)))
        f.write("Total Cost Mode: {0}\n".format(mode(c)))
        f.write("Total Cost Variance: {0}\n".format(variance(c)))
        cost_action = action(data)
        f.write("Cost by Action: \n")
        for k, v in cost_action.items():
            f.write("\t{0} -> {1} units\n".format(k, v))
        f.write("Percentage Cost by Action: \n")
        for k, v in cost_action.items():
            f.write("\t{0} -> {1} %\n".format(k, round(((v * 100.) / total), 2)))
        f.write("Cost Variance by Action: \n")
        for k, v in cost_action.items():
            c_action = costs_action(data, k)
            # variance() needs at least two data points
            if len(c_action) > 1:
                f.write("\t{0} -> {1} units\n".format(k, round(variance(c_action), 2)))
            else:
                f.write("\t{0} -> {1} units\n".format(k, round(c_action[0], 2)))
        key_max, max_value = max_action_value(cost_action)
        f.write("More Expensive Action by value: {0} -> {1}\n".format(key_max[0], cost_action.get(key_max[0])))
        key_max, max_value = max_action_percentage(cost_action, total)
        f.write("More Expensive Action by percentage: {0} -> {1} %\n".format(key_max, round(max_value, 2)))
def classify(self,features):
    """Majority vote over self._classifier, with one extra 'pos' ballot
    appended (tie-breaker behaviour preserved from the original)."""
    votes = [clf.classify(features) for clf in self._classifier]
    votes.append("pos")
    return mode(votes)
def main():
    """Print summary statistics for Dailymotion and YouTube video data.

    NOTE(review): written for Python 2 (print statements), yet uses the
    'statistics' module that is Python 3 stdlib -- presumably the PyPI
    backport; confirm the intended interpreter.  Relies on module-level
    acquire_dailymotion(), acquire_youtube(), convert_to_datetime(),
    word_count_dailymotion(), word_count_yt() and 'client'.
    """
    # acquire_dailymotion() presumably returns parallel lists:
    # [views, dates, lengths] -- confirm against the helper.
    dailymotion = acquire_dailymotion()
    print "Dailymotion"
    print "total videos: " + str(len(dailymotion[0]))
    print "mean views: " + str(statistics.mean(dailymotion[0]))
    print "median views: " + str(statistics.median(dailymotion[0]))
    print "STD views: " + str(statistics.stdev(dailymotion[0]))
    print "Average Date: " + str(convert_to_datetime(statistics.mean(dailymotion[1])))
    print "Median Date: " + str(convert_to_datetime(statistics.median(dailymotion[1])))
    print "Average Lengths: " + str(statistics.mean(dailymotion[2]))
    print "Median Lengths: " + str(statistics.median(dailymotion[2]))
    print "STD Lengths: " + str(statistics.stdev(dailymotion[2]))
    print "Top 20 most used word in title: "
    word_count_dailymotion("title")
    print "Top 20 most used word in description:"
    word_count_dailymotion("description")
    # acquire_youtube() presumably returns [views, dates, definitions,
    # lengths] -- note the extra 'definitions' list vs. Dailymotion.
    youtube = acquire_youtube()
    print "YouTube"
    print "total videos: " + str(len(youtube[0]))
    print "mean views: " + str(statistics.mean(youtube[0]))
    print "median views: " + str(statistics.median(youtube[0]))
    print "STD views: " + str(statistics.stdev(youtube[0]))
    print "Average Date: " + str(convert_to_datetime(statistics.mean(youtube[1])))
    print "Median Date: " + str(convert_to_datetime(statistics.median(youtube[1])))
    # Most common video definition plus its share of all videos.
    print "Video Definition: " , str(statistics.mode(youtube[2])) , " - " , str(youtube[2].count(statistics.mode(youtube[2]))) ,"/" , str(len(youtube[2]))
    print "Average Lengths: " + str(statistics.mean(youtube[3]))
    print "Median Lengths: " + str(statistics.median(youtube[3]))
    print "STD Lengths: " + str(statistics.stdev(youtube[3]))
    print "Top 20 most used word in title: "
    word_count_yt("title")
    print "Top 20 most used words in description: "
    word_count_yt("description")
    client.close()
def confidence(self, features):
    """Return the fraction of classifiers agreeing with the majority."""
    votes = [clf.classify(features) for clf in self._classifiers]
    majority = mode(votes)
    return votes.count(majority) / len(votes)
def processPackets(packet_cache, strim, rtrim):
    """Analyze each probe's packets, persist the analyses through ``db``,
    and return the modal (sent, received) tallies across all probes.

    Probes whose packets cannot be analyzed are skipped with a warning on
    stderr.  ``strim``/``rtrim`` are accepted but unused here, as before.
    """
    analyses = []
    sent_tally = []
    rcvd_tally = []
    for probe_id, packets in packet_cache:
        try:
            analysis, sent, rcvd = analyzePackets(packets, timestamp_precision)
            analysis['probe_id'] = probe_id
            analyses.append(analysis)
            sent_tally.append(sent)
            rcvd_tally.append(rcvd)
        except Exception:
            #traceback.print_exc()
            sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
    db.addTrimAnalyses(analyses)
    db.conn.commit()
    return statistics.mode(sent_tally), statistics.mode(rcvd_tally)
def data_stat(rate):
    """Print mean, median, mode, standard deviation, max, and min of data."""
    summary = (
        ('Mean:', stat.mean(rate)),
        ('Median:', stat.median(rate)),
        ('Mode:', stat.mode(rate)),
        ('S.D.:', stat.stdev(rate)),
        ('Max:', max(rate)),
        ('Min:', min(rate)),
    )
    for label, value in summary:
        print(label, value)
def confidence(self, features):
    """Return the majority-vote agreement ratio formatted to 2 decimals."""
    votes = [clf.classify(features) for clf in self._classifiers]
    agreement = votes.count(mode(votes)) / float(len(votes))
    return format(agreement, '.2f')
def confidence(self, features):
    """Share of classifiers that voted for the winning category."""
    votes = [clf.classify(features) for clf in self._classifiers]
    winner = mode(votes)                      # most popular vote
    return votes.count(winner) / len(votes)   # fraction that chose it
def unanimity(self, training_set):
    """Fraction of classifiers that voted with the majority (1.0 = unanimous)."""
    votes = [clf.classify(training_set) for clf in self.classifiers]
    majority = mode(votes)
    return votes.count(majority) / len(votes)
def get_3p_cookie_stats(self, num_pages, tld_filter = None):
    """
    determines basic stats for the number of 3p cookies contacted per-page

    note that a single 3p may set more than one cookie

    if tracker_domains have been set the stats will reflect only
    third-parties which have crossed the threshold (see get_tracker_domains())
    """
    # each page id corresponds to a list of cookie ids
    page_id_to_cookie_id_dict = {}

    # run query to get all page id, 3p cookie id, 3p cookie domain entries
    for row in self.sql_driver.get_page_id_3p_cookie_id_3p_cookie_domain(tld_filter):
        page_id = row[0]
        cookie_id = row[1]
        cookie_domain = row[2]

        # when a tracker_domains list exists, ignore cookies set by
        # domains which are not trackers
        # (replaces four duplicated nested branches in the original)
        if self.tracker_domains and cookie_domain not in self.tracker_domains:
            continue

        # append to this page's cookie list, creating it on first sight
        # (in-place append, not the original O(n^2) list concatenation)
        if page_id not in page_id_to_cookie_id_dict:
            page_id_to_cookie_id_dict[page_id] = []
        page_id_to_cookie_id_dict[page_id].append(cookie_id)

    # the number of 3p cookies per page is the length of its list
    per_page_3p_cookie_counts = [len(ids) for ids in page_id_to_cookie_id_dict.values()]

    # pages that have no 3p cookies are not yet counted; pad with zeros
    per_page_3p_cookie_counts.extend([0] * (num_pages - len(per_page_3p_cookie_counts)))

    # mean and median should always be ok
    mean = statistics.mean(per_page_3p_cookie_counts)
    median = statistics.median(per_page_3p_cookie_counts)

    # mode raises StatisticsError when there is no unique mode; the
    # original bare except: also swallowed unrelated errors
    try:
        mode = statistics.mode(per_page_3p_cookie_counts)
    except statistics.StatisticsError:
        mode = None

    return(mean, median, mode)
# --- Notebook-style cleanup of a cricket bowling dataset (module level).
# NOTE(review): 'df' is created outside this view -- presumably a pandas
# DataFrame loaded earlier; confirm.
df["Height (cm)"] = df["Height (cm)"].astype(float)
print(df.info())
print(df['Height (cm)'].unique())
print(df['Bowling Style'].unique())

from statistics import mean  # NOTE(review): imported but unused below

# Impute missing heights with the column mean.
df["Height (cm)"].fillna(df["Height (cm)"].mean(), inplace=True)
df.head()  # NOTE(review): return value discarded (only useful in a notebook)

import statistics
from statistics import mode

# Show the most common bowling style; the hard-coded fill value below
# presumably matches this mode -- TODO confirm.
print(statistics.mode(df['Bowling Style']))
df['Bowling Style'].fillna('Right-arm fast-medium', inplace=True)
print(df.isnull().sum())

# Export the cleaned file and trigger a browser download (Colab only).
from google.colab import files
df.to_csv('421_bowling_missingValues.csv', index=False)
files.download('421_bowling_missingValues.csv')

# Encode the categorical column and normalise integer dtypes.
df["Bowling Style"] = df["Bowling Style"].astype('category')
df["Bowling Style"] = df["Bowling Style"].cat.codes
df["Mat"] = df["Mat"].astype(int)
df["Inns"] = df["Inns"].astype(int)
df["Balls"] = df["Balls"].astype(int)
# NOTE(review): this chunk starts mid-statement -- the opening of the
# column-name list (presumably a pd.read_csv(..., names=[...]) call
# producing 'dane') is outside this view.
    'Season', 'Age', 'Dis', 'Trau', 'Inter', 'Fever', 'FreqAlc', 'SmokingH', 'SitHours', 'Output'
])
ilosc_danych: int = len(dane)  # total number of records
print(dane)

# --
# Season feature: categorical season encoded as -1/-0.33/0.33/1.
print("\nCecha ilościowa: Season")
print("""Opis: Sezon, w którym przeprowadzono analizę. 1) zima, 2) wiosna, 3) lato, 4) jesień. (-1, -0,33, 0,33, 1)""")
season = dane.Season
dominant = statistics.mode(season)  # modal (most frequent) season code
licznik = len([1 for i in season if i == dominant])  # count of that code
print("Dominanta: ", dominant)
print("Liczebność: ", licznik)
print("Częstość: ", licznik / ilosc_danych)

# Age feature: stored normalised to [0, 1]; rescaled back to 18-36 years.
print("\nCecha ilościowa: Age")
print("Opis: Wiek w momencie analizy. 18–36 (0, 1)")
age = round(dane.Age * 18) + 18
print("Średnia: ", np.mean(age))
print("Odchylenie standardowe: ", np.std(age))
print("Mediana: ", np.median(age))
print("Maksimum: ", np.amax(age))
print("Minimum: ", np.amin(age))

print("\nCecha jakościowa: IfDiseases")
# NOTE(review): fragment of a per-face loop -- gray_face/rgb_face crops,
# the classifiers, label maps, *_window lists, frame_window and the draw_*
# helpers are all defined outside this view.
emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
emotion_text = emotion_labels[emotion_label_arg]
emotion_window.append(emotion_text)

# Gender is predicted from the colour crop (batch axis added first).
rgb_face = np.expand_dims(rgb_face, 0)
rgb_face = preprocess_input(rgb_face, False)
gender_prediction = gender_classifier.predict(rgb_face)
gender_label_arg = np.argmax(gender_prediction)
gender_text = gender_labels[gender_label_arg]
gender_window.append(gender_text)

# Keep both rolling prediction windows bounded to frame_window entries.
if len(gender_window) > frame_window:
    emotion_window.pop(0)
    gender_window.pop(0)

# mode() can raise (empty window, or no unique winner before Python 3.8);
# in that case skip drawing for this frame.
try:
    emotion_mode = mode(emotion_window)
    gender_mode = mode(gender_window)
except:
    continue

# Colour the overlay by the instantaneous gender prediction.
if gender_text == gender_labels[0]:
    color = (0, 0, 255)
else:
    color = (255, 0, 0)

draw_bounding_box(face_coordinates, rgb_image, color)
draw_text(face_coordinates, rgb_image, gender_mode, color, 0, -20, 1, 1)
draw_text(face_coordinates, rgb_image, emotion_mode, color, 0, -45, 1, 1)
# Empirical-rule check: how many heights fall within 1/2/3 standard
# deviations of the mean.  Requires height-weight.csv in the working dir.
import pandas as pd
import statistics
import csv  # NOTE(review): imported but not used in this chunk

df = pd.read_csv("height-weight.csv")
heightlist = df["Height(Inches)"].to_list()
weightlist = df["Weight(Pounds)"].to_list()  # NOTE(review): unused below

# Descriptive statistics for the height column.
heightmean = statistics.mean(heightlist)
heightmedian = statistics.median(heightlist)
heightmode = statistics.mode(heightlist)
heightstdev = statistics.stdev(heightlist)
print(heightmean)
print(heightmedian)
print(heightmode)
print(heightstdev)

# Interval bounds at 1, 2 and 3 standard deviations around the mean.
firststart = heightmean - heightstdev
firstend = heightmean + heightstdev
secondstart = heightmean - 2*heightstdev
secondend = heightmean + 2*heightstdev
thirdstart = heightmean - 3*heightstdev
thirdend = heightmean + 3*heightstdev

# Values strictly inside each interval (boundary values excluded).
first = [result for result in heightlist if result > firststart and result < firstend]
second = [result for result in heightlist if result > secondstart and result < secondend]
third = [result for result in heightlist if result > thirdstart and result < thirdend]
def binary_classifier():
    """Run a LogReg + NaiveBayes + GRU ensemble on one sample and write the
    binary (true/false) verdict to binary_output.txt.

    NOTE(review): depends on module-level helpers convert_to_bin() and
    sent2vec(), on sklearn/keras/tqdm imports made elsewhere in the file,
    and on dataset/, statement.txt, justification.txt, glove.42B.300d.txt
    and gru_bin.h5 being present.
    """
    train = pd.read_csv('dataset/train2.tsv', delimiter='\t', encoding='utf-8')
    test = pd.read_csv('dataset/test2.tsv', delimiter='\t', encoding='utf-8')
    # Columns 3 and 15 hold the statement and justification texts.
    x_test = test.iloc[:, [3, 15]]
    x_test = np.asarray(x_test)
    x_test = x_test.tolist()
    X_test = []
    for d in x_test:
        d = str(d[0])
        # NOTE(review): at this point d is already a string, so str(d[1])
        # appends the string's SECOND CHARACTER, not the justification
        # column -- this looks like a bug (same pattern repeated below).
        d = d + str(d[1])
        X_test.append(d)
    y_test = test.iloc[:, 2:3]
    y_test = np.asarray(y_test)
    x_train = train.iloc[:, [3, 15]]
    x_train = np.asarray(x_train)
    x_train = x_train.tolist()
    X = []
    for d in x_train:
        d = str(d[0])
        d = d + str(d[1])  # NOTE(review): same suspected bug as above
        X.append(d)
    y_train = train.iloc[:, 2:3]
    y_train = np.asarray(y_train)
    # Sample to classify = statement text + justification text.
    # NOTE(review): these file handles are never closed.
    f = open("statement.txt", "r")
    statement = f.read()
    f = open("justification.txt", "r")
    justification = f.read()
    sample_text = statement + justification
    # Input Word Embeddings: word 1-3 gram counts over train + test.
    ctv = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}', ngram_range=(1, 3), stop_words='english')
    ctv.fit(list(X) + list(X_test))
    sample = []
    sample.append(str(sample_text))
    xtrain_ctv = ctv.transform(X)
    xtest_ctv = ctv.transform(X_test)
    xsample = ctv.transform(sample)
    y_train_binary = convert_to_bin(y_train)
    y_train_binary = np.asarray(y_train_binary)
    # Output (True:1, False:0)
    le = preprocessing.LabelEncoder()
    y_train_binary = le.fit_transform(y_train_binary)
    print("Logistic Regression")
    logmodel = LogisticRegression()
    logmodel.fit(xtrain_ctv, y_train_binary)
    # NaiveBayes
    print("NB")
    nbmodel = MultinomialNB()
    nbmodel.fit(xtrain_ctv, y_train_binary)
    # Deep Learning Model
    print("GRU")
    # GloVe: parse 300-d vectors; tokens may contain spaces, so the word is
    # everything except the last 300 fields.
    embeddings_index = {}
    f = open('glove.42B.300d.txt', encoding='utf8')
    for line in tqdm(f):
        values = line.split()
        word = ''.join(values[:-300])
        coefs = np.asarray(values[-300:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()
    xtrain_glove = [sent2vec(x) for x in tqdm(X)]
    xtest_glove = [sent2vec(x) for x in tqdm(X_test)]
    # Scaling
    # NOTE(review): the scaled GloVe features below are computed but never
    # used by any of the three models.
    scl = preprocessing.StandardScaler()
    xtrain_glove_scl = scl.fit_transform(xtrain_glove)
    xtest_glove_scl = scl.transform(xtest_glove)
    token = text.Tokenizer(num_words=None)
    max_len = 300
    token.fit_on_texts(list(X) + list(X_test))
    xtrain_seq = token.texts_to_sequences(X)
    xvalid_seq = token.texts_to_sequences(X_test)
    xsample_seq = token.texts_to_sequences(sample)
    # zero pad the sequences
    xtrain_pad = sequence.pad_sequences(xtrain_seq, maxlen=max_len)
    xtest_pad = sequence.pad_sequences(xvalid_seq, maxlen=max_len)
    xsample_pad = sequence.pad_sequences(xsample_seq, maxlen=max_len)
    word_index = token.word_index
    # Embedding matrix row i = GloVe vector of the token with index i.
    embedding_matrix = np.zeros((len(word_index) + 1, 300))
    for word, i in tqdm(word_index.items()):
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    # Two stacked GRUs over frozen GloVe embeddings, sigmoid output.
    nn = Sequential()
    nn.add(
        Embedding(len(word_index) + 1, 300, weights=[embedding_matrix], input_length=max_len, trainable=False))
    nn.add(SpatialDropout1D(0.3))
    nn.add(GRU(300, dropout=0.3, recurrent_dropout=0.3, return_sequences=True))
    nn.add(GRU(300, dropout=0.3, recurrent_dropout=0.3))
    nn.add(Dense(1024, activation='relu'))
    nn.add(Dropout(0.8))
    nn.add(Dense(1024, activation='relu'))
    nn.add(Dropout(0.8))
    nn.add(Dense(1, activation='sigmoid'))
    nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    # NOTE(review): the freshly-built model is immediately replaced by the
    # pretrained weights file -- the construction above is effectively dead.
    nn = load_model('gru_bin.h5')
    # Ensemble: majority vote of the three models (GRU thresholded at 0.55).
    print("Ensemble")
    pred1_test = logmodel.predict(xsample)
    pred2_test = nbmodel.predict(xsample)
    pred3_test = nn.predict(xsample_pad)
    pred3_bin = []
    for i in pred3_test:
        if i >= 0.55:
            pred3_bin.append(1)
        else:
            pred3_bin.append(0)
    data = [pred1_test[0], pred2_test[0], pred3_bin[0]]
    output = mode(data)
    labels = list(le.inverse_transform([0, 1]))
    output_file = open("binary_output.txt", "w")
    output_file.write("Output is " + str(output))
    output_file.write("\n")
    output_file.write("0,1 correspond to " + str(labels) + " respectively")
    output_file.close()
def Calcular_Mode(self, pdValores):
    """Return the most frequent value found in *pdValores*."""
    return stats.mode(pdValores)
# NOTE(review): fragment of a per-face loop -- the matching try:, the face
# crop, emotion_classifier, emotion_window, frame_window and freq are all
# defined outside this view; the trailing else: is cut off as well.
except:
    continue
gray_face = preprocess_input(gray_face, True)
# Add the batch and channel axes the CNN expects.
gray_face = np.expand_dims(gray_face, 0)
gray_face = np.expand_dims(gray_face, -1)
emotion_prediction = emotion_classifier.predict(gray_face)
emotion_probability = np.max(emotion_prediction)
emotion_label_arg = np.argmax(emotion_prediction)
emotion_text = emotion_labels[emotion_label_arg]
emotion_window.append(emotion_text)
# Bound the rolling window of recent predictions.
if len(emotion_window) > frame_window:
    emotion_window.pop(0)
# mode() can raise (empty window, or tie before Python 3.8); skip frame.
try:
    emotion_mode = mode(emotion_window)
except:
    continue
# Colour-code the overlay by emotion, scaled by prediction confidence.
if emotion_text == 'angry':
    color = emotion_probability * np.asarray((255, 0, 0))
elif emotion_text == 'sad':
    # presumably freq holds audio-signal bounds and 'CRYING' fires when a
    # sad face coincides with loud audio -- TODO confirm in enclosing loop
    if freq['min'] < -0.13 and freq['max'] > 0.2:
        print("CRYING")
        break
    color = emotion_probability * np.asarray((0, 0, 255))
elif emotion_text == 'happy':
    color = emotion_probability * np.asarray((255, 255, 0))
elif emotion_text == 'surprise':
    color = emotion_probability * np.asarray((0, 255, 255))
else:
import csv
import statistics

# Summarise the prices found in the first data row of car_data.csv.
with open('car_data.csv') as csv_file:
    reader = csv.reader(csv_file)
    for cat in reader:
        # The outer loop consumes one row (the header); the comprehension
        # then walks the remaining rows but keeps only the first (i == 0),
        # after which the reader is exhausted and the outer loop ends.
        # FIX: 'i is 0' relied on CPython's small-int identity caching and
        # emits a SyntaxWarning on Python 3.8+; '==' is the correct test.
        prices = [int(price[1:].replace(',', ''))
                  for i, row in enumerate(reader) if i == 0
                  for price in row[1:]
                  if price != 'N/A' and price != '']
        mean = statistics.mean(prices)
        median = statistics.median(prices)
        mode = statistics.mode(prices)
        print(f'Mean: {mean}, Median: {median}, Mode: {mode}')
# Simulate 1000 rolls of two dice and summarise the totals.
import plotly.express as px
import plotly.figure_factory as ff
import statistics

# NOTE(review): 'random' is used below but not imported in this chunk --
# presumably imported earlier in the file.
dice_result = []
for i in range(0, 1000):
    dice1 = random.randint(1, 6)
    dice2 = random.randint(1, 6)
    dice_result.append(dice1 + dice2)

# Central-tendency and spread statistics of the roll totals.
mean = sum(dice_result) / len(dice_result)
print(mean)
median = statistics.median(dice_result)
print(median)
mode = statistics.mode(dice_result)
print(mode)
std_deviation = statistics.stdev(dice_result)
print(std_deviation)

# Interval bounds at 1, 2 and 3 standard deviations around the mean.
first_std_deviation_start, first_std_deviation_end = mean - std_deviation, mean + std_deviation
second_std_deviation_start, second_std_deviation_end = mean - (
    2 * std_deviation), mean + (2 * std_deviation)
third_std_deviation_start, third_std_deviation_end = mean - (
    3 * std_deviation), mean + (3 * std_deviation)

# Rolls strictly inside the 1-sigma interval.
list_of_data_within_1_std_deviation = [
    result for result in dice_result
    if result > first_std_deviation_start and result < first_std_deviation_end
]
# NOTE(review): the chunk is cut off mid-statement below.
list_of_data_within_2_std_deviation = [
def mode(self):
    """Return the most frequently occurring value in ``self.numbers``."""
    values = self.numbers
    return statistics.mode(values)
def run(workDir):
    """Aggregate per-word score statistics into <workDir>/wordStats/<word>.stats.csv.

    Reads <workDir>/wordsList.txt (one word per line); for each word, parses
    <workDir>/words/<word>.csv whose rows are 6 comma-separated fields with
    the value in column 1 and the score in column 5. Scores are grouped into
    0.1-wide bins from -1.0 up to (but not including) 1.0, and each bin yields
    one output line:  bin,count,mean,median,stdev,skew,mode
    """
    # Function-scope imports match the file's existing style (see resetPass).
    import os
    import statistics
    from scipy.stats import mode, skew  # SciPy provides skew and the multi-value mode

    with open(workDir + "/wordsList.txt", mode="r") as fh:
        allWords = fh.read().split("\n")

    # Portable and shell-free (the original shelled out to `mkdir`).
    os.makedirs(workDir + "/wordStats", exist_ok=True)

    for word in allWords:
        # Skip blank lines and absurdly long tokens (guards the filesystem path).
        if len(word) == 0 or len(word) > 250:
            continue
        csvPath = workDir + "/words/" + word + ".csv"
        if not os.path.exists(csvPath):
            continue
        with open(csvPath, mode="r") as fh:
            lines = fh.read().split("\n")

        scoreHash = {}  # score -> list of values observed at that score
        scoreArr = []   # distinct scores, sorted below
        for line in lines:
            arr = line.split(",")
            if len(arr) != 6:
                continue
            if float(arr[1]) == 0:
                continue
            score = float(arr[5])
            # BUG FIX: the original tested the *string* arr[5] against float
            # keys, so the check never matched; every row reset its bucket and
            # appended a duplicate score to scoreArr, losing all but the last
            # value recorded for each score.
            if score not in scoreHash:
                scoreHash[score] = []
                scoreArr.append(score)
            scoreHash[score].append(float(arr[1]))
        scoreArr.sort()

        allOutput = []
        i = -1.0
        while i < 1:
            # Collect every value whose score falls in the half-open bin
            # [i - 0.05, i + 0.05); scoreArr is sorted, so bucket order is stable.
            bucket = []
            for score in scoreArr:
                if i - 0.05 <= score < i + 0.05:
                    bucket.extend(scoreHash[score])
            if len(bucket) == 0:
                allOutput.append(str(round(i, 1)) + ",0,0,0,0,0,0")
            elif len(bucket) == 1:
                # stdev/skew are undefined for a single sample; emit zeros.
                allOutput.append(str(round(i, 1)) + "," + str(len(bucket))
                                 + "," + str(statistics.mean(bucket))
                                 + "," + str(statistics.median(bucket))
                                 + ",0,0," + str(statistics.mode(bucket)))
            else:
                modeVals = mode(bucket)
                try:
                    modeVal = max(modeVals.mode)   # SciPy < 1.9: .mode is an array
                except TypeError:
                    modeVal = modeVals.mode        # SciPy >= 1.9: .mode is a scalar
                allOutput.append(str(round(i, 1)) + "," + str(len(bucket))
                                 + "," + str(statistics.mean(bucket))
                                 + "," + str(statistics.median(bucket))
                                 + "," + str(statistics.stdev(bucket))
                                 + "," + str(skew(bucket))
                                 + "," + str(modeVal))
            # Float accumulation kept on purpose: it reproduces the original
            # bin labels exactly (including the "-0.0" bin).
            i += 0.1

        with open(workDir + "/wordStats/" + word + ".stats.csv", mode="w") as out:
            out.write("\n".join(allOutput))
func19(numbs)

# 21: build a list of ten random integers in the inclusive range [25, 110].
# BUG FIX: renamed from `list`, which shadowed the builtin and would break
# any later list(...) call in this script.
rand_list = [random.randint(25,110) for _ in range(10)]
print(rand_list)

# 24: shuffle the sequence in place
random.shuffle(numbs)
print("shuffle ",numbs)

# 27: most common element of the sample list
list27=[1, 5, 23, 5, 12, 2, 5, 1, 18, 5]
print("mode ",statistics.mode(list27))

# 29: split the string into words and print the longest one
st="python php pascal javascript java c++"
list29=st.split()
print(list29)
print(max(list29, key=len))

# 30: mean, median and mode of the sample list
# (a dead `list=[]` assignment, never read, was removed here)
list27=[1, 5, 23, 5, 12, 2, 5, 1, 18, 5]
list27.sort()
print(sum(list27)/len(list27))
print(statistics.median(list27))
print(statistics.mode(list27))
def descriptiveAnalysis(self, x, isSample=True):
    '''Performs basic descriptive analysis on a data set.

    Calculates mean, median, mode, quartiles, standard deviation, etc.,
    storing every result as an attribute on this object.

    Inputs
    -------
    x : numpy.array object
        The dataset
    isSample : Boolean (True/False)
        Some statistical calculations (stdev/variance) depend upon whether
        the data is sample or population data

    Outputs
    -------
    self : this object, returned with the computed statistics attached as
        attributes (data, size, min, max, range, q1, q2, q3,
        interquartileRange, mean, mode, median, stdev, variance,
        skewCoefficient)
    '''
    self.data = x
    # sample size
    self.size = x.size
    # range data
    self.min = np.min(x)
    self.max = np.max(x)
    self.range = self.max - self.min
    # Quartiles
    self.q1 = np.percentile(x, 25)
    self.q2 = np.percentile(x, 50)
    self.q3 = np.percentile(x, 75)
    self.interquartileRange = self.q3 - self.q1
    self.mean = np.mean(x)
    # Mode (most common number) is a robust measure of central location
    # for nominal level data
    try:
        self.mode = mode(x)
    except StatisticsError as e:
        logging.exception(e)
        # BUG FIX (resolves the old TODO): previously nothing was assigned
        # here, leaving self.mode unset (or stale from a prior call).
        # None signals "no mode could be determined".
        self.mode = None
    # The median is a robust measure of central location for ordinal level
    # data, and is less affected by the presence of outliers in your data.
    # When the number of data points is odd, the middle data point is
    # returned; when even, the median is interpolated by averaging the two
    # middle values.
    self.median = np.median(x)
    if isSample:
        ## Sample Data
        self.stdev = stdev(x)
        # Variance, or second moment about the mean, is a measure of the
        # variability (spread or dispersion) of data. A large variance
        # indicates that the data is spread out; a small variance indicates
        # it is clustered closely around the mean.
        self.variance = variance(x)  # == stdev**2
    else:
        ## Population Data
        self.stdev = pstdev(x)
        self.variance = pvariance(x)
    # Pearson's second skewness coefficient (median skewness)
    # NOTE(review): divides by stdev, so constant data (stdev == 0) raises
    # ZeroDivisionError — confirm callers never pass a constant series.
    self.skewCoefficient = 3 * (self.mean - self.median) / self.stdev
    return self
# Using cv2.putText() method # frame = cv2.putText(frame, 'Left', org, font, fontScale, color, thickness, cv2.LINE_AA) leftwidth.append(w) leftheight.append(h) elif x > b / 2: # frame = cv2.putText(frame, 'Right', org, font, fontScale, color, thickness, cv2.LINE_AA) rightwidth.append(w) rightheight.append(h) cap.release() if b in leftwidth: leftwidth.remove(b) if a in leftheight: leftheight.remove(a) lw = statistics.mode(leftwidth) lh = statistics.mode(leftheight) if b in rightwidth: rightwidth.remove(b) if a in rightwidth: rightheight.remove(a) rw = statistics.mode(rightwidth) rh = statistics.mode(rightheight) avgw = (lw + rw) / 2 avgh = (lh + rh) / 2 print(int(avgw), int(avgh)) # t = int(time.time())*1000
}, { "$sort": { "avgCount": -1 } }] sumByZip = db.crimes.aggregate(pipeline) averages = [] for zipEntry in sumByZip: print zipEntry["_id"], "=", zipEntry["avgCount"] averages.append(zipEntry["avgCount"]) medianValue = statistics.median(averages) avg = statistics.mean(averages) mode = statistics.mode(averages) stdev = statistics.stdev(averages, medianValue) print "==============================" print "mean/avg crimes per zipcode", avg print "median crimes per zipcode", medianValue print "mode crimes per zipcode", mode print "stdev crimes per zipcode", stdev # db.Listing.find().forEach(function(item){ # db.Listing.update({_id: item._id}, {$set: { LowerCaseAddress: item.Address.toLowerCase() }}) # }) def read_file(filename): with open(filename, 'r') as f:
skewness.shape[0])) # Now let's apply the box-cox transformation to correct for skewness skewed_features = skewness.index lam = 0.15 for feature in skewed_features: all_data[feature] = boxcox1p(all_data[feature], lam) # Creating a new feature: Total Square Footage all_data['TotalSF'] = all_data['TotalBsmtSF'] + all_data[ '1stFlrSF'] + all_data['2ndFlrSF'] # Identifying features where a class is over 97% represented low_var_cat = [ col for col in all_data.select_dtypes(exclude=['number']) if 1 - sum(all_data[col] == mode(all_data[col])) / len(all_data) < 0.03 ] low_var_cat # Dropping these columns from both datasets all_data = all_data.drop( ['Street', 'Utilities', 'Condition2', 'RoofMatl', 'Heating', 'PoolQC'], axis=1) # List of columns to Label Encode cols = ('FireplaceQu', 'BsmtQual', 'BsmtCond', 'GarageQual', 'GarageCond', 'ExterQual', 'ExterCond', 'HeatingQC', 'KitchenQual', 'BsmtFinType1', 'BsmtFinType2', 'Functional', 'Fence', 'BsmtExposure', 'GarageFinish', 'LandSlope', 'LotShape', 'PavedDrive', 'Alley', 'CentralAir', 'MSSubClass', 'OverallCond', 'YrSold', 'MoSold')
def func(video_path): # file to store metadata metaData = open( 'C:/Users/ASUS/Desktop/Face Recognition/trial1/Face Detection and Emotion Analysis/src/final1.csv', 'a') writer = csv.writer(metaData) # parameters for loading data and images detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml' emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5' emotion_labels = get_labels('fer2013') # hyper-parameters for bounding boxes shape frame_window = 10 emotion_offsets = (20, 40) # loading models face_detection = load_detection_model(detection_model_path) emotion_classifier = load_model(emotion_model_path, compile=False) # getting input model shapes for inference emotion_target_size = emotion_classifier.input_shape[1:3] # starting lists for calculating modes emotion_window = [] toc = time.time() # starting video streaming cv2.namedWindow('window_frame') #video_capture = cv2.VideoCapture(sys.argv[1]) video_capture = cv2.VideoCapture(video_path) #video_capture = cv2.VideoCapture('videoplayback.mp4') while True: bgr_image = video_capture.read()[1] gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) faces = detect_faces(face_detection, gray_image) frame_count = int(video_capture.get(cv2.CAP_PROP_POS_FRAMES)) tic = time.time() for face_coordinates in faces: x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets) gray_face = gray_image[y1:y2, x1:x2] try: gray_face = cv2.resize(gray_face, (emotion_target_size)) except: continue actor_face = cv2.resize(gray_face, (128, 128)) cv2.imwrite( "E:/tensorflow-master/tensorflow/examples/image_retraining/face.jpg", actor_face) video_capture.set(1, int(frame_count)) ret, frame = video_capture.read() cv2.imwrite( "E:/Object Detection/models-master/tutorials/image/imagenet/object.jpg", gray_image) gray_face = preprocess_input(gray_face, True) gray_face = np.expand_dims(gray_face, 0) gray_face = 
np.expand_dims(gray_face, -1) emotion_prediction = emotion_classifier.predict(gray_face) emotion_probability = np.max(emotion_prediction) emotion_label_arg = np.argmax(emotion_prediction) emotion_text = emotion_labels[emotion_label_arg] emotion_window.append(emotion_text) s2_out = subprocess.check_output([ sys.executable, "E:/tensorflow-master/tensorflow/examples/label_image/label_image.py", "--graph=E:/tmp/output_graph.pb", "--labels=E:/tmp/output_labels.txt", "--input_layer=Mul", "--output_layer=final_result", "--input_mean=128", "--input_std=128", "--image=E:/tensorflow-master/tensorflow/examples/image_retraining/face.jpg" ]) actor_confidence = s2_out.split()[1] if (float(actor_confidence) > 0.5): actor = s2_out.split()[0] else: actor = "" print(s2_out) s3_out = subprocess.check_output([ sys.executable, "E:/Object Detection/models-master/tutorials/image/imagenet/classify_image.py", "--image_file=E:/Object Detection/models-master/tutorials/image/imagenet/object.jpg" ]) object1 = s3_out.split()[0] print(s3_out) writer.writerows([[(tic - toc), frame_count, emotion_text, emotion_probability, actor, actor_confidence, face_coordinates, object1]]) if len(emotion_window) > frame_window: emotion_window.pop(0) try: emotion_mode = mode(emotion_window) except: continue if emotion_text == 'angry': color = emotion_probability * np.asarray((255, 0, 0)) elif emotion_text == 'sad': color = emotion_probability * np.asarray((0, 0, 255)) elif emotion_text == 'happy': color = emotion_probability * np.asarray((255, 255, 0)) elif emotion_text == 'surprise': color = emotion_probability * np.asarray((0, 255, 255)) else: color = emotion_probability * np.asarray((0, 255, 0)) color = color.astype(int) color = color.tolist() draw_bounding_box(face_coordinates, rgb_image, color) draw_text(face_coordinates, rgb_image, emotion_mode, color, 0, -20, 1, 1) draw_text(face_coordinates, rgb_image, actor, color, 0, -45, 1, 1) bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR) 
cv2.imshow('window_frame', bgr_image) if cv2.waitKey(1) & 0xFF == ord('q'): break
bledne_data = bledny_input_file.read()
bledne_data = bledne_data.splitlines()

# Input comes in pairs of lines: a label line followed by the sequence line.
bledne_sequences = []
for i in range(0, len(bledne_data), 2):
    bledne_sequences.append([bledne_data[i], bledne_data[i + 1]])

bledne_wyrazy = []
for para in bledne_sequences:
    bledne_sequence = [int(x) for x in para[1].split()]

    # Differences between consecutive terms of the (almost) arithmetic sequence.
    bledne_roznice = []
    for j in range(len(bledne_sequence) - 1):
        bledne_roznice.append(bledne_sequence[j + 1] - bledne_sequence[j])

    # PERF FIX: the original recomputed mode() on every append even though
    # only the final value was ever used; compute it once after the loop.
    # The `else 0` preserves the original default for sequences shorter
    # than two elements.
    most_common_difference = mode(bledne_roznice) if bledne_roznice else 0

    # The first difference that deviates from the common difference points
    # at the faulty element.
    for j, roznica in enumerate(bledne_roznice):
        if roznica != most_common_difference:
            if j == 0:
                # The very first difference is off, so blame the first term.
                bledne_wyrazy.append(bledne_sequence[0])
                break
            else:
                # Theoretically this index could go out of bounds, but in
                # practice it cannot: if this element were faulty, the
                # previous one would already have been flagged.
                bledne_wyrazy.append(bledne_sequence[j + 1])
                break

print(bledne_wyrazy)
median = (len // 2) median = X[median] else: a = len // 2 median1 = X[a] median2 = X[a - 1] median = (median1 + median2) / 2 #find the mode most_frequent = 0 num = 0 for i in X: fre = X.count(i) if (most_frequent < fre): most_frequent = fre num = i print(mean) print(median) print(num) #using stastics module from statistics import mean, median, mode #find the mean mean = mean(X) #find the median median = median(X) #find the mode mode = mode(X)
continue # Save figures and targets np.save(f'{path_dest}original/{i}_{n_ndl}.npy', orig_small) np.save(f'{path_dest}inpainted/{i}_{n_ndl}.npy', last_small) np.savez_compressed(f'{path_dest}mask/{i}_{n_ndl}', mask_small) names_to_save.append(f'{i}_{n_ndl}') # Get the malignancy score malignancy_original = df_one_nodule.malignancy.values malignancies_original.append(malignancy_original) malignancy = list(map(transform_malignancy, malignancy_original)) malignancy = list(filter(None, malignancy)) malignancies.append(malignancy) try: malignancy_mode = mode(malignancy) malignancies_mode.append(malignancy_mode) # Next lines are to append to malignancies_mode_3_agree (if at least 3 reviewers agree on malignancy) agree_with_mode = [ 1 if malignancy_mode == i else 0 for i in malignancy ] agree_with_mode = np.sum(agree_with_mode) if agree_with_mode >= 3: malignancies_mode_3_agree.append(malignancy_mode) names_to_save_3_agree.append(f'{i}_{n_ndl}') except StatisticsError: continue # These coords can be used to 'plot_block_and_cube' # coords_Z_small = coords_Z - z_min_f # coords_X_small = coords_X - x_min_f
k = 5
print(x1,x2,y)

# Query point to classify
nx1 = 8.093607
nx2 = 3.3365732

# Euclidean distance from the query point to each of the 10 training points.
ddistance = []
for i in range(10):
    A = np.sum((x1[i]-nx1) ** 2 + (x2[i]-nx2) ** 2)
    Distance = np.sqrt(A)
    ddistance.append(Distance)

# Bubble-sort the distances in ASCENDING order, keeping the labels y aligned.
# BUG FIX: the original comparison sorted in DESCENDING order, so the first
# k entries were the k *farthest* points; k-nearest-neighbours must vote
# among the closest ones.
for i in range(len(ddistance)):
    for j in range(len(ddistance)-i-1):
        if ddistance[j] > ddistance[j+1]:
            ddistance[j],ddistance[j+1]=ddistance[j+1],ddistance[j]
            y[j],y[j+1]=y[j+1],y[j]
print("Distance",ddistance)
print("Y",y)

# Majority label among the k nearest neighbours.
nbr=list()
for i in range(k):
    nbr.append(y[i])
print(nbr)
knn = mode(nbr)
print(knn)
def part_1(input_list):
    """Return the sleepiest guard's id multiplied by their most-slept minute."""
    sleep_dict, minute_dict = parse_log(input_list)
    # Rank guards by total minutes asleep; the last entry sleeps the most.
    ranked = sorted(sleep_dict.items(), key=lambda entry: entry[1])
    sleepiest = ranked[-1][0]
    return sleepiest * mode(minute_dict[sleepiest])
import math # length of a diagonal(대각선의 길이) l = 4 w = 10 d = math.sqrt(l**2 + w**2) print(d) print(math.pow(2, 3)) import random print(random.randint(0,100)) import statistics nums = [1, 5, 33, 12, 46, 33, 2] print(statistics.mean(nums)) print(statistics.median(nums)) print(statistics.mode(nums)) import keyword print(keyword.iskeyword("for")) print(keyword.iskeyword("football")) import hello print(hello.print_hello()) import os print(os.path.join("Users", "bob", "st.txt"))
my_list = [1, 2, 5, 700, 300000]

# Find and print the average of num_list (2pts)
print(sum(num_list) / len(num_list))

# Remove the lowest number from num_list (2pt)
num_list.sort()
del num_list[0]
print(num_list)

# Create and print a new list called top_ten which contains only the 10 highest numbers in num_list(2pts)
# BUG FIX: `top_ten = num_list` only aliased the same list object, so the
# deletion below also gutted num_list; copy it so num_list stays intact
# (which the exercise's "new list" wording requires). The dead `top_ten = []`
# assignment was removed.
top_ten = num_list.copy()
top_ten.sort()
del top_ten[:-10]
print(top_ten)

# PROBLEM 4 (4pts)
# Find the number which appears most often in num_list?
print(mode(num_list))

# CHALLENGE PROBLEMS (2pts)
# TOUGH PROBLEMS, BUT FEW POINTS
# Find the number of prime numbers in num_list?
# Hint: One way is to just start removing the ones that aren't
# Find the number of palindromes
# Hint: This may be easier to do with strings
def mode(cls, numbers):
    """Return the most frequently occurring value in *numbers*."""
    most_common = statistics.mode(numbers)
    return most_common
# Project Euler Solutions: Problem 059
# Copyright (c) noicepollution. All Rights Reserved.
# Solution timestamp: 00:39, 18 March 2020
# https://github.com/noicepollution/project-euler
from statistics import mode

# XOR cipher with a 3-character key: every third byte shares one key byte.
with open('problem59.txt') as cipher_file:
    cipher = [int(token) for token in cipher_file.read().split(',')]

# Split the ciphertext into the three key-aligned streams.
streams = [cipher[offset::3] for offset in range(3)]

# English text is dominated by spaces (0x20), so the most frequent byte in
# each stream is a space XOR-ed with that stream's key byte.
key = [mode(stream) ^ 32 for stream in streams]

# Decrypt every byte and sum the ASCII values of the plain text.
res = sum(byte ^ key[position % 3] for position, byte in enumerate(cipher))
print(res)
xValuesMean = statistics.mean(dataListedSeriesA)
print("The mean is ", xValuesMean)

xValuesMedian = statistics.median(dataListedSeriesA)
print("The median is ", xValuesMedian)

xValuesVari = statistics.variance(dataListedSeriesA)
print("The variance is ", xValuesVari)

xValuesHar = statistics.harmonic_mean(dataListedSeriesA)
print("The Harmonic Mean is ", xValuesHar)

# The mode can sometimes have an error if there is more than one most common number
# BUG FIX: the bare `except:` also swallowed KeyboardInterrupt/SystemExit and
# hid typos; catch only the documented StatisticsError. (Since Python 3.8,
# statistics.mode raises only for empty input, not for multimodal data.)
try:
    xValuesMode = statistics.mode(dataListedSeriesA)
    print("The Mode is ", xValuesMode)
except statistics.StatisticsError:
    print("Plot twist...THERE IS NO MODE!")
"""
The csv used for this example:

2013,4,5
2013,4,5
2014,6,7
2015,8,8
2016,15,9
2017,15,10
"""
def classify(self, features):
    """Return the majority-vote label produced by the wrapped classifiers."""
    # Collect one vote per underlying classifier, then take the most
    # common answer.
    ballots = [voter.classify(features) for voter in self.classifiers]
    return mode(ballots)
def test_mode(series):
    """Check that the Mode reducer agrees with statistics.mode on column 0."""
    via_aggregate = c.aggregate(c.ReduceFuncs.Mode(c.item(0))).execute(series)
    via_stdlib = statistics.mode(row[0] for row in series)
    assert eq(via_aggregate, via_stdlib)
i += 1 print('Columna: %d' % i) for row in range(a[0] - 1): j += 1 if i * j % random.randint( 1000, 2000) == 0: # Generates few missing values per feature X[j, i] = '' ### 2. HANDLING MISSING DATA ################################################ # 2.1 Replacing missing data in categorical variable my most common value from statistics import mode for row in range(a[0] - 1): # 3.2 Handling missing data in Geography if X[row, 4] == '': X[row, 4] = mode(X[:, 4]) for row in range(a[0] - 1): # 3.3 Handling missing data in Gender if X[row, 5] == '': X[row, 5] = mode(X[:, 5]) for row in range(a[0] - 1): # 3.4 Handling missing data in HasCrCard if X[row, 10] == '': X[row, 10] = mode(X[:, 10]) for row in range(a[0] - 1): # 3.5 Handling missing data in IsActive if X[row, 11] == '': X[row, 11] = mode(X[:, 11]) # 2.2 Replacing missing data in countinous variable with mean of column from sklearn.preprocessing import Imputer