def ultimate_evaluate(model):
    genres = ['action', 'drama', 'horror', 'romance']
    testingData = []
    testingLabels = []
    total = defaultdict.fromkeys(range(len(genres)), 0)
    correct = defaultdict.fromkeys(range(len(genres)), 0)
    yTrue, yPredict = [], []
    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_histogram_test")
            genreFeatures = np.array([np.array(f) for f in genreFeatures])  # numpy hack
        except Exception as e:
            print e
            return
        print "OK."
        for videoFeatures in genreFeatures:
            total[genreIndex] += 1
            d = defaultdict(int)
            # List of predictions, per-frame.
            predictedClasses = model.predict(videoFeatures)
            print predictedClasses
            for i in predictedClasses:
                d[i] += 1
            predictedGenre = max(d.iteritems(), key=lambda x: x[1])[0]
            yPredict.append(predictedGenre)
            yTrue.append(genreIndex)
            if predictedGenre == genreIndex:
                correct[genreIndex] += 1
    print correct, total
    confusionMatrix = confusion_matrix(yTrue, yPredict)
    print confusionMatrix
def __init__(self, u_dict, i_dict, user_list, item_list, k):
    self.user_dict = u_dict
    self.item_dict = i_dict
    self.user_list = user_list
    self.item_list = item_list
    self.alpha = 4.2
    self.b_u = defaultdict.fromkeys(user_list, 0.1)
    self.b_i = defaultdict.fromkeys(item_list, 0.1)
    self.b_u.default_factory = float
    self.b_i.default_factory = float
    self.g_u = self.construct_g_u(k)
    self.g_i = self.construct_g_i(k)
    self.g_u.default_factory = lambda: np.zeros(k)
    self.g_i.default_factory = lambda: np.zeros(k)
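# Illustrative sketch of the fromkeys + default_factory pattern used above
# (names here are made up): defaultdict.fromkeys returns a defaultdict whose
# default_factory is None, so missing keys raise KeyError until a factory is
# assigned -- which is why the __init__ above sets it explicitly.
from collections import defaultdict

d = defaultdict.fromkeys(['alice', 'bob'], 0.1)
print(d.default_factory)   # None: a missing key would raise KeyError here
d.default_factory = float  # now missing keys default to 0.0
print(d['alice'])          # 0.1 (seeded by fromkeys)
print(d['carol'])          # 0.0 (created by default_factory)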
def _update_features(id):
    try:
        webapp = Webapp.objects.get(pk=id)
    except Webapp.DoesNotExist:
        _log(id, u'Webapp does not exist')
        return

    # We only detect features on packaged webapps.
    if not webapp.is_packaged:
        _log(id, u'Webapp is not a packaged app')
        return

    # If the app doesn't have a current_version, don't bother either.
    if not webapp.current_version:
        _log(id, u'Webapp does not have a current_version')
        return

    # If the app already has a non-empty feature profile, don't touch it.
    features = webapp.current_version.features
    if features.to_list():
        _log(id, u'Webapp already has a non-empty feature profile')
        return

    version = webapp.current_version
    res = run_validator(version.all_files[0].file_path)
    validation_result = json.loads(res)

    # Set all detected features as True and save them.
    feature_profile = validation_result['feature_profile']
    keys = ['has_%s' % feature.lower() for feature in feature_profile]
    data = defaultdict.fromkeys(keys, True)

    # Update features.
    features.update(**data)
def extract_nsc(text_body):
    first_pass_regex_match = [
        rtn_regex_grp(p, text_body) for p in nsc_prefix_lst_short
    ]
    # Note: inside a character class '|' is literal, so the class is written
    # without separators.
    second_pass_regex_match = re.findall(r'([HLJCNQFM0][0-9]{2,}\w+)',
                                         text_body)
    cmb_regex_matches = list(
        set(second_pass_regex_match + [m for m in first_pass_regex_match if m]))
    try:
        vouch_nsc = [r for r in cmb_regex_matches if 4 < len(r) < 10]
    except Exception:
        vouch_nsc = ["Didn't find any NSCs"]
    print(f'NSCs: {vouch_nsc}')
    if len(cmb_regex_matches) > 1:
        vouch_nsc = nsc_exclude(vouch_nsc)
        if not vouch_nsc:
            # The list may become empty after removing CH formulas.
            vouch_nsc = ["Didn't find any NSCs"]
        # Deduplicate while preserving order.
        vouch_nsc = list(defaultdict.fromkeys(vouch_nsc).keys())
    else:
        if not vouch_nsc:
            vouch_nsc = ["Didn't find any NSCs"]
    # Keep only NSCs that are entirely uppercase.
    vouch_nsc = list(
        filter(lambda x: not any(c.islower() for c in x), vouch_nsc))
    return vouch_nsc
def get_data_and_recipes(self):
    #### Getting the data
    recipes_df = pd.read_csv(r'./data/full_dataframe.csv',
                             na_values=['< 1']).fillna(0)
    recipes_df['ingredients'] = recipes_df.ingredients.apply(literal_eval)
    recipes_df['recipe_name'] = recipes_df['recipe_name'].astype(str)
    recipes_df = recipes_df[recipes_df['total_time'] != 0]
    recipes_df = recipes_df.drop_duplicates(subset=['recipe_name'])
    print("total number of recipes:", len(recipes_df))

    #### Initialize dictionary with all of the recipe names and a proportion of 0
    recipe_list = defaultdict.fromkeys(recipes_df['recipe_name'].to_list(), 0)

    #### Initialize dictionary
    # all_ingredients = defaultdict(int)
    # #### Populate dictionary of all ingredients in recipe csv
    # for ind in recipes_df.index:
    #     temp_list = recipes_df['ingredients'][ind]
    #     for item in temp_list:
    #         all_ingredients[str(item)] += 1
    # print("\nUser ingredients: ", user_ingred_list)
    # print("\nPlease wait...\n")

    ### Filtering recipe strictly for ingredients
    n = len(self.user_ingred_list)
    # if self.strict == "n":
    #     recipes_df = recipes_df[recipes_df["n_ingredients"] <= n]
    #     pass

    #### Filtering: proportions and banned ingredients
    for index, row in recipes_df.iterrows():
        ingre_list = row['ingredients'].copy()
        if self.strict == "y":
            # Priority: the recipe's ingredients are in the user's
            # ingredients --> minimalist list of ingredients.
            nn = len(ingre_list) * 0.001
        else:
            # Priority: the user's ingredients are in the recipe's
            # ingredients --> long list of ingredients.
            nn = n
        for user_rec in self.user_ingred_list:
            r = re.compile(r".*\b(" + user_rec + r")\b", flags=re.IGNORECASE)
            match = list(filter(r.match, ingre_list))
            if match:
                if self.user_banned_ingre != []:
                    for user_ban in self.user_banned_ingre:
                        r_1 = re.compile(r".*\b(" + user_ban + r")\b",
                                         flags=re.IGNORECASE)
                        match_1 = list(filter(r_1.match, ingre_list))
                        if not match_1:
                            recipe_list[row['recipe_name']] += 1 / nn
                            # ingre_list.remove(match[0])
                else:
                    recipe_list[row['recipe_name']] += 1 / nn
                    # ingre_list.remove(match[0])
    return recipe_list, recipes_df
def _ngram_values(string_list, n, readjust_zero_scores=True):
    '''Given the corpus of strings in 'string_list', computes n-gram
    statistics across the corpus. Returns the results as a dictionary
    containing all possible n-grams, where the dictionary keys are the
    n-grams as strings (e.g., 'aa', 'ab', 'ac', ...) and the dictionary
    values are NGramData named tuples. The numeric values inside the
    NGramData reflect the frequency statistics for that n-gram across the
    whole corpus.

    The optional argument 'readjust_zero_scores' governs what happens to the
    IDF values assigned to n-grams that do not appear in the corpus at all.
    If readjust_zero_scores = False, nothing is done, and the values are left
    at 0. If readjust_zero_scores = True, the value is set equal to the
    highest IDF value found across the 'string_list' corpus. (In our
    application, values of 0 in this situation are *not* desirable. In IDF
    terms, a lower value indicates a more frequently-seen n-gram, whereas in
    our application, we look for uncommon n-grams and thus we want never-seen
    n-grams to have a *high* value. This *could* be handled by detecting them
    when computing string scores, but that simply introduces needless
    repeated if-then tests in the step of computing scores for strings. It
    is more efficient to store the desired value. This is the reason the
    default is readjust_zero_scores = True. Note that it is still possible
    to determine that a given n-gram does not appear in the corpus simply by
    looking at the string_frequency field of the NGramData tuple for that
    n-gram, so we do not really lose any information by doing this.)
    '''
    counts = defaultdict(int)
    occurrences = defaultdict(set)
    num_strings = 0
    for s in string_list:
        s = s.lower()
        num_strings += 1
        for ngram in ngrams(s, n):
            occurrences[ngram].add(s)
            counts[ngram] += 1

    # Set initial values for all n-grams.
    all_ngrams = defaultdict.fromkeys(
        _all_possible_ngrams(n),
        NGramData(string_frequency=0, total_frequency=0, idf=0))

    # Set n-gram values based on occurrences in the corpus. (The loop
    # variable is named 'strings' to avoid shadowing the 'string_list'
    # parameter.)
    max_frequency = max(count for ngram, count in counts.items())
    for ngram, strings in occurrences.items():
        string_freq = len(strings)
        total_freq = counts[ngram]
        score = _ngram_idf_value(num_strings, string_freq, total_freq,
                                 max_frequency)
        all_ngrams[ngram] = NGramData(string_frequency=string_freq,
                                      total_frequency=total_freq,
                                      idf=score)

    # Now that we've seen all n-grams actually present in the corpus, go back
    # and set those that have 0 values to a very high value (=> rare n-gram).
    if readjust_zero_scores:
        max_idf = ceil(_highest_idf(all_ngrams))
        for ngram, value in all_ngrams.items():
            if value.idf == 0:
                # Can't set a value in an existing tuple; must regenerate it.
                all_ngrams[ngram] = NGramData(string_frequency=0,
                                              total_frequency=0,
                                              idf=max_idf)
    return all_ngrams
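# The function above assumes two helpers not shown in this excerpt, ngrams()
# and _all_possible_ngrams(). A minimal sketch of plausible definitions for
# character n-grams over lowercase ASCII (an assumption, not the original
# code):
from itertools import product
from string import ascii_lowercase

def ngrams(s, n):
    # All length-n character windows of s.
    return [s[i:i + n] for i in range(len(s) - n + 1)]

def _all_possible_ngrams(n):
    # Every possible n-gram over the lowercase alphabet.
    return [''.join(chars) for chars in product(ascii_lowercase, repeat=n)]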
def extract_with_lat_lon():
    file_name = '../data/fifi_data.xlsx'
    required_columns = ['Service Request Number', 'Created Date', 'Location',
                        'Location Details', 'Description']
    xls = pd.ExcelFile(file_name)
    fifi_dict = defaultdict.fromkeys(xls.sheet_names)
    # sheet_to_df_map = pd.read_excel(file_name, sheet_name=xls.sheet_names,
    #                                 usecols=required_columns, parse_dates=True)
    for name in fifi_dict.keys():
        fifi_dict[name] = pd.read_excel(xls, name, usecols=required_columns)

    # Assign the sheet name/key as category for each dataframe, so that the
    # dataframes can be distinguished by category post merge.
    for name, value in fifi_dict.items():
        fifi_dict[name]['Category'] = name

    df = pd.concat(fifi_dict, ignore_index=True)
    df['location_latitude'] = df['Location Details'].str.extract(
        "LatLng: (.*),.*$", expand=True)
    df['location_longitude'] = df['Location Details'].str.extract(
        "LatLng: .*, (.*)$", expand=True)
    df['location_X'] = df['Location Details'].str.extract(
        "XY: (.*),.*LatLng:.*$", expand=True)
    df['location_Y'] = df['Location Details'].str.extract(
        "XY: .*,(.*); LatLng:.*$", expand=True)
    # df = pd.read_csv('fifi_cleaned.csv', parse_dates=True)
    df['zipcode'] = df.apply(
        lambda x: get_zipcode(str(x.location_latitude).strip(),
                              str(x.location_longitude).strip()),
        axis=1)
    df.to_csv('../data/fifi_cleaned.csv')
def extract_nsc(text_body):
    # vouch_nsc = re.findall(r'(^[JCNQFM]{1}\w{1,}\d{2,})', text_body)
    # vouch_nsc = re.findall(r'([^ABD-IK-LO-PR-Zabd-ik-lo-pr-z]\w+\d{2,})', text_body)
    first_pass_regex_match = [
        rtn_regex_grp(p, text_body) for p in nsc_prefix_lst_short
    ]
    # Note: inside a character class '|' is literal, so the class is written
    # without separators.
    second_pass_regex_match = re.findall(r'([JCNQFM0]\d{2,}\w+)', text_body)
    cmb_regex_matches = list(
        set(second_pass_regex_match + [m for m in first_pass_regex_match if m]))
    try:
        vouch_nsc = [r for r in cmb_regex_matches if 4 < len(r) < 10]
    except Exception:
        vouch_nsc = ["Didn't find any NSCs"]
    print(f'NSCs: {vouch_nsc}')
    if len(cmb_regex_matches) > 1:
        vouch_nsc = nsc_exclude(vouch_nsc)
        if not vouch_nsc:
            # The list may become empty after removing CH formulas.
            vouch_nsc = ["Didn't find any NSCs"]
        # Deduplicate while preserving order.
        vouch_nsc = list(defaultdict.fromkeys(vouch_nsc).keys())
    else:
        if not vouch_nsc:
            vouch_nsc = ["Didn't find any NSCs"]
    # Filter the NSCs by length greater than 4. (The original filter() call
    # was missing its iterable argument.)
    vouch_nsc = list(filter(lambda x: len(x) > 4, vouch_nsc))
    return vouch_nsc
def _update_features(id):
    try:
        webapp = Webapp.objects.get(pk=id)
    except Webapp.DoesNotExist:
        _log(id, u"Webapp does not exist")
        return

    # We only detect features on packaged webapps.
    if not webapp.is_packaged:
        _log(id, u"Webapp is not a packaged app")
        return

    # If the app doesn't have a current_version, don't bother either.
    if not webapp.current_version:
        _log(id, u"Webapp does not have a current_version")
        return

    # If the app already has a feature profile, don't touch it.
    if AppFeatures.objects.filter(version=webapp.current_version).exists():
        _log(id, u"Webapp already has a feature profile")
        return

    version = webapp.current_version
    res = run_validator(version.all_files[0].file_path)
    validation_result = json.loads(res)

    # Set all detected features as True and save them.
    feature_profile = validation_result["feature_profile"]
    keys = ["has_%s" % feature.lower() for feature in feature_profile]
    data = defaultdict.fromkeys(keys, True)
    AppFeatures.objects.create(version=version, **data)
def export(self, file_format="json", directory=None, depth_limit=None):
    if self.task["format"] is False:
        self.format = file_format
    else:
        self.format = self.task["format"]
    if self.format not in ["json", "csv", "sql", "mongodb", "mongo"]:
        sys.exit("Wrong export format")
    if depth_limit is not None:
        completed_depth = depth_limit
    else:
        try:
            completed_depth = int(self.current_level() - 1)
        except IndexError:
            completed_depth = 2
    if directory is None:
        directory = os.path.join(RESULT_PATH, self.project)
    results_fields = defaultdict.fromkeys(
        [u'cited_domains', u'extension', u'title', u'url', u'source_url',
         u'date', u'depth', u'url_id', u'cited_links', u'cited_links_ids',
         u'crawl_nb'], 1)
    for n in self.data.find({"status.0": True}, {"_id": 1}):
        # Debug: fetch one full document. (find_one is the PyMongo spelling;
        # the original called findOne on the cursor document.)
        print(self.data.find_one({"_id": n["_id"]}, results_fields))
        break
    # query_str = '{last_status:true, depth:{$lte:%i}}, {\"_id\": 0, \"last_cited_links_ids\":1, \"last_title\":1, \"last_text\":1, \"last_status\":1, \"last_date\":1, \"depth\":1, \"url\":1, \"url_id\":1}' % completed_depth
    # query, projection = {"last_status": True, "depth": {"$lte": completed_depth}}, {"_id": 0, "last_cited_links_ids": 1, "last_title": 1, "last_text": 1, "last_status": 1, "last_date": 1, "depth": 1, "url": 1, "url_id": 1}
    outfile = os.path.join(
        directory,
        "results_export" + self.date.strftime("%d%m%Y_%H-%M") + "." + self.format)
def _changed_features(self):
    old_features = defaultdict.fromkeys(self.initial_features, True)
    old_features = set(
        unicode(f) for f in AppFeatures(**old_features).to_list())
    new_features = set(unicode(f) for f in self.instance.to_list())

    added_features = new_features - old_features
    removed_features = old_features - new_features
    return added_features, removed_features
from collections import defaultdict
import itertools

import numpy as np


def find_kmers(orientations, k, alphabet=['>', '<']):
    orientations = np.array(list(orientations))
    # Seed every possible k-mer over the alphabet with a count of zero.
    patterns = defaultdict.fromkeys(
        ("".join(p) for p in itertools.product(alphabet, repeat=k)), 0)
    for i in range(orientations.shape[0] - k + 1):
        current_window = "".join(orientations[i:i + k])
        patterns[current_window] += 1
    return dict(patterns)
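# Quick check of find_kmers with a toy orientation string: every possible
# k-mer is pre-seeded by fromkeys, so unseen patterns report 0 instead of
# being absent.
counts = find_kmers("><>><", 2)
print(counts)  # {'>>': 1, '><': 2, '<>': 1, '<<': 0}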
def __init__(self, input_data: dict, batch_sizes: list):
    self.input_data = input_data
    self.sizes_map = {}
    for name, tensors in input_data.items():
        self.sizes_map[name] = len(tensors)
    self.index_map = defaultdict.fromkeys(input_data.keys(), 0)
    self.batch_sizes = batch_sizes
    self.size = len(batch_sizes)
    self.current_group_id = 0
def _changed_features(self):
    old_features = defaultdict.fromkeys(self.initial_features, True)
    old_features = set(
        unicode(f) for f in AppFeatures(**old_features).to_list())
    new_features = set(unicode(f) for f in self.instance.to_list())

    added_features = new_features - old_features
    removed_features = old_features - new_features
    return added_features, removed_features
def gmnb_ngram_factors(real_strings, nonsense_strings, n, theta=0):
    # This actually stores the logarithm of the probabilities, because that's
    # the quantity used when applying the Bayes formula and it's more
    # efficient to precompute the log than to have to compute it over and
    # over again.
    occurrences_real = defaultdict(int)
    occurrences_nonsense = defaultdict(int)
    for string in real_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_real[ngram] += 1
    for string in nonsense_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_nonsense[ngram] += 1
    real_sum = sum(occurrences_real.values())
    nonsense_sum = sum(occurrences_nonsense.values())
    all_ngrams = all_possible_ngrams(n)
    num_ngrams = len(all_ngrams)
    num_real_strings = len(real_strings)
    num_nonsense_strings = len(nonsense_strings)

    # In this generalized version of tf-idf, if the tf for a given document
    # is 0, then the whole term is 0. So our default is simple:
    # weights = defaultdict.fromkeys(all_ngrams, NGramFactor(real_term=0, nonsense_term=0))
    missing_real = log(relative_frequency(0, real_sum, num_ngrams, 1))
    missing_nonsense = log(relative_frequency(0, nonsense_sum, num_ngrams, 1))
    weights = defaultdict.fromkeys(
        all_ngrams,
        NGramFactor(real_term=missing_real, nonsense_term=missing_nonsense))

    # We store everything that is constant for a given n-gram.
    # theta_over_N = theta/num_ngrams
    # for ngram in all_ngrams:
    #     real_term = missing_real
    #     nonsense_term = missing_nonsense
    #     if occurrences_real[ngram] > 0:
    #         idf = log(num_real_strings/occurrences_real[ngram])
    #         real = relative_frequency(occurrences_real[ngram], real_sum, num_ngrams)
    #         real_term = idf*log((1 - theta)*real + theta_over_N)
    #     if occurrences_nonsense[ngram] > 0:
    #         idf = log(num_nonsense_strings/occurrences_nonsense[ngram])
    #         nonsense = relative_frequency(occurrences_nonsense[ngram], nonsense_sum, num_ngrams)
    #         nonsense_term = idf*log((1 - theta)*nonsense + theta_over_N)
    #     weights[ngram] = NGramFactor(real_term=real_term, nonsense_term=nonsense_term)

    theta_over_N = theta / num_ngrams
    for ngram in all_ngrams:
        real = relative_frequency(occurrences_real[ngram], real_sum,
                                  num_ngrams)
        real_term = log((1 - theta) * real + theta_over_N)
        nonsense = relative_frequency(occurrences_nonsense[ngram],
                                      nonsense_sum, num_ngrams)
        nonsense_term = log((1 - theta) * nonsense + theta_over_N)
        weights[ngram] = NGramFactor(real_term=real_term,
                                     nonsense_term=nonsense_term)
    return weights
def largest_connected_component(self):
    visited = defaultdict.fromkeys(self.adj.keys(), False)
    largest = 1
    for key in visited:
        # print(visited)  # debug
        if not visited[key]:
            size = self.dfs(key, visited)
            if size > largest:
                largest = size
    return largest
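# largest_connected_component relies on a self.dfs(key, visited) helper that
# marks nodes as visited and returns the component size. A plausible
# iterative method sketch (an assumption -- the original dfs is not shown):
def dfs(self, start, visited):
    stack, size = [start], 0
    while stack:
        node = stack.pop()
        if visited[node]:
            continue
        visited[node] = True
        size += 1
        stack.extend(n for n in self.adj[node] if not visited[n])
    return size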
def create_word_indices(self):
    # Total number of words in the corpus, combining all documents.
    self.n_words = len(self.ordered_data)
    self.vocab = list(set(self.ordered_data))  # set of all unique words
    self.len_vocab = len(self.vocab)
    word_ind = defaultdict.fromkeys(self.vocab, 0)
    for i in range(self.len_vocab):
        word_ind[self.vocab[i]] = i + 1
    self.word_indices = np.zeros(self.n_words, dtype=int)
    for i in range(self.n_words):
        self.word_indices[i] = word_ind[self.ordered_data[i]]
def convert_to_defaultdict(x):
    if isinstance(x, (list, np.ndarray)):
        k = defaultdict(int, izip(xrange(len(x)), x))
    elif isinstance(x, dict):
        k = defaultdict(int, x)
    elif isinstance(x, set):
        k = defaultdict.fromkeys(x, 1)
        k.default_factory = int
    else:
        raise ValueError("Invalid param type %s for jaccard similarity"
                         % type(x))
    return k
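# Usage sketch (hypothetical inputs; Python 2 to match the izip/xrange above):
print convert_to_defaultdict({'a', 'b'})       # defaultdict(int, {'a': 1, 'b': 1})
print convert_to_defaultdict([0.5, 0.25])      # keys 0, 1 mapped to the values
print convert_to_defaultdict({'a': 2})['zzz']  # 0, via default_factory=int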
def gen_requests(max_requests, batch_size, f):
    client_key_pub = ecdsa_sig.get_asymm_key(CLIENT_ID - 1, ktype='verify')
    client_key_pem = ecdsa_sig.get_asymm_key(CLIENT_ID - 1, ktype='sign')
    keys_to_seq_tracker = defaultdict.fromkeys(range(N), 0)
    for i in range(max_requests):
        # print("for request: [%s]" % i)
        r = random.randint(1, max_requests)
        # randint needs ints; '/' would yield a float on Python 3.
        r2 = random.randint(max_requests // 2, max_requests)
        # amount = random.randint(0, 100)
        amount = 50
        msg = "TRAN%s%s" % (
            str(amount).zfill(4),
            str(r2).zfill(4),  # .decode('latin1')
        )
        msg = bytes(msg, encoding="utf-8") + client_key_pub.to_string()
        if i == 0:
            print(len(client_key_pub.to_string()), "ECDSA")
        _id = r % N
        # print("current ID: ", _id)
        current_key = ecdsa_sig.get_asymm_key(_id, ktype='sign')
        # import pdb; pdb.set_trace()
        seq = keys_to_seq_tracker[_id]
        view = 0  # TODO: do we even need view in request from client??
        req = message.add_sig(current_key, _id, seq, view, "REQU", msg, i)
        # Increase seq count since the same node might send another request.
        keys_to_seq_tracker[_id] += 1
        req.sig = client_key_pem.to_string()
        msg = req.SerializeToString()
        if i == 0:
            print("one request:", len(msg))
        ################################
        # padding = "0123456789" * 10
        # msg += padding * 1024
        # msg += "x" * (400 - len(msg))
        msg += msg * (batch_size - 1)
        ################################
        temp = message.add_sig(current_key, _id, 0, 0, "REQU", msg, i + offset)
        size = temp.ByteSize()
        if i == 0:
            print("inner message:", len(msg))
            print("message byte size:", size)
        b = struct.pack("!I", size)
        f.write(b + temp.SerializeToString())
        # if i % batch_size == 0:
        s = "Generation Progress: {:.1%}".format(i / max_requests)
        print(s, end='')
        backspace(len(s))
        # time.sleep(0.02)  # see the counter
    print("keys_to_seq_tracker: ", keys_to_seq_tracker)
def iter_params(data, lam):
    thresh = 0.00000001
    alpha = 4.0
    u_init = field_rating_average(data, 'reviewerID') - alpha
    i_init = field_rating_average(data, 'itemID') - alpha
    user_list = [review['reviewerID'] for review in data]
    item_list = [review['itemID'] for review in data]
    b_u = defaultdict.fromkeys(user_list, u_init)
    b_i = defaultdict.fromkeys(item_list, i_init)
    b_u.default_factory = float
    b_i.default_factory = float
    u_dict, i_dict = build_ui_dicts(data)
    while True:
        # print alpha
        # alpha_old, b_u_old, b_i_old = copy(alpha), deepcopy(b_u), deepcopy(b_i)
        alpha_old = copy(alpha)
        alpha = update_alpha(data, alpha, b_u, b_i)
        b_u = update_bu(u_dict, alpha, b_u, b_i, lam)
        b_i = update_bi(i_dict, alpha, b_u, b_i, lam)
        if abs(alpha_old - alpha) < thresh:
            break
    return alpha, b_u, b_i, user_list, item_list
def mnb_ngram_weights(real_strings, nonsense_strings, n, smoothing=1):
    # This is almost exactly the algorithm given in Figure 13.2 of the book
    # "Introduction to information retrieval" by Manning, C. D., Raghavan,
    # P., & Schütze, H. (2009, Online edition ed., Cambridge University
    # Press). The differences are:
    #
    # 1) This ignores the probability of the priors, P(c). In our training
    #    process, we have almost exactly balanced sets of real and nonsense
    #    strings, which means the value of (count of docs in class)/(count
    #    of docs in training set) is equal for both classes, and thus does
    #    not change the results of the maximum a posteriori analysis later.
    #
    # 2) The normal approach to Naive Bayes (and what Manning et al. do)
    #    would be to store the score for an n-gram rather than the logarithm
    #    of the score. This code actually stores the logarithm of the
    #    probabilities, because that's the quantity used when applying the
    #    Bayes formula and it's more efficient to precompute the log than to
    #    have to compute it over and over again in mnb_score_function().
    occurrences_real = defaultdict(int)
    occurrences_nonsense = defaultdict(int)
    for string in real_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_real[ngram] += 1
    for string in nonsense_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_nonsense[ngram] += 1
    real_sum = sum(occurrences_real.values())
    nonsense_sum = sum(occurrences_nonsense.values())
    all_ngrams = all_possible_ngrams(n)
    num_ngrams = len(all_ngrams)

    # Initialize all n-gram values to the value that would come from zero
    # occurrences of an n-gram in a given training set.
    missing_real = log(relative_frequency(0, real_sum, num_ngrams, smoothing))
    missing_nonsense = log(relative_frequency(0, nonsense_sum, num_ngrams,
                                              smoothing))
    weights = defaultdict.fromkeys(
        all_ngrams,
        NGramWeight(log_real=missing_real, log_nonsense=missing_nonsense,
                    ts_real=False, ts_nonsense=False))

    for ngram in all_ngrams:
        real = log(relative_frequency(occurrences_real[ngram], real_sum,
                                      num_ngrams, smoothing))
        nonsense = log(relative_frequency(occurrences_nonsense[ngram],
                                          nonsense_sum, num_ngrams, smoothing))
        weights[ngram] = NGramWeight(
            log_real=real,
            log_nonsense=nonsense,
            ts_real=bool(occurrences_real[ngram]),
            ts_nonsense=bool(occurrences_nonsense[ngram]))
    return weights
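# Both n-gram weighting functions above assume a relative_frequency() helper.
# Given how it is called -- relative_frequency(count, total, num_ngrams,
# smoothing) -- a plausible add-k (Laplace) smoothed definition would be
# (this is an assumption, not the original code):
def relative_frequency(count, total, num_ngrams, smoothing=1):
    # (count + k) / (total + k * |V|), as in the smoothed estimate of
    # Manning et al., Fig. 13.2.
    return (count + smoothing) / (total + smoothing * num_ngrams)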
def rtn_possible_wp(pdf_pt):
    '''Returns possible word pairs that the model deemed taxonomy names;
    takes the filtered text list with part-of-speech tagging.
    '''
    possible_list = []
    for wp in filter_text_pos(pdf_pt):
        res = model.predict([wp])
        print(wp, res)
        if res > 0.4:
            possible_list.append(wp)
    if possible_list:
        # Deduplicate while preserving order.
        return list(defaultdict.fromkeys(possible_list).keys())
    return ["Sorry, failed to find taxonomy-like names in the document"]
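# The fromkeys(...).keys() call above is an order-preserving dedup: dict
# subclasses keep insertion order (Python 3.7+), so duplicates collapse while
# first-seen order survives. Standalone illustration:
from collections import defaultdict

words = ['Canis lupus', 'Felis catus', 'Canis lupus']
print(list(defaultdict.fromkeys(words).keys()))  # ['Canis lupus', 'Felis catus']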
def rtn_possible_wp(vc, pdf_pt):
    '''Returns possible word pairs that the classifier deemed taxonomy names;
    it is not that smart; requires a VoteClassifer object and the filtered
    text list with part-of-speech tagging.
    '''
    possible_list = []
    for wp in filter_text_pos(pdf_pt):
        res = vc.classify(wp)
        if res:
            possible_list.append(wp)
    if possible_list:
        # Deduplicate while preserving order.
        return list(defaultdict.fromkeys(possible_list).keys())
    return ["Sorry, failed to find taxonomy-like names in the document"]
def filter_top_electeurs(votes, SEUIL):
    from collections import Counter, defaultdict
    # NB: defaultdict.fromkeys(keys, []) would make every key share ONE list,
    # so build an independent list per electeur instead.
    electeurs_d = defaultdict(list)
    for vote in votes:
        electeurs_d[vote["electeur"]].append(vote)
    top_users = []
    f = Counter([data["electeur"] for data in votes])
    for n, cpt in f.items():
        if cpt < SEUIL:
            del electeurs_d[n]
    # for k, v in electeurs_d.items():
    #     top_users.append(v)
    print len(electeurs_d), "unique voters who voted at least %i times" % SEUIL
    return electeurs_d
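# Why the defaultdict(list) fix above matters: fromkeys seeds every key with
# the *same* object, so a mutable default is shared across all keys.
from collections import defaultdict

shared = defaultdict.fromkeys(['a', 'b'], [])
shared['a'].append(1)
print(shared['b'])  # [1] -- both keys point at one shared list

fresh = defaultdict(list)
fresh['a'].append(1)
print(fresh['b'])   # [] -- each key gets its own list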
def validate_agent_per_data_game(agent: IAgent, dg: DataGame,
                                 min_tricks: int = 0) \
        -> Tuple[np.ndarray, np.ndarray]:
    """
    Validate an agent by comparing its performance to data.
    :param agent: IAgent to check vs the data
    :param dg: DataGame object
    :param min_tricks: minimum tricks index to start validation from
    :return: tuple of 2 arrays for experiences and succeeds. Each element in
        each array represents the number of played tricks
        (== 13 - (#cards in hand))
    """
    all_hands, all_tricks, chosen_cards = dg.all_relevant_snapshots()
    tricks_num = len(all_hands) // 2
    checks, succeeds = np.zeros(12), np.zeros(12)
    for pos_idx, position in enumerate(dg.winner):
        for trick_idx in range(min_tricks, tricks_num):
            curr_hands = all_hands[pos_idx * tricks_num + trick_idx]
            curr_trick = all_tricks[pos_idx * tricks_num + trick_idx]
            chosen_card = chosen_cards[pos_idx * tricks_num + trick_idx]

            # Create teams, such that the first team is the winner.
            teams = [Team(curr_hands[0], curr_hands[2]),
                     Team(curr_hands[1], curr_hands[3])]
            if curr_hands[0].position not in dg.winner:
                teams[0], teams[1] = teams[1], teams[0]

            curr_state = State(trick=curr_trick,
                               teams=teams,
                               players=curr_hands,
                               prev_tricks=dg.tricks[:trick_idx],
                               score=defaultdict.fromkeys(teams, 0),
                               curr_player=curr_hands[position.value - 1])
            sg = SimulatedGame(
                agent=agent,
                other_agent=SimpleAgent('soft_long_greedy_action'),
                verbose_mode=False,
                state=curr_state)
            validation = 'simple' if isinstance(agent, SimpleAgent) else ''
            played_card = sg.play_single_move(validation=validation)
            if played_card == chosen_card:
                succeeds[trick_idx] += 1
            checks[trick_idx] += 1
    return checks, succeeds
def __init__(self, filename, image_column_name, service_request_column_name,
             parent_dir):
    """
    Initialize variables and create a dictionary containing categories.

    :param filename: Name of the file that will be used for crawling.
    :param image_column_name: Column name of the field that has URLs of
        images.
    :param service_request_column_name: Column name of the Service Request
        Number.
    :param parent_dir: Name of the parent directory where images for each
        category will be downloaded.
    """
    self.logger = logging.getLogger(__name__)
    self.parent_dir = parent_dir
    self.image_column_name = image_column_name
    self.service_request_column_name = service_request_column_name
    self.xls = pd.ExcelFile(filename)
    # Excel file sheet names map to categories.
    self.fifi_dict = defaultdict.fromkeys(self.xls.sheet_names)
def compute_pagerank(urls, inlinks, outlinks, b=.85, iters=20):
    """Return a dictionary mapping each url to its PageRank.

    The formula is R(u) = (1/N)(1-b) + b * (sum_{w in B_u} R(w) / |F_w|)

    Initialize all scores to 1.0.

    Params:
      urls.......SortedList of urls (names)
      inlinks....SortedDict mapping url to list of in links (backlinks)
      outlinks...SortedDict mapping url to list of outlinks

    Returns:
      A SortedDict mapping url to its final PageRank value (float)

    >>> urls = SortedList(['a', 'b', 'c'])
    >>> inlinks = SortedDict({'a': ['c'], 'b': set(['a']), 'c': set(['a', 'b'])})
    >>> outlinks = SortedDict({'a': ['b', 'c'], 'b': set(['c']), 'c': set(['a'])})
    >>> sorted(compute_pagerank(urls, inlinks, outlinks, b=.5, iters=0).items())
    [('a', 1.0), ('b', 1.0), ('c', 1.0)]
    >>> iter1 = compute_pagerank(urls, inlinks, outlinks, b=.5, iters=1)
    >>> iter1['a']  # doctest:+ELLIPSIS
    0.6666...
    >>> iter1['b']  # doctest:+ELLIPSIS
    0.333...
    """
    pagerank = defaultdict.fromkeys(urls, 1.0)
    # print(pagerank)
    for _ in range(iters):
        for url in urls:
            sum_temp = 0.0
            for cs in inlinks[url]:
                sum_temp += pagerank[cs] / len(outlinks[cs])
            pagerank[url] = (1.0 - b) * (1.0 / len(urls)) + (b * sum_temp)
    return pagerank
def bnb_ngram_weights(real_strings, nonsense_strings, n):
    # Count the number of strings in which each n-gram occurs. Note this is
    # not the same as all occurrences of the n-gram, which would entail
    # counting cases when an n-gram appears more than once in a string.
    occurrences_real = defaultdict(set)
    occurrences_nonsense = defaultdict(set)
    for string in real_strings:
        for ngram in ngrams(string.lower(), n):
            # Using a set so that if the n-gram appears more than once in a
            # given string, we only count it once. (add(), not update();
            # update() would insert the string's individual characters.)
            occurrences_real[ngram].add(string)
    for string in nonsense_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_nonsense[ngram].add(string)

    # Initialize all n-gram values to the value that would come from zero
    # occurrences of an n-gram in a given training set. (The original took
    # log(1 - log(missing_real)) here, a double log; 1 - missing_real is the
    # quantity that matches the loop below.)
    missing_real = 1 / (len(real_strings) + 2)
    missing_nonsense = 1 / (len(nonsense_strings) + 2)
    all_ngrams = all_possible_ngrams(n)
    weights = defaultdict.fromkeys(
        all_ngrams,
        NGramWeight(found_in_ts=False,
                    log_real=log(missing_real),
                    log_nonsense=log(missing_nonsense),
                    log_one_minus_real=log(1 - missing_real),
                    log_one_minus_nonsense=log(1 - missing_nonsense)))

    num_real_strings = len(real_strings)
    num_nonsense_strings = len(nonsense_strings)
    for ngram in all_ngrams:
        num_occurrences_real = len(occurrences_real[ngram])
        num_occurrences_nonsense = len(occurrences_nonsense[ngram])
        real = (num_occurrences_real + 1) / (num_real_strings + 2)
        nonsense = (num_occurrences_nonsense + 1) / (num_nonsense_strings + 2)
        found = (num_occurrences_real + num_occurrences_nonsense) > 0
        weights[ngram] = NGramWeight(found_in_ts=found,
                                     log_real=log(real),
                                     log_nonsense=log(nonsense),
                                     log_one_minus_real=log(1 - real),
                                     log_one_minus_nonsense=log(1 - nonsense))
    return weights
def choose_attribute(attributes, examples, labels, attribute_subset):
    # compute total
    ca_prob_dict = {}
    attributes_filtered = [attribute for attribute in attributes
                           if random.random() < attribute_subset]
    # print attributes_filtered
    for attribute in attributes_filtered:
        ca_prob_dict[attribute] = (0, 0)
    ca_total = len(examples.keys())
    ca_total1 = 0
    ca_total2 = 0

    # compute number of 1s and 2s in sample
    for example_key in examples.keys():
        if labels[example_key - 1] == '1':
            ca_total1 += 1
        elif labels[example_key - 1] == '2':
            ca_total2 += 1

    # compute number of TFs with each word
    for attribute in attributes_filtered:
        for key, value in examples.iteritems():
            if labels[key - 1] == '1' and attribute in value:
                ca_prob_dict[attribute] = (ca_prob_dict[attribute][0] + 1,
                                           ca_prob_dict[attribute][1])
            elif labels[key - 1] == '2' and attribute in value:
                ca_prob_dict[attribute] = (ca_prob_dict[attribute][0],
                                           ca_prob_dict[attribute][1] + 1)

    # compute information gain
    ca_ig_dict = defaultdict.fromkeys(attributes_filtered)
    for key, value in ca_prob_dict.iteritems():
        ca_ig_dict[key] = ig(value[0], value[1], ca_total1 - value[0],
                             ca_total2 - value[1], ca_total)
        # print(value[0], value[1], ca_total1 - value[0], ca_total2 - value[1])
    gnf_max_item = max(ca_ig_dict.iteritems(), key=operator.itemgetter(1))
    # print(words[gnf_max_item[0] - 1], gnf_max_item)
    return gnf_max_item[0], ca_prob_dict[gnf_max_item[0]]
def status(request, addon_id, addon):
    appeal_form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)
    publish_form = forms.PublishForm(
        request.POST if 'publish-app' in request.POST else None, addon=addon)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and appeal_form.is_valid():
            if not addon.is_rated():
                # Cannot resubmit without content ratings.
                return http.HttpResponseForbidden(
                    'This app must obtain content ratings before being '
                    'resubmitted.')

            appeal_form.save()
            create_comm_note(addon, addon.latest_version, request.user,
                             appeal_form.data['notes'],
                             note_type=comm.RESUBMISSION)
            if addon.vip_app:
                handle_vip(addon, addon.latest_version, request.user)

            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            upload = upload_form.cleaned_data['upload']
            ver = Version.from_upload(upload, addon)

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Escalate the version if it uses prerelease permissions.
            escalate_prerelease_permissions(addon, validation_result, ver)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((mkt.DEVICE_GAIA,)),
                           set((mkt.DEVICE_MOBILE,)),
                           set((mkt.DEVICE_GAIA, mkt.DEVICE_MOBILE,)))
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload))

            if addon.vip_app:
                handle_vip(addon, ver, request.user)

            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

        elif 'publish-app' in request.POST and publish_form.is_valid():
            publish_form.save()
            return redirect(addon.get_dev_url('versions'))

    ctx = {
        'addon': addon,
        'appeal_form': appeal_form,
        'is_tarako': addon.tags.filter(tag_text=QUEUE_TARAKO).exists(),
        'tarako_review':
            addon.additionalreview_set.latest_for_queue(QUEUE_TARAKO),
        'publish_form': publish_form,
        'QUEUE_TARAKO': QUEUE_TARAKO,
        'upload_form': upload_form,
    }

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == mkt.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=mkt.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(
            addon=addon, status=mkt.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                    settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return render(request, 'developers/apps/status.html', ctx)
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and form.is_valid():
            form.save()
            create_comm_note(addon, addon.current_version, request.amo_user,
                             form.data['notes'],
                             note_type=comm.RESUBMISSION)
            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)

            ver = Version.from_upload(upload_form.cleaned_data['upload'],
                                      addon, [amo.PLATFORM_ALL])

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA,)),
                           set((amo.DEVICE_MOBILE,)),
                           set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE,)))
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload_form.cleaned_data['upload']))
            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

    ctx = {'addon': addon, 'webapp': webapp, 'form': form,
           'upload_form': upload_form}

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=amo.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(addon=addon,
                                                   status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                    settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return jingo.render(request, 'developers/apps/status.html', ctx)
crap2.incrap2()

import testdata.crap2
tc2c2 = testdata.crap2.crap2()

# int/double crap
def to_ints(l):
    return [int(x) for x in l]

print to_ints([4.0, 4.0, 61]), to_ints((4.0, 4.0, 61))
print int(min(4.0, 4.0, 2))
print int(max(4.0, 4.0, 6))
print int(min(4.0, 4.0, 4.0, 2))
print int(max(4.0, 4.0, 4, 0, 6))
l = [6]
l.append(1.0)
print to_ints(l)

# assorted fixes
[1] != []

from collections import defaultdict
print sorted(defaultdict.fromkeys(range(7, 10), 'a').items())
import collections
print sorted(collections.defaultdict.fromkeys(range(7, 10), 'a').items())

from string import *

class string:
    pass

string.x = 4
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and form.is_valid():
            form.save()
            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)

            ver = Version.from_upload(upload_form.cleaned_data['upload'],
                                      addon, [amo.PLATFORM_ALL])
            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA,)),
                           set((amo.DEVICE_MOBILE,)),
                           set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE,)))
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload_form.cleaned_data['upload']))
            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

    ctx = {'addon': addon, 'webapp': webapp, 'form': form,
           'upload_form': upload_form}

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=amo.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    return jingo.render(request, 'developers/apps/status.html', ctx)
"slug": article9["article_link"].split("/")[-1], "title": article9["article_link"].split("/")[-1].replace("-", " "), "text": article9['body'], "author": article9["author"], "votes":[], "arguments":article9["arguments"], "votes_arguments": [], "sources":article9["sources"], "votes_sources": [], } ## Les versions alternatives versions = article9["versions"] versions_c = [] # auteurs_d = defaultdict.fromkeys([v["author"] for v in versions], []) versions_d = defaultdict.fromkeys([v["slug"] for v in versions], {}) for v in versions: vn = {"date":v["created_at"], "link":v["link"], "slug":v["slug"], "title":v["title"], "text": v['comment'], "author": v["author"], "votes":[], "arguments":[], "votes_arguments": [], "sources":[], "votes_sources": []} auteurs_d[v["author"]].append(vn)
import itertools

# load the files
training_file = 'C:/Users/kPasad/Box Sync/ML/Projects/africanSoilPred/data/training.csv'
test_file = 'C:/Users/kPasad/Box Sync/ML/Projects/africanSoilPred/data/sorted_test.csv'
feat_imp = pk.load(open('C:/Users/kpasad/Box Sync/ML/Projects/africanSoilPred/data/feat_imp.pk', 'r'))

df_train = pd.read_csv(training_file, tupleize_cols=True)
df_test = pd.read_csv(test_file)
train_dims = df_train.shape

# NB: the original fused 'bayesianRidge,adaBoost' into one string; split here.
algos = ['bayesianRidge', 'adaBoost', 'decisionTree', 'gradBoost',
         'extraTree', 'linear']
targets = ['Ca', 'P', 'pH', 'SOC', 'Sand']

# All static data structures here
clf = defaultdict.fromkeys(algos)
top_preds = defaultdict.fromkeys(targets)
master_obj = master()

# All parameters go here.
derivative_filt = 'disable'
feat_list = 'all'
cv_factor = 0.7
num_cv_folds = 20
learner_id = 0

# Feature massage: remove the training sample ID and the targets.
train_cols_to_remove = ['PIDN'] + targets
x_train = df_train.drop(train_cols_to_remove, axis=1)
def matchingStrings(strings, queries):
    # Seed every query with 0 so absent queries still report a count.
    op = defaultdict.fromkeys(queries, 0)
    for s in strings:
        if s in queries:
            op[s] += 1
    return op
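# Usage sketch (illustrative inputs; note this variant returns the dict keyed
# by query rather than a list of counts):
op = matchingStrings(['aba', 'baba', 'aba', 'xzxb'], ['aba', 'xzxb', 'ab'])
print(dict(op))  # {'aba': 2, 'xzxb': 1, 'ab': 0}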
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)

    if request.method == "POST":
        if "resubmit-app" in request.POST and form.is_valid():
            form.save()
            perms = ("reviewer", "senior_reviewer", "staff")
            create_comm_thread(
                action="resubmit",
                addon=addon,
                comments=form.data["notes"],
                profile=request.amo_user,
                version=addon.current_version,
                perms=perms,
            )
            messages.success(request, _("App successfully resubmitted."))
            return redirect(addon.get_dev_url("versions"))

        elif "upload-version" in request.POST and upload_form.is_valid():
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)

            ver = Version.from_upload(upload_form.cleaned_data["upload"],
                                      addon, [amo.PLATFORM_ALL])

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = ["has_%s" % feature.lower()
                    for feature in validation_result["feature_profile"]]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (
                set((amo.DEVICE_GAIA,)),
                set((amo.DEVICE_MOBILE,)),
                set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE)),
            )
            if set(addon.device_types) in qhd_devices or mobile_only:
                data["has_qhd"] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _("New version successfully added."))
            log.info(
                "[Webapp:%s] New version created id=%s from upload: %s"
                % (addon, ver.pk, upload_form.cleaned_data["upload"])
            )
            return redirect(addon.get_dev_url("versions.edit", args=[ver.pk]))

    ctx = {"addon": addon, "webapp": webapp, "form": form,
           "upload_form": upload_form}

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values("id", "version")
        version_strings = dict((v["id"], v) for v in versions)
        version_strings["num"] = len(versions)
        ctx["version_strings"] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (
                AppLog.objects.filter(
                    addon=addon,
                    activity_log__action=amo.LOG.REJECT_VERSION.id,
                ).order_by("-created")
            )[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx["rejection"] = entry and entry.activity_log

    return jingo.render(request, "developers/apps/status.html", ctx)
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# biodoop-core is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# biodoop-core. If not, see <http://www.gnu.org/licenses/>.
#
# END_COPYRIGHT

"""
Gender encoding.
"""

from collections import defaultdict

UNKNOWN = 0
MALE = 1
FEMALE = 2
MALE_LABELS = ["MALE", "Male", "male", "M", "m", "1"]
FEMALE_LABELS = ["FEMALE", "Female", "female", "F", "f", "2"]

MAP = defaultdict.fromkeys(MALE_LABELS, MALE)
MAP.update(dict.fromkeys(FEMALE_LABELS, FEMALE))
# anything that's not either male or female is unknown
MAP.default_factory = int
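# Behavior sketch: seeded labels return their code, and any unseen label
# falls through to default_factory=int, i.e. UNKNOWN (0).
print(MAP["M"])         # 1 (MALE)
print(MAP["female"])    # 2 (FEMALE)
print(MAP["whatever"])  # 0 (UNKNOWN)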
def ultimate_evaluate(model, extractor_name):
    genres = ['action', 'horror', 'romance']
    testingData = []
    testingLabels = []
    total = defaultdict.fromkeys(range(len(genres)), 0)
    correct = defaultdict.fromkeys(range(len(genres)), 0)
    yTrue, yPredict = [], []
    for genreIndex, genre in enumerate(genres):
        # print "Looking for pickle file: data/{0}{1}.p".format(genre, str(num_of_videos)),
        try:
            genreFeatures = load_pkl("test/" + genre + "_test_" + extractor_name)
            genreFeatures = np.array([np.array(f) for f in genreFeatures])  # numpy hack
        except Exception as e:
            print(e)
            return
        print("OK.")
        for videoFeatures in genreFeatures:
            """to get all frames from a video -- hacky"""
            total[genreIndex] += 1
            d = defaultdict(int)
            # List of predictions, per-frame.
            predictedClasses = model.predict_classes(videoFeatures)
            print(predictedClasses)
            for i in predictedClasses:
                d[i] += 1
            predictedGenre = max(d.items(), key=lambda x: x[1])[0]
            yPredict.append(predictedGenre)
            yTrue.append(genreIndex)
            if predictedGenre == genreIndex:
                correct[genreIndex] += 1
    print(correct, total)
    confusionMatrix = confusion_matrix(yTrue, yPredict)
    print(confusionMatrix)
    '''
    tp_action = confusionMatrix[0][0]
    tp_horror = confusionMatrix[1][1]
    tp_romance = confusionMatrix[2][2]
    fp_action = confusionMatrix[1][0] + confusionMatrix[2][0]
    fp_horror = confusionMatrix[0][1] + confusionMatrix[2][1]
    fp_romance = confusionMatrix[0][2] + confusionMatrix[1][2]
    fn_action = confusionMatrix[0][1] + confusionMatrix[0][2]
    fn_horror = confusionMatrix[1][0] + confusionMatrix[1][2]
    fn_romance = confusionMatrix[2][0] + confusionMatrix[2][1]
    tn_action = confusionMatrix[1][1] + confusionMatrix[1][2] + confusionMatrix[2][1] + confusionMatrix[2][2]
    tn_horror = confusionMatrix[0][0] + confusionMatrix[0][2] + confusionMatrix[2][0] + confusionMatrix[2][2]
    tn_romance = confusionMatrix[0][0] + confusionMatrix[0][1] + confusionMatrix[1][0] + confusionMatrix[1][1]
    prec_action = tp_action/(fp_action+tp_action)
    prec_horror = tp_horror/(fp_horror+tp_horror)
    prec_romance = tp_romance/(fp_romance+tp_romance)
    rec_action = tp_action/(fn_action+tp_action)
    rec_horror = tp_horror/(fn_horror+tp_horror)
    rec_romance = tp_romance/(fn_romance+tp_romance)
    '''
    total_acc = 0
    for i in range(len(genres)):
        tp = confusionMatrix[i][i]
        fp = 0
        for j in range(len(genres)):
            if i != j:
                fp = fp + confusionMatrix[j][i]
        fn = 0
        for j in range(len(genres)):
            if i != j:
                fn = fn + confusionMatrix[i][j]
        tn = 0
        for j in range(len(genres)):
            for k in range(len(genres)):
                if i != j and i != k:
                    tn = tn + confusionMatrix[j][k]
        prec = tp / (tp + fp) * 100
        rec = tp / (tp + fn) * 100
        # F1 is the harmonic mean of precision and recall (the original took
        # the arithmetic mean).
        f1 = 2 * prec * rec / (prec + rec)
        acc = (tp + tn) / (tp + fp + fn + tn) * 100
        print("Precision of " + genres[i] + " is " + str(round(prec, 2)) + "%\n")
        print("Recall of " + genres[i] + " is " + str(round(rec, 2)) + "%\n")
        print("F1 of " + genres[i] + " is " + str(round(f1, 2)) + "%\n")
        print("Accuracy of " + genres[i] + " is " + str(round(acc, 2)) + "%\n")
        print("---------------")
        total_acc = total_acc + acc
    total_acc = total_acc / len(genres)
    print("Overall Accuracy is " + str(round(total_acc, 2)) + "%\n")
data = movie.map(lambda x: (int(x[0]), int(x[1]))).groupByKey().sortByKey() \
    .map(lambda x: (int(x[0]), list(x[1]))) \
    .filter(lambda x: (len(x[1]) >= 9))
user_movies = data.collectAsMap()
nodes = movie.map(lambda x: (int(x[0]), 1)).distinct().sortByKey() \
    .map(lambda x: x[0]).collect()
print "time taken nodes is ", time.time() - START_TIME
edges = createGraph(nodes, user_movies)
# connected_users = sc.parallelize(edges).groupByKey().sortByKey() \
#     .map(lambda x: (int(x[0]), list(x[1]))).collectAsMap()
graph = nx.Graph()
graph.add_edges_from(edges)
bet_dic = defaultdict.fromkeys(graph.edges(), 0.0)
new_bet_dic = BFS(bet_dic, list(graph.nodes()))
print "Time taken to finish btw", time.time() - START_TIME
new_bet_dic.update((k, (float(round(v / 0.2, 2) / 10)))
                   for k, v in new_bet_dic.items())
max_bet = sorted(new_bet_dic, key=new_bet_dic.get, reverse=True)
print "Time taken to reach updated btw", time.time() - START_TIME
grps = list()
m = graph.number_of_edges()  # value of m
output = open("Shyamala_Sundararajan_Community.txt", 'w')
degree = dict()
for i in graph.nodes():
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and form.is_valid():
            form.save()
            perms = ('reviewer', 'senior_reviewer', 'staff')
            create_comm_thread(action='resubmit', addon=addon,
                               comments=form.data['notes'],
                               profile=request.amo_user,
                               version=addon.current_version,
                               perms=perms)
            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)

            ver = Version.from_upload(upload_form.cleaned_data['upload'],
                                      addon, [amo.PLATFORM_ALL])

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA,)),
                           set((amo.DEVICE_MOBILE,)),
                           set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE,)))
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload_form.cleaned_data['upload']))
            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

    ctx = {'addon': addon, 'webapp': webapp, 'form': form,
           'upload_form': upload_form}

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=amo.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(
            addon=addon, status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                    settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return jingo.render(request, 'developers/apps/status.html', ctx)
def status(request, addon_id, addon):
    appeal_form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)
    publish_form = forms.PublishForm(
        request.POST if 'publish-app' in request.POST else None, addon=addon)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and appeal_form.is_valid():
            if not addon.is_rated():
                # Cannot resubmit without content ratings.
                return http.HttpResponseForbidden(
                    'This app must obtain content ratings before being '
                    'resubmitted.')

            appeal_form.save()
            create_comm_note(addon, addon.latest_version, request.user,
                             appeal_form.data['notes'],
                             note_type=comm.RESUBMISSION)
            if addon.vip_app:
                handle_vip(addon, addon.latest_version, request.user)

            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            upload = upload_form.cleaned_data['upload']
            ver = Version.from_upload(upload, addon)

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Escalate the version if it uses prerelease permissions.
            escalate_prerelease_permissions(addon, validation_result, ver)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA,)),
                           set((amo.DEVICE_MOBILE,)),
                           set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE,)))
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload))

            if addon.vip_app:
                handle_vip(addon, ver, request.user)

            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

        elif 'publish-app' in request.POST and publish_form.is_valid():
            publish_form.save()
            return redirect(addon.get_dev_url('versions'))

    ctx = {
        'addon': addon,
        'appeal_form': appeal_form,
        'is_tarako': addon.tags.filter(tag_text=QUEUE_TARAKO).exists(),
        'tarako_review':
            addon.additionalreview_set.latest_for_queue(QUEUE_TARAKO),
        'publish_form': publish_form,
        'QUEUE_TARAKO': QUEUE_TARAKO,
        'upload_form': upload_form,
    }

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=amo.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(addon=addon,
                                                   status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                    settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return render(request, 'developers/apps/status.html', ctx)
x_train["Depth"] = x_train["Depth"].apply(lambda depth: 0 if depth == "Subsoil" else 1) x_train[spectra_features] = fltSpectra n_train = df_train.shape[0] cv_factor = 0.7 num_cv_folds = 20 train_sample_idx = range(0, n_train) num_cv_train_samples = int(df_train.shape[0] * cv_factor) train_sample_idx = deque(range(0, n_train)) # pca = RandomizedPCA(n_components=400) feat_imp = np.zeros([num_cv_folds, len(train_feature_list)]) algos = ["bayesianRidge" ",adaBoost", "decisionTree", "gradBoost", "extraTree", "linear", "ridge", "svr", "randForest"] clf = defaultdict.fromkeys(algos) clf["adaBoost"] = ensemble.AdaBoostRegressor() clf["decisionTree"] = DecisionTreeRegressor(random_state=0) clf["gradBoost"] = ensemble.GradientBoostingRegressor(loss="huber", max_depth=2, n_estimators=500) clf["extraTree"] = ensemble.ExtraTreesRegressor(n_estimators=20) clf["linear"] = linear.LinearRegression() clf["bayesianRidge"] = linear.BayesianRidge() clf["ridge"] = linear.Lasso(alpha=0.1) clf["svr"] = SVR(C=1000, kernel="poly", degree=5) clf["randForest"] = ensemble.RandomForestRegressor(n_estimators=10, criterion="mse") algosToTry = ["svr"] feat_imp = pk.load(open("feature_imp_ca1.pk", "r")) mean_imp = mean(feat_imp, axis=0) sortIdx = np.argsort(mean_imp)
def status(request, addon_id, addon):
    appeal_form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)
    publish_form = forms.PublishForm(
        request.POST if "publish-app" in request.POST else None, addon=addon)

    if request.method == "POST":
        if "resubmit-app" in request.POST and appeal_form.is_valid():
            if not addon.is_rated():
                # Cannot resubmit without content ratings.
                return http.HttpResponseForbidden(
                    "This app must obtain content ratings before being "
                    "resubmitted.")

            appeal_form.save()
            create_comm_note(addon, addon.latest_version, request.user,
                             appeal_form.data["notes"],
                             note_type=comm.RESUBMISSION)
            if addon.vip_app:
                handle_vip(addon, addon.latest_version, request.user)

            messages.success(request, _("App successfully resubmitted."))
            return redirect(addon.get_dev_url("versions"))

        elif "upload-version" in request.POST and upload_form.is_valid():
            upload = upload_form.cleaned_data["upload"]
            ver = Version.from_upload(upload, addon)

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Escalate the version if it uses prerelease permissions.
            escalate_prerelease_permissions(addon, validation_result, ver)

            # Set all detected features as True and save them.
            keys = ["has_%s" % feature.lower()
                    for feature in validation_result["feature_profile"]]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (
                set((amo.DEVICE_GAIA,)),
                set((amo.DEVICE_MOBILE,)),
                set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE)),
            )
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)
            if set(addon.device_types) in qhd_devices or mobile_only:
                data["has_qhd"] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _("New version successfully added."))
            log.info("[Webapp:%s] New version created id=%s from upload: %s"
                     % (addon, ver.pk, upload))

            if addon.vip_app:
                handle_vip(addon, ver, request.user)

            return redirect(addon.get_dev_url("versions.edit", args=[ver.pk]))

        elif "publish-app" in request.POST and publish_form.is_valid():
            publish_form.save()
            return redirect(addon.get_dev_url("versions"))

    ctx = {
        "addon": addon,
        "appeal_form": appeal_form,
        "is_tarako": addon.tags.filter(tag_text=QUEUE_TARAKO).exists(),
        "tarako_review":
            addon.additionalreview_set.latest_for_queue(QUEUE_TARAKO),
        "publish_form": publish_form,
        "QUEUE_TARAKO": QUEUE_TARAKO,
        "upload_form": upload_form,
    }

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values("id", "version")
        version_strings = dict((v["id"], v) for v in versions)
        version_strings["num"] = len(versions)
        ctx["version_strings"] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (
                AppLog.objects.filter(
                    addon=addon,
                    activity_log__action=amo.LOG.REJECT_VERSION.id,
                ).order_by("-created")
            )[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx["rejection"] = entry and entry.activity_log

    if waffle.switch_is_active("preload-apps"):
        test_plan = PreloadTestPlan.objects.filter(addon=addon,
                                                   status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if test_plan.last_submission < settings.PREINSTALL_TEST_PLAN_LATEST:
                ctx["outdated_test_plan"] = True
            ctx["next_step_suffix"] = "submit"
        else:
            ctx["next_step_suffix"] = "home"
        ctx["test_plan"] = test_plan

    return render(request, "developers/apps/status.html", ctx)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib

from wifi_scan_bssid import get_wifis
from collections import defaultdict

model = joblib.load('../ml_data/model_randomforest.plk')
wifi_df = joblib.load('../ml_data/input_dataframe.plk')

while True:
    wifis = get_wifis()
    # Seed every known BSSID column with 0, then fill in observed signals.
    wifi_dict = defaultdict.fromkeys(wifi_df.columns, 0)
    for wifi in wifis:
        if wifi['bssid'] in wifi_dict:
            wifi_dict[wifi['bssid']] = int(wifi['signal'][:-1])
    wifi_df = wifi_df.append(pd.DataFrame.from_dict([wifi_dict]))
    print('current location : ' + str(model.predict(wifi_df.tail(1))[0]))