def main():
    """Print the document count of every ``passwords_*`` collection plus a total.

    Iterates over all collection names reported by ``mongo.db`` and skips
    those that do not start with ``passwords_``. For each remaining
    collection one line with the formatted count and the collection name is
    printed, followed by a final line with the formatted grand total.
    """
    total = 0
    for name in mongo.db.list_collection_names():
        if not name.startswith("passwords_"):
            continue
        # NOTE(review): assumes mongo.db is a PyMongo Database. Collection.count()
        # was deprecated in PyMongo 3.7 and removed in 4.0;
        # estimated_document_count() is its replacement for an unfiltered count.
        count = mongo.db.get_collection(name).estimated_document_count()
        total += count
        print(helper.format_number(count), "\t\t", name)
    print("Total", helper.format_number(total))
def oneSong(request, song_url):
    """Render the player page for the song whose ``url`` field equals ``song_url``.

    A single trailing ``/`` on ``song_url`` is removed before the lookup.
    The template context contains the requested song (``playSong``), up to
    15 of the last songs returned by ``Song.objects.all()`` in reverse order
    (``last10``) and the formatted view count fetched from the YouTube Data
    API (``viewCount``).

    Any exception raised by ``Song.objects.get``, by the HTTP request or by
    the lookup of ``items[0]`` in the API response propagates to the caller.
    """
    # Strip only a trailing slash; a slash elsewhere must not cost the URL
    # its last character.
    url = str(song_url)
    if url.endswith("/"):
        url = url[:-1]
    play_song = Song.objects.get(url=url)

    # Materialise the songs first: slicing a list with [-15:] also works when
    # fewer than 15 songs exist, whereas a negative slice start on the
    # queryset itself is rejected (presumably a Django QuerySet - confirm).
    all_songs = list(Song.objects.all())
    last10 = all_songs[-15:]
    last10.reverse()

    # Fetch the number of views for the requested song.
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # into configuration.
    api_key = "AIzaSyCWDfJdQfcOeDGjvL4Rs0sNnaTY0CcBMRY"
    url_for_details = "https://www.googleapis.com/youtube/v3/videos?id=" + play_song.url + "&key=" + api_key + "&fields=items(id,snippet(channelId,title,categoryId,description),statistics)&part=snippet,statistics"
    response = urllib2.urlopen(url_for_details)
    try:
        json_file = json.loads(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()
    views = json_file['items'][0]['statistics']['viewCount']
    formatted_views = format_number(views)

    context = {'last10': last10, 'playSong': play_song, 'viewCount': formatted_views}
    return render(request, 'rap/index.html', context)
def index(request):
    """Render the start page showing the song with the highest ``id``.

    The template context contains that song (``playSong``), up to 15 of the
    last songs returned by ``Song.objects.all()`` in reverse order
    (``last10``) and the formatted view count fetched from the YouTube Data
    API (``viewCount``).

    Any exception raised by ``Song.objects.latest``, by the HTTP request or
    by the lookup of ``items[0]`` in the API response propagates to the
    caller.
    """
    # Song to show in the player.
    play_song = Song.objects.latest('id')

    # Materialise the songs first: slicing a list with [-15:] also works when
    # fewer than 15 songs exist, whereas a negative slice start on the
    # queryset itself is rejected (presumably a Django QuerySet - confirm).
    all_songs = list(Song.objects.all())
    last10 = all_songs[-15:]
    last10.reverse()

    # Fetch the number of views for the song being shown.
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # into configuration.
    api_key = "AIzaSyCWDfJdQfcOeDGjvL4Rs0sNnaTY0CcBMRY"
    url_for_details = "https://www.googleapis.com/youtube/v3/videos?id=" + play_song.url + "&key=" + api_key + "&fields=items(id,snippet(channelId,title,categoryId,description),statistics)&part=snippet,statistics"
    response = urllib2.urlopen(url_for_details)
    try:
        json_file = json.loads(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()
    views = json_file['items'][0]['statistics']['viewCount']
    formatted_views = format_number(views)

    context = {
        'last10': last10,
        'playSong': play_song,
        'viewCount': formatted_views
    }
    return render(request, 'rap/index.html', context)
def update_by_root_ss():
    """
    Refresh the hit counters of the root synset ``entity.n.01`` and report them.

    The duplicate removal code never updates the this_hits value of the root
    synset, so the update has to be triggered manually here. For each of the
    three counters (this_hits, hits_below, total_hits) one line showing the
    old and the new value is printed.
    """
    result = mongo.update_synset_this_hits("entity.n.01")

    # (old key, new key) pairs, reported in this order.
    counter_keys = (
        ("this_hits_old", "this_hits_new"),
        ("hits_below_old", "hits_below_new"),
        ("total_hits_old", "total_hits_new"),
    )
    for old_key, new_key in counter_keys:
        before = format_number(result[old_key])
        after = format_number(result[new_key])
        print("Updated: %s -> %s" % (before, after))
def sum_without_dups_verb(sum_level):
    """
    Remove duplicate hits from the verb WordNet tree for one level, bottom-up.

    Duplicates are ignored in the way that only the first occurrence of a
    duplicate is included in the total sum. All further duplicates will be
    ignored in the sum.

    The sums have already been computed when the passwords were looked up,
    but duplicates were not considered in that process. This function
    therefore works in an inverted way: it identifies the synsets that
    generated non-first duplicates (the keys of ``ignore_dups``) and
    subtracts the hits of those duplicates again.

    Important note: it is not enough to subtract the hits only from the
    directly attached parent synset (hypernym). A synset on level N carries
    a hits_below value that subsumes the hits of all of its (in)directly
    attached children, so a subtraction has to be propagated from the parent
    all the way up to the top. This is delegated to ``propagate_verb``.

    Synsets whose parent is ``"root"`` are handled differently: the duplicate
    hits are subtracted from the synset itself via
    ``mongo.subtract_from_this_hits_verb`` and its hits are refreshed via
    ``mongo.update_synset_hits_verb``; the function returns after that group.

    :param sum_level: tree level whose synsets are inspected.
    """
    # Return all synsets of the specified level, grouped by their parent.
    #   text _id:    parent
    #   int  sum:    total_hits sum of all child synsets of the parent
    #   list childs: IDs of the child synsets
    lowest_level_grps = mongo.db_wn_verb.aggregate([
        {
            "$match": {
                "level": sum_level
            }
        },  # filter by the requested level
        {
            "$group": {
                "_id": "$parent",  # group by the parent synsets
                "sum": {
                    "$sum": "$total_hits"
                },
                "childs": {
                    "$push": {
                        "synset": "$id"
                    }
                }
            }
        }
    ])

    for item in lowest_level_grps:
        parent_id = item["_id"]

        if parent_id == "root":
            # We reached the tree top: subtract from the root nodes
            # themselves, nothing is left to propagate to.
            print(
                "Reached the top (level 0), will now subtract from the root nodes..."
            )
            for child in item["childs"]:
                sid = child["synset"]
                if sid not in ignore_dups:
                    continue
                print(
                    "Not first occurrence. Subtracting from current synset:",
                    sid)
                # Sum all values first so only one subtraction hits the
                # database per synset.
                total_dups = 0
                for dup in ignore_dups[sid]:
                    print("\t", dup)
                    total_dups += dup[1]
                print("\ttotal:", total_dups)
                mongo.subtract_from_this_hits_verb(sid, total_dups)
                # total_hits must be refreshed since this_hits was modified.
                total_hits_c, total_hits_old_c = mongo.update_synset_hits_verb(
                    sid)
                print("\t\tUpdate:", total_hits_old_c, " -> ", total_hits_c)
            print("Finished!")
            return

        for child in item["childs"]:
            synset_id = child["synset"]
            # First occurrences keep their hits; synsets without duplicates
            # need no action either.
            if synset_id in first_occurrence_dups:
                continue
            if synset_id not in ignore_dups:
                continue

            # A synset may contain more than one duplicate, e.g.
            # [(pw, 100), (pw2, 200)]; subtract their hits in one go.
            sub_sum = sum(dup[1] for dup in ignore_dups[synset_id])
            print(
                "Propagating changes to hits to synsets on the parent root path... (-%s)"
                % (format_number(sub_sum)))
            print(parent_id)
            # NOTE(review): this call used to be unreachable because of a
            # stray `continue` in front of it, so the subtraction was never
            # propagated for verbs (unlike the noun counterpart) - confirm
            # that enabling it is intended.
            propagate_verb(sub_sum, parent_id)
def sum_without_dups_noun(sum_level):
    """
    Remove duplicate hits from the noun WordNet tree for one level, bottom-up.

    Duplicates are ignored in the way that only the first occurrence of a
    duplicate is included in the total sum. All further duplicates will be
    ignored in the sum.

    The sums have already been computed when the passwords were looked up,
    but duplicates were not considered in that process. This function
    therefore works in an inverted way: it identifies the synsets that
    generated non-first duplicates (the keys of ``ignore_dups``) and
    subtracts the hits of those duplicates again.

    Important note: it is not enough to subtract the hits only from the
    directly attached parent synset (hypernym). A synset on level N carries
    a hits_below value that subsumes the hits of all of its (in)directly
    attached children, so a subtraction has to be propagated from the parent
    all the way up to the top. This is delegated to ``propagate_noun``.

    :param sum_level: tree level whose synsets are inspected.
    """
    # Return all synsets of the specified level, grouped by their parent.
    #   text _id:    parent
    #   int  sum:    total_hits sum of all child synsets of the parent
    #   list childs: IDs of the child synsets
    lowest_level_grps = mongo.db_wn.aggregate([
        {
            "$match": {
                "level": sum_level
            }
        },  # filter by the requested level
        {
            "$group": {
                "_id": "$parent",  # group by the parent synsets
                "sum": {
                    "$sum": "$total_hits"
                },
                "childs": {
                    "$push": {
                        "synset": "$id"
                    }
                }
            }
        }
    ])

    for item in lowest_level_grps:
        parent_id = item["_id"]
        for child in item["childs"]:
            synset_id = child["synset"]
            # First occurrences keep their hits; synsets without duplicates
            # need no action either.
            if synset_id in first_occurrence_dups:
                continue
            if synset_id not in ignore_dups:
                continue

            # A synset may contain more than one duplicate, e.g.
            # [(pw, 100), (pw2, 200)]; subtract their hits in one go.
            sub_sum = sum(dup[1] for dup in ignore_dups[synset_id])
            print(
                "Propagating changes to hits to synsets on the parent root path... (-%s)"
                % (format_number(sub_sum)))
            # Propagate the subtraction from this synset's parent up to the
            # root synset.
            propagate_noun(sub_sum, parent_id)
def misc_lists():
    """Log and plot the hits-per-password ratio of every miscellaneous list.

    Documents of ``mongo.db_pws_misc_lists`` whose ``word_base`` matches the
    exclusion filter from ``mongo_filter.digit_singlechar()`` are ignored.
    The remaining documents are grouped by ``source``; for each group the sum
    of ``occurrences`` (hits), the number of documents (passwords) and their
    ratio (hits per password) are logged, ordered by descending ratio.
    Finally a bar chart with the ratio and the password count per list is
    shown on a logarithmic y axis.
    """
    # Exclude non-alphabetical passwords/word bases.
    exclude_filter = mongo_filter.digit_singlechar()
    aggregate_query = [
        {
            "$match": {
                "word_base": {
                    "$nin": exclude_filter
                }
            }
        },
        {
            "$group": {
                "_id": "$source",
                "sum": {
                    "$sum": "$occurrences"
                },
                "doc_count": {
                    "$sum": 1
                }
            }
        },
        {
            "$sort": {
                "sum": -1
            }
        }
    ]

    # One (list name, statistics) entry per reference list.
    stats = []
    for item in mongo.db_pws_misc_lists.aggregate(aggregate_query):
        stats.append((item["_id"], {
            "hpp": float(item["sum"]) / float(item["doc_count"]),
            "total_hits": item["sum"],
            "total_passwords": item["doc_count"]
        }))
    # Stable sort: lists with equal ratio keep the order of the aggregation.
    stats.sort(key=lambda entry: entry[1]["hpp"], reverse=True)

    hpp_list = []
    total_passwords_list = []
    names_list = []
    for list_name, values in stats:
        hpp_list.append(values["hpp"])
        total_passwords_list.append(values["total_passwords"])
        names_list.append(list_name)
        log_ok(
            "Ref List: {}, Total Passwords: {}, Total Hits: {}, Hits Per Password: {}"
            .format(list_name, format_number(values["total_passwords"]),
                    format_number(values["total_hits"]), values["hpp"]))
    log_ok(
        "Note: Non-alphabetical and non-alphanumerical passwords have been excluded!"
    )

    # And now also plot this.
    _fig, ax = plt.subplots(1)
    xcoords = np.arange(len(names_list))
    width = 0.27
    hpp_bars = ax.bar(xcoords, hpp_list, width, color="black")
    total_bars = ax.bar(xcoords + width,
                        total_passwords_list,
                        width,
                        color="grey")
    plt.xticks(xcoords, names_list, rotation=45)
    plt.ylabel("Hits Per Password")
    plt.xlabel("Reference List")
    plt.title("Hits Per Password for Miscellaneous Lists")
    ax.legend((hpp_bars, total_bars),
              ("Hits per Password", "Total Passwords"))
    # Base 10 is Matplotlib's default for the log scale. The former `basey`
    # keyword was deprecated in Matplotlib 3.3 and later removed, so it is
    # omitted to work on old and new versions alike.
    ax.set_yscale("log")
    plt.show()
    return
def wordnet(opts):
    """Log and plot hits-per-password statistics of the WordNet passwords.

    Documents of ``mongo.db_pws_wn`` whose ``word_base`` matches the
    exclusion filter from ``mongo_filter.digit_singlechar()`` are ignored.
    The function logs the hits per ``tag``, the number of passwords and the
    global hits-per-password ratio. It then determines the synsets with the
    most hits, logs their statistics ordered by descending hits-per-password
    ratio and shows a bar chart comparing them with the global ratio.

    :param opts: mapping of options; ``opts["top"]`` is the number of synsets
        to show. A falsy value selects the default of 10; a value above 40 is
        rejected with an error message and the function returns early.
    """
    # We want to exclude the non-alphanumerical and single character lemmas.
    exclude_filter = mongo_filter.digit_singlechar()
    match_filter = {
        "word_base": {
            "$nin": exclude_filter
        }
    }

    aggregate_query1 = [{
        "$match": match_filter
    }, {
        "$group": {
            "_id": "$tag",
            "sum": {
                "$sum": "$occurrences"
            }
        }
    }]
    total_hits = 0
    for item in mongo.db_pws_wn.aggregate(aggregate_query1):
        log_ok("Total hits for timestamp '{}': {}".format(
            item["_id"], format_number(item["sum"])))
        total_hits += item["sum"]

    # Number of passwords with the filter still applied.
    # NOTE(review): assumes mongo.db_pws_wn is a PyMongo Collection.
    # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
    # count_documents() is its replacement for a filtered count.
    total_passwords = mongo.db_pws_wn.count_documents(match_filter)
    log_ok("Total passwords generated with WordNet: {}".format(
        format_number(total_passwords)))
    if total_passwords == 0:
        # Without passwords the ratio is undefined; bail out instead of
        # failing with a ZeroDivisionError.
        log_err("No WordNet passwords found, cannot compute hits per password")
        return
    hpp = float(total_hits) / float(total_passwords)
    log_ok("Hits per password: {}".format(hpp))

    if opts["top"]:
        if opts["top"] > 40:
            log_err("--top value too high. Select Value between 1 and 40")
            return
        top_flag = opts["top"]
    else:
        top_flag = 10

    aggregate_query2 = [{
        "$match": match_filter
    }, {
        "$group": {
            "_id": "$synset",
            "sum": {
                "$sum": "$occurrences"
            },
            "doc_count": {
                "$sum": 1
            }
        }
    }, {
        "$sort": {
            "sum": -1
        }
    }, {
        "$limit": top_flag
    }]

    # One (synset, statistics) entry per synset.
    stats = []
    for item in mongo.db_pws_wn.aggregate(aggregate_query2):
        stats.append((item["_id"], {
            "hpp": float(item["sum"]) / float(item["doc_count"]),
            "total_hits": item["sum"],
            "total_passwords": item["doc_count"]
        }))
    # Stable sort: synsets with equal ratio keep the order of the aggregation.
    stats.sort(key=lambda entry: entry[1]["hpp"], reverse=True)

    hpp_list = []
    total_passwords_list = []
    names_list = []
    for list_name, values in stats:
        hpp_list.append(values["hpp"])
        total_passwords_list.append(values["total_passwords"])
        names_list.append(list_name)
        log_ok(
            "Synset: {}, Total Passwords: {}, Total Hits: {}, Hits Per Password: {}"
            .format(list_name, format_number(values["total_passwords"]),
                    format_number(values["total_hits"]), values["hpp"]))
    log_ok(
        "Note: Non-alphabetical and non-alphanumerical passwords have been excluded!"
    )

    # And now also plot this.
    _fig, ax = plt.subplots(1)
    # The global average is drawn once next to every synset. Previously this
    # list was built as [len * hpp] and then overwritten by the bar container
    # below, so the red bars did not show the global average.
    global_hpp_values = [hpp] * len(names_list)
    xcoords = np.arange(len(names_list))
    width = 0.27
    synset_bars = ax.bar(xcoords, hpp_list, width, color="black")
    total_bars = ax.bar(xcoords + width,
                        total_passwords_list,
                        width,
                        color="grey")
    global_bars = ax.bar(xcoords + width * 2,
                         global_hpp_values,
                         width,
                         color="red")
    plt.xticks(xcoords, names_list, rotation=45)
    plt.ylabel("Hits Per Password")
    plt.xlabel("Reference List")
    plt.title("Hits Per Password")
    ax.legend((global_bars, synset_bars, total_bars),
              ("Global Average Hits per Password",
               "Hits per Password per Synset", "Total Passwords"))
    # Base 10 is Matplotlib's default for the log scale. The former `basey`
    # keyword was deprecated in Matplotlib 3.3 and later removed, so it is
    # omitted to work on old and new versions alike.
    ax.set_yscale("log")
    plt.show()