Пример #1
0
def main():
    """Print the document count of every ``passwords_*`` collection plus the total.

    Iterates over all collection names of ``mongo.db``, skips those that do
    not start with ``"passwords_"``, prints one formatted count per remaining
    collection and finally the sum of all counts.
    """
    total = 0
    for name in mongo.db.list_collection_names():
        if not name.startswith("passwords_"):
            continue
        # Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0.
        # estimated_document_count() is its replacement for an unfiltered
        # count (it is answered from collection metadata, like count() was).
        count = mongo.db.get_collection(name).estimated_document_count()
        total += count
        print(helper.format_number(count), "\t\t", name)
    print("Total", helper.format_number(total))
Пример #2
0
def oneSong(request, song_url):
    """Render the player page for the song identified by ``song_url``.

    Looks the song up by its ``url`` field, collects the last 15 entries of
    ``Song.objects.all()`` (in reverse order) for the "latest songs" list and
    fetches the song's view count from the YouTube Data API.

    Args:
        request: Incoming request object, passed through to ``render``.
        song_url: URL fragment identifying the song; a single trailing "/"
            is ignored.

    Returns:
        The response produced by rendering ``rap/index.html`` with the
        context keys ``last10``, ``playSong`` and ``viewCount``.

    Raises:
        Whatever ``Song.objects.get`` raises when no song matches, any error
        raised by ``urllib2.urlopen``, and ``IndexError``/``KeyError`` when
        the API answer does not contain statistics for the video.
    """
    # Strip a trailing slash only. The previous check (`"/" in url`) removed
    # the last character whenever a slash appeared anywhere in the value.
    url = str(song_url)
    if url.endswith("/"):
        url = url[:-1]
    play_song = Song.objects.get(url=url)

    # Last 15 songs, newest first. Slicing a plain list (instead of the
    # queryset with a computed start index) also works with fewer than 15
    # songs, where the old start index became negative and Django refused it.
    last10 = list(Song.objects.all())[-15:]
    last10.reverse()

    # Get the number of views for the selected song.
    # NOTE(review): this API key is committed to source control; it should be
    # moved to settings / the environment and rotated.
    api_key = "AIzaSyCWDfJdQfcOeDGjvL4Rs0sNnaTY0CcBMRY"
    url_for_details = "https://www.googleapis.com/youtube/v3/videos?id=" + play_song.url + "&key=" + api_key + "&fields=items(id,snippet(channelId,title,categoryId,description),statistics)&part=snippet,statistics"
    response = urllib2.urlopen(url_for_details)
    try:
        json_file = json.loads(response.read())
    finally:
        # Always release the connection, even if reading or parsing fails.
        response.close()
    views = json_file['items'][0]['statistics']['viewCount']
    formatted_views = format_number(views)

    context = {'last10': last10, 'playSong': play_song, 'viewCount': formatted_views}
    return render(request, 'rap/index.html', context)
Пример #3
0
def index(request):
    """Render the start page showing the most recently added song.

    Picks the song with the highest ``id``, collects the last 15 entries of
    ``Song.objects.all()`` (in reverse order) for the "latest songs" list and
    fetches the song's view count from the YouTube Data API.

    Args:
        request: Incoming request object, passed through to ``render``.

    Returns:
        The response produced by rendering ``rap/index.html`` with the
        context keys ``last10``, ``playSong`` and ``viewCount``.

    Raises:
        Whatever ``Song.objects.latest`` raises when there is no song, any
        error raised by ``urllib2.urlopen``, and ``IndexError``/``KeyError``
        when the API answer does not contain statistics for the video.
    """
    # Get the last song to show.
    play_song = Song.objects.latest('id')

    # Last 15 songs, newest first. Slicing a plain list (instead of the
    # queryset with a computed start index) also works with fewer than 15
    # songs, where the old start index became negative and Django refused it.
    last10 = list(Song.objects.all())[-15:]
    last10.reverse()

    # Get the number of views for the last song.
    # NOTE(review): this API key is committed to source control; it should be
    # moved to settings / the environment and rotated.
    api_key = "AIzaSyCWDfJdQfcOeDGjvL4Rs0sNnaTY0CcBMRY"
    url_for_details = "https://www.googleapis.com/youtube/v3/videos?id=" + play_song.url + "&key=" + api_key + "&fields=items(id,snippet(channelId,title,categoryId,description),statistics)&part=snippet,statistics"
    response = urllib2.urlopen(url_for_details)
    try:
        json_file = json.loads(response.read())
    finally:
        # Always release the connection, even if reading or parsing fails.
        response.close()
    views = json_file['items'][0]['statistics']['viewCount']
    formatted_views = format_number(views)

    context = {
        'last10': last10,
        'playSong': play_song,
        'viewCount': formatted_views
    }
    return render(request, 'rap/index.html', context)
Пример #4
0
def update_by_root_ss():
    """
    Trigger the this_hits update of the root synset ("entity.n.01") and print the result.

    Due to the nature of the duplicate removal code, the this_hits value of the root synset
    never gets updated, so the update has to be called manually. Per the original note, the
    value is computed from the synset's hits_below and total_hits values. For each of
    this_hits, hits_below and total_hits the old and the new value are printed.
    """
    result = mongo.update_synset_this_hits("entity.n.01")
    # The result dict carries an "<field>_old" / "<field>_new" pair per hit counter.
    for field in ("this_hits", "hits_below", "total_hits"):
        before = format_number(result[field + "_old"])
        after = format_number(result[field + "_new"])
        print("Updated: %s -> %s" % (before, after))
Пример #5
0
def sum_without_dups_verb(sum_level):
    """
    Remove duplicate-password hits from the verb WordNet tree for one level (bottom-up).

    Only the first occurrence of a duplicate password counts towards the hit sums. The sums
    were already computed when the passwords were looked up, but without considering
    duplicates, so this works "inverted": it identifies the synsets that produced non-first
    duplicates and subtracts those hits again.

    It is not enough to subtract from the directly attached parent (hypernym): a synset on
    level N carries a hits_below value that subsumes the hits of all its (in)direct children,
    so a subtraction on level 5 has to be applied on every level up to level 0.

    Args:
        sum_level: Tree level whose synsets are examined.

    Behaviour:
        * Groups whose parent is "root" (level 0): the duplicate hits are subtracted from the
          child synset itself and its hits are updated; nothing is propagated. The function
          returns after handling this group.
        * All other groups: for every child listed in ``ignore_dups`` (and not in
          ``first_occurrence_dups``) the summed duplicate hits are handed to
          ``propagate_verb`` starting at the group's parent.

    NOTE(review): the previous version had a stray ``continue`` in front of the
    ``propagate_verb`` call, which made the call unreachable, and it used the running sum
    inside the per-duplicate loop. The total is now computed first and propagated once,
    assuming ``propagate_verb`` subtracts the amount it is given -- confirm against its
    implementation.
    """
    # Return all synsets of the specified level grouped by their parents. Per parent group:
    #   text _id:    parent synset
    #   int  sum:    total_hits sum of all child synsets of that parent
    #   list childs: IDs of the child synsets
    lowest_level_grps = mongo.db_wn_verb.aggregate([
        # Filter by the requested level
        {
            "$match": {
                "level": sum_level
            }
        },
        # Group by the parent synsets
        {
            "$group": {
                "_id": "$parent",
                "sum": {
                    "$sum": "$total_hits"
                },
                "childs": {
                    "$push": {
                        "synset": "$id"
                    }
                }
            }
        }
    ])

    for item in lowest_level_grps:
        parent_id = item["_id"]
        child_ids = [child["synset"] for child in item["childs"]]

        if parent_id == "root":
            # Tree top reached: subtract directly from the affected synsets, no propagation.
            print(
                "Reached the top (level 0), will now subtract from the root nodes..."
            )
            for sid in child_ids:
                if sid not in ignore_dups:
                    continue
                print(
                    "Not first occurrence. Subtracting from current synset:",
                    sid)
                # Sum first so that a single database update per synset is enough.
                total_dups = 0
                for d in ignore_dups[sid]:
                    print("\t", d)
                    total_dups += d[1]
                print("\ttotal:", total_dups)
                mongo.subtract_from_this_hits_verb(sid, total_dups)
                # this_hits changed, so total_hits has to be refreshed as well.
                total_hits_c, total_hits_old_c = mongo.update_synset_hits_verb(
                    sid)
                print("\t\tUpdate:", total_hits_old_c, " -> ",
                      total_hits_c)
            print("Finished!")
            return

        for synset_id in child_ids:
            # First occurrences keep their hits; synsets without duplicates need no action.
            if synset_id in first_occurrence_dups or synset_id not in ignore_dups:
                continue
            # A synset may contain more than one duplicate: [(pw, 100), (pw2, 200)]
            subtracts = ignore_dups[synset_id]
            if not subtracts:
                continue
            sub_sum = sum(sub[1] for sub in subtracts)
            print(
                "Propagating changes to hits to synsets on the parent root path... (-%s)"
                % (format_number(sub_sum)))
            propagate_verb(sub_sum, parent_id)
Пример #6
0
def sum_without_dups_noun(sum_level):
    """
    Remove duplicate-password hits from the noun WordNet tree for one level (bottom-up).

    Only the first occurrence of a duplicate password counts towards the hit sums. The sums
    were already computed when the passwords were looked up, but without considering
    duplicates, so this works "inverted": it identifies the synsets that produced non-first
    duplicates and subtracts those hits again.

    It is not enough to subtract from the directly attached parent (hypernym): a synset on
    level N carries a hits_below value that subsumes the hits of all its (in)direct children,
    so a subtraction on level 5 has to be applied on every level up to level 0.

    Args:
        sum_level: Tree level whose synsets are examined.

    For every child synset listed in ``ignore_dups`` (and not in ``first_occurrence_dups``)
    the summed duplicate hits are handed to ``propagate_noun`` starting at the parent of the
    child's group.

    NOTE(review): the previous version called ``propagate_noun`` inside the per-duplicate loop
    with the running sum, so a synset with duplicates (pw, 100) and (pw2, 200) propagated 100
    and then 300. The total is now computed first and propagated once, assuming
    ``propagate_noun`` subtracts the amount it is given -- confirm against its implementation.
    """
    # Return all synsets of the specified level grouped by their parents. Per parent group:
    #   text _id:    parent synset
    #   int  sum:    total_hits sum of all child synsets of that parent
    #   list childs: IDs of the child synsets
    lowest_level_grps = mongo.db_wn.aggregate([
        # Filter by the requested level
        {
            "$match": {
                "level": sum_level
            }
        },
        # Group by the parent synsets
        {
            "$group": {
                "_id": "$parent",
                "sum": {
                    "$sum": "$total_hits"
                },
                "childs": {
                    "$push": {
                        "synset": "$id"
                    }
                }
            }
        }
    ])

    for item in lowest_level_grps:
        start_parent_synset = item["_id"]
        for synset in item["childs"]:
            synset_id = synset["synset"]
            # First occurrences keep their hits; synsets without duplicates need no action.
            if synset_id in first_occurrence_dups or synset_id not in ignore_dups:
                continue
            # A synset may contain more than one duplicate: [(pw, 100), (pw2, 200)]
            subtracts = ignore_dups[synset_id]
            if not subtracts:
                continue
            sub_sum = sum(sub[1] for sub in subtracts)
            print(
                "Propagating changes to hits to synsets on the parent root path... (-%s)"
                % (format_number(sub_sum)))
            propagate_noun(sub_sum, start_parent_synset)
Пример #7
0
def misc_lists():
    """Log and plot hits-per-password statistics for the miscellaneous reference lists.

    Aggregates ``mongo.db_pws_misc_lists`` per ``source`` (excluding the word
    bases returned by ``mongo_filter.digit_singlechar()``), computes for each
    source the total hits, the number of password documents and the ratio of
    the two, logs one line per source ordered by that ratio (highest first)
    and shows a bar chart of the ratio and the password count per source.
    """
    # Exclude non-alphabetical passwords/word bases
    exclude_filter = mongo_filter.digit_singlechar()
    aggregate_query = [
        {
            "$match": {
                "word_base": {
                    "$nin": exclude_filter
                }
            }
        },
        {
            "$group": {
                "_id": "$source",
                "sum": {
                    "$sum": "$occurrences"
                },
                "doc_count": {
                    "$sum": 1
                }
            }
        },
        {
            "$sort": {
                "sum": -1
            }
        }
    ]

    results = {}
    for item in mongo.db_pws_misc_lists.aggregate(aggregate_query):
        results[item["_id"]] = {
            # A group always holds at least one document, so doc_count >= 1.
            "hpp": float(item["sum"]) / float(item["doc_count"]),
            "total_hits": item["sum"],
            "total_passwords": item["doc_count"]
        }

    # Rank the reference lists by hits per password, best first.
    ranked = sorted(results.items(),
                    key=lambda entry: entry[1]["hpp"],
                    reverse=True)

    hpp_list = []
    total_passwords_list = []
    names_list = []

    for list_name, values in ranked:
        hpp_list.append(values["hpp"])
        total_passwords_list.append(values["total_passwords"])
        names_list.append(list_name)
        log_ok(
            "Ref List: {}, Total Passwords: {}, Total Hits: {}, Hits Per Password: {}"
            .format(list_name, format_number(values["total_passwords"]),
                    format_number(values["total_hits"]), values["hpp"]))

    log_ok(
        "Note: Non-alphabetical and non-alphanumerical passwords have been excluded!"
    )

    # And now also plot this
    _, ax = plt.subplots(1)
    xcoords = np.arange(len(names_list))
    width = 0.27
    hpps = ax.bar(xcoords, hpp_list, width, color="black")
    total_passes = ax.bar(xcoords + width,
                          total_passwords_list,
                          width,
                          color="grey")
    plt.xticks(xcoords, names_list, rotation=45)
    plt.ylabel("Hits Per Password")
    plt.xlabel("Reference List")
    plt.title("Hits Per Password for Miscellaneous Lists")
    ax.legend((hpps, total_passes), ("Hits per Password", "Total Passwords"))
    # Base 10 is the default of the log scale. The explicit `basey` keyword
    # was removed in newer Matplotlib releases, so it is no longer passed.
    ax.set_yscale("log")
    plt.show()
Пример #8
0
def wordnet(opts):
    """Log and plot hits-per-password statistics for the WordNet-generated passwords.

    Steps:
      1. Log the total hits per ``tag`` and sum them up.
      2. Count the matching password documents and log the global
         hits-per-password ratio.
      3. Aggregate the ``opts["top"]`` synsets with the most hits (default 10,
         allowed range 1..40), log them ordered by their hits-per-password
         ratio and show a bar chart comparing each synset's ratio, its
         password count and the global ratio.

    Word bases returned by ``mongo_filter.digit_singlechar()`` are excluded
    from every query.

    Args:
        opts: Mapping with a ``"top"`` entry; a falsy value selects the
            default of 10 synsets.

    Returns:
        None. Returns early (after logging an error) when ``opts["top"]`` is
        out of range or when no password matches the filter.
    """
    # We want to exclude the non-alphanumerical and single character lemmas
    exclude_filter = mongo_filter.digit_singlechar()
    match_filter = {"word_base": {"$nin": exclude_filter}}

    aggregate_query1 = [{
        "$match": match_filter
    }, {
        "$group": {
            "_id": "$tag",
            "sum": {
                "$sum": "$occurrences"
            }
        }
    }]

    total_hits = 0
    for item in mongo.db_pws_wn.aggregate(aggregate_query1):
        log_ok("Total hits for timestamp '{}': {}".format(
            item["_id"], format_number(item["sum"])))
        total_hits += item["sum"]

    # Number of passwords with the same filter applied. Cursor.count() was
    # deprecated in PyMongo 3.7 and removed in 4.0; count_documents() is the
    # replacement for a filtered count.
    total_passwords = mongo.db_pws_wn.count_documents(match_filter)
    log_ok("Total passwords generated with WordNet: {}".format(
        format_number(total_passwords)))
    if not total_passwords:
        # Avoid a ZeroDivisionError; without passwords there is nothing to rank or plot.
        log_err("No passwords found. Nothing to evaluate")
        return
    hpp = float(total_hits) / float(total_passwords)
    log_ok("Hits per password: {}".format(hpp))

    top_flag = opts["top"] or 10
    if top_flag > 40:
        log_err("--top value too high. Select Value between 1 and 40")
        return
    if top_flag < 1:
        # A non-positive $limit is rejected by the server; fail early instead.
        log_err("--top value too low. Select Value between 1 and 40")
        return

    aggregate_query2 = [{
        "$match": match_filter
    }, {
        "$group": {
            "_id": "$synset",
            "sum": {
                "$sum": "$occurrences"
            },
            "doc_count": {
                "$sum": 1
            }
        }
    }, {
        "$sort": {
            "sum": -1
        }
    }, {
        "$limit": top_flag
    }]

    results = {}
    for item in mongo.db_pws_wn.aggregate(aggregate_query2):
        results[item["_id"]] = {
            # A group always holds at least one document, so doc_count >= 1.
            "hpp": float(item["sum"]) / float(item["doc_count"]),
            "total_hits": item["sum"],
            "total_passwords": item["doc_count"]
        }

    # Rank the synsets by hits per password, best first.
    ranked = sorted(results.items(),
                    key=lambda entry: entry[1]["hpp"],
                    reverse=True)

    hpp_list = []
    total_passwords_list = []
    names_list = []

    for list_name, values in ranked:
        hpp_list.append(values["hpp"])
        total_passwords_list.append(values["total_passwords"])
        names_list.append(list_name)
        log_ok(
            "Synset: {}, Total Passwords: {}, Total Hits: {}, Hits Per Password: {}"
            .format(list_name, format_number(values["total_passwords"]),
                    format_number(values["total_hits"]), values["hpp"]))

    log_ok(
        "Note: Non-alphabetical and non-alphanumerical passwords have been excluded!"
    )

    # And now also plot this
    _, ax = plt.subplots(1)
    # One bar with the global average next to every synset. The previous code
    # built `[len(names_list) * hpp]` (a single, scaled value) and then passed
    # the bar container of the first series as the heights of this series.
    global_hpp_list = [hpp] * len(names_list)
    xcoords = np.arange(len(names_list))
    width = 0.27
    synset_bars = ax.bar(xcoords, hpp_list, width, color="black")
    total_passes = ax.bar(xcoords + width,
                          total_passwords_list,
                          width,
                          color="grey")
    global_bars = ax.bar(xcoords + width * 2,
                         global_hpp_list,
                         width,
                         color="red")
    plt.xticks(xcoords, names_list, rotation=45)
    plt.ylabel("Hits Per Password")
    plt.xlabel("Reference List")
    plt.title("Hits Per Password")
    ax.legend((global_bars, synset_bars, total_passes),
              ("Global Average Hits per Password",
               "Hits per Password per Synset", "Total Passwords"))
    # Base 10 is the default of the log scale. The explicit `basey` keyword
    # was removed in newer Matplotlib releases, so it is no longer passed.
    ax.set_yscale("log")
    plt.show()