-
Notifications
You must be signed in to change notification settings - Fork 0
/
sampling.py
51 lines (41 loc) · 1.13 KB
/
sampling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import sys
import random
from frequency import histogram
def unweighted_select(hist):
    """Return one key of ``hist`` chosen uniformly at random.

    Every distinct key is equally likely to be picked; the values stored
    in the dictionary (the counts) are ignored.

    Args:
        hist: Dictionary whose keys are the candidates to pick from.

    Returns:
        One of the keys of ``hist``.

    Raises:
        ValueError: If ``hist`` is empty.
    """
    if not hist:
        # ValueError matches what the earlier randint-based implementation
        # raised for an empty dictionary, so existing handlers keep working.
        raise ValueError("cannot select from an empty histogram")
    # Iterating a dict yields its keys; choice() needs an indexable sequence.
    return random.choice(list(hist))
def weighted_select(hist):
    """Return one key of ``hist`` chosen in proportion to its count.

    A key whose count is ``c`` is returned with probability
    ``c / sum(hist.values())``; keys with a count of zero are never
    returned.

    Args:
        hist: Dictionary mapping each key to a non-negative numeric count.

    Returns:
        One of the keys of ``hist``.

    Raises:
        ValueError: If ``hist`` is empty or its counts do not add up to a
            positive number, since no weighted choice is possible then.
    """
    words = list(hist)
    counts = [hist[word] for word in words]
    # The weights must come from the counts themselves (not from the number
    # of distinct keys), otherwise frequent words crowd out all the others.
    if sum(counts) <= 0:
        raise ValueError("histogram must contain at least one positive count")
    return random.choices(words, weights=counts, k=1)[0]
def test_probability(hist, trials=10000):
    """Estimate how often ``weighted_select`` returns each key of ``hist``.

    Calls ``weighted_select(hist)`` ``trials`` times, tallies the results
    and converts each tally into an observed relative frequency.

    Args:
        hist: Dictionary passed unchanged to ``weighted_select``; its keys
            become the keys of the result.
        trials: Number of samples to draw. Defaults to 10000.

    Returns:
        Dictionary mapping every key of ``hist`` to the fraction of the
        draws that returned it. An empty ``hist`` gives an empty dictionary.

    Raises:
        ValueError: If ``trials`` is not positive.
    """
    if trials <= 0:
        raise ValueError("trials must be a positive integer")
    if not hist:
        # Nothing to sample from, so there are no frequencies to report.
        return {}

    # Start every key at zero so keys that are never drawn still appear.
    tallies = dict.fromkeys(hist, 0)
    for _ in range(trials):
        tallies[weighted_select(hist)] += 1
    return {key: count / trials for key, count in tallies.items()}
if __name__ == "__main__":
    # Treat all command-line arguments as one space-separated text sample,
    # build its histogram and print the observed selection frequencies.
    source_text = ' '.join(sys.argv[1:])
    print(test_probability(histogram(source_text)))