Пример #1
0
def sampling(text_file):
    """Build a histogram from ``text_file`` and print one generated sentence.

    The histogram is computed once and shared by ``get_count`` and
    ``sentence``.  Whatever ``sentence`` returns is printed as-is; the
    function itself returns None.
    """
    histogram = histogram_dict(text_file)
    count = get_count(histogram)
    # NOTE(review): ``total`` is len() of the argument itself; if
    # ``text_file`` is a path string this is the path length -- confirm intent.
    total = len(text_file)

    # Call sentence() exactly once: a second call could yield a different
    # (presumably random) result than the one that is printed.
    print(sentence(count, total, histogram))
def test(words_list, trials=10000):
    """Print the sampled and the source frequency of every sampled word.

    Args:
        words_list: sequence of words passed to ``sample`` and to
            ``histogram.histogram_dict``.
        trials: how many times ``sample`` is called; defaults to 10000.

    Returns:
        None; the comparison is written to stdout.
    """
    source_counts = histogram.histogram_dict(words_list)
    draws = [sample(words_list) for _ in range(trials)]
    sampled_counts = histogram.histogram_dict(draws)

    for word in sampled_counts:
        # The divisor follows ``trials`` so the observed share stays correct
        # when a caller changes the number of draws.
        print(word, 'test', sampled_counts[word] / trials,
              'distribution', round(source_counts[word] / len(words_list), 4))
Пример #3
0
def sample(input):
    """Return one key from the histogram of ``input``, weighted by its count.

    Args:
        input: source passed unchanged to ``histogram_dict``.

    Returns:
        A key of the histogram, or None when the histogram holds no counts.
    """
    counts = histogram_dict(input)
    total = sum(counts.values())
    if total <= 0:
        # Nothing to draw from; randint(1, 0) would raise ValueError.
        return None

    # Draw from 1..total inclusive so that every counted occurrence is exactly
    # one outcome.  Starting at 0 would add an extra outcome that always lands
    # on the first key and biases the result towards it.
    remaining = randint(1, total)
    for word, count in counts.items():
        remaining -= count
        if remaining <= 0:
            return word
    return None
Пример #4
0
def main_sample(words_file):
    """Return the word ``random_word`` picks from the histogram of ``words_file``.

    The count handed to ``random_word`` is the sum of all histogram values.
    """
    histogram = histogram_dict(words_file)
    total = sum(histogram.values())
    return random_word(histogram, total)
Пример #5
0
def test_sample_freq(histogram):
    """Call ``sample_freq`` 2000 times and return ``histogram_dict`` of the draws."""
    draws = [sample_freq(histogram) for _ in range(2000)]
    return histogram_dict(draws)
Пример #6
0
def main_sample(text_file):
    """Print and return one word drawn from the histogram of ``text_file``.

    Args:
        text_file: source passed unchanged to ``histogram_dict``.

    Returns:
        The word chosen by ``prob_word``.
    """
    histogram = histogram_dict(text_file)
    count = get_count(histogram)

    word = prob_word(histogram, count)
    print(word)
    # Return the word itself: print() always returns None, so returning its
    # result would hide the chosen word from the caller.
    return word
Пример #7
0
def dictionary_of_list_counts(word_dict):
    """Map every key of ``word_dict`` to ``histogram.histogram_dict`` of its value."""
    return {
        key: histogram.histogram_dict(values)
        for key, values in word_dict.items()
    }
def get_word_distribution(words_list):
    """Assign each histogram key a consecutive block of integers.

    The blocks start at 1; each key receives as many consecutive integers as
    its histogram value, and the next key continues where the previous one
    stopped.  Returns a dict of key -> list of integers.
    """
    counts = histogram.histogram_dict(words_list)
    distribution = {}
    next_slot = 1
    for word, occurrences in counts.items():
        upper = next_slot + occurrences
        distribution[word] = list(range(next_slot, upper))
        next_slot = upper
    return distribution
def create_dict_from_list(clean_list):
    """Return ``h.histogram_dict`` of all adjacent pairs in ``clean_list``.

    Every element is paired with its successor as a ``(current, next)`` tuple;
    a list with fewer than two items yields no pairs.
    """
    adjacent_pairs = list(zip(clean_list, clean_list[1:]))
    return h.histogram_dict(adjacent_pairs)
Пример #10
0
def create_dictionary_from_list(clean_list):
    """Take a list and return ``h.histogram_dict`` of its adjacent pairs."""
    successors = clean_list[1:]
    word_pairs = [(word, follower) for word, follower in zip(clean_list, successors)]
    return h.histogram_dict(word_pairs)
 def build_text_histogram(self, words_list):
     """Return ``histogram.histogram_dict`` of ``self.words_list``.

     NOTE(review): the ``words_list`` argument is not read; the instance
     attribute of the same name is used instead -- confirm this is intended.
     """
     source_words = self.words_list
     return histogram.histogram_dict(source_words)
Пример #12
0
def sample_freq(histogram):
    """Return one key of ``histogram``, chosen in proportion to its value.

    A target is drawn from 1 to the sum of all values (inclusive); the keys
    are then walked in order and the first one whose running total reaches
    the target is returned.
    """
    total_count = sum(histogram.values())
    target = randint(1, total_count)

    running = 0
    for word, weight in histogram.items():
        running += weight
        if running >= target:
            return word


def test_sample_freq(histogram):
    """Collect 2000 results of ``sample_freq`` and return their ``histogram_dict``."""
    draws = [sample_freq(histogram) for _ in range(2000)]
    return histogram_dict(draws)


if __name__ == "__main__":
    # Script entry point: build the histogram of the sample file, feed it to
    # test_sample_freq and print what comes back.
    sample_path = 'small_sample.txt'
    source_histogram = histogram_dict(read_file(sample_path))
    print(test_sample_freq(source_histogram))
Пример #13
0
import random
from histogram import histogram_dict


def random_word(histogram, count):
    """Return a key of ``histogram`` picked in proportion to its value.

    A target is drawn from 1..``count`` (inclusive) and the first key whose
    running total reaches it is returned.  None is returned when the target
    exceeds the sum of all values.
    """
    running = 0
    target = random.randint(1, count)

    for word in histogram:
        running += histogram[word]
        if running >= target:
            return word
    return None


def main_sample(words_file):
    """Return the word ``random_word`` picks from the histogram of ``words_file``.

    The count handed to ``random_word`` is the sum of all histogram values.
    """
    histogram = histogram_dict(words_file)
    return random_word(histogram, sum(histogram.values()))


if __name__ == '__main__':
    # Demo entry point: print one weighted random word from the sample file.
    words_file = 'test.txt'
    histogram = histogram_dict(words_file)
    # Pass the histogram's real total instead of a fixed 100: with a smaller
    # total random_word can run past the last key and return None, and with a
    # larger one the later keys can never be reached.
    print(random_word(histogram, sum(histogram.values())))
Пример #14
0
def main_sample(source_text):
    """Return the word ``prob_word`` picks from the histogram of ``source_text``.

    The count handed to ``prob_word`` is the sum of all histogram values.
    """
    histogram = histogram_dict(source_text)
    total = sum(histogram.values())
    return prob_word(histogram, total)
Пример #15
0
def ensure_randomness(histogram):
    """Call ``random_word(histogram)`` 10000 times and return ``histogram_dict`` of the results."""
    draws = []
    for _ in range(10000):
        draws.append(random_word(histogram))
    return histogram_dict(draws)
Пример #16
0
def ensure_probability(histogram):
    """Sample 10000 words from the probability table of ``histogram``.

    The table is built once with ``probabilities``; every draw comes from
    ``probabilistic_random_word`` and the draws are summarised with
    ``histogram_dict``.
    """
    table = probabilities(histogram)
    draws = []
    for _ in range(10000):
        draws.append(probabilistic_random_word(table))
    return histogram_dict(draws)
Пример #17
0
def ensure_probability(histogram):
    """Sample 10000 words from the probability table of ``histogram``.

    The table is built once with ``probabilities``; every draw comes from
    ``probabilistic_random_word`` and the draws are summarised with
    ``histogram_dict``.
    """
    table = probabilities(histogram)
    draws = []
    for _ in range(10000):
        draws.append(probabilistic_random_word(table))
    return histogram_dict(draws)
Пример #18
0
def ensure_randomness(histogram):
    """Call ``random_word(histogram)`` 10000 times and return ``histogram_dict`` of the results."""
    draws = []
    for _ in range(10000):
        draws.append(random_word(histogram))
    return histogram_dict(draws)
Пример #19
0
            return word

def probabilities(freq_dist):
    """Return the cumulative probability table of ``freq_dist``.

    Args:
        freq_dist: mapping of word -> occurrence count.

    Returns:
        A sorted list of ``(cumulative_probability, word)`` tuples with each
        probability rounded to 4 decimal places.  An empty mapping gives an
        empty list.

    Raises:
        ZeroDivisionError: if the mapping is non-empty but its counts sum
            to zero.
    """
    # Sum the counts once instead of once per word.
    total = sum(freq_dist.values())
    table = []
    running = 0
    for word, freq in freq_dist.items():
        running += freq
        # Round the exact cumulative share rather than adding up rounded
        # shares: rounding error then cannot accumulate, and the final entry
        # is exactly 1.0 instead of something like 0.9999.
        table.append((round(running / total, 4), word))
    return sorted(table)

def probability(word, freq_dist):
    """Return the share of ``word`` among all counts in ``freq_dist``.

    The result is ``freq_dist[word]`` divided by the sum of every value,
    rounded to 4 decimal places.
    """
    total_count = sum(freq_dist.values())
    share = freq_dist[word] / total_count
    return round(share, 4)

def ensure_randomness(histogram):
    """Call ``random_word(histogram)`` 10000 times and return ``histogram_dict`` of the results."""
    draws = []
    for _ in range(10000):
        draws.append(random_word(histogram))
    return histogram_dict(draws)

def ensure_probability(histogram):
    """Sample 10000 words from the probability table of ``histogram``.

    The table is built once with ``probabilities``; every draw comes from
    ``probabilistic_random_word`` and the draws are summarised with
    ``histogram_dict``.
    """
    table = probabilities(histogram)
    draws = []
    for _ in range(10000):
        draws.append(probabilistic_random_word(table))
    return histogram_dict(draws)

if __name__ == '__main__':
    import sys

    # Read the file named by the first command-line argument and split it on
    # whitespace.  The context manager closes the handle as soon as the text
    # has been read instead of leaving it open until garbage collection.
    with open(sys.argv[1], 'r') as source:
        words = source.read().split()
    hstgm = histogram_dict(words)
    probs = probabilities(hstgm)
    print(probabilistic_random_word(probs))
    print(ensure_probability(hstgm))
Пример #20
0
import random
from histogram import histogram_dict


def prob_word(histogram, count):
    """Return a key of ``histogram`` picked in proportion to its value.

    A target is drawn from 1..``count`` (inclusive) and the first key whose
    running total reaches it is returned.  None is returned when the target
    exceeds the sum of all values.
    """
    running = 0
    target = random.randint(1, count)

    for word in histogram:
        running += histogram[word]
        if running >= target:
            return word
    return None


def main_sample(source_text):
    """Return the word ``prob_word`` picks from the histogram of ``source_text``.

    The count handed to ``prob_word`` is the sum of all histogram values.
    """
    histogram = histogram_dict(source_text)
    return prob_word(histogram, sum(histogram.values()))


if __name__ == '__main__':
    # Demo entry point: print one weighted random word from the book file.
    source_text = 'book.txt'
    histogram = histogram_dict(source_text)
    # Pass the histogram's real total instead of a fixed 21: with a smaller
    # total prob_word can run past the last key and return None, and with a
    # larger one the later keys can never be reached.
    print(prob_word(histogram, sum(histogram.values())))
Пример #21
0
    for word, freq in freq_dist.items():
        prob = round(prob + probability(word, freq_dist), 4)
        probabilities.append((prob, word))
    return sorted(probabilities)


def probability(word, freq_dist):
    """Return the share of ``word`` among all counts in ``freq_dist``.

    The result is ``freq_dist[word]`` divided by the sum of every value,
    rounded to 4 decimal places.
    """
    total_count = sum(freq_dist.values())
    return round(freq_dist[word] / total_count, 4)


def ensure_randomness(histogram):
    """Call ``random_word(histogram)`` 10000 times and return ``histogram_dict`` of the results."""
    draws = []
    for _ in range(10000):
        draws.append(random_word(histogram))
    return histogram_dict(draws)


def ensure_probability(histogram):
    """Sample 10000 words from the probability table of ``histogram``.

    The table is built once with ``probabilities``; every draw comes from
    ``probabilistic_random_word`` and the draws are summarised with
    ``histogram_dict``.
    """
    table = probabilities(histogram)
    draws = []
    for _ in range(10000):
        draws.append(probabilistic_random_word(table))
    return histogram_dict(draws)


if __name__ == '__main__':
    import sys

    # Read the file named by the first command-line argument and split it on
    # whitespace.  The context manager closes the handle as soon as the text
    # has been read instead of leaving it open until garbage collection.
    with open(sys.argv[1], 'r') as source:
        words = source.read().split()
    hstgm = histogram_dict(words)
    probs = probabilities(hstgm)
    print(probabilistic_random_word(probs))
    print(ensure_probability(hstgm))