Пример #1
0
def sample(text):
    """Return one word from *text*, picked at random in proportion to its count.

    The text is turned into a histogram with ``histogram(text)``; the result
    is read as ``histo[word] -> count``.

    Returns ``None`` when the histogram is empty or all counts are zero.
    """
    histo = histogram(text)

    # Normalise by the total number of tokens (the sum of all counts) so the
    # per-word probabilities add up to 1. Dividing by the number of unique
    # words would push the cumulative sum past 1 and bias the pick toward
    # whichever words come first in iteration order.
    tokens = sum(histo[word] for word in histo)
    if not tokens:
        return None

    threshold = random.uniform(0, 1)

    cum_prob = 0.0
    chosen = None
    for word in histo:
        chosen = word
        cum_prob += float(histo[word]) / float(tokens)
        if cum_prob >= threshold:
            break
    # If float rounding leaves cum_prob just under the threshold, the loop
    # finishes on the last word, which is then returned instead of None.
    return chosen
def stochastic(histo):
    """Pick one entry of *histo* at random and return its first element.

    A position between 1 and ``unique_words(histo)`` (inclusive) is drawn and
    the entry at that position in iteration order is selected. The entry
    count, the drawn position and the returned value are printed.

    NOTE(review): assumes ``unique_words(histo)`` equals the number of entries
    in ``histo`` and that each entry is indexable -- confirm against callers.
    """
    total_unique = unique_words(histo)
    print(total_unique)
    rand_num = random.randint(1, total_unique)
    print(rand_num)

    word = None
    for position, item in enumerate(histo, start=1):
        word = item
        if position >= rand_num:
            # Reached the drawn position; no need to scan the rest.
            break

    print(word[0])
    return word[0]
Пример #3
0
def stochastic(histo):
    """Draw a random position in the histogram and count entries up to it.

    A number between 1 and ``histogram.unique_words(histo)`` is drawn, then
    the entries of *histo* are counted in whole passes until the running
    count reaches that number. Nothing is returned.
    """
    upper_bound = histogram.unique_words(histo)
    target = random.randint(1, upper_bound)

    seen = 0
    while seen < target:
        # One full pass over the histogram per iteration of the outer loop.
        seen += sum(1 for _ in histo)
Пример #4
0
def stochastic(histo):
    """Pick one entry of *histo* at random and return its second element.

    A position between 1 and ``histogram.unique_words(histo)`` (inclusive) is
    drawn and the entry at that position in iteration order is selected. The
    returned value is also printed.

    NOTE(review): assumes ``histogram.unique_words(histo)`` equals the number
    of entries in ``histo`` -- confirm against callers.
    """
    unique = histogram.unique_words(histo)
    rand_num = random.randint(1, unique)

    word = None
    for counter, item in enumerate(histo, start=1):
        word = item
        if counter >= rand_num:
            # Stop at the drawn position. Walking the whole histogram would
            # always leave `word` on the last entry, regardless of the draw.
            break

    print(word[1])
    return word[1]
def stochastic(histo):
    """Pick one entry of *histo* at random and return its first element.

    A position between 1 and ``unique_words(histo)`` (inclusive) is drawn and
    the entry at that position in iteration order is selected. The entry
    count, the drawn position and the returned value are printed.

    NOTE(review): assumes ``unique_words(histo)`` equals the number of entries
    in ``histo`` -- confirm against callers.
    """
    total_unique = unique_words(histo)
    print(total_unique)
    rand_num = random.randint(1, total_unique)
    print(rand_num)

    word = None
    for position, item in enumerate(histo, start=1):
        word = item
        if position >= rand_num:
            # Reached the drawn position; no need to scan the rest.
            break

    print(word[0])
    return word[0]
Пример #6
0
def stochastic(histo):
    """Return each histogram entry's share of the total frequency.

    For every entry in *histo*, ``freq(item[0], histo)`` is looked up and
    divided by the sum of those frequencies. The result is a list of
    ``[item[0], fraction]`` pairs in iteration order.

    The number of unique words and a random position are still drawn and
    printed, but the drawn position does not affect the returned list.

    Raises ZeroDivisionError if the entries' frequencies sum to zero.
    """
    total_unique = unique_words(histo)
    print(total_unique)
    rand_num = random.randint(1, total_unique)
    print(rand_num)

    # Look each frequency up once and keep it under its own name: binding a
    # local called `freq` would shadow the helper and make the call itself
    # raise UnboundLocalError.
    counts = [(item[0], freq(item[0], histo)) for item in histo]

    # The total is the sum of the looked-up frequencies, so the fractions
    # add up to 1.
    total_wc = sum(count for _, count in counts)

    return [[key, count / float(total_wc)] for key, count in counts]
def stochastic(histo):
    ''' Stochastic sampling means taking an element from a given collection at random '''
    unique = histogram.unique_words(histo)
    rand_num = random.randint(0, unique)