Exemplo n.º 1
0
def find_collocations(file_name, data, popular_word):
    text_file = open(file_name, 'r')
    file_content = text_file.read()

    most_common_words = find_most_common_words(file_content, popular_word)

    second_word = None
    third_word = None
    collocations = data

    text_file.seek(0)
    for line in text_file:
        for word in line.split():
            first_word = second_word
            second_word = third_word
            third_word = trim_word(word)
            if (first_word not in most_common_words and second_word not in most_common_words) and \
                    (first_word and first_word[0].islower() and second_word and second_word[0].islower()):
                count_collocations(collocations, stem(first_word.lower()), stem(second_word.lower()))

    # dodatkowa iteracja dla ostatniego slowa
    first_word = second_word
    second_word = third_word
    count_collocations(collocations, first_word, second_word)

    collocations = find_whole_collocations_from_stems(collocations, file_content)
    return collocations, most_common_words, file_content
Exemplo n.º 2
0
def find_collocations(file_name, data, popular_word):
    text_file = open(file_name, 'r')
    file_content = text_file.read()

    most_common_words = find_most_common_words(file_content, popular_word)

    second_word = None
    third_word = None
    collocations = data

    text_file.seek(0)
    for line in text_file:
        for word in line.split():
            first_word = second_word
            second_word = third_word
            third_word = trim_word(word)
            if (first_word not in most_common_words and second_word not in most_common_words) and \
                    (first_word and first_word[0].islower() and second_word and second_word[0].islower()):
                count_collocations(collocations, stem(first_word.lower()),
                                   stem(second_word.lower()))

    # dodatkowa iteracja dla ostatniego slowa
    first_word = second_word
    second_word = third_word
    count_collocations(collocations, first_word, second_word)

    collocations = find_whole_collocations_from_stems(collocations,
                                                      file_content)
    return collocations, most_common_words, file_content
Exemplo n.º 3
0
def find_most_common_words(text, count):
    words = dict()
    for word in text.split():
        word = trim_word(word)
        if word not in words.keys():
            words[word] = 1
        else:
            words[word] += 1
    sorted_words = sorted(words, key=words.get, reverse=True)
    return sorted_words[:count]
Exemplo n.º 4
0
def find_most_common_words(text, count):
    words = dict()
    for word in text.split():
        word = trim_word(word)
        if word not in words.keys():
            words[word] = 1
        else:
            words[word] += 1
    sorted_words = sorted(words, key=words.get, reverse=True)
    return sorted_words[:count]
Exemplo n.º 5
0
def find_collocations(text, data, popular_word):

    most_common_words = find_most_common_words(text, popular_word)

    second_word = None
    third_word = None

    collocations = data
    for word in text.split():
        first_word = second_word
        second_word = third_word
        third_word = trim_word(word)
        if (first_word not in most_common_words and second_word not in most_common_words) and \
                (first_word and first_word[0].islower() and second_word and second_word[0].islower()):
            count_collocations(collocations, stem(first_word.lower()),
                               stem(second_word.lower()))

    # dodatkowa iteracja dla ostatniego slowa
    first_word = second_word
    second_word = third_word
    count_collocations(collocations, first_word, second_word)

    collocations = find_whole_collocations_from_stems(collocations, text)
    return collocations, most_common_words