Example #1
    def test_support_of_custom_tokenizer(self):
        data = 'a a b b c'

        # word_count accepts a custom tokenizer in place of the default one.
        def tokenizer(text: str) -> list:
            return text.split()

        w_c = word_count(data, tokenizer=tokenizer)
        self.assertDictEqual({'a': 2, 'b': 2, 'c': 1}, w_c)
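None of these examples show word_count itself. Below is a minimal sketch of an implementation consistent with how it is called here: Example 1 passes a tokenizer keyword argument, and Example 2 calls .most_common() on the result, which suggests a collections.Counter. This is an assumption for illustration, not the original implementation.

from collections import Counter


def word_count(text: str, tokenizer=None) -> Counter:
    """Count how often each token occurs in text (sketch, not the original)."""
    if tokenizer is None:
        tokenizer = str.split  # assumed default: split on whitespace
    return Counter(tokenizer(text))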
Example #2
from nltk.tokenize import regexp_tokenize


def summarize_text(text: str, n_common_words: int = 10) -> str:
    """Return a sentence made of the most common words in the given text.

    :param text: textual data
    :type text: str
    :param n_common_words: number of words to build the sentence from
    :type n_common_words: int
    :return: summary of the text
    :rtype: str
    """

    def tokenizer(text):
        # Raw string so the backslash in \w is not treated as an escape.
        return regexp_tokenize(text, r"[\w']+")

    word_frequencies = word_count(text, tokenizer)
    common = [word for word, _ in word_frequencies.most_common(n_common_words)]
    return " ".join(common).capitalize() + "."
Example #3
    def test_best_case(self):
        data = 'a a b b c'
        w_c = word_count(data)
        self.assertDictEqual({'a': 2, 'b': 2, 'c': 1}, w_c)
Example #4
import sys

try:
    args = parser.parse_args()
    task_num = args.task
    fib_count = args.fib_count
    path = args.path
    data = args.data
except Exception:
    print("Incorrect args")
    sys.exit()

try:
    if task_num == 1:
        if path:
            text = open_as_text(path)
        else:
            text = data
        word_counts = word_count(text)
        print_word_count_result(word_counts)
    elif task_num == 2:
        if path:
            text = open_as_text(path)
        else:
            text = data
        sentence = summarize_text(text)
        print(sentence)
    elif task_num == 3:
        if path:
            nums = open_as_list_of_ints(path)
        else:
            nums = [int(s_i) for s_i in data.split(' ')]
        sorted_nums = quick_sort(nums)
        print_list_of_ints(sorted_nums)
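The fragment above presupposes an argparse parser bound to `parser`. Below is a minimal sketch of what its definition might look like, inferred from the args.* attributes used above; the flag names, defaults, and help texts are assumptions, not the original CLI definition.

import argparse

parser = argparse.ArgumentParser(description="Run one of the demo tasks.")
parser.add_argument("task", type=int,
                    help="task number (1: word count, 2: summary, 3: sort)")
parser.add_argument("--fib-count", type=int, default=10,
                    help="count for a Fibonacci task not shown in this fragment (assumed)")
parser.add_argument("--path", help="read the input from this file")
parser.add_argument("--data", help="pass the input inline as a string")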