Exemplo n.º 1
0
def test_word_freq_returns_two_cols_if_not_num_list():
    """Calling word_frequency with only a text list yields two columns."""
    sample_texts = ['pizza burger', 'pizza sandwitch']
    freq_table = word_frequency(sample_texts)
    column_count = freq_table.shape[1]
    assert column_count == 2
Exemplo n.º 2
0
def test_works_without_numlist_provided():
    """The 'word' column contains 'text' when no number list is passed."""
    documents = ['Great Text in a List', 'Greater text as well']
    words = word_frequency(documents)['word']
    found_text = words.eq('text').any()
    assert found_text
Exemplo n.º 3
0
def test_word_freq_uses_regex():
    """With regex='pizza', the 'word' entry labelled 0 is 'pizza'."""
    freq_table = word_frequency(
        ['pizza burger', 'pizza sandwitch'],
        regex='pizza',
    )
    first_word = freq_table['word'][0]
    assert first_word == 'pizza'
Exemplo n.º 4
0
def test_extra_info_provided():
    """extra_info=True produces exactly the eight expected column names.

    `text_list` and `num_list` are inputs defined outside this snippet.
    """
    freq_table = word_frequency(text_list, num_list, extra_info=True)
    actual_columns = set(freq_table.columns.values)
    expected_columns = {
        'word',
        'abs_freq',
        'abs_perc',
        'abs_perc_cum',
        'wtd_freq',
        'wtd_freq_perc',
        'wtd_freq_perc_cum',
        'rel_value',
    }
    assert actual_columns == expected_columns
Exemplo n.º 5
0
def test_works_fine_with_only_stopwords_supplied():
    """Input consisting solely of 'on' gives a result of shape (0, 4).

    Presumably 'on' is one of the default stop words (per the test name),
    so no rows remain while the four columns are still present.
    """
    freq_table = word_frequency(['on'], [3])
    expected_shape = (0, 4)
    assert freq_table.shape == expected_shape
Exemplo n.º 6
0
def test_rm_words_removed():
    """Words passed via rm_words must not appear in the 'word' column.

    `text_list` and `num_list` are inputs defined outside this snippet.
    """
    freq_table = word_frequency(text_list, num_list, rm_words=['one', 'two'])
    for excluded in ('one', 'two'):
        # Compare against the column's values, one excluded word at a time.
        assert not freq_table['word'].eq(excluded).any()
Exemplo n.º 7
0
def test_extra_info_not_provided():
    """extra_info=False produces exactly the four basic column names.

    `text_list` and `num_list` are inputs defined outside this snippet.
    """
    freq_table = word_frequency(text_list, num_list, extra_info=False)
    actual_columns = set(freq_table.columns.values)
    expected_columns = {'word', 'abs_freq', 'wtd_freq', 'rel_value'}
    assert actual_columns == expected_columns
Exemplo n.º 8
0
def test_rm_words_removed():
    """Words passed via rm_words must not appear in the 'word' column.

    `text_list` and `num_list` are inputs defined outside this snippet.
    """
    result = word_frequency(text_list, num_list, rm_words=['one', 'two'])
    # `x in series` checks the Series' index labels, not its values, so a
    # plain `'one' not in result['word']` would pass no matter what the
    # column holds. Compare against the values explicitly instead.
    assert not result['word'].eq('one').any()
    assert not result['word'].eq('two').any()
Exemplo n.º 9
0
def test_words_separated_with_given_sep():
    """For every separator, the separator itself is not a resulting word.

    `separators`, `text_list` and `num_list` are defined outside this
    snippet.
    """
    for sep in separators:
        result = word_frequency(text_list, num_list, sep=sep)
        # `sep in series` checks the Series' index labels, not its values,
        # so the membership test has to be made against the values.
        assert not result['word'].eq(sep).any()
Exemplo n.º 10
0
def test_word_freq_uses_regex():
    """Passing regex='pizza' makes 'pizza' the 'word' entry labelled 0."""
    sample_texts = ['pizza burger', 'pizza sandwitch']
    words = word_frequency(sample_texts, regex='pizza')['word']
    assert words[0] == 'pizza'
Exemplo n.º 11
0
def test_len_result_one_more_than_len_slots():
    """For every non-None separator, the separator is not a resulting word.

    NOTE(review): the function name does not describe what is checked here;
    it is kept unchanged so existing test selections keep working.
    `sep_list`, `text_list` and `num_list` are defined outside this snippet.
    """
    for sep in sep_list:
        result = word_frequency(text_list, num_list, sep=sep)
        if sep is not None:
            # `sep in series` checks the Series' index labels, not its
            # values, so compare against the values explicitly.
            assert not result['word'].eq(sep).any()
Exemplo n.º 12
0
def test_works_without_numlist_provided():
    """word_frequency works with texts only and reports the word 'text'."""
    freq_table = word_frequency(
        ['Great Text in a List', 'Greater text as well'],
    )
    is_text = freq_table['word'].eq('text')
    assert is_text.any()
Exemplo n.º 13
0
def test_works_fine_with_only_stopwords_supplied():
    """The single text 'on' with number 3 yields zero rows and four columns.

    Presumably 'on' is removed as a default stop word (per the test name).
    """
    row_count, column_count = 0, 4
    shape = word_frequency(['on'], [3]).shape
    assert shape == (row_count, column_count)
Exemplo n.º 14
0
def test_extra_info_provided():
    """With extra_info=True the result has the full set of eight columns.

    `text_list` and `num_list` are inputs defined outside this snippet.
    """
    expected_columns = {
        'word', 'abs_freq', 'abs_perc', 'abs_perc_cum',
        'wtd_freq', 'wtd_freq_perc', 'wtd_freq_perc_cum', 'rel_value',
    }
    freq_table = word_frequency(text_list, num_list, extra_info=True)
    column_names = set(freq_table.columns.values)
    assert column_names == expected_columns
Exemplo n.º 15
0
def test_extra_info_not_provided():
    """With extra_info=False only the four basic columns are returned.

    `text_list` and `num_list` are inputs defined outside this snippet.
    """
    expected_columns = {
        'word',
        'abs_freq',
        'wtd_freq',
        'rel_value',
    }
    freq_table = word_frequency(text_list, num_list, extra_info=False)
    column_names = set(freq_table.columns.values)
    assert column_names == expected_columns
Exemplo n.º 16
0
def test_rm_words_removed():
    """Neither 'one' nor 'two' remains in 'word' when listed in rm_words.

    `text_list` and `num_list` are inputs defined outside this snippet.
    """
    freq_table = word_frequency(
        text_list,
        num_list,
        rm_words=['one', 'two'],
    )
    has_one = freq_table['word'].eq('one').any()
    assert not has_one
    has_two = freq_table['word'].eq('two').any()
    assert not has_two