Пример #1
0
def test_generate_from_frequencies():
    """Feed the dict produced by ``process_text`` to ``generate_from_frequencies``."""
    cloud = TextCloud(max_words=50)
    frequencies = cloud.process_text(THIS)

    # the call is expected to hand back a TextCloud instance
    assert isinstance(cloud.generate_from_frequencies(frequencies), TextCloud)
Пример #2
0
def test_zero_frequencies():
    """Only the entry with a non-zero frequency should appear in the layout."""
    frequencies = {'test': 1, 'test1': 0, 'test2': 0}
    cloud = TextCloud()
    cloud.generate_from_frequencies(frequencies)

    placed = cloud.layout_
    assert len(placed) == 1
    # first element of the first layout entry starts with the word itself
    assert placed[0][0][0] == 'test'
Пример #3
0
def test_coloring_black_works():
    """Generating with a colour function built from an all-zero image must not fail."""
    black_image = np.zeros((50, 50, 3))
    color_func = ImageColorGenerator(black_image)
    cloud = TextCloud(width=50, height=50, random_state=42,
                      color_func=color_func, min_font_size=1)
    cloud.generate(THIS)
Пример #4
0
def test_recolor_too_small():
    """Recoloring a 30x30 cloud from a 20x20 colour image raises ValueError."""
    small_image = np.array(Image.new('RGB', size=(20, 20)))
    cloud = TextCloud(width=30, height=30, random_state=0, min_font_size=1)
    cloud = cloud.generate(THIS)
    color_func = ImageColorGenerator(small_image)

    expected_message = 'ImageColorGenerator is smaller than the canvas'
    with pytest.raises(ValueError, match=expected_message):
        cloud.recolor(color_func=color_func)
Пример #5
0
def test_process_text():
    """``process_text`` is expected to return a dict."""
    cloud = TextCloud(max_words=50)

    # only the return type is checked here
    assert isinstance(cloud.process_text(THIS), dict)
Пример #6
0
def test_collocation_stopwords():
    """Bigrams containing the stopwords "you" / "very" must not show up in ``words_``."""
    cloud = TextCloud(collocations=True, stopwords={"you", "very"}, collocation_threshold=9)
    cloud.generate(STOPWORDED_COLLOCATIONS)
    words = cloud.words_

    assert "thank you" not in words
    assert "very much" not in words
    assert "thank" in words
    # a bigram consisting only of stopwords is dropped as well
    assert "you very" not in words
 def make_cloud(self):
     """Build a text cloud from the documents held by ``self.articledb``.

     The documents returned by ``self.articledb.return_doc()`` are joined into
     one string, tokenized and stripped of stopwords through ``DocProc``; the
     surviving tokens are re-joined and handed to ``TextCloud``, whose
     ``make_cloud()`` is then invoked.
     """
     documents = self.articledb.return_doc()
     processor = DocProc()
     # DocProc works on a list of documents, so wrap the merged text in a list
     merged_text = " ".join(documents)
     tokenized = processor.tokenize([merged_text])
     filtered = processor.remove_stopwords(tokenized)
     cloud = TextCloud(" ".join(filtered[0]))
     cloud.make_cloud()
Пример #8
0
def test_plural_stopwords():
    """The value stored for 'wa' stays below 1, with and without collocations."""
    text = '''was was was was was was was was was was was was was was was
    wa
    hello hello hello hello hello hello hello hello
    goodbye good bye maybe yes no'''

    # first with default settings, then with collocations switched off
    for options in ({}, {'collocations': False}):
        cloud = TextCloud(**options).generate(text)
        assert cloud.words_['wa'] < 1
Пример #9
0
def test_collocation_stopwords_uppercase():
    """Lower-case stopwords must also suppress capitalised words and bigrams."""
    cloud = TextCloud(collocations=True, stopwords={"thank", "hi", "there"}, collocation_threshold=9)
    cloud.generate(STOPWORDED_COLLOCATIONS_UPPERCASE)

    must_be_absent = (
        "Thank you",
        "thank you",
        "Thank",
        # a bigram consisting only of stopwords is removed, whatever its casing
        "hi There",
        "Hi there",
        "Hi There",
    )
    for phrase in must_be_absent:
        assert phrase not in cloud.words_
Пример #10
0
def makeImage(text):
    """Build a masked cloud from frequencies and display it with matplotlib.

    ``text`` is passed unchanged to ``generate_from_frequencies``; the mask is
    read from ``alice_mask.png``.
    """
    mask = np.array(Image.open("alice_mask.png"))

    cloud = TextCloud(background_color="white", max_words=1000, mask=mask)
    cloud.generate_from_frequencies(text)

    # display the result without axes
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
Пример #11
0
def test_multiple_s():
    """Only the single-'s' form is dropped when plural normalisation is active."""
    text = 'flo flos floss flosss'

    normalized = TextCloud(stopwords=[]).generate(text)
    assert "flo" in normalized.words_
    assert "flos" not in normalized.words_
    assert "floss" in normalized.words_
    assert "flosss" in normalized.words_

    # with normalize_plurals=False the one-'s' variant is kept too
    untouched = TextCloud(stopwords=[], normalize_plurals=False).generate(text)
    for word in ("flo", "flos", "floss", "flosss"):
        assert word in untouched.words_
Пример #12
0
def test_random_state():
    """Two clouds built with the same ``random_state`` must compare equal."""
    first, second = TextCloud(random_state=0), TextCloud(random_state=0)
    first.generate(THIS)
    second.generate(THIS)
    # equal seeds are expected to make the whole generation deterministic
    assert_array_equal(first, second)
Пример #13
0
def test_repeat():
    """Check how ``repeat=True`` pads the layout by repeating the input words."""
    short_text = "Some short text"
    wc = TextCloud(stopwords=[]).generate(short_text)
    assert len(wc.layout_) == 3
    wc = TextCloud(max_words=50, stopwords=[], repeat=True).generate(short_text)
    # multiple of word count larger than max_words
    assert len(wc.layout_) == 51
    # relative scaling doesn't work well with repeat
    assert wc.relative_scaling == 0
    # all frequencies are 1
    assert len(wc.words_) == 3
    assert_array_equal(list(wc.words_.values()), 1)
    frequencies = [w[0][1] for w in wc.layout_]
    assert_array_equal(frequencies, 1)
    repetition_text = "Some short text with text"
    wc = TextCloud(max_words=52, stopwords=[], repeat=True)
    wc.generate(repetition_text)
    assert len(wc.words_) == 4
    # normalized frequencies
    assert wc.words_['text'] == 1
    assert wc.words_['with'] == .5
    # Compare explicitly: `assert len(...), wc.max_words` would treat max_words
    # as the failure message and only check that the layout is non-empty.
    # 52 is a multiple of the 4 distinct words.
    assert len(wc.layout_) == wc.max_words
    frequencies = [w[0][1] for w in wc.layout_]
    # check that frequencies are sorted
    assert np.all(np.diff(frequencies) <= 0)
Пример #14
0
def test_empty_text():
    """Text that is empty, or empty once stopwords are removed, raises ValueError."""
    cases = (
        ([], ''),               # empty from the start
        (['a', 'b'], 'a b a'),  # nothing left after stopword filtering
    )
    for stopwords, text in cases:
        cloud = TextCloud(stopwords=stopwords)
        with pytest.raises(ValueError):
            cloud.generate(text)
Пример #15
0
def test_collocations():
    """Bigrams appear in ``words_`` only when ``collocations`` is enabled."""
    without_bigrams = TextCloud(collocations=False, stopwords=set())
    without_bigrams.generate(THIS)

    with_bigrams = TextCloud(collocations=True, stopwords=set())
    with_bigrams.generate(THIS)

    assert "is better" in with_bigrams.words_
    assert "is better" not in without_bigrams.words_
    # this pair must not be reported as a collocation
    assert "way may" not in with_bigrams.words_
Пример #16
0
def test_plurals_numbers():
    """Check which plural and capitalised variants survive in ``words_``."""
    extra_line = "1 idea 2 ideas three ideas although many Ideas"
    text = THIS + "\n" + extra_line
    words = TextCloud(stopwords=[]).generate(text).words_

    # the capitalised form is the unusual one for this word
    assert "Ideas" not in words
    # the plural form is dropped
    assert "ideas" not in words
    # this word usually appears capitalised, so the lower-case form is absent
    assert "although" not in words
    assert "idea" in words
    assert "Although" in words
    assert "better than" in words
Пример #17
0
def test_unicode_stopwords():
    """Unicode and plain-string stopwords must yield the same processed words."""
    unicode_cloud = TextCloud(stopwords=[u'Beautiful'])
    try:
        from_unicode = unicode_cloud.process_text(unicode(THIS))
    except NameError:
        # no ``unicode`` builtin on Python 3: use the text as it is
        from_unicode = unicode_cloud.process_text(THIS)

    str_cloud = TextCloud(stopwords=['Beautiful'])
    from_str = str_cloud.process_text(str(THIS))

    assert from_unicode == from_str
Пример #18
0
def test_writing_to_file(tmpdir):
    """Write a generated cloud to a PNG file and check the saved image size.

    ``tmpdir`` is pytest's temporary-directory fixture; the file is created
    inside it.
    """
    wc = TextCloud()
    wc.generate(THIS)

    # check writing to file
    filename = str(tmpdir.join("word_cloud.png"))
    wc.to_file(filename)
    # Use the image as a context manager so the underlying file handle is
    # closed again instead of being left open for the rest of the test session.
    with Image.open(filename) as loaded_image:
        assert loaded_image.size == (wc.width, wc.height)
Пример #19
0
def test_process_text_default_patterns():
    """``min_word_length`` decides whether one-character tokens are kept."""
    def process_with(min_length):
        cloud = TextCloud(stopwords=set(), include_numbers=True, min_word_length=min_length)
        return cloud.process_text(THIS)

    two_or_more = process_with(2)
    one_or_more = process_with(1)

    # single-character tokens are filtered with a minimum length of 2 ...
    for token in ("a", "3"):
        assert token not in two_or_more

    # ... and kept with a minimum length of 1
    for token in ("a", "3"):
        assert token in one_or_more
Пример #20
0
def test_check_errors():
    """Check the errors raised by unsupported or premature operations."""
    wc = TextCloud()
    with pytest.raises(NotImplementedError):
        wc.to_html()

    # Array conversion and recoloring on a cloud that has not been generated
    # must raise a ValueError whose message contains "call generate".
    with pytest.raises(ValueError, match="call generate"):
        np.array(wc)

    with pytest.raises(ValueError, match="call generate"):
        wc.recolor()
Пример #21
0
def gen_cloud():
    """Render a team member's strength sentences as a masked cloud PNG.

    Reads one sentence per line from ``STRENGTHPATH + TEAMMEMBERNAME + '.txt'``,
    assigns each sentence a weight based on its position in the file (earlier
    lines get larger weights), generates a cloud using the image at
    ``CLOUDMASK`` as mask and writes the result to
    ``OFOLDERPATH + TEAMMEMBERNAME + '_cloud.png'``.
    """
    # Load team member details; the context manager closes the file even if
    # reading fails.
    with open(STRENGTHPATH+TEAMMEMBERNAME+'.txt', "r") as a_file:
        list_of_sentences = [line.strip() for line in a_file]

    # Generate the sentence -> weight mapping. The first six lines get the
    # hand-tuned weights below, every later line gets the floor weight of 1.
    # A sentence that occurs more than once keeps the weight of its last line.
    rank_weights = (30, 22, 12, 7, 4, 2)
    strength_frequency = {
        sentence: rank_weights[rank] if rank < len(rank_weights) else 1
        for rank, sentence in enumerate(list_of_sentences)
    }

    # Select cloud shape (mask)
    custom_mask = np.array(Image.open(CLOUDMASK))

    # Generate cloud. strength_frequency is a dict of weights, so it has to go
    # through generate_from_frequencies(); generate() is the entry point for
    # raw text.
    textcloud = TextCloud(width=1000, height=1000,
                          background_color="rgba(255, 255, 255, 0)",
                          mode='RGBA',
                          min_font_size=20,
                          max_font_size=400,
                          font_path=FONT,
                          repeat=True,
                          colormap='Set3',
                          # color_func=partial(palette_color_func, palette=5)
                          mask=custom_mask).generate_from_frequencies(strength_frequency)

    # Save to file.
    # NOTE(review): cv2.imwrite interprets channels as BGR(A); if the array
    # conversion yields RGBA, red and blue end up swapped in the written file.
    # Confirm, and consider textcloud.to_file() instead.
    strength_cloud = np.array(textcloud)
    cv2.imwrite(OFOLDERPATH+TEAMMEMBERNAME+'_cloud.png', strength_cloud)
Пример #22
0
def test_default():
    """Smoke-test default generation plus the image and array conversions."""
    cloud = TextCloud(max_words=50)
    cloud.generate(THIS)
    limit = cloud.max_words

    # both the extracted words and the layout reach the configured maximum
    assert len(cloud.words_) == limit
    assert len(cloud.layout_) == limit

    # the exported image has the configured dimensions
    image = cloud.to_image()
    assert image.size == (cloud.width, cloud.height)

    # numpy conversion agrees with to_array()
    as_array = np.array(cloud)
    assert_array_equal(as_array, cloud.to_array())

    # rows come first, then columns, then three channels
    assert as_array.shape == (cloud.height, cloud.width, 3)
Пример #23
0
# Script section: build a cloud shaped and coloured by the `parrot_color` image
# and show it next to the original image. `parrot_color` and `text` must
# already be defined before this point.

# Subsample by a factor of 3 along the first two axes. Very lossy, but for a
# word cloud we don't really care.
parrot_color = parrot_color[::3, ::3]

# Create the mask: white (255) means "masked out". Pixels whose channels sum
# to 0 are set to white.
parrot_mask = parrot_color.copy()
parrot_mask[parrot_mask.sum(axis=2) == 0] = 255

# Some finesse: we enforce boundaries between colors so they get less washed out.
# For that we do some edge detection in the image: the gradient magnitude is
# computed per channel (index 0..2) and averaged; pixels above the threshold
# are masked out as well.
edges = np.mean([gaussian_gradient_magnitude(parrot_color[:, :, i] / 255., 2) for i in range(3)], axis=0)
parrot_mask[edges > .08] = 255

# Create the word cloud. A bit sluggish; you can subsample more strongly for
# quicker rendering. relative_scaling=0 means the frequencies in the data are
# reflected less accurately, but it makes a better picture.
wc = TextCloud(max_words=2000, mask=parrot_mask, max_font_size=40, random_state=42, relative_scaling=0)

# Generate the word cloud and show it before recoloring.
wc.generate(text)
plt.imshow(wc)

# Create the coloring from the image, recolor the cloud, show it in a new
# figure and save it to disk.
image_colors = ImageColorGenerator(parrot_color)
wc.recolor(color_func=image_colors)
plt.figure(figsize=(10, 10))
plt.imshow(wc, interpolation="bilinear")
wc.to_file("parrot_new.png")

# Show the (subsampled) source image in its own figure for comparison.
plt.figure(figsize=(10, 10))
plt.title("Original Image")
plt.imshow(parrot_color)
Пример #24
0
def test_mask_contour():
    """Check that the mask contour reacts to width, scale and colour settings.

    Background:
    https://github.com/amueller/word_cloud/pull/348#issuecomment-370883873
    """
    # Use the builtin ``int``: the ``np.int`` alias was deprecated and later
    # removed from NumPy, where it now raises AttributeError.
    mask = np.zeros((234, 456), dtype=int)
    mask[100:150, 300:400] = 255

    sm = TextCloud(mask=mask, contour_width=1, contour_color='blue')
    sm.generate(THIS)
    sm_array = np.array(sm)
    sm_total = sm_array[100:150, 300:400].sum()

    lg = TextCloud(mask=mask, contour_width=20, contour_color='blue')
    lg.generate(THIS)
    lg_array = np.array(lg)
    lg_total = lg_array[100:150, 300:400].sum()

    sc = TextCloud(mask=mask, contour_width=1, scale=2, contour_color='blue')
    sc.generate(THIS)
    sc_array = np.array(sc)
    sc_total = sc_array[100:150, 300:400].sum()

    # test `contour_width`: a wider contour adds more colour inside the region
    assert lg_total > sm_total

    # test contour varies with `scale`
    assert sc_total > sm_total

    # test `contour_color`: the region's corner pixel is pure blue
    assert all(sm_array[100, 300] == [0, 0, 255])
Пример #25
0
def test_relative_scaling_zero():
    """Non-regression check for non-integer font sizes: generation must not fail."""
    cloud = TextCloud(relative_scaling=0)
    cloud.generate(THIS)
Пример #26
0
def test_process_text_regexp_parameter():
    """A custom ``regexp`` must influence which words ``process_text`` extracts."""
    cloud = TextCloud(max_words=50, regexp=r'\w{5}')
    extracted = cloud.process_text(THIS)

    # the four-letter word must not be among the extracted words
    assert 'than' not in extracted
Пример #27
0
def test_include_numbers():
    """With ``include_numbers=True`` the token '14' is kept by ``process_text``."""
    cloud = TextCloud(include_numbers=True)
    frequencies = cloud.process_text(THIS)

    assert '14' in frequencies
Пример #28
0
def test_min_word_length():
    """The shortest word returned by ``process_text`` has exactly 5 characters."""
    cloud = TextCloud(min_word_length=5)
    frequencies = cloud.process_text(THIS)

    shortest = min(len(word) for word in frequencies)
    assert shortest == 5
Пример #29
0
def test_recolor_too_small_set_default():
    """Recoloring from a too-small image must not raise when a default colour is set."""
    small_image = np.array(Image.new('RGB', size=(20, 20)))
    cloud = TextCloud(max_words=50, width=30, height=30, min_font_size=1)
    cloud = cloud.generate(THIS)
    color_func = ImageColorGenerator(small_image, default_color=(0, 0, 0))
    cloud.recolor(color_func=color_func)
Пример #30
0
def test_small_canvas():
    """On a 21x21 canvas the font-size fallback must still place something."""
    cloud = TextCloud(max_words=50, width=21, height=21)
    cloud.generate(SMALL_CANVAS)
    placed = cloud.layout_
    assert len(placed) > 0
Пример #31
0
def test_tiny_canvas():
    """A 1x1 canvas is too small even for the fallback and raises ValueError."""
    cloud = TextCloud(max_words=50, width=1, height=1)
    expected_message = "Couldn't find space to draw"
    with pytest.raises(ValueError, match=expected_message):
        cloud.generate(THIS)
    # nothing may have been placed
    assert len(cloud.layout_) == 0