示例#1
0
def test_init_2():
    """
    Check the `column` attribute when NLPFrame is built with an absent column name.

    The frame is constructed with ``column='non_existing'`` although its only
    column is ``text_col``; the test expects the ``column`` attribute to end
    up as ``'text_col'``.
    """
    sentences = [
        'Today is a beautiful Monday and I would love getting a coffee. However, startbucks is closed.',
        'It has been an amazing day today!',
    ]
    frame = nlp.NLPFrame({'text_col': sentences}, index=[0, 1], column='non_existing')

    assert frame.column == 'text_col'
示例#2
0
def test_init_3():
    """
    Test initialization of the class NLPFrame that doesn't have a text column.

    The frame holds only integers, so the test expects the `column`
    attribute to be ``None``.
    """

    initial_df = nlp.NLPFrame({'text_col' : [5,6,8]})
    # Identity check: `None` is a singleton and `==` could be overridden.
    assert initial_df.column is None
示例#3
0
def test_positive_sentence_stopwords():
    """
    Check that `sentence_stopwords` yields non-negative values.

    Only the first row of the result is inspected: its first two columns
    must both be greater than or equal to zero.
    """
    texts = ['The coffee is good', 'I would like to have some']
    frame = nlp.NLPFrame({'text_col': texts}, index=[0, 1])
    counts = frame.sentence_stopwords()

    first_ok = counts.iloc[0, 0] >= 0
    second_ok = counts.iloc[0, 1] >= 0
    assert (first_ok & second_ok), 'Count should be positive'
示例#4
0
def test_polarity_count():
    """
    Check that `polarity` yields non-negative values.

    Only the first row of the result is inspected: its first two columns
    must both be greater than or equal to zero.
    """
    texts = ['sam is living a great life', 'he treats them so well']
    frame = nlp.NLPFrame({'text_col': texts}, index=[0, 1])
    scores = frame.polarity()

    first_ok = scores.iloc[0, 0] >= 0
    second_ok = scores.iloc[0, 1] >= 0
    assert (first_ok & second_ok), 'Count should be positive'
示例#5
0
def test_get_nlp_summary_2():
    """
    Check `get_nlp_summary` when the requested column is absent from the frame.

    Both the constructor and the method are given ``column='non_existing'``;
    the test expects the summary to equal an empty ``pd.DataFrame``.
    """
    sentences = [
        'Today is a beautiful Monday and I would love getting a coffee. However, startbucks is closed.',
        'It has been an amazing day today!',
    ]
    frame = nlp.NLPFrame({'text_col': sentences}, index=[0, 1], column='non_existing')
    summary = frame.get_nlp_summary(column='non_existing')

    empty_frame = pd.DataFrame()
    assert summary.equals(empty_frame)
示例#6
0
def test_polarity_input():
    """
    Check the column names of the frame returned by `polarity`.

    The result's columns must be exactly ``positive_words`` followed by
    ``negative_words``.
    """
    texts = ['he is a good guy', 'his behaviour is bad']
    frame = nlp.NLPFrame({'text_col': texts}, index=[0, 1])

    expected_columns = ['positive_words', 'negative_words']
    actual_columns = list(frame.polarity().columns)

    assert actual_columns == expected_columns, 'Default columns should be positive, negative'
示例#7
0
def test_df_sentence_stopwords():
    """
    Test function to ensure that the input data is in string format.

    Reads the first entry of `text_col` back from the frame and checks that
    it is a string.
    """
    test_df = nlp.NLPFrame({'text_col' : ['Today is a great day \
                                and I wish I spent more time outside. I decided to go outside.\
                                English!']})
    input_data = test_df['text_col'][0]

    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(input_data, str), 'The inputs are not strings'
示例#8
0
def test_invalid_pos_input_1():
    """
    Test function to ensure that `get_part_of_speech` rejects a non-iterable `show_only` value.

    The frame mixes a non-English sentence with an English one; calling
    `get_part_of_speech(show_only=5)` is expected to raise ``TypeError``.
    """
    initial_df = nlp.NLPFrame({'text_col': ['彼は新しい仕事に本当に満足している','It has been an amazing day today!']})
    try:
        # Only the call that may raise lives in the try body.
        initial_df.get_part_of_speech(show_only=5)
    except TypeError:
        pass
    else:
        # Raised explicitly so the failure is not stripped under `python -O`.
        raise AssertionError('The function should not be executed when the show only input is not an iterable object!!')
示例#9
0
def test_pos_output_columns():
    """
    Check that `get_part_of_speech(show_only=False)` returns at least one column.

    The input mixes a non-English sentence with an English one.
    """
    texts = ['彼は新しい仕事に本当に満足している', 'It has been an amazing day today!']
    frame = nlp.NLPFrame({'text_col': texts}, index=[0, 1])

    tagged = frame.get_part_of_speech(show_only=False)
    column_count = len(tagged.columns)

    assert column_count > 0, 'The function should return an output even for mixture of English and non-English sentences!'
示例#10
0
def test_pos_chinese_input():
    """
    Check the default output columns of `get_part_of_speech` for non-English text.

    The single input sentence is written in Japanese script; the result's
    columns must be exactly ``adjective``, ``noun`` and ``verb``.
    """
    frame = nlp.NLPFrame({'text_col': ['彼は新しい仕事に本当に満足している']}, index=[0])

    expected_columns = ['adjective', 'noun', 'verb']
    actual_columns = list(frame.get_part_of_speech().columns)

    assert actual_columns == expected_columns, 'The function should return an output even for non-English text'
示例#11
0
def test_pos_mixture_input():
    """
    Check the default output columns of `get_part_of_speech` for mixed-language input.

    One sentence is non-English and the other is English; the result's
    columns must be exactly ``adjective``, ``noun`` and ``verb``.
    """
    texts = ['彼は新しい仕事に本当に満足している', 'It has been an amazing day today!']
    frame = nlp.NLPFrame({'text_col': texts}, index=[0, 1])

    expected_columns = ['adjective', 'noun', 'verb']
    actual_columns = list(frame.get_part_of_speech().columns)

    assert actual_columns == expected_columns, 'The function should return an output even for mixture of English and non-English sentences!'
示例#12
0
def test_pos_output_columns_3():
    """
    Check that `get_part_of_speech` raises ValueError for a frame holding only integers.
    """
    numeric_frame = nlp.NLPFrame({'text_col': [5, 6, 8]})

    try:
        numeric_frame.get_part_of_speech()
    except ValueError:
        pass
    else:
        # Reaching here means no ValueError was raised.
        assert False
示例#13
0
def test_language_2():
    """
    Check that `detect_language` raises ValueError when given a column the frame lacks.
    """
    sentences = [
        'Today is a beautiful Monday and I would love getting a coffee. However, startbucks is closed.',
        'It has been an amazing day today!',
    ]
    frame = nlp.NLPFrame({'text_col': sentences}, index=[0, 1], column='non_existing')

    try:
        frame.detect_language(column='non_existing')
    except ValueError:
        pass
    else:
        # Reaching here means no ValueError was raised.
        assert False
示例#14
0
def test_get_nlp_summary_1():
    """
    Check that `get_nlp_summary` raises ValueError for a frame holding only integers.
    """
    numeric_frame = nlp.NLPFrame({'text_col': [5, 6, 8]})

    try:
        numeric_frame.get_nlp_summary()
    except ValueError:
        pass
    else:
        # Reaching here means no ValueError was raised.
        assert False
示例#15
0
def test_language_1():
    """
    Check that `detect_language` raises ValueError for a frame holding only integers.
    """
    numeric_frame = nlp.NLPFrame({'text_col': [5, 6, 8]})

    try:
        numeric_frame.detect_language()
    except ValueError:
        pass
    else:
        # Reaching here means no ValueError was raised.
        assert False
示例#16
0
def test_pos_output_columns_2():
    """
    Test function to ensure that `get_part_of_speech` raises TypeError for an invalid `show_only`.

    The call uses ``show_only=5``; the test fails if no ``TypeError`` is raised.
    """

    initial_df = nlp.NLPFrame({'text_col': ['彼は新しい仕事に本当に満足している','It has been an amazing day today!']}, index = [0,1])
    try:
        # Only the call that may raise lives in the try body.
        initial_df.get_part_of_speech(show_only=5)
    except TypeError:
        pass
    else:
        # Raised explicitly so the failure is not stripped under `python -O`.
        raise AssertionError('The function should throw an error when show_only argument is not valid')
示例#17
0
def test_sentence_stopwords_1():
    """
    Check that `sentence_stopwords` raises ValueError for a frame holding only integers.
    """
    numeric_frame = nlp.NLPFrame({'text_col': [5, 6, 8]})

    try:
        numeric_frame.sentence_stopwords()
    except ValueError:
        pass
    else:
        # Reaching here means no ValueError was raised.
        assert False
示例#18
0
def test_polarity_1():
    """
    Check that `polarity` raises ValueError for a frame holding only integers.
    """
    numeric_frame = nlp.NLPFrame({'text_col': [5, 6, 8]})

    try:
        numeric_frame.polarity()
    except ValueError:
        pass
    else:
        # Reaching here means no ValueError was raised.
        assert False
示例#19
0
def test_pos_english_input():
    """
    Test function to ensure that `get_part_of_speech` returns its default columns for English text.

    Only the column names are checked: they must be exactly ``adjective``,
    ``noun`` and ``verb``. The values in the result are not verified.
    """
    initial_df = nlp.NLPFrame({'text_col' : ['Today is a beautiful Monday and I would love getting a coffee. However, startbucks is closed.','It has been an amazing day today!']}, index = [0,1])

    expected_columns = ['adjective', 'noun', 'verb']
    res = initial_df.get_part_of_speech()
    actual_columns = list(res.columns)

    assert actual_columns == expected_columns, 'Default columns should be adjective, noun and verb'
示例#20
0
def test_language_works():
    """
    Check the language name reported by `detect_language` for three columns.

    For each column, the first entry of the result's ``language`` field must
    match the expected name: English, Chinese and Spanish respectively.
    """
    frame = nlp.NLPFrame({
        'english_text': ['I am a happy person'],
        'mandarin_text': ['戰國策的版本存在非常多的錯誤'],
        'spanish_text': ['Hola mi nombre es Bill y me gustan los gatos'],
    })

    # (column name, expected language) pairs, checked in the original order.
    expectations = (
        ('english_text', 'English'),
        ('mandarin_text', 'Chinese'),
        ('spanish_text', 'Spanish'),
    )
    for column_name, expected_language in expectations:
        detected = frame.detect_language(column=column_name)['language'][0]
        assert detected == expected_language, 'Incorrect Language'