示例#1
0
def test_analyzer():
    """
    Run text_analyzer.analyzer over sample messages and check its output.

    Each case pairs a message with the (index, expected value) checks that
    are applied, in order, to the value returned by the analyzer.
    """
    data = text_analyzer.language_data_loader(GRAMMAR_PATH,
                                              COUNTER_GRAMMAR_PATH,
                                              START_WORDS_PATH,
                                              STOP_WORDS_PATH)
    cases = (
        ("This is a new medicine for hyperthyroidism",
         ((1, 'hyperthyroidism'), (0, 'a new medicine'))),
        ("some unrelated message that only talks about watching tv",
         ((0, '<nothing_found>'),)),
        ("#hyperthyroidism for hyperthyroidism",
         ((0, '<nothing_found>'), (1, 'hyperthyroidism'))),
    )
    for text, checks in cases:
        # Every case hands the analyzer the same five language resources.
        result = text_analyzer.analyzer(text, data['start_words'],
                                        data['grammar'],
                                        data['counter_grammar'],
                                        data['stop_words'],
                                        data['magic_bullet_grammar'])
        for position, expected in checks:
            assert result[position] == expected
示例#2
0
def test_counter_analyzer():
    """
    Check text_analyzer.counter_analyzer on one sample message.

    The call with the term 'obesity' and the loaded counter grammar is
    expected to return exactly False.
    """
    loaded = text_analyzer.language_data_loader(GRAMMAR_PATH,
                                                COUNTER_GRAMMAR_PATH,
                                                START_WORDS_PATH,
                                                STOP_WORDS_PATH)
    # Only the counter grammar is needed from the loaded data.
    counter_grammar = loaded['counter_grammar']
    verdict = text_analyzer.counter_analyzer("A new medicine for obesity",
                                             'obesity', counter_grammar)
    assert verdict is False
示例#3
0
def test_language_data_loader():
    """
    Language data loader tests.

    Calls text_analyzer.language_data_loader with the four resource paths
    and checks that one known entry is present in each of the returned
    'start_words', 'stop_words', 'grammar' and 'counter_grammar' entries.
    """
    language_data = text_analyzer.language_data_loader(GRAMMAR_PATH,
                                                       COUNTER_GRAMMAR_PATH,
                                                       START_WORDS_PATH,
                                                       STOP_WORDS_PATH)
    assert 'eczema' in language_data['start_words'].keys()
    # All patterns are raw strings: a non-raw literal containing \S is an
    # invalid escape sequence (a warning on current Python versions), and raw
    # literals keep every backslash exactly as it appears in the pattern.
    assert r'^@\w+$' in language_data['stop_words']
    assert (r'[s] \w+ed to (healthier|better)( \S+){0,7} [p]'
            in language_data['grammar'])
    assert r'chances for ( \S+){0,5} [p]' in language_data['counter_grammar']
    assert (r'[s] effective( \w+){0,2} (in|for|to)( \w+){0,5} [p]'
            in language_data['grammar'])
示例#4
0
from datetime import datetime
from analyzer.engines import user_analyzer
from analyzer.engines import text_analyzer
## Initialization ##
# Everything below runs once, at import time. All paths are relative
# ('./language_data/...'), so they are resolved against the current working
# directory of the process that imports this module.

# User analysis
# Parsed user dictionary; it is an input to the lexicon built just below.
DICTIONARY = user_analyzer.dictionary_parser(
    './language_data/user_dictionary.txt')
# Lexicon generated from the user grammar file together with DICTIONARY.
LEXICON = user_analyzer.lexicon_generator('./language_data/user_grammar.txt',
                                          DICTIONARY)
# Parsed contents of the string_twitter_queries file.
STRING_TWITTER_QUERIES = user_analyzer.string_twitter_queriesParser(
    './language_data/string_twitter_queries.txt')

# Text analysis
# Loaded from four files, passed in this order: grammar, counter grammar,
# start words, stop words.
LANGUAGE_DATA = text_analyzer.language_data_loader(
    './language_data/grammar.txt', './language_data/counter_grammar.txt',
    './language_data/start_words.txt', './language_data/stop_words.txt')


def nlp_analysis(job_json):
    """
    It takes a job as an input and returns an analysis.
    """
    analysis = dict()
    # Get 'profile' and 'health_related'
    user_analysis = user_analyzer.user_analyzer(job_json['user_name'],
                                                job_json['user_description'],
                                                STRING_TWITTER_QUERIES,
                                                LEXICON)

    analysis['profile'] = user_analysis[1]