def test_get_sentences_fail():
    """
    Feed None to tokenize.

    Nothing is caught or asserted here, so whatever tokenize raises
    for a None input propagates to the test runner.
    NOTE(review): the test name refers to get_sentences, but the call
    below is to tokenize -- confirm which function is meant.
    """
    invalid_input = None
    tokenize(invalid_input)
def test_get_sentences_fail():
    """
    Call tokenize with None as its only argument.

    The call is not wrapped in any assertion or exception handler;
    an error raised by tokenize reaches the test runner unchanged.
    NOTE(review): name and intent mention get_sentences while the body
    exercises tokenize -- verify which one this test should target.
    """
    missing_text = None
    tokenize(missing_text)
Пример #3
0
def extract_aspects(reviews):
    """
    INPUT: reviews, handed unchanged to get_sentences (previously
           documented as an iterable of strings such as a pd Series or
           a list -- TODO confirm against get_sentences)
    OUTPUT: the result of aspects_from_tagged_sents (previously
            documented as a list of aspects)

    Return the aspects from the set of reviews by running the pipeline
    get_sentences -> tokenize -> pos_tag -> aspects_from_tagged_sents.
    """

    # import the aspect extraction functions (kept function-local)
    from opinion_mining.extract_aspects import (
        get_sentences,
        tokenize,
        pos_tag,
        aspects_from_tagged_sents,
    )

    # tokenize each sentence; the result of get_sentences is iterated
    # directly, no intermediate copy of the sentences is needed
    tokenized_sentences = [
        tokenize(sentence) for sentence in get_sentences(reviews)
    ]

    # pos tag each sentence
    tagged_sentences = [pos_tag(tokens) for tokens in tokenized_sentences]

    # from the pos tagged sentences, get the aspects
    return aspects_from_tagged_sents(tagged_sentences)
def test_tokenize():
    """
    Check the tokenizer on a five-word sentence with mixed case:
    every token is lowercase, there are five tokens, and the
    result is a list.
    """
    sample = "This is a test SENTENCE"
    result = tokenize(sample)

    # every token must come back lowercased
    assert all(word.islower() for word in result)

    assert len(result) == 5
    assert isinstance(result, list)
def test_tokenize():
    """
    Tokenize a mixed-case, five-word sentence and verify that the
    tokens are all lowercase, that five of them come back, and that
    they are returned as a list.
    """
    text = "This is a test SENTENCE"
    pieces = tokenize(text)

    # lowercase check across all returned tokens
    assert all(piece.islower() for piece in pieces)

    assert len(pieces) == 5
    assert isinstance(pieces, list)
def get_sentences_by_aspect(aspect, reviews):
    """
    INPUT: aspect -- a single aspect string, or an iterable of aspect
                     strings
           reviews -- review text, handed unchanged to get_sentences
    OUTPUT: list of tokenized sentences (each entry is whatever
            tokenize returns for that sentence)

    Given an aspect (or several) and the reviews, return every
    tokenized sentence that contains at least one of the aspects as a
    token.  A sentence is returned at most once even if it mentions
    several aspects; results are ordered by aspect first, then by
    sentence.
    """

    # TODO: the sentence/tokenize steps duplicate 'extract_aspects';
    # refactor both onto a shared helper.
    from opinion_mining.extract_aspects import get_sentences, tokenize

    # A bare string is one aspect; iterating over it directly would
    # compare its individual characters against the tokens.
    aspects = [aspect] if isinstance(aspect, str) else aspect

    # tokenize each sentence
    tokenized_sentences = [
        tokenize(sentence) for sentence in get_sentences(reviews)
    ]

    matching = []
    for term in aspects:
        for tokens in tokenized_sentences:
            # de-duplicate on the sentence itself, not on the aspect
            if term in tokens and tokens not in matching:
                matching.append(tokens)

    return matching