class TestConllExtractor(unittest.TestCase):

    def setUp(self):
        self.extractor = ConllExtractor()
        self.text = '''
Python is a widely used general-purpose, high-level programming language.
Its design philosophy emphasizes code readability, and its syntax allows
programmers to express concepts in fewer lines of code than would be possible
in other languages. The language provides constructs intended to enable clear
programs on both a small and large scale.
'''
        self.sentence = "Python is a widely used general-purpose, high-level programming language"

    @attr('slow')
    def test_extract(self):
        noun_phrases = self.extractor.extract(self.text)
        assert_true("Python" in noun_phrases)
        assert_true("design philosophy" in noun_phrases)
        assert_true("code readability" in noun_phrases)

    @attr('slow')
    def test_parse_sentence(self):
        parsed = self.extractor._parse_sentence(self.sentence)
        assert_true(isinstance(parsed, nltk.tree.Tree))

    @attr('slow')
    def test_filter_insignificant(self):
        chunk = self.extractor._parse_sentence(self.sentence)
        tags = [tag for word, tag in chunk.leaves()]
        assert_true('DT' in tags)
        filtered = filter_insignificant(chunk.leaves())
        tags = [tag for word, tag in filtered]
        assert_true('DT' not in tags)

def main():
    st.title("Twitter sentiment analysis")
    st.subheader("Discover the positive and negative opinions about a product")
    st.markdown("""
    #### Sentiment analysis is the automated process of analyzing text data
    and sorting it into sentiments: positive, negative, or neutral.
    """)

    st.subheader("Search by topic")
    message = st.text_area("Enter Text", "Type Here ..")
    if st.button("Analyze"):
        new_tweets = api.search(q=message)
        for tweet in new_tweets:
            analysis = TextBlob(tweet.text,
                                analyzer=NaiveBayesAnalyzer(),
                                np_extractor=ConllExtractor())
            polarity = 'Positive'
            if analysis.sentiment.p_pos < 0.50:
                polarity = 'Negative'
            st.subheader("Sentiment Analysis and Topic of Interest")
            st.write(tweet.text)
            st.write(polarity)
            st.write("Confidence : Positive score: ",
                     analysis.sentiment.p_pos * 100,
                     " Negative score: ",
                     analysis.sentiment.p_neg * 100)
            st.write("Areas of interest: ", analysis.noun_phrases)
            st.subheader(
                "---------------------------------------------------------------------------"
            )

    st.subheader('Enter a Twitter Username to search tweets for: ')
    messageID = st.text_area("Enter a ID here", "...")
    if st.button("Process"):
        new_tweetsID = api.user_timeline(screen_name=messageID, count=20)
        for tweet in new_tweetsID:
            analysis = TextBlob(tweet.text,
                                analyzer=NaiveBayesAnalyzer(),
                                np_extractor=ConllExtractor())
            polarity = 'Positive'
            if analysis.sentiment.p_pos < 0.50:
                polarity = 'Negative'
            st.subheader("Sentiment Analysis and Topic of Interest")
            st.write("Tweet : ", tweet.text)
            st.write("Sentiment:", polarity)
            st.write("Confidence : Positive score: ",
                     analysis.sentiment.p_pos * 100,
                     " Negative score: ",
                     analysis.sentiment.p_neg * 100)
            st.write("Areas of interest: ", analysis.noun_phrases)
            st.subheader(
                "---------------------------------------------------------------------------"
            )

    st.sidebar.subheader("About App")
    st.sidebar.text("TSA with Streamlit")
    st.sidebar.info("Kudos to the Streamlit Team")
    st.sidebar.subheader("Developed By")
    st.sidebar.text("Sahrul ALom Choudhari")

def noun_phrases(query):
    # noun-phrase chunking
    # extractor = FastNPExtractor()
    extractor = ConllExtractor()
    blob = TextBlob(query, np_extractor=extractor)
    noun_phrases = blob.noun_phrases
    return noun_phrases

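# A minimal usage sketch for noun_phrases() above, assuming TextBlob and
# ConllExtractor are importable and the NLTK corpora that ConllExtractor
# depends on (conll2000, punkt) have been downloaded. The sample sentence
# and the output shown are illustrative only.
phrases = noun_phrases("TextBlob makes noun phrase extraction easy.")
print(phrases)  # e.g. a WordList such as ['textblob', 'noun phrase extraction']
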
def main():
    # FILENAME = "CellPhoneReview-1000.json"
    # print('Reading data...')
    # review_data = open(FILENAME).readlines()
    # document = [json.loads(d)['reviewText'] for d in review_data][0]
    document = ("These are awesome and make my phone look so stylish! "
                "I have only used one so far and have had it on for almost a year! "
                "CAN YOU BELIEVE THAT! ONE YEAR!! Great quality!")
    print(document)

    nltk_tagger = NLTKTagger()
    extractor = ConllExtractor()
    blob = TextBlob(document, pos_tagger=nltk_tagger, np_extractor=extractor)
    print(blob.tags)
    print(blob.noun_phrases)

    pattern_tagger = PatternTagger()
    blob2 = TextBlob(document, pos_tagger=pattern_tagger, np_extractor=extractor)
    print(blob2.tags)
    print(blob2.noun_phrases)

    tagged = nltk.pos_tag(tokenize(document.lower()))
    print(tagged)

    grammar = ('''
    NP: {<DT>?(<RB.?>*<VB.?>*<NNPS?>+<NNS?>+ | <JJ>*<NNS?>+)}  # NP
    ''')
    chunkParser = nltk.RegexpParser(grammar)
    tree = chunkParser.parse(tagged)
    noun_phrases = []
    for subtree in tree.subtrees():
        if subtree.label() == 'NP':
            noun_phrase = ' '.join([elem[0] for elem in subtree])
            noun_phrases.append(noun_phrase)
    print(noun_phrases)

def get_textblob_entities(text):
    extractor = ConllExtractor()
    blob = TextBlob(text, np_extractor=extractor)
    entities = []
    for entity in blob.noun_phrases:
        entities.append({'text': entity.strip()})
    return entities

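# A minimal usage sketch for get_textblob_entities(), assuming the same
# TextBlob/ConllExtractor setup as above; the input string and the exact
# phrases returned are illustrative, since they depend on the Conll model.
entities = get_textblob_entities("Python is a high-level programming language.")
print(entities)  # e.g. [{'text': 'python'}, {'text': 'high-level programming language'}]
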
def Preprocess(Reviews):
    extractor = ConllExtractor()
    text = TextBlob(Reviews, np_extractor=extractor)
    pos = []
    # spelling-correct each lowercased sentence;
    # singularize() / lemmatize() could also be applied here
    sents = [sent.lower().correct() for sent in text.sentences]
    for sent in sents:
        pos.append(sent.tags)
    return text.sentences, pos, text.noun_phrases

def test_overrides(self):
    b = tb.Blobber(tokenizer=SentenceTokenizer(),
                   np_extractor=ConllExtractor())
    blob = b("How now? Brown cow?")
    assert_true(isinstance(blob.tokenizer, SentenceTokenizer))
    assert_equal(blob.tokens, tb.WordList(["How now?", "Brown cow?"]))
    blob2 = b("Another blob")
    # blobs have the same tokenizer
    assert_true(blob.tokenizer is blob2.tokenizer)
    # but aren't the same object
    assert_not_equal(blob, blob2)

def get_NP_extractor(self):
    """Return a TextBlob Blobber configured with the Conll NP extractor."""
    from textblob.np_extractors import ConllExtractor
    from textblob import Blobber
    extractor = ConllExtractor()
    tb = Blobber(np_extractor=extractor)
    return tb

def parse(string):
    extractor = ConllExtractor()
    # text = "open the browser"
    text = string

    # Commands that Dhwani can handle and their respective targets
    commands = [
        'open', 'close', 'play', 'start', 'pause', 'stop', 'increase',
        'increment', 'decrease', 'decrement', 'set', 'shutdown'
    ]
    actions = [
        'door', 'browser', 'song', 'music', 'player', 'brightness', 'volume'
    ]
    attributes = []

    blob = TextBlob(text)
    token_string = blob.words
    mood = blob.sentiment.polarity
    # print(mood)

    found = False
    e = 0.005  # To set the range for the neutral mood.
    for index in range(len(token_string)):
        if token_string[index] in commands:
            found = True
            # Command found; look ahead for the target
            for ahead in range(index, len(token_string)):
                if token_string[ahead] in actions:
                    # Call the execution function for (command, action)
                    # Update the index with ahead + 1
                    print("Add the execute command function")
                    index = ahead + 1
                    if token_string[ahead] in ('brightness', 'volume'):
                        # If brightness or volume, look for a third attribute
                        pass
        elif index == len(token_string) and not found:
            # Invalid input. Say, "Can you rephrase your words?"
            print("Can you please rephrase your words?")

    if -1 <= mood <= (0 - e):
        # Bad mood. Call a function to cheer him/her up. Songs/jokes
        stt.say("Call the mood, cheer man!")
    if (0 + e) <= mood <= 1:
        # Happy mood. Say, "looks like you are enjoying your day!"
        stt.say("Enjoy the party man!")
    if (0 - e) <= mood <= (0 + e):
        # Neutral sentiment output. Make a pun joke
        stt.say("Ohh cool")

def on_status(self, data):
    # print(data.text)
    analysis = TextBlob(data.text,
                        analyzer=NaiveBayesAnalyzer(),
                        np_extractor=ConllExtractor())
    # print("Confidence : Positive score: ", analysis.sentiment.p_pos * 100,
    #       " Negative score: ", analysis.sentiment.p_neg * 100)
    self.db.sports.insert({
        "name": data.user.name,
        "text": data.text,
        "created_at": data.created_at,
        "positive_score": analysis.sentiment.p_pos * 100,
        "negative_score": analysis.sentiment.p_neg * 100
    })
    return True

def __init__(self, text):
    self.text = text
    self.conll_extractor = ConllExtractor()
    self.topia_extractor = extract.TermExtractor()

    # Our custom tokenizer
    self.custom_sent_tokenizer = SentenceTokenizationOnRegexOnInterjections()
    self.tokenized_sentences = self.custom_sent_tokenizer.tokenize(self.text)

    # This method will apply the stemmers to the sentences
    self.stemming()
    print(nltk.sent_tokenize(self.text))

    self.np_textblob()
    self.np_topia()

def parseContents(contentList):
    tupleList = []
    posTagger = OpenNLP("/home/rohith/nitk/apache-opennlp-1.6.0",
                        "POSTagger", "en-pos-maxent.bin")
    chunker = OpenNLP("/home/rohith/nitk/apache-opennlp-1.6.0",
                      "ChunkerME", "en-chunker.bin")
    for item in contentList:
        attr = item[0]
        content = item[1]
        content = content.replace('\n', '')
        sentences = sent_tokenize(content)
        for sentence in sentences:
            print('#' + sentence, file=sys.stderr)
            extractor = ConllExtractor()
            np = TextBlob(sentence, np_extractor=extractor).noun_phrases
            yield attr, np.lemmatize()

def __init__(self, list_of_sentences, default_np_extractor=None,
             regexp_grammer=None, if_postagged=False):
    """
    Args:
        list_of_sentences: a list of lists, each element being a
            pos-tagged sentence, e.g.
            [[('I', 'PRP'), ('went', 'VBD'), ('there', 'RB'),
              ('for', 'IN'), ('phirni', 'NN')], [], [], ...]

        default_np_extractor:
            if a list is passed, the noun phrases from the various
            np_extractors will be appended;
            if a string is passed, only the noun phrases from that
            np extractor will be appended.

            Options:
                regex_np_extractor
                regex_textblob_conll_np
                textblob_np_conll
                textblob_np_base
    """
    self.if_postagged = if_postagged
    self.noun_phrases = list()
    self.conll_extractor = ConllExtractor()
    self.topia_extractor = extract.TermExtractor()
    self.list_of_sentences = list_of_sentences
    self.np_extractor = (default_np_extractor
                         if default_np_extractor is not None
                         else "textblob_np_conll")
    if not regexp_grammer:
        self.regexp_grammer = r"CustomNounP:{<JJ|VB|FW|VBN>?<NN.*>*<NN.*>}"

    eval("self.{0}()".format(self.np_extractor))
    self.noun_phrases = {self.np_extractor: self.noun_phrases}
    return

blob.sentiment

# Tokenizers
from nltk.tokenize import TabTokenizer
tokenizer = TabTokenizer()
blob = TextBlob("This is\ta rather tabby\tblob.", tokenizer=tokenizer)
blob.tokens

# An alternative way: pass the tokenizer to TextBlob.tokenize()
from nltk.tokenize import BlanklineTokenizer
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
blob.tokenize(tokenizer)

# Noun phrase chunkers
from textblob.np_extractors import ConllExtractor
extractor = ConllExtractor()
blob = TextBlob("Python is a high-level programming language.",
                np_extractor=extractor)
blob.noun_phrases

# POS taggers
from textblob.taggers import NLTKTagger
nltk_tagger = NLTKTagger()
blob = TextBlob("Tag! You're It!", pos_tagger=nltk_tagger)
blob.pos_tags

# Parsers
from textblob.parsers import PatternParser
blob = TextBlob("Parsing is fun.", parser=PatternParser())
blob.parse()

# TextBlobs that share the same models
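# The comment above introduces blobs that share models; a minimal sketch using
# textblob.Blobber, the factory TextBlob provides for exactly that purpose.
# The sample strings are illustrative only.
from textblob import Blobber

tb = Blobber(pos_tagger=NLTKTagger())
blob1 = tb("This is one blob.")
blob2 = tb("This is another blob.")
blob1.pos_tagger is blob2.pos_tagger  # True: both blobs reuse one tagger instance
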
import sys
import operator  # needed by extractKeywords() below
import codecs

from textblob import TextBlob
from textblob.np_extractors import ConllExtractor
from textblob.taggers import NLTKTagger
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import WordPunctTokenizer

from rake import RakeKeywordExtractor
from stopwordList import getList

## GLOBAL VARIABLES
top_fraction = 1
LEMMA_OBJ = WordNetLemmatizer()
tokenizer = WordPunctTokenizer()
nltk_tagger = NLTKTagger()
stopwords = getList()
COLL_OBJ = ConllExtractor()


def extractKeywords(phrase_list):
    RAKE_OBJ = RakeKeywordExtractor(set([]))
    word_scores = RAKE_OBJ._calculate_word_scores(phrase_list)
    phrase_scores = RAKE_OBJ._calculate_phrase_scores(phrase_list, word_scores)
    sorted_phrase_scores = sorted(phrase_scores.iteritems(),
                                  key=operator.itemgetter(1), reverse=True)
    n_phrases = len(sorted_phrase_scores)
    return [x[0] for x in sorted_phrase_scores[0:int(n_phrases)]]


def extractChunks(CONTENT):
    BLOB_OBJ = TextBlob(CONTENT,

auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

print('Choose an option (1 or 2): ')
print('1. Choose a topic to search tweets for. ')
print('2. Choose a Twitter Username to search tweets for. ')
input_data = input()

if input_data == '1':
    print('Enter a topic: ')
    topic_name = input()
    new_tweets = api.search(q=topic_name)
    for tweet in new_tweets:
        analysis = TextBlob(tweet.text,
                            analyzer=NaiveBayesAnalyzer(),
                            np_extractor=ConllExtractor())
        polarity = 'Positive'
        if analysis.sentiment.p_pos < 0.50:
            polarity = 'Negative'
        print("Sentiment Analysis and Topic of Interest")
        print("Tweet : ", tweet.text)
        print("Sentiment:", polarity)
        print("Confidence : Positive score: ",
              analysis.sentiment.p_pos * 100,
              " Negative score: ",
              analysis.sentiment.p_neg * 100)
        print("Areas of interest: ", analysis.noun_phrases)
        print(
            "---------------------------------------------------------------------------"
        )
else:
    print('2. Enter a Twitter Username to search tweets for: ')

def post(self):
    cprint(figlet_format("Now executing %s" % self.__class__.__name__,
                         font='mini'), attrs=['bold'])
    text = self.get_argument("text")
    link = self.get_argument("link")
    tokenizer = None
    conll_extractor = ConllExtractor()
    topia_extractor = extract.TermExtractor()

    if link:
        print "Link is present, so have to run goose to extract text"
        print link

    text = text.replace("\n", "")
    if not tokenizer:
        tokenizer = SentenceTokenizationOnRegexOnInterjections()
        result = tokenizer.tokenize(text)
    else:
        result = nltk.sent_tokenize(text)

    tags = TAG_CLASSIFIER_LIB.predict(result)
    sentiments = SENTI_CLASSIFIER_LIB_THREE_CATEGORIES.predict(result)

    def assign_proba(__list):
        return {
            "mixed": round(__list[0], 2),
            "negative": round(__list[1], 2),
            "neutral": round(__list[2], 2),
            "positive": round(__list[3], 2),
        }

    sentiment_probabilities = map(
        assign_proba,
        SENTI_CLASSIFIER_LIB_THREE_CATEGORIES.predict_proba(result))

    new_result = list()
    for sentence, tag, sentiment, probability in zip(
            result, tags, sentiments, sentiment_probabilities):
        try:
            subcategory = list(
                eval('{0}_SB_TAG_CLASSIFIER_LIB.predict(["{1}"])'.format(
                    tag[0:4].upper(), sentence)))[0]
        except:
            subcategory = None

        # probability is a dict of label -> score, so compare its values
        if max(probability.values()) < .7:
            polarity_result = "can't decide"
        else:
            polarity_result = "decided"

        file_name, dependencies, indexeddependencies = save_tree(sentence)
        if file_name:
            with open(file_name, "rb") as image_file:
                encoded_string = base64.b64encode(image_file.read())
        else:
            encoded_string = None

        blob = TextBlob(sentence)
        tb_nps = list(blob.noun_phrases)
        blob = TextBlob(sentence, np_extractor=conll_extractor)
        tb_conll_nps = list(blob.noun_phrases)
        te_nps = [e[0] for e in topia_extractor(sentence)]
        print sentence, dependencies, "\n"

        new_result.append({
            "sentence": sentence,
            "encoded_string": encoded_string,
            "polarity": sentiment,
            "sentiment_probabilities": probability,
            "dependencies": dependencies,
            "indexeddependencies": indexeddependencies,
            "polarity_result": polarity_result,
            "noun_phrases": ["a", "b", "c"],
            "tag": tag,
            "tb_conll_nps": tb_conll_nps,
            "te_nps": te_nps,
            "subcategory": subcategory
        })

    self.write({
        "success": True,
        "error": False,
        "result": new_result,
    })
    self.finish()
    return

def test_can_pass_np_extractor_to_constructor(self):
    e = ConllExtractor()
    blob = tb.TextBlob('Hello world!', np_extractor=e)
    assert_true(isinstance(blob.np_extractor, ConllExtractor))

def test_can_use_different_np_extractors(self):
    e = ConllExtractor()
    text = "Python is a high-level scripting language."
    blob = tb.TextBlob(text)
    blob.np_extractor = e
    assert_true(isinstance(blob.np_extractor, ConllExtractor))

def words(text):
    list_digit_words = [
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
        'eight', 'nine'
    ]
    extractor = ConllExtractor()
    blob = TextBlob(text, np_extractor=extractor)
    noun_phrases = blob.noun_phrases
    # print(noun_phrases)

    print('The voicemail is from ', end='')
    for np in noun_phrases:
        npp = np.split(' ')
        if len(npp) <= 2:
            if np != 'phone number' and np != 'good day' and np != 'great day':
                if np not in list_digit_words:
                    # if len(np) <= 3:
                    print(np.title() + ' ', end='')

    verbs = list()
    for word, tag in blob.tags:
        if tag == 'VB':
            verbs.append(word.lemmatize())
    # print(verbs)

    verbs_l = list()
    for i in verbs:
        i_l = i.lower()
        verbs_l.append(i_l)

    nouns = list()
    # print(verbs_l)
    if 'please' in verbs_l:
        next_word = verbs_l[verbs_l.index('please') + 1]
        print("Please" + ' ' + next_word + '\n', end='')
        if next_word == 'call':
            print(' regarding ', end='')
        for word, tag in blob.tags:
            if tag == 'NN':
                nouns.append(word.lemmatize())
        # print(nouns)
        num = len(nouns)
        for item in random.sample(nouns, num):
            word = Word(item)
            if word != 'phone' and word != 'number' and word != 'name':
                # if word != 'number':
                print(word, end=' ')
    else:
        print('Please \n ', end='')
        for verb in verbs_l:
            print(verb, end=' ')
        for word, tag in blob.tags:
            if tag == 'NN':
                nouns.append(word.lemmatize())
        # print(nouns)
        # print("\nThe voicemail is about ", end='')
        num = len(nouns)
        for item in random.sample(nouns, num):
            word = Word(item)
            if word != 'phone' and word != 'number' and word != 'name':
                if word not in list_digit_words:
                    # if word != 'number':
                    print(word, end=' ')

def extract_named_entities(user_input):
    """
    This method extracts 'named entities' from user input text.

    :param user_input: the user input text to extract named entities from
    :type user_input: string
    :return artists_tracks_albums
    :rtype list
    """
    # Targets and extracts named entities by filtering extracted noun phrases
    # based on the POS tags of the words they contain.
    artists_tracks_albums = []

    # Using the Conll noun phrase extractor
    search_query_conllextractor = TextBlob(user_input,
                                           np_extractor=ConllExtractor())
    for noun_phrase in search_query_conllextractor.noun_phrases:
        np = TextBlob(noun_phrase)
        for np_word, np_word_tag in np.tags:
            for search_query_word, search_query_word_tag in search_query_conllextractor.tags:
                if search_query_word.lower() == np_word.lower():
                    # Gets the original POS tag back, as breaking down the noun
                    # phrase for use as a TextBlob object re-tags its words
                    # incorrectly.
                    np_word_tag = search_query_word_tag
                    # The '&' and '+' checks cover things like R&B, where '&'
                    # would be an np_word.
                    if np_word_tag != "NNP" and np_word_tag != "NNPS" and np_word != "&" and np_word != "+":
                        # Checks whether the word has already been removed,
                        # e.g. in the query "I like music. Give me rap music."
                        # this prevents an error from trying to remove 'music'
                        # twice if it is only in one extracted noun phrase.
                        if search_query_word.lower() in np.words:
                            np.words.remove(search_query_word.lower())  # np.words are lowercase
        if len(np.words) != 0:
            artists_tracks_albums.append(' '.join(np_word for np_word in np.words))

    # Using the FastNP noun phrase extractor - TextBlob's default
    search_query_fastnpextractor = TextBlob(user_input)
    for noun_phrase in search_query_fastnpextractor.noun_phrases:
        np = TextBlob(noun_phrase)
        for np_word, np_word_tag in np.tags:
            for search_query_word, search_query_word_tag in search_query_fastnpextractor.tags:
                if search_query_word.lower() == np_word.lower():
                    np_word_tag = search_query_word_tag
                    if np_word_tag != "NNP" and np_word_tag != "NNPS" and np_word != "&" and np_word != "+":
                        if search_query_word.lower() in np.words:
                            np.words.remove(search_query_word.lower())
        if np.words not in artists_tracks_albums and len(np.words) != 0:
            artists_tracks_albums.append(' '.join(np_word for np_word in np.words))

    for item in artists_tracks_albums:
        # Matches words like 'r b', which should be 'r&b': the '&' was getting
        # removed in the join, although recognised as an np_word and therefore
        # it should have been joined.
        if re.match(r"\b([a-z]) (?=[a-z]\b)", item):
            item_index = artists_tracks_albums.index(item)
            artists_tracks_albums[item_index] = item.replace(" ", "&")

    artists_tracks_albums = list(dict.fromkeys(artists_tracks_albums))  # deletes any duplicates
    artists_tracks_albums = [item.lower() for item in artists_tracks_albums]  # makes all lowercase
    return artists_tracks_albums

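# A hedged usage sketch for extract_named_entities(); the query and the result
# shown are illustrative, since the output depends on both NP extractors and
# the POS tagger.
print(extract_named_entities("Play me something by Kendrick Lamar"))
# e.g. ['kendrick lamar']
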
from nltk.corpus import wordnet as wn
from textblob import TextBlob
from textblob_aptagger import PerceptronTagger
from textblob.np_extractors import ConllExtractor

from utils import flatten

STOPS = stopwords.words('english')
EN_US_DICT = enchant.Dict("en_US")
EN_GB_DICT = enchant.Dict("en_GB")
PORTER = PorterStemmer()
WN_LEMMATIZER = WordNetLemmatizer()
SENTENCE_DETECTOR = data.load('tokenizers/punkt/english.pickle')
TAGGER = PerceptronTagger()
EXTRACTOR = ConllExtractor()


def get_fast_blob(text):
    return TextBlob(text, pos_tagger=TAGGER, np_extractor=EXTRACTOR)


def get_blob(text):
    return TextBlob(text, np_extractor=EXTRACTOR)


def tokenize_string(doc):
    return word_tokenize(doc)


def sentence_tokenize(s):

def get_noun_phrase(sentence):
    extractor = ConllExtractor()
    blob = TextBlob(sentence, np_extractor=extractor)
    print(blob.noun_phrases)

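# A minimal usage sketch for get_noun_phrase(); the sentence is illustrative
# and the phrases printed depend on the ConllExtractor model (NLTK's conll2000
# corpus must be downloaded first).
get_noun_phrase("The Eiffel Tower is a famous landmark in Paris.")
# e.g. prints a WordList like ['eiffel tower', 'famous landmark']
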
def tweets():
    connection = httplib.HTTPSConnection('parseapi.back4app.com', 443)
    params = urllib.urlencode({
        "where": json.dumps({"manualTwitterURL": {"$ne": ""}}),
        "include": "user",
        "keys": "manualTwitterURL,user.objectId"
    })
    connection.connect()
    connection.request(
        'GET', '/classes/Lead?%s' % params, '', {
            "X-Parse-Application-Id": "9LT6MCUSdT4mnzlNkG2pS8L51wvMWvugurQJnjwB",
            "X-Parse-REST-API-Key": "6gwEVURQBIkh9prcc3Bgy8tRiJTFYFbJJkQsB45w"
        })
    result = json.loads(connection.getresponse().read())

    twitterURL = []
    leadid = []
    userid = []
    oldest = {}
    for i in range(0, len(result['results'])):
        twitterURL.append(result['results'][i]['manualTwitterURL'])
        leadid.append(result['results'][i]['objectId'])
        userid.append(result['results'][i]['user']['objectId'])

    for i in range(0, len(twitterURL)):
        alltweets = []
        oldestid = json.load(open("lastTweetId.txt"))
        try:
            oldestid = oldest.get(twitterURL[i])
        except IndexError:
            oldestid = '0'
        if oldestid == '0':
            new_tweets = api.user_timeline(screen_name=twitterURL[i], count=20)
        else:
            new_tweets = api.user_timeline(screen_name=twitterURL[i],
                                           count=20, since_id=oldestid)
        alltweets.extend(new_tweets)

        for tweet in alltweets:
            analysis = TextBlob(tweet.text,
                                analyzer=NaiveBayesAnalyzer(),
                                np_extractor=ConllExtractor())
            if analysis.sentiment.p_pos > 0.75:
                polarity = 'Positive'
            elif analysis.sentiment.p_neg > 0.75:
                polarity = 'Negative'
            else:
                polarity = 'Neutral'
            oldest[twitterURL[i]] = tweet.id

            if (analysis.sentiment.p_pos > 0.70
                    or analysis.sentiment.p_neg > 0.70):
                try:
                    interestTopic = analysis.noun_phrases[0]
                except IndexError:
                    interestTopic = 'null'
                connection = httplib.HTTPSConnection('parseapi.back4app.com', 443)
                connection.connect()
                connection.request(
                    'POST', '/classes/Insight',
                    json.dumps({
                        "user": {
                            "__type": "Pointer",
                            "className": "_User",
                            "objectId": userid[i]
                        },
                        "lead": {
                            "__type": "Pointer",
                            "className": "Lead",
                            "objectId": leadid[i]
                        },
                        "type": "topic",
                        "confidence": analysis.sentiment.p_pos * 100,
                        "tweet": tweet.text,
                        "insight": polarity,
                        "tweetId": tweet.id,
                        "interestTopic": interestTopic,
                        "description": "insight"
                    }), {
                        "X-Parse-Application-Id": "9LT6MCUSdT4mnzlNkG2pS8L51wvMWvugurQJnjwB",
                        "X-Parse-REST-API-Key": "6gwEVURQBIkh9prcc3Bgy8tRiJTFYFbJJkQsB45w",
                        "Content-Type": "application/json"
                    })

    json.dump(oldest, open("lastTweetId.txt", "w"))
    return 'Successfully added data to Insights!'

def extract_phrase(text):
    """Take a preprocessed tweet and return a dict of noun phrases with their counts as values."""
    extractor = ConllExtractor()
    # pass the extractor to TextBlob so it is actually used
    blob = TextBlob(text, np_extractor=extractor)
    return dict(blob.np_counts)

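# A minimal usage sketch for extract_phrase(); the tweet text is illustrative
# and the exact counts returned depend on the ConllExtractor model.
counts = extract_phrase("great camera and great battery life on this phone")
print(counts)  # e.g. {'great camera': 1, 'great battery life': 1}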