def extract(self, text):
    """Scan *text* for emoticons and acronyms, recording matches.

    Matches accumulate in ``self.answer`` under ``'EMOTICONS'`` and
    ``'EXPANDED ACRONYMS'`` (each key is created only once at least one
    match exists).  Returns ``str(self.answer)``.
    """
    emo_list = []
    acr_list = []
    # Pass 1: emoticon detection on the raw, case-preserved text.
    for word in twokenize.tokenize(text):
        if word != " ":
            word = word.strip()
            try:
                # Lexicon probe: raises KeyError for non-emoticons.
                self.emoticons[word]
                emo = emoticons.analyze_tweetHeavy(word)
                emo_list.append(emo)
                self.answer['EMOTICONS'] = emo_list
            except Exception:  # narrowed from bare except so Ctrl-C still works
                if "@" in word:
                    word = "@user"  # anonymize user mentions
    # Pass 2: acronym expansion on lower-cased text
    # (acronym table keys are presumably lower case -- TODO confirm).
    text = text.lower()
    for word in twokenize.tokenize(text):
        if word != "":
            word = word.strip()
            try:
                word = self.acronyms[word]
                acr_list.append(word)
                self.answer['EXPANDED ACRONYMS'] = acr_list
            except Exception:  # narrowed from bare except
                if "@" in word:
                    word = "@user"
    return str(self.answer)
def Smileyoperation(self, text):
    """Collect emoticon analyses for every emoticon token in *text*.

    Stores the list under ``self.answer["SMILEYS"]`` (key created only
    when at least one emoticon was found) and returns ``self.answer``.

    NOTE(review): an identical ``Smileyoperation`` definition appears
    later in this file and will shadow this one if both live in the
    same class -- confirm which copy is intended and delete the other.
    """
    emo_list = []
    for word in twokenize.tokenize(text):
        if word != " ":
            word = word.strip()
            try:
                # Probe the emoticon lexicon; KeyError -> not an emoticon.
                self.emoticons[word]
                emo = emoticons.analyze_tweetHeavy(word)
                emo_list.append(emo)
                self.answer["SMILEYS"] = emo_list
            except Exception:  # narrowed from bare except
                if "@" in word:
                    word = "@user"  # anonymize mentions
    return self.answer
def Smileyoperation(self, text):
    """Record the analysis of each emoticon token found in *text*.

    Appends each analysis to ``self.answer['SMILEYS']`` (the key exists
    only when something matched) and returns ``self.answer``.

    NOTE(review): this duplicates an earlier, byte-equivalent
    ``Smileyoperation`` definition -- one of the two should be removed.
    """
    emo_list = []
    for word in twokenize.tokenize(text):
        if word != " ":
            word = word.strip()
            try:
                self.emoticons[word]  # KeyError means "not an emoticon"
                emo = emoticons.analyze_tweetHeavy(word)
                emo_list.append(emo)
                self.answer['SMILEYS'] = emo_list
            except Exception:  # narrowed from bare except
                if "@" in word:
                    word = "@user"  # anonymize mentions
    return self.answer
def process(self, text, stopwordsF=0, stemmerF=0, encode=1):
    """Extract emoticons and acronyms from a tweet into ``self.result``.

    URLs are stripped and '#' removed before tokenizing.  Per token:
    known dictionary words are left alone; emoticon matches go to
    ``self.result['EMOTICONS']``; acronym matches to
    ``self.result['ACRONYMS']``; remaining @mentions are anonymized.

    ``stopwordsF``, ``stemmerF`` and ``encode`` are accepted for
    signature compatibility with the other ``process`` variants in this
    file but are never used here.  Returns ``self.result``.
    """
    list1 = []
    list2 = []
    # Remove URLs, drop '#', lower-case, collapse whitespace runs.
    line = re.sub(twokenize.Url_RE, " ", text)
    temp = " ".join(line.replace("#", " ").lower().split())
    for word in twokenize.tokenize(temp):
        if word != " ":
            word = word.strip()
            # Dictionary probe kept for parity with the other variants;
            # the outcome is not used by this variant.
            try:
                if self.wordDict[word] == 1:
                    pass  # known dictionary word; nothing to normalize
            except Exception:
                pass
            try:
                self.emoticons[word]  # KeyError -> not an emoticon
                emo = emoticons.analyze_tweetHeavy(word)
                list1.append(emo)
                self.result['EMOTICONS'] = list1
            except Exception:  # narrowed from bare except
                try:
                    # Normalize acronyms
                    word = self.acronyms[word]
                    list2.append(word)
                    self.result['ACRONYMS'] = list2
                except Exception:
                    if "@" in word:
                        word = "@user"  # anonymize user mentions
    return self.result
def ExEmo(self, text):
    """Record word/analysis pairs for every emoticon token in *text*.

    For each emoticon both the token itself and its analysis are
    appended, so ``self.answer['EMOTICONS']`` alternates
    ``[word, analysis, word, analysis, ...]``.  Returns
    ``str(self.answer)``.
    """
    emo_list = []
    for word in twokenize.tokenize(text):
        if word != " ":
            word = word.strip()
            try:
                self.emoticons[word]  # KeyError -> not an emoticon
                emo = emoticons.analyze_tweetHeavy(word)
                emo_list.append(word)
                emo_list.append(emo)
                self.answer['EMOTICONS'] = emo_list
            except Exception:  # narrowed from bare except
                if "@" in word:
                    word = "@user"  # anonymize user mentions
    return str(self.answer)
def EmoOperation(self, text):
    """Return a dict mapping each emoticon token in *text* to its analysis.

    Fix: ``d`` is now initialised up front, so input containing no known
    emoticon returns ``{}`` instead of raising NameError (the original
    only bound ``d`` inside the loop's success path).
    """
    emo_list = []
    d = {}
    for word in twokenize.tokenize(text):
        if word != " ":
            word = word.strip()
            try:
                self.emoticons[word]  # KeyError -> not an emoticon
                emo = emoticons.analyze_tweetHeavy(word)
                emo_list.append(word)
                emo_list.append(emo)
                # Pair consecutive [word, analysis] entries into a dict.
                # NOTE(review): izip_longest is Python 2 only; under
                # Python 3 this must become itertools.zip_longest.
                d = dict(itertools.izip_longest(*[iter(emo_list)] * 2, fillvalue=""))
            except Exception:  # narrowed from bare except
                if "@" in word:
                    word = "@user"  # anonymize user mentions
    return d
def process(self, text):
    """Classify each token of *text* as dictionary word, emoticon or acronym.

    Tokens present in ``self.wordDict`` are left alone.  Otherwise the
    emoticon lexicon is tried (matches go to
    ``self.result['EMOTICONS']``), then the acronym table
    (``self.result['ACRONYMS']``); remaining @mention tokens are
    anonymized.  Returns ``self.result``.
    """
    list1 = []
    list2 = []
    for word in twokenize.tokenize(text):
        if word != " ":
            word = word.strip()
            try:
                if self.wordDict[word] == 1:
                    pass  # known dictionary word; leave untouched
            except Exception:  # narrowed from bare except
                try:
                    self.emoticons[word]  # KeyError -> not an emoticon
                    emo = emoticons.analyze_tweetHeavy(word)
                    list1.append(emo)
                    self.result['EMOTICONS'] = list1
                except Exception:
                    try:
                        # Normalize acronyms
                        word = self.acronyms[word]
                        list2.append(word)
                        self.result['ACRONYMS'] = list2
                    except Exception:
                        if "@" in word:
                            word = "@user"  # anonymize user mentions
    return self.result
def process(self, text, stopwordsF=0, stemmerF=0, encode=1):
    """Full tweet-normalization pipeline.

    Steps: strip URLs and '#', lower-case and collapse whitespace; then
    per token: keep dictionary words as-is, replace emoticons with
    ``analysis#(score)#``, expand acronyms and contractions, anonymize
    @mentions, and spell-correct remaining non-dictionary words.
    Afterwards optionally drop stop words (``stopwordsF=1``) and stem
    (``stemmerF=1``).  Returns the cleaned tweet, UTF-8 encoded unless
    ``encode == 0``.

    NOTE(review): a byte-identical ``process`` definition appears later
    in this file; if both live in the same class the later one shadows
    this one.
    """
    # remove URLs
    line = re.sub(twokenize.Url_RE, " ", text)
    # drop '#', lower-case, strip extra whitespace
    temp = " ".join(line.replace("#", " ").lower().split())
    tempTweet = ""
    for word in twokenize.tokenize(temp):
        if word != " ":
            word = word.strip()
            flagNonDict = 0
            # Check the dictionary; set the flag for unknown words so the
            # spell-checker only runs on tokens that need it.
            try:
                if self.wordDict[word] == 1:
                    pass  # dictionary word, keep as-is
            except Exception:  # narrowed from bare except
                flagNonDict = 1
            try:
                score = self.emoticons[word]
                emo = emoticons.analyze_tweetHeavy(word)
                word = emo + "#(" + str(score) + ")#"
            except Exception:
                try:
                    # Normalize acronyms
                    word = self.acronyms[word]
                except Exception:
                    try:
                        # Normalize contractions
                        word = self.contractions[word]
                    except Exception:
                        # Normalize spelling of non-dictionary words only.
                        if flagNonDict == 1:
                            if "@" in word:
                                # remove user mentions
                                word = "@user"
                            else:
                                corrected = self.spellCheck.correct(word)
                                if corrected != "a":
                                    word = corrected
            try:
                tempTweet = " ".join([tempTweet, word.strip()])
                tempTweet = tempTweet.lower().strip()
            except Exception:
                # Python 2 fallback: a mixed str/unicode join failed, so
                # decode the token before joining -- TODO confirm this
                # path is still needed on the target interpreter.
                tempTweet = " ".join([tempTweet, word.strip().decode("iso-8859-1")])
                tempTweet = tempTweet.lower().strip()
    if stemmerF == 1 and stopwordsF == 1:
        tempTweet = " ".join(stemmer.stem(w) for w in tempTweet.split(" ") if w not in self.stop)
    elif stemmerF == 1:
        tempTweet = " ".join(stemmer.stem(w.strip()) for w in tempTweet.split(" "))
    elif stopwordsF == 1:
        tempTweet = " ".join(w for w in tempTweet.split(" ") if w.strip() not in self.stop)
    if encode == 0:
        return tempTweet
    return tempTweet.encode("utf-8")
def process(self, text, stopwordsF=0, stemmerF=0, encode=1):
    """Normalize a tweet: URLs, emoticons, acronyms, contractions, spelling.

    Pipeline: remove URLs and '#', lower-case; then per token keep
    dictionary words, turn emoticons into ``analysis#(score)#``, expand
    acronyms/contractions, anonymize @mentions and spell-correct other
    unknown words.  Optional stop-word removal (``stopwordsF=1``) and
    stemming (``stemmerF=1``) run afterwards.  Returns the cleaned text,
    UTF-8 encoded unless ``encode == 0``.

    NOTE(review): this duplicates the earlier ``process`` definition in
    this file -- one of the two copies should be removed.
    """
    # remove URLs
    line = re.sub(twokenize.Url_RE, " ", text)
    # drop '#', lower-case, strip extra whitespace
    temp = " ".join(line.replace("#", " ").lower().split())
    tempTweet = ""
    for word in twokenize.tokenize(temp):
        if word != " ":
            word = word.strip()
            flagNonDict = 0
            # Dictionary check sets the flag for unknown words so spell
            # correction only runs on tokens that need it.
            try:
                if self.wordDict[word] == 1:
                    pass  # known dictionary word, keep as-is
            except Exception:  # narrowed from bare except
                flagNonDict = 1
            try:
                score = self.emoticons[word]
                emo = emoticons.analyze_tweetHeavy(word)
                word = emo + "#(" + str(score) + ")#"
            except Exception:
                try:
                    # Normalize acronyms
                    word = self.acronyms[word]
                except Exception:
                    try:
                        # Normalize contractions
                        word = self.contractions[word]
                    except Exception:
                        # Spell-correct only non-dictionary words.
                        if flagNonDict == 1:
                            if "@" in word:
                                # remove user mentions
                                word = "@user"
                            else:
                                corrected = self.spellCheck.correct(word)
                                if corrected != "a":
                                    word = corrected
            try:
                tempTweet = " ".join([tempTweet, word.strip()])
                tempTweet = tempTweet.lower().strip()
            except Exception:
                # Python 2 fallback for mixed str/unicode joins -- TODO
                # confirm this path is still reachable on the target
                # interpreter.
                tempTweet = " ".join([tempTweet, word.strip().decode("iso-8859-1")])
                tempTweet = tempTweet.lower().strip()
    if stemmerF == 1 and stopwordsF == 1:
        tempTweet = " ".join(
            stemmer.stem(w) for w in tempTweet.split(" ") if w not in self.stop)
    elif stemmerF == 1:
        tempTweet = " ".join(
            stemmer.stem(w.strip()) for w in tempTweet.split(" "))
    elif stopwordsF == 1:
        tempTweet = " ".join(
            w for w in tempTweet.split(" ") if w.strip() not in self.stop)
    if encode == 0:
        return tempTweet
    return tempTweet.encode("utf-8")