def analyze(input, output):
    """Score tweets from a JSON-lines file and append the results.

    Reads *input* line by line, where every line is a JSON object with a
    'text' field, attaches the positive-sentiment score returned by
    interface.predictTweet under the 'sentiment' key, and appends each
    augmented record as one JSON line to *output*.
    """
    # NOTE(review): the first parameter shadows the builtin input();
    # kept as-is so keyword callers are not broken.
    with open(input, 'r') as src, open(output, 'a') as dst:
        for raw in src:
            record = json.loads(raw)
            record['sentiment'] = interface.predictTweet(record['text'])['pos']
            dst.write(json.dumps(record))
            dst.write('\n')
def clean_str(text):
    """Normalise raw tweet text for sentiment prediction.

    Strips hyperlinks and literal '\\n' sequences, drops Twitter
    @-mentions and single-character tokens (multi-character smileys are
    kept), removes the '#' prefix from hashtags, and returns the
    remaining tokens lower-cased and space-joined.
    """
    import re
    # Parameter renamed from `str` (shadowed the builtin) to `text`.
    text = text + " "  # trailing space lets the URL regex below anchor on ' '
    text = re.sub("http[^ ]*[\\\]", "\\\\", text)  # collapse URLs ending in a backslash
    text = re.sub("http[^ ]* ", " ", text)         # remove remaining hyperlinks
    text = text.replace('\\n', ' ')                # literal backslash-n sequences
    # Tokenise: words (with inner -/'), lone apostrophes, smiley/punct runs,
    # or any non-space char followed by word chars (catches @mentions, #tags).
    tokens = re.findall(r"\w+(?:[-']\w+)*|'|[:)-.(]+|\S\w*", text)
    # Drop single-character tokens and @-mentions.
    tokens = [tok for tok in tokens if len(tok) > 1 and tok[0] != '@']
    # Keep hashtag words, minus the leading '#'.
    tokens = [tok[1:] if tok[0] == '#' else tok for tok in tokens]
    return " ".join(tokens).lower().strip()


def main():
    """Batch job: clean tweets from a text file, score them, save predictions.

    argv[1] is "input_path,output_path".
    """
    fp, out = sys.argv[1].split(',')
    sc = pyspark_cassandra.CassandraSparkContext()
    data = sc.textFile(fp, 36)
    clean_text = data.map(json.loads) \
        .map(lambda x: (x, clean_str(x['text'])))
    # NOTE(review): predictTweet is not defined or imported in this unit --
    # presumably interface.predictTweet; confirm before running.
    json_preds = clean_text.map(lambda x: (x[0], predictTweet(x[1])['pos'])) \
        .map(json.dumps)
    json_preds.saveAsTextFile(out)


# Guarded so importing this module no longer launches the Spark job.
if __name__ == "__main__":
    main()
import interface

# Smoke-test the sentiment interface on a handful of sample inputs.
# Parenthesised single-argument print works under both Python 2 and Python 3,
# unlike the original Python-2-only print statements.
print(interface.predictTweet("I hate you"))
print(interface.predictList(["I hate you", "I feel ambivalent about you.",
                             "Trump in General", "Violence", "I love everything"]))
# Streaming job: read tweets from Kafka, clean and score them, and fan the
# predictions out to per-candidate Cassandra tables.
if __name__ == '__main__':  # was 'main', which never matches -- fixed
    brokers, topic = sys.argv[1:]
    sc = pyspark_cassandra.CassandraSparkContext()
    ssc = StreamingContext(sc, 1)  # 1-second batch interval
    kvs = KafkaUtils.createDirectStream(ssc, [topic],
                                        {"metadata.broker.list": brokers})
    clean_text = kvs.map(lambda x: json.loads(x[1])) \
        .map(lambda x: (x, clean_str(x['text'])))
    # Build the per-tweet Cassandra rows.  The original code rebound the name
    # `db_dict` to this mapped stream, so the lazily evaluated lambda would
    # have called the stream object instead of the row-building function.
    rows = clean_text.map(lambda x: db_dict(x[0], predictTweet(x[1])['pos']))
    trump = rows.filter(lambda x: x['candidate'] == 'trump')
    hillary = rows.filter(lambda x: x['candidate'] == 'hillary')
    bernie = rows.filter(lambda x: x['candidate'] == 'bernie')
    zodiac_killer = rows.filter(lambda x: x['candidate'] == 'cruz')
    # NOTE(review): `parties` is filtered but never saved anywhere -- confirm intent.
    parties = rows.filter(lambda x: x['candidate'] == 'parties')
    if not trump.isEmpty():
        trump.saveToCassandra('db', 'trump')
    if not hillary.isEmpty():
        hillary.saveToCassandra('db', 'hillary')
    if not bernie.isEmpty():
        bernie.saveToCassandra('db', 'bernie')
    if not zodiac_killer.isEmpty():
        zodiac_killer.saveToCassandra('db', 'cruz')
    # NOTE(review): no ssc.start()/ssc.awaitTermination() visible here -- the
    # streaming context is never started unless that happens elsewhere.
import json
import sys

import pyspark_cassandra


def clean_str(text):
    """Normalise raw tweet text for sentiment prediction.

    Strips hyperlinks and literal '\\n' sequences, drops Twitter
    @-mentions and single-character tokens (multi-character smileys are
    kept), removes the '#' prefix from hashtags, and returns the
    remaining tokens lower-cased and space-joined.
    """
    import re
    # Parameter renamed from `str` (shadowed the builtin) to `text`.
    text = text + " "  # trailing space lets the URL regex below anchor on ' '
    text = re.sub("http[^ ]*[\\\]", "\\\\", text)  # collapse URLs ending in a backslash
    text = re.sub("http[^ ]* ", " ", text)         # remove remaining hyperlinks
    text = text.replace('\\n', ' ')                # literal backslash-n sequences
    # Tokenise: words (with inner -/'), lone apostrophes, smiley/punct runs,
    # or any non-space char followed by word chars (catches @mentions, #tags).
    tokens = re.findall(r"\w+(?:[-']\w+)*|'|[:)-.(]+|\S\w*", text)
    # Drop single-character tokens and @-mentions.
    tokens = [tok for tok in tokens if len(tok) > 1 and tok[0] != '@']
    # Keep hashtag words, minus the leading '#'.
    tokens = [tok[1:] if tok[0] == '#' else tok for tok in tokens]
    return " ".join(tokens).lower().strip()


def main():
    """Batch job: clean tweets from a text file, score them, save predictions.

    argv[1] is "input_path,output_path".
    """
    fp, out = sys.argv[1].split(',')
    sc = pyspark_cassandra.CassandraSparkContext()
    data = sc.textFile(fp, 36)
    clean_text = data.map(json.loads) \
        .map(lambda x: (x, clean_str(x['text'])))
    # NOTE(review): predictTweet is not defined or imported in this file --
    # presumably interface.predictTweet; confirm before running.
    json_preds = clean_text.map(lambda x: (x[0], predictTweet(x[1])['pos'])) \
        .map(json.dumps)
    json_preds.saveAsTextFile(out)


# Guarded so importing this module no longer launches the Spark job.
if __name__ == "__main__":
    main()