Python csvToPandasDF示例

编程语言: Python

命名空间/包名称: cleanText

方法/功能: csvToPandasDF

hotexamples.com的示例: 4

Python csvToPandasDF - 已找到4个示例。这些是从开源项目中提取的最受好评的cleanText.csvToPandasDF现实Python示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

文件： mainTesting.py 项目： alexjacobs08/capStoneTwitterMining

"""
file for testing things before adding them to pipeline
"""

import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import io
import os
import json
from keys import consumer_secret,consumer_key,access_token_secret,access_token
import accessStream
import cleanText
#from main import keyword_list

# Load "tweets_out.csv" through the cleanText helper (the result is
# presumably a pandas DataFrame, going by the helper's name).
df = cleanText.csvToPandasDF("tweets_out.csv")

# Prepend a 'uID' column that numbers the rows 0 .. n-1.
df.insert(loc=0, column='uID', value=range(df.shape[0]))

# The search terms live in a single comma-separated string inside a list;
# dropping the commas and splitting on whitespace yields the individual words.
keyword_list = ['bernie, sanders, hillary, clinton']
keywords = keyword_list[0].replace(',', '').split()

示例#2

显示文件

文件： main.py 项目： alexjacobs08/capStoneTwitterMining

# Pipeline fragment: capture a tweet stream, convert the capture to CSV,
# load the CSV into a data frame, then tag each tweet with a subject.
# auth, tweet_limit, start_time, time_limit, inputFile, outputFile and
# keyword_list are defined outside this fragment.
print("starting capturing stream")
# Earlier Listener call (without tweet_limit), kept for reference:
#twitterStream = Stream(auth, accessStream.Listener(start_time, time_limit,inputFile))
twitterStream = Stream(auth, accessStream.Listener(tweet_limit, start_time, time_limit, inputFile))  # NOTE(review): the original author flagged this call as not working; cause not visible here

twitterStream.filter(track=keyword_list)  # call the filter method to run the Stream Listener

print("done capturing stream")
print("cleaning tweets")


# Convert the captured stream file into a CSV file.
cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")


# Load the CSV that was just written.
cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")



print("identifying subject of the tweet")

# Keep every row but only the 'text' column (still a frame, not a series,
# because the column is selected with a list).
textList = cleanDF.loc[:, ['text']]

subject_array = []  # one identifySubject result per tweet
clean_text = []  # initialised here; not filled within this fragment
# Original author's note: deriving `keywords` with
#   keyword_list[0].replace(',', '').split()
# "can only be used in one place or only first word comes through."

# xrange is Python 2 only.
# textList['text'][i] looks rows up by index label -- assumes the frame has
# the default 0..n-1 index; TODO confirm.
for i in xrange(len(textList)):

    subject_array.append(cleanText.identifySubject(textList['text'][i], keyword_list))

示例#3

显示文件

"""
file for testing things before adding them to pipeline
"""

import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import io
import os
import json
from keys import consumer_secret, consumer_key, access_token_secret, access_token
import accessStream
import cleanText
#from main import keyword_list

# Load "tweets_out.csv" through the cleanText helper (the result is
# presumably a pandas DataFrame, going by the helper's name).
df = cleanText.csvToPandasDF("tweets_out.csv")

# Prepend a 'uID' column that numbers the rows 0 .. n-1.
df.insert(loc=0, column='uID', value=range(df.shape[0]))

# The search terms live in a single comma-separated string inside a list;
# dropping the commas and splitting on whitespace yields the individual words.
keyword_list = ['bernie, sanders, hillary, clinton']
keywords = keyword_list[0].replace(',', '').split()

示例#4

显示文件

文件： main.py 项目： alexjacobs08/capStoneTwitterMining

# Pipeline fragment: capture a tweet stream, convert the capture to CSV,
# load the CSV into a data frame, then prepare to tag each tweet.
# auth, tweet_limit, start_time, time_limit, inputFile, outputFile and
# keyword_list are defined outside this fragment.
print("starting capturing stream")
# Earlier Listener call (without tweet_limit), kept for reference:
#twitterStream = Stream(auth, accessStream.Listener(start_time, time_limit,inputFile))
twitterStream = Stream(
    auth, accessStream.Listener(tweet_limit, start_time, time_limit,
                                inputFile))  # NOTE(review): the original author flagged this call as not working; cause not visible here

twitterStream.filter(
    track=keyword_list)  # call the filter method to run the Stream Listener

print("done capturing stream")
print("cleaning tweets")

# Convert the captured stream file into a CSV file.
cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")

# Load the CSV that was just written.
cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")

print("identifying subject of the tweet")

# Keep every row but only the 'text' column (still a frame, not a series,
# because the column is selected with a list).
textList = cleanDF.loc[:, ['text']]

subject_array = []  # filled by the loop that follows
clean_text = []  # filled by the loop that follows
# Original author's note: deriving `keywords` with
#   keyword_list[0].replace(',', '').split()
# "can only be used in one place or only first word comes through."

for i in xrange(len(textList)):

    subject_array.append(
        cleanText.identifySubject(textList['text'][i], keyword_list))
    clean_text.append(