示例#1
0
# Register the remaining command-line options on the existing parser.
# Every option here has a default, so all of them may be omitted.
parser.add_argument(
    '-result-type', dest="result_type", default="mixed", help="Result type"
)
parser.add_argument(
    '-max-id',
    dest="max_id",
    default=None,
    help="Maximum Id from which to start searching",
)
parser.add_argument('-until', dest="until", default=None, help="Last date")
parser.add_argument('-lang', dest="lang", default=None, help="Language")
parser.add_argument('-table', dest="table", default="tweets", help="DB Table")

# Parse once and copy the values into module-level names.
args = parser.parse_args()
q, n = args.q, args.n
result_type, max_id = args.result_type, args.max_id
until, lang, table = args.until, args.lang, args.table

twitter = TwitterUtils()
db = DbUtils()

# Start from the mandatory search parameters, then add only the optional
# filters that actually carry a value (None means "not supplied").
next_results = {"q": q, "count": 100}
next_results.update({
    key: value
    for key, value in (
        ("result_type", result_type),
        ("max_id", max_id),
        ("until", until),
        ("lang", lang),
    )
    if value is not None
})

# Store a cursor created from db.conn on the DbUtils instance.
db.cursor = db.conn.cursor()
try:
    for i in range(0, n):
        print("Request: %d" % (i+1,))
        result = twitter.search(**next_results)
from DbUtils import DbUtils
import re
import sys
# Write per-tweet substring counts as "i,j,x" triplets (1-based indices).
#   sys.argv[3] -> first argument handed to db.fetchall
#   sys.argv[2] -> output file name, created under resources/
db = DbUtils()

# Only the "text" column is requested; results are sorted by "id DESC".
rows = db.fetchall(sys.argv[3], ("text",), sort="id DESC")

# The pattern removes everything from each "http" to the end of its line.
# Texts are then stripped, lower-cased and de-duplicated through a set.
url_tail = re.compile(r'http(s?).* ?')
tweets = list({url_tail.sub('', row[0]).strip().lower() for row in rows})

# The stopword file is read in binary mode, one entry per line.
with open("resources/stopwords.txt", "rb") as stop_file:
    stopwords = stop_file.read().splitlines()

# NOTE(review): `words` is not assigned anywhere in the visible code and
# `stopwords` is not used below -- presumably the vocabulary is built in a
# part of the script that is not shown here; confirm before running.
with open("resources/" + sys.argv[2], "w") as out:
    out.write("i,j,x\r\n")
    for row_no, tweet in enumerate(tweets, 1):
        for col_no, word in enumerate(words, 1):
            hits = tweet.count(word)
            # Only non-zero counts are written, keeping the output sparse.
            if hits > 0:
                out.write(
                    str(row_no) + "," + str(col_no) + "," + str(hits) + "\r\n"
                )