def main():
    """Print text, chunks and NER output for qualifying tweets.

    Iterates over the ``[0:10000]`` slice of ``tweets.find()``. A tweet is
    printed only when ``get_day(t['created_at'])`` equals 1 and both
    ``t['twitter_nlp']['chunks']`` and ``t['twitter_nlp']['ner']`` are
    truthy. For each such tweet the UTF-8 encoded text, the chunks and the
    NER value are printed, in that order. A tweet whose printing raises is
    skipped.
    """
    for t in tweets.find()[0:10000]:
        if get_day(t['created_at']) != 1:
            continue

        nlp = t['twitter_nlp']
        if not (nlp['chunks'] and nlp['ner']):
            continue

        try:
            # Parenthesised single-argument print: same output as the
            # Python 2 print statement this file was written with.
            print(t['text'].encode('utf-8'))
            print(nlp['chunks'])
            print(nlp['ner'])
        except Exception:
            # Best-effort output: skip tweets that cannot be printed, but
            # (unlike a bare ``except:``) let KeyboardInterrupt and
            # SystemExit propagate so the scan can be interrupted.
            continue
def main():
    """Print text, chunks and NER output for qualifying tweets.

    Iterates over the ``[0:10000]`` slice of ``tweets.find()``. A tweet is
    printed only when ``get_day(t["created_at"])`` equals 1 and both
    ``t["twitter_nlp"]["chunks"]`` and ``t["twitter_nlp"]["ner"]`` are
    truthy. For each such tweet the UTF-8 encoded text, the chunks and the
    NER value are printed, in that order. A tweet whose printing raises is
    skipped.
    """
    for t in tweets.find()[0:10000]:
        if get_day(t["created_at"]) != 1:
            continue

        nlp = t["twitter_nlp"]
        if not (nlp["chunks"] and nlp["ner"]):
            continue

        try:
            # Parenthesised single-argument print: same output as the
            # Python 2 print statement this file was written with.
            print(t["text"].encode("utf-8"))
            print(nlp["chunks"])
            print(nlp["ner"])
        except Exception:
            # Best-effort output: skip tweets that cannot be printed, but
            # (unlike a bare ``except:``) let KeyboardInterrupt and
            # SystemExit propagate so the scan can be interrupted.
            continue
def main():
    """Enqueue every tweet without a 'segments' key and wait on the queue.

    Starts one daemon ``TweetsSegThread`` constructed with the ``queue``
    object defined outside this function. Then walks ``tweets.find()`` and,
    for each document that has no ``'segments'`` key, puts the tuple
    ``(tweet['_id'], tweet['text'], index)`` on the queue, where ``index``
    is the document's position in the unfiltered cursor. Finally calls
    ``queue.join()`` (presumably a ``Queue.Queue``; confirm against the
    module-level definition).
    """
    # spawn a pool of threads, and pass them the queue instance
    for _ in range(1):
        worker = TweetsSegThread(queue)
        worker.setDaemon(True)
        worker.start()

    # populate queue with data
    for index, tweet in enumerate(tweets.find()):
        # only tweets that do not have segments yet are queued;
        # ``in`` replaces the deprecated dict.has_key()
        if 'segments' not in tweet:
            queue.put((tweet['_id'], tweet['text'], index))

    queue.join()
def main():
    """Start the chunker workers and feed them the tweets matching ``day``.

    Creates 100 daemon ``TweetsChunkerThread`` instances, each constructed
    with the shared ``queue``. Then puts a ``(tweet, index)`` tuple on the
    queue for every document from ``tweets.find()`` whose
    ``get_day(tweet['created_at'])`` equals ``day``; ``index`` is the
    document's position in the unfiltered cursor. Ends by calling
    ``queue.join()``.
    """
    # Bring up the worker pool; every worker gets the same queue instance.
    for _ in range(100):
        chunker = TweetsChunkerThread(queue)
        chunker.setDaemon(True)
        chunker.start()

    # Lazily select the tweets that fall on the requested day, keeping each
    # one paired with its position in the full result set.
    same_day_items = (
        (doc, position)
        for position, doc in enumerate(tweets.find())
        if get_day(doc['created_at']) == day
    )
    for item in same_day_items:
        queue.put(item)

    # Block the current thread on the queue.
    queue.join()