Exemplo n.º 1
0
 def on_data(self, data):
     """Decode one raw JSON status message and queue its extracted terms.

     ``data`` is decoded with ``json.loads``.  A payload without a
     ``"text"`` key is reported on stdout, followed by the decoded
     payload, and nothing is queued.  Otherwise the text is passed to
     ``extract.process_status`` and the result is handed to
     ``self.miner.queue_for_sending`` together with the status id, the
     creation time and the current time, both formatted as
     ``%Y%m%d%H%M`` strings.

     Args:
         data: JSON-encoded status string.

     Returns:
         Always ``True``.

     Raises:
         ValueError: if ``data`` is not valid JSON, or ``created_at``
             does not match the ``'%a %b %d %H:%M:%S +0000 %Y'`` layout.
         KeyError: if a payload with text lacks ``created_at`` or ``id``.
     """
     status = json.loads(data)
     if "text" not in status:
         # Single-argument print(...) emits the same output whether it is
         # parsed as a Python 2 statement or a Python 3 function call.
         print("Text is missing from the tweet body.")
         print(status)
         return True

     stamp_format = '%Y%m%d%H%M'
     terms_dict = extract.process_status(status['text'])
     created_at = datetime.strptime(
         status['created_at'],
         '%a %b %d %H:%M:%S +0000 %Y').strftime(stamp_format)
     # NOTE(review): created_at is parsed from a "+0000" timestamp while
     # datetime.now() is naive local time -- confirm the consumer expects
     # the two values in different zones.
     now = datetime.now().strftime(stamp_format)
     self.miner.queue_for_sending(status['id'], terms_dict, created_at,
                                  now)
     return True
Exemplo n.º 2
0
 def on_data(self, data):
     """Decode one raw JSON status message and queue its extracted terms.

     ``data`` is decoded with ``json.loads``.  A payload without a
     ``"text"`` key is reported on stdout, followed by the decoded
     payload, and nothing is queued.  Otherwise the text is passed to
     ``extract.process_status`` and the result is handed to
     ``self.miner.queue_for_sending`` together with the status id, the
     creation time and the current time, both formatted as
     ``%Y%m%d%H%M`` strings.

     Args:
         data: JSON-encoded status string.

     Returns:
         Always ``True``.

     Raises:
         ValueError: if ``data`` is not valid JSON, or ``created_at``
             does not match the ``'%a %b %d %H:%M:%S +0000 %Y'`` layout.
         KeyError: if a payload with text lacks ``created_at`` or ``id``.
     """
     status = json.loads(data)
     if "text" not in status:
         # Single-argument print(...) emits the same output whether it is
         # parsed as a Python 2 statement or a Python 3 function call.
         print("Text is missing from the tweet body.")
         print(status)
         return True

     stamp_format = '%Y%m%d%H%M'
     terms_dict = extract.process_status(status['text'])
     created_at = datetime.strptime(
         status['created_at'],
         '%a %b %d %H:%M:%S +0000 %Y').strftime(stamp_format)
     # NOTE(review): created_at is parsed from a "+0000" timestamp while
     # datetime.now() is naive local time -- confirm the consumer expects
     # the two values in different zones.
     now = datetime.now().strftime(stamp_format)
     self.miner.queue_for_sending(status['id'], terms_dict, created_at,
                                  now)
     return True
Exemplo n.º 3
0
 def download_timelines(self, api):
     """Download each configured user's timeline and queue every status.

     Sets ``self.is_downloading`` to ``True``, then walks the
     comma-separated names in ``self.category.users``.  Surrounding
     whitespace is stripped from each name and blank entries are skipped,
     so values such as ``"a, b"``, ``"a,b,"`` or ``""`` never send a
     malformed name to ``api.get_user``.  For every user the pages
     yielded by ``tweepy.Cursor(api.user_timeline, ...).pages(16)`` are
     collected, and each status is run through
     ``extract.process_status`` and passed to ``self.queue_for_sending``
     with its id, its creation time and the current time (both as
     ``%Y%m%d%H%M`` strings).

     The method returns early as soon as ``self.is_downloading`` is
     found falsy after a status has been queued (presumably cleared
     elsewhere to request a stop -- verify against the caller).

     Args:
         api: object providing ``get_user`` and ``user_timeline``.

     Returns:
         None.
     """
     self.is_downloading = True
     stamp_format = '%Y%m%d%H%M'
     for raw_name in self.category.users.split(','):
         user_name = raw_name.strip()
         if not user_name:
             # Blank entry (empty setting, doubled or trailing comma).
             continue
         self.log("Downloading timeline for {}".format(user_name))
         user = api.get_user(user_name)
         # NOTE(review): count=800 is passed through unchanged; Twitter
         # documents a lower per-request maximum -- confirm it is intended.
         cursor = tweepy.Cursor(api.user_timeline, user_id=user.id,
                                count=800, include_rts=True)
         # Pages are fetched up front so the progress log below can
         # report the total number of pages.
         page_list = list(cursor.pages(16))
         for idx, page in enumerate(page_list):
             self.log("user:{} page:{}/{} statuses:{}".format(
                 user_name, idx, len(page_list), len(page)))
             for status in page:
                 terms_dict = extract.process_status(status.text)
                 created_at = status.created_at.strftime(stamp_format)
                 now = datetime.now().strftime(stamp_format)
                 self.queue_for_sending(status.id, terms_dict, created_at,
                                        now)
                 if not self.is_downloading:
                     return  # stop downloading
Exemplo n.º 4
0
 def download_timelines(self, api):
     """Download each configured user's timeline and queue every status.

     Sets ``self.is_downloading`` to ``True``, then walks the
     comma-separated names in ``self.category.users``.  Surrounding
     whitespace is stripped from each name and blank entries are skipped,
     so values such as ``"a, b"``, ``"a,b,"`` or ``""`` never send a
     malformed name to ``api.get_user``.  For every user the pages
     yielded by ``tweepy.Cursor(api.user_timeline, ...).pages(16)`` are
     collected, and each status is run through
     ``extract.process_status`` and passed to ``self.queue_for_sending``
     with its id, its creation time and the current time (both as
     ``%Y%m%d%H%M`` strings).

     The method returns early as soon as ``self.is_downloading`` is
     found falsy after a status has been queued (presumably cleared
     elsewhere to request a stop -- verify against the caller).

     Args:
         api: object providing ``get_user`` and ``user_timeline``.

     Returns:
         None.
     """
     self.is_downloading = True
     stamp_format = '%Y%m%d%H%M'
     for raw_name in self.category.users.split(','):
         user_name = raw_name.strip()
         if not user_name:
             # Blank entry (empty setting, doubled or trailing comma).
             continue
         self.log("Downloading timeline for {}".format(user_name))
         user = api.get_user(user_name)
         # NOTE(review): count=800 is passed through unchanged; Twitter
         # documents a lower per-request maximum -- confirm it is intended.
         cursor = tweepy.Cursor(api.user_timeline,
                                user_id=user.id,
                                count=800,
                                include_rts=True)
         # Pages are fetched up front so the progress log below can
         # report the total number of pages.
         page_list = list(cursor.pages(16))
         for idx, page in enumerate(page_list):
             self.log("user:{} page:{}/{} statuses:{}".format(
                 user_name, idx, len(page_list), len(page)))
             for status in page:
                 terms_dict = extract.process_status(status.text)
                 created_at = status.created_at.strftime(stamp_format)
                 now = datetime.now().strftime(stamp_format)
                 self.queue_for_sending(status.id, terms_dict, created_at,
                                        now)
                 if not self.is_downloading:
                     return  # stop downloading