def backup_messages_thread(self):
    """Back up all messages of ``self.entity`` into the on-disk database.

    Intended to be run in a worker thread; it loops invoking
    GetHistoryRequest in chunks of ``self.download_chunk_size`` until every
    message has been saved, committing and persisting resume metadata after
    each chunk so the backup can be resumed later.

    NOTE(review): the ``except KeyboardInterrupt`` below is only ever raised
    in the *main* thread by default — if this really runs in a different
    thread, that handler is effectively dead code. Confirm intent.
    """
    self.backup_running = True
    # Create a connection to the database
    db = TLDatabase(self.backup_dir)

    # Determine whether we started making the backup from the very first
    # message or not. If this is the case:
    #   We won't need to come back to the first message again after we've
    #   finished downloading them all, since that first message will already
    #   be in backup.
    # Otherwise, if we did not start from the first message:
    #   More messages were in the backup already, and after we backup those
    #   "left" ones, we must return to the first message and backup until
    #   where we started.
    started_at_0 = self.metadata['resume_msg_id'] == 0

    # Keep an internal downloaded count for it to be faster
    # (instead of querying the database all the time)
    self.metadata['saved_msgs'] = db.count('messages')

    # We also need to keep track of how many messages we've downloaded now
    # in order to calculate the estimated time left properly
    saved_msgs_now = 0

    # Make the backup
    try:
        # We need this to invoke GetHistoryRequest
        input_peer = get_input_peer(self.entity)

        # Keep track from when we started to determine the estimated time left
        start = datetime.now()

        # Enter the download-messages main loop
        while self.backup_running:
            # Invoke the GetHistoryRequest to get the next messages
            # after those we have
            result = self.client.invoke(GetHistoryRequest(
                peer=input_peer,
                offset_id=self.metadata['resume_msg_id'],
                limit=self.download_chunk_size,
                offset_date=None,
                add_offset=0,
                max_id=0,
                min_id=0
            ))
            # 'count' is only present on sliced results; fall back to the
            # number of messages actually returned.
            self.metadata['total_msgs'] = getattr(
                result, 'count', len(result.messages))

            # First add users and chats, replacing any previous value
            for user in result.users:
                db.add_object(user, replace=True)
            for chat in result.chats:
                db.add_object(chat, replace=True)

            # Then add the messages to the backup
            for msg in result.messages:
                if db.in_table(msg.id, 'messages'):
                    # If the message we retrieved was already saved, this
                    # means that we're done because we have the rest of the
                    # messages. Clear the list so we enter the next if, and
                    # break to early terminate.
                    self.metadata['resume_msg_id'] = result.messages[-1].id
                    del result.messages[:]
                    break
                else:
                    db.add_object(msg)
                    saved_msgs_now += 1
                    self.metadata['saved_msgs'] += 1
                    self.metadata['resume_msg_id'] = msg.id

            self.metadata['etl'] = str(self.calculate_etl(
                saved_msgs_now, self.metadata['total_msgs'], start=start))

            # Always commit at the end to save changes
            db.commit()
            self.save_metadata()

            # The list can be empty because we've either used a too big
            # offset (in which case we have all the previous messages), or
            # we've reached a point where we have the upcoming messages (so
            # there's no need to download them again and we stopped)
            if not result.messages:
                # We've downloaded all the messages since the last backup
                if started_at_0:
                    # And since we started from the very first message,
                    # we have them all
                    print('Downloaded all {}'.format(
                        self.metadata['total_msgs']))
                    break
                else:
                    # We need to start from the first message (latest sent
                    # message) and backup again until we have them all
                    self.metadata['resume_msg_id'] = 0
                    started_at_0 = True

            # Always sleep a bit, or Telegram will get angry and
            # tell us to chill
            sleep(self.download_delay)

    except KeyboardInterrupt:
        print('Operation cancelled, not downloading more messages!')
        # Also commit here, we don't want to lose any information!
        db.commit()
        self.save_metadata()

    finally:
        self.backup_running = False
def backup_messages_thread(self):
    """Back up all messages of ``self.entity`` into the on-disk database.

    Intended to be run in a worker thread; it connects the client, then
    loops invoking GetHistoryRequest in chunks of
    ``self.download_chunk_size`` until every message has been saved,
    committing and persisting resume metadata after each chunk so the
    backup can be resumed later. Unlike the raw request loop, each result
    is validated to actually be a Messages/MessagesSlice/ChannelMessages
    before use.

    NOTE(review): the ``except KeyboardInterrupt`` below is only ever
    raised in the *main* thread by default — if this really runs in a
    different thread, that handler is effectively dead code. Confirm
    intent.
    """
    self.backup_running = True
    # Create a connection to the database
    db = TLDatabase(self.backup_dir)

    # Determine whether we started making the backup from the very first
    # message or not. If this is the case:
    #   We won't need to come back to the first message again after we've
    #   finished downloading them all, since that first message will already
    #   be in backup.
    # Otherwise, if we did not start from the first message:
    #   More messages were in the backup already, and after we backup those
    #   "left" ones, we must return to the first message and backup until
    #   where we started.
    started_at_0 = self.metadata['resume_msg_id'] == 0

    # Keep an internal downloaded count for it to be faster
    # (instead of querying the database all the time)
    self.metadata['saved_msgs'] = db.count('messages')

    # We also need to keep track of how many messages we've downloaded now
    # in order to calculate the estimated time left properly
    saved_msgs_now = 0

    # Make the backup
    try:
        # We need this to invoke GetHistoryRequest
        input_peer = self.entity

        # Keep track from when we started to determine the estimated time left
        start = datetime.now()

        # Enter the download-messages main loop
        self.client.connect()
        while self.backup_running:
            # Invoke the GetHistoryRequest to get the next messages
            # after those we have
            result = self.client.invoke(GetHistoryRequest(
                peer=input_peer,
                offset_id=self.metadata['resume_msg_id'],
                limit=self.download_chunk_size,
                offset_date=None,
                add_offset=0,
                max_id=0,
                min_id=0
            ))

            # For some strange reason, GetHistoryRequest might return
            # upload.file.File. Ensure we retrieved Messages or
            # MessagesSlice before touching the result.
            if not isinstance(
                    result, (Messages, MessagesSlice, ChannelMessages)):
                print('Invalid result type when downloading messages:',
                      type(result))
                sleep(self.download_delay)
                continue

            # 'count' is only present on sliced results; fall back to the
            # number of messages actually returned.
            self.metadata['total_msgs'] = getattr(
                result, 'count', len(result.messages))

            # First add users and chats, replacing any previous value
            for user in result.users:
                db.add_object(user, replace=True)
            for chat in result.chats:
                db.add_object(chat, replace=True)

            # Then add the messages to the backup
            for msg in result.messages:
                if db.in_table(msg.id, 'messages'):
                    # If the message we retrieved was already saved, this
                    # means that we're done because we have the rest of the
                    # messages. Clear the list so we enter the next if, and
                    # break to early terminate.
                    self.metadata['resume_msg_id'] = result.messages[-1].id
                    del result.messages[:]
                    break
                else:
                    db.add_object(msg)
                    saved_msgs_now += 1
                    self.metadata['saved_msgs'] += 1
                    self.metadata['resume_msg_id'] = msg.id

            self.metadata['etl'] = str(self.calculate_etl(
                saved_msgs_now, self.metadata['total_msgs'], start=start))

            # Always commit at the end to save changes
            db.commit()
            self.save_metadata()

            # The list can be empty because we've either used a too big
            # offset (in which case we have all the previous messages), or
            # we've reached a point where we have the upcoming messages (so
            # there's no need to download them again and we stopped)
            if not result.messages:
                # We've downloaded all the messages since the last backup
                if started_at_0:
                    # And since we started from the very first message,
                    # we have them all
                    print('Downloaded all {}'.format(
                        self.metadata['total_msgs']))
                    break
                else:
                    # We need to start from the first message (latest sent
                    # message) and backup again until we have them all
                    self.metadata['resume_msg_id'] = 0
                    started_at_0 = True

            # Always sleep a bit, or Telegram will get angry and
            # tell us to chill
            sleep(self.download_delay)

    except KeyboardInterrupt:
        print('Operation cancelled, not downloading more messages!')
        # Also commit here, we don't want to lose any information!
        db.commit()
        self.save_metadata()

    finally:
        self.backup_running = False