示例#1
0
    def backup_messages_thread(self):
        """Download the message history of ``self.entity`` into the backup database.

        Meant to be executed on a separate thread. The loop keeps requesting
        history chunks while ``self.backup_running`` stays true; the flag is
        always reset to ``False`` when this method exits.
        """
        self.backup_running = True

        # Open the database that lives in the backup directory
        database = TLDatabase(self.backup_dir)

        # A resume id of 0 means this run begins at the first (latest sent)
        # message. In that case, once the history is exhausted there is nothing
        # left to fetch. Otherwise the backup already held some messages, so
        # after finishing the remaining ones a second pass from id 0 is needed
        # to reach the point where this run began.
        began_at_latest = self.metadata['resume_msg_id'] == 0

        # Cache the stored message count instead of querying the database
        # every time a message is saved
        self.metadata['saved_msgs'] = database.count('messages')

        # Messages saved during this run only; feeds the time-left estimate
        fetched_this_run = 0

        try:
            # GetHistoryRequest needs the input peer of the entity
            peer = get_input_peer(self.entity)

            # Reference point for the estimated-time-left calculation
            began_on = datetime.now()

            while self.backup_running:
                # Ask for the next chunk, continuing from the resume id
                request = GetHistoryRequest(
                    peer=peer,
                    offset_id=self.metadata['resume_msg_id'],
                    limit=self.download_chunk_size,
                    offset_date=None,
                    add_offset=0,
                    max_id=0,
                    min_id=0
                )
                history = self.client.invoke(request)
                self.metadata['total_msgs'] = getattr(
                    history, 'count', len(history.messages))

                # Users and chats go in first, overwriting any stored copies
                for peer_user in history.users:
                    database.add_object(peer_user, replace=True)
                for peer_chat in history.chats:
                    database.add_object(peer_chat, replace=True)

                # Now store the messages themselves
                for message in history.messages:
                    if database.in_table(message.id, 'messages'):
                        # Reaching an already saved message means the rest is
                        # in the backup too. Empty the list so the "no more
                        # messages" branch below runs, and stop early.
                        self.metadata['resume_msg_id'] = history.messages[-1].id
                        del history.messages[:]
                        break

                    database.add_object(message)
                    fetched_this_run += 1
                    self.metadata['saved_msgs'] += 1
                    self.metadata['resume_msg_id'] = message.id

                time_left = self.calculate_etl(
                    fetched_this_run, self.metadata['total_msgs'],
                    start=began_on)
                self.metadata['etl'] = str(time_left)

                # Persist the progress of this chunk before deciding what's next
                database.commit()
                self.save_metadata()

                # The list is empty either because the offset went past the
                # oldest message, or because it was cleared above after
                # running into messages that were already saved
                if not history.messages:
                    if began_at_latest:
                        # Started at the first message, so everything is saved
                        print('Downloaded all {}'.format(self.metadata['total_msgs']))
                        break

                    # Go back to the first (latest sent) message and download
                    # until the already saved ones are reached
                    self.metadata['resume_msg_id'] = 0
                    began_at_latest = True

                # Pause between requests so Telegram doesn't tell us to chill
                sleep(self.download_delay)

        except KeyboardInterrupt:
            print('Operation cancelled, not downloading more messages!')
            # Commit what we have so far, no information should be lost
            database.commit()
            self.save_metadata()

        finally:
            self.backup_running = False
示例#2
0
    def backup_messages_thread(self):
        """Download the message history of ``self.entity`` into the backup database.

        Meant to be executed on a separate thread. The client is connected
        first, then history chunks are requested while ``self.backup_running``
        stays true; the flag is always reset to ``False`` when this method
        exits.
        """
        self.backup_running = True

        # Open the database that lives in the backup directory
        database = TLDatabase(self.backup_dir)

        # A resume id of 0 means this run begins at the first (latest sent)
        # message. In that case, once the history is exhausted there is nothing
        # left to fetch. Otherwise the backup already held some messages, so
        # after finishing the remaining ones a second pass from id 0 is needed
        # to reach the point where this run began.
        began_at_latest = self.metadata['resume_msg_id'] == 0

        # Cache the stored message count instead of querying the database
        # every time a message is saved
        self.metadata['saved_msgs'] = database.count('messages')

        # Messages saved during this run only; feeds the time-left estimate
        fetched_this_run = 0

        try:
            # The entity is handed to GetHistoryRequest as the peer directly
            peer = self.entity

            # Reference point for the estimated-time-left calculation
            began_on = datetime.now()

            self.client.connect()
            while self.backup_running:
                # Ask for the next chunk, continuing from the resume id
                request = GetHistoryRequest(
                    peer=peer,
                    offset_id=self.metadata['resume_msg_id'],
                    limit=self.download_chunk_size,
                    offset_date=None,
                    add_offset=0,
                    max_id=0,
                    min_id=0)
                history = self.client.invoke(request)

                # The request has been seen to answer with an unrelated type
                # (upload.file.File), so only accept the message containers;
                # anything else is reported and retried after the usual delay
                if not isinstance(history, (Messages, MessagesSlice, ChannelMessages)):
                    print('Invalid result type when downloading messages:',
                          type(history))
                    sleep(self.download_delay)
                    continue

                self.metadata['total_msgs'] = getattr(
                    history, 'count', len(history.messages))

                # Users and chats go in first, overwriting any stored copies
                for peer_user in history.users:
                    database.add_object(peer_user, replace=True)
                for peer_chat in history.chats:
                    database.add_object(peer_chat, replace=True)

                # Now store the messages themselves
                for message in history.messages:
                    if database.in_table(message.id, 'messages'):
                        # Reaching an already saved message means the rest is
                        # in the backup too. Empty the list so the "no more
                        # messages" branch below runs, and stop early.
                        self.metadata['resume_msg_id'] = history.messages[-1].id
                        del history.messages[:]
                        break

                    database.add_object(message)
                    fetched_this_run += 1
                    self.metadata['saved_msgs'] += 1
                    self.metadata['resume_msg_id'] = message.id

                time_left = self.calculate_etl(
                    fetched_this_run, self.metadata['total_msgs'],
                    start=began_on)
                self.metadata['etl'] = str(time_left)

                # Persist the progress of this chunk before deciding what's next
                database.commit()
                self.save_metadata()

                # The list is empty either because the offset went past the
                # oldest message, or because it was cleared above after
                # running into messages that were already saved
                if not history.messages:
                    if began_at_latest:
                        # Started at the first message, so everything is saved
                        print('Downloaded all {}'.format(
                            self.metadata['total_msgs']))
                        break

                    # Go back to the first (latest sent) message and download
                    # until the already saved ones are reached
                    self.metadata['resume_msg_id'] = 0
                    began_at_latest = True

                # Pause between requests so Telegram doesn't tell us to chill
                sleep(self.download_delay)

        except KeyboardInterrupt:
            print('Operation cancelled, not downloading more messages!')
            # Commit what we have so far, no information should be lost
            database.commit()
            self.save_metadata()

        finally:
            self.backup_running = False