示例#1
0
def get_file_location(media):
    """
    Resolve arbitrary media into an ``(input location, size)`` pair.

    Handles message photos, photos, message documents and user/chat
    profile photos. The first element is ``None`` when no usable location
    is found; the second is ``None`` whenever no size is known (profile
    photos never report one).
    """
    found = None
    byte_count = None

    # A message-level photo wrapper is reduced to the photo it carries.
    if isinstance(media, types.MessageMediaPhoto):
        media = media.photo

    if isinstance(media, types.Photo):
        # Scan the sizes from last to first and keep the first real
        # PhotoSize whose location is a plain FileLocation.
        chosen = next(
            (entry for entry in reversed(media.sizes)
             if isinstance(entry, types.PhotoSize)
             and isinstance(entry.location, types.FileLocation)),
            None)
        if chosen is not None:
            byte_count = chosen.size
            found = chosen.location
    elif isinstance(media, types.MessageMediaDocument):
        document = media.document
        if isinstance(document, types.Document):
            byte_count = document.size
            found = types.InputDocumentFileLocation(
                id=document.id,
                access_hash=document.access_hash,
                version=document.version)
    elif isinstance(media, (types.UserProfilePhoto, types.ChatPhoto)):
        # Prefer the big picture, fall back to the small one. Attributes
        # are read lazily so the small one is only touched when needed.
        for attribute in ('photo_big', 'photo_small'):
            candidate = getattr(media, attribute)
            if isinstance(candidate, types.FileLocation):
                found = candidate
                break

    # Anything that is not a raw FileLocation is returned untouched.
    if not isinstance(found, types.FileLocation):
        return found, byte_count

    input_location = types.InputFileLocation(volume_id=found.volume_id,
                                             local_id=found.local_id,
                                             secret=found.secret)
    return input_location, byte_count
示例#2
0
    async def handle_request(self, req, head=False, thumb=False):
        """
        Serve the file attached to a message, or its document thumbnail.

        The message ID is read from ``req.match_info["id"]``. When ``thumb``
        is true and the message has a document, the last entry of the
        document's ``thumbs`` is served as ``image/jpeg``; in every other
        case the message's own media is served.

        :param req: incoming request; ``match_info`` and ``http_range``
            are read from it.
        :param head: when true no body is produced, only the headers.
        :param thumb: when true, serve the document thumbnail if there is
            a document (a message without one falls through to the full
            media).
        :return: a ``web.Response`` with one of these statuses:
            410 when the message or its file is missing, 404 when a
            thumbnail was requested but the document has none, 416 when
            the requested range is malformed, out of bounds or empty,
            206 for a satisfiable range request and 200 otherwise.
        """
        file_id = int(req.match_info["id"])

        # NOTE(review): ``chat_id`` is not defined inside this method;
        # presumably it comes from an enclosing/module scope -- confirm.
        message = await self.client.get_messages(entity=chat_id, ids=file_id)
        if not message or not message.file:
            return web.Response(status=410, text="410: Gone. Access to the target resource is no longer available!")

        if thumb and message.document:
            thumbnail = message.document.thumbs
            if not thumbnail:
                return web.Response(status=404, text="404: Not Found")
            thumbnail = thumbnail[-1]
            mime_type = 'image/jpeg'
            size = thumbnail.size
            file_name = f"{file_id}_thumbnail.jpg"
            media = types.InputDocumentFileLocation(
                id=message.document.id,
                access_hash=message.document.access_hash,
                file_reference=message.document.file_reference,
                thumb_size=thumbnail.type
            )
        else:
            media = message.media
            size = message.file.size
            file_name = get_file_name(message)
            mime_type = message.file.mime_type

        try:
            # Reading ``http_range`` may itself raise ValueError on a
            # malformed header, hence it lives inside the ``try``.
            http_range = req.http_range
            offset = http_range.start or 0
            # ``limit`` is an exclusive end: Content-Length below is
            # computed as ``limit - offset``.
            limit = http_range.stop or size
            is_partial = (http_range.start is not None
                          or http_range.stop is not None)
            if (limit > size) or (offset < 0) or (limit < offset):
                raise ValueError("range not in acceptable format")
            if is_partial and limit == offset:
                # An explicit range that selects no bytes cannot be
                # expressed in Content-Range, so it is unsatisfiable.
                raise ValueError("range selects no bytes")
        except ValueError:
            return web.Response(
                status=416,
                text="416: Range Not Satisfiable",
                headers = {
                    "Content-Range": f"bytes */{size}"
                }
            )

        if not head:
            body = self.client.download(media, size, offset, limit)
        else:
            body = None

        headers = {
            "Content-Type": mime_type,
            "Content-Length": str(limit - offset),
            "Accept-Ranges": "bytes",
            "Content-Disposition": f'attachment; filename="{file_name}"'
        }
        if is_partial:
            # Content-Range carries an *inclusive* last byte position, so
            # the exclusive ``limit`` must be reduced by one. The header
            # is only meaningful on partial (206) responses.
            headers["Content-Range"] = f"bytes {offset}-{limit - 1}/{size}"

        return web.Response(
            # Any honoured range request is partial content, including
            # one that happens to start at byte 0.
            status=206 if is_partial else 200,
            body=body,
            headers=headers
        )
示例#3
0
    async def _download_media(self, media_id, context_id, sender_id, date,
                              bar):
        """
        Download the media stored under ``media_id`` unless it is already
        on disk.

        The target path is built from ``self.media_fmt`` (first expanded
        through ``date.strftime`` and then through ``str.format_map``),
        followed by ``.<media_id><extension>``. Only rows whose type is
        ``photo`` or ``document`` are downloaded; anything else returns
        immediately. ``bar`` has its ``total`` grown and ``update`` called
        as the download advances.
        """
        row = self.dumper.conn.execute(
            'SELECT LocalID, VolumeID, Secret, Type, MimeType, Name, Size '
            'FROM Media WHERE ID = ?', (media_id, )).fetchone()
        local_id, volume_id, secret, kind, mime, stored_name, size = row

        # The stored type may be dotted ("document.<subtype>"): the part
        # before the first dot is the namespace, the part after the last
        # dot is the subtype.
        media_type = kind.partition('.')[0]
        media_subtype = kind.rpartition('.')[2]
        if media_type not in ('photo', 'document'):
            return  # Nothing else can be downloaded

        context_name = self._get_name(context_id) or 'unknown'
        sender_name = self._get_name(sender_id) or 'unknown'
        formatter = defaultdict(str)
        formatter.update(context_id=context_id,
                         sender_id=sender_id,
                         type=media_subtype or 'unknown',
                         name=context_name,
                         sender_name=sender_name)

        if stored_name:
            # A stored name may carry its own extension, which is
            # preferred over one derived from the mime type.
            base_name, ext = os.path.splitext(stored_name)
        else:
            # No stored name: fall back to "<type>_<timestamp>".
            base_name = date.strftime('{}_%Y-%m-%d_%H-%M-%S'.format(
                formatter['type']))
            ext = None

        # Without an extension from the name, ask for one by mime type.
        ext = ext or export_utils.get_extension(mime)

        formatter['filename'] = base_name
        filename = '{}.{}{}'.format(
            date.strftime(self.media_fmt).format_map(formatter),
            media_id, ext)
        if os.path.isfile(filename):
            __log__.debug('Skipping already-existing file %s', filename)
            return

        __log__.debug('Downloading to %s', filename)
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        if media_type != 'document':
            location = types.InputFileLocation(local_id=local_id,
                                               volume_id=volume_id,
                                               secret=secret)
        else:
            # For documents the three generic columns hold the document
            # ID, version and access hash respectively.
            location = types.InputDocumentFileLocation(
                id=local_id,
                version=volume_id,
                access_hash=secret)

        def on_progress(saved, total):
            """Advance ``bar`` by the size of the part just received."""
            if total is None:
                # Unknown size: the total was not grown beforehand, so
                # grow it here by the reported amount.
                bar.total += saved
                step = saved
            elif saved == total:
                # Final part, which may be shorter than a full part.
                step = (saved % DOWNLOAD_PART_SIZE) or DOWNLOAD_PART_SIZE
            else:
                # Any other part is accounted as one full part.
                step = DOWNLOAD_PART_SIZE
            bar.update(step)

        if size is not None:
            bar.total += size

        # Record the path while the download is in flight; it is cleared
        # only after the download returns normally.
        self._incomplete_download = filename
        await self.client.download_file(
            location,
            file=filename,
            file_size=size,
            part_size_kb=DOWNLOAD_PART_SIZE // 1024,
            progress_callback=on_progress)
        self._incomplete_download = None
示例#4
0
    async def _download_media(self, media_id, context_id, sender_id, date,
                              progress):
        """
        Download the media stored under ``media_id`` unless it should be
        skipped.

        A media entry is skipped (the method returns without downloading)
        when:

        * no record with that ID exists;
        * it has a non-zero size outside ``self.min_size``..``self.max_size``;
        * its type is neither ``document`` nor ``photo``;
        * it has no stored name;
        * the resulting file already exists.

        The target path is ``self.media_fmt`` expanded through
        ``date.strftime`` and then ``str.format_map``, followed by
        ``.<media_id><extension>``.

        :param media_id: ``_id`` of the record to look up.
        :param context_id: ID exposed to the format string as
            ``context_id`` and used to look up ``name``.
        :param sender_id: ID exposed to the format string as ``sender_id``
            and used to look up ``sender_name``.
        :param date: object whose ``strftime`` expands ``self.media_fmt``.
        :param progress: progress tracker; its ``name`` and ``total`` are
            set and ``inc`` is called as parts arrive.
        """
        media_row = await db_media.find_one({'_id': media_id})
        if media_row is None:
            # Without a record there is nothing to name or download.
            logger.warning('No media record found for ID %s', media_id)
            return

        progress.name = media_row['name']
        size = media_row['size']
        if size:
            if size > self.max_size:
                logger.warning('忽略过大文件:%s', media_row['name'])
                return  # Skip files above the size limit
            if size < self.min_size:
                logger.warning('忽略过小文件:%s', media_row['name'])
                return  # Skip files below the size limit

        # Documents have attributes and they're saved under the "document"
        # namespace so we need to split it before actually comparing.
        media_type = media_row['type'].split('.')
        media_type, media_subtype = media_type[0], media_type[-1]
        if media_type not in ('document', 'photo'):
            logger.info('忽略文档类型:%s', media_type)
            return  # Only photos or documents are actually downloadable

        # Media without a stored name is never downloaded. Check this
        # before the name lookups below so they are not done for nothing.
        if not media_row['name']:
            logger.debug('忽略无名称文件')
            return

        formatter = defaultdict(str,
                                context_id=context_id,
                                sender_id=sender_id,
                                type=media_subtype or 'unknown',
                                name=await self._get_name(context_id)
                                or 'unknown',
                                sender_name=await self._get_name(sender_id)
                                or 'unknown')

        # Use the extension from the stored name if any (more accurate
        # than mime); otherwise derive one from the known mime type.
        filename, ext = os.path.splitext(media_row['name'])
        if not ext:
            ext = export_utils.get_extension(media_row['mime_type'])

        # Apply the date to the user format string and then replace the map
        formatter['filename'] = fix_windows_filename(filename)
        filename = date.strftime(self.media_fmt).format_map(formatter)
        filename += '.{}{}'.format(media_id, ext)
        if os.path.isfile(filename):
            logger.debug('Skipping already-existing file %s', filename)
            return
        logger.info('正在下载:%s 至 %s', media_type, filename)

        # ``os.makedirs('')`` raises, so only create a directory when the
        # formatted path actually contains one.
        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)

        if media_type == 'document':
            # For documents the three generic fields hold the document
            # ID, version and access hash respectively.
            location = types.InputDocumentFileLocation(
                id=media_row['local_id'],
                version=media_row['volume_id'],
                access_hash=media_row['secret'])
        else:
            location = types.InputFileLocation(
                local_id=media_row['local_id'],
                volume_id=media_row['volume_id'],
                secret=media_row['secret'])

        def progress_callback(saved, total):
            """Advance ``progress`` by the size of the part just received."""
            if total is None:
                # No size was found so the total wasn't incremented before
                progress.total += saved
                progress.inc(saved)
            elif saved == total:
                # Downloaded the last bit (which is probably <> part size)
                mod = (saved % DOWNLOAD_PART_SIZE) or DOWNLOAD_PART_SIZE
                progress.inc(mod)
            else:
                # All chunks are of the same size and this isn't the last one
                progress.inc(DOWNLOAD_PART_SIZE)

        if size is not None:
            progress.total += size

        # Record the path while the download is in flight; it is cleared
        # only after the download returns normally.
        self._incomplete_download = filename
        await self.client.download_file(location,
                                        file=filename,
                                        file_size=size,
                                        part_size_kb=DOWNLOAD_PART_SIZE //
                                        1024,
                                        progress_callback=progress_callback)
        self._incomplete_download = None
示例#5
0
    def download_past_media(self, dumper, target_id):
        """
        Downloads the past media that has already been dumped into the
        database but has not been downloaded for the given target ID yet.

        Media which formatted filename results in an already-existing file
        will be *ignored* and not re-downloaded again.

        :param dumper: object whose ``conn`` is queried for the ``Message``,
            ``Media`` and ``User`` rows.
        :param target_id: anything ``self.client.get_input_entity``
            accepts; it is resolved to the peer ID used as ``ContextID``.
        """
        # TODO Should this respect and download only allowed media? Or all?
        target_in = self.client.get_input_entity(target_id)
        target = self.client.get_entity(target_in)
        target_id = utils.get_peer_id(target)

        msg_cursor = dumper.conn.cursor()
        msg_cursor.execute(
            'SELECT ID, Date, FromID, MediaID FROM Message '
            'WHERE ContextID = ? AND MediaID IS NOT NULL', (target_id, ))

        # Pull rows one at a time until ``fetchone`` reports exhaustion.
        for msg_row in iter(msg_cursor.fetchone, None):
            media_row = dumper.conn.execute(
                'SELECT LocalID, VolumeID, Secret, Type, MimeType, Name '
                'FROM Media WHERE ID = ?', (msg_row[3], )).fetchone()
            if media_row is None:
                # The message points at media that was never stored.
                __log__.warning('No media row found for media ID %s',
                                msg_row[3])
                continue

            # Documents have attributes and they're saved under the
            # "document" namespace so we need to split it before actually
            # comparing.
            media_type = media_row[3].split('.')
            media_type, media_subtype = media_type[0], media_type[-1]
            if media_type not in ('photo', 'document'):
                # Only photos or documents are actually downloadable
                continue

            user_row = dumper.conn.execute(
                'SELECT FirstName, LastName FROM User WHERE ID = ?',
                (msg_row[2], )).fetchone()
            if user_row:
                # Build the name from the *user* row (first, last name),
                # not from the message row (which holds ID and date).
                sender_name = '{} {}'.format(user_row[0] or '', user_row[1]
                                             or '').strip()
            else:
                sender_name = ''

            # ``guess_extension`` cannot handle a missing mime type, so
            # only ask when one is stored; default to ".bin" otherwise.
            mime_type = media_row[4]
            ext = (mimetypes.guess_extension(mime_type)
                   if mime_type else None) or '.bin'
            if ext == '.jpe':
                ext = '.jpg'  # Nobody uses .jpe for photos

            date = datetime.datetime.utcfromtimestamp(msg_row[1])
            formatter = defaultdict(str,
                                    id=msg_row[0],
                                    context_id=target_id,
                                    sender_id=msg_row[2] or 0,
                                    type=media_subtype or 'unknown',
                                    ext=ext,
                                    name=utils.get_display_name(target)
                                    or 'unknown',
                                    sender_name=sender_name or 'unknown')

            # Photos never use their stored name; everything else falls
            # back to "<type>_<timestamp>" when no name is stored.
            name = None if media_subtype == 'photo' else media_row[5]
            formatter['filename'] = name or date.strftime(
                '{}_%Y-%m-%d_%H-%M-%S'.format(formatter['type']))
            filename = date.strftime(self.media_fmt).format_map(formatter)
            if not filename.endswith(ext):
                if filename.endswith('.'):
                    filename = filename[:-1]
                filename += ext

            if os.path.isfile(filename):
                __log__.debug('Skipping existing file %s', filename)
                continue

            __log__.info('Downloading to %s', filename)
            # ``os.makedirs('')`` raises, so only create a directory when
            # the formatted path actually contains one.
            directory = os.path.dirname(filename)
            if directory:
                os.makedirs(directory, exist_ok=True)

            if media_type == 'document':
                # For documents the three generic columns hold the
                # document ID, version and access hash respectively.
                location = types.InputDocumentFileLocation(
                    id=media_row[0],
                    version=media_row[1],
                    access_hash=media_row[2])
            else:
                location = types.InputFileLocation(
                    local_id=media_row[0],
                    volume_id=media_row[1],
                    secret=media_row[2])
            self.client.download_file(location, file=filename)
            # Pause after each actual download (presumably to avoid
            # hammering the server -- confirm).
            time.sleep(1)