async def download(
        self,
        file: TypeLocation,
        file_size: int,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None) -> AsyncGenerator[bytes, None]:
    connection_count = connection_count or self._get_connection_count(file_size)
    log.debug(f"Download connection count is {connection_count}")
    part_size = (part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024
    part_count = math.ceil(file_size / part_size)
    log.debug("Starting parallel download: "
              f"{connection_count} {part_size} {part_count} {file!s}")
    await self._init_download(connection_count, file, part_count, part_size)

    part = 0
    while part < part_count:
        # Fan one request out to every sender, then drain the results in order
        tasks = []
        for sender in self.senders:
            tasks.append(self.loop.create_task(sender.next()))
        for task in tasks:
            data = await task
            if not data:
                break
            yield data
            part += 1
            log.debug(f"Part {part} downloaded")

    log.debug("Parallel download finished, cleaning up connections")
    await self._cleanup()
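# Usage sketch (illustrative, not from the original source): stream the
# parallel download() generator above into a local file. "ParallelTransferrer"
# is a hypothetical name for whatever class owns download(); any wrapper that
# exposes the same generator works the same way.
async def save_file_parallel(client, location, file_size: int, path: str) -> None:
    transferrer = ParallelTransferrer(client)  # hypothetical owner of download()
    with open(path, "wb") as f:
        async for chunk in transferrer.download(location, file_size):
            f.write(chunk)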
async def download(self, file, file_size, offset, limit):
    part_size_kb = utils.get_appropriated_part_size(file_size)
    part_size = int(part_size_kb * 1024)
    first_part_cut = offset % part_size
    first_part = math.floor(offset / part_size)
    last_part_cut = part_size - (limit % part_size)
    last_part = math.ceil(limit / part_size)
    part_count = math.ceil(file_size / part_size)
    part = first_part
    try:
        async for chunk in self.iter_download(
                file, offset=first_part * part_size, request_size=part_size):
            if part == first_part:
                yield chunk[first_part_cut:]
            elif part == last_part:
                yield chunk[:last_part_cut]
            else:
                yield chunk
            self.log.debug(
                f"Part {part}/{last_part} (total {part_count}) served!")
            part += 1
        self.log.debug("serving finished")
    except (GeneratorExit, StopAsyncIteration, asyncio.CancelledError):
        self.log.debug("file serve interrupted")
        raise
    except Exception:
        self.log.debug("file serve errored", exc_info=True)
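# Worked example of the part arithmetic above (standalone, illustrative):
# with 1 MiB parts and a requested byte range [300_000, 1_500_000), the
# formulas above produce the values shown in the comments.
import math

part_size = 1024 * 1024
offset, limit = 300_000, 1_500_000
first_part = math.floor(offset / part_size)       # 0
first_part_cut = offset % part_size               # 300000 bytes skipped from the first chunk
last_part = math.ceil(limit / part_size)          # 2
last_part_cut = part_size - (limit % part_size)   # 597152, per the formula above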
async def download(
        self,
        file: TypeLocation,
        file_size: int,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None,
) -> AsyncGenerator[bytes, None]:
    connection_count = connection_count or self._get_connection_count(file_size)
    log.debug(f"Download connection count is {connection_count}")
    part_size = (part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024
    part_count = math.ceil(file_size / part_size)
    log.debug("Starting parallel download: "
              f"{connection_count} {part_size} {part_count} {file!s}")
    await self._init_download(connection_count, file, part_count, part_size)

    part = 0
    while part < part_count:
        tasks = [
            self.loop.create_task(sender.next()) for sender in self.senders
        ]
        for task in tasks:
            data = await task
            if not data:
                break
            yield data
            part += 1
            log.debug(f"Part {part} downloaded")

    log.debug("Parallel download finished, cleaning up connections")
    await self._cleanup()
async def download(
        self,
        file: TypeLocation,
        file_size: int,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None,
) -> AsyncGenerator[bytes, None]:
    connection_count = connection_count or self._get_connection_count(file_size)
    part_size = (part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024
    part_count = math.ceil(file_size / part_size)
    await self._init_download(connection_count, file, part_count, part_size)

    part = 0
    while part < part_count:
        tasks = []
        for sender in self.senders:
            tasks.append(self.loop.create_task(sender.next()))
        for task in tasks:
            data = await task
            if not data:
                break
            yield data
            part += 1

    await self._cleanup()
async def download(
        self,
        file: TypeLocation,
        file_size: int,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None) -> AsyncGenerator[bytes, None]:
    connection_count = connection_count or self._get_connection_count(file_size)
    log.debug(f"Download connection count is {connection_count}")
    part_size = (part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024
    part_count = math.ceil(file_size / part_size)
    log.debug("Starting parallel download: "
              f"{connection_count} {part_size} {part_count} {file!s}")
    await self._init_download(connection_count, file, part_count, part_size)

    part = 0
    while part < part_count:
        tasks = []
        for sender in self.senders:
            tasks.append(self.loop.create_task(sender.next()))
        for task in tasks:
            data = await task
            if not data:
                break
            yield data
            part += 1
            log.debug(f"Part {part} downloaded")

    log.debug("Parallel download finished, cleaning up connections")
    await self._cleanup()
async def download(
        self,
        file: TypeLocation,
        file_size: int,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None,
) -> AsyncGenerator[bytes, None]:
    connection_count = connection_count or self._get_connection_count(file_size)
    part_size = (part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024
    part_count = math.ceil(file_size / part_size)
    log.debug(
        "Starting parallel download: "
        f"{connection_count} {part_size} {part_count} {file!s}"
    )
    await self._init_download(connection_count, file, part_count, part_size)

    part = 0
    while part < part_count:
        tasks = []
        for sender in self.senders:
            tasks.append(self.loop.create_task(sender.next()))
        for task in tasks:
            data = await task
            if not data:
                break
            yield data
            part += 1
            log.debug(f"Part {part} downloaded")

    log.debug("Parallel download finished, cleaning up connections")
    await self._cleanup()
async def init_upload(
        self,
        file_id: int,
        file_size: int,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None) -> Tuple[int, int, bool]:
    connection_count = connection_count or self._get_connection_count(file_size)
    # Cast to int so a float part_size_kb still yields an integral byte count,
    # matching the declared Tuple[int, int, bool] return type
    part_size = int((part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024)
    part_count = (file_size + part_size - 1) // part_size
    # Telegram distinguishes "big" files above 10MB from small ones
    is_large = file_size > 10 * 1024 * 1024
    await self._init_upload(connection_count, file_id, part_count, is_large)
    return part_size, part_count, is_large
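# Illustrative call site (assumed names): prepare a 25 MiB upload before
# streaming its parts. "uploader" stands in for whatever object defines
# init_upload() above; the file id would come from helpers.generate_random_long().
async def prepare_upload(uploader, file_id: int) -> None:
    file_size = 25 * 1024 * 1024
    part_size, part_count, is_large = await uploader.init_upload(file_id, file_size)
    # is_large is True here, since 25 MiB exceeds the 10 * 1024 * 1024 threshold
    print(part_size, part_count, is_large)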
async def download(self, file, file_size, offset, limit):
    part_size = utils.get_appropriated_part_size(file_size) * 1024
    first_part_cut = offset % part_size
    first_part = math.floor(offset / part_size)
    last_part_cut = part_size - (limit % part_size)
    last_part = math.ceil(limit / part_size)
    part_count = math.ceil(file_size / part_size)
    part = first_part
    self.log.debug(
        "Request details: "
        f"part_size(bytes) = {part_size}, "
        f"first_part = {first_part}, cut = {first_part_cut} "
        f"(length={part_size - first_part_cut}), "
        f"last_part = {last_part}, cut = {last_part_cut} "
        f"(length={last_part_cut}), "
        f"parts_count = {part_count}")
    try:
        async for chunk in self.iter_download(
                file, offset=first_part * part_size, request_size=part_size):
            self.log.debug(
                f"Part {part}/{last_part} (total {part_count}) served!")
            if part == first_part:
                yield chunk[first_part_cut:]
            elif part == last_part:
                yield chunk[:last_part_cut]
                break
            else:
                yield chunk
            part += 1
        self.log.debug("serving finished")
    except (GeneratorExit, StopAsyncIteration, asyncio.CancelledError):
        self.log.debug("file serve interrupted")
        raise
    except Exception:
        self.log.debug("file serve errored", exc_info=True)
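# Hypothetical helper (not part of the original code) showing how an HTTP
# Range header could be mapped to the (offset, limit) pair the serving
# download() above expects, assuming limit is an absolute end offset as the
# arithmetic above suggests.
def range_to_offset_limit(range_header: str, file_size: int) -> tuple:
    # "bytes=START-END" with an inclusive END, per RFC 7233
    _unit, _, spec = range_header.partition("=")
    start_s, _, end_s = spec.partition("-")
    start = int(start_s) if start_s else 0
    end = int(end_s) if end_s else file_size - 1
    return start, end + 1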
def download_file(self,
                  input_location,
                  file=None,
                  part_size_kb=None,
                  file_size=None,
                  progress_callback=None):
    """
    Downloads the given input location to a file.

    Args:
        input_location (:tl:`InputFileLocation`):
            The file location from which the file will be downloaded.

        file (`str` | `file`):
            The output file path, directory, or stream-like object.
            If the path exists and is a file, it will be overwritten.

        part_size_kb (`int`, optional):
            Chunk size when downloading files. The larger, the fewer
            requests will be made (up to 512KB maximum).

        file_size (`int`, optional):
            The file size that is about to be downloaded, if known.
            Only used if ``progress_callback`` is specified.

        progress_callback (`callable`, optional):
            A callback function accepting two parameters:
            ``(downloaded bytes, total)``. Note that ``total`` is the
            provided ``file_size``.
    """
    if not part_size_kb:
        if not file_size:
            part_size_kb = 64  # Reasonable default
        else:
            part_size_kb = utils.get_appropriated_part_size(file_size)

    part_size = int(part_size_kb * 1024)
    # https://core.telegram.org/api/files says:
    # > part_size % 1024 = 0 (divisible by 1KB)
    #
    # But https://core.telegram.org/cdn (more recent) says:
    # > limit must be divisible by 4096 bytes
    # So we just stick to the 4096 limit.
    if part_size % 4096 != 0:
        raise ValueError('The part size must be evenly divisible by 4096.')

    in_memory = file is None
    if in_memory:
        f = io.BytesIO()
    elif isinstance(file, str):
        # Ensure that we'll be able to download the media
        helpers.ensure_parent_dir_exists(file)
        f = open(file, 'wb')
    else:
        f = file

    # The used client will change if FileMigrateError occurs
    client = self
    cdn_decrypter = None
    input_location = utils.get_input_location(input_location)

    download_thread = []
    q_request = []

    __log__.info('Downloading file in chunks of %d bytes', part_size)
    # Scale the thread count with the file size, capped at the configured
    # maximum. NOTE: this scaling requires file_size to be known.
    threads_count = 2 + int((self._download_threads_count - 2)
                            * float(file_size) / (1024 * 1024 * 10))
    threads_count = min(threads_count, self._download_threads_count)
    # threads_count = 1
    # threads_count = min(part_count, threads_count)

    try:
        offset = 0
        result = None
        try:
            # Fetch the first chunk on this thread to detect CDN/DC redirects
            request = GetFileRequest(input_location, offset, part_size)
            result = client(request)
            if isinstance(result, FileCdnRedirect):
                __log__.info('File lives in a CDN')
                cdn_decrypter, result = CdnDecrypter.prepare_decrypter(
                    client, self._get_cdn_client(result), result)
            else:
                f.write(result.bytes)
                offset += part_size
        except FileMigrateError as e:
            __log__.info('File lives in another DC')
            client = self._get_exported_client(e.new_dc)

        # if cdn_decrypter:
        #     result = cdn_decrypter.get_file()
        #     if not result.bytes:
        #         return getattr(result, 'type', '')
        #     f.write(result.bytes)

        __log__.debug('Saved %d more bytes', len(result.bytes))
        if progress_callback:
            progress_callback(f.tell(), file_size)

        # Spawn the worker threads, one request queue per thread
        for i in range(threads_count):
            q_request.append(Queue())
            thread_dl = self.ProcessDownload(
                'thread {0}'.format(i), self, q_request[i])
            thread_dl.start()
            download_thread.append(thread_dl)

        # offset += part_size
        while True:
            # Hand one request to every worker, then wait for the batch
            for i in range(threads_count):
                if cdn_decrypter:
                    q_request[i].put(cdn_decrypter)
                else:
                    request = GetFileRequest(input_location, offset, part_size)
                    q_request[i].put(request)
                    offset += part_size

            for q in q_request:
                q.join()

            for th in download_thread:
                if th.result and th.result.bytes:
                    f.write(th.result.bytes)
                    if progress_callback:
                        progress_callback(f.tell(), file_size)
                else:
                    # An empty chunk means the download is complete:
                    # stop the workers and return the file type, if any
                    for i in range(threads_count):
                        q_request[i].put(None)
                    for th in download_thread:
                        th.join()
                    return getattr(th.result, 'type', '')
    finally:
        if client != self:
            client.disconnect()

        if cdn_decrypter:
            try:
                cdn_decrypter.client.disconnect()
            except Exception:
                pass

        if isinstance(file, str):
            f.close()
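# Minimal, hypothetical call site for download_file() above: client and
# document are placeholders for a connected instance and a media object;
# only the keyword arguments shown come from the signature above.
def fetch_document(client, document, path):
    def on_progress(downloaded, total):
        # total is simply the file_size that was passed in
        print('{}/{} bytes'.format(downloaded, total))

    return client.download_file(
        document, file=path,
        file_size=getattr(document, 'size', None),
        progress_callback=on_progress)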
def upload_file(self,
                file,
                part_size_kb=None,
                file_name=None,
                use_cache=None,
                progress_callback=None):
    """
    Uploads the specified file and returns a handle (an instance of
    InputFile or InputFileBig, as required) which can be later used
    before it expires (they are usable during less than a day).

    Uploading a file will simply return a "handle" to the file stored
    remotely in the Telegram servers, which can be later used on. This
    will **not** upload the file to your own chat or any chat at all.

    Args:
        file (`str` | `bytes` | `file`):
            The path of the file, byte array, or stream that will be sent.
            Note that if a byte array or a stream is given, a filename
            or its type won't be inferred, and it will be sent as an
            "unnamed application/octet-stream".

            Subsequent calls with the very same file will result in
            immediate uploads, unless ``.clear_file_cache()`` is called.

        part_size_kb (`int`, optional):
            Chunk size when uploading files. The larger, the fewer
            requests will be made (up to 512KB maximum).

        file_name (`str`, optional):
            The file name which will be used on the resulting InputFile.
            If not specified, the name will be taken from the ``file``
            and if this is not a ``str``, it will be ``"unnamed"``.

        use_cache (`type`, optional):
            The type of cache to use (currently either ``InputDocument``
            or ``InputPhoto``). If present and the file is small enough
            to need the MD5, it will be checked against the database,
            and if a match is found, the upload won't be made. Instead,
            an instance of type ``use_cache`` will be returned.

        progress_callback (`callable`, optional):
            A callback function accepting two parameters:
            ``(sent bytes, total)``.

    Returns:
        :tl:`InputFileBig` if the file size is larger than 10MB,
        ``InputSizedFile`` (subclass of :tl:`InputFile`) otherwise.
    """
    if isinstance(file, (InputFile, InputFileBig)):
        return file  # Already uploaded

    if isinstance(file, str):
        file_size = os.path.getsize(file)
    elif isinstance(file, bytes):
        file_size = len(file)
    else:
        file = file.read()
        file_size = len(file)

    # File will now either be a string or bytes
    if not part_size_kb:
        part_size_kb = utils.get_appropriated_part_size(file_size)

    if part_size_kb > 512:
        raise ValueError('The part size must be less or equal to 512KB')

    part_size = int(part_size_kb * 1024)
    if part_size % 1024 != 0:
        raise ValueError('The part size must be evenly divisible by 1024')

    # Set a default file name if None was specified
    file_id = helpers.generate_random_long()
    if not file_name:
        if isinstance(file, str):
            file_name = os.path.basename(file)
        else:
            file_name = str(file_id)

    # Determine whether the file is too big (over 10MB) or not
    # Telegram does make a distinction between smaller or larger files
    is_large = file_size > 10 * 1024 * 1024
    hash_md5 = hashlib.md5()
    if not is_large:
        # Calculate the MD5 hash before anything else.
        # As this needs to be done always for small files,
        # might as well do it before anything else and
        # check the cache.
        if isinstance(file, str):
            with open(file, 'rb') as stream:
                file = stream.read()
        hash_md5.update(file)
        if use_cache:
            cached = self.session.get_file(
                hash_md5.digest(), file_size, cls=use_cache
            )
            if cached:
                return cached

    part_count = (file_size + part_size - 1) // part_size
    __log__.info('Uploading file of %d bytes in %d chunks of %d',
                 file_size, part_count, part_size)

    with open(file, 'rb') if isinstance(file, str) else BytesIO(file) as stream:
        # Scale the thread count with the file size, capped at the
        # configured maximum and the number of parts
        threads_count = 2 + int((self._upload_threads_count - 2)
                                * float(file_size) / (1024 * 1024 * 10))
        threads_count = min(threads_count, self._upload_threads_count)
        threads_count = min(part_count, threads_count)

        upload_thread = []
        q_request = Queue()
        # Spawn the worker threads; they all consume the same queue
        for i in range(threads_count):
            thread_dl = self.ProcessUpload('thread {0}'.format(i), self, q_request)
            thread_dl.start()
            upload_thread.append(thread_dl)

        for part_index in range(0, part_count, threads_count):
            # Read the file in chunks of size part_size and queue one
            # save request per worker
            for part_thread_index in range(threads_count):
                if part_index + part_thread_index >= part_count:
                    break
                part = stream.read(part_size)

                # The SavePartRequest is different depending on whether
                # the file is too large or not (over or less than 10MB)
                if is_large:
                    request = SaveBigFilePartRequest(
                        file_id, part_index + part_thread_index,
                        part_count, part)
                else:
                    request = SaveFilePartRequest(
                        file_id, part_index + part_thread_index, part)
                q_request.put(request)

            # q_request.join()
            job_completed = False
            while not job_completed:
                for th in upload_thread:
                    if th:
                        if th.result is True:
                            job_completed = True
                            __log__.debug('Uploaded %d/%d',
                                          part_index + 1, part_count)
                            if progress_callback:
                                progress_callback(stream.tell(), file_size)
                        elif th.result is False:
                            raise RuntimeError(
                                'Failed to upload file part {}.'.format(part_index))
            q_request.join()

        for i in range(threads_count):
            q_request.put(None)
        for th in upload_thread:
            th.join()

    if is_large:
        return InputFileBig(file_id, part_count, file_name)
    else:
        return InputSizedFile(
            file_id, part_count, file_name, md5=hash_md5, size=file_size
        )
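# Hypothetical call site for upload_file() above: upload a path and reuse
# the returned handle. Only upload_file() itself comes from the code above;
# the wrapper name and callback are illustrative.
def upload_with_progress(client, path):
    def on_progress(sent, total):
        print('uploaded {}/{} bytes'.format(sent, total))

    handle = client.upload_file(path, progress_callback=on_progress)
    # InputFileBig for files over 10MB, InputSizedFile otherwise
    return handle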
def upload(self, file, file_name=None):
    entity = types.InputPeerSelf()

    # begin _file_to_media
    media = None
    file_handle = None

    # begin _upload_file
    file_id = helpers.generate_random_long()
    if not file_name and getattr(file, 'name', None):
        file_name = file.name
    if file_name is None:
        file_name = str(file_id)

    file = file.read()
    file_size = len(file)

    part_size = int(utils.get_appropriated_part_size(file_size) * 1024)
    is_large = file_size > 10 * 1024 * 1024
    hash_md5 = hashlib.md5()
    if not is_large:
        hash_md5.update(file)

    part_count = (file_size + part_size - 1) // part_size
    with BytesIO(file) as stream:
        for part_index in range(part_count):
            part = stream.read(part_size)
            if is_large:
                request = functions.upload.SaveBigFilePartRequest(
                    file_id, part_index, part_count, part)
            else:
                request = functions.upload.SaveFilePartRequest(
                    file_id, part_index, part)
            result = self.client(request)
            yield float(round(100.0 * (part_index / part_count), 2))
            if not result:
                raise RuntimeError(
                    'Failed to upload file part {}.'.format(part_index))

    if is_large:
        file_handle = types.InputFileBig(file_id, part_count, file_name)
    else:
        file_handle = custom.InputSizedFile(
            file_id, part_count, file_name, md5=hash_md5, size=file_size)
    # end _upload_file

    attributes, mime_type = utils.get_attributes(
        file, force_document=True, voice_note=False, video_note=False)
    attributes[0].file_name = file_name
    media = types.InputMediaUploadedDocument(
        file=file_handle, mime_type=mime_type, attributes=attributes)
    # end _file_to_media

    request = functions.messages.SendMediaRequest(
        entity, media, reply_to_msg_id=None, message='',
        entities=[], reply_markup=None, silent=None)
    t = self.client(request)
    if type(t) != types.Updates:
        t = self.client.loop.run_until_complete(t)
    msg = self.client._get_response_message(request, t, entity)
    yield msg
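# Consuming the generator above (illustrative): it yields float percentages
# while parts are saved and finally the sent message object. "uploader" is a
# hypothetical instance of whatever class defines upload().
def send_with_progress(uploader, stream, name):
    message = None
    for item in uploader.upload(stream, file_name=name):
        if isinstance(item, float):
            print('{:.2f}% uploaded'.format(item))
        else:
            message = item  # the final yield is the sent message
    return message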