def main(id, hash, target, mime, limit, output):
    """Download at most ``limit`` media attachments from a Telegram chat.

    Args:
        id: Telegram API id handed to ``TelegramClient``.
        hash: Telegram API hash handed to ``TelegramClient``.
        target: Chat/entity whose messages are iterated.
        mime: Accepted for interface compatibility; the visible code does
            not filter on it.
        limit: Maximum number of media files to download.
        output: Destination passed as ``file=`` to ``download_media``.
    """
    client = TelegramClient('telegram-files-downloader', id, hash)
    client.start()
    count = 0
    print('[INFO] - Started downloading files to {}'.format(output))
    for message in client.iter_messages(target):
        # ``>=`` (not ``>``): stop once exactly ``limit`` files were fetched;
        # the former check let one extra download through.
        if count >= limit:
            break
        if message.media is not None:
            client.download_media(message=message, file=output)
            count += 1
    print('[INFO] - Ended downloading files')
def History_save(phone_01,code_01):
    """Sign in to Telegram and dump recent messages/photos of a few dialogs.

    For up to 5 dialogs the last 20 messages are fetched twice: photos are
    downloaded with ``download_media`` and every other message is appended
    to ``"<dialog display name>.txt"`` in the working directory.

    Args:
        phone_01: Phone number; also used as the Telethon session name.
        code_01: Login code used for the second ``sign_in`` call.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.
    """
    # NOTE(review): API credentials are hard-coded in source.
    api_id = 1005783
    api_hash = '09cb354a26d71b92d9c12e06a7760732'
    phone = phone_01
    code = code_01
    time_01 = 0
    # NOTE(review): nothing inside this loop changes ``phone``, so a value
    # shorter than 5 characters never leaves it.
    while len(phone) < 5:
        time.sleep(1)
        print('phone waiting')
    if len(phone)>4:
        print('PHONE EXECUTED'+phone)
        time_01=10
        # The phone number doubles as the session name.
        client_01 = TelegramClient(phone, api_id, api_hash)
        print(" Step 1")
        client_01.connect()
        if not client_01.is_user_authorized():
            # First call passes only the phone; the code is supplied below.
            client_01.sign_in(phone)
            time.sleep(20)
            if len(code) > 4:
                print('START BEGIN'+code)
                time.sleep(2)
                client_01.sign_in(phone, code = code)
        # Local copies are overwritten with placeholder values after login.
        phone = '1'
        code = '2'
        time_01= 0
        dialogs = client_01.get_dialogs(limit = 5)
        # First pass: decide photo vs. text from ``message.photo``.
        for n, dialog_iter in enumerate(dialogs, start=0):
            entity = dialog_iter.entity
            messages = client_01.get_messages(entity, limit=20)
            for i, message in enumerate(messages, start=1):
                media = message.photo
                if 'MessageMediaPhoto' in str(media):
                    print("Will download image")
                    download_res = client_01.download_media( media)
                    print("Download done: {}".format(download_res))
                else:
                    try:
                        # NOTE(review): the file handle is never closed.
                        f = open("{}.txt".format(get_display_name(entity)), "a+")
                        f.write("\n" + str(message.to_id) + "\n" + str(message.message))
                        print(str(message))
                    except UnicodeEncodeError:
                        continue
        # Second pass over the same dialogs: decide from ``message.media``.
        for n, dialog_iter in enumerate(dialogs, start=0):
            entity = dialog_iter.entity
            messages = client_01.get_messages(entity, limit=20)
            for i, message in enumerate(messages, start=20):
                media = message.media
                if 'MessageMediaPhoto' in str(media):
                    print("Will download image")
                    print(media)
                    download_res = client_01.download_media( media)
                    print("Download done: {}".format(download_res))
                else:
                    try:
                        # NOTE(review): the file handle is never closed.
                        f = open("{}.txt".format(get_display_name(entity)), "a+")
                        f.write("\n" + str(message.to_id) + "\n" + str(message.message))
                        print(str(message))
                    except UnicodeEncodeError:
                        continue
# Only users that have a profile photo are handled here.
# (Stricter variant, mutual contacts only: `if user.photo and user.mutual:`)
if user.photo:
    request = functions.photos.GetUserPhotosRequest(
        user_id=user.id, offset=0, max_id=0, limit=100)
    photo_result = client(request)
    print('Download ', len(photo_result.photos), 'photos')

    for photo in photo_result.photos:
        stamp = photo.date.strftime('%Y-%m-%d_%H-%M-%S_%Z')
        filename = './profile_photos/' + str(user.id) + '_' + stamp + '_' + str(photo.id)

        # Any file with this stem, whatever its extension, counts as done.
        if glob.glob(filename + '.*'):
            print("File exist ", filename)
            continue

        downloadcount += 1
        if downloadcount > 99:
            # Pause after a batch of downloads to avoid a flood error.
            print('sleep for 60 seconds.')
            time.sleep(60)
            downloadcount = 0

        # Progress reporter handed to download_media for this photo.
        def callback(current, total):
            print(filename, ' Downloaded', current, 'out of', total,
                  'bytes: {:.2%}'.format(current / total))

        client.download_media(photo, file=filename, progress_callback=callback)
class Sync:
    """
    Sync iterates and receives messages from the Telegram group to the
    local SQLite DB.
    """
    config = {}
    db = None

    def __init__(self, config, session_file, db):
        """Start the Telegram client and make sure the media dir exists.

        Reads ``api_id``, ``api_hash`` and ``media_dir`` from ``config``.
        """
        self.config = config
        self.db = db

        self.client = TelegramClient(session_file, self.config["api_id"],
                                     self.config["api_hash"])
        self.client.start()

        # makedirs also creates missing parent directories and does not
        # fail when the directory already exists.
        os.makedirs(self.config["media_dir"], exist_ok=True)

    def sync(self):
        """
        Sync syncs messages from Telegram from the last synced message
        into the local SQLite DB.

        Messages are fetched in batches; after every non-empty batch the
        DB is committed and the loop sleeps ``fetch_wait`` seconds. The
        loop ends on an empty batch or once ``fetch_limit`` (when > 0)
        messages have been stored.
        """
        last_id, last_date = self.db.get_last_message_id()

        if last_id:
            logging.info("fetching from last message id={} ({})".format(
                last_id, last_date))

        n = 0
        while True:
            has = False
            for m in self._get_messages(self.config["group"],
                                        offset_id=last_id if last_id else 0):
                if not m:
                    continue

                has = True

                # Insert the records into DB.
                self.db.insert_user(m.user)

                if m.media:
                    self.db.insert_media(m.media)

                self.db.insert_message(m)

                last_date = m.date
                n += 1
                if n % 300 == 0:
                    logging.info("fetched {} messages".format(n))
                    self.db.commit()

                if self.config["fetch_limit"] > 0 and \
                        n >= self.config["fetch_limit"]:
                    # Limit reached: clear the flag so the outer loop stops.
                    has = False
                    break

            self.db.commit()
            if has:
                # Continue the next batch after the last message seen.
                last_id = m.id
                logging.info(
                    "fetched {} messages. sleeping for {} seconds".format(
                        n, self.config["fetch_wait"]))
                time.sleep(self.config["fetch_wait"])
            else:
                break

        self.db.commit()
        logging.info("finished. fetched {} messages. last message = {}".format(
            n, last_date))

    def _get_messages(self, group, offset_id) -> Message:
        """Yield one ``Message`` record per fetched Telegram message.

        Fetches up to ``fetch_batch_size`` messages after ``offset_id`` in
        oldest-first order. Messages without a sender are skipped.
        """
        # https://docs.telethon.dev/en/latest/quick-references/objects-reference.html#message
        for m in self.client.get_messages(
                group,
                offset_id=offset_id,
                limit=self.config["fetch_batch_size"],
                reverse=True):

            if not m or not m.sender:
                continue

            # Media.
            sticker = None
            med = None
            if m.media:
                # If it's a sticker, get the alt value (unicode emoji).
                # A media object whose document is None is not treated as a
                # sticker (it used to raise AttributeError here).
                if isinstance(m.media, telethon.tl.types.MessageMediaDocument) and \
                        m.media.document is not None and \
                        m.media.document.mime_type == "application/x-tgsticker":
                    alt = [
                        a.alt for a in m.media.document.attributes
                        if isinstance(
                            a, telethon.tl.types.DocumentAttributeSticker)
                    ]
                    if len(alt) > 0:
                        sticker = alt[0]
                else:
                    med = self._get_media(m)

            # Message.
            typ = "message"
            if m.action:
                if isinstance(m.action,
                              telethon.tl.types.MessageActionChatAddUser):
                    typ = "user_joined"
                elif isinstance(m.action,
                                telethon.tl.types.MessageActionChatDeleteUser):
                    typ = "user_left"

            yield Message(type=typ,
                          id=m.id,
                          date=m.date,
                          edit_date=m.edit_date,
                          content=sticker if sticker else m.raw_text,
                          reply_to=m.reply_to_msg_id if m.reply_to
                          and m.reply_to.reply_to_msg_id else None,
                          user=self._get_user(m.sender),
                          media=med)

    def _get_user(self, u) -> User:
        """Build a ``User`` record (tags, optional avatar) from a sender."""
        tags = []
        if u.bot:
            tags.append("bot")

        if u.scam:
            tags.append("scam")

        if u.fake:
            tags.append("fake")

        # Download sender's profile photo if it's not already cached.
        avatar = None
        if self.config["download_avatars"]:
            try:
                fname = self._download_avatar(u)
                avatar = fname
            except Exception as e:
                # Best effort: a failed avatar must not abort the sync.
                logging.error("error downloading avatar: #{}: {}".format(
                    u.id, e))

        return User(id=u.id,
                    username=u.username if u.username else str(u.id),
                    first_name=u.first_name,
                    last_name=u.last_name,
                    tags=tags,
                    avatar=avatar)

    def _get_media(self, msg):
        """Return a ``Media`` record for ``msg`` or ``None``.

        Non-empty web pages become ``type="webpage"``. Photos, documents
        and contacts are downloaded when ``download_media`` is enabled and
        become ``type="photo"``. Anything else, or a failed download,
        yields ``None``.
        """
        if isinstance(msg.media, telethon.tl.types.MessageMediaWebPage) and \
                not isinstance(msg.media.webpage, telethon.tl.types.WebPageEmpty):
            return Media(id=msg.id,
                         type="webpage",
                         url=msg.media.webpage.url,
                         title=msg.media.webpage.title,
                         description=msg.media.webpage.description
                         if msg.media.webpage.description else None,
                         thumb=None)
        elif isinstance(msg.media, telethon.tl.types.MessageMediaPhoto) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaDocument) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaContact):
            if self.config["download_media"]:
                logging.info("downloading media #{}".format(msg.id))
                try:
                    basename, fname, thumb = self._download_media(msg)
                    return Media(id=msg.id,
                                 type="photo",
                                 url=fname,
                                 title=basename,
                                 description=None,
                                 thumb=thumb)
                except Exception as e:
                    # Best effort: log and store the message without media.
                    logging.error("error downloading media: #{}: {}".format(
                        msg.id, e))

        return None

    def _download_media(self, msg) -> [str, str, str]:
        """
        Download a media / file attached to a message and return its
        original filename, sanitized name on disk, and the thumbnail
        (if any).
        """
        # Download the media to the temp dir and copy it back as
        # there does not seem to be a way to get the canonical
        # filename before the download.
        fpath = self.client.download_media(msg, file=tempfile.gettempdir())
        basename = os.path.basename(fpath)

        newname = "{}.{}".format(msg.id, self._get_file_ext(basename))
        shutil.move(fpath, os.path.join(self.config["media_dir"], newname))

        # If it's a photo, download the thumbnail.
        tname = None
        if isinstance(msg.media, telethon.tl.types.MessageMediaPhoto):
            tpath = self.client.download_media(msg,
                                               file=tempfile.gettempdir(),
                                               thumb=1)
            # Keep the already stored main file when no thumbnail came back.
            if tpath:
                tname = "thumb_{}.{}".format(
                    msg.id, self._get_file_ext(os.path.basename(tpath)))
                shutil.move(tpath,
                            os.path.join(self.config["media_dir"], tname))

        return basename, newname, tname

    def _get_file_ext(self, f) -> str:
        """Return the extension of ``f`` without the leading dot.

        Falls back to ``"file"`` when there is no extension, it is empty,
        or it is 6+ characters long. Callers join the result with ``"."``,
        so the fallback must not carry its own dot (it used to be
        ``".file"``, producing names like ``"12..file"``).
        """
        if "." in f:
            e = f.split(".")[-1]
            if 0 < len(e) < 6:
                return e

        return "file"

    def _download_avatar(self, user):
        """Cache the user's avatar as ``avatar_<id>.jpg`` in the media dir.

        Returns the file name, or ``None`` when nothing was downloaded.
        """
        fname = "avatar_{}.jpg".format(user.id)
        fpath = os.path.join(self.config["media_dir"], fname)

        if os.path.exists(fpath):
            return fname

        logging.info("downloading avatar #{}".format(user.id))

        # Download the file into a container, resize it, and then write to disk.
        b = BytesIO()
        self.client.download_profile_photo(user, file=b)
        if not b.getvalue():
            # Nothing was written (e.g. no profile photo): no avatar.
            return None

        im = Image.open(b)
        # Image.ANTIALIAS was an alias of LANCZOS and is gone in Pillow 10.
        im.thumbnail(self.config["avatar_size"], Image.LANCZOS)
        im.save(fpath, "JPEG")

        return fname
# --- Step 1: fetch every file of the gif channel that is not on disk yet ---
print("Download missing videos from gif channel")
client = TelegramClient('duplicate_checker', config['api_id'], config['api_hash'])
client.start()

channel_username = config['gif_channel']
channel_entity = client.get_entity(channel_username)

for message in client.iter_messages(channel_entity):
    # Messages without an attached file carry nothing to download.
    if message.file is None:
        continue

    # Target name: zero-padded message id plus the MIME subtype as extension.
    file_ext = message.file.mime_type.split("/")[-1]
    path = f"{dir_video}/{str(message.id).zfill(4)}.{file_ext}"
    if os.path.exists(path):
        continue

    print("Downloading message: {}".format(message))
    client.download_media(message=message, file=path)

# --- Step 2: prepare frame extraction for videos without an image dir ---
print("Decompose missing videos")
video_files = glob.glob(f"{dir_video}/*.mp4")
for video_file in video_files:
    video_number = video_file.split(os.sep)[-1].split(".")[0]
    video_output_dir = f"{dir_images}/{video_number}/"

    # An existing output directory marks the video as already handled.
    if os.path.exists(video_output_dir):
        continue

    os.mkdir(video_output_dir)
    print(f"Converting video: {video_file}")
    ffmpeg_outputs = {
        f"{dir_images}/{video_number}/out%d.png": "-vf fps=5 -vsync 0"
    }
    ff = ffmpy.FFmpeg(inputs={video_file: None}, outputs=ffmpeg_outputs)
f.write('1') f = open("Phone.txt", "w+") f.write('1') time_01 = 0 dialogs = client_01.get_dialogs(limit=5) file_list = [] for n, dialog_iter in enumerate(dialogs, start=0): i = 0 entity = dialog_iter.entity messages = client_01.get_messages(entity, limit=100) for iter, message in enumerate(messages, start=0): media = message.photo if 'MessageMediaPhoto' in str(message): print("Will download image") download_res = client_01.download_media(media) print("Download done: {}".format(download_res)) else: try: f = open("{}.txt".format(get_display_name(entity)), "a+") f.write("\n" + str(message.to_id) + "\n" + str(message.message)) if get_display_name(entity) not in file_list: file_list.append(str(get_display_name(entity))) print(file_list) except UnicodeEncodeError: continue except OSError: if '>' or '<' or '?' or '/' or '\\' or '|' in get_display_name( entity):
class Sync:
    """
    Sync iterates and receives messages from the Telegram group to the
    local SQLite DB.
    """
    config = {}
    db = None

    def __init__(self, config, session_file, db):
        """Start the Telegram client and make sure the media dir exists.

        Reads ``api_id``, ``api_hash`` and ``media_dir`` from ``config``.
        """
        self.config = config
        self.db = db

        self.client = TelegramClient(session_file, self.config["api_id"],
                                     self.config["api_hash"])
        self.client.start()

        # makedirs also creates missing parent directories and does not
        # fail when the directory already exists.
        os.makedirs(self.config["media_dir"], exist_ok=True)

    def sync(self, ids=None):
        """
        Sync syncs messages from Telegram from the last synced message
        into the local SQLite DB.

        When ``ids`` is given, only those messages are fetched and the
        loop stops after the first stored message. Otherwise messages are
        fetched in batches until a batch is empty or ``fetch_limit``
        (when > 0) messages have been stored.
        """
        if ids:
            last_id, last_date = (ids, None)
        else:
            last_id, last_date = self.db.get_last_message_id()

        if ids:
            logging.info("fetching message id={}".format(ids))
        elif last_id:
            logging.info("fetching from last message id={} ({})".format(
                last_id, last_date))

        group_id = self._get_group_id(self.config["group"])

        n = 0
        while True:
            has = False
            for m in self._get_messages(group_id,
                                        offset_id=last_id if last_id else 0,
                                        ids=ids):
                if not m:
                    continue

                has = True

                # Insert the records into DB.
                self.db.insert_user(m.user)

                if m.media:
                    self.db.insert_media(m.media)

                self.db.insert_message(m)

                last_date = m.date
                n += 1
                if n % 300 == 0:
                    logging.info("fetched {} messages".format(n))
                    self.db.commit()

                # Stop when the fetch limit is hit, or right away when
                # explicit ids were requested.
                if (self.config["fetch_limit"] > 0
                        and n >= self.config["fetch_limit"]) or ids:
                    has = False
                    break

            self.db.commit()
            if has:
                # Continue the next batch after the last message seen.
                last_id = m.id
                logging.info(
                    "fetched {} messages. sleeping for {} seconds".format(
                        n, self.config["fetch_wait"]))
                time.sleep(self.config["fetch_wait"])
            else:
                break

        self.db.commit()
        logging.info("finished. fetched {} messages. last message = {}".format(
            n, last_date))

    def _get_messages(self, group, offset_id, ids=None) -> Message:
        """Yield one ``Message`` record per fetched Telegram message.

        Fetches up to ``fetch_batch_size`` messages after ``offset_id`` in
        oldest-first order (``ids`` is passed through to the client).
        Messages without a sender are skipped.
        """
        # https://docs.telethon.dev/en/latest/quick-references/objects-reference.html#message
        for m in self.client.get_messages(
                group,
                offset_id=offset_id,
                limit=self.config["fetch_batch_size"],
                ids=ids,
                reverse=True):

            if not m or not m.sender:
                continue

            # Media.
            sticker = None
            med = None
            if m.media:
                # If it's a sticker, get the alt value (unicode emoji).
                # getattr(...) is not None also covers a document that is
                # present as an attribute but set to None.
                if isinstance(m.media, telethon.tl.types.MessageMediaDocument) and \
                        getattr(m.media, "document", None) is not None and \
                        m.media.document.mime_type == "application/x-tgsticker":
                    alt = [
                        a.alt for a in m.media.document.attributes
                        if isinstance(
                            a, telethon.tl.types.DocumentAttributeSticker)
                    ]
                    if len(alt) > 0:
                        sticker = alt[0]
                elif isinstance(m.media, telethon.tl.types.MessageMediaPoll):
                    med = self._make_poll(m)
                else:
                    med = self._get_media(m)

            # Message.
            typ = "message"
            if m.action:
                if isinstance(m.action,
                              telethon.tl.types.MessageActionChatAddUser):
                    typ = "user_joined"
                elif isinstance(m.action,
                                telethon.tl.types.MessageActionChatDeleteUser):
                    typ = "user_left"

            yield Message(type=typ,
                          id=m.id,
                          date=m.date,
                          edit_date=m.edit_date,
                          content=sticker if sticker else m.raw_text,
                          reply_to=m.reply_to_msg_id if m.reply_to
                          and m.reply_to.reply_to_msg_id else None,
                          user=self._get_user(m.sender),
                          media=med)

    def _get_user(self, u) -> User:
        """Build a ``User`` record (tags, optional avatar) from a sender.

        Senders that are not ``telethon.tl.types.User`` get no tags and no
        first/last name.
        """
        tags = []
        is_normal_user = isinstance(u, telethon.tl.types.User)

        if is_normal_user:
            if u.bot:
                tags.append("bot")

            if u.scam:
                tags.append("scam")

            if u.fake:
                tags.append("fake")

        # Download sender's profile photo if it's not already cached.
        avatar = None
        if self.config["download_avatars"]:
            try:
                fname = self._download_avatar(u)
                avatar = fname
            except Exception as e:
                # Best effort: a failed avatar must not abort the sync.
                logging.error("error downloading avatar: #{}: {}".format(
                    u.id, e))

        return User(id=u.id,
                    username=u.username if u.username else str(u.id),
                    first_name=u.first_name if is_normal_user else None,
                    last_name=u.last_name if is_normal_user else None,
                    tags=tags,
                    avatar=avatar)

    def _make_poll(self, msg):
        """Return a ``Media`` record of type ``"poll"`` for a poll message.

        The description is a JSON list with one entry per answer (label,
        count, correct, and percent when results are available).
        """
        options = [{
            "label": a.text,
            "count": 0,
            "correct": False
        } for a in msg.media.poll.answers]

        total = msg.media.results.total_voters
        if msg.media.results.results:
            for i, r in enumerate(msg.media.results.results):
                options[i]["count"] = r.voters
                # Avoid dividing by zero when nobody has voted.
                options[i]["percent"] = \
                    r.voters / total * 100 if total > 0 else 0
                options[i]["correct"] = r.correct

        return Media(id=msg.id,
                     type="poll",
                     url=None,
                     title=msg.media.poll.question,
                     description=json.dumps(options),
                     thumb=None)

    def _get_media(self, msg):
        """Return a ``Media`` record for ``msg`` or ``None``.

        Non-empty web pages become ``type="webpage"``. Photos, documents
        and contacts are downloaded when ``download_media`` is enabled and
        become ``type="photo"``. Anything else, or a failed download,
        yields ``None``.
        """
        if isinstance(msg.media, telethon.tl.types.MessageMediaWebPage) and \
                not isinstance(msg.media.webpage, telethon.tl.types.WebPageEmpty):
            return Media(id=msg.id,
                         type="webpage",
                         url=msg.media.webpage.url,
                         title=msg.media.webpage.title,
                         description=msg.media.webpage.description
                         if msg.media.webpage.description else None,
                         thumb=None)
        elif isinstance(msg.media, telethon.tl.types.MessageMediaPhoto) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaDocument) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaContact):
            if self.config["download_media"]:
                logging.info("downloading media #{}".format(msg.id))
                try:
                    basename, fname, thumb = self._download_media(msg)
                    return Media(id=msg.id,
                                 type="photo",
                                 url=fname,
                                 title=basename,
                                 description=None,
                                 thumb=thumb)
                except Exception as e:
                    # Best effort: log and store the message without media.
                    logging.error("error downloading media: #{}: {}".format(
                        msg.id, e))

        return None

    def _download_media(self, msg) -> [str, str, str]:
        """
        Download a media / file attached to a message and return its
        original filename, sanitized name on disk, and the thumbnail
        (if any).
        """
        # Download the media to the temp dir and copy it back as
        # there does not seem to be a way to get the canonical
        # filename before the download.
        fpath = self.client.download_media(msg, file=tempfile.gettempdir())
        basename = os.path.basename(fpath)

        newname = "{}.{}".format(msg.id, self._get_file_ext(basename))
        shutil.move(fpath, os.path.join(self.config["media_dir"], newname))

        # If it's a photo, download the thumbnail.
        tname = None
        if isinstance(msg.media, telethon.tl.types.MessageMediaPhoto):
            tpath = self.client.download_media(msg,
                                               file=tempfile.gettempdir(),
                                               thumb=1)
            # Keep the already stored main file when no thumbnail came back.
            if tpath:
                tname = "thumb_{}.{}".format(
                    msg.id, self._get_file_ext(os.path.basename(tpath)))
                shutil.move(tpath,
                            os.path.join(self.config["media_dir"], tname))

        return basename, newname, tname

    def _get_file_ext(self, f) -> str:
        """Return the extension of ``f`` without the leading dot.

        Falls back to ``"file"`` when there is no extension, it is empty,
        or it is 6+ characters long. Callers join the result with ``"."``,
        so the fallback must not carry its own dot (it used to be
        ``".file"``, producing names like ``"12..file"``).
        """
        if "." in f:
            e = f.split(".")[-1]
            if 0 < len(e) < 6:
                return e

        return "file"

    def _download_avatar(self, user):
        """Cache the user's avatar as ``avatar_<id>.jpg`` in the media dir.

        Returns the file name, or ``None`` when nothing was downloaded.
        """
        fname = "avatar_{}.jpg".format(user.id)
        fpath = os.path.join(self.config["media_dir"], fname)

        if os.path.exists(fpath):
            return fname

        logging.info("downloading avatar #{}".format(user.id))

        # Download the file into a container, resize it, and then write to disk.
        b = BytesIO()
        self.client.download_profile_photo(user, file=b)
        if not b.getvalue():
            # Nothing was written (e.g. no profile photo): no avatar.
            return None

        im = Image.open(b)
        # Image.ANTIALIAS was an alias of LANCZOS and is gone in Pillow 10.
        im.thumbnail(self.config["avatar_size"], Image.LANCZOS)
        im.save(fpath, "JPEG")

        return fname

    def _get_group_id(self, group):
        """
        Syncs the Entity cache and returns the Entity ID for the specified group,
        which can be a str/int for group ID, group name, or a group username.

        The authorized user must be a part of the group.

        Raises:
            SystemExit: with code 1 when the entity cannot be resolved.
        """
        # Get all dialogs for the authorized user, which also
        # syncs the entity cache to get latest entities
        # ref: https://docs.telethon.dev/en/latest/concepts/entities.html#getting-entities
        _ = self.client.get_dialogs()

        try:
            # If the passed group is a group ID, extract it.
            group = int(group)
        except ValueError:
            # Not a group ID, we have either a group name or
            # a group username: @group-username
            pass

        try:
            entity = self.client.get_entity(group)
        except ValueError:
            logging.critical(
                "the group: {} does not exist,"
                " or the authorized user is not a participant!".format(group))
            # This is a critical error, so exit with code: 1.
            # Raised directly because the exit() builtin is only installed
            # by the site module and may be missing.
            raise SystemExit(1)

        return entity.id
# Complete the login with the code the user types in.
client.sign_in(phone, input('Enter the code: '))

# Keep only the dialogs that are groups.
chats = client.get_dialogs()
groups = []
for chat in chats:
    try:
        if chat.is_group:
            groups.append(chat)
    except Exception:
        # Skip dialogs that cannot be inspected, but no longer swallow
        # KeyboardInterrupt/SystemExit as the bare ``except`` did.
        continue

print('Listing the groups...')
for i, group in enumerate(groups):
    print(str(i) + '- ' + group.title)

# The number typed by the user is the index printed in the listing above.
target_group = groups[int(input("\nEnter the number of the group: "))]

print('Fetching members of {0}...'.format(target_group.name))
all_participants = client.get_participants(target_group, aggressive=True)

print("Downloading profilce pictures...")
for user in all_participants:
    print(user.id)
    # One file per profile photo: ./<group id>/<user id>/<photo id>.jpg
    for photo in client.iter_profile_photos(user):
        client.download_media(photo,
                              file="./{0}/{1}/{2}.jpg".format(
                                  target_group.id, user.id, photo.id))
from telethon.sync import TelegramClient

# Placeholder credentials; replace before running.
api_id = 'TODO'
api_hash = 'TODO'

client = TelegramClient('test_session', api_id, api_hash)
client.start()

# Dump the client's attribute names.
print(dir(client))

# Fetch up to 10000 messages of the chat, then download each one's media.
fetched = client.get_messages('ml_progress_bot', limit=10000)
for message in fetched:
    client.download_media(message)
for message in messages: if int(utils.get_message_id(message)) > mid: speak=str(message.message).replace('\r', '').replace('\n', '').replace('\t', '') sender=str(utils.get_display_name(message.sender)) #对字段长度进行截断,防止数据库报错 if len(speak)>500: speak=speak[:500] if len(sender)>80: sender=sender[:80] if len(speak)!=0 and speak!='None': cursor.execute("INSERT INTO rawData VALUES(null,%s,%s,%s,%s);", (str(group_id), str(sender), str(str(message.date)[:19]),str(speak))) else: break #获取图片 photos = client.get_messages(group_id, None, filter=InputMessagesFilterPhotos) for photo in photos: filename = "./pic/" + str(group_id)+'-'+str(photo.id) + ".jpg" client.download_media(photo, filename) df['last_message_id'][idn.index(group_id)] = utils.get_message_id(messages[0]) df.to_csv('incremental.csv', index=False) db.commit() print(channel_item.title,'update commit success') except ValueError as e: print(e) except BaseException as e: print(e)
history = client( GetHistoryRequest( peer=peer, offset_id=0, offset_date=None, add_offset=0, limit=100, max_id=0, min_id=0, hash=0, ) ) for i, message in enumerate(history.messages): media = message.media if media: if args.datefrom and message.date < args.datefrom: continue timestamp = message.date.astimezone(localzone).strftime( "%Y-%m-%d-%H-%M-%S" ) path = userdir / (timestamp + f"_{i}") path_created = client.download_media(message, file=path) print(path_created) except Exception as e: with open(basedir / "errors.log", "a") as f: print(f"Something wrong with user {user}", file=f) print(e, file=f)