示例#1
0
def main(id, hash, target, mime, limit, output):
    """Download media attachments from a Telegram chat.

    Iterates the messages of ``target`` and downloads the media of every
    message that carries some, stopping once ``limit`` downloads were made.

    Args:
        id: Telegram API id handed to ``TelegramClient``.
        hash: Telegram API hash handed to ``TelegramClient``.
        target: Entity whose messages are iterated.
        mime: Accepted for interface compatibility; this function does not
            read it.
        limit: Maximum number of media downloads.
        output: Destination passed as ``file`` to ``download_media``.
    """
    client = TelegramClient('telegram-files-downloader', id, hash)
    client.start()
    count = 0
    print('[INFO] - Started downloading files to {}'.format(output))
    for message in client.iter_messages(target):
        # ``>=`` stops after exactly ``limit`` downloads; the former ``>``
        # allowed one extra file through (and one file for ``limit == 0``).
        if count >= limit:
            break
        if message.media is not None:
            client.download_media(message=message, file=output)
            count += 1
    print('[INFO] - Ended downloading files')
示例#2
0
def _append_message_text(entity, message):
    """Append a message's ``to_id`` and text to ``<display name>.txt``.

    The message is also echoed to stdout. A ``UnicodeEncodeError`` raised
    while writing or printing is swallowed so that one unencodable message
    does not abort the export.
    """
    try:
        # ``with`` closes the handle after each write; previously a new file
        # object was opened for every message and never closed.
        with open("{}.txt".format(get_display_name(entity)), "a+") as f:
            f.write("\n" + str(message.to_id) + "\n" + str(message.message))
        print(str(message))
    except UnicodeEncodeError:
        pass


def History_save(phone_01,code_01):
    """Sign in with a phone number and export recent messages of 5 dialogs.

    Connects a ``TelegramClient`` whose session name is the phone number,
    signs in with ``code_01`` when the session is not yet authorized, then
    walks the first 5 dialogs twice, fetching 20 messages per dialog on each
    pass. Photos are downloaded; every other message is appended to a text
    file named after the dialog.

    Args:
        phone_01: Phone number; also used as the session name.
        code_01: Login code used for the second ``sign_in`` call.
    """
    # NOTE(review): API credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    api_id = 1005783
    api_hash = '09cb354a26d71b92d9c12e06a7760732'
    phone = phone_01
    code = code_01
    # NOTE(review): ``phone`` is never reassigned inside this loop, so a
    # value shorter than 5 characters keeps the function waiting forever.
    while len(phone) < 5:
        time.sleep(1)
        print('phone waiting')

    # Past the loop ``len(phone)`` is at least 5, so the former
    # ``if len(phone)>4`` guard was always true and has been flattened.
    print('PHONE EXECUTED'+phone)
    client_01 = TelegramClient(phone, api_id, api_hash)
    print(" Step 1")
    client_01.connect()
    if not client_01.is_user_authorized():
        # The first call requests the login code, the second submits it.
        client_01.sign_in(phone)
        time.sleep(20)
        if len(code) > 4:
            print('START BEGIN'+code)
        time.sleep(2)
        client_01.sign_in(phone, code = code)

    dialogs = client_01.get_dialogs(limit = 5)

    # First pass: the photo test is made against ``message.photo``.
    for dialog_iter in dialogs:
        entity = dialog_iter.entity
        messages = client_01.get_messages(entity, limit=20)
        for message in messages:
            media = message.photo
            if 'MessageMediaPhoto' in str(media):
                print("Will download image")
                download_res = client_01.download_media(
                    media)
                print("Download done: {}".format(download_res))
            else:
                _append_message_text(entity, message)

    # Second pass: same dialogs, but the test is made against
    # ``message.media``.
    # NOTE(review): messages that are not photos are appended to the text
    # file by both passes - confirm the duplication is intended.
    for dialog_iter in dialogs:
        entity = dialog_iter.entity
        messages = client_01.get_messages(entity, limit=20)
        for message in messages:
            media = message.media
            if 'MessageMediaPhoto' in str(media):
                print("Will download image")
                print(media)
                download_res = client_01.download_media(
                    media)
                print("Download done: {}".format(download_res))
            else:
                _append_message_text(entity, message)
    # Download the profile photos of ``user`` into ./profile_photos/.
    # NOTE(review): ``user``, ``client`` and ``downloadcount`` are not defined
    # anywhere in the visible code; this section looks like it belongs to a
    # different snippet whose header is missing - confirm before relying on it.
    #if user.photo and user.mutual: #only mutual contacts
    if user.photo:
        #print(user.stringify())
        # Ask for up to 100 profile photos of this user in one request.
        photo_result = client(
            functions.photos.GetUserPhotosRequest(user_id=user.id,
                                                  offset=0,
                                                  max_id=0,
                                                  limit=100))
        print('Download ', len(photo_result.photos), 'photos')
        for photo in photo_result.photos:
            # Target path without extension: <user id>_<photo date>_<photo id>.
            filename = './profile_photos/' + str(
                user.id) + '_' + photo.date.strftime(
                    '%Y-%m-%d_%H-%M-%S_%Z') + '_' + str(photo.id)
            # Any existing file with this stem, whatever its extension,
            # counts as already downloaded.
            if glob.glob(filename + '.*'):
                print("File exist ", filename)
            else:
                downloadcount += 1
                # Pause for a minute after every 100 downloads, then restart
                # the counter.
                if downloadcount > 99:
                    #wait 60 second to avoid flood error
                    print('sleep for 60 seconds.')
                    time.sleep(60)
                    downloadcount = 0
                # Printing download progress
                # NOTE(review): ``current / total`` raises ZeroDivisionError
                # if ``total`` is ever 0 - confirm that cannot happen.
                def callback(current, total):
                    print(filename, ' Downloaded', current, 'out of', total,
                          'bytes: {:.2%}'.format(current / total))

                client.download_media(photo,
                                      file=filename,
                                      progress_callback=callback)
示例#4
0
class Sync:
    """
    Sync iterates and receives messages from the Telegram group to the
    local SQLite DB.
    """
    # Class-level defaults; both are replaced per instance in __init__.
    config = {}
    db = None

    def __init__(self, config, session_file, db):
        """
        Keep the config and DB handle, start the Telegram client and create
        the media directory when it does not exist yet.

        config is read as a mapping; session_file is handed to
        TelegramClient; db is the object sync() writes through.
        """
        self.config = config
        self.db = db

        self.client = TelegramClient(session_file, self.config["api_id"],
                                     self.config["api_hash"])
        self.client.start()

        if not os.path.exists(self.config["media_dir"]):
            os.mkdir(self.config["media_dir"])

    def sync(self):
        """
        Sync syncs messages from Telegram from the last synced message
        into the local SQLite DB.

        Messages are fetched batch by batch. The DB is committed every 300
        messages and after every batch, and the loop sleeps for
        config["fetch_wait"] seconds between batches. It stops when a batch
        yields nothing or when config["fetch_limit"] (if > 0) is reached.
        """
        last_id, last_date = self.db.get_last_message_id()

        if last_id:
            logging.info("fetching from last message id={} ({})".format(
                last_id, last_date))

        n = 0
        while True:
            has = False
            for m in self._get_messages(self.config["group"],
                                        offset_id=last_id if last_id else 0):
                if not m:
                    continue

                has = True

                # Insert the records into DB.
                self.db.insert_user(m.user)

                if m.media:
                    self.db.insert_media(m.media)

                self.db.insert_message(m)

                last_date = m.date
                n += 1
                if n % 300 == 0:
                    logging.info("fetched {} messages".format(n))
                    self.db.commit()

                if self.config["fetch_limit"] > 0 and n >= self.config[
                        "fetch_limit"]:
                    # Clearing `has` makes the outer loop stop as well.
                    has = False
                    break

            self.db.commit()
            if has:
                # `m` is the last message of the batch; the next batch
                # starts from its id.
                last_id = m.id
                logging.info(
                    "fetched {} messages. sleeping for {} seconds".format(
                        n, self.config["fetch_wait"]))
                time.sleep(self.config["fetch_wait"])
            else:
                break

        self.db.commit()
        logging.info("finished. fetched {} messages. last message = {}".format(
            n, last_date))

    def _get_messages(self, group, offset_id) -> "Iterator[Message]":
        """
        Fetch one batch of messages after offset_id (oldest first) and yield
        a Message record for each one that has a sender.

        This is a generator, hence the iterator annotation (the previous
        annotation named a single Message).
        """
        # https://docs.telethon.dev/en/latest/quick-references/objects-reference.html#message
        for m in self.client.get_messages(
                group,
                offset_id=offset_id,
                limit=self.config["fetch_batch_size"],
                reverse=True):

            if not m or not m.sender:
                continue

            # Media.
            sticker = None
            med = None
            if m.media:
                # `document` may be missing or None; reading mime_type from
                # it directly raised AttributeError in that case.
                document = getattr(m.media, "document", None)

                # If it's a sticker, get the alt value (unicode emoji).
                if isinstance(m.media, telethon.tl.types.MessageMediaDocument) and \
                        document is not None and \
                        document.mime_type == "application/x-tgsticker":
                    alt = [
                        a.alt for a in document.attributes
                        if isinstance(
                            a, telethon.tl.types.DocumentAttributeSticker)
                    ]
                    if len(alt) > 0:
                        sticker = alt[0]
                else:
                    med = self._get_media(m)

            # Message.
            typ = "message"
            if m.action:
                if isinstance(m.action,
                              telethon.tl.types.MessageActionChatAddUser):
                    typ = "user_joined"
                elif isinstance(m.action,
                                telethon.tl.types.MessageActionChatDeleteUser):
                    typ = "user_left"

            yield Message(type=typ,
                          id=m.id,
                          date=m.date,
                          edit_date=m.edit_date,
                          content=sticker if sticker else m.raw_text,
                          reply_to=m.reply_to_msg_id if m.reply_to
                          and m.reply_to.reply_to_msg_id else None,
                          user=self._get_user(m.sender),
                          media=med)

    def _get_user(self, u) -> "User":
        """
        Build a User record (id, username, names, tags, avatar) from a
        message sender.

        Senders that are not telethon User objects are accepted too: the
        `bot` flag and the first/last name are only read from real users,
        so such senders no longer raise AttributeError.
        """
        tags = []
        is_normal_user = isinstance(u, telethon.tl.types.User)

        if is_normal_user:
            if u.bot:
                tags.append("bot")

        if u.scam:
            tags.append("scam")

        if u.fake:
            tags.append("fake")

        # Download sender's profile photo if it's not already cached.
        avatar = None
        if self.config["download_avatars"]:
            try:
                fname = self._download_avatar(u)
                avatar = fname
            except Exception as e:
                # Best effort: a failed avatar must not stop the sync.
                logging.error("error downloading avatar: #{}: {}".format(
                    u.id, e))

        return User(id=u.id,
                    username=u.username if u.username else str(u.id),
                    first_name=u.first_name if is_normal_user else None,
                    last_name=u.last_name if is_normal_user else None,
                    tags=tags,
                    avatar=avatar)

    def _get_media(self, msg):
        """
        Return a Media record for the message's attachment, or None.

        Non-empty web pages become a "webpage" record. Photos, documents
        and contacts are downloaded when config["download_media"] is set.
        None is returned for every other case and when the download fails.
        """
        if isinstance(msg.media, telethon.tl.types.MessageMediaWebPage) and \
                not isinstance(msg.media.webpage, telethon.tl.types.WebPageEmpty):
            return Media(id=msg.id,
                         type="webpage",
                         url=msg.media.webpage.url,
                         title=msg.media.webpage.title,
                         description=msg.media.webpage.description
                         if msg.media.webpage.description else None,
                         thumb=None)
        elif isinstance(msg.media, telethon.tl.types.MessageMediaPhoto) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaDocument) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaContact):
            if self.config["download_media"]:
                logging.info("downloading media #{}".format(msg.id))
                try:
                    basename, fname, thumb = self._download_media(msg)
                    # NOTE(review): documents and contacts are recorded with
                    # type "photo" as well - confirm this is intended.
                    return Media(id=msg.id,
                                 type="photo",
                                 url=fname,
                                 title=basename,
                                 description=None,
                                 thumb=thumb)
                except Exception as e:
                    logging.error("error downloading media: #{}: {}".format(
                        msg.id, e))

        return None

    def _download_media(self, msg) -> tuple:
        """
        Download a media / file attached to a message and return its original
        filename, sanitized name on disk, and the thumbnail (if any).
        """
        # Download the media to the temp dir and copy it back as
        # there does not seem to be a way to get the canonical
        # filename before the download.
        fpath = self.client.download_media(msg, file=tempfile.gettempdir())
        basename = os.path.basename(fpath)

        newname = "{}.{}".format(msg.id, self._get_file_ext(basename))
        shutil.move(fpath, os.path.join(self.config["media_dir"], newname))

        # If it's a photo, download the thumbnail.
        tname = None
        if isinstance(msg.media, telethon.tl.types.MessageMediaPhoto):
            tpath = self.client.download_media(msg,
                                               file=tempfile.gettempdir(),
                                               thumb=1)
            # download_media() may return None when nothing was downloaded.
            # The main file is already in place by now, so keep it and
            # simply report no thumbnail instead of raising.
            if tpath:
                tname = "thumb_{}.{}".format(
                    msg.id, self._get_file_ext(os.path.basename(tpath)))
                shutil.move(tpath,
                            os.path.join(self.config["media_dir"], tname))

        return basename, newname, tname

    def _get_file_ext(self, f) -> str:
        """
        Return the extension of filename f without the dot, or "file" when
        there is none or it is 6 characters or longer.
        """
        if "." in f:
            e = f.split(".")[-1]
            if len(e) < 6:
                return e

        # No leading dot: callers join with "{}.{}", so the previous ".file"
        # produced names such as "123..file".
        return "file"

    def _download_avatar(self, user):
        """
        Download the user's profile photo as a resized JPEG into media_dir
        and return its file name; an existing file is reused.
        """
        fname = "avatar_{}.jpg".format(user.id)
        fpath = os.path.join(self.config["media_dir"], fname)

        if os.path.exists(fpath):
            return fname

        logging.info("downloading avatar #{}".format(user.id))

        # Download the file into a container, resize it, and then write to disk.
        b = BytesIO()
        self.client.download_profile_photo(user, file=b)

        im = Image.open(b)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        im.thumbnail(self.config["avatar_size"], Image.LANCZOS)
        im.save(fpath, "JPEG")

        return fname
示例#5
0
# --- Step 1: fetch every gif-channel file that is not on disk yet ---
print("Download missing videos from gif channel")
client = TelegramClient('duplicate_checker', config['api_id'],
                        config['api_hash'])
client.start()
channel_username = config['gif_channel']
channel_entity = client.get_entity(channel_username)
for message in client.iter_messages(channel_entity):
    # Messages without an attached file have nothing to download.
    if message.file is None:
        continue
    # The extension is the subtype part of the MIME type.
    file_ext = message.file.mime_type.rsplit("/", 1)[-1]
    # Files are named after the message id, zero-padded to 4 characters.
    path = "{}/{}.{}".format(dir_video, str(message.id).zfill(4), file_ext)
    if os.path.exists(path):
        continue
    print("Downloading message: {}".format(message))
    client.download_media(message=message, file=path)

# --- Step 2: prepare frame extraction for videos not decomposed yet ---
print("Decompose missing videos")
video_files = glob.glob("{}/*.mp4".format(dir_video))
for video_file in video_files:
    # File name up to its first dot, e.g. ".../0042.mp4" -> "0042".
    video_number = video_file.rsplit(os.sep, 1)[-1].partition(".")[0]
    video_output_dir = "{}/{}/".format(dir_images, video_number)
    # An existing output directory marks the video as already handled.
    if os.path.exists(video_output_dir):
        continue
    os.mkdir(video_output_dir)
    print("Converting video: {}".format(video_file))
    # NOTE(review): the FFmpeg object is only built here; no run() call is
    # visible in this part of the file.
    ff = ffmpy.FFmpeg(
        inputs={video_file: None},
        outputs={video_output_dir + "out%d.png": "-vf fps=5 -vsync 0"},
    )
示例#6
0
            f.write('1')
            f = open("Phone.txt", "w+")
            f.write('1')
            time_01 = 0

        dialogs = client_01.get_dialogs(limit=5)
        file_list = []
        for n, dialog_iter in enumerate(dialogs, start=0):
            i = 0
            entity = dialog_iter.entity
            messages = client_01.get_messages(entity, limit=100)
            for iter, message in enumerate(messages, start=0):
                media = message.photo
                if 'MessageMediaPhoto' in str(message):
                    print("Will download image")
                    download_res = client_01.download_media(media)
                    print("Download done: {}".format(download_res))
                else:
                    try:
                        f = open("{}.txt".format(get_display_name(entity)),
                                 "a+")
                        f.write("\n" + str(message.to_id) + "\n" +
                                str(message.message))
                        if get_display_name(entity) not in file_list:
                            file_list.append(str(get_display_name(entity)))
                            print(file_list)
                    except UnicodeEncodeError:
                        continue
                    except OSError:
                        if '>' or '<' or '?' or '/' or '\\' or '|' in get_display_name(
                                entity):
示例#7
0
class Sync:
    """
    Sync iterates and receives messages from the Telegram group to the
    local SQLite DB.
    """
    # Class-level defaults; both are replaced per instance in __init__.
    config = {}
    db = None

    def __init__(self, config, session_file, db):
        """
        Keep the config and DB handle, start the Telegram client and create
        the media directory when it does not exist yet.

        config is read as a mapping; session_file is handed to
        TelegramClient; db is the object sync() writes through.
        """
        self.config = config
        self.db = db

        self.client = TelegramClient(session_file, self.config["api_id"],
                                     self.config["api_hash"])
        self.client.start()

        if not os.path.exists(self.config["media_dir"]):
            os.mkdir(self.config["media_dir"])

    def sync(self, ids=None):
        """
        Sync syncs messages from Telegram from the last synced message
        into the local SQLite DB.

        When ids is given, those ids are requested instead of continuing
        from the last stored message, and the loop stops after the first
        message it processes. Otherwise messages are fetched batch by batch
        until a batch yields nothing or config["fetch_limit"] (if > 0) is
        reached. The DB is committed every 300 messages and after every
        batch.
        """

        if ids:
            last_id, last_date = (ids, None)
        else:
            last_id, last_date = self.db.get_last_message_id()

        if ids:
            logging.info("fetching message id={}".format(ids))
        elif last_id:
            logging.info("fetching from last message id={} ({})".format(
                last_id, last_date))

        group_id = self._get_group_id(self.config["group"])

        n = 0
        while True:
            has = False
            for m in self._get_messages(group_id,
                                        offset_id=last_id if last_id else 0,
                                        ids=ids):
                if not m:
                    continue

                has = True

                # Insert the records into DB.
                self.db.insert_user(m.user)

                if m.media:
                    self.db.insert_media(m.media)

                self.db.insert_message(m)

                last_date = m.date
                n += 1
                if n % 300 == 0:
                    logging.info("fetched {} messages".format(n))
                    self.db.commit()

                # Parenthesised for readability only; `and` already bound
                # tighter than `or` here.
                if (self.config["fetch_limit"] > 0
                        and n >= self.config["fetch_limit"]) or ids:
                    # Clearing `has` makes the outer loop stop as well.
                    has = False
                    break

            self.db.commit()
            if has:
                # `m` is the last message of the batch; the next batch
                # starts from its id.
                last_id = m.id
                logging.info(
                    "fetched {} messages. sleeping for {} seconds".format(
                        n, self.config["fetch_wait"]))
                time.sleep(self.config["fetch_wait"])
            else:
                break

        self.db.commit()
        logging.info("finished. fetched {} messages. last message = {}".format(
            n, last_date))

    def _get_messages(self, group, offset_id, ids=None) -> "Iterator[Message]":
        """
        Fetch one batch of messages (oldest first) and yield a Message
        record for each one that has a sender.

        This is a generator, hence the iterator annotation (the previous
        annotation named a single Message).

        NOTE(review): Telethon documents that a non-list `ids` makes
        get_messages() return a single message instead of a list - confirm
        callers always pass a list.
        """
        # https://docs.telethon.dev/en/latest/quick-references/objects-reference.html#message
        for m in self.client.get_messages(
                group,
                offset_id=offset_id,
                limit=self.config["fetch_batch_size"],
                ids=ids,
                reverse=True):

            if not m or not m.sender:
                continue

            # Media.
            sticker = None
            med = None
            if m.media:
                # hasattr() was true even when `document` was None, and
                # reading mime_type then raised AttributeError; test the
                # value itself instead.
                document = getattr(m.media, "document", None)

                # If it's a sticker, get the alt value (unicode emoji).
                if isinstance(m.media, telethon.tl.types.MessageMediaDocument) and \
                        document is not None and \
                        document.mime_type == "application/x-tgsticker":
                    alt = [
                        a.alt for a in document.attributes
                        if isinstance(
                            a, telethon.tl.types.DocumentAttributeSticker)
                    ]
                    if len(alt) > 0:
                        sticker = alt[0]
                elif isinstance(m.media, telethon.tl.types.MessageMediaPoll):
                    med = self._make_poll(m)
                else:
                    med = self._get_media(m)

            # Message.
            typ = "message"
            if m.action:
                if isinstance(m.action,
                              telethon.tl.types.MessageActionChatAddUser):
                    typ = "user_joined"
                elif isinstance(m.action,
                                telethon.tl.types.MessageActionChatDeleteUser):
                    typ = "user_left"

            yield Message(type=typ,
                          id=m.id,
                          date=m.date,
                          edit_date=m.edit_date,
                          content=sticker if sticker else m.raw_text,
                          reply_to=m.reply_to_msg_id if m.reply_to
                          and m.reply_to.reply_to_msg_id else None,
                          user=self._get_user(m.sender),
                          media=med)

    def _get_user(self, u) -> "User":
        """
        Build a User record (id, username, names, tags, avatar) from a
        message sender.

        The `bot` flag and the first/last name are only read when the
        sender is a telethon User.
        """
        tags = []
        is_normal_user = isinstance(u, telethon.tl.types.User)

        if is_normal_user:
            if u.bot:
                tags.append("bot")

        if u.scam:
            tags.append("scam")

        if u.fake:
            tags.append("fake")

        # Download sender's profile photo if it's not already cached.
        avatar = None
        if self.config["download_avatars"]:
            try:
                fname = self._download_avatar(u)
                avatar = fname
            except Exception as e:
                # Best effort: a failed avatar must not stop the sync.
                logging.error("error downloading avatar: #{}: {}".format(
                    u.id, e))

        return User(id=u.id,
                    username=u.username if u.username else str(u.id),
                    first_name=u.first_name if is_normal_user else None,
                    last_name=u.last_name if is_normal_user else None,
                    tags=tags,
                    avatar=avatar)

    def _make_poll(self, msg):
        """
        Build a "poll" Media record whose description is the JSON list of
        options (label, count, correct and, when results exist, percent).
        """
        options = [{
            "label": a.text,
            "count": 0,
            "correct": False
        } for a in msg.media.poll.answers]

        # `or 0` keeps the percentage maths safe if total_voters is None.
        total = msg.media.results.total_voters or 0
        if msg.media.results.results:
            # Options and results are paired by position; zip() stops at
            # the shorter list instead of raising IndexError.
            for option, r in zip(options, msg.media.results.results):
                option["count"] = r.voters
                option["percent"] = r.voters / total * 100 if total > 0 else 0
                option["correct"] = r.correct

        return Media(id=msg.id,
                     type="poll",
                     url=None,
                     title=msg.media.poll.question,
                     description=json.dumps(options),
                     thumb=None)

    def _get_media(self, msg):
        """
        Return a Media record for the message's attachment, or None.

        Non-empty web pages become a "webpage" record. Photos, documents
        and contacts are downloaded when config["download_media"] is set.
        None is returned for every other case and when the download fails.
        """
        if isinstance(msg.media, telethon.tl.types.MessageMediaWebPage) and \
                not isinstance(msg.media.webpage, telethon.tl.types.WebPageEmpty):
            return Media(id=msg.id,
                         type="webpage",
                         url=msg.media.webpage.url,
                         title=msg.media.webpage.title,
                         description=msg.media.webpage.description
                         if msg.media.webpage.description else None,
                         thumb=None)
        elif isinstance(msg.media, telethon.tl.types.MessageMediaPhoto) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaDocument) or \
                isinstance(msg.media, telethon.tl.types.MessageMediaContact):
            if self.config["download_media"]:
                logging.info("downloading media #{}".format(msg.id))
                try:
                    basename, fname, thumb = self._download_media(msg)
                    # NOTE(review): documents and contacts are recorded with
                    # type "photo" as well - confirm this is intended.
                    return Media(id=msg.id,
                                 type="photo",
                                 url=fname,
                                 title=basename,
                                 description=None,
                                 thumb=thumb)
                except Exception as e:
                    logging.error("error downloading media: #{}: {}".format(
                        msg.id, e))

        return None

    def _download_media(self, msg) -> tuple:
        """
        Download a media / file attached to a message and return its original
        filename, sanitized name on disk, and the thumbnail (if any).
        """
        # Download the media to the temp dir and copy it back as
        # there does not seem to be a way to get the canonical
        # filename before the download.
        fpath = self.client.download_media(msg, file=tempfile.gettempdir())
        basename = os.path.basename(fpath)

        newname = "{}.{}".format(msg.id, self._get_file_ext(basename))
        shutil.move(fpath, os.path.join(self.config["media_dir"], newname))

        # If it's a photo, download the thumbnail.
        tname = None
        if isinstance(msg.media, telethon.tl.types.MessageMediaPhoto):
            tpath = self.client.download_media(msg,
                                               file=tempfile.gettempdir(),
                                               thumb=1)
            # download_media() may return None when nothing was downloaded.
            # The main file is already in place by now, so keep it and
            # simply report no thumbnail instead of raising.
            if tpath:
                tname = "thumb_{}.{}".format(
                    msg.id, self._get_file_ext(os.path.basename(tpath)))
                shutil.move(tpath,
                            os.path.join(self.config["media_dir"], tname))

        return basename, newname, tname

    def _get_file_ext(self, f) -> str:
        """
        Return the extension of filename f without the dot, or "file" when
        there is none or it is 6 characters or longer.
        """
        if "." in f:
            e = f.split(".")[-1]
            if len(e) < 6:
                return e

        # No leading dot: callers join with "{}.{}", so the previous ".file"
        # produced names such as "123..file".
        return "file"

    def _download_avatar(self, user):
        """
        Download the user's profile photo as a resized JPEG into media_dir
        and return its file name; an existing file is reused.
        """
        fname = "avatar_{}.jpg".format(user.id)
        fpath = os.path.join(self.config["media_dir"], fname)

        if os.path.exists(fpath):
            return fname

        logging.info("downloading avatar #{}".format(user.id))

        # Download the file into a container, resize it, and then write to disk.
        b = BytesIO()
        self.client.download_profile_photo(user, file=b)

        im = Image.open(b)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        im.thumbnail(self.config["avatar_size"], Image.LANCZOS)
        im.save(fpath, "JPEG")

        return fname

    def _get_group_id(self, group):
        """
        Syncs the Entity cache and returns the Entity ID for the specified group,
        which can be a str/int for group ID, group name, or a group username.

        The authorized user must be a part of the group.

        Raises SystemExit(1) when the group cannot be resolved.
        """
        # Get all dialogs for the authorized user, which also
        # syncs the entity cache to get latest entities
        # ref: https://docs.telethon.dev/en/latest/concepts/entities.html#getting-entities
        _ = self.client.get_dialogs()

        try:
            # If the passed group is a group ID, extract it.
            group = int(group)
        except ValueError:
            # Not a group ID, we have either a group name or
            # a group username: @group-username
            pass

        try:
            entity = self.client.get_entity(group)
        except ValueError:
            logging.critical(
                "the group: {} does not exist,"
                " or the authorized user is not a participant!".format(group))
            # This is a critical error, so exit with code: 1.
            # The bare exit() helper is installed by the `site` module and
            # is not always available; raising SystemExit has the same
            # effect without that dependency.
            raise SystemExit(1)

        return entity.id
示例#8
0
    client.sign_in(phone, input('Enter the code: '))

# Keep only the dialogs that are groups.
chats = client.get_dialogs()
# getattr() with a default covers a dialog lacking ``is_group`` without the
# former bare ``except:``, which also swallowed KeyboardInterrupt and any
# unrelated error.
groups = [chat for chat in chats if getattr(chat, 'is_group', False)]

# Show a numbered menu and let the user pick one group by its index.
print('Listing the groups...')
for i, group in enumerate(groups):
    print(str(i) + '- ' + group.title)
target_group = groups[int(input("\nEnter the number of the group: "))]

print('Fetching members of {0}...'.format(target_group.name))
all_participants = client.get_participants(target_group, aggressive=True)

# Every profile photo of every member is stored as
# ./<group id>/<user id>/<photo id>.jpg
print("Downloading profilce pictures...")
for user in all_participants:
    print(user.id)
    for photo in client.iter_profile_photos(user):
        client.download_media(photo,
                              file="./{0}/{1}/{2}.jpg".format(
                                  target_group.id, user.id, photo.id))
示例#9
0
from telethon.sync import TelegramClient

# Placeholders: fill in the real Telegram API credentials before running.
api_id = 'TODO'
api_hash = 'TODO'

client = TelegramClient('test_session', api_id, api_hash)
client.start()
print(dir(client))
# iter_messages() yields messages as they are fetched, so downloads start
# right away instead of waiting for a list of up to 10000 messages to be
# built first, as get_messages() did.
for message in client.iter_messages('ml_progress_bot', limit=10000):
    client.download_media(message)
示例#10
0
        for message in messages:
            if int(utils.get_message_id(message)) > mid:
                speak=str(message.message).replace('\r', '').replace('\n', '').replace('\t', '')
                sender=str(utils.get_display_name(message.sender))
                #对字段长度进行截断,防止数据库报错
                if len(speak)>500:
                    speak=speak[:500]
                if len(sender)>80:
                    sender=sender[:80]
                if len(speak)!=0 and speak!='None':
                    cursor.execute("INSERT INTO rawData VALUES(null,%s,%s,%s,%s);",
                                    (str(group_id), str(sender), str(str(message.date)[:19]),str(speak)))
            else:
                break
        #获取图片
        photos = client.get_messages(group_id, None, filter=InputMessagesFilterPhotos)
        for photo in photos:
            filename = "./pic/" + str(group_id)+'-'+str(photo.id) + ".jpg"
            client.download_media(photo, filename)
        
        df['last_message_id'][idn.index(group_id)] = utils.get_message_id(messages[0])
        df.to_csv('incremental.csv', index=False)
        db.commit()
        print(channel_item.title,'update commit success')
    except ValueError as e:
        print(e)
    except BaseException as e:
        print(e)
        
    
示例#11
0
        history = client(
            GetHistoryRequest(
                peer=peer,
                offset_id=0,
                offset_date=None,
                add_offset=0,
                limit=100,
                max_id=0,
                min_id=0,
                hash=0,
            )
        )

        for i, message in enumerate(history.messages):
            media = message.media
            if media:
                if args.datefrom and message.date < args.datefrom:
                    continue
                timestamp = message.date.astimezone(localzone).strftime(
                    "%Y-%m-%d-%H-%M-%S"
                )
                path = userdir / (timestamp + f"_{i}")

                path_created = client.download_media(message, file=path)
                print(path_created)

    except Exception as e:
        with open(basedir / "errors.log", "a") as f:
            print(f"Something wrong with user {user}", file=f)
            print(e, file=f)