async def send_media(message: types.Message):
    """Answer *message* with the media and caption of the post it points to.

    The message text is passed to ``PostLooter`` (videos enabled). If the
    looter cannot be built or its info lacks the caption edges, the
    emojized ``ERROR_MESSAGE`` is sent and nothing else happens. Otherwise
    every link returned by ``get_links`` is attached to one media group
    (as a video when the link contains ``.mp4``, as a photo otherwise),
    the group is sent, and finally the caption text -- or a fallback
    notice when the post has no caption -- is sent as a separate message.
    """
    try:
        post = PostLooter(message.text, get_videos=True)
        caption_edges = post.info['edge_media_to_caption']['edges']
    except (ValueError, KeyError):
        await message.answer(emojize(ERROR_MESSAGE))
        return

    album = types.MediaGroup()
    for item in post.medias():
        for url in get_links(item, post):
            # Pick the attach method from the link itself.
            attach = album.attach_video if '.mp4' in url else album.attach_photo
            attach(url)
    await message.answer_media_group(media=album)

    try:
        caption = caption_edges[0]['node']['text']
        await message.answer(caption)
    except IndexError:
        # No caption edge: send the "description is missing" notice instead.
        await message.answer(
            '<i>Описание отсутствует.</i>',
            parse_mode=types.ParseMode.HTML,
        )
def _comment_columns(post_info):
    """Return (users, texts, verified flags, processed texts) of the parent comments."""
    users, texts, verified, processed = [], [], [], []
    for edge in post_info['edge_media_to_parent_comment']['edges']:
        node = edge['node']
        users.append(node['owner']['username'])
        texts.append(node['text'])
        processed.append(process_text(node['text']))
        verified.append(node['owner']['is_verified'])
    return users, texts, verified, processed


def _caption_text(post_info):
    """Return the text of the first caption edge, or "" when there is none."""
    try:
        return post_info['edge_media_to_caption']['edges'][0]['node']['text']
    except (LookupError, TypeError):
        # Missing key, empty edge list or a None somewhere along the path.
        return ""


def _tagged_user_columns(post_info):
    """Return the username / full-name / verified lists of the tagged users."""
    users = [
        edge['node']['user']
        for edge in post_info['edge_media_to_tagged_user']['edges']
    ]
    return {
        'tagged_usernames': [user['username'] for user in users],
        'tagged_user_full_name': [user['full_name'] for user in users],
        'tagged_user_verified': [user['is_verified'] for user in users],
    }


def _image_text_columns(post_url, post_id):
    """Download the post image, OCR it and return the image-text columns.

    This step is best effort: any failure (download, unreadable image, OCR,
    text processing) yields empty values instead of propagating.
    """
    temp_dir = 'instaLooter_images/temp/'
    try:
        PostLooter(post_url).download(temp_dir)
        image = cv2.imread(temp_dir + post_id + ".jpg")
        text = pytesseract.image_to_string(image)
        return {
            'image_text': text,
            'hashtags_img': re.findall(r"#(\w+)", text),
            'mentions_img': re.findall(r"@(\w+)", text),
            'image_text_processed': process_text(text),
        }
    except Exception:
        # Deliberately broad: the OCR columns are optional extras and must
        # never abort the scrape. Hashtags/mentions stay lists so the
        # column type matches the success path and the caption columns.
        return {
            'image_text': "",
            'hashtags_img': [],
            'mentions_img': [],
            'image_text_processed': "",
        }


def scrape_post_from_link(given_link):
    """Scrape post(s) from a given link.

    Builds a ``PostLooter`` for *given_link* and converts every media it
    yields into a flat dict.

    Args:
        given_link: Value passed straight to ``PostLooter``.

    Returns:
        A list with one dict per media. Each dict holds, in this order:
        caption hashtags/mentions, shortcode and display URL, the comment
        columns, caption, post URL/id, owner fields, ad/video/location
        flags, timestamp plus its ``datetime.fromtimestamp`` conversion,
        like/comment counters, tagged-user columns and the OCR columns
        (``image_text``, ``hashtags_img``, ``mentions_img``,
        ``image_text_processed``).

    Raises:
        KeyError: If a media dict lacks one of the mandatory fields read
            below (only the caption and the OCR step are tolerant).
    """
    looter = PostLooter(given_link)
    post_list = []
    for post_info in looter.medias():
        (comment_users, comment_texts,
         comment_users_verified, comment_texts_processed) = _comment_columns(post_info)
        caption = _caption_text(post_info)
        owner = post_info['owner']
        post_url = _baseurl + str(post_info['shortcode'])

        post_dict = {
            # Guarded so a falsy caption never reaches re.findall.
            'hashtags_cap': re.findall(r"#(\w+)", caption) if caption else [],
            'mentions_cap': re.findall(r"@(\w+)", caption) if caption else [],
            'shortcode': post_info['shortcode'],
            'photo_url': post_info['display_url'],
            'comment_users': comment_users,
            'comment_texts': comment_texts,
            'comment_users_verified': comment_users_verified,
            'comment_texts_processed': comment_texts_processed,
            'caption': caption,
            'post_url': post_url,
            'post_id': post_info['id'],
            'user_name': owner['username'],
            'user_id': owner['id'],
            'user_full_name': owner['full_name'],
            'user_verified': owner['is_verified'],
            'user_private': owner['is_private'],
            'user_profile_pic_url': owner['profile_pic_url'],
            'user_post_count': owner['edge_owner_to_timeline_media']['count'],
            'is_ad': post_info['is_ad'],
            'is_video': post_info['is_video'],
            'location': post_info['location'],
            'timestamp': post_info['taken_at_timestamp'],
            'datetime': datetime.fromtimestamp(post_info['taken_at_timestamp']),
            'comments_disabled': post_info['comments_disabled'],
            'likes': post_info['edge_media_preview_like']['count'],
            'comments': post_info['edge_media_to_parent_comment']['count'],
        }
        post_dict.update(_tagged_user_columns(post_info))
        post_dict.update(_image_text_columns(post_url, post_info['id']))
        post_list.append(post_dict)
    return post_list