def get_post_comments(post, num=None, path=None):
    """Download a post's comments and dump them to a JSON file.

    Comments are requested through ``Agent.get_comments`` in batches of at
    most 50, collected into a set of ``Comment`` objects and written to
    ``<path>/comments/<post.code>__last_comments.json``.

    Args:
        post: Object handed to ``Agent.get_comments``; its ``comments_count``
            and ``code`` attributes are read here.
        num: Number of comments to request. ``None`` means
            ``int(post.comments_count)``.
        path: Base output directory. ``None`` means ``'./data'``.

    Returns:
        The set of ``Comment`` objects that were collected.
    """
    agent = Agent()
    comments = set()
    pointer = None

    comments_count = int(post.comments_count) if num is None else num

    limit = 50
    batch_num = math.ceil(comments_count / limit)
    for i in range(batch_num):
        if i == batch_num - 1:
            # The last batch only asks for whatever is left over.
            count = comments_count - limit * (batch_num - 1)
        else:
            count = limit
        batch_comments, pointer = agent.get_comments(
            post, pointer=pointer, count=count)
        comments.update(Comment(item.id) for item in batch_comments)
        if pointer is None:
            # No pointer to continue from: another request with pointer=None
            # would presumably start over from the first page, so stop here.
            break

    comments_info = {}
    for index, comment in enumerate(comments):
        # Work on a shallow copy so that stringifying media/owner for JSON
        # does not rebind those attributes on the objects being returned.
        comment_info = copy.copy(comment)
        comment_info.media = str(comment_info.media)
        comment_info.owner = str(comment_info.owner)
        comments_info[index] = comment_info.__dict__
    comments_json = json.dumps({"comments": comments_info}, indent=2)

    if path is None:
        path = './data'
    comments_dir = pathlib.Path(path) / 'comments'
    comments_dir.mkdir(parents=True, exist_ok=True)
    filename = comments_dir / (post.code + '__last_comments.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(comments_json)

    return comments
def test_get_comments_long(count, shortcode):
    """Check ``Agent.get_comments`` when a specific ``count`` is requested.

    Args:
        count: Number of comments to request.
        shortcode: Shortcode used to construct the ``Media`` under test.

    The call is expected to return ``min(media.comments_count, count)``
    items, and a ``None`` pointer exactly when the media has no more than
    ``count`` comments.
    """
    anon = Agent()
    media = Media(shortcode)

    data, pointer = anon.get_comments(media, count=count)

    assert min(media.comments_count, count) == len(data)
    # Pagination of comments is bounded by the number of comments, not by
    # the number of likes.
    assert (pointer is None) == (media.comments_count <= count)

    Media.clear_cache()
def test_get_comments_pointer(count, shortcode):
    """Check that ``Agent.get_comments`` can be chained through its pointer.

    Args:
        count: Number of consecutive ``get_comments`` calls to make, each
            continuing from the pointer returned by the previous one.
        shortcode: Shortcode used to construct the ``Media`` under test.
    """
    anon = Agent()
    media = Media(shortcode)
    pointer = None
    data = []

    for _ in range(count):
        tmp, pointer = anon.get_comments(media, pointer=pointer)
        data.extend(tmp)

    # Pagination of comments is bounded by the number of comments, not by
    # the number of likes.
    # NOTE(review): ``count`` is the number of requests made above, not the
    # number of comments fetched -- confirm that comparing it with
    # ``comments_count`` is the intended bound.
    assert (pointer is None) == (media.comments_count <= count)

    Media.clear_cache()
all_medias.append(media) if loaded: break all_comments = {} all_comments_id = [] vip_comments_nick = [] vip_comments_id = [] for i, media in enumerate(all_medias): # if i > 10: # break loaded = False print(datetime.now().strftime("%H:%M:%S"), 'Запись', str(i), 'из', str(len(all_medias))) try: comments, pointer = agent.get_comments(media) except Exception as e: print(datetime.now().strftime("%H:%M:%S"), 'Сбой') comments, pointer = agent.get_comments(media) for comment in comments: all_comments[comment.id] = comment all_comments_id.append(comment.id) if comment.owner.login == vip: if comment.text.find('@') > -1: for j in range(len(comment.text.split('@')) - 1): vip_comments_nick.append( comment.text.split('@')[j + 1].split(' ')[0]) vip_comments_id.append(comment.id) sleep(round(random() * 5) + 2) while pointer: try: