import time
from unittest.mock import patch
from urllib.parse import unquote

import pandas as pd
import responses

from vkapi.wall import get_wall_execute


@responses.activate
def test_too_many_requests(self):
    responses.add(
        responses.POST,
        "https://api.vk.com/method/execute",
        json={
            "response": {
                "count": 6000,
                "items": [],
            }
        },
        status=200,
    )
    start = time.time()
    # Stub out the per-batch helper so only the rate limiting is measured.
    with patch("vkapi.wall.get_posts_1000") as get_posts_1000:
        get_posts_1000.return_value = []
        _ = get_wall_execute(domain="cs102py", count=6000)
    end = time.time()
    self.assertGreaterEqual(end - start, 2.0, msg="Too many requests per second")

@responses.activate
def test_total_count(self):
    expected_items = [
        {
            "id": 1,
            "from_id": 1234,
            "owner_id": 1234,
            "date": 1234567890,
            "text": "some message",
        }
    ]
    responses.add(
        responses.POST,
        "https://api.vk.com/method/execute",
        json={
            "response": {
                "count": 1,
                "items": expected_items,
            }
        },
        status=200,
    )
    wall = get_wall_execute(domain="cs102py", count=1)
    self.assertIsInstance(
        wall,
        pd.DataFrame,
        msg="The function must return a DataFrame; use json_normalize",
    )
    self.assertEqual(
        expected_items,
        wall.to_dict("records"),
        msg="You should make a single request to learn the total number of posts",
    )
    # urlencode turns the space in '"count": "1"' into '+', which unquote keeps.
    resp_body = unquote(responses.calls[0].request.body)
    self.assertTrue(
        '"count":"1"' in resp_body or '"count":+"1"' in resp_body,
        msg="You should make a single request to learn the total number of posts",
    )

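Taken together, the two tests pin down the contract of get_wall_execute: one preliminary execute request with count=1 to learn the total number of posts, batched fetching after that, and throttling so that six batches take at least two seconds. A minimal sketch that fits this contract is below; the session object and its base-URL handling are assumptions, and only vkapi.wall.get_posts_1000 and the call signature used elsewhere in this section come from the source.

import time

import pandas as pd

from vkapi.session import session  # hypothetical pre-configured API session
from vkapi.wall import get_posts_1000


def get_wall_execute(domain="", count=10, max_count=2500, progress=None):
    # One execute request with count=1 just to learn how many posts exist.
    code = f'return API.wall.get({{"domain": "{domain}", "count": "1"}});'
    total = session.post("execute", data={"code": code}).json()["response"]["count"]

    posts = []
    # Batches of 1000 match what get_posts_1000 returns per call; max_count is
    # kept for interface compatibility with the calls in the examples below.
    offsets = range(0, min(count, total), 1000)
    if progress is not None:
        offsets = progress(offsets)
    for offset in offsets:
        posts.extend(get_posts_1000(domain=domain, offset=offset))
        time.sleep(1 / 3)  # VK allows at most 3 API requests per second

    return pd.json_normalize(posts)

With count=6000 the loop makes six batched calls separated by 1/3-second pauses, which is exactly the two-second floor the first test asserts.
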
import re

import gensim
import pyLDAvis
import pyLDAvis.gensim
import pymorphy2
from gensim.corpora import Dictionary
from textacy import preprocessing
from tqdm import tqdm

from vkapi.wall import get_wall_execute


def example():
    posts = get_wall_execute(domain="rbc", count=5000, max_count=1000, progress=tqdm)

    with open("stop_words.txt") as f:
        stopwords = [line.strip() for line in f]

    # Clean the post texts step by step with textacy's preprocessing helpers.
    text_no_urls = map(preprocessing.replace.replace_urls, posts.text.dropna().to_list())
    text_no_punct = map(preprocessing.remove.remove_punctuation, text_no_urls)
    text_no_emojis = map(preprocessing.replace.replace_emojis, text_no_punct)
    text_no_white_space = map(preprocessing.normalize.normalize_whitespace, text_no_emojis)

    # Tokenize, lowercase, then drop stopwords; lowercasing must come first,
    # otherwise capitalized stopwords slip through the filter.
    docs = map(str.split, text_no_white_space)
    docs = [[word.lower() for word in doc] for doc in docs]
    docs = [[word for word in doc if word not in stopwords] for doc in docs]

    dictionary = Dictionary(docs)
    corpus = [dictionary.doc2bow(doc) for doc in docs]
    ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics=10, id2word=dictionary, passes=15)

    vis = pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)
    pyLDAvis.show(vis)

def my_realization():
    posts = get_wall_execute(domain="rbc", count=5000, max_count=1000, progress=tqdm)

    morph = pymorphy2.MorphAnalyzer()
    with open("stop_words.txt") as f:
        stopwords = [line.strip() for line in f]

    # Compile the emoji pattern once instead of on every post.
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "]+",
        flags=re.UNICODE,
    )

    texts = []
    for text in posts.text.dropna().to_list():
        text = re.sub(r"[^\w\s]", "", text)  # drop punctuation
        text = emoji_pattern.sub("", text)  # drop emojis
        text = re.sub(r"http\S+", "", text)  # drop URLs
        # Lemmatize with pymorphy2; normal_form is already lowercase, and the
        # join/split pair collapses all whitespace, newlines included.
        text = " ".join(morph.parse(word.lower())[0].normal_form for word in text.split())
        # One document per post, stopwords removed.
        texts.append([w for w in text.split() if w not in stopwords])

    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(t) for t in texts]
    ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics=10, id2word=dictionary, passes=15)

    vis = pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)
    pyLDAvis.show(vis)
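
Either pipeline can be sanity-checked without the interactive view: gensim's LdaModel.print_topics returns the top-weighted words per topic, so a quick inspection is possible right before the pyLDAvis.show call (the num_words value here is arbitrary).

for topic in ldamodel.print_topics(num_topics=10, num_words=5):
    print(topic)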