Пример #1
0
 def worker(news):
     """Download one news page and turn it into a text record plus its links.

     The page at ``news.url`` is fetched with ``urlOpen.get_html`` and the
     primary key of *news* is printed as a progress trace.

     Returns a ``(NewsText, url_list)`` tuple when the fetch yields a truthy
     page, otherwise ``None``. The ``NewsText`` object is only constructed
     here; this function does not call ``save()`` on it.
     """
     page = urlOpen.get_html(news.url)
     # Progress trace: the primary key followed by five spaces.
     print(str(news.pk) + "     ")
     if not page:
         return None

     body = textParser.get_text_from_html(page)
     # Links are collected while the markup is still present in the text.
     links = list(aParser.get_a_from_news_text(news_url=news.url, text=body))
     cleaned = text_prerparer.text_preparer(aParser.remove_all_tags(body))
     return NewsText(news=news, text=cleaned), links
Пример #2
0
def parse_news(n=None):
    """Fetch and store the text of news items that are not parsed yet.

    Walks the ``News`` rows with ``is_parsed=False``, limited by the slice
    ``[:n]`` (no limit when *n* is ``None``). For every row whose page
    downloads to something truthy, a ``NewsText`` row is created from the
    extracted text and the news item is flagged as parsed and saved. Rows
    whose download yields nothing are left untouched.
    """
    pending = News.objects.filter(is_parsed=False)[:n]
    for item in pending.iterator():
        # Progress trace: the row id followed by five spaces.
        print(str(item.id) + "     ")
        page = urlOpen.get_html(item.url)  # 0.19 - 2.5 s
        if not page:
            continue
        extracted = textParser.get_text_from_html(page)  # 0.0099 - 0.026 s
        NewsText.objects.create(news=item, text=extracted)
        item.is_parsed = True
        item.save()  # 0.004 with atomic and 0.23 without
Пример #3
0
def parse_news(n=None):
    """Parse the pages of unparsed news items and persist their text.

    The ``News`` queryset filtered on ``is_parsed=False`` is sliced with
    ``[:n]`` (everything when *n* is ``None``) and iterated. Each item whose
    page is fetched successfully gets a ``NewsText`` row and is marked as
    parsed; items whose fetch returns nothing falsy-wise are skipped as is.
    """
    queryset = News.objects.filter(is_parsed=False)[:n]
    for entry in queryset.iterator():
        # Progress trace: the row id followed by five spaces.
        print(str(entry.id) + "     ")
        markup = urlOpen.get_html(entry.url)  # 0.19 - 2.5 s
        if not markup:
            continue
        content = textParser.get_text_from_html(markup)  # 0.0099 - 0.026 s
        NewsText.objects.create(news=entry, text=content)
        entry.is_parsed = True
        entry.save()  # 0.004 with atomic and 0.23 without
Пример #4
0
 def worker(news):
     """Build a ``NewsText`` and the list of links for a single news item.

     Fetches ``news.url`` through ``urlOpen.get_html``, prints the primary key
     of *news*, and, if a truthy page came back, returns the tuple
     ``(NewsText(news=news, text=<prepared text>), <list of links>)``.
     Returns ``None`` when the fetch produced nothing.
     """
     markup = urlOpen.get_html(news.url)
     # Progress trace: the primary key followed by five spaces.
     print(str(news.pk) + "     ")
     if not markup:
         return None

     extracted = textParser.get_text_from_html(markup)
     # Anchors are read from the text before its tags are removed.
     found_urls = list(
         aParser.get_a_from_news_text(news_url=news.url, text=extracted)
     )
     without_tags = aParser.remove_all_tags(extracted)
     prepared = text_prerparer.text_preparer(without_tags)
     return NewsText(news=news, text=prepared), found_urls
Пример #5
0
def worker(input_q: JoinableQueue, output: Queue):
    """Consume tasks from *input_q* until the ``"end"`` sentinel arrives.

    For every task the page at ``task.url`` is fetched with
    ``urlOpen.get_html`` and, when the fetch returns something truthy, run
    through ``textParser.get_text_from_html``. The task's URL is then put on
    *output*. ``"exit"`` is printed once the loop ends.

    Every item taken from *input_q* -- the sentinel included -- is
    acknowledged with ``task_done()``, so a ``join()`` on the queue cannot
    block forever on an item that was fetched but never acknowledged.

    Args:
        input_q: Queue of task objects exposing a ``url`` attribute,
            terminated by the string ``"end"``.
        output: Queue receiving the URL of each processed task.
    """
    from django import db

    # Close the current DB connection before doing any work.
    # NOTE(review): presumably so a forked worker does not reuse the
    # parent's connection -- confirm against the process start-up code.
    db.connection.close()

    while True:
        task = input_q.get()
        if task == "end":
            # The sentinel was put() like any other item, so it has to be
            # acknowledged as well or join() keeps waiting for it.
            input_q.task_done()
            break
        try:
            html = urlOpen.get_html(task.url)
            if html:
                # The parsed text is not consumed here; the call is kept so
                # the per-task work stays the same as before.
                textParser.get_text_from_html(html)
        finally:
            # Acknowledge the task even if fetching/parsing raised, so the
            # queue's unfinished-task counter stays accurate.
            input_q.task_done()
        output.put(task.url)
    print("exit")
Пример #6
0
 def worker(news):
     """Fetch the page for *news* and wrap its extracted text in a ``NewsText``.

     Prints the id of *news* first, then downloads ``news.url`` with
     ``urlOpen.get_html``. Returns ``NewsText(news=news, text=...)`` built from
     ``textParser.get_text_from_html`` when the download is truthy, and
     ``None`` otherwise.
     """
     # Progress trace: the id followed by five spaces.
     print(str(news.id) + "     ")
     page = urlOpen.get_html(news.url)
     if not page:
         return None
     return NewsText(news=news, text=textParser.get_text_from_html(page))
Пример #7
0
 def worker(news):
     """Produce a ``NewsText`` for one news item, or ``None`` if it has no page.

     The id of *news* is printed, ``news.url`` is downloaded through
     ``urlOpen.get_html``, and a truthy result is converted to text with
     ``textParser.get_text_from_html`` and wrapped in ``NewsText``.
     """
     # Progress trace: the id followed by five spaces.
     print(str(news.id) + "     ")
     markup = urlOpen.get_html(news.url)
     if not markup:
         return None
     extracted = textParser.get_text_from_html(markup)
     return NewsText(news=news, text=extracted)