Python CnnMiddleEastArticles примеры использования

Язык программирования: Python

Пространство имен/Пакет: articles.items

Класс/Тип: CnnMiddleEastArticles

Примеров на hotexamples.com: 3

Найдено 3 примера использования CnnMiddleEastArticles на Python. Это лучшие примеры Python-кода для articles.items.CnnMiddleEastArticles, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

CnnMiddleEastArticles(3)

Основные методы

CnnMiddleEastArticles (3)

Пример #1

Показать файл

Файл: cnn_business.py Проект: moustafa360/ArabicNewsArticles

    def parse_details(self, response):
        """Extract title, summary and body paragraphs from an article page.

        Yields a single ``CnnMiddleEastArticles`` item with the fields
        ``title``, ``summary`` and ``article_content`` (a list with one
        string per matched ``<p>`` element).

        ``response`` is expected to offer the Scrapy selector API
        (``css()`` / ``xpath()`` / ``extract_first()``).
        """
        var = CnnMiddleEastArticles()

        # extract_first() returns None when nothing matches; default to ""
        # so a page without the headline element does not raise
        # AttributeError on .strip().
        var["title"] = response.css(
            "h1._2JPm2UuC56::text").extract_first(default="").strip()

        # Try the plain <strong> lead first and fall back to the
        # <strong><span> variant.  The None check has to happen BEFORE any
        # string method is called, otherwise the fallback is unreachable.
        summary = response.css(
            "div.clearfix.wysiwyg._2A-9LYJ7eK p strong::text").extract_first()
        if summary is None:
            summary = response.css(
                "div.clearfix.wysiwyg._2A-9LYJ7eK p strong span::text"
            ).extract_first(default="")
        summary = summary.replace("\n", " ")

        # Keep only the text that follows the "(CNN)" marker, if present.
        if "(CNN)" in summary:
            summary = summary.partition("(CNN)")[2]
        var["summary"] = summary

        # One entry per paragraph: join all descendant text nodes and turn
        # line breaks / non-breaking spaces into plain spaces.  Each
        # paragraph is cleaned once instead of re-cleaning the whole list
        # on every iteration.
        paragraphs = []
        for node in response.css(
                "div.clearfix.wysiwyg._2A-9LYJ7eK p:nth-child(n+2)"):
            text = "".join(node.xpath('descendant-or-self::text()').extract())
            for unwanted in ("\n", "\r", "\xa0"):
                text = text.replace(unwanted, " ")
            paragraphs.append(text)
        var["article_content"] = paragraphs

        yield var

Пример #2

Показать файл

 def parse_details(self, response):
     """Extract title, body paragraphs and tags from an article page.

     Yields a ``CnnMiddleEastArticles`` item only when at least one body
     paragraph was found and the page carries at least two tags;
     otherwise nothing is yielded.

     ``response`` is expected to offer the Scrapy selector API
     (``css()`` / ``xpath()`` / ``extract_first()``).
     """
     middle_east = CnnMiddleEastArticles()

     # extract_first() returns None when nothing matches; default to ""
     # so a missing headline does not raise AttributeError on .strip().
     middle_east["title"] = response.css(
         "div.sna_content_head_cont h1.sna_content_heading::text"
     ).extract_first(default="").strip()

     # One string per paragraph, built from all of its descendant text nodes.
     # The fields are assigned unconditionally (not inside the loop), so
     # reading them below cannot hit an unset field when no paragraph
     # matches.
     middle_east["article_content"] = [
         "".join(paragraph.xpath('descendant-or-self::text()').extract())
         for paragraph in response.css(
             "div.article-body div#firstBodyDiv > p:nth-child(n+1)")
     ]

     # The tag query does not depend on the paragraphs; run it once.
     middle_east["tags"] = response.css(
         "div.article-tags.noprint div a h2::text").extract()

     # Require content and at least two tags.
     if middle_east["article_content"] and len(middle_east["tags"]) > 1:
         yield middle_east

Пример #3

Показать файл

Файл: middleeast.py Проект: moustafa360/ArabicNewsArticles

    def parse_details(self, response):
        """Extract title, body text and tags from an article page.

        The article body appears under several different markup layouts, so
        a list of candidate CSS selectors is tried in order and the first
        one that produces usable content wins.  An item is yielded only when
        both content and tags were found.

        ``response`` is expected to offer the Scrapy selector API
        (``css()`` / ``extract()`` / ``extract_first()``) and a ``url``
        attribute.  ``self.clear_input`` is defined outside this block; it is
        assumed to return a falsy value when nothing usable remains
        -- TODO confirm.
        """
        # Candidate body selectors, most common layout first.
        # NOTE(review): in the first selector ``:not(div.first-child)`` tests
        # for a <div> with class "first-child", which a <p> can never be, so
        # it effectively selects every <p>; ``:not(:first-child)`` may have
        # been intended.  Left unchanged to keep the extracted text the same.
        content_selectors = (
            "div.wysiwyg p:not(div.first-child)::text",
            "div.wysiwyg p:not(:first-child) > strong > span > span > span"
            " > span > span > span > span > span::text",
            "div.wysiwyg p:not(:first-child) > span > span > span > span > span > span > span "
            "> span::text",
            "div.wysiwyg p:not(:first-child)> span > span > span > span > span > "
            "span:nth-child(3) > span > span::text",
            "div.wysiwyg p:not(:first-child)> span > span > span > span > span > "
            "span:nth-child(2) > span > span::text",
            "div.wysiwyg._2A-9LYJ7eK p::text",
        )

        var = CnnMiddleEastArticles()
        # extract_first() returns None when nothing matches; default to ""
        # so a missing headline does not raise AttributeError on .strip().
        var["title"] = response.css("h1._2JPm2UuC56::text").extract_first(default="").strip()

        content = None
        for selector in content_selectors:
            fragments = [text.rstrip() for text in response.css(selector).extract()]
            # A result consisting of a single empty string means this layout
            # did not match any real text; move on to the next candidate.
            if fragments == [""]:
                continue
            cleaned = self.clear_input(fragments)
            if cleaned:
                content = cleaned
                break

        if not content:
            # None of the known layouts matched: report it instead of
            # storing a placeholder string as article content.
            print("you did not cover this case.", response.url)
            return

        try:
            var["article_content"] = content
            var["tags"] = [i.strip() for i in response.css("ul.AsCeVPiOdE li a::text").extract()]
            # Do not save an article that lacks tags (content is already
            # guaranteed to be non-empty at this point).
            if var["tags"]:
                yield var
        except KeyError as e:
            # Raised by the item when a field name is not declared on it.
            print(e)