Example #1
 def parse(self, response):
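     # The endpoint returns a JSON list of stories: build a Headline per entry,
     # follow each article URL that should_get_article accepts, and queue the
     # next page while self.page < 10. (response.body_as_unicode() is deprecated
     # in newer Scrapy; response.text is the equivalent.)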
     jsonresponse = json.loads(response.body_as_unicode())
     for item in jsonresponse:
         headline = Headline()
         headline["title"] = item["title"]
         headline["title2"] = item["typeAttributes"]["deck"]
         headline["description"] = item["description"]
         headline["url"] = item["typeAttributes"]["url"]
         headline["imgurl"] = item["typeAttributes"]["imageLarge"]
         headline["tags"] = item["typeAttributes"]["urlSlug"].split("-")
         headline["score"] = item["typeAttributes"]["trending"][
             "numViewers"]
         headline["timestamp"] = item["updatedAt"]
         headline["id"] = item["id"]
         if self.should_get_article(headline["id"]):
             yield scrapy.Request(url=headline["url"],
                                  meta={
                                      "dont_cache": False,
                                      "headline": headline
                                  },
                                  callback=self.parse_body)
     self.page += 1
     if self.page < 10:
         yield scrapy.Request(url=self.HOST.format(self.page),
                              meta={"dont_cache": self.dont_cache})
Example #2
 def parse(self, response):
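     # Each "article.post" block becomes a Headline; the thumbnail URL may sit
     # in either the img src or its lazy-loading pm-lazy-src attribute.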
     posts = response.css("article.post")
     for post in posts:
         headline = Headline()
         headline["title"] = post.css(".entry-title").xpath(
             "a/text()").get()
         headline["title2"] = response.css("article.post").css(
             ".entry-content::text").get().strip()
         headline["url"] = post.css(".entry-title").xpath("a/@href").get()
         imgsrc = post.css("figure.thumbnail").xpath("a/img/@src").get()
         imgsrc2 = post.css("figure.thumbnail").xpath(
             "a/img/@pm-lazy-src").get()
         if imgsrc is not None and imgsrc.find(
                 "data:") == -1 and imgsrc.find("http") != -1:
             headline["imgurl"] = imgsrc
         elif imgsrc2 is not None and imgsrc2.find("http") != -1:
             headline["imgurl"] = imgsrc2
         headline["id"] = post.xpath("@data-event-tracking").get().split(
             "|")[-2]
         if self.should_get_article(headline["id"]):
             yield scrapy.Request(url=headline["url"],
                                  meta={
                                      "dont_cache": False,
                                      "headline": headline
                                  },
                                  callback=self.parse_body)
     self.page += 1
     yield scrapy.Request(url=self.HOST.format(self.page),
                          meta={"dont_cache": self.dont_cache})
Example #3
 def parse(self, response):
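     # The API returns pre-rendered HTML inside the JSON "rendering" field; wrap
     # it in an HtmlResponse so the story cards can be queried with CSS selectors.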
     html = json.loads(response.body_as_unicode())["rendering"]
     res = scrapy.http.HtmlResponse(url=response.url,
                                    body=html,
                                    encoding="utf-8")
     stories = res.css("div.c-card>a")
     for story in stories:
         url = f"{self.HOST}{story.xpath('@href').get()}"
         id = url.split("/")[-2]
         author = story.css("span.c-card__author::text").get()
         title = story.css("div.c-card__hed-text::text").get()
         imgurl = story.css("img.c-image").xpath("@src").get()
         headline = Headline({
             "id": id,
             "url": url,
             "author": author,
             "title": title,
             "imgurl": imgurl
         })
         if self.should_get_article(headline["id"]):
             yield scrapy.Request(url=headline["url"],
                                  meta={
                                      "dont_cache": False,
                                      "headline": headline
                                  },
                                  callback=self.parse_body)
     self.last_id += 10
     if self.last_id <= 80:
         yield scrapy.Request(url=self.FETCH_HOST.format(self.last_id),
                              meta={"dont_cache": self.dont_cache})
Example #4
def get_headlines(url):
    """
    Gets headlines from http://www.newsapi.org

    :param url: url
    :return: array of headlines
    """
    headlines = []

    req = urllib.request.Request(url)
    response = urllib.request.urlopen(req).read().decode('utf8')
    r = json.loads(response)

    # Today's date; reused for any article that lacks a publishedAt value.
    prev_published_at = str(datetime.now()).split(" ")[0]

    for article in r['articles']:

        if str(article['publishedAt']) == 'None':
            published_at = prev_published_at
        else:
            published_at = str(article['publishedAt']).split('T')[0]
            prev_published_at = published_at

        h = Headline(article['title'].split('\n')[0], article['url'],
                     article['source']['id'] or article['source']['name'],
                     published_at, article['urlToImage'])
        headlines.append(h)

    return headlines
Example #5
def classify_headlines(headlines, classifier):
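    # Compute bait features for every headline and return the fraction that the
    # classifier labels as 'bait'.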
    features = [bait_features(Headline(headline)) for headline in headlines]
    label_list = []
    for feat in features:
        label_list.append(classifier.classify(feat))
    bait_count = label_list.count('bait')
    return bait_count / len(label_list)
Example #6
 def parse(self, response):
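    # Build a Headline for each "div.story" card, track the last seen story id,
    # and use it to request the next batch of stories.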
   stories = response.css("div.story")
   for story in stories:
     headline = Headline()
     article = story.css("article")
     a = story.css("h3.story-h").xpath("a")
     headline["url"] = a.xpath("@href").get()
     headline["title"] = a.xpath("text()").get()
     headline["title2"] = article.css("div.story-txt").css("p::text").get()
     headline["id"] = story.xpath("@data-post_id").get()
     headline["imgurl"] = article.css("img.story-img").xpath("@src").get()
     self.last_id = int(headline["id"])
     if self.should_get_article(headline["id"]):
        yield scrapy.Request(url=headline["url"],
                             meta={"dont_cache": False, "headline": headline},
                             callback=self.parse_body)
   url = self.HOST.format(self.last_id)
    yield scrapy.Request(url=url, meta={"dont_cache": self.dont_cache})
Example #7
 def parse(self, response):
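     # Story metadata is embedded as JSON in the data-evt-val attribute; fall
     # back to the last URL segment when it contains no id.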
     stories = response.css("article.post")
     for story in stories:
         headline = Headline()
         headline["url"] = story.css(".entry-title>a").xpath("@href").get()
         headline["title"] = story.css(".entry-title>a::text").get()
         data = json.loads(story.xpath("@data-evt-val").get())
         headline["id"] = data["story"]["id"] if data["story"][
             "id"] else headline["url"].split("/")[-1]
         headline["imgurl"] = story.css(
             "img.attachment-post-thumbnail").xpath("@src").get()
         if self.should_get_article(headline["id"]):
             yield scrapy.Request(url=headline["url"],
                                  meta={
                                      "dont_cache": False,
                                      "headline": headline
                                  },
                                  callback=self.parse_body)
Example #8
 def extract_headlines(self):
     """ Gather all headline information from the passed html code and return them as a list.
     :return list of headlines
     """
     self.headlines.clear()
     result_block = self.raw_html.find_all('div', attrs={'class': 'g'})
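     # Each result block is expected to hold a link, an <h3> title and an
     # optional description snippet.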
     for result in result_block:
         link = result.find('a', href=True)
         title = result.find('h3')
         description = result.find('span', attrs={'class': 'st'})
         if link and title:
             link = link['href']
             title = title.get_text()
             if description:
                 description = description.get_text()
             if link != '#':
                 self.headlines.append(Headline(title, link, description))
     return self.headlines
Example #9
 def parse(self, response):
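     # Each ".story" block links to a thestar.com article; the last URL segment
     # (minus its extension) is used as the headline id.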
     stories = response.css(".story")
     for story in stories:
         headline = Headline()
         a = story.xpath("div[@class='story__body']/span/span/a")
         url = f"https://www.thestar.com{a.xpath('@href').get()}"
         title = a.xpath("span[@class='story__headline']/text()").get()
         title2 = a.xpath("p[@class='story__abstract']/text()").get()
         headline["url"] = url
         headline["title"] = title
         headline["title2"] = title2
         headline["id"] = url.split("/")[-1].split(".")[0]
         if self.should_get_article(headline["id"]):
             yield scrapy.Request(url=headline["url"],
                                  meta={
                                      "dont_cache": False,
                                      "headline": headline
                                  },
                                  callback=self.parse_body)
Example #10
 def parse(self, response):
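     # The post id comes from the article element's id attribute and the image
     # URL from its lazy-loading data-src; pages are requested up to page 10.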
     stories = response.css("article.post")
     for story in stories:
         headline = Headline()
         headline["url"] = story.css("div.row>div.text>header>a").xpath(
             "@href").get()
         headline["title"] = story.css("div.row>div.text>header>a").xpath(
             "@title").get()
         headline["title2"] = story.css(
             "div.row>div.text>header>a>div.excerpt>p::text").get()
         headline["id"] = story.xpath("@id").get().split("-")[-1]
         headline["imgurl"] = story.css("img").xpath("@data-src").get()
         if self.should_get_article(headline["id"]):
             yield scrapy.Request(url=headline["url"],
                                  meta={
                                      "dont_cache": False,
                                      "headline": headline
                                  },
                                  callback=self.parse_body)
     self.page += 1
     if self.page <= 10:
         yield scrapy.Request(url=self.HOST.format(self.page),
                              meta={"dont_cache": self.dont_cache})
Example #11
def create_headlines():
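    # Load the pickled headline tuples, wrap each in a Headline, and return
    # them shuffled.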
    with open('headlines.p', 'rb') as f:
        headline_tuples = pickle.load(f)
    headlines = [Headline(h[0], h[1]) for h in headline_tuples]
    random.shuffle(headlines)
    return headlines
Example #12
def create_predictable_list(headline_string):
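    # Wrap a single headline string in a Headline object and return its
    # x-values as a one-element list, ready to feed to a predictor.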
    predictor_as_object = Headline(headline_string, "none")
    predictable_values = []
    predictable_values.append(create_x_vals(predictor_as_object))
    return predictable_values