def parse_item(self, response, url):
    """Parse a news detail page into a populated News item.

    Each field is extracted best-effort: if the page layout changed or an
    element is missing, the field falls back to '未知' ("unknown") so a
    single malformed page never aborts the crawl.

    :param response: scrapy/parsel-style response supporting ``.xpath()``
    :param url: the page URL, stored on the item unchanged
    :return: a News item with title, date, content, url, spider_name set
    """
    try:
        title = response.xpath('//h2[@class="titl"]/text()')[0].strip()
    except Exception:
        # deliberate broad catch: best-effort field extraction
        title = '未知'
    try:
        # first whitespace-separated token of the span text is the raw date;
        # arrow normalizes it, and the 'T' split keeps only YYYY-MM-DD
        date = response.xpath('//p[@class="Wh"]/span[1]/text()')[0].strip().split()[0]
        date = str(arrow.get(date)).split('T')[0]
    except Exception:
        date = '未知'
    try:
        con_list = response.xpath('//div[@class="detailCont"]/p')
        # NOTE(review): 'pasre_content' looks like a typo of 'parse_content',
        # but it is an external helper on this class — name kept as-is.
        content = self.pasre_content(con_list)
    except Exception:
        content = '未知'
    item = News()
    item.title = title
    item.date = date
    item.content = content
    item.url = url
    item.spider_name = 'jingji'
    return item
def parser_item(self, item):
    """Build a News record from an ``<a>`` element on the AMAC list page
    and append it to ``self.newslist``."""
    href = item.xpath('./@href')[0]
    entry = News()
    entry.spider_name = 'amac'
    entry.url = self.parser_url(href, 'http://www.amac.org.cn')
    entry.title = item.xpath('./text()')[0]
    self.newslist.append(entry)
def parser_item(self, item):
    """Build a News record from a MOHURD list entry and append it to
    ``self.newslist``.

    The publish date lives in the sibling following the link's parent,
    bracketed and dot-separated — brackets are sliced off and dots
    become dashes.
    """
    entry = News()
    entry.spider_name = 'mohurd'
    entry.url = item.xpath('./@href')[0]
    entry.title = item.xpath('./text()')[0]
    raw_date = item.getparent().getnext().xpath('./text()')[0]
    entry.date = raw_date[1:-1].replace('.', '-').strip()
    self.newslist.append(entry)
def toutiao_news_api(url):
    """Fetch one Toutiao feed page and convert its entries to News objects.

    Side effects: stores the decoded JSON response in the module-level
    ``data`` and the pagination cursor in ``max_behot_time`` — callers
    elsewhere read these globals to build the next request
    (TODO: refactor into return values).

    :param url: full Toutiao feed API URL to fetch
    :return: list of News objects; ads and media-account promos excluded
    """
    # Abuyun dynamic proxy tunnel (credentials redacted in source)
    proxy_host = "http-dyn.abuyun.com"
    proxy_port = "9020"
    proxy_user = "******"
    proxy_pass = "******"
    proxy_meta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
        "host": proxy_host,
        "port": proxy_port,
        "user": proxy_user,
        "pass": proxy_pass,
    }
    proxies = {
        "http": proxy_meta,
        "https": proxy_meta,
    }
    ua = UserAgent(verify_ssl=False)
    headers = {
        'cookie': 'tt_webid=6825236887406953998; s_v_web_id=verify_ka17kc91_J51hfIgB_1Ujy_4F87_AQ77_v44SCeaZdYbb; WEATHER_CITY=%E5%8C%97%E4%BA%AC; __tasessionId=ftj73c94a1589124278466; tt_webid=6825236887406953998; csrftoken=3bc73a541ff3c196706a5fa652baa10a; ttcid=93c87bb6d2c44204a824c060f2a0344b39; SLARDAR_WEB_ID=167cd898-158d-4682-84b7-515f808f9c49; tt_scid=nvrgh8BUDb5bfXypX.EbNgFcMiVjrSr7vdwnPAab2w2tEn2I8DLcdmqRb2aAGGvT6b9b',
        'user-agent': ua.random,
        'x-requested-with': 'XMLHttpRequest'
    }
    toutiao_data = requests.get(url, headers=headers, proxies=proxies).text
    global data
    data = json.loads(toutiao_data)
    global max_behot_time
    max_behot_time = data['next']['max_behot_time']
    news_list = []
    link_head = 'http://toutiao.com'
    for n in data['data']:
        # skip untitled rows, ads, and media-account promos
        if 'title' not in n or n['tag'] in ('ad', 'news_media'):
            continue
        news = News()
        news.title = n['title']
        print(news.title)
        news.tag = n['tag']
        news.source = n['source']
        # behot_time is a unix timestamp; render as local "YYYY-MM-DD HH:MM:SS"
        time_local = time.localtime(n['behot_time'])
        news.news_date = time.strftime("%Y-%m-%d %H:%M:%S", time_local)
        print(news.news_date)
        news.source_url = link_head + n['source_url']
        news_list.append(news)
    return news_list
def parser_item(self, item):
    """Build a News record from one row of the CSRC announcement table
    and append it to ``self.newslist``."""
    link = item.xpath('./li[@class="mc"]/div/a/@href')[0]
    raw_date = item.xpath('./li[@class="fbrq"]/text()')[0]
    entry = News()
    entry.spider_name = 'csrc'
    entry.url = self.parser_url(link, 'http://www.csrc.gov.cn/pub/zjhpublic')
    entry.title = item.xpath('./li[@class="mc"]/div/a/text()')[0]
    # normalize whatever date format the site uses to YYYY-MM-DD
    entry.date = arrow.get(raw_date).format('YYYY-MM-DD')
    self.newslist.append(entry)
def parser_item(self, item):
    """Build a News record from a gov.cn list entry and append it to
    ``self.newslist``.

    NOTE(review): spider_name is 'circ' even though the base URL is
    gov.cn — kept as-is to preserve behavior; confirm intent.
    """
    link = item.xpath('./a/@href')[0]
    entry = News()
    entry.spider_name = 'circ'
    entry.url = self.parser_url(link, 'http://www.gov.cn')
    entry.title = item.xpath('./a/text()')[0]
    entry.date = item.xpath('./span/text()')[0]
    self.newslist.append(entry)
def parser_item(self, item):
    """Build a News record from a CIRC list entry and append it to
    ``self.newslist``; entries linking to search pages are skipped."""
    link = item.xpath('./a/@href')[0]
    if 'search' in link:
        return
    entry = News()
    entry.spider_name = 'circ'
    entry.url = self.parser_url(link, 'http://www.circ.gov.cn')
    entry.title = item.xpath('./a/text()')[0]
    # the following sibling's text carries the date wrapped in one
    # character on each side — slice them off
    entry.date = item.getnext().xpath('./text()')[0][1:-1]
    self.newslist.append(entry)
def get_html(self, url):
    """Fetch a CBRC list page and append each linked article to
    ``self.newslist``.

    :param url: list-page URL to scrape
    :return: result of ``self.parser_url(self.newslist)``
             (NOTE(review): other call sites pass ``(url, base)`` to
             parser_url — confirm this single-argument usage is intended)
    """
    resp = requests.get(url, headers=self.get_news_header())
    resp.encoding = 'utf-8'
    tree = etree.HTML(resp.text)
    for anchor in tree.xpath('//a[@class="STYLE8"]'):
        entry = News()
        entry.spider_name = 'cbrc'
        entry.url = anchor.xpath('./@href')[0]
        entry.title = anchor.xpath('./@title')[0]
        # date text sits in the sibling following the anchor's parent cell
        entry.date = anchor.getparent().getnext().xpath('./text()')[0].strip()
        self.newslist.append(entry)
    return self.parser_url(self.newslist)
def keyword_search(keyword):
    """Search Toutiao for *keyword* and return News objects not yet stored.

    Deduplicates against source URLs already in the database, as returned
    by ``select_source_url_returnset()``.

    :param keyword: search term (e.g. a "两会" topic keyword)
    :return: list of newly-seen News objects
    """
    known_urls = select_source_url_returnset()
    # bug fix: the original literal was '&keyword= ' (stray space), which
    # sent a leading-space search term to the API
    url = ('http://www.toutiao.com/search_content/?offset=0&format=json'
           '&keyword=' + keyword + '&autoload=true&count=200&cur_tab=1')
    toutiao_data = requests.get(url).text
    data = json.loads(toutiao_data)
    news_list = []
    link_head = 'http://toutiao.com'
    for n in data['data']:
        if 'title' not in n:
            continue
        news = News()
        news.title = n['title']
        news.tag = n['tag']
        news.source = n['source']
        news.source_url = link_head + n['source_url']
        # 两会关键词: the keyword we searched with
        news.keyword = keyword
        # 今日头条自带关键词: Toutiao's own keywords for the article
        news.keywords = n['keywords']
        # skip records whose source_url already exists in the database
        if news.source_url in known_urls:
            print('数据库已有该记录!')
            continue
        print('新添加记录:', news.title)
        news_list.append(news)
    return news_list