Exemplo n.º 1
0
def jfinfo2(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The time text comes from the first ``span`` of ``div.t-tit`` and is
    converted with ``my_utils.get_timestamp``; the body is the concatenated
    paragraph text of ``div.t-context f16 picture``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    stamp_nodes = root.xpath("//div[@class='t-tit']/span[1]/text()")
    stamp = my_utils.get_timestamp("".join(stamp_nodes))
    body_nodes = root.xpath("//div[@class='t-context f16 picture']/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 2
0
def ybzhan1(root, item):
    """Store the timestamp and body text of a page on ``item``.

    Only the first text node matched by ``span.time`` is used as the time
    text. It is converted with ``my_utils.get_timestamp`` and stored under
    ``'timestamp'``; the text of the child ``div`` elements of
    ``#newsContent`` is concatenated and stored under ``'content'``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_nodes = root.xpath("//span[@class='time']/text()")
    # Slice instead of indexing so a page without a matching node yields an
    # empty time text (as in the sibling extractors) rather than IndexError.
    time_text = "".join(time_nodes[:1])
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)
    content = "".join(root.xpath("//div[@id='newsContent']/div/text()"))
    item.set('content', content)
Exemplo n.º 3
0
def cheaa1(root, item):
    """Store the timestamp and body text of a page on ``item``.

    The time text is built from the direct text of ``#NewsInfo``; the body
    is the concatenated paragraph text of ``#ctrlfscont``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    info_line = "".join(root.xpath("//div[@id='NewsInfo']/text()"))
    # Only the first two whitespace-separated tokens are used, fused without
    # a separator (presumably date and time -- confirm against a live page).
    head_tokens = info_line.strip().split()[:2]
    stamp = my_utils.get_timestamp("".join(head_tokens))
    paragraphs = root.xpath("//div[@id='ctrlfscont']/p/text()")
    body = "".join(paragraphs)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 4
0
def tom1(root, item):
    """Copy the timestamp and body text from ``root`` onto ``item``.

    The stripped text of ``span.infor_time`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``div.news_box_text``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    raw_time = "".join(root.xpath("//span[@class='infor_time']/text()"))
    stamp = my_utils.get_timestamp(raw_time.strip())
    item.set('timestamp', stamp)
    paragraphs = root.xpath("//div[@class='news_box_text']/p/text()")
    item.set('content', "".join(paragraphs))
Exemplo n.º 5
0
def zgny1(root, item):
    """Copy the timestamp and body text from ``root`` onto ``item``.

    The time text is taken from the text of ``p.xinXi``; the body is the
    concatenated paragraph text of ``div.wenZi_02``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    info_line = "".join(root.xpath("//p[@class='xinXi']/text()"))
    # The 4th and 5th whitespace-separated tokens are fused without a
    # separator (presumably date and time -- confirm against a live page).
    picked = info_line.split()[3:5]
    stamp = my_utils.get_timestamp("".join(picked))
    item.set('timestamp', stamp)
    paragraphs = root.xpath("//div[@class='wenZi_02']/p/text()")
    item.set('content', "".join(paragraphs))
Exemplo n.º 6
0
def cri1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``#acreatedtime`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``#abody``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    created_nodes = root.xpath("//span[@id='acreatedtime']/text()")
    stamp = my_utils.get_timestamp("".join(created_nodes))
    item.set('timestamp', stamp)
    body_nodes = root.xpath("//div[@id='abody']/p/text()")
    item.set('content', "".join(body_nodes))
Exemplo n.º 7
0
def xinhua2(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The stripped text of ``span.time`` inside ``div.source`` is converted
    with ``my_utils.get_timestamp``; the body is the concatenated paragraph
    text of ``#p-detail``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    # The expression has to be evaluated against the document; joining the
    # expression string itself would hand the XPath text to get_timestamp.
    time_text = "".join(
        root.xpath("//div[@class='source']/span[@class='time']/text()"))
    time_text = time_text.strip()
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)
    content = "".join(root.xpath("//div[@id='p-detail']/p/text()"))
    item.set('content', content)
Exemplo n.º 8
0
def machine3651(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The text of ``div.newliIn_Sti`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``div.newliIn_Zti``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    header_nodes = root.xpath("//div[@class='newliIn_Sti']/text()")
    stamp = my_utils.get_timestamp("".join(header_nodes))
    body_nodes = root.xpath("//div[@class='newliIn_Zti']/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 9
0
def takungpao1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of the first ``span`` in ``div.tkp_con_author`` is converted
    with ``my_utils.get_timestamp``. The body gathers every descendant text
    node of ``div.tkp_content`` (``//text()``), not just paragraph text.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    author_nodes = root.xpath("//div[@class='tkp_con_author']/span[1]/text()")
    stamp = my_utils.get_timestamp("".join(author_nodes))
    item.set('timestamp', stamp)
    all_text = root.xpath("//div[@class='tkp_content']//text()")
    item.set('content', "".join(all_text))
Exemplo n.º 10
0
def p21jingji1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The time text is assembled from two places: the first ``span`` of
    ``p.Wh`` and ``span.hour``, joined by a single space, then converted
    with ``my_utils.get_timestamp``. The body is the concatenated paragraph
    text of ``div.detailCont``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    day_part = "".join(root.xpath("//p[@class='Wh']/span[1]/text()"))
    hour_part = "".join(root.xpath("//span[@class='hour']/text()"))
    stamp = my_utils.get_timestamp(" ".join([day_part, hour_part]))
    item.set('timestamp', stamp)
    body_nodes = root.xpath("//div[@class='detailCont']/p/text()")
    item.set('content', "".join(body_nodes))
Exemplo n.º 11
0
def people1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The time text is the first whitespace-separated token of the text in
    ``div.fl`` under ``div.box01`` (empty when there is no token). The body
    is the concatenated paragraph text of ``#rwb_zw``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    box_text = "".join(
        root.xpath("//div[@class='box01']/div[@class='fl']/text()"))
    first_token = box_text.split()[:1]
    stamp = my_utils.get_timestamp("".join(first_token))
    body_nodes = root.xpath("//div[@id='rwb_zw']/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 12
0
def aweb1(root, item):
    """Store the timestamp and body text of a page on ``item``.

    Only the first text node of the ``h5`` under
    ``div.newsLeft newscontentB`` is used as the time text. It is converted
    with ``my_utils.get_timestamp`` and stored under ``'timestamp'``; the
    paragraph text of the first ``div.conT f14px`` is concatenated and
    stored under ``'content'``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_nodes = root.xpath("//div[@class='newsLeft newscontentB']/h5/text()")
    # Slice instead of indexing so a page without a matching node yields an
    # empty time text (as in the sibling extractors) rather than IndexError.
    time_text = "".join(time_nodes[:1])
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)
    content = "".join(root.xpath("//div[@class='conT f14px'][1]/p/text()"))
    item.set('content', content)
Exemplo n.º 13
0
def smm1(root, item):
    """Copy the timestamp and body text from ``root`` onto ``item``.

    The time text is taken from the ``span`` in the first ``label`` of
    ``p.news-tips``; the body is the concatenated paragraph text of
    ``div.news-main``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    tips_text = "".join(
        root.xpath("//p[@class='news-tips']/label[1]/span/text()"))
    # First two whitespace-separated tokens, fused without a separator
    # (presumably date and time -- confirm against a live page).
    head_tokens = tips_text.split()[:2]
    stamp = my_utils.get_timestamp("".join(head_tokens))
    item.set('timestamp', stamp)
    body_nodes = root.xpath("//div[@class='news-main']/p/text()")
    item.set('content', "".join(body_nodes))
Exemplo n.º 14
0
def china5e1(root, item):
    """Store the timestamp and body text of a page on ``item``.

    The time text is the first whitespace-separated token of the text in
    ``div.showtitinfo`` under ``div.showtitle``. It is converted with
    ``my_utils.get_timestamp`` and stored under ``'timestamp'``; the
    paragraph text of the first ``div`` in ``#showcontent`` is concatenated
    and stored under ``'content'``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_text = "".join(
        root.xpath(
            "//div[@class='showtitle']/div[@class='showtitinfo']/text()"))
    # Slice instead of indexing so empty or whitespace-only text yields an
    # empty time text rather than IndexError.
    time_text = "".join(time_text.split()[:1])
    ts = my_utils.get_timestamp(time_text)
    content = "".join(root.xpath("//div[@id='showcontent']/div[1]/p/text()"))
    item.set('timestamp', ts)
    item.set('content', content)
Exemplo n.º 15
0
def texnet1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The time text is taken from the text of ``p.line22 fontgrey``; the body
    is the concatenated paragraph text under
    ``div.detail-text line25 font14px``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    meta_line = "".join(root.xpath("//p[@class='line22 fontgrey']/text()"))
    # Second and third whitespace-separated tokens, joined by one space.
    picked = meta_line.split()[1:3]
    stamp = my_utils.get_timestamp(" ".join(picked))
    body_nodes = root.xpath(
        "//div[@class='detail-text line25 font14px']/div/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 16
0
def tnc1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The time text is taken from the paragraph text of
    ``div.article-title``; the body is the concatenated paragraph text of
    ``div.article-content c-mb-20``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    title_meta = "".join(root.xpath("//div[@class='article-title']/p/text()"))
    # First two whitespace-separated tokens, joined by one space.
    head_tokens = title_meta.split()[:2]
    stamp = my_utils.get_timestamp(" ".join(head_tokens))
    body_nodes = root.xpath("//div[@class='article-content c-mb-20']/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 17
0
def chinaventure1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The text of the third ``span`` in ``div.details_01_l`` is converted
    with ``my_utils.get_timestamp``; the body is the concatenated paragraph
    text of ``div.content_01 m_t_30 detasbmo``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    detail_nodes = root.xpath("//div[@class='details_01_l']/span[3]/text()")
    stamp = my_utils.get_timestamp("".join(detail_nodes))
    body_nodes = root.xpath(
        "//div[@class='content_01 m_t_30 detasbmo']/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 18
0
def chinabreed1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``#pub_date`` inside ``div.artInfo`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated text of the
    child ``div`` elements of ``#artibody`` under ``div.blkContainerSblk``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    date_nodes = root.xpath(
        "//div[@class='artInfo']/span[@id='pub_date']/text()")
    stamp = my_utils.get_timestamp("".join(date_nodes))
    item.set('timestamp', stamp)
    body_nodes = root.xpath(
        "//div[@class='blkContainerSblk']/div[@id='artibody']/div/text()")
    item.set('content', "".join(body_nodes))
Exemplo n.º 19
0
def ftchinese1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    Does nothing (and leaves ``item`` untouched) when ``root`` is ``None``.
    Otherwise the text of ``span.story-time`` is converted with
    ``my_utils.get_timestamp`` and the paragraph text of
    ``#story-body-container`` is concatenated as the body.

    Args:
        root: Node exposing ``xpath()``, or ``None``; presumably an lxml
            HTML tree (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    if root is None:
        return
    story_time_nodes = root.xpath("//span[@class='story-time']/text()")
    stamp = my_utils.get_timestamp("".join(story_time_nodes))
    body_nodes = root.xpath("//div[@id='story-body-container']/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 20
0
def jrj2(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The stripped text of the first ``span`` in ``p.inftop`` under
    ``div.titmain`` is converted with ``my_utils.get_timestamp``; the body
    is the concatenated paragraph text of ``div.texttit_m1`` under the same
    container.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    top_nodes = root.xpath(
        "//div[@class='titmain']/p[@class='inftop']/span[1]/text()")
    cleaned = "".join(top_nodes).strip()
    stamp = my_utils.get_timestamp(cleaned)
    item.set('timestamp', stamp)
    body_nodes = root.xpath(
        "//div[@class='titmain']/div[@class='texttit_m1']/p/text()")
    body = "".join(body_nodes)
    item.set('content', body)
Exemplo n.º 21
0
def qq1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``span.a_time`` inside ``div.a_Info`` is converted with
    ``my_utils.get_timestamp``. The body gathers every descendant text node
    of the paragraphs under the ``div`` whose id contains
    ``Cnt-Main-Article-QQ``, skipping paragraphs whose ``style`` attribute
    contains ``COLOR: red``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_nodes = root.xpath(
        "//div[@class='a_Info']/span[@class='a_time']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    item.set('timestamp', stamp)
    # Adjacent literals concatenate into one relative XPath expression.
    body_query = (
        ".//div[contains(@id,'Cnt-Main-Article-QQ')]"
        "/p[not(contains(@style,'COLOR: red'))]//text()"
    )
    body = "".join(root.xpath(body_query))
    item.set('content', body)
Exemplo n.º 22
0
def p21food1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The text of the first ``i`` in ``div.ws_det_p2`` under
    ``div.news_detail_t`` is converted with ``my_utils.get_timestamp``; the
    body is the concatenated paragraph text of ``div.ws_det_p3`` under the
    same container.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_query = (
        "//div[@class='news_detail_t']/div[@class='ws_det_p2']/i[1]/text()")
    stamp = my_utils.get_timestamp("".join(root.xpath(time_query)))
    body_query = (
        "//div[@class='news_detail_t']/div[@class='ws_det_p3']/p/text()")
    body = "".join(root.xpath(body_query))
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 23
0
def dayoo1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``span.time`` is converted with ``my_utils.get_timestamp``;
    the body is the concatenated paragraph text of ``#text_content``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_nodes = root.xpath("//span[@class='time']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    item.set('timestamp', stamp)
    body_nodes = root.xpath("//div[@id='text_content']/p/text()")
    item.set('content', "".join(body_nodes))
Exemplo n.º 24
0
def bbtnews1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of the ``span`` children of ``div.info`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``#pageContent``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    info_nodes = root.xpath("//div[@class='info']/span/text()")
    stamp = my_utils.get_timestamp("".join(info_nodes))
    item.set('timestamp', stamp)
    page_nodes = root.xpath("//div[@id='pageContent']/p/text()")
    item.set('content', "".join(page_nodes))
Exemplo n.º 25
0
def lswb1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of the first ``li`` in ``ol.breadcrumb`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``div.article-content``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    crumb_nodes = root.xpath("//ol[@class='breadcrumb']/li[1]/text()")
    stamp = my_utils.get_timestamp("".join(crumb_nodes))
    item.set('timestamp', stamp)
    article_nodes = root.xpath("//div[@class='article-content']/p/text()")
    item.set('content', "".join(article_nodes))
Exemplo n.º 26
0
def banyuetan1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``div.detail_tit_time`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``#detail_content``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    title_time_nodes = root.xpath("//div[@class='detail_tit_time']/text()")
    stamp = my_utils.get_timestamp("".join(title_time_nodes))
    item.set('timestamp', stamp)
    detail_nodes = root.xpath("//div[@id='detail_content']/p/text()")
    item.set('content', "".join(detail_nodes))
Exemplo n.º 27
0
def sohu3(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``#news-time`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of the ``article`` element with id ``mp-editor``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    news_time_nodes = root.xpath("//span[@id='news-time']/text()")
    stamp = my_utils.get_timestamp("".join(news_time_nodes))
    item.set('timestamp', stamp)
    editor_nodes = root.xpath("//article[@id='mp-editor']/p/text()")
    item.set('content', "".join(editor_nodes))
Exemplo n.º 28
0
def iresearch1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The text of the ``em`` children of ``div.origin`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``div.m-article``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    origin_nodes = root.xpath("//div[@class='origin']/em/text()")
    stamp = my_utils.get_timestamp("".join(origin_nodes))
    article_nodes = root.xpath("//div[@class='m-article']/p/text()")
    body = "".join(article_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
Exemplo n.º 29
0
def gongkong1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text matched by ``//span[@class='f1405'][3]`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``#article``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    span_nodes = root.xpath("//span[@class='f1405'][3]/text()")
    stamp = my_utils.get_timestamp("".join(span_nodes))
    item.set('timestamp', stamp)
    article_nodes = root.xpath("//div[@id='article']/p/text()")
    item.set('content', "".join(article_nodes))
Exemplo n.º 30
0
def carnoc1(root, item):
    """Fill ``item`` with the timestamp and body text found in ``root``.

    The text of ``#pubtime_baidu`` is converted with
    ``my_utils.get_timestamp``; the body is the concatenated paragraph text
    of ``#newstext``.

    Args:
        root: Node exposing ``xpath()``; presumably an lxml HTML tree
            (confirm against the caller).
        item: Object exposing ``set(key, value)``.
    """
    pubtime_nodes = root.xpath("//span[@id='pubtime_baidu']/text()")
    stamp = my_utils.get_timestamp("".join(pubtime_nodes))
    news_nodes = root.xpath("//div[@id='newstext']/p/text()")
    body = "".join(news_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)