Exemplo n.º 1
0
def storage(number, title, timeout, format_text):
    """Assemble the news record for source "虎扑" and print it.

    ``timeout`` is stored as ``release_time``; the value returned by
    ``seconds(timeout)`` is stored as ``create_time``.

    NOTE(review): ``seconds`` is defined outside this block; presumably it
    converts the release time into a timestamp -- confirm.
    """
    record = dict(
        source_id=number,
        source_url="com.hupu.voice",
        newsType="news",
        title=title,
        release_time=timeout,
        create_time=seconds(timeout),
        format_text=format_text,
        source="虎扑",
    )
    print(record)
Exemplo n.º 2
0
def storage(number, title, timeout, format_text):
    """Build and print the news record for source "法制网".

    The release time is kept verbatim under ``release_time`` and is also
    passed to ``seconds(..., exist=True)`` to produce ``create_time``.

    NOTE(review): ``seconds`` and the meaning of its ``exist`` flag are
    defined elsewhere -- confirm against that helper.
    """
    created = seconds(timeout, exist=True)
    record = {
        "source_id": number,
        "source_url": "com.legaldaily.www",
        "newsType": "news",
        "title": title,
        "release_time": timeout,
        "create_time": created,
        "format_text": format_text,
        "source": "法制网",
    }
    print(record)
Exemplo n.º 3
0
def storage(number, data, format_text):
    """Build and print the news record for source "腾讯".

    ``data`` must provide the keys ``"title"`` and ``"pubtime"``; a missing
    key raises ``KeyError``.

    NOTE(review): ``seconds`` is defined outside this block; presumably it
    converts the publication time into a timestamp -- confirm.
    """
    headline = data["title"]
    published = data["pubtime"]
    record = dict(
        source_id=number,
        source_url="com.qq.news",
        newsType="news",
        title=headline,
        release_time=published,
        create_time=seconds(published),
        format_text=format_text,
        source="腾讯",
    )
    print(record)
Exemplo n.º 4
0
def storage(number, title, timeout, format_text):
    """Build and print the news record for source "中国新闻网".

    ``timeout`` is stored as ``release_time``; ``seconds(timeout)`` supplies
    ``create_time``.

    NOTE(review): ``seconds`` is defined outside this block -- confirm what
    it returns.
    """
    record = {"source_id": number}
    record["source_url"] = "com.chinanews.channel"
    record["newsType"] = "news"
    record["title"] = title
    record["release_time"] = timeout
    record["create_time"] = seconds(timeout)
    record["format_text"] = format_text
    record["source"] = "中国新闻网"
    print(record)
Exemplo n.º 5
0
def storage(message, timeout, format_text):
    """Build and print the news record for source "电影网".

    ``message`` must provide ``"contentid"`` (converted with ``int``) and
    ``"title"``. ``timeout`` is stored as ``release_time`` and passed to
    ``seconds(..., exist=True)`` for ``create_time``.

    NOTE(review): ``seconds`` and its ``exist`` flag are defined elsewhere
    -- confirm their semantics.
    """
    content_id = int(message["contentid"])
    headline = message["title"]
    created = seconds(timeout, exist=True)
    record = dict(
        source_id=content_id,
        source_url="com.dianyingjie.www",
        newsType="news",
        title=headline,
        release_time=timeout,
        create_time=created,
        format_text=format_text,
        source="电影网",
    )
    print(record)
Exemplo n.º 6
0
def storage(number, message, format_text):
    """Build and print the news record for source "中国新闻网".

    ``message`` must provide the keys ``"title"`` and ``"pubtime"``; the
    publication time is stored as ``release_time`` and passed to
    ``seconds(..., exist=True)`` for ``create_time``.

    NOTE(review): ``seconds`` and its ``exist`` flag are defined elsewhere
    -- confirm their semantics.
    """
    headline = message["title"]
    published = message["pubtime"]
    created = seconds(published, exist=True)
    record = {
        "source_id": number,
        "source_url": "com.chinanews.channel",
        "newsType": "news",
        "title": headline,
        "release_time": published,
        "create_time": created,
        "format_text": format_text,
        "source": "中国新闻网",
    }
    print(record)
Exemplo n.º 7
0
def storage(number, title, timeout, format_text):
    """Build and print the news record for source "中国台湾网".

    ``timeout`` is stored as ``release_time``; ``seconds(timeout)`` supplies
    ``create_time``.

    NOTE(review): ``seconds`` is defined outside this block -- confirm what
    it returns.
    """
    created = seconds(timeout)
    record = dict(
        source_id=number,
        source_url="cn.taiwan.www",
        newsType="news",
        title=title,
        release_time=timeout,
        create_time=created,
        format_text=format_text,
        source="中国台湾网",
    )
    print(record)
Exemplo n.º 8
0
def storage(message, format_text):
    """Build and print the news record for source "海外网".

    ``message`` must provide ``"guid"`` (converted with ``int``),
    ``"title"`` and ``"pubtime"``.

    NOTE(review): ``seconds`` is defined outside this block; presumably it
    converts the publication time into a timestamp -- confirm.
    """
    guid = int(message["guid"])
    headline = message["title"]
    published = message["pubtime"]
    record = {
        "source_id": guid,
        "source_url": "cn.haiwainet.opa",
        "newsType": "news",
        "title": headline,
        "release_time": published,
        "create_time": seconds(published),
        "format_text": format_text,
        "source": "海外网",
    }
    print(record)
Exemplo n.º 9
0
def storage(message, format_text):
    """Build and print the news record for source "中广互联".

    ``message`` must provide ``"articleid"`` (converted with ``int``),
    ``"title"`` and ``"showtime"``.

    NOTE(review): ``seconds`` is defined outside this block; presumably it
    converts the show time into a timestamp -- confirm.
    """
    article_id = int(message["articleid"])
    headline = message["title"]
    shown_at = message["showtime"]
    record = dict(
        source_id=article_id,
        source_url="com.tvoao.www",
        newsType="news",
        title=headline,
        release_time=shown_at,
        create_time=seconds(shown_at),
        format_text=format_text,
        source="中广互联",
    )
    print(record)
Exemplo n.º 10
0
def storage(message, format_text):
    """Build and print the news record for source "央视网".

    ``message`` must provide the keys ``"id"``, ``"title"`` and
    ``"dateTime"``; the id is stored without conversion.

    NOTE(review): ``seconds`` and its ``exist`` flag are defined elsewhere
    -- confirm their semantics.
    """
    item_id = message["id"]
    headline = message["title"]
    published = message["dateTime"]
    created = seconds(published, exist=True)
    record = {
        "source_id": item_id,
        "source_url": "com.cctv.news",
        "newsType": "news",
        "title": headline,
        "release_time": published,
        "create_time": created,
        "format_text": format_text,
        "source": "央视网",
    }
    print(record)
Exemplo n.º 11
0
def storage(number, title, author, timeout, format_text):
    """Build and print the news record for source "TechWeb".

    Unlike the author-less variants, the record also carries ``author``.
    ``timeout`` is stored as ``release_time``; ``seconds(timeout)`` supplies
    ``create_time``.

    NOTE(review): ``seconds`` is defined outside this block -- confirm what
    it returns.
    """
    record = dict(
        source_id=number,
        source_url="com.techweb.www",
        newsType="news",
        title=title,
        author=author,
        release_time=timeout,
        create_time=seconds(timeout),
        format_text=format_text,
        source="TechWeb",
    )
    print(record)
Exemplo n.º 12
0
def storage(number, title, author, timeout, format_text):
    """Build and print the news record (with author) for source "新华网".

    ``timeout`` is stored as ``release_time`` and passed to
    ``seconds(..., exist=True)`` for ``create_time``.

    NOTE(review): ``seconds`` and its ``exist`` flag are defined elsewhere
    -- confirm their semantics.
    """
    created = seconds(timeout, exist=True)
    record = {"source_id": number, "source_url": "com.xinhuanet.www"}
    record.update(
        newsType="news",
        title=title,
        author=author,
        release_time=timeout,
        create_time=created,
        format_text=format_text,
        source="新华网",
    )
    print(record)
Exemplo n.º 13
0
def storage(number, title, author, timeout, format_text):
    """Build and print the news record (with author) for source "北方网".

    Args:
        number: Identifier stored as ``source_id``.
        title: Article title.
        author: Article author, stored under the ``"author"`` key.
        timeout: Release time, stored as ``release_time`` and passed to
            ``seconds`` to produce ``create_time``.
        format_text: Article body, stored as ``format_text``.

    NOTE(review): ``seconds`` is defined outside this block; presumably it
    converts the release time into a timestamp -- confirm.
    """
    dicts = {
        "source_id": number,
        "source_url": "com.enorth.news",
        "newsType": "news",
        "title": title,
        # Key was misspelled "authoe"; every sibling record uses "author".
        "author": author,
        "release_time": timeout,
        "create_time": seconds(timeout),
        "format_text": format_text,
        "source": "北方网",
    }
    print(dicts)
Exemplo n.º 14
0
def storage(message, format_text):
    """Build and print the news record (with author) for source "新华网".

    ``message`` must provide ``"DocID"`` (converted with ``int``),
    ``"Title"``, ``"Author"`` and ``"PubTime"``.

    NOTE(review): ``seconds`` is defined outside this block; presumably it
    converts the publication time into a timestamp -- confirm.
    """
    doc_id = int(message["DocID"])
    headline = message["Title"]
    writer = message["Author"]
    published = message["PubTime"]
    record = dict(
        source_id=doc_id,
        source_url="com.xinhuanet.www",
        newsType="news",
        title=headline,
        author=writer,
        release_time=published,
        create_time=seconds(published),
        format_text=format_text,
        source="新华网",
    )
    print(record)
Exemplo n.º 15
0
def storage(number, title, timeout, format_text):
    """Build and print the news record for source "中工网".

    ``timeout`` is stored as ``release_time``; ``seconds(timeout)`` supplies
    ``create_time``. The record is only printed.

    NOTE(review): ``seconds`` is defined outside this block -- confirm what
    it returns.
    """
    created = seconds(timeout)
    record = {
        "source_id": number,
        "source_url": "com.workercn.www",
        "newsType": "news",
        "title": title,
        "release_time": timeout,
        "create_time": created,
        "format_text": format_text,
        "source": "中工网",
    }
    print(record)
    # Persistence call is currently disabled: storageDatabase(record)


# timestmap = time.strptime(timeout + ":00", "%Y/%m/%d %H:%M:%S")
#     timeout = time.strftime("%Y-%m-%d %H:%M:%S", timestmap)
Exemplo n.º 16
0
def download(html, number):
    """Extract one article from page HTML and hand its fields to ``storage``.

    The page is skipped (the function returns ``None`` without calling
    ``storage``) when any of the following is missing: a ``<video`` tag, the
    ``newscontent`` container, the "来源:澎湃新闻" source marker, the title,
    the publication time, the responsible editor, or the article body.

    Args:
        html: Full page markup as a string.
        number: Identifier forwarded unchanged to ``storage``.

    NOTE(review): ``seconds`` and ``storage`` are defined outside this block;
    the 7-argument ``storage`` call is kept exactly as in the original --
    confirm it matches the intended ``storage`` signature.
    """
    # Only pages that contain a <video tag are processed.
    if "<video" not in html:
        return

    container = re.search(
        r'<div class="newscontent"[\s\S]*?<div id="one"', html)
    if container is None:
        return
    data = container.group(0)

    # Kept as the list of matches, which is what the original forwarded.
    source = re.findall('来源:澎湃新闻', data)
    if not source:
        return

    title_match = re.search(
        r'(<h1 class="news_title">)([\s\S]*?)(</h1>)', data)
    if title_match is None:
        return
    title = title_match.group(2)

    time_match = re.search(r'\d+-\d+-\d+ \d+:\d+', data)
    if time_match is None:
        return
    timeout = time_match.group(0)
    create_time = seconds(timeout, exist=True)

    author_match = re.search(r'(>责任编辑:)([\s\S]*?)(<)', data)
    if author_match is None:
        return
    author = author_match.group(2)

    # Preferred body: text up to an <audio tag; otherwise fall back to the
    # text that ends before the "go_to_topic" block.
    text_match = re.search(
        r'(<div class="news_txt"[\s\S]*?>)([\s\S]*?)(<audio)', data)
    if text_match is None:
        text_match = re.search(
            r'(<div class="news_txt"[\s\S]*?>)([\s\S]*?)'
            r'(</div>[\s\S]*?<div class="go_to_topic">)',
            data,
        )
    if text_match is None:
        return
    format_text = text_match.group(2)

    # Normalise nested <div> wrappers inside the body to paragraphs.
    format_text = re.sub(r'<div[\s\S]*?>', "<p>", format_text)
    format_text = format_text.replace("</div>", "</p>")

    storage(number, title, author, timeout, create_time, format_text, source)