示例#1
0
 def db_prepare(cls):
     """
     Create the database and table for Sina news (only 2018 is considered
     for now).

     :return: None
     """
     database = 'sina_news'
     table = 'sina_mid'
     MysqlHelper.create_database(database)
     MysqlHelper.create_news_table_mid(database, table)
 def save_one_news(cls, url, db_name, table_name):
     """
     Extract one news article by URL and insert it through ``MysqlHelper``.

     Nothing is inserted when the extractor returns a falsy result.

     :param url: article URL handed to ``WangyiNewsExtractor.news_info_by_url``
     :param db_name: database name forwarded to ``insert_news_mid``
     :param table_name: table name forwarded to ``insert_news_mid``
     :return: None
     """
     article = WangyiNewsExtractor.news_info_by_url(url)
     if not article:
         # Extraction produced nothing, so there is no row to store.
         return
     # Topic and summary are stored as empty strings; the source label is fixed.
     MysqlHelper.insert_news_mid(
         db_name=db_name,
         table_name=table_name,
         news_title=article["title"],
         pub_time=article["time"],
         content=article["content"],
         news_src="网易",
         news_link=article["url"],
         category=article["category"],
         topic='',
         summary='',
         polarity=article["polarity"],
     )
示例#3
0
 def save_old_news_day(cls, date):
     news_list = SinaNewsBrief.fetch_news_by_day(date)
     print 'news_list_get'
     count = 0
     for news_block in news_list:
         try:
             news_url = news_block['url']
             news_content = Html2Article.url2article(news_url)
             page_content = requests.get(news_url).content
             news_keyword = SinaNewsExtract.extract_keywords(page_content)
             polarity = SentiCalc.score_calc(news_block["title"])
             MysqlHelper.insert_news_mid('sina_news', 'sina_mid', news_block['title'], news_block['time'], news_content, '新浪', news_block['url'], news_block['category'], news_keyword, '', polarity=str(polarity))
             print count, polarity, news_block['title']
             count += 1
         except Exception, e:
             print e
 def news_analyze(cls, query_body):
     """
     Single-article analysis: resolve the article text for an exact URL,
     reading it from MySQL first and crawling the web only as a fallback.

     :param query_body: mapping with the keys ``"news_url"`` and
         ``"news_src"``; ``news_src`` selects the table through ``src_dict``
     :return: nothing in the visible code
     :raises KeyError: if ``query_body`` lacks ``"news_url"`` or ``"news_src"``

     NOTE(review): ``news_content`` is neither returned nor used in the
     visible code; the rest of the analysis appears to be missing here.
     """
     news_content = ""
     news_url = query_body["news_url"]
     news_src = query_body["news_src"]
     # Source label -> "<database>.<table>" holding that source's articles.
     src_dict = {
         "新浪": "sina_news.sina_mid",
         "网易": "wangyi_news.wangyi_mid",
         "腾讯": "qq_news.qq_mid",
         "搜狐": "souhu_news.souhu_mid",
         "新华": "xinhua_news.xinhua_mid"
     }
     found_in_db = False
     try:
         # The table name comes from the fixed mapping above, so only the
         # caller-supplied URL has to be bound as a query parameter.
         sql_statement = "select * from {0} where news_link=%s;".format(
             src_dict[news_src])
         # NOTE(review): assumes create_conn() returns a connection owned by
         # this call, so closing it here is safe - confirm in MysqlHelper.
         conn = MysqlHelper.create_conn()
         try:
             cur = conn.cursor()
             try:
                 cur.execute(sql_statement, (news_url,))
                 mysql_res = cur.fetchall()
             finally:
                 cur.close()
         finally:
             conn.close()
         if len(mysql_res) > 0:
             # Column index 3 is read as the article content.
             news_content = mysql_res[0][3]
             found_in_db = True
     except Exception:
         # Unknown source, database error or malformed row: use the fallback.
         found_in_db = False
     if not found_in_db:
         # Not available from the database: crawl it from the web (the
         # original note says this uses the Xinhua site's session).
         news_content = Html2Article.url2article(news_url)
示例#5
0
# -*- coding:utf-8 -*-
import sys
import requests
import pymysql
from elasticsearch import Elasticsearch
from model.db_operate.mysql_helper import MysqlHelper
reload(sys)
sys.setdefaultencoding('utf-8')

# Full import from the MySQL databases into Elasticsearch.
# Connection and cursor obtained through the project helper; the cursor runs
# the per-table select further down.
conn = MysqlHelper.create_conn()
cur = conn.cursor()

# Elasticsearch client constructed with no arguments (library defaults).
es = Elasticsearch()

# "<database>.<table>" names; each one is substituted into a
# "select * from ..." statement by the loop that follows.
db_table_list = [
    "qq_news.qq_mid", "sina_news.sina_mid", "souhu_news.souhu_mid",
    "wangyi_news.wangyi_mid", "xinhua_news.xinhua_mid"
]
for db_table in db_table_list:
    sql_statement = "select * from %s;" % db_table
    cur.execute(sql_statement)
    res = cur.fetchall()

    for news in res:
        try:
            title = news[1]
            pub_time = str(news[2]).split()[0]
            news_content = news[3]
            src = news[4]
            url = news[5]
示例#6
0
# -*- coding:utf-8 -*-
import sys
from model.db_operate.mysql_helper import MysqlHelper
import pymysql
reload(sys)
sys.setdefaultencoding('utf-8')

# create db and tables
if __name__ == "__main__":
    # (database, table) pairs, created in exactly this order.
    targets = (
        ('souhu_news', 'souhu_mid'),
        ('qq_news', 'qq_mid'),
        ('wangyi_news', 'wangyi_mid'),
        ('xinhua_news', 'xinhua_mid'),
        ('sina_news', 'sina_mid'),
    )
    for database, table in targets:
        MysqlHelper.create_database(database)
        MysqlHelper.create_news_table_mid(database, table)
    print("db and table created!")