def main(): now = datetime.datetime.now() current_time = now.strftime("%Y-%m-%d %H:%M:%S") one_day = now - datetime.timedelta(days=1) one_day_ago = one_day.strftime('%Y-%m-%d %H:%M:%S')[:16] history_day = (now - datetime.timedelta(days=10)).strftime('%Y-%m-%d %H:%M:%S')[:16] conn = None try: conn = get_conn() with conn: sql_max_date = """ SELECT max(CASE WHEN news_source='新浪财经' THEN news_date END), max(CASE WHEN news_source='华尔街见闻' THEN news_date END) FROM news_cj """ res = execute_select(conn, sql_max_date) max_date_sina = res[0][0] if res[0][0] else one_day_ago max_date_news = res[0][1] if res[0][1] else one_day_ago sql_delete = """ DELETE FROM news_cj WHERE news_date <= %s OR (news_source='华尔街见闻' AND news_date=%s) OR (news_source='新浪财经' AND news_date=%s) """ execute_sql(conn, sql_delete, (history_day, max_date_news, max_date_sina)) get_news(conn, max_date_news, current_time) get_sina_news(conn, max_date_sina, current_time) except Exception as e: logger.error(str(e)) finally: if conn: conn.close() sys.exit()
def deal(): remv_his_log() last_mon = (now - datetime.timedelta(days=20)).strftime("%Y-%m-%d %H:%M:%S") str_his_dtm = (now - datetime.timedelta(days=60)).strftime("%Y-%m-%d %H:%M:%S") logger.debug('last_mon:%s, str_his_dtm:%s ' % (last_mon, str_his_dtm)) conn = None try: conn = get_conn() with conn: del_his_info(conn, str_his_dtm) # 查找出关键字 sql_key_word = " SELECT word FROM infos.hot_keyword " re_key_word = execute_select(conn, sql_key_word) key_word_list = [] for key_word in re_key_word: key_word_list.append(key_word[0]) # 查找网站地址 sql_url_link = " SELECT url, url_nm FROM infos.url_link " re_url_link = execute_select(conn, sql_url_link) url_list = [] for url in re_url_link: url_list.append(url[0]) # 查找相关百家号 sql_bj_num = " SELECT bj_num FROM infos.url_link WHERE bj_num>'' " re_bj_num = execute_select(conn, sql_bj_num) bj_num_list = [] for bj_num in re_bj_num: bj_num_list.append(bj_num) key_word_srch(conn, last_mon, key_word_list, url_list, bj_num_list) except Exception as e: logger.error(str(e)) finally: if conn: conn.close()
def remv_repeat_info(conn, sql_params): """ 判断是否有重复再进行插入 :param conn: :param sql_params: :return: """ sql_rept = " SELECT url_nm FROM infos.rslt_url WHERE url_nm=%s " rept_re = execute_select(conn, sql_rept, (sql_params[0], )) if len(rept_re) == 0: logger.debug('插入sql:%s' % sql_info) logger.debug('数据参数:%s' % sql_params) execute_sql(conn, sql_info, sql_params)
def read_comment(conn): logger.info('读取数据库中数据...read_comment') film_critics = [] sql_select = "SELECT * FROM db_movie" params = '100' result = execute_select(conn, sql_select) print(result) for res in result: logger.debug(res) comment = res[2] if comment: film_critics.append(comment) return film_critics
def read_comment(conn): logger.info('读取数据库中数据...read_comment') sina_li = [] comment_li = [] user_li = [] sql_select = "SELECT * FROM sina_comment" params = '1000' result = execute_select(conn, sql_select) logger.debug(result) for res in result: logger.debug(res) if res not in sina_li: sina_li.append([res[0], res[1], res[2], res[3], res[4]]) user_name = res[1] user_li.append(user_name) comment = res[3] if comment: comment_li.append(comment) return sina_li, comment_li, user_li
def spider_Item(self, item, spider): try: with conn: sql_repeat = """ select * from public.db_movie where user_name=%s """ print('piplines') res = execute_select(conn, sql_repeat, item['user_name']) if not res[0]: sql_insert = """ INSERT INTO public.db_movie(user_name, comment_time, film_critics) VALUES(%s, %s, %s) """ execute_sql(conn, sql_insert, item) print('增加数据') else: pass finally: if conn: conn.close()