def newspage(request):
    info = ts.get_latest_news(top=2, show_content=True)
    # Compare against the two most recently saved items to avoid duplicates.
    newstmp1 = models.News.objects.order_by('-pk')[0]
    newstmp2 = models.News.objects.order_by('-pk')[1]
    news = models.News()
    news.title = str(info.title[0])
    news.content = str(info.content[0])
    if news.title != newstmp1.title and news.title != newstmp2.title:
        news.save()
    news = models.News()
    news.title = str(info.title[1])
    news.content = str(info.content[1])
    if news.title != newstmp1.title and news.title != newstmp2.title:
        news.save()
    news = models.News.objects.order_by('-pk')
    news1 = news[0]
    news2 = news[1]
    news3 = news[2]
    news = news[3:]
    return render(request, 'news.html', {
        'news': news,
        'news1': news1,
        'news2': news2,
        'news3': news3
    })
def Newsget(self):
    PDnews = ts.get_latest_news(top=self.newslength, show_content=False)
    try:
        if PDnews.loc[0, 'time'] != self.latesttime:
            # Concatenate by row, keeping earlier polls in memory.
            self.Newsmemory = pandas.concat([self.Newsmemory, PDnews], axis=0)
            if self.show_c:
                try:
                    Content = None
                    C_C = 0
                    # Retry the content fetch up to 5 times.
                    while not Content and C_C < 5:
                        Content = ts.latest_content(PDnews.loc[0, 'url'])
                        C_C += 1
                except Exception:
                    print('latest_content api fail to load url:%s' % PDnews.loc[0, 'url'])
                    Content = ''
                self.latestnew = PDnews.loc[0, 'title'] + Content
            else:
                self.latestnew = PDnews.loc[0, 'title']
            print(PDnews[['classify', 'title', 'time']])
            if PDnews.loc[0, 'time']:
                print('old latesttime %s' % self.latesttime)
                self.latesttime = PDnews.loc[0, 'time']
                print('new latesttime %s' % self.latesttime)
    except Exception:
        print('Get Latest News Error')
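A minimal polling sketch for the Newsget method above; the NewsWatcher class, its attribute defaults, and the 60-second interval are assumptions, not part of the original code.

import time
import pandas

class NewsWatcher:
    """Hypothetical container for the state Newsget expects."""
    def __init__(self, newslength=5, show_c=False):
        self.newslength = newslength
        self.show_c = show_c
        self.latesttime = None
        self.latestnew = None
        self.Newsmemory = pandas.DataFrame()

NewsWatcher.Newsget = Newsget  # attach the method defined above

watcher = NewsWatcher()
while True:
    watcher.Newsget()
    time.sleep(60)  # assumed poll interval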
def get_news():
    # Re-schedule this job to run once a day.
    timer = Timer(24 * 60 * 60, get_news)
    timer.start()
    # Clear out the previously stored news.
    session = db.session()
    old = session.query(News).all()
    for one in old:
        session.delete(one)
    session.commit()
    session.close()
    # Fetch and store the 30 latest items.
    session = db.session()
    info = tushare.get_latest_news(top=30, show_content=True)
    for i in range(30):
        news = News(str(info.title[i]), str(info.content[i]), str(info.time[i]))
        session.add(news)
    session.commit()
    session.close()
def get_latest_news(self, topN):
    newslist = ts.get_latest_news(top=topN)
    latest_news = []
    # Collect and echo each headline.
    for _, record in newslist.iterrows():
        print(record['title'])
        latest_news.append(record['title'])
    return latest_news
def store_news():
    # Latest news from the Sina Guba stock forum.
    sina_news = ts.guba_sina(show_content=True)
    # Guard against inserting duplicates when the feed has not updated. 2018/04/22 0:33
    # TODO handle the case where the collection is empty
    for i in db.news.sina.find().sort([("_id", -1)]).limit(1):
        for j in sina_news[:6][-1:].get("title"):
            if i.get("title") != j:
                db.news.sina.insert_many(
                    json.loads(sina_news[:6].to_json(orient='records')))
    # The 6 most recent real-time news items.
    immediate_news = ts.get_latest_news(top=6, show_content=True)
    for i in db.news.immediate.find().sort([("_id", -1)]).limit(1):
        for j in immediate_news[-1:].get("title"):
            if i.get("title") != j:
                db.news.immediate.insert_many(
                    json.loads(immediate_news.to_json(orient='records')))
    # Per-stock "information mine" (notice) data.
    mines_news = ts.get_notices()
    if mines_news is not None:
        db.news.mines.insert_many(json.loads(mines_news.to_json(orient='records')))
def get_latest_news():
    col = db['sina_finance']
    df = ts.get_latest_news(show_content=True)
    for _, row in df.iterrows():
        if row['content'] is None:
            continue
        col.replace_one(
            {'url': row['url']},
            {
                'classify': row['classify'],
                'title': row['title'],
                'time': row['time'],
                'year': 2018,
                'url': row['url'],
                'content': [
                    c.strip() for c in row['content'].split('\n')
                    if len(c.strip()) > 0
                ],
                'crawl_time': datetime.datetime.fromtimestamp(time.time()),
            },
            upsert=True)
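A small read-back sketch for the collection populated above; the helper name, sort key, and limit are assumptions that follow the document shape written by replace_one.

def latest_articles(limit=20):
    # Hypothetical reader: newest stored articles first, sorted on the
    # crawl_time field written by the upsert above.
    col = db['sina_finance']
    return list(col.find().sort('crawl_time', -1).limit(limit))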
def news_real(engine):
    tbl = "news_real"
    cnt = 0
    tsl.log(tbl + " start...")
    df = ts.get_latest_news()
    if df is None:
        tsl.log("no latest news")
        return
    # Keep only items from the last two full hours.
    st = datetime.datetime.today()
    et = st - datetime.timedelta(hours=2)
    st = '%02i-%02i %02i:00' % (st.month, st.day, st.hour)
    et = '%02i-%02i %02i:00' % (et.month, et.day, et.hour)
    df = df[df.time >= et]
    df = df[df.time < st]
    urls = df.url.values
    contents = ['' for i in range(len(df))]
    for i in range(len(df)):
        if 'blog.sina.com.cn' in urls[i]:
            continue
        try:
            content = ts.latest_content(urls[i])
            if content is not None:
                contents[i] = str(content)
                cnt += 1
        except BaseException as e:
            print(e)
            print(urls[i])
def nlpHandle(nlp, mic, profile, wxbot=None):
    """
    Arguments:
    text -- user-input, typically transcribed speech
    mic -- used to interact with the user (for both input and output)
    profile -- contains information related to the user (e.g., phone number)
    wxbot -- wechat bot instance
    """
    def get_news(data):
        # Flatten a DataFrame into a numbered, space-joined message body.
        rs = []
        for index, row in data.iterrows():
            rs.append(str(index + 1))
            for col_name in data.columns:
                rs.append(row[col_name])
            rs.append('\n')
        return ' '.join(rs)

    sys.path.append(mic.dingdangpath.LIB_PATH)
    from app_utils import wechatUser
    text = nlp['text']
    if any(word in text for word in [u"财经新闻"]):  # "financial news"
        news = ts.get_latest_news(top=10, show_content=False)
        t = mic.asyncSay("已获取财经新闻," + ('将发送到您的微信'
                         if wxbot is not None else "篇幅较长,请登录微信获取"))
        if wxbot is not None:
            wechatUser(profile, wxbot, '财经新闻', get_news(news))
        t.join()
    elif any(word in text for word in [u"信息地雷"]):  # "information mines" (stock notices)
        orgName = None
        code = None
        items = nlp['items']
        # Prefer a recognized organization name; otherwise look for a code in the text.
        for item in items:
            if item['ne'] == u'ORG':
                orgName = item['item']
                break
        if orgName:
            code = db.get_instance().get_stock_code(orgName)
        else:
            m = pattern.search(text)
            if m:
                code = m.group(2)
        if code:
            orgName = db.get_instance().get_stock_name(code)
            if not orgName:
                mic.say("股票代码可能不存在")  # "the stock code may not exist"
                return
            notices = ts.get_notices(code)
            notices = notices[0:10]
            tit = orgName + '的信息地雷'
            t = mic.asyncSay("已获取" + tit + "," + ('将发送到您的微信'
                             if wxbot is not None else "篇幅较长,请登录微信获取"))
            if wxbot is not None:
                wechatUser(profile, wxbot, tit, get_news(notices))
            t.join()
        else:
            mic.say("没能获取股票代码")  # "could not get a stock code"
def test():
    ts.get_sz50s()
    ts.get_hs300s()
    ts.get_zz500s()
    ts.realtime_boxoffice()
    ts.get_latest_news()
    ts.get_notices(tk)
    ts.guba_sina()
    ts.get_cpi()
    ts.get_ppi()
    ts.get_stock_basics()
    ts.get_concept_classified()
    ts.get_money_supply()
    ts.get_gold_and_foreign_reserves()
    ts.top_list()       # daily dragon-tiger (top movers) list
    ts.cap_tops()       # per-stock appearances on that list
    ts.broker_tops()    # brokerage branch appearances on that list
    ts.inst_tops()      # institutional seat tracking statistics
    ts.inst_detail()
def prepare_data():
    latest_news = ts.get_latest_news()
    if latest_news is None or latest_news.empty:
        return
    latest_news_json = latest_news.to_json(orient='records')
    collection = mongoConfig.get_collection_default("latest_news")
    current_date = time.strftime("%Y-%m-%d")
    mongoConfig.clear_collection(collection)
    print(latest_news_json)
    mongoConfig.insert_json(collection, json.loads(latest_news_json))
def get_news_url(num: int = 1000) -> None:
    df = ts.get_latest_news(top=num, show_content=False)
    df['timestamp'] = int(time.time())
    data = df.to_dict(orient="records")

    def update(d):
        # Upsert keyed on the article URL.
        db.break_news.update_one({"url": d["url"]}, {"$set": d}, upsert=True)
        logging.info(d["url"])

    with ThreadPoolExecutor(max_workers=3) as executor:
        executor.map(update, data)
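A minimal scheduling sketch around get_news_url above; the driver function, interval, and logging setup are assumptions.

import logging
import time

logging.basicConfig(level=logging.INFO)

def run_forever(interval_seconds=600):
    # Hypothetical driver: refresh the break_news collection periodically.
    while True:
        get_news_url(num=1000)
        time.sleep(interval_seconds)  # assumed refresh interval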
def newspage(request):
    info = ts.get_latest_news(top=2, show_content=True)
    news = models.News()
    news.title = str(info.title[0])
    news.content = str(info.content[0])
    news.save()
    news = models.News()
    news.title = str(info.title[1])
    news.content = str(info.content[1])
    news.save()
    news = models.News.objects.all()
    return render(request, 'news.html', {'news': news})
def sina_news_notification(bot):
    '''
    Sina news notification
    :param:
    :return:
    '''
    path = sys.path[0]
    today_ISO = datetime.today().date().isoformat()
    # Initialize notices_last from today's saved file, or start empty.
    filename = 'notices'
    try:
        notices_last = pd.read_excel('./Notices/' + filename + '_' + today_ISO + '.xlsx')
    except Exception:
        notices_last = pd.DataFrame(
            columns=['classify', 'title', 'time', 'url', 'content'])
    today_md = strftime("%m-%d 00:00", localtime())
    if len(notices_last) > 0:
        notices_last = notices_last[notices_last.time >= today_md]
    try:
        notices = ts.get_latest_news(show_content=True)
        notices = notices[notices.time >= today_md]
        for title in notices.title:
            if len(notices_last[notices_last.title == title]) <= 0:
                notices_last = pd.concat(
                    [notices_last, notices[notices.title == title]])
                # Load keywords and notify the matching subscribers.
                filename = 'key_word'
                key_word = pd.read_excel(io=path + '/Notices/' + filename + '.xlsx')
                for word in key_word.key_word:
                    if word in title:
                        msg = title + ' - ' + notices[notices.title == title].iloc[0, 3]
                        if key_word[key_word.key_word == word].iloc[0, 1] == 'Y':
                            bot.friends().search('Yang Hui')[0].send(msg)
                        if key_word[key_word.key_word == word].iloc[0, 2] == 'Y':
                            bot.friends().search('欣')[0].send(msg)
    except Exception:
        print(
            strftime("%Y-%m-%d %H:%M:%S", localtime()) +
            '-Warning: tushare server maybe down! try again later.')
    # Save the accumulated notices.
    notices_last.sort_values(by='time', ascending=False, inplace=True)
    filename = 'notices'
    notices_last.to_excel(path + '/Notices/' + filename + '_' + today_ISO + '.xlsx')
    return
def get_news(top=None, show_content=True):
    """Fetch real-time financial news, covering domestic finance, securities,
    forex, futures, Hong Kong and US stock markets, and so on. The feed
    updates frequently, so a scheduled task can be used to poll it."""
    data_df = None
    logger.info('Begin get latest news.')
    try:
        data_df = ts.get_latest_news(top, show_content)
        now = str(get_time())
        logger.info('End get latest news.')
    except Exception:
        logger.exception('Error get latest news.')
    finally:
        return data_df
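A minimal caller sketch for the get_news wrapper above; the headline count and the CSV filename are assumptions.

if __name__ == '__main__':
    # Hypothetical usage: pull 20 headlines (no bodies) and persist them.
    df = get_news(top=20, show_content=False)
    if df is not None and not df.empty:
        df.to_csv('latest_news.csv', index=False, encoding='utf-8')  # assumed path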
def get_latest_news():
    try:
        df = ts.get_latest_news(top=5, show_content=True)
        del df["url"]
        rsl = [df.loc[i].to_dict() for i, _ in df.iterrows()]
        result = {"result": rsl}
    except Exception as e:
        result = {"error": "true", "message": str(e)}
    print("result")
    print(result)
    return result
def _wordcloud():
    df3 = ts.get_latest_news()
    print(df3)
    mylist = df3['title']
    # Tokenize each headline with jieba and join everything into one text
    # blob for word-cloud generation.
    word_list = [' '.join(jieba.cut(sentence)) for sentence in mylist]
    new_text = ' '.join(word_list)
    print(new_text)
def update_news():
    df = ts.get_latest_news(show_content=True, top=5000)
    # The feed only carries month-day times; prefix the (assumed) year.
    df["date"] = df["time"].map(lambda x: "2017-" + x.split(' ')[0])
    dates = df["date"].unique()
    for date in dates:
        date_df = df[df["date"] == date]
        print(date_df.shape)
        filename = "%s%s.csv" % (csv_dir, date)
        print(filename)
        if os.path.exists(filename):
            old_df = pd.read_csv(filename)
            # Only overwrite when the new pull has more rows for that day.
            if old_df.shape[0] < date_df.shape[0]:
                date_df.to_csv(filename, index=False, encoding='utf-8')
        else:
            date_df.to_csv(filename, index=False, encoding='utf-8')
def get_news_keywords():
    try:
        df = ts.get_latest_news(top=10, show_content=True)
        # Extract the top 5 keywords from each article body with jieba.
        df["keywords"] = df["content"].apply(
            lambda i: jieba.analyse.extract_tags(
                i, topK=5, withWeight=False, allowPOS=()) if i else [])
        del df["content"]
        rsl = [df.loc[i].to_dict() for i, _ in df.iterrows()]
        result = {"result": rsl}
    except Exception as e:
        result = {"error": "true", "message": str(e)}
    print("result")
    print(result)
    return result
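A short consumer sketch for get_news_keywords above; the field names follow the DataFrame built in that function.

# Hypothetical consumer: print each headline with its extracted keywords.
res = get_news_keywords()
for item in res.get("result", []):
    print(item["title"], "->", ", ".join(item["keywords"]))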
def getMarketNews():
    num = 50
    news = ts.get_latest_news(top=num)
    classify = list(news['classify'])
    title = list(news['title'])
    time = list(news['time'])
    url = list(news['url'])
    print(num)
    for i in classify:
        print(i)
    for i in title:
        print(i)
    for i in time:
        print(i)
    for i in url:
        print(i)
def fetchBlockTradeByCode(self, code, date, vol=500, drop=False):
    tableName = 'blockTradeRecords'
    param = []
    try:
        db, dbname = connectConfigDB('database')
        cursor = db.cursor()
        cursor.execute("SET NAMES utf8mb4;")
        if drop:
            cursor.execute("DROP TABLE IF EXISTS %s" % tableName)
        # create table
        sql = """ CREATE TABLE `%s`.`%s` (
            `code` VARCHAR(20) NOT NULL COMMENT 'stock code',
            `name` VARCHAR(45) NOT NULL COMMENT 'stock name',
            `date` VARCHAR(20) NULL COMMENT 'date',
            `time` VARCHAR(20) NULL COMMENT 'time',
            `price` DOUBLE NULL COMMENT 'current price',
            `volume` DOUBLE NULL COMMENT 'volume (lots)',
            `preprice` DOUBLE NULL COMMENT 'previous price',
            `type` VARCHAR(45) NULL COMMENT 'trade type (buy / sell / neutral)'
            ) DEFAULT CHARSET=utf8mb4; """ % (dbname, tableName)  # !IMPORTANT: DEFAULT CHARSET=utf8mb4;
        cursor.execute(sql)
        print('table %s created' % tableName)
        # fetch and insert data; the 8-column schema above matches
        # ts.get_sina_dd (big-deal tick data) with the date inserted
        res = ts.get_sina_dd(code, date=date, vol=vol)
        sql = 'INSERT IGNORE INTO `' + tableName + \
              """` values(%s, %s, %s, %s, %s, %s, %s, %s) """
        for row in res.values:
            tmp = row.tolist()
            tmp.insert(2, date)
            param.append(tmp)
        cursor.executemany(sql, param)
        db.commit()
        print('\ntable %s inserted %s records.' % (tableName, len(res.values)))
    except Exception:
        print_exc()
        db.rollback()
        return False
    finally:
        db.close()
    return True
def getNews():
    df = ts.get_latest_news()
    df1 = df[['title', 'url', 'classify', 'time']]
    # Replace the stored news wholesale, then append the fresh pull.
    print(engine.execute('delete from news'))
    df1.to_sql('news', engine, if_exists='append', index=False)
    print('---ok---')
def __call__(self, conns):
    self.base = Base()
    self.financial_data = conns['financial_data']
    # Real-time financial news.
    latest_news = ts.get_latest_news()
    self.base.batchwri(latest_news, 'latest_news', self.financial_data)
    # Per-stock "information mine" (notice) data.
    # Parameters: code -- stock code; date -- announcement date.
    notices = ts.get_notices()
    self.base.batchwri(notices, 'notices', self.financial_data)
    # Sina Guba stock-forum news.
    guba_sina = ts.guba_sina()
    self.base.batchwri(guba_sina, 'guba_sina', self.financial_data)
def getnews(request):
    """
    Use tushare to grab 100 financial news items per call and store them in sqlite.
    """
    df = ts.get_latest_news(top=100, show_content=True)
    path = os.path.abspath('Result.csv')
    df.to_csv(path)
    df = pandas.read_csv(path)
    print(df)
    for index, row in df.iterrows():
        # time.sleep(2)  # throttle if needed
        news.objects.create(classify=row['classify'], title=row['title'],
                            time=row['time'], url=row['url'],
                            content=row['content'])
    return HttpResponse("add success")
def show_stats_home():
    # Market indices.
    df = ts.get_index()
    market_index = df2DictList(df, True)
    # Watchlist quotes.
    df = ts.get_realtime_quotes(
        ['000002', '300122', '002230', '300166', '603189', '000005'])
    self_stock = df2DictList(df)
    # News feed.
    df = ts.get_latest_news()
    news = df2DictList(df)
    # Real-time movie box office.
    df = ts.realtime_boxoffice()
    boxoffice = df2DictList(df)
    return dict(market_index=market_index,
                self_stock=self_stock,
                news=news,
                boxoffice=boxoffice)
def index():
    whole_indicators = json.loads(ts.get_index()[:4].to_json(orient='records'))
    whole_news = json.loads(
        ts.get_latest_news(top=7).to_json(orient="records"))
    notice = Article.query.order_by(Article.updatedTime.desc()).limit(7)
    # Roll weekend dates back to the most recent Friday.
    dates = datetime.datetime.now()
    week = datetime.datetime.now().weekday()
    if week in [5, 6]:
        days = 4 - week
        dates = dates + datetime.timedelta(days=days)
    dates = dates.strftime('%Y-%m-%d')
    return render_template('main/index.html',
                           whole_indicators=whole_indicators,
                           news=whole_news,
                           notices=notice,
                           dates=dates)
def get_all_price(code_list):
    '''process all stocks'''
    df = ts.get_realtime_quotes(code_list)
    print(df)
    df = ts.get_latest_news()
    print(df)
    df = ts.get_cpi()
    print(df)
    df = ts.get_stock_basics()
    print(df)
    df = ts.get_sz50s()
    print(df)
    df = ts.get_hist_data('600848')
    print(df)
def fetchFinancialNews(self, drop=False):
    tableName = 'financialNews'
    param = []
    try:
        db, dbname = connectConfigDB('database')
        cursor = db.cursor()
        cursor.execute("SET NAMES utf8mb4;")
        if drop:
            cursor.execute("DROP TABLE IF EXISTS %s" % tableName)
        # create table; the TEXT primary-key column needs a prefix length
        sql = """ CREATE TABLE `%s`.`%s` (
            `classify` VARCHAR(20) NOT NULL COMMENT 'news category',
            `title` TEXT NULL COMMENT 'headline',
            `date` VARCHAR(45) NULL COMMENT 'date',
            `url` TEXT NULL COMMENT 'link',
            PRIMARY KEY (`title`(191), `date`)
            ) DEFAULT CHARSET=utf8mb4; """ % (dbname, tableName)  # !IMPORTANT: DEFAULT CHARSET=utf8mb4;
        cursor.execute(sql)
        print('table %s created' % tableName)
        # fetch and insert data
        res = ts.get_latest_news()
        sql = 'INSERT IGNORE INTO `' + tableName + \
              """` values(%s, %s, %s, %s) """
        for row in res.values:
            param.append(row.tolist())
        cursor.executemany(sql, param)
        db.commit()
        print('\ntable %s inserted %s records.' % (tableName, len(res.values)))
    except Exception:
        print_exc()
        db.rollback()
        return False
    finally:
        db.close()
    return True
def updatenews():
    newsdata = ts.get_latest_news()
    conn = ms.connect(host='localhost',
                      port=3306,
                      user='******',
                      passwd='123456',
                      db='investment',
                      charset="utf8")
    cur = conn.cursor()
    values = []
    for index, row in newsdata.iterrows():
        values.append((row['classify'], row['title'], row['time'], row['url']))
    cur.execute('delete from news')
    cur.executemany(
        'insert into news (classify,title,time,url) values(%s,%s,%s,%s)',
        values)
    conn.commit()
    cur.close()
    conn.close()
def job_6():
    try:
        print("I'm working......news and event data")
        # Real-time news.
        latest_news = ts.get_latest_news()
        data = pd.DataFrame(latest_news)
        data.to_sql('latest_news', engine, index=True, if_exists='replace')
        print("latest news......done")
        # Information mines (per-stock notices).
        notices = ts.get_notices()
        data = pd.DataFrame(notices)
        data.to_sql('notices', engine, index=True, if_exists='replace')
        print("notices......done")
        # Sina Guba forum news.
        guba_sina = ts.guba_sina()
        data = pd.DataFrame(guba_sina)
        data.to_sql('guba_sina', engine, index=True, if_exists='replace')
        print("guba_sina......done")
    except Exception as e:
        print(e)
def get_data_from_tushare():
    # 1- Fetch stock data through the API as a pandas.core.frame.DataFrame.
    # data = ts.get_k_data('hs300', start='2019-01-01', end='2019-06-05')
    # 2- Set the DataFrame index.
    # data.set_index('date', inplace=True)
    # 3- Plot the price trend; the line below is equivalent to data.close.plot(figsize=(10, 6)).
    # data['close'].plot(figsize=(10, 6))
    # print(data.head())
    # plt.show()
    # 4- Fetch quotes; tushare does not support multi-stock queries.
    # data1 = ts.get_k_data('600030')                # forward-adjusted prices (default)
    # data2 = ts.get_k_data('600030', autype='hfq')  # backward-adjusted prices
    # data3 = ts.get_k_data('600030', ktype='5')     # 5-minute bars
    # 4.1- Fetch historical tick-by-tick trade data.
    # df = ts.get_tick_data('601166', date='2019-06-06')
    # print(type(df))
    # df.sort_index(inplace=True, ascending=False)
    # print(df.head())
    # 4.2- Fetch the list of major indices.
    # df = ts.get_index()
    # print(df.head())
    # 4.3- Fetch stock fundamentals.
    # df = ts.get_stock_basics()
    # print(df.head)
    # df.set_index('code', inplace=True)
    # data = ts.get_profit_data(2019, 1)
    # print(data.head)
    # date = df.ix['600848']['timeToMarket']
    # print(date)
    # 4.4- Fetch unstructured news and sentiment data (this endpoint may fail to return data).
    data = ts.get_latest_news(top=5, show_content=True)  # at most 5 items, with content
    print(type(data))
__author__ = 'Bill Dan'

import json

import tushare as ts
from pymongo import MongoClient

client = MongoClient('mongodb://47.92.39.111:27017/')
db = client.refdata

print('start ...')
df = ts.get_latest_news(top=5, show_content=True)
# df = ts.shibor_data(2014)  # 2014 shibor data
# df.sort('date', ascending=False).head(10)
db.shibor_data.insert_many(json.loads(df.to_json(orient='records')))
print('success')
def news():
    getnews = ts.get_latest_news()
    print(type(getnews))
    print(getnews)
def getlastedNews(self):
    return ts.get_latest_news(top=10, show_content=True)
import sys

import tushare as ts

df = ts.get_latest_news()
df.to_csv(sys.argv[1], encoding="utf8")
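A usage note for the export script above; the script filename is hypothetical.

# Example invocation (assumed script name):
#   python export_news.py latest_news.csv
# sys.argv[1] is the output CSV path.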
"""
@author: colec
"""
import tushare as ts
import time

file_result = open("NewsWaterfall.txt", "w+", encoding="utf-8")
isLoop = True
while isLoop:
    resultTime = time.localtime(time.time())
    formatResultTime = time.strftime("%Y-%m-%d %H:%M:%S", resultTime)
    file_result.write(formatResultTime)
    file_result.write("\n")
    newsList = ts.get_latest_news()
    if newsList is None:
        continue
    print("test")
    for idx, dataRow in newsList.iterrows():
        file_result.write(str(idx))
        file_result.write(" ")
        file_result.write(dataRow["classify"])
        file_result.write(" ")
        file_result.write(dataRow["title"])
        file_result.write(" ")
        file_result.write(dataRow["time"])
        file_result.write(" ")
        file_result.write(dataRow["url"])
        file_result.write("\n")
    file_result.flush()
    time.sleep(60)  # throttle polling so the feed is not hammered