def history_fund(engine, session, code):
    """Dump the NAV history of one fund to ``/home/data/f_<code>.csv``.

    The history is requested from 2013-01-01 up to today through
    ``ts.get_nav_history``.

    Args:
        engine: Not used by this function.
        session: Not used by this function.
        code: Fund code; concatenated into the log messages and file name.
    """
    tsl.log("get data for code : " + code + " start...")
    sdate = datetime.date(2013, 1, 1)
    edate = datetime.date.today()
    df = ts.get_nav_history(code, start=str(sdate), end=str(edate))
    if df is None:
        # get_nav_history can hand back None (the worker in this file guards
        # for it); skip the CSV instead of failing with AttributeError.
        tsl.log("get data for code : " + code + " returned no data")
        return
    df.to_csv('/home/data/f_' + code + '.csv')
    tsl.log("get data for code : " + code + " done")
def news_real(engine):
    """Fetch the latest news and download the article bodies of recent items.

    Rows from ``ts.get_latest_news()`` are kept when their ``time`` value
    falls in the window [two hours ago, current hour), both truncated to the
    hour and compared as ``'MM-DD HH:00'`` strings. For every remaining row
    whose URL is not on ``blog.sina.com.cn`` the body is requested with
    ``ts.latest_content``.

    Args:
        engine: Not used in the code visible here.

    NOTE(review): ``contents`` and ``cnt`` are filled but never consumed in
    the visible code - confirm whether a persistence step is missing.
    NOTE(review): the window is compared as month-day strings, so it looks
    like it would misbehave across a year boundary - confirm.
    """
    tbl = "news_real"
    cnt = 0
    tsl.log(tbl + " start...")
    df = ts.get_latest_news()
    if df is None:
        tsl.log("no latest news")
        return

    window_end = datetime.datetime.today()
    window_start = window_end - datetime.timedelta(hours=2)
    st = '%02i-%02i %02i:00' % (window_end.month, window_end.day, window_end.hour)
    et = '%02i-%02i %02i:00' % (window_start.month, window_start.day, window_start.hour)
    df = df[df.time >= et]
    df = df[df.time < st]

    urls = df.url.values
    contents = [''] * len(df)
    for i, url in enumerate(urls):
        if 'blog.sina.com.cn' in url:
            continue
        try:
            content = ts.latest_content(url)
            if content is not None:
                contents[i] = unicode(content)
                cnt += 1
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt/SystemExit must
            # still be able to stop the job.
            print(e)
            print(url)
def trade_block_worker(engine, codes, sdate, edate):
    """Load block-trade rows for ``codes`` on each non-holiday date in range.

    For every date from ``sdate`` to ``edate`` (inclusive) that
    ``tsu.is_holiday`` does not flag, ``ts.get_sina_dd(code, date, vol=10000)``
    is called per code, the date is stamped into a ``date`` column, and the
    rows of that date are appended to the ``trade_block`` table indexed by
    ``code``.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        codes: Iterable of codes to query.
        sdate: First date of the range.
        edate: Last date of the range.

    NOTE(review): codes that fail with a timeout / urlopen error are collected
    in ``temp`` but not retried in the code visible here.
    """
    pid = os.getpid()
    tsl.log("pid %i start with %i codes..." % (pid, len(codes)))
    temp = {}  # date -> codes whose request hit a transient network error
    one_day = datetime.timedelta(days=1)
    cdate = sdate
    while cdate <= edate:
        if not tsu.is_holiday(cdate):
            # Start from an empty frame for every date. Re-using one frame
            # across dates would write the earlier dates' rows again on each
            # later date.
            df = pd.DataFrame()
            for code in codes:
                try:
                    newdf = ts.get_sina_dd(code, cdate, vol=10000)
                    if newdf is not None:
                        newdf['date'] = cdate
                        df = df.append(newdf, ignore_index=True)
                except Exception as e:
                    # Exception, not BaseException: KeyboardInterrupt and
                    # SystemExit must still be able to stop the worker.
                    if 'timed out' in str(e) or 'urlopen error' in str(e):
                        temp.setdefault(cdate, []).append(code)
                    else:
                        print(e)
                        tsl.log("pid %i error for %s on %s" % (pid, code, str(cdate)))
            if len(df) != 0:
                df = df.set_index('code', drop=True)
                df.to_sql('trade_block', engine, if_exists='append')
        cdate += one_day
def news_sina_bar(engine):
    """Append the frame from ``ts.guba_sina(True)`` to ``news_sina_bar``.

    The frame is indexed by its ``ptime`` column before being written.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
    """
    tbl = "news_sina_bar"
    tsl.log(tbl + " start...")
    df = ts.guba_sina(True)
    # drop is a boolean flag; the string 'true' only worked by truthiness.
    df = df.set_index('ptime', drop=True)
    df.to_sql(tbl, engine, if_exists='append')
    tsl.log(tbl + " done")
def weekly(engine, session, cdate):
    """Dispatch the weekly job according to the ISO weekday of ``cdate``.

    Friday (5) runs ``tops(engine, 5)``. Monday (1) runs ``history`` over the
    span from seven to three days before ``cdate``. Any other day only logs
    that there is nothing to do.

    Args:
        engine: Passed through to ``tops`` / ``history``.
        session: Passed through to ``history``.
        cdate: Date object the decision is based on.
    """
    weekday = cdate.isoweekday()
    if weekday == 5:
        tops(engine, 5)
        return
    if weekday != 1:
        tsl.log("no weekly task for module : tops")
        return
    range_start = cdate - datetime.timedelta(days=7)
    range_end = cdate - datetime.timedelta(days=3)
    history(engine, session, range_start, range_end)
def margin_sz_dtl(engine, ddate):
    """Append ``ts.sz_margin_details(ddate)`` to ``invest_margin_sz_dtl``.

    The frame is indexed by ``opDate`` before being written. Failures are
    printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        ddate: Date string; it is concatenated into the error log message.
    """
    tbl = "invest_margin_sz_dtl"
    try:
        df = ts.sz_margin_details(ddate)
        # drop is a boolean flag; the string 'true' only worked by truthiness.
        df = df.set_index('opDate', drop=True)
        df.to_sql(tbl, engine, if_exists='append')
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print(e)
        tsl.log(tbl + " error on " + ddate)
def trade_hist_worker(engine, codes, sdate, edate):
    """Append k-line history of each code to ``trade_market_history``.

    For every code, ``ts.get_k_data`` is queried for ``sdate``..``edate`` and
    the frame, indexed by ``code``, is appended to the table. A failure for
    one code is printed and logged and the loop moves on to the next code.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        codes: Sized iterable of codes handled by this worker.
        sdate: Range start; stringified for the query.
        edate: Range end; stringified for the query.
    """
    pid = os.getpid()
    tsl.log("pid %i start with %i codes..." % (pid, len(codes)))
    for code in codes:
        try:
            df = ts.get_k_data(code, start=str(sdate), end=str(edate))
            # drop is a boolean flag; 'true' only worked by truthiness.
            df = df.set_index('code', drop=True)
            df.to_sql('trade_market_history', engine, if_exists='append')
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt/SystemExit must
            # still be able to stop the worker.
            print(e)
            tsl.log("pid %i error for %s" % (pid, code))
def history(engine, session, sdate, edate):
    """Backfill the margin tables for the range ``sdate``..``edate``.

    The Shanghai summary, Shanghai detail and Shenzhen summary loaders are
    each called once with the whole range (as strings). The Shenzhen detail
    loader is called once per date, both ends inclusive, skipping dates that
    ``tsu.is_holiday`` flags.

    Args:
        engine: Passed through to every loader.
        session: Not used by this function.
        sdate: First date of the range.
        edate: Last date of the range.
    """
    first, last = str(sdate), str(edate)
    margin_sh_smry(engine, first, last)
    margin_sh_dtl(engine, first, last)
    margin_sz_smry(engine, first, last)

    tbl = "invest_margin_sz_dtl"
    tsl.log(tbl + " start...")
    step = datetime.timedelta(days=1)
    day = sdate
    while day <= edate:
        if not tsu.is_holiday(day):
            margin_sz_dtl(engine, str(day))
        day = day + step
    tsl.log(tbl + " done")
def news_notices_worker(engine, codes, ddate):
    """Collect notices for each code into one frame.

    ``ts.get_notices(code, ddate, True)`` is called per code; non-None
    results get a ``code`` column and are appended to a running frame. A
    failure for one code is printed and logged and the loop continues.

    Args:
        engine: Not used in the code visible here.
        codes: Sized iterable of codes handled by this worker.
        ddate: Passed through to ``ts.get_notices``.

    NOTE(review): the accumulated frame is not written anywhere in the code
    visible here - confirm whether a persistence step is missing.
    """
    pid = os.getpid()
    tsl.log("pid %i start with %i codes..." % (pid, len(codes)))
    df = pd.DataFrame()
    for code in codes:
        try:
            newdf = ts.get_notices(code, ddate, True)
            if newdf is not None:
                newdf['code'] = code
                df = df.append(newdf, ignore_index=True)
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt/SystemExit must
            # still be able to stop the worker.
            print(e)
            tsl.log("pid %i error for %s" % (pid, code))
def news_notices_mult(engine, session, ddate):
    """Run ``news_notices_worker`` over all stock codes in parallel batches.

    The codes from ``tsu.get_stock_codes(session)`` are cut into slices of
    ``tsc.NEWS_PROCESS_NUM`` codes; each slice gets its own daemon process and
    all processes are joined before returning.

    Args:
        engine: Passed through to each worker.
        session: Passed to ``tsu.get_stock_codes``.
        ddate: Passed through to each worker.
    """
    tsl.log("news_notices start...")
    codes = tsu.get_stock_codes(session)
    batch = tsc.NEWS_PROCESS_NUM
    ps = []
    # Stepping by the batch size yields exactly the non-empty slices; the
    # former ``len / batch + 1`` count started an extra worker with no codes
    # whenever the total was a multiple of the batch size.
    for start in range(0, len(codes), batch):
        p = multiprocessing.Process(target=news_notices_worker,
                                    args=(engine, codes[start:start + batch], ddate))
        p.daemon = True
        p.start()
        ps.append(p)
    for p in ps:
        p.join()
    tsl.log("news_notices done")
def temp_info_mult(engine, session):
    """Rebuild ``fund_temp_info`` from the current fund code lists.

    Codes are gathered from the ``symbol`` column of ``ts.get_nav_open()``,
    ``ts.get_nav_close()`` and ``ts.get_nav_grading()`` (in that order), the
    table is emptied with a ``delete`` statement, and the codes are handed to
    ``temp_info_worker`` processes in slices of ``tsc.FUND_PROCESS_NUM``.

    Args:
        engine: Passed through to each worker.
        session: Used to execute the ``delete`` statement.

    NOTE(review): no commit follows the delete in this function - confirm the
    session's transaction handling.
    """
    # get latest codes
    tsl.log("get latest codes start...")
    codes = []
    for fetch in (ts.get_nav_open, ts.get_nav_close, ts.get_nav_grading):
        symbols = fetch().symbol.values
        # Blank line after each fetch (presumably ends the progress output
        # the fetch writes to stdout - confirm).
        print('')
        codes.extend([str(item) for item in symbols])
    tsl.log("get latest codes done")

    # insert latest info into fund_temp_info
    session.execute("delete from fund_temp_info")
    tsl.log("fund_temp_info start...")
    batch = tsc.FUND_PROCESS_NUM
    ps = []
    # Stepping by the batch size yields exactly the non-empty slices; the
    # former ``len / batch + 1`` count could start a worker with no codes.
    for start in range(0, len(codes), batch):
        p = multiprocessing.Process(target=temp_info_worker,
                                    args=(engine, codes[start:start + batch]))
        p.daemon = True
        p.start()
        ps.append(p)
    for p in ps:
        p.join()
    tsl.log("fund_temp_info done")
def temp_info_worker(engine, codes):
    """Collect ``ts.get_fund_info`` results for each code into one frame.

    Codes whose request fails with a timeout / urlopen error are remembered
    in ``temp``; any other failure is printed and logged. The loop always
    continues with the next code.

    Args:
        engine: Not used in the code visible here.
        codes: Sized iterable of fund codes handled by this worker.

    NOTE(review): neither the accumulated frame nor the ``temp`` retry list is
    consumed in the code visible here - confirm whether a tail is missing.
    """
    pid = os.getpid()
    tsl.log("pid %i start with %i codes..." % (pid, len(codes)))
    temp = []  # codes that hit a transient network error
    df = pd.DataFrame()
    for code in codes:
        try:
            newdf = ts.get_fund_info(code)
            df = df.append(newdf, ignore_index=True)
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt/SystemExit must
            # still be able to stop the worker.
            if 'timed out' in str(e) or 'urlopen error' in str(e):
                temp.append(code)
            else:
                print(e)
                tsl.log("pid %i error for %s" % (pid, code))
def trade_hist_mult(engine, session, sdate, edate):
    """Run ``trade_hist_worker`` over all stock codes in parallel batches.

    The codes from ``tsu.get_stock_codes(session)`` are cut into slices of
    ``tsc.TRADE_PROCESS_NUM`` codes; each slice gets its own daemon process
    and all processes are joined before returning.

    Args:
        engine: Passed through to each worker.
        session: Passed to ``tsu.get_stock_codes``.
        sdate: Range start, passed through to each worker.
        edate: Range end, passed through to each worker.
    """
    tsl.log("trade_market_history start...")
    codes = tsu.get_stock_codes(session)
    batch = tsc.TRADE_PROCESS_NUM
    ps = []
    # Stepping by the batch size yields exactly the non-empty slices; the
    # former ``len / batch + 1`` count could start a worker with no codes.
    for start in range(0, len(codes), batch):
        p = multiprocessing.Process(target=trade_hist_worker,
                                    args=(engine, codes[start:start + batch], sdate, edate))
        p.daemon = True
        p.start()
        ps.append(p)
    for p in ps:
        p.join()
    tsl.log("trade_market_history done")
def tops_list_worker(engine, sdate, edate):
    """Collect ``ts.top_list`` results for each non-holiday date in range.

    Every date from ``sdate`` to ``edate`` (inclusive) that
    ``tsu.is_holiday`` does not flag is queried; non-None results are appended
    to a running frame. A failure for one date is printed and logged and the
    loop continues with the next date.

    Args:
        engine: Not used in the code visible here.
        sdate: First date of the range.
        edate: Last date of the range.

    NOTE(review): the accumulated frame is not written anywhere in the code
    visible here - confirm whether a persistence step is missing.
    """
    pid = os.getpid()
    tsl.log("pid %i start with %s ~ %s..." % (pid, str(sdate), str(edate)))
    one_day = datetime.timedelta(days=1)
    cdate = sdate
    df = pd.DataFrame()
    while cdate <= edate:
        if not tsu.is_holiday(cdate):
            try:
                newdf = ts.top_list(str(cdate))
                # Guard the fetched frame. The accumulator ``df`` is never
                # None, so testing it let a None result reach append().
                if newdf is not None:
                    df = df.append(newdf, ignore_index=True)
            except Exception as e:
                # Exception, not BaseException: KeyboardInterrupt and
                # SystemExit must still be able to stop the worker.
                print(e)
                tsl.log("pid %i error on %s" % (pid, str(cdate)))
        cdate += one_day
def history_stock(engine, session, code):
    """Dump price history and financial statements of one stock to CSV files.

    Files written under ``/home/data/``: ``s_<code>.csv`` (k-line data from
    2013-01-01 to today, restricted to the date/open/close/high/low/volume
    columns), then ``sb_``, ``sp_`` and ``sc_`` files holding the results of
    ``ts.get_balance_sheet``, ``ts.get_profit_statement`` and
    ``ts.get_cash_flow``.

    Args:
        engine: Not used by this function.
        session: Not used by this function.
        code: Stock code; concatenated into log messages and file names.
    """
    tsl.log("get data for code : " + code + " start...")
    path = '/home/data/'

    def target(prefix):
        # Output file for one data set of this code.
        return path + prefix + code + '.csv'

    first_day = str(datetime.date(2013, 1, 1))
    last_day = str(datetime.date.today())
    kdata = ts.get_k_data(code, start=first_day, end=last_day)
    kdata.to_csv(target('s_'),
                 columns=['date', 'open', 'close', 'high', 'low', 'volume'])

    ts.get_balance_sheet(code).to_csv(target('sb_'))
    ts.get_profit_statement(code).to_csv(target('sp_'))
    ts.get_cash_flow(code).to_csv(target('sc_'))
    tsl.log("get data for code : " + code + " done")
def fund_nav_history_mult(engine, session, sdate, edate):
    """Run ``fund_nav_history_worker`` over all fund codes in parallel batches.

    The codes from ``tsu.get_fund_codes(session)`` are cut into slices of
    ``tsc.FUND_PROCESS_NUM`` entries; each slice gets its own daemon process
    and all processes are joined before returning.

    Args:
        engine: Passed through to each worker.
        session: Passed to ``tsu.get_fund_codes``.
        sdate: Range start, passed through to each worker.
        edate: Range end, passed through to each worker.
    """
    tbl = "fund_nav_history"
    tsl.log(tbl + " start...")
    codes = tsu.get_fund_codes(session)
    batch = tsc.FUND_PROCESS_NUM
    ps = []
    # Stepping by the batch size yields exactly the non-empty slices; the
    # former ``len / batch + 1`` count could start a worker with no codes.
    for start in range(0, len(codes), batch):
        p = multiprocessing.Process(target=fund_nav_history_worker,
                                    args=(engine, codes[start:start + batch], sdate, edate))
        p.daemon = True
        p.start()
        ps.append(p)
    for p in ps:
        p.join()
    tsl.log(tbl + " done")
def top10_holders_mult(engine, session, year=None, quarter=None):
    """Run ``top10_holders_worker`` over all stock codes in parallel batches.

    The codes from ``tsu.get_stock_codes(session)`` are cut into slices of
    ``tsc.INVEST_PROCESS_NUM`` codes; each slice gets its own daemon process
    and all processes are joined before returning.

    Args:
        engine: Passed through to each worker.
        session: Passed to ``tsu.get_stock_codes``.
        year: Passed through to each worker; defaults to ``None``.
        quarter: Passed through to each worker; defaults to ``None``.
    """
    tsl.log("invest_top10_holders start...")
    codes = tsu.get_stock_codes(session)
    batch = tsc.INVEST_PROCESS_NUM
    ps = []
    # Stepping by the batch size yields exactly the non-empty slices; the
    # former ``len / batch + 1`` count could start a worker with no codes.
    for start in range(0, len(codes), batch):
        p = multiprocessing.Process(target=top10_holders_worker,
                                    args=(engine, codes[start:start + batch], year, quarter))
        p.daemon = True
        p.start()
        ps.append(p)
    for p in ps:
        p.join()
    tsl.log("invest_top10_holders done")
def fund_nav_history_worker(engine, codes, sdate, edate):
    """Collect NAV history for each fund entry into one frame.

    Each entry of ``codes`` is indexed as ``code[0]`` / ``code[1]``; both are
    passed to ``ts.get_nav_history`` together with the date range, and
    non-None results are tagged with ``symbol = code[0]`` and appended.
    Timeout / urlopen failures are remembered in ``temp``; other failures are
    printed and logged. The loop always continues with the next entry.

    Args:
        engine: Not used in the code visible here.
        codes: Sized iterable of two-element (or longer) entries.
        sdate: Range start, passed to ``ts.get_nav_history``.
        edate: Range end, passed to ``ts.get_nav_history``.

    NOTE(review): neither the accumulated frame nor the ``temp`` retry list is
    consumed in the code visible here - confirm whether a tail is missing.
    """
    pid = os.getpid()
    tsl.log("pid %i start with %i codes..." % (pid, len(codes)))
    temp = []  # entries that hit a transient network error
    df = pd.DataFrame()
    for code in codes:
        try:
            newdf = ts.get_nav_history(code[0], code[1], sdate, edate)
            if newdf is not None:
                newdf['symbol'] = code[0]
                df = df.append(newdf)
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt/SystemExit must
            # still be able to stop the worker.
            if 'timed out' in str(e) or 'urlopen error' in str(e):
                temp.append(code)
            else:
                print(e)
                tsl.log("pid %i error for %s" % (pid, code))
def top10_holders_worker(engine, codes, year, quarter):
    """Collect the second element of ``ts.top10_holders`` for each code.

    Non-None results get a ``code`` column and are appended to a running
    frame. Timeout / urlopen failures are remembered in ``temp``; other
    failures are printed and logged. The loop always continues with the next
    code.

    Args:
        engine: Not used in the code visible here.
        codes: Sized iterable of codes handled by this worker.
        year: Passed through to ``ts.top10_holders``.
        quarter: Passed through to ``ts.top10_holders``.

    NOTE(review): neither the accumulated frame nor the ``temp`` retry list is
    consumed in the code visible here - confirm whether a tail is missing.
    """
    pid = os.getpid()
    tsl.log("pid %i start with %i codes..." % (pid, len(codes)))
    df = pd.DataFrame()
    temp = []  # codes that hit a transient network error
    for code in codes:
        try:
            newdf = ts.top10_holders(code, year, quarter)[1]
            if newdf is not None:
                newdf['code'] = code
                df = df.append(newdf, ignore_index=True)
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt/SystemExit must
            # still be able to stop the worker.
            if 'timed out' in str(e) or 'urlopen error' in str(e):
                temp.append(code)
            else:
                print(e)
                tsl.log("pid %i error for %s" % (pid, code))
def tops_list_mult(engine, sdate, edate):
    """Run ``tops_list_worker`` over consecutive 10-day windows in parallel.

    The range ``sdate``..``edate`` is cut into windows of ten days; the last
    window is clipped to ``edate``. Each window gets its own daemon process
    and all processes are joined before returning.

    Args:
        engine: Passed through to each worker.
        sdate: First date of the range.
        edate: Last date of the range.
    """
    tsl.log("tops_list start...")
    dayno = 10
    # Integer (floor) division, as the window count must be an int.
    pn = (edate - sdate).days // dayno + 1
    workers = []
    for i in range(pn):
        window_start = sdate + datetime.timedelta(days=dayno * i)
        if i == pn - 1:
            window_end = edate
        else:
            window_end = window_start + datetime.timedelta(days=dayno - 1)
        proc = multiprocessing.Process(target=tops_list_worker,
                                       args=(engine, window_start, window_end))
        proc.daemon = True
        proc.start()
        workers.append(proc)
    for proc in workers:
        proc.join()
    tsl.log("tops_list done")
def month_boxoffice(engine, ddate=None):
    """Append ``ts.month_boxoffice(ddate)`` to the ``month_boxoffice`` table.

    A ``date`` column holding ``ddate`` is added before writing. Failures are
    printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        ddate: Passed to ``ts.month_boxoffice`` and stored in the ``date``
            column; defaults to ``None``.
    """
    tbl = "month_boxoffice"
    tsl.log(tbl + " start...")
    try:
        df = ts.month_boxoffice(ddate)
        df['date'] = ddate
        df.to_sql(tbl, engine, if_exists='append')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print(e)
        tsl.log(tbl + " error")
def day_boxoffice(engine, cdate):
    """Append ``ts.day_boxoffice()`` to the ``day_boxoffice`` table.

    The rows are stamped with the day before ``cdate`` in a ``date`` column.
    Failures are printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        cdate: Date object; one day is subtracted from it for the stamp.
    """
    tbl = "day_boxoffice"
    tsl.log(tbl + " start...")
    try:
        df = ts.day_boxoffice()
        df['date'] = cdate - datetime.timedelta(days=1)
        df.to_sql(tbl, engine, if_exists='append')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print(e)
        tsl.log(tbl + " error")
def global_index(engine, cdate):
    """Append ``ts.global_realtime()`` to the ``global_index`` table.

    A ``date`` column holding ``cdate`` is added before writing. Failures are
    printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        cdate: Value stored in the ``date`` column.
    """
    tbl = "global_index"
    tsl.log(tbl + " start...")
    try:
        df = ts.global_realtime()
        df['date'] = cdate
        df.to_sql(tbl, engine, if_exists='append')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print(e)
        tsl.log(tbl + " error")
def futures_ifs(engine, cdate):
    """Append ``ts.get_intlfuture()`` to the ``futures_ifs`` table.

    A ``date`` column holding ``cdate`` is added before writing. Failures are
    printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        cdate: Value stored in the ``date`` column.
    """
    tbl = "futures_ifs"
    tsl.log(tbl + " start...")
    try:
        df = ts.get_intlfuture()
        df['date'] = cdate
        df.to_sql(tbl, engine, if_exists='append')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print(e)
        tsl.log(tbl + " error")
def stock(engine):
    """Replace the ``basic_stock`` table with ``ts.get_stock_basics()``.

    The frame's index is turned back into a regular column with
    ``reset_index`` before writing, and the table is written with
    ``if_exists='replace'``. Failures are printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
    """
    tbl = "basic_stock"
    tsl.log(tbl + " start...")
    try:
        df = ts.get_stock_basics()
        df = df.reset_index()
        df.to_sql(tbl, engine, if_exists='replace')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print(e)
        tsl.log(tbl + " error")
def shibor_ma(engine, year, sdate=None):
    """Append ``ts.shibor_ma_data(year)`` to the ``shibor_ma`` table.

    When ``sdate`` is given, only rows whose ``date`` is greater than or equal
    to it are written. Failures are printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        year: Passed to ``ts.shibor_ma_data``.
        sdate: Optional lower bound compared against the ``date`` column.
    """
    tbl = "shibor_ma"
    tsl.log(tbl + " start...")
    try:
        df = ts.shibor_ma_data(year)
        if sdate is not None:
            df = df[df.date >= sdate]
        df.to_sql(tbl, engine, if_exists='append')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print(e)
        tsl.log(tbl + " error")
def lifted(engine, year, month):
    """Append ``ts.xsg_data(year, month)`` to the ``invest_lifted`` table.

    The frame is indexed by ``code`` before being written. Failures are
    printed (after a blank line) and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        year: Passed to ``ts.xsg_data``.
        month: Passed to ``ts.xsg_data``.
    """
    tbl = "invest_lifted"
    tsl.log(tbl + " start...")
    try:
        df = ts.xsg_data(year, month)
        # drop is a boolean flag; the string 'true' only worked by truthiness.
        df = df.set_index('code', drop=True)
        df.to_sql(tbl, engine, if_exists='append')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print('')
        print(e)
        tsl.log(tbl + " error")
def fund_hold(engine, year, quarter):
    """Append ``ts.fund_holdings(year, quarter)`` to ``invest_fund_hold``.

    The frame is indexed by ``code`` before being written. A blank line is
    printed before the final log message on both the success and the failure
    path. Failures are printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        year: Passed to ``ts.fund_holdings``.
        quarter: Passed to ``ts.fund_holdings``.
    """
    tbl = "invest_fund_hold"
    tsl.log(tbl + " start...")
    try:
        df = ts.fund_holdings(year, quarter)
        # drop is a boolean flag; the string 'true' only worked by truthiness.
        df = df.set_index('code', drop=True)
        df.to_sql(tbl, engine, if_exists='append')
        print('')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print('')
        print(e)
        tsl.log(tbl + " error")
def daily(engine, session, cdate):
    """Load the margin tables for the day before ``cdate``.

    When ``tsu.is_holiday`` flags that day, only a log line is written.
    Otherwise the Shanghai summary/detail and Shenzhen summary loaders are
    called with that day as both range ends, followed by the Shenzhen detail
    loader for the same day.

    Args:
        engine: Passed through to every loader.
        session: Not used by this function.
        cdate: Date object; the previous day is the one that gets loaded.
    """
    previous_day = cdate - datetime.timedelta(days=1)
    if tsu.is_holiday(previous_day):
        tsl.log("yesterday is a holiday")
        return

    day = str(previous_day)
    margin_sh_smry(engine, day, day)
    margin_sh_dtl(engine, day, day)
    margin_sz_smry(engine, day, day)

    tbl = "invest_margin_sz_dtl"
    tsl.log(tbl + " start...")
    margin_sz_dtl(engine, day)
    tsl.log(tbl + " done")
def margin_sz_smry(engine, sdate, edate):
    """Append ``ts.sz_margins(sdate, edate)`` to ``invest_margin_sz_smry``.

    The frame is indexed by ``opDate`` before being written. A blank line is
    printed before the final log message on both the success and the failure
    path. Failures are printed and logged, not re-raised.

    Args:
        engine: Connection/engine handed to ``DataFrame.to_sql``.
        sdate: Range start, passed to ``ts.sz_margins``.
        edate: Range end, passed to ``ts.sz_margins``.
    """
    tbl = "invest_margin_sz_smry"
    tsl.log(tbl + " start...")
    try:
        df = ts.sz_margins(sdate, edate)
        # drop is a boolean flag; the string 'true' only worked by truthiness.
        df = df.set_index('opDate', drop=True)
        df.to_sql(tbl, engine, if_exists='append')
        print('')
        tsl.log(tbl + " done")
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still be able to stop the job.
        print('')
        print(e)
        tsl.log(tbl + " error")