def get_sina_goods():
    """Download every commodity listed in ``codes.goods`` to a CSV file.

    For each entry the number of result pages is obtained with ``get_page``,
    every page is parsed with ``parse_df``, and the non-empty pages are
    concatenated and written to ``<CURDIR>/sina_goods/<breed>.csv``.
    Entries whose CSV already exists are skipped.  Progress and failures are
    reported through ``ytrack``; nothing is returned.
    """
    from codes import add_params, GOODS_URL, goods, get_page, parse_df

    columns = ['date', 'close', 'open', 'high', 'low', 'volume']

    for good in goods:
        dst = "%s/sina_goods/%s.csv" % (CURDIR, good['breed'])
        if os.path.exists(dst):
            ytrack.error("%s exists.." % dst)
            continue

        page = get_page(add_params(GOODS_URL, good))
        if not page:
            ytrack.fail('%s page is None' % good['name'])
            continue

        frames = []
        for i in range(1, page + 1):
            # Work on a copy so the shared entries in ``codes.goods`` do not
            # keep a stale "page" key after this function has run.
            params = good.copy()
            params.update({"page": i})
            df = parse_df(add_params(GOODS_URL, params))
            if df is not None and len(df) > 0:
                frames.append(df)

        if frames:
            # One concat instead of repeated DataFrame.append (removed in
            # pandas 2.0, and quadratic in the number of pages).
            data = pd.concat(frames, ignore_index=True)
            data.columns = columns
            data.to_csv(dst)
            ytrack.info("%s finished.." % good['name'])
        else:
            # The original message had no format argument and logged "%s".
            ytrack.error("%s len is 0" % good['name'])

    ytrack.show()
def _update_ohlc_daily(date, code, table):
    """Collect the daily OHLC rows of one trading day.

    Args:
        date: Day to fetch, in a form accepted by ``str(date)`` parsed as
            ``%Y%m%d``; a falsy value means "now".
        code: A single code, or ``'ALL'`` for every known index (when
            ``table`` is the index table) or every stock whose
            ``timeToMarket`` is positive and not after ``date``.
        table: ``'hs_stocks_ohlc_daily'`` or ``'hs_indexs_ohlc_daily'``;
            selects stock vs. index mode.

    Returns:
        A DataFrame indexed by ``code`` (sorted descending) whose ``date``
        column is the integer ``YYYYMMDD`` and whose open/high/low/close
        columns are rounded to two decimals.  ``None`` when the day is not
        marked open in ``tradecal_df`` or when no code produced any data.

    Raises:
        AssertionError: if ``table`` is not one of the two supported names.
    """
    assert table in ['hs_stocks_ohlc_daily', 'hs_indexs_ohlc_daily']
    is_index = table == 'hs_indexs_ohlc_daily'

    if not date:
        date = datetime.datetime.now()
    else:
        date = datetime.datetime.strptime(str(date), "%Y%m%d")
    dt_s = date.strftime("%Y%m%d")
    dt_i = int(dt_s)
    dt_d = date.strftime("%Y-%m-%d")

    if not tradecal_df.loc[dt_i]['isOpen']:
        ytrack.fail("%s is not Open.." % date)
        return None

    if code == 'ALL':
        if is_index:
            import sinacodes
            codes = sinacodes.hsindexs.keys()
        else:
            # Stocks listed on or before `date`.
            listed = basics_df.loc[(basics_df['timeToMarket'] <= dt_i) &
                                   (basics_df['timeToMarket'] > 0)]
            codes = listed.index.values.tolist()
    else:
        codes = [code]

    quart = year_qua(dt_s)
    frames = []
    for one_code in codes:
        df = _parse_fq_data(_get_index_url(is_index, one_code, quart),
                            is_index, 3, 0.01)
        if df is None:
            # Nothing could be parsed for this code, e.g. trading suspended.
            ytrack.fail("Date=%s, code=%s is_index=%s is None ." % (date, one_code, is_index))
            continue
        # Copy the filtered slice so insert() does not act on a view.
        df = df[df.date == dt_d].copy()
        df.insert(0, 'code', one_code)
        frames.append(df)

    if not frames:
        # Previously this fell through and raised KeyError on the missing
        # columns; callers already treat None as "nothing to update".
        return None

    data = pd.concat(frames)
    if 'factor' in data.columns:
        data = data.drop('factor', axis=1)
    data['date'] = dt_i
    for label in ['open', 'high', 'low', 'close']:
        # Round through the '%.2f' text form, as the stored prices expect.
        data[label] = data[label].map(lambda x: '%.2f' % x)
        data[label] = data[label].astype(float)
    data = data.set_index('code')
    data = data.sort_index(ascending=False)
    return data
def get_market_date(code):
    """Return the listing date of ``code`` as ``'YYYY-MM-DD'``.

    The value is read from the ``timeToMarket`` column of ``basics_df``.
    ``None`` is returned when the stored value is not an 8-character
    ``YYYYMMDD`` string, or when the lookup or the parsing fails (the
    failure is logged through ``ytrack``).
    """
    try:
        # .loc replaces the removed DataFrame.ix accessor; `code` is used as
        # an index label.
        s = str(basics_df.loc[code]['timeToMarket'])
        if len(s) != 8:
            return None
        listed = datetime.datetime.strptime(s, "%Y%m%d")
        return str(listed.date())
    except Exception:
        ytrack.fail("get_market_date: %s" % code)
        ytrack.fail(traceback.format_exc())
        return None
def my_update_someday_data(df, date, save_table):
    """Replace the rows of ``save_table`` for ``date`` with ``df``.

    Existing rows with that ``date`` are deleted, then ``df`` is appended
    with its index written as the ``code`` column.  Both steps log their
    outcome through ``ytrack`` and never raise ordinary exceptions.

    A failed delete does not stop the insert: ``to_sql`` is still attempted
    (presumably so a first run against a not-yet-existing table works --
    confirm before changing this).

    NOTE(review): the statement is built by string interpolation, so
    ``save_table`` and ``date`` must come from trusted code only.
    """
    sql = "delete from %s where date = %s" % (save_table, date)
    ytrack.success("execute: %s" % sql)

    try:
        engine.execute(sql)
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        ytrack.fail(traceback.format_exc())
    else:
        ytrack.success("%s删除数据成功" % save_table)

    try:
        df.to_sql(save_table, engine, if_exists='append', index=True, index_label='code')
    except Exception:
        ytrack.fail(traceback.format_exc())
    else:
        ytrack.success("%s 成功更新 %s 条记录." % (save_table, df.shape[0]))
def _parse_fq_data(url, index, retry_count, pause):
    """Fetch ``url`` and parse its ``FundHoldSharesTable`` into a DataFrame.

    Args:
        url: Page to download (decoded as GBK).
        index: When truthy only the first seven names of ``HIST_FQ_COLS``
            are used as column names, otherwise all of them.
        retry_count: Maximum number of download attempts.
        pause: Seconds to sleep before every attempt.

    Returns:
        The parsed table with a datetime ``date`` column and duplicate dates
        dropped.  ``None`` when the table is missing or empty, when a
        ``ValueError`` occurs while parsing (no retry in that case), or when
        every attempt failed with another exception.
    """
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            response = urlopen(Request(url), timeout=10)
            try:
                raw = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
            text = raw.decode('GBK')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath('//table[@id=\"FundHoldSharesTable\"]')
            if six.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            if sarr == '':
                return None
            df = pd.read_html(sarr, skiprows=[0, 1])[0]
            if len(df) == 0:
                return None
            if index:
                df.columns = HIST_FQ_COLS[0:7]
            else:
                df.columns = HIST_FQ_COLS
            # `np.object` and unit-less `np.datetime64` casts are gone from
            # current NumPy/pandas; these spellings work on old and new.
            if df['date'].dtype == object:
                df['date'] = pd.to_datetime(df['date'])
            df = df.drop_duplicates('date')
        except ValueError:
            ytrack.fail("_parse_fq_data: %s" % url)
            ytrack.fail(traceback.format_exc())
            # The requested period is too early; no data can be read any more.
            return None
        except Exception:
            # Any other failure: log it and try again.
            ytrack.fail("_parse_fq_data: %s" % url)
            ytrack.fail(traceback.format_exc())
        else:
            return df

    ytrack.fail("_parse_fq_data: retry_count = %s failed, %s" % (retry_count, url))
    return None
def get_data():
    """Scrape the jin10.com front page and store its news items.

    The page is requested up to three times.  Every ``div.newsline``
    element is merged into the database as a ``News`` row whose id is the
    element's ``id`` attribute integer-divided by 100 and whose ``html`` is
    the element's markup.  Items that fail are logged and skipped; the
    number of merged items is logged at the end.  Returns nothing.
    """
    page_url = 'http://www.jin10.com/'
    s = requests.Session()
    s.headers.update({
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36",
    })

    r = None
    for _ in range(3):
        try:
            time.sleep(0.5)
            # NOTE(review): no timeout is set, so a stalled server can block
            # this call indefinitely -- consider passing timeout=.
            r = s.get(page_url)
            break
        except requests.exceptions.ConnectionError:
            # Log and fall through to the next attempt.  The original
            # returned here, so the retry loop never actually retried.
            yyhtools.error("%s" % (page_url))
            yyhtools.error(traceback.format_exc())

    if r is None:
        yyhtools.error("requests.get('%s') is None" % page_url)
        return

    # r.encoding is None when the server declares no charset.
    encoding = r.encoding or 'utf-8'
    soup = BeautifulSoup(r.text.encode(encoding))
    allnews = soup.findAll("div", {"class": "newsline"})

    cnt = 0
    session = Session()
    try:
        for news in allnews:
            try:
                # Floor division keeps the id integral on Python 2 and 3.
                news_id = int(news.attrs.get('id')) // 100
                html = str(news)
                session.merge(News(id=news_id, html=html))
                cnt += 1
            except Exception:
                ytrack.fail(traceback.format_exc())
        session.commit()
    finally:
        session.close()

    ytrack.success("%s 成功更新 %s 条记录." % ('jin10_news', cnt))
def run_daily(exchange, date, symbol):
    """Refresh the OHLC and moving-average tables of one exchange.

    Args:
        exchange: Exchange prefix used to build the table names
            (``<exchange>_ohlc_daily``, ``<exchange>_macd_weekly``, ...).
        date: Day to process as ``YYYYMMDD``; a falsy value selects
            yesterday's date.
        symbol: Symbol to update; it is upper-cased before use.

    Saturdays and Sundays are skipped.  Otherwise the daily, weekly and
    monthly OHLC tables and then the daily, weekly and monthly MACD tables
    are updated in that order; each one is skipped with a log entry when
    its ``_update_*`` helper returns ``None``.  The collected ``ytrack``
    log is sent through ``ynotice`` in both cases.  Returns nothing.
    """
    symbol = symbol.upper()
    if not date:
        # The US market runs one day behind local time.
        day = datetime.datetime.now() - datetime.timedelta(days=1)
        day = day.replace(hour=0, minute=0, second=0, microsecond=0)
        date = int(day.strftime("%Y%m%d"))
    else:
        day = datetime.datetime.strptime(str(date), "%Y%m%d")

    if day.weekday() in (5, 6):
        ytrack.fail('%s is not open day' % date)
        ynotice.send(ytrack.get_logs(), style='stock',
                     title=u'%s-%s-K线图更新' % (get_day_date(day), exchange))
        return

    def my_update_someday_data(df, date, save_table):
        """Delete the rows of ``date`` from ``save_table``, then append ``df``."""
        sql = "delete from %s where date = %s" % (save_table, date)
        ytrack.success("execute: %s" % sql)
        try:
            engine.execute(sql)
        except Exception:
            # Narrowed from a bare except so Ctrl-C / SystemExit propagate.
            ytrack.fail(traceback.format_exc())
        else:
            ytrack.success(u"%s删除数据成功" % save_table)
        # The insert is attempted even if the delete failed.
        try:
            df.to_sql(save_table, engine, if_exists='append', index=True, index_label='code')
        except Exception:
            ytrack.fail(traceback.format_exc())
        else:
            ytrack.success(u"%s 成功更新 %s 条记录." % (save_table, df.shape[0]))

    ytrack.success("start run_daily(date=%s, exchange=%s, symbol=%s)" % (date, exchange, symbol))

    # --- OHLC tables -------------------------------------------------------
    table1 = "%s_ohlc_daily" % exchange
    # NOTE(review): this call passes the datetime `day` and a fourth
    # argument, unlike the weekly/monthly calls -- confirm it matches the
    # _update_ohlc_daily signature this module actually uses.
    df1 = _update_ohlc_daily(day, symbol, table1, exchange)
    if df1 is not None:
        my_update_someday_data(df1, get_day_date(day), table1)
    else:
        ytrack.success(u"%s 需要更新的数据为空" % table1)

    table2 = '%s_ohlc_weekly' % exchange
    df2 = _update_ohlc_weekly(date, symbol, table2)
    if df2 is not None:
        my_update_someday_data(df2, get_week_date(day), table2)
    else:
        ytrack.success(u"%s 需要更新的数据为空" % table2)

    table3 = '%s_ohlc_monthly' % exchange
    df3 = _update_ohlc_monthly(date, symbol, table3)
    if df3 is not None:
        my_update_someday_data(df3, get_month_date(day), table3)
    else:
        ytrack.success(u"%s 需要更新的数据为空" % table3)

    # --- moving-average tables: keep 'date' plus ma5, ma10, ..., ma250 -----
    macd_cols = ['date'] + ['ma%s' % i for i in range(5, 251, 5)]

    df4 = _update_macd_daily(date, symbol, table1)
    if df4 is not None:
        df4 = df4[macd_cols]
        my_update_someday_data(df4, get_day_date(day), '%s_macd_daily' % exchange)
    else:
        ytrack.success(u"%s_macd_daily 需要更新的数据为空" % exchange)

    df5 = _update_macd_weekly(date, symbol, table2)
    if df5 is not None:
        df5 = df5[macd_cols]
        my_update_someday_data(df5, get_week_date(day), '%s_macd_weekly' % exchange)
    else:
        ytrack.success(u"%s_macd_weekly 需要更新的数据为空" % exchange)

    df6 = _update_macd_monthly(date, symbol, table3)
    if df6 is not None:
        df6 = df6[macd_cols]
        my_update_someday_data(df6, get_month_date(day), '%s_macd_monthly' % exchange)
    else:
        ytrack.success(u"%s_macd_monthly 需要更新的数据为空" % exchange)

    ynotice.send(ytrack.get_logs(), style='stock',
                 title=u'%s-%s-K线图更新' % (get_day_date(day), exchange))
def run_daily_hs_indexs(date, code, save):
    """Refresh the ``hs_indexs_*`` OHLC and moving-average tables.

    Args:
        date: Day to process as ``YYYYMMDD``; a falsy value selects today.
        code: Index code handed to the ``_update_*`` helpers.
        save: Only echoed in the start-up log line; not otherwise used here.

    When ``is_open_day(day)`` is false a failure is logged, an error notice
    is sent and nothing is updated.  Otherwise the daily, weekly and monthly
    OHLC tables and then the daily, weekly and monthly MACD tables are
    updated in that order; each is skipped with a log entry when its helper
    returns ``None``.  The collected ``ytrack`` log is finally sent through
    ``ynotice``.  Returns nothing.
    """
    if not date:
        day = datetime.datetime.now()
        date = int(day.strftime("%Y%m%d"))
    else:
        day = datetime.datetime.strptime(str(date), "%Y%m%d")

    # The original test was inverted: it bailed out with "is not Open"
    # exactly when is_open_day() reported an open day.
    if not is_open_day(day):
        ytrack.fail("%s is not Open.." % date)
        ynotice.send(ytrack.get_logs(), style='error', title='%s-不是交易日' % get_day_date(day))
        return

    def my_update_someday_data(df, date, save_table):
        """Delete the rows of ``date`` from ``save_table``, then append ``df``."""
        sql = "delete from %s where date = %s" % (save_table, date)
        ytrack.success("execute: %s" % sql)
        try:
            engine.execute(sql)
        except Exception:
            # Narrowed from a bare except so Ctrl-C / SystemExit propagate.
            ytrack.fail(traceback.format_exc())
        else:
            ytrack.success("%s删除数据成功" % save_table)
        # The insert is attempted even if the delete failed.
        try:
            df.to_sql(save_table, engine, if_exists='append', index=True, index_label='code')
        except Exception:
            ytrack.fail(traceback.format_exc())
        else:
            ytrack.success("%s 成功更新 %s 条记录." % (save_table, df.shape[0]))

    ytrack.success("start run_daily_hs_indexs(date=%s, code=%s, save=%s)" % (date, code, save))

    # --- OHLC tables -------------------------------------------------------
    df1 = _update_ohlc_daily(date, code, 'hs_indexs_ohlc_daily')
    if df1 is not None:
        my_update_someday_data(df1, get_day_date(day), "hs_indexs_ohlc_daily")
    else:
        ytrack.success("hs_indexs_ohlc_daily 需要更新的数据为空")

    df2 = _update_ohlc_weekly(date, code, 'hs_indexs_ohlc_weekly')
    if df2 is not None:
        my_update_someday_data(df2, get_week_date(day), 'hs_indexs_ohlc_weekly')
    else:
        ytrack.success("hs_indexs_ohlc_weekly 需要更新的数据为空")

    df3 = _update_ohlc_monthly(date, code, 'hs_indexs_ohlc_monthly')
    if df3 is not None:
        my_update_someday_data(df3, get_month_date(day), 'hs_indexs_ohlc_monthly')
    else:
        ytrack.success("hs_indexs_ohlc_monthly 需要更新的数据为空")

    # --- moving-average tables: keep 'date' plus ma5, ma10, ..., ma250 -----
    macd_cols = ['date'] + ['ma%s' % i for i in range(5, 251, 5)]

    df4 = _update_macd_daily(date, code, 'hs_indexs_ohlc_daily')
    if df4 is not None:
        df4 = df4[macd_cols]
        my_update_someday_data(df4, get_day_date(day), 'hs_indexs_macd_daily')
    else:
        ytrack.success("hs_indexs_macd_daily 需要更新的数据为空")

    df5 = _update_macd_weekly(date, code, 'hs_indexs_ohlc_weekly')
    if df5 is not None:
        df5 = df5[macd_cols]
        my_update_someday_data(df5, get_week_date(day), 'hs_indexs_macd_weekly')
    else:
        ytrack.success("hs_indexs_macd_weekly 需要更新的数据为空")

    df6 = _update_macd_monthly(date, code, 'hs_indexs_ohlc_monthly')
    if df6 is not None:
        df6 = df6[macd_cols]
        my_update_someday_data(df6, get_month_date(day), 'hs_indexs_macd_monthly')
    else:
        ytrack.success("hs_indexs_macd_monthly 需要更新的数据为空")

    ynotice.send(ytrack.get_logs(), style='stock', title='%s-沪深指数K线图更新' % get_day_date(day))