def conv_db_htick2min(db_table, inst_file, out_table='hist_fut_min', database='hist_data', dstep=10): conf_dict = {} instIDs = [] if inst_file == '': instIDs = get_col_dist_values(database + '.' + db_table, 'instID', {}) conf_dict = {'instIDs': instIDs} try: inst_file = 'instID_file.json' with open(inst_file, 'w') as ofile: json.dump(conf_dict, ofile) except: pass else: with open(inst_file, 'r') as infile: conf_dict = json.load(infile) instIDs = conf_dict['instIDs'] dbconfig = { 'user': '******', 'password': '******', 'host': 'localhost', 'database': database } cnx = mysqlconn.connect(**dbconfig) for inst in instIDs: field_dict = {'instID': "\'" + inst + "\'"} datestr_list = get_col_dist_values(database + '.' + db_table, 'date', field_dict) mdata = pd.DataFrame() prod = misc.inst2product(inst) exch = misc.inst2exch(inst) num_run = (len(datestr_list) + dstep - 1) / dstep for idx in range(num_run): s_idx = idx * dstep e_idx = min((idx + 1) * dstep - 1, len(datestr_list) - 1) sdate = datetime.datetime.strptime(datestr_list[s_idx], "%Y-%m-%d").date() edate = datetime.datetime.strptime(datestr_list[e_idx], "%Y-%m-%d").date() df = load_hist_tick(db_table, inst, sdate, edate) mdf = conv_ohlc_freq(df, '1Min') mdf['min_id'] = ( (mdf.index.hour + 6) % 24) * 100 + mdf.index.minute mdf = backtest.cleanup_mindata(mdf, prod) mdf.index.name = 'datetime' mdf['instID'] = inst mdf['exch'] = exch mdf = mdf.reset_index() mdf.set_index(['instID', 'exch', 'datetime'], inplace=True) mdf.to_sql(name=out_table, flavor='mysql', con=cnx, if_exists='append') cnx.commit() print inst, sdate, edate, len(mdf) cnx.close() return
def get_wind_data(inst_list, start_date, end_date, save_loc='C:\\dev\\data\\', freq='m'): exch_map = {v: k for k, v in wind_exch_map.items()} for instID in inst_list: exch = misc.inst2exch(instID) ex = exch_map[exch] ticker = instID + '.' + ex product = misc.inst2product(instID) sdate = start_date edate = end_date stime = datetime.time(9, 0, 0) etime = datetime.time(15, 0, 0) if product in ['T', 'TF']: stime = datetime.time(9, 15, 0) etime = datetime.time(15, 15, 0) elif product in misc.night_session_markets: stime = datetime.time(21, 0, 0) sdate = misc.day_shift(sdate, '-1b') smin = datetime.datetime.combine(sdate, stime) emin = datetime.datetime.combine(edate, etime) fields = 'open,high,low,close,volume,oi' try: if freq == 'm': outfile = save_loc + instID + '_min.csv' if os.path.isfile(outfile): continue raw_data = w.wsi(ticker, fields, smin, emin) if len(raw_data.Data) > 1: output = {'datetime': raw_data.Times, 'open': raw_data.Data[0], 'high': raw_data.Data[1], 'low': raw_data.Data[2], 'close': raw_data.Data[3], 'volume': raw_data.Data[4], 'openInterest': raw_data.Data[5]} dump2csvfile(output, outfile) else: print "no min data obtained for ticker=%s" % ticker elif freq == 'd': outfile = save_loc + instID + '_daily.csv' if os.path.isfile(outfile): continue raw_data = w.wsd(ticker, fields, start_date, end_date) if len(raw_data.Data) > 1: output = {'datetime': raw_data.Times, 'open': raw_data.Data[0], 'high': raw_data.Data[1], 'low': raw_data.Data[2], 'close': raw_data.Data[3], 'volume': raw_data.Data[4], 'openInterest': raw_data.Data[5]} dump2csvfile(output, outfile) else: print "no daily data obtained for ticker=%s" % ticker except ValueError: pass w.stop() return True
def insert_min_data(cnx, inst, min_data, dbtable='fut_min', option='IGNORE'): cursor = cnx.cursor() exch = misc.inst2exch(inst) min_data['date'] = min_data['datetime'].date() stmt = "INSERT {opt} INTO {table} (instID,exch,{variables}) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)".format( opt=option, table=dbtable, variables=','.join(min_columns)) args = tuple([inst, exch] + [min_data[col] for col in min_columns]) cursor.execute(stmt, args) cnx.commit()
def insert_min_data(inst, min_data, dbtable = 'fut_min'): cnx = mysql.connector.connect(**dbconfig) cursor = cnx.cursor() col_list = min_data.keys() exch = misc.inst2exch(inst) stmt = "INSERT IGNORE INTO {table} (instID,exch,{variables}) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)".format(table=dbtable,variables=','.join(col_list)) args = tuple([inst, exch]+[min_data[col] for col in col_list]) cursor.execute(stmt, args) cnx.commit() cnx.close() pass
def insert_min_data(inst, min_data, dbtable = 'fut_min'): cnx = mysql.connector.connect(**dbconfig) cursor = cnx.cursor() exch = misc.inst2exch(inst) min_data['date'] = min_data['datetime'].date() col_list = min_data.keys() stmt = "INSERT IGNORE INTO {table} (instID,exch,{variables}) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)".format(table=dbtable,variables=','.join(col_list)) args = tuple([inst, exch]+[min_data[col] for col in col_list]) cursor.execute(stmt, args) cnx.commit() cnx.close() pass
def insert_daily_data(cnx, inst, daily_data, is_replace=False, dbtable='fut_daily'): cursor = cnx.cursor() col_list = daily_data.keys() exch = misc.inst2exch(inst) if is_replace: cmd = "REPLACE" else: cmd = "INSERT IGNORE" stmt = "{commd} INTO {table} (instID,exch,{variables}) VALUES (%s,%s,{formats})".format(\ commd=cmd, table=dbtable, variables=','.join(col_list), \ formats=','.join(['%s'] * len(col_list))) args = tuple([inst, exch] + [daily_data[col] for col in col_list]) cursor.execute(stmt, args) cnx.commit()
def insert_daily_data(inst, daily_data, is_replace = False, dbtable = 'fut_daily'): cnx = mysql.connector.connect(**dbconfig) cursor = cnx.cursor() col_list = daily_data.keys() exch = misc.inst2exch(inst) if is_replace: cmd = "REPLACE" else: cmd = "INSERT IGNORE" stmt = "{commd} INTO {table} (instID,exch,{variables}) VALUES (%s,%s,{formats})".format(commd=cmd, table=dbtable,variables=','.join(col_list), formats=','.join(['%s']*len(col_list))) args = tuple([inst, exch]+[daily_data[col] for col in col_list]) cursor.execute(stmt, args) cnx.commit() cnx.close() pass
def bulkinsert_min_data(cnx, inst, mindata_list, dbtable='fut_min', is_replace=False): if len(mindata_list) == 0: return cursor = cnx.cursor() exch = misc.inst2exch(inst) if is_replace: cmd = "REPLACE" else: cmd = "INSERT IGNORE" stmt = "{cmd} INTO {table} (instID,exch,{variables}) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)".format(\ cmd=cmd, table=dbtable, variables=','.join(min_columns)) args = [] for min_data in mindata_list: args.append(tuple([inst, exch] + [min_data[col] for col in min_columns])) cursor.executemany(stmt, args) cnx.commit()
def conv_db_htick2min(db_table, inst_file, out_table = 'hist_fut_min', database = 'hist_data', dstep = 10): conf_dict = {} instIDs = [] if inst_file == '': instIDs = get_col_dist_values(database + '.' + db_table, 'instID',{}) conf_dict = {'instIDs': instIDs} try: inst_file = 'instID_file.json' with open(inst_file, 'w') as ofile: json.dump(conf_dict, ofile) except: pass else: with open(inst_file, 'r') as infile: conf_dict = json.load(infile) instIDs = conf_dict['instIDs'] dbconfig = {'user': '******', 'password':'******', 'host':'localhost', 'database': database} cnx = mysqlconn.connect(**dbconfig) for inst in instIDs: field_dict = {'instID': "\'"+inst+"\'"} datestr_list = get_col_dist_values(database + '.' + db_table, 'date', field_dict) mdata = pd.DataFrame() prod = misc.inst2product(inst) exch = misc.inst2exch(inst) num_run = (len(datestr_list)+dstep-1)/dstep for idx in range(num_run): s_idx = idx * dstep e_idx = min((idx + 1) *dstep - 1, len(datestr_list)-1) sdate = datetime.datetime.strptime(datestr_list[s_idx], "%Y-%m-%d").date() edate = datetime.datetime.strptime(datestr_list[e_idx], "%Y-%m-%d").date() df = load_hist_tick(db_table, inst, sdate, edate) mdf = conv_ohlc_freq(df, '1Min') mdf['min_id'] = ((mdf.index.hour + 6) % 24) * 100 + mdf.index.minute mdf = backtest.cleanup_mindata(mdf, prod) mdf.index.name = 'datetime' mdf['instID'] = inst mdf['exch'] = exch mdf = mdf.reset_index() mdf.set_index(['instID', 'exch', 'datetime'], inplace = True) mdf.to_sql(name = out_table, flavor = 'mysql', con = cnx, if_exists='append') cnx.commit() print inst, sdate, edate, len(mdf) cnx.close() return
def get_fut_hist(instID, freq = 'd', proxies = PROXIES): exch = misc.inst2exch(instID) if exch in ['DCE', 'SHFE']: ticker = instID.upper() elif exch in ['CZCE']: year = instID[-3] if int(year) < 8: year = '2' + year else: year = '1' + year month = instID[-2:] ticker = instID[-3] + year + month elif exch in ['CFFEX', ]: ticker = "CFF_RE_" + instID # choose daily or 5 mins historical data if freq[-1] == 'd': url = 'http://stock2.finance.sina.com.cn/futures/api/json.php/IndexService.getInnerFuturesDailyKLine?symbol=%s' % ticker date_col = 'date' elif freq[-1] == 'm': if int(freq[:-1]) in [5, 15, 30, 60]: url = 'http://stock2.finance.sina.com.cn/futures/api/json.php/IndexService.getInnerFuturesMiniKLine%s?symbol=%s' % (freq, ticker) date_col = 'datetime' else: return else: return [] raw = urllib.urlopen(url, proxies = proxies).read() result = json.loads(raw) df = pd.DataFrame(result, columns=['datetime','open', 'high','low','close','volume']) if date_col == 'date': df[date_col] = df[date_col].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d").date()) else: df[date_col] = df[date_col].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S").date()) for col in ['open', 'high','low','close']: df[col] = df[col].apply(lambda x: float(x)) df['volume'] = df['volume'].apply(lambda x: int(x)) df['exch'] = exch df = df.set_index([date_col]) return df
def get_fut_quotes(inst_list, proxies = PROXIES): index_list = [] exch_list = [] for idx in inst_list: inst = idx exch = misc.inst2exch(idx) if exch in ['DCE', 'SHFE']: inst = idx.upper() elif exch in ['CZCE']: year = idx[-3] if int(year) < 8: year = '2' + year else: year = '1' + year month = idx[-2:] inst = idx[-3] + year + month elif exch in ['CFFEX',]: inst = "CFF_RE_" + idx index_list.append(inst) exch_list.append(exch) url = "http://hq.sinajs.cn/list=%s" % ','.join(index_list) raw = urllib.urlopen(url, proxies = proxies).read() raw = raw.split('\n') result = dict() for instID, exch, raw_data in zip(inst_list, exch_list, raw): if len(raw_data) == 0: continue quotes = raw_data.split(',') if exch in ['CFFEX',]: tick_time = quotes[37].split(':') tick_id = ((int(tick_time[0]) + 6) % 24) * 10000 + int(tick_time[1])*100 + int(tick_time[2]) data = { 'tick_id': tick_id * 10, 'open': float(quotes[0]), 'high': float(quotes[1]), 'low': float(quotes[2]), 'close': float(quotes[3]), 'bidPrice1': float(quotes[7]), 'askPrice1': float(quotes[7]), 'price': float(quotes[7]), 'settlement': float(quotes[8]), 'bidVol1': 0, 'askVol1': 0, 'openInterest': int(quotes[6]), 'volume': int(quotes[4]), 'date': quotes[36], } else: tick_id = int(quotes[1]) tick_id = ((int(tick_id/10000) + 6) % 24) * 10000 + (tick_id % 10000) data = { 'tick_id': tick_id * 10, 'open': float(quotes[2]), 'high': float(quotes[3]), 'low': float(quotes[4]), 'prev_close': float(quotes[5]), 'bidPrice1': float(quotes[6]), 'askPrice1': float(quotes[7]), 'price': float(quotes[8]), 'prev_settlement': float(quotes[9]), 'bidVol1': int(quotes[11]), 'askVol1': int(quotes[12]), 'openInterest': int(quotes[13]), 'volume': int(quotes[14]), 'date': quotes[17], } result[instID] = data return result