Example #1
0
def calRecordNum(trade_dates):
    """Estimate the total number of stock records across *trade_dates*.

    For each date, queries the Choice sector API (sector code "001004",
    presumably the all-A-share universe -- confirm against the Choice docs).
    ``c.sector`` returns ``Data`` as a flat alternating list
    ``[code, name, code, name, ...]``, so each stock record occupies two
    entries.

    Parameters
    ----------
    trade_dates : iterable of date strings accepted by ``c.sector``.

    Returns
    -------
    int
        Total number of (date, stock) records.
    """
    tot_record_num = 0
    for tmp_date in trade_dates:
        tmp_codes = c.sector("001004", tmp_date)
        # Use floor division: under Python 3, `/ 2` yields a float, which
        # silently turns the record count into e.g. 1234.0 and propagates
        # into the quota comparison in updateCSS.
        tot_record_num += len(tmp_codes.Data) // 2

    return tot_record_num
Example #2
0
def getStockExistInfo(trade_dates):  # csd can get data easily
    """Build a date-by-code existence matrix for listed stocks.

    For each trade date, queries the Choice sector API ("001004") for the
    stocks available on that date, then pivots the collected records into
    a flag table.

    Parameters
    ----------
    trade_dates : iterable of date strings accepted by ``c.sector``.

    Returns
    -------
    exist_stock_flag : pd.DataFrame
        Indexed by date, one column per stock code; 1 where the stock
        existed on that date, NaN otherwise.
    stock_existing_points : pd.DataFrame
        Indexed by stock code, columns ``['first', 'last']`` holding the
        first and last date each stock appears.
    tot_record_num : int
        Total number of (date, code) records collected.
    """
    daily_frames = []
    tot_record_num = 0
    for tmp_date in trade_dates:
        tmp_codes = c.sector("001004", tmp_date)
        # Data alternates [code, name, code, name, ...]; keep codes only.
        tmp_codes = [
            tmp_codes.Data[i] for i in range(0, len(tmp_codes.Data), 2)
        ]  # only get stock codes
        tmp_record = pd.DataFrame({'code': tmp_codes})
        tmp_record.loc[:, 'date'] = tmp_date
        daily_frames.append(tmp_record)
        tot_record_num += len(tmp_codes)

    # Concatenate once instead of DataFrame.append per date:
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
    # and re-appending in a loop is O(n^2) in the number of dates.
    if daily_frames:
        stock_record = pd.concat(daily_frames, ignore_index=True)
    else:
        stock_record = pd.DataFrame(columns=['code', 'date'])
    stock_record.loc[:, 'exist'] = 1
    exist_stock_flag = stock_record.pivot_table(values='exist',
                                                index='date',
                                                columns='code',
                                                aggfunc=np.sum)

    # get the first and last existing date for every stock code
    stock_existing_points = pd.DataFrame([],
                                         index=exist_stock_flag.columns,
                                         columns=['first', 'last'])
    for tmp_code in exist_stock_flag.columns:
        tmp_all_existing_dates = exist_stock_flag.loc[
            ~exist_stock_flag[tmp_code].isnull()].index
        stock_existing_points.loc[tmp_code] = [
            tmp_all_existing_dates[0], tmp_all_existing_dates[-1]
        ]

    return exist_stock_flag, stock_existing_points, tot_record_num
Example #3
0
def updateCSS(max_data_num, start_date='2007-01-01', end_date=''):
    """Download daily stock quotes via the Choice ``c.css`` API and write
    them into the ``targetTableName`` (unadjusted) and ``adjQuoteTableName``
    (adjusted) MySQL tables.

    Parameters
    ----------
    max_data_num : int
        Remaining API data quota; raises ValueError before downloading if
        the estimated record volume would exceed it.
    start_date, end_date : str
        'YYYY-MM-DD' bounds for the trade-date range; ``end_date`` defaults
        to yesterday when passed as ''.

    Side effects
    ------------
    - Deletes any rows in [start_date, end_date] from both target tables
      before appending (duplicate protection).
    - Logs in to / out of the Choice API.
    - Writes to the database in chunks of ~50,000 records.
    """
    # get trade calendar
    quant_engine = create_engine(
        'mysql+pymysql://{user}:{password}@{host}/{db}?charset={charset}'.
        format(**ConfigQuant))
    if end_date == '':
        end_date = datetime.strftime(datetime.today() - timedelta(1),
                                     '%Y-%m-%d')  # yesterday
    sql_statement = "select date from %s where date between '%s' and '%s'" % (
        calendarTableName, start_date, end_date)
    trade_dates = pd.read_sql(sql_statement, quant_engine)
    trade_dates = trade_dates['date'].values

    # check if table exists, and make sure no duplicate data
    sql_statement = "select TABLE_NAME from INFORMATION_SCHEMA.TABLES where TABLE_SCHEMA='quant' and TABLE_NAME='%s';" % targetTableName
    tmp_result = pd.read_sql(sql_statement, quant_engine)
    if tmp_result.shape[0] > 0:  # table already exist
        write_method = 'append'  # append data, keep the old data

        # NOTE(review): delete-then-append guards against duplicates but is
        # not transactional with the later to_sql writes; a failure mid-run
        # leaves the date range empty. Confirm this is acceptable.
        tmp_conn = pymysql.connect(**ConfigQuant)
        with tmp_conn.cursor() as tmp_cur:
            sql_statement = "delete from %s where date between '%s' and '%s'" % (
                targetTableName, start_date, end_date)  # avoid duplicated data
            tmp_num = tmp_cur.execute(sql_statement)
            print('delete duplicate record num:', tmp_num)

            sql_statement = "delete from %s where date between '%s' and '%s'" % (
                adjQuoteTableName, start_date, end_date
            )  # avoid duplicated data
            tmp_num = tmp_cur.execute(sql_statement)
            print('delete duplicate record num:', tmp_num)
        tmp_conn.commit()
        tmp_conn.close()
    else:
        # First run: let to_sql create the table from scratch.
        write_method = 'replace'

    # choice API data
    loginResult = c.start("ForceLogin=1", '', mainCallback)  # login
    if (loginResult.ErrorCode != 0):
        print("login in fail")
        exit()

    # get total record num
    tot_record_num = calRecordNum(
        trade_dates)  # c.sector  not consume data limit
    print('total record num:', tot_record_num)
    # len(targetColNames) - 2 excludes the 'date' and 'code' columns,
    # which are filled locally and do not consume API quota.
    if max_data_num < tot_record_num * (len(targetColNames) - 2):
        print('data quota not enough to download all the data')
        raise ValueError

    # loop over trade dates to download stock quotes from API
    choice_data = pd.DataFrame([], columns=targetColNames)
    dump_data_size = 50000  # dump data every n records
    # write_method = 'replace'
    tmp_index = 0
    current_dump_num = 0
    for tmp_date in trade_dates:
        tmp_codes = c.sector("001004", tmp_date)
        # Data alternates [code, name, ...]; keep only the codes.
        tmp_codes = [
            tmp_codes.Data[i] for i in range(0, len(tmp_codes.Data), 2)
        ]
        tmp_codes = ','.join(tmp_codes)
        tmp_data = c.css(tmp_codes, choiceFieldName,
                         "TradeDate=%s,AdjustFlag=1" %
                         tmp_date)  # 1: unadjusted price
        for tmp_c, tmp_quote in tmp_data.Data.items():
            # tmp_c[:-3] strips the exchange suffix (e.g. '.SH'/'.SZ') --
            # assumes a 3-character suffix; confirm for all markets used.
            tmp_c = tmp_c[:-3]
            tmp_buffer = [tmp_date, tmp_c]
            tmp_buffer.extend(tmp_quote)
            choice_data.loc[tmp_index] = tmp_buffer
            tmp_index += 1

        tmp_cols = set(targetColNames) - {'date', 'code'}
        for tmp_col in tmp_cols:
            choice_data.loc[:, tmp_col] = choice_data[tmp_col].astype(
                'float')  # make sure datetype consistent
        # dump data into database by trunk (or reach the final)
        if (choice_data.shape[0] > dump_data_size) or (tmp_date
                                                       == trade_dates[-1]):
            choice_data.loc[:, 'time_stamp'] = datetime.now()
            choice_data.to_sql(targetTableName,
                               quant_engine,
                               index=False,
                               if_exists=write_method)

            # Houfuquan (backward-adjusted) quotes: multiply price columns
            # by the adjustment factor, then drop the factor column before
            # writing the adjusted table.
            for tmp_col in adjColNames:
                choice_data.loc[:, tmp_col] = choice_data[
                    tmp_col] * choice_data['adj_factor']
            choice_data = choice_data.drop('adj_factor', axis=1)
            choice_data.to_sql(adjQuoteTableName,
                               quant_engine,
                               index=False,
                               if_exists=write_method)

            current_dump_num += choice_data.shape[0]
            print('finish dump data:', current_dump_num)

            # After the first chunk the table exists, so later chunks
            # must append even if the run started with 'replace'.
            write_method = 'append'
            choice_data = pd.DataFrame([], columns=targetColNames)

    c.stop()  # logout