Пример #1
0
def fake_data(df):
    """Expand a single-column price frame into an OHLCV-style frame.

    The only column of ``df`` is renamed to ``OPEN`` and copied into
    ``HIGH``, ``LOW``, ``CLOSE`` and ``ADJUSTED``; ``VOLUME`` is set to
    ``sign(price) * 1e9``.  The frame is then extended with the business
    dates returned by ``get_business_date_list`` between the first index
    date and today, and gaps are forward- then backward-filled by at most
    three rows each.

    Note: the column rename/additions are applied to the ``df`` object that
    was passed in (as in the original implementation); the extended frame is
    a new object and is only available through the return value.

    Args:
        df: DataFrame with exactly one column and an index whose elements
            support ``strftime`` (e.g. a ``DatetimeIndex``).

    Returns:
        The extended, index-sorted DataFrame.  An empty input is returned
        as-is (with the added columns) because no date range can be derived
        from it.
    """
    df.columns = ['OPEN']
    price = df.iloc[:, 0]
    for col in ("HIGH", "LOW", "CLOSE"):
        df[col] = price
    df["VOLUME"] = np.sign(price) * 1e9
    df["ADJUSTED"] = price

    # Without at least one row there is no start date to build a range from.
    if df.empty:
        return df

    dt_fmt = '%Y/%m/%d'
    sd = df.index[0].strftime(dt_fmt)
    ed = date.today().strftime(dt_fmt)
    bd_list = get_business_date_list(fmt=dt_fmt)
    print(sd, ed, type(bd_list))
    short_bd_list = pd.to_datetime(bd_list[(bd_list >= sd) & (bd_list <= ed)])

    # Rows on the business-date calendar; dates absent from df become NaN.
    calendar_rows = df.reindex(short_bd_list)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # replacement.  De-duplicate before sorting so that "first" always means
    # the original row rather than depending on sort stability.
    df = pd.concat([df, calendar_rows])
    df = df[~df.index.duplicated(keep='first')].sort_index()
    return df.ffill(limit=3).bfill(limit=3)
Пример #2
0
def amend_daily_data(i,sd,ed,dk,ded):
    """Fetch data for dates that table ``i`` does not cover yet.

    The ``date`` column of SQL table ``i`` is read through connection
    ``ded``.  If the table cannot be read or holds no dates, the whole
    ``sd``..``ed`` range is fetched (``fetch_fund_data`` when ``dk`` is
    ``'fund_nav'``, otherwise ``fetch_daily_data``).  Otherwise the stored
    dates are compared with the business dates strictly between ``sd`` and
    ``ed`` and the gaps are fetched piecewise with ``fetch_daily_data``.

    Args:
        i: Table name, also passed to the fetch helpers as the instrument.
        sd: Start date, comparable with ``get_business_date_list`` entries
            formatted as ``%Y%m%d``.
        ed: End date, same convention as ``sd``.
        dk: Data key selecting the fetch helper.
        ded: Connection object accepted by ``pandas.read_sql_table``.

    Returns:
        A DataFrame with the fetched rows, whatever the fetch helper returns
        on the full-range path, or ``None`` when no dates differ.
    """
    dt_series = None
    try:
        dt_series = pd.read_sql_table(table_name=i, con=ded)['date'].sort_values()
    except Exception as e:
        # Best effort: an unreadable/missing table is treated as "no data".
        print(e)

    if dt_series is None or dt_series.empty:
        if dk == 'fund_nav':
            df = fetch_fund_data(i, sd, ed, dk)
        else:
            df = fetch_daily_data(i, sd, ed, dk)
    else:
        dt_set = set(dt_series)
        bd_list = get_business_date_list(fmt='%Y%m%d')
        print('sd/ed', sd, ed)
        bd_list = bd_list[(bd_list > sd) & (bd_list < ed)]
        bd_set = set(bd_list)
        # Symmetric difference, exactly as the original union-minus-
        # intersection.  NOTE(review): this also contains stored dates that
        # are not business dates in range; confirm that is intended.
        missing_dates = sorted(dt_set ^ bd_set)
        # sorted() never returns None; an empty list used to fall through
        # and crash on missing_dates[0].
        if not missing_dates:
            return None
        print('missing_dates', missing_dates)
        print('dt_series', dt_series)
        print('bd_list', bd_list)

        df = pd.DataFrame()
        dt_begin = missing_dates[0]
        fast_mode = False
        for dt in missing_dates[1:]:
            # The original line read `if dt > :` (a syntax error).
            # NOTE(review): the end date is assumed to be the intended
            # bound - confirm.
            if dt > ed:
                continue
            pd_dt = pd.to_datetime(dt)
            dt_diff = pd_dt - pd.to_datetime(dt_begin)
            if fast_mode and dt_diff < timedelta(7):
                continue
            # Fetch from the previous gap date up to the day before this one;
            # for gaps longer than 31 days only the single start day is
            # fetched.
            dt_end = (pd_dt - timedelta(1)).strftime('%Y%m%d')
            if dt_diff > timedelta(31):
                dt_end = dt_begin
            tmpdf = fetch_daily_data(i, dt_begin, dt_end, dk)
            print('amending date:', dt_begin, dt_end, dt_diff)
            dt_begin = dt
            time.sleep(0.10)
            if tmpdf is None:
                continue
            print(tmpdf)
            df = pd.concat([df, tmpdf]).drop_duplicates()
    print('amend_daily_data', i)
    print(df)
    return df
Пример #3
0
def fill_missing_data(fin, fout, index_col, zfix):
    """Re-sample a dated CSV onto the business-date calendar and rewrite it.

    The file is read with ``index_col`` as a parsed date index, sorted,
    de-duplicated on the index, re-indexed onto the business dates returned
    by ``get_business_date_list`` between its first and last date, and
    forward-filled.  Single-column input is passed through ``fake_data``;
    files whose name ends in ``FX.csv`` are passed through
    ``fake_data(df, adjflag=True)``.  The result is written to ``fout`` with
    a ``date`` index, rounded to 7 decimals.

    Args:
        fin: Path of the input CSV (a ``/``-separated string).
        fout: Path of the output CSV.
        index_col: Column passed to ``pandas.read_csv`` as ``index_col``.
        zfix: When truthy, append a copy of the last row dated on the first
            business date after the last data date.

    Returns:
        ``True`` when the output was written; ``False`` when the input could
        not be read or contains no rows.
    """
    try:
        df = pd.read_csv(fin, index_col=index_col, parse_dates=True)
    except Exception as err:
        # Previously execution continued with df = None and crashed on
        # df.shape; an unreadable input is now reported as failure.
        print(str(err))
        return False
    dt_fmt = '%Y-%m-%d'

    if df.empty:
        return False
    if df.shape[1] < 2:
        df.columns = ['open']

    print('jzcheck', df.iloc[-10:, ])
    # Sort and de-duplicate before deriving the date range so that sd/ed are
    # the true first/last dates even for unsorted input, and so that
    # reindex(method='ffill') gets the monotonic, unique index it requires.
    df = df.sort_index()
    df = df[~df.index.duplicated()]
    print('jzcheck2', df.iloc[-10:, ])

    sd = df.index[0].strftime(dt_fmt)
    ed = df.index[-1].strftime(dt_fmt)
    bd_list = get_business_date_list(fmt=dt_fmt)
    print(sd, ed, type(bd_list))
    short_bd_list = pd.to_datetime(bd_list[(bd_list >= sd) & (bd_list <= ed)])
    print(short_bd_list)

    # fillna(method='ffill') is deprecated; DataFrame.ffill() is equivalent.
    df = df.reindex(short_bd_list, method='ffill').ffill()

    if df.shape[1] < 2:
        df = fake_data(df)
    elif re.match(r'.*FX\.csv$', fin.split('/')[-1]):
        df = fake_data(df, adjflag=True)

    if zfix:
        zfix_dt = pd.to_datetime(bd_list[bd_list > ed][0])
        # DataFrame.append was removed in pandas 2.0.  Slicing with [[-1]]
        # keeps the last row as a DataFrame so column dtypes are preserved.
        extra_row = df.iloc[[-1]].copy()
        extra_row.index = [zfix_dt]
        df = pd.concat([df, extra_row])
        print('zfix: appended extra row', zfix_dt)
    df.index.names = ['date']
    df.sort_index().round(7).to_csv(fout,
                                    index=True,
                                    date_format=dt_fmt,
                                    na_rep='')
    return True
Пример #4
0
def fill_missing_data(fin, fout, index_col, zfix):
    """Re-sample a dated CSV onto the business-date calendar and rewrite it.

    The file is read with ``index_col`` as a parsed date index, sorted,
    de-duplicated on the index, re-indexed onto the business dates returned
    by ``get_business_date_list`` between its first and last date, and
    forward-filled.  Single-column input is passed through ``fake_data``.
    The result is written to ``fout`` with a ``date`` index formatted as
    ``%Y%m%d``, rounded to 7 decimals.

    Args:
        fin: Path of the input CSV.
        fout: Path of the output CSV.
        index_col: Column passed to ``pandas.read_csv`` as ``index_col``.
        zfix: When truthy, append a copy of the last row dated on the first
            business date after the last data date.

    Returns:
        ``True`` when the output was written; ``False`` when the input could
        not be read or contains no rows.
    """
    try:
        df = pd.read_csv(fin, index_col=index_col, parse_dates=True)
    except Exception as err:
        # Previously execution continued with df = None and crashed on
        # df.shape; an unreadable input is now reported as failure.
        print(str(err))
        return False
    dt_fmt = '%Y%m%d'

    if df.empty:
        return False
    if df.shape[1] < 2:
        df.columns = ['close']

    # reindex(method='ffill') needs a monotonic index without duplicates,
    # and sd/ed must be the true first/last dates, so normalise first.
    df = df.sort_index()
    df = df[~df.index.duplicated()]

    sd = df.index[0].strftime(dt_fmt)
    ed = df.index[-1].strftime(dt_fmt)
    bd_list = get_business_date_list(fmt=dt_fmt)
    print(sd, ed, type(bd_list))
    short_bd_list = pd.to_datetime(bd_list[(bd_list >= sd) & (bd_list <= ed)])

    # fillna(method='ffill') is deprecated; DataFrame.ffill() is equivalent.
    df = df.reindex(short_bd_list, method='ffill').ffill()

    if df.shape[1] < 2:
        df = fake_data(df)

    if zfix:
        zfix_dt = pd.to_datetime(bd_list[bd_list > ed][0])
        # DataFrame.append was removed in pandas 2.0.  Slicing with [[-1]]
        # keeps the last row as a DataFrame so column dtypes are preserved.
        extra_row = df.iloc[[-1]].copy()
        extra_row.index = [zfix_dt]
        df = pd.concat([df, extra_row])
        print('zfix: appended extra row', zfix_dt)
    df.index.names = ['date']
    df.sort_index().round(7).to_csv(fout,
                                    index=True,
                                    date_format=dt_fmt,
                                    na_rep='')
    return True
Пример #5
0
def main():
    """Parse command-line options and run ``get_db_data`` for one data key.

    Options (from the getopt spec below):
        -d/--datakey KEY  data key (default ``'opt'``)
        -u NAME           user name used to build the input path
        -e DATE           end date; defaults to ``get_prev_business_date(today, -1)``
        -o                sets ``output_flag`` (passed as ``oflag``)
        -l                sets ``link_flag`` (passed as ``lflag``)
        -c, -v            set ``conv_flag`` / ``verbose``; neither is read in
                          the portion of the function visible here

    NOTE(review): ``-h`` and ``--help`` are accepted by the getopt spec but
    have no branch in the option loop, so they reach ``assert False``.
    ``usage``, ``fs_list``, ``ix_list`` and ``get_db_data`` are not defined
    in this block and are expected to come from the enclosing module.
    """
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d:u:e:hoclv",
                                   ["datakey=", "help"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)
    # Default user is the account running the script; -u can override it
    # below, but the zlib path is built from this default.
    uname = pwd.getpwuid(os.getuid()).pw_name
    sys.path.append('/work/' + uname + '/project/zlib/')
    # Imported here because the module only becomes importable after the
    # sys.path entry above has been added.
    from zutils import get_prev_business_date, get_business_date_list
    bdl = get_business_date_list(fmt='%Y%m%d')
    output_flag = False
    conv_flag = False
    link_flag = False
    verbose = False
    dkey = 'opt'
    edate = None
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-d", "--datakey"):
            dkey = a
        elif o == '-u':
            uname = a
        elif o == '-o':
            output_flag = True
        elif o == '-c':
            conv_flag = True
        elif o == '-e':
            edate = a
        elif o == '-l':
            link_flag = True
        else:
            # Also reached for -h/--help, which getopt accepts (see docstring).
            assert False, 'unhandled option'

    print(dkey)
    edate = get_prev_business_date(date.today(),
                                   -1) if edate is None else edate
    # Start date is derived from the day seven days before today.
    sdate = get_prev_business_date(date.today() - timedelta(7),
                                   -1)  #.strftime("%Y%m%d")
    print(sdate, edate)

    input_path = '/work/' + uname + '/input/' + dkey + '/'

    if dkey in ('opt', 'fut', 'fund', 'fund_nav', 'index', 'stock'):
        # NOTE(review): ('stock') and ('index') are plain strings, not
        # 1-tuples, so these are substring tests; they behave like equality
        # only because of the membership check on the enclosing `if`.
        if dkey in ('stock'):
            # One get_db_data call per entry of fs_list, passed as d_type.
            for k in fs_list:
                get_db_data(input_path,
                            sdate,
                            edate,
                            uname,
                            bdt_list=bdl,
                            dk=dkey,
                            d_type=k,
                            oflag=output_flag,
                            lflag=link_flag)
        elif dkey in ('index'):
            # One get_db_data call per entry of ix_list, passed as d_type.
            for k in ix_list:
                get_db_data(input_path,
                            sdate,
                            edate,
                            uname,
                            bdt_list=bdl,
                            dk=dkey,
                            d_type=k,
                            oflag=output_flag,
                            lflag=link_flag)

        # Every supported data key additionally gets the 'basic' and 'daily'
        # d_type calls.
        get_db_data(input_path,
                    sdate,
                    edate,
                    uname,
                    bdt_list=bdl,
                    dk=dkey,
                    d_type='basic',
                    oflag=output_flag,
                    lflag=link_flag)
        get_db_data(input_path,
                    sdate,
                    edate,
                    uname,
                    bdt_list=bdl,
                    dk=dkey,
                    d_type='daily',
                    oflag=output_flag,
                    lflag=link_flag)