Пример #1
0
 def test_2_prev_busday(self):
     """Check prev_busday for several fixture dates (mid-week, weekend
     boundary, and holiday boundary cases, per the expected values)."""
     expected = ('2016-12-14', '2016-12-15', '2016-12-16', '2016-12-23')
     actual = tuple(stxcal.prev_busday(dt)
                    for dt in (self.dt1, self.dt2, self.dt3, self.dt5))
     self.assertTrue(actual == expected)
Пример #2
0
 def adjust_data(self, new_pos):
     """Move the current cursor to new_pos, re-applying or undoing split
     adjustments so the dataframe stays consistent for the active range.

     new_pos is an integer index into self.df.  Returns new_pos when the
     position is unchanged; otherwise returns None after updating state.
     NOTE(review): the two return values differ (new_pos vs implicit
     None) — callers appear not to use the result; confirm before relying
     on it.
     """
     # A new date can:
     # (1) move into a new date range, or (2) stay in the old one
     if self.pos == new_pos:
         return new_pos
     # find the (start, end) gap interval that contains new_pos;
     # assumes self.num_gaps covers every valid position — TODO confirm
     new_s, new_e = [g for g in self.num_gaps if g[0] <= new_pos <= g[1]][0]
     # in (1): undo all split adjustments in the old date range,
     # adjust for splits in the new date range, & reset self.start/self.end
     if new_s != self.start or new_e != self.end:
         sdd = self.df.index[self.start]
         for adj_dt in self.adj_splits:
             # each recorded split maps to a (ratio, divi-type) pair
             split_ratio, split_divi = self.splits.get(adj_dt)
             # undo the price adjustment by applying the inverse ratio
             self.adjust(sdd, stxcal.prev_busday(adj_dt), 1 / split_ratio,
                         ['o', 'hi', 'lo', 'c'])
             # divi types 0/1/3/6 also had volume adjusted; undo that too
             # (semantics of these codes not visible here — TODO confirm)
             if split_divi in [0, 1, 3, 6]:
                 self.adjust(sdd, stxcal.prev_busday(adj_dt), split_ratio,
                             ['volume'])
         # clear the list in place so other references see the reset
         del self.adj_splits[:]
         self.start = new_s
         self.end = new_e
         self.adjust_splits_date_range(self.start, new_pos)
     # in (2), if moving:
     # forward, adjust for splits between the old and the new position,
     # backward, undo split adjustments between the old and the new position
     else:
         if new_pos > self.pos:
             self.adjust_splits_date_range(self.pos, new_pos)
         else:
             self.adjust_splits_date_range(new_pos, self.pos, 1)
     self.pos = new_pos
Пример #3
0
 def adjust_splits_date_range(self, s_ix, e_ix, inv=0):
     """Apply (inv=0) or undo (inv=1) split adjustments for every split
     dated in (s_ix, e_ix], recording each handled split in
     self.adj_splits.

     s_ix, e_ix are integer positions into self.df.index; inv selects
     whether the ratio or its inverse is applied to the price columns.
     """
     bdd = self.df.index[self.start]
     sdd = self.df.index[s_ix]
     edd = self.df.index[e_ix]
     # splits strictly after the range start and up to (including) its end
     splts = {k: v for k, v in self.splits.items() if sdd < k <= edd}
     for k, v in splts.items():
         # v[0] is the split ratio; invert it when undoing
         r = v[0] if inv == 0 else 1 / v[0]
         self.adjust(bdd, stxcal.prev_busday(k), r, ['o', 'hi', 'lo', 'c'])
         # v[1] is the divi type; 0/1/3/6 also adjust volume inversely
         # NOTE(review): volume column is ['v'] here but ['volume'] in
         # adjust_data — one of the two looks wrong; verify against the
         # dataframe's actual column name.
         if v[1] in [0, 1, 3, 6]:
             self.adjust(bdd, stxcal.prev_busday(k), 1 / r, ['v'])
         self.adj_splits.append(k)
Пример #4
0
 def update_local_directory(self, crt_date):
     """Archive all PDF reports dated before the first day of the previous
     month into a zip file named after the last business day before that
     month, then delete the archived PDFs from the report directory.

     crt_date is currently unused (kept for interface compatibility).
     Relies on yyyy-mm-dd-prefixed file names so a plain lexicographic
     comparison orders them by date.
     """
     today_date = stxcal.today_date()
     start_of_current_month = f'{today_date[:8]}01'
     prev_month_date = stxcal.prev_busday(start_of_current_month)
     start_of_prev_month = f'{prev_month_date[:8]}01'
     zipfile_name = os.path.join(
         self.report_dir, f'{stxcal.prev_busday(start_of_prev_month)}.zip')
     logging.info(f'Archive all reports prior to {start_of_prev_month} '
                  f'in {zipfile_name}')
     pdf_file_list = glob.glob(os.path.join(self.report_dir, '*.pdf'))
     # append if the archive already exists, otherwise create it
     zipfile_open_mode = 'a' if os.path.isfile(zipfile_name) else 'w'
     num_archived_pdfs = 0
     # context manager guarantees the archive is closed (and its central
     # directory written) even if a write or remove raises mid-loop
     with zipfile.ZipFile(zipfile_name, zipfile_open_mode) as z:
         for pdf_file in pdf_file_list:
             short_filename = pdf_file.split(os.path.sep)[-1]
             if short_filename < start_of_prev_month:
                 z.write(pdf_file)
                 num_archived_pdfs += 1
                 # only delete after the write succeeded
                 os.remove(pdf_file)
     logging.info(f'Archived {num_archived_pdfs} PDF reports '
                  f'in {zipfile_name}')
Пример #5
0
    def get_gaps(self, df):
        """Split the time series into date ranges separated by gaps of
        more than 20 business days.

        df is indexed by date; self.sd / self.ed bound the overall range.
        Returns a list of (start, end) tuples covering the series, one
        tuple per contiguous segment.
        """
        # previous business day relative to each index entry
        df['prev_dt'] = df.index.shift(-1, freq=StxTS.busday_us)
        # previous row's actual date (first row gets the prev busday below)
        df['prev_date'] = df.index
        df['prev_date'] = df['prev_date'].shift(1)
        s1 = pd.Series(df['prev_date'])
        s1[0] = pd.to_datetime(stxcal.prev_busday(df.index[0].date()))

        def gapfun(x):
            """Business days between the previous row's date and this
            row's previous business day."""
            return stxcal.num_busdays(x['prev_date'].date(),
                                      x['prev_dt'].date())

        df['gap'] = df.apply(gapfun, axis=1)
        # rows where more than 20 business days of data are missing
        glist = df.loc[df['gap'] > 20].index.tolist()
        gaps = []
        if len(glist) == 0:
            # no gaps: the whole [sd, ed] range is a single segment
            gaps.append(tuple([self.sd, self.ed]))
        else:
            # first segment: from sd up to the date before the first gap
            gaps.append(tuple([self.sd, df.loc[glist[0]]['prev_date']]))
            for i in range(1, len(glist)):
                # each middle segment runs from one gap's restart date to
                # the date before the next gap
                gaps.append(
                    tuple([glist[i - 1], df.loc[glist[i]]['prev_date']]))
            # last segment: from the final gap's restart date to ed
            gaps.append(tuple([glist[-1], self.ed]))
        return gaps
Пример #6
0
 def upload_splits(self, splits_file):
     """Read stock splits from a whitespace-delimited file (symbol, date,
     ratio per line) and upsert them into self.divi_tbl, keyed on
     (stk, dt).  Lines with fewer than three tokens are skipped; the date
     stored is the business day before the one in the file."""
     print('Uploading stocks from file {0:s}'.format(splits_file))
     with open(splits_file, 'r') as f:
         lines = f.readlines()
     uploaded = 0
     for line in lines:
         fields = line.split()
         if len(fields) < 3:
             print('Skipping line {0:s}'.format(line))
             continue
         ticker = fields[0].strip()
         split_dt = stxcal.prev_busday(fields[1].strip())
         split_ratio = float(fields[2].strip())
         # NOTE: SQL is assembled via string formatting from file input;
         # acceptable only because the splits file is operator-supplied
         db_cmd = "insert into {0:s} values('{1:s}','{2:s}',{3:f},0) "\
             "on conflict (stk, dt) do update set ratio={4:f}".format(
             self.divi_tbl, ticker, split_dt, split_ratio, split_ratio)
         try:
             stxdb.db_write_cmd(db_cmd)
             uploaded += 1
         except Exception as ex:
             print('Failed to upload split {0:s}, {1:s}, '
                   'error {2:s}'.format(ticker, split_dt, str(ex)))
     print('Successfully uploaded {0:d} out of {1:d} stock splits'.format(
         uploaded, len(lines)))
Пример #7
0
 def move_downloaded_options(self, start_yymm, end_yymm):
     """Copy all options dated within [start_yymm, end_yymm] (inclusive,
     'YYYY-MM' strings) from the options table into downloaded_options,
     then delete them from options.

     The date window runs from the first business day of start_yymm to
     the last business day of end_yymm.
     """
     start_yy, start_mm = start_yymm.split('-')
     start_year = int(start_yy)
     start_month = int(start_mm)
     end_yy, end_mm = end_yymm.split('-')
     end_year = int(end_yy)
     end_month = int(end_mm)
     start_date = '{0:d}-{1:02d}-01'.format(start_year, start_month)
     logging.info('start_date = {0:s}'.format(start_date))
     if not stxcal.is_busday(start_date):
         start_date = stxcal.next_busday(start_date)
     # advance to the first day of the month after end_yymm.  BUGFIX: the
     # previous expression (end_month + 1) % 12 mapped November (11) to
     # month 0, producing an invalid date 'YYYY-00-01'; end_month % 12 + 1
     # maps 11 -> 12 and 12 -> 1 correctly.
     if end_month == 12:
         end_year += 1
     end_month = end_month % 12 + 1
     end_date = '{0:d}-{1:02d}-01'.format(end_year, end_month)
     logging.info('end_date = {0:s}'.format(end_date))
     # step back to the last business day of end_yymm itself
     end_date = stxcal.prev_busday(end_date)
     logging.info('Moving to downloaded_options table all options dated '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
     sql = 'INSERT INTO downloaded_options '\
         '(expiry, und, cp, strike, dt, bid, ask, v, oi) '\
         "SELECT * FROM options WHERE dt BETWEEN '{0:s}' AND '{1:s}' "\
         'ON CONFLICT (expiry, und, cp, strike, dt) DO NOTHING'.format(
         start_date, end_date)
     logging.info('sql cmd: {0:s}'.format(sql))
     stxdb.db_write_cmd(sql)
     logging.info('Moved to downloaded_options table all options dated '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
     logging.info('Removing from options table all options downloaded '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
     sql = "DELETE FROM options WHERE dt BETWEEN '{0:s}' AND '{1:s}'"\
         " ".format(start_date, end_date)
     logging.info('sql cmd: {0:s}'.format(sql))
     stxdb.db_write_cmd(sql)
     logging.info('Removed from options table all options downloaded '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
Пример #8
0
    def parse_stooq_new(self, last_db_date):
        """Parse a newly downloaded stooq data_d.txt file and upload the
        daily US stock records newer than last_db_date into the eods table.

        Exits early (with a log message) when the file is missing, when no
        complete new dates are available, or when there is a gap between
        the DB's last date and the first usable date in the file.  Uses a
        temporary table + ON CONFLICT upsert for the bulk load, updates
        the eod_datafeed marker in the analyses table, and finally renames
        the processed stooq file.

        Fix vs. previous revision: logging.warn (deprecated alias, removed
        in Python 3.13) replaced with logging.warning.
        """
        logging.info('Checking if a new stooq file has been downloaded')
        # stooq_file = os.path.join(os.getenv('DOWNLOAD_DIR'), 'data_d.txt')
        download_dir = self.config.get('datafeed', 'download_dir')
        stooq_file = os.path.join(download_dir, 'data_d.txt')
        if not os.path.exists(stooq_file):
            logging.info('No new stooq data file found.  Nothing to do.')
            return
        logging.info('Reading stooq file, renaming columns, getting daily '
                     'US stocks data')
        df = pd.read_csv(stooq_file,
                         dtype={
                             "<TICKER>": "string",
                             "<PER>": "string",
                             "<DATE>": "string",
                             "<TIME>": "string",
                             "<OPEN>": float,
                             "<HIGH>": float,
                             "<LOW>": float,
                             "<CLOSE>": float,
                             "<VOL>": int,
                             "<OPENINT>": int
                         })
        # strip the angle brackets from the stooq column names
        df.columns = [x[1:-1].lower() for x in df.columns]
        stx_df = df.query('ticker.str.endswith(".US") and per == "D"',
                          engine='python').copy()
        logging.info(
            'Getting {0:d} daily US stocks out of {1:d} records'.format(
                len(stx_df), len(df)))
        # convert yyyymmdd strings to yyyy-mm-dd
        stx_df['date'] = stx_df['date'].astype(str)
        stx_df['date'] = stx_df.apply(lambda r: '{0:s}-{1:s}-{2:s}'.format(
            r['date'][0:4], r['date'][4:6], r['date'][6:8]),
                                      axis=1)
        logging.info('Converted stx_df dates in yyyy-mm-dd format')
        # records available per date, used both as date list and sanity count
        dates = stx_df.groupby(by='date')['ticker'].count()
        next_date = stxcal.next_busday(last_db_date)
        ix0, num_dates = 0, len(dates)
        logging.info('Data available for {0:d} dates, from {1:s} to {2:s}; DB '
                     'needs data starting from {3:s}'.format(
                         len(dates), dates.index[0],
                         dates.index[num_dates - 1], next_date))
        db_dates = []
        # skip file dates already present in the DB
        while ix0 < num_dates:
            if dates.index[ix0] == next_date:
                break
            ix0 += 1
        # accept consecutive business days that each carry a plausible
        # record count (the 9000 threshold guards against partial files)
        for ixx in range(ix0, num_dates):
            if dates.index[ixx] == next_date and dates.values[ixx] > 9000:
                db_dates.append(dates.index[ixx])
            else:
                if dates.index[ixx] != next_date:
                    logging.error(f'Missing date {next_date}; got '
                                  f'{dates.index[ixx]} instead')

                if dates.values[ixx] < 9000:
                    logging.error(f'Not enough records ({dates.values[ixx]}) '
                                  f'available for {dates.index[ixx]}')
                break
            next_date = stxcal.next_busday(next_date)

        if not db_dates:
            logging.info('No new data available for processing. Exiting')
            return
        logging.info('Check that there are no time gaps between DB data and '
                     'upload data')
        start_date = stxcal.next_busday(last_db_date)
        num_bdays = stxcal.num_busdays(start_date, db_dates[0])
        if num_bdays > 0:
            logging.warning(
                'No data for {0:d} days ({1:s} - {2:s}). Exiting ...'.format(
                    num_bdays, start_date, stxcal.prev_busday(db_dates[0])))
            return
        logging.info('Check that there are no time gaps in the upload data')
        for ixx in range(len(db_dates) - 1):
            if stxcal.next_busday(db_dates[ixx]) != db_dates[ixx + 1]:
                logging.warning('Inconsistent dates {0:s} and {1:s} '
                                'at indexes {2:d} and {3:d}'.format(
                                    db_dates[ixx], db_dates[ixx + 1], ixx,
                                    ixx + 1))

        sel_stx_df = stx_df.query('date in @db_dates').copy()
        logging.info(
            '{0:d}/{1:d} records found for following dates: [{2:s}]'.format(
                len(sel_stx_df), len(stx_df), ', '.join(db_dates)))
        # drop rows with NaNs, zero volume, or inconsistent OHLC values
        sel_stx_df['invalid'] = sel_stx_df.apply(
            lambda r: np.isnan(r['open']) or np.isnan(r['high']) or np.
            isnan(r['low']) or np.isnan(r['close']) or np.isnan(r['vol']) or r[
                'vol'] == 0 or r['open'] > r['high'] or r['open'] < r[
                    'low'] or r['close'] > r['high'] or r['close'] < r['low'],
            axis=1)
        valid_stx_df = sel_stx_df.query('not invalid').copy()
        logging.info('Found {0:d} valid records out of {1:d} records'.format(
            len(valid_stx_df), len(sel_stx_df)))

        def process_row(r):
            """Normalize one record: map the stooq ticker to the DB symbol
            convention, scale prices to integer cents, and volume to
            thousands (minimum 1)."""
            stk = r['ticker'][:-3].replace("-.", ".P.").replace("_",
                                                                ".").replace(
                                                                    '-', '.')
            o = int(100 * r['open'])
            hi = int(100 * r['high'])
            lo = int(100 * r['low'])
            c = int(100 * r['close'])
            v = int(r['vol'])
            v = v // 1000
            if v == 0:
                v = 1
            lst = [stk, o, hi, lo, c, v]
            return pd.Series(lst)

        valid_stx_df[['ticker', 'open', 'high', 'low', 'close', 'vol']] = \
            valid_stx_df.apply(process_row, axis=1)
        valid_stx_df['openint'] = 2
        valid_stx_df.drop(columns=['per', 'time', 'invalid'],
                          axis=1,
                          inplace=True)
        valid_stx_df.columns = ['stk', 'dt', 'o', 'hi', 'lo', 'c', 'v', 'oi']

        # bulk-load through a temp table, then upsert into eods
        with closing(stxdb.db_get_cnx().cursor()) as crs:
            sql = 'CREATE TEMPORARY TABLE temp_table ('\
                'stk VARCHAR(16) NOT NULL, '\
                'dt DATE NOT NULL, '\
                'o INTEGER NOT NULL, '\
                'hi INTEGER NOT NULL, '\
                'lo INTEGER NOT NULL, '\
                'c INTEGER NOT NULL, '\
                'v INTEGER, '\
                'oi INTEGER, '\
                'PRIMARY KEY(stk, dt))'
            crs.execute(sql)
            logging.info('Created temporary table')
            upload_data = valid_stx_df.values.tolist()
            execute_values(
                crs, 'INSERT INTO temp_table '
                '(stk, dt, o, hi, lo, c, v, oi) VALUES %s', upload_data)
            logging.info('Uploaded dataframe into temporary table')
            stxdb.db_write_cmd(
                'INSERT INTO eods (stk, dt, o, hi, lo, c, v, oi) '
                'SELECT * FROM temp_table ON CONFLICT (stk, dt) DO '
                'UPDATE SET o = EXCLUDED.o, hi = EXCLUDED.hi, '
                'lo = EXCLUDED.lo, c = EXCLUDED.c, v = EXCLUDED.v, '
                'oi = EXCLUDED.oi')
            logging.info('Uploaded data into eods table')
        last_upload_date = valid_stx_df['dt'].max()
        stxdb.db_write_cmd("UPDATE analyses SET dt='{0:s}' WHERE "
                           "analysis='eod_datafeed'".format(last_upload_date))
        logging.info('Updated latest eod datafeed date {0:s} in DB'.format(
            last_upload_date))
        self.rename_stooq_file(dates.index[0], dates.index[num_dates - 1])