def load_data_calc_output_and_check_forward_looking(strategy):
    """
    :param strategy: function with data loading and output calculation
    :return: whole output
    """
    qdc.MAX_DATE_LIMIT = None
    qdc.MAX_DATETIME_LIMIT = None

    log_info("Computing of the whole output...")
    whole_output = strategy()

    last_date = datetime.datetime.now().date()
    last_date = last_date - datetime.timedelta(days=FORWARD_LOOKING_TEST_OFFSET)

    qdc.MAX_DATE_LIMIT = last_date
    qdc.MAX_DATETIME_LIMIT = datetime.datetime.combine(last_date, datetime.time.min)

    log_info("Computing of the cropped output...")
    cropped_output = strategy()

    qdc.MAX_DATE_LIMIT = None
    qdc.MAX_DATETIME_LIMIT = None

    check_forward_looking(cropped_output, whole_output)
    return whole_output
def check_forward_looking(cropped_output, whole_output):
    cropped_output = sort_and_crop_output(cropped_output)
    whole_output = sort_and_crop_output(whole_output)

    max_time = min(cropped_output.coords[ds.TIME].values.max(),
                   whole_output.coords[ds.TIME].values.max())
    cropped_output = cropped_output.loc[:max_time]
    whole_output = whole_output.loc[:max_time]

    cropped_output, whole_output = xr.align(cropped_output, whole_output, join='outer')
    cropped_output = cropped_output.fillna(0)
    whole_output = whole_output.fillna(0)

    diff = whole_output - cropped_output
    # print(diff.where(diff != 0).dropna('time', 'all').dropna('asset', 'all'))
    delta = abs(diff).max().values
    if delta > FORWARD_LOOKING_TEST_DELTA:
        log_info('WARNING: This strategy uses forward looking! Delta = ' + str(delta))
        return True
    else:
        log_info('Ok. There is no forward looking.')
        return False
def write(state):
    if state is None:
        return
    path = get_env("OUT_STATE_PATH", "state.out.pickle.gz")
    with gzip.open(path, 'wb') as gz:
        pickle.dump(state, gz)
        log_info("State saved.")
def load_data(
        assets: tp.List[tp.Union[dict, str]] = None,
        min_date: tp.Union[str, datetime.date, None] = None,
        max_date: tp.Union[str, datetime.date, None] = None,
        dims: tp.Tuple[str, str, str] = (ds.FIELD, ds.TIME, ds.ASSET),
        forward_order: bool = True,
        tail: tp.Union[datetime.timedelta, float, int] = DEFAULT_TAIL) -> xr.DataArray:
    """
    :param assets: list of ticker names to load
    :param min_date: first date in data
    :param max_date: last date of data
    :param dims: tuple with ds.FIELD, ds.TIME, ds.ASSET in the specified order
    :param forward_order: boolean, set true if you need the forward order of dates, otherwise the order is backward
    :param tail: datetime.timedelta, tail size of data. min_date = max_date - tail
    :return: xarray DataArray with historical data for selected assets
    """
    t = time.time()
    data = load_origin_data(assets=assets, min_date=min_date, max_date=max_date, tail=tail)
    log_info("Data loaded " + str(round(time.time() - t)) + "s")
    data = adjust_by_splits(data, False)
    data = data.transpose(*dims)
    if forward_order:
        data = data.sel(**{ds.TIME: slice(None, None, -1)})
    data.name = "stocks"
    return data
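if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module: load a small illustrative
    # universe and inspect the close prices. The asset ids and the 4-year tail are
    # assumptions chosen for this example only.
    example_data = load_data(assets=['NASDAQ:AAPL', 'NASDAQ:MSFT'],
                             tail=datetime.timedelta(days=4 * 365),
                             forward_order=True)
    log_info(example_data.sel(field='close').to_pandas().tail())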
def calc_output_and_check_forward_looking(data, strategy):
    """
    :param data: loaded data xarray
    :param strategy: function that calculates the output using the provided data
    :return: output
    """
    cropped_data = data

    last_date = data.coords[ds.TIME].values.max()
    last_date = str(last_date)[0:10]
    last_date = datetime.datetime.strptime(last_date, '%Y-%m-%d').date()
    last_date = last_date - datetime.timedelta(days=FORWARD_LOOKING_TEST_OFFSET)
    last_date = str(last_date)

    if data.coords[ds.TIME][0] < data.coords[ds.TIME][-1]:
        cropped_data = cropped_data.loc[{ds.TIME: slice(None, last_date)}]
    else:
        cropped_data = cropped_data.loc[{ds.TIME: slice(last_date, None)}]
    cropped_data = cropped_data.dropna(ds.ASSET, 'all')
    cropped_data = cropped_data.dropna(ds.TIME, 'all')

    log_info("Computing of the cropped output...")
    cropped_output = strategy(cropped_data)

    log_info("Computing of the whole output...")
    whole_output = strategy(data)

    check_forward_looking(cropped_output, whole_output)
    return whole_output
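if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module: run the forward-looking
    # test on a toy strategy. It assumes load_data (shown above) and the 'is_liquid'
    # field are available; the equal-weight logic is illustrative only.
    def equal_weight_strategy(data):
        is_liquid = data.sel(field='is_liquid')
        weights = is_liquid / is_liquid.sum('asset')
        return weights.fillna(0)

    data = load_data(tail=4 * 365)
    output = calc_output_and_check_forward_looking(data, equal_weight_strategy)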
def show_table(row_offset, column_offset):
    try:
        from IPython.display import display
        display(output.iloc[row_offset:row_offset + tail_r,
                            column_offset:column_offset + tail_c])
    except:
        log_info(output.iloc[row_offset:row_offset + tail_r,
                             column_offset:column_offset + tail_c])
def calc_correlation(relative_returns, suppress_exception=True):
    try:
        if "SUBMISSION_ID" in os.environ and os.environ["SUBMISSION_ID"] != "":
            log_info("correlation check disabled")
            return []

        ENGINE_CORRELATION_URL = get_env("ENGINE_CORRELATION_URL",
                                         "https://quantiacs.io/referee/submission/forCorrelation")
        STATAN_CORRELATION_URL = get_env("STATAN_CORRELATION_URL",
                                         "https://quantiacs.io/statan/correlation")
        PARTICIPANT_ID = get_env("PARTICIPANT_ID", "0")

        with request.urlopen(ENGINE_CORRELATION_URL + "?participantId=" + PARTICIPANT_ID) as response:
            submissions = response.read()
            submissions = json.loads(submissions)
            submission_ids = [s['id'] for s in submissions]

        rr = relative_returns.to_netcdf(compute=True)
        rr = gzip.compress(rr)
        rr = base64.b64encode(rr)
        rr = rr.decode()

        cofactors = []

        chunks = [submission_ids[x:x + 50] for x in range(0, len(submission_ids), 50)]
        for c in chunks:
            r = {"relative_returns": rr, "submission_ids": c}
            r = json.dumps(r)
            r = r.encode()
            with request.urlopen(STATAN_CORRELATION_URL, r) as response:
                cs = response.read()
                cs = json.loads(cs)
                cofactors = cofactors + cs

        result = []
        for c in cofactors:
            sub = next((s for s in submissions if str(c['id']) == str(s['id'])))
            sub['cofactor'] = c['cofactor']
            sub['sharpe_ratio'] = c['sharpe_ratio']
            result.append(sub)

        return result
    except Exception as e:
        log_err("WARNING! Can't calculate correlation.")
        if suppress_exception:
            import logging
            logging.exception("network error")
            return []
        else:
            raise e
def load_output(fn, date):
    output = xr.open_dataarray(fn, cache=False)
    output = output.compute()
    if 'time' not in output.coords:
        log_info('append dimension')
        output = xr.concat([output], pd.DatetimeIndex([date], name='time'))
    output.coords['asset'] = [idt.translate_server_id_to_user_id(id) for id in output.asset.values]
    return output
def read(path=None):
    if path is None:
        path = get_env("IN_STATE_PATH", "state.in.pickle.gz")
    try:
        with gzip.open(path, 'rb') as gz:
            res = pickle.load(gz)
            log_info("State loaded.")
            return res
    except Exception as e:
        log_err("Can't load state.", e)
        return None
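if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module: persist a small state dict
    # between runs with the read()/write() helpers defined in this module. The dict
    # layout is illustrative only.
    state = read()  # returns None on the first run or if the state file is unreadable
    if state is None:
        state = {'runs': 0}
    state['runs'] += 1
    write(state)  # saved to OUT_STATE_PATH (default: state.out.pickle.gz)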
def check_output(output, data_type='stocks'):
    if data_type != 'stocks' and data_type != 'stocks_long' and data_type != 'futures' \
            and data_type != 'crypto' and data_type != 'crypto_futures' and data_type != 'cryptofutures':
        log_err("Unsupported data_type", data_type)
        return

    in_sample_points = qnt.stats.get_default_is_period_for_type(data_type)
    min_date = qnt.stats.get_default_is_start_date_for_type(data_type)
    output_tail = output.where(output.time > np.datetime64(min_date)).dropna('time', 'all')
    if len(output_tail) < in_sample_points:
        log_err("ERROR! In sample period does not contain enough points. " +
                str(len(output_tail)) + " < " + str(in_sample_points))
    else:
        log_info("Ok. In sample period contains enough points. " +
                 str(len(output_tail)) + " >= " + str(in_sample_points))

    log_info()
    log_info("Load data...")
    data = qnt.data.load_data_by_type(
        data_type,
        assets=output.asset.values.tolist(),
        min_date=(pd.Timestamp(min_date) - pd.Timedelta(days=60)).to_pydatetime())
    log_info()
    qnt.output.check(output, data)
def run_iterations(time_series, data, window, start_date, lookback_period, strategy, step, collect_all_states):
    def copy_window(data, dt, tail):
        return copy.deepcopy(window(data, dt, tail))

    log_info("Run iterations...\n")

    ts = np.sort(time_series)
    outputs = []
    all_states = []

    output_time_coord = ts[ts >= start_date]
    output_time_coord = output_time_coord[::step]

    i = 0

    sys.stdout.flush()

    with progressbar.ProgressBar(max_value=len(output_time_coord), poll_interval=1) as p:
        state = None
        for t in output_time_coord:
            tail = copy_window(data, t, lookback_period)
            result = strategy(tail, copy.deepcopy(state))
            output, state = unpack_result(result)
            if type(output) != xr.DataArray:
                log_err("Output is not xarray!")
                return
            if set(output.dims) != {'asset'} and set(output.dims) != {'asset', 'time'}:
                log_err("Wrong output dimensions. ", output.dims, "Should contain only:", {'asset', 'time'})
                return
            if 'time' in output.dims:
                output = output.sel(time=t)
            output = output.drop(['field', 'time'], errors='ignore')
            outputs.append(output)
            if collect_all_states:
                all_states.append(state)
            i += 1
            p.update(i)

    sys.stderr.flush()

    log_info("Merge outputs...")
    output = xr.concat(outputs, pd.Index(output_time_coord, name=qndata.ds.TIME))

    return output, all_states if collect_all_states else state
def calc_sharpe_ratio_for_check(data, output, kind=None, check_dates=True):
    """
    Calculates the Sharpe ratio for the check according to the rules.

    :param data:
    :param output:
    :param kind: competition type
    :param check_dates: do you need to check the sharpe ratio dates?
    :return:
    """
    import qnt.stats as qns

    if kind is None:
        kind = data.name

    start_date = qns.get_default_is_start_date_for_type(kind)
    sdd = pd.Timestamp(start_date)
    osd = pd.Timestamp(
        output.where(abs(output).sum('asset') > 0).dropna('time', 'all').time.min().values)
    dsd = pd.Timestamp(data.time.min().values)
    if check_dates:
        if (dsd - sdd).days > 10:
            log_err("WARNING! There are not enough points in the data.")
            log_err("The first point (" + str(dsd.date()) + ") should be earlier than " + str(sdd.date()))
            log_err("Load more historical data.")
        else:
            if len(data.sel(time=slice(None, sdd)).time) < 15:
                log_err("WARNING! There are not enough points in the data for the slippage calculation.")
                log_err("Add 15 extra data points to the data head (load more historical data).")
        if (osd - sdd).days > 7:
            log_err("WARNING! There are not enough points in the output.")
            log_err("The output series should start from " + str(sdd.date()) +
                    " or earlier instead of " + str(osd.date()))
    sd = max(sdd, dsd)
    sd = sd.to_pydatetime()
    fd = pd.Timestamp(data.time.max().values).to_pydatetime()
    log_info("Period: " + str(sd.date()) + " - " + str(fd.date()))
    output_slice = align(output, data.time, sd, fd)
    rr = qns.calc_relative_return(data, output_slice)
    sr = qns.calc_sharpe_ratio_annualized(rr)
    sr = sr.isel(time=-1).values
    return sr
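if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module: compute the in-sample
    # Sharpe ratio used by the checker. It assumes qnt.data.load_data is available
    # and uses naive liquidity-based weights for illustration only.
    import qnt.data as qndata
    data = qndata.load_data(tail=6 * 365)
    output = data.sel(field='is_liquid')
    output = (output / abs(output).sum('asset')).fillna(0)
    sr = calc_sharpe_ratio_for_check(data, output, kind='stocks', check_dates=False)
    log_info("In-sample Sharpe ratio:", sr)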
def write(output):
    """
    Writes the output into the file for submission.

    :param output: xarray with daily weights
    """
    import qnt.data.id_translation as idt
    from qnt.data.common import ds, get_env, track_event

    output = output.copy()
    output.coords[ds.ASSET] = [idt.translate_user_id_to_server_id(id) for id in output.coords[ds.ASSET].values]
    output = normalize(output)
    data = output.to_netcdf(compute=True)
    data = gzip.compress(data)
    path = get_env("OUTPUT_PATH", "fractions.nc.gz")
    log_info("Write output: " + path)
    with open(path, 'wb') as out:
        out.write(data)
    track_event("OUTPUT_WRITE")
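if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module: build a minimal synthetic
    # output (one day, two illustrative asset ids) and write it to OUTPUT_PATH.
    example_output = xr.DataArray(
        [[0.5, 0.5]],
        dims=['time', 'asset'],
        coords={'time': pd.to_datetime(['2021-01-04']),
                'asset': ['NASDAQ:AAPL', 'NASDAQ:MSFT']}
    )
    write(example_output)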
def run_init():
    if os.path.exists("init.ipynb"):
        log_info("Run init.ipynb..")
        cmd = " jupyter nbconvert --to html --ExecutePreprocessor.timeout=1800 --execute init.ipynb --stdout " + \
              "| html2text -utf8"  # "\\\n 2>&1"
        log_info("cmd:", cmd)
        log_info("output:")
        proc = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            executable='bash'
        )
        for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
            sys.stdout.write(line)
        proc.wait()
        code = proc.returncode
        log_info("return code:", code)
def show_asset_stat(asset):
    if asset in stat_per_asset.asset.values.tolist():
        out = output.sel(asset=[asset])
        stat = stat_per_asset.sel(asset=asset)
    else:
        out = output
        stat = stat_global
    log_info("Output:")
    display_scrollable_output_table(out)
    log_info("Stats:")
    display_scrollable_stats_table(stat)
    make_major_plots(stat)
    log_info("---")
def assemble_output(add_mode='all'):
    log_info("Merge outputs...")
    files = os.listdir(result_dir)
    files = [f for f in files if f.endswith(".fractions.nc.gz")]
    files.sort()
    output = None
    if len(files) == 0:
        log_err("ERROR! There are no outputs.")
    for f in files:
        date = f.split(".")[0]
        date = datetime.date.fromisoformat(date)
        fn = result_dir + "/" + f
        _output = load_output(fn, date)
        _output = _output.where(_output.time <= np.datetime64(date)).dropna('time', 'all')
        if len(_output) == 0:
            continue
        if output is None:
            log_info("init output:", fn, str(_output.time.min().values)[:10],
                     str(_output.time.max().values)[:10])
            output = _output
        else:
            if add_mode == 'all':
                _output = _output.where(_output.time > output.time.max()).dropna('time', 'all')
            elif add_mode == 'one':
                _output = _output.where(_output.time == np.datetime64(date)).dropna('time', 'all')
            else:
                raise Exception("wrong add_mode")
            if len(_output) == 0:
                continue
            log_info("add output:", fn, str(_output.time.min().values)[:10],
                     str(_output.time.max().values)[:10])
            output = xr.concat([output, _output], dim="time")
    return output
def load_facts(
        ciks: tp.List[str],
        facts: tp.List[str],
        types: tp.Union[None, tp.List[str]] = None,
        skip_segment: bool = False,
        period: tp.Union[str, None] = None,  # 'A', 'S', 'Q'
        columns: tp.Union[tp.List[str], None] = None,
        min_date: tp.Union[str, datetime.date, None] = None,
        max_date: tp.Union[str, datetime.date, None] = None,
        tail: tp.Union[datetime.timedelta, float, int] = DEFAULT_TAIL,
        group_by_cik: bool = False) -> tp.Generator[dict, None, None]:
    """
    Load SEC Forms (fundamental data).

    :param ciks: list of ciks (you can get the cik from the asset id)
    :param types: list of form types: ['10-K', '10-Q', '10-K/A', '10-Q/A']
    :param facts: list of facts to extract, for example: ['us-gaap:Goodwill']
    :param skip_segment: skip facts with segment
    :param period: fact periods ('Q', 'A' or 'S')
    :param columns: list of columns to load:
        ['fact_name', 'unit_type', 'unit', 'segment', 'period_type', 'period',
         'period_length', 'report_type', 'report_url', 'report_date']
    :param min_date: min form date
    :param max_date: max form date
    :param tail: datetime.timedelta, tail size of data. min_date = max_date - tail
    :return: generator
    """
    track_event("DATA_SECGOV_FACTS")
    max_date = parse_date(max_date)

    if min_date is not None:
        min_date = parse_date(min_date)
    else:
        min_date = max_date - parse_tail(tail)

    params = {
        'ciks': list(set(ciks)),
        'types': list(set(types)) if types is not None else None,
        'facts': list(set(facts)),
        'skip_segment': skip_segment,
        'columns': list(set(columns)) if columns is not None else None,
        'period': period,
        'min_date': min_date.isoformat(),
        'max_date': max_date.isoformat()
    }

    max_batch_size = min(50, SECGOV_BATCH_SIZE // len(facts))

    log_info("load secgov facts...")
    t = time.time()
    for offset in range(0, len(ciks), max_batch_size):
        if offset + max_batch_size > len(ciks):
            batch_ciks = ciks[offset:]
        else:
            batch_ciks = ciks[offset:(offset + max_batch_size)]
        params['ciks'] = batch_ciks
        params_js = json.dumps(params)
        raw = request_with_retry("sec.gov/facts", params_js.encode())
        js = raw.decode()
        facts = json.loads(js)

        if group_by_cik:
            facts = sorted(facts, key=lambda k: k['cik'])
            groups = itertools.groupby(facts, key=lambda f: f['cik'])
            for g in groups:
                yield (g[0], list(g[1]))
        else:
            for f in facts:
                yield f

        log_info("fetched chunk", (offset // max_batch_size + 1), '/',
                 math.ceil(len(ciks) / max_batch_size),
                 math.ceil(time.time() - t), 's')

    log_info("facts loaded.")
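if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module: stream one fact for two
    # illustrative ciks and log a few of the columns named in the docstring above.
    # The cik values and the 2-year tail are assumptions for this example only.
    for fact in load_facts(ciks=['320193', '789019'],
                           facts=['us-gaap:Goodwill'],
                           types=['10-K'],
                           skip_segment=True,
                           tail=datetime.timedelta(days=2 * 365)):
        log_info(fact.get('fact_name'), fact.get('report_date'))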
def check_correlation(portfolio_history, data, print_stack_trace=True):
    """
    Checks the correlation of the current output with other strategies.
    """
    track_event("CHECK_CORRELATION")
    portfolio_history = output_normalize(portfolio_history)
    rr = calc_relative_return(data, portfolio_history)

    try:
        cr_list = calc_correlation(rr, False)
    except:
        import logging
        if print_stack_trace:
            logging.exception("Correlation check failed.")
        else:
            log_err("Correlation check failed.")
        return

    log_info()

    if len(cr_list) == 0:
        log_info("Ok. This strategy does not correlate with other strategies.")
        return

    log_err("WARNING! This strategy correlates with other strategies and will be rejected.")
    log_err("Modify the strategy to produce a different output.")

    log_info("The number of systems with a larger Sharpe ratio and correlation larger than 0.9:",
             len(cr_list))
    log_info("The max correlation value (with systems with a larger Sharpe ratio):",
             max([i['cofactor'] for i in cr_list]))

    my_cr = [i for i in cr_list if i['my']]

    log_info("Current sharpe ratio(3y):",
             calc_sharpe_ratio_annualized(rr, calc_avg_points_per_year(data) * 3)[-1].values.item())

    log_info()

    if len(my_cr) > 0:
        log_info("My correlated submissions:\n")
        headers = ['Name', "Coefficient", "Sharpe ratio"]
        rows = []
        for i in my_cr:
            rows.append([i['name'], i['cofactor'], i['sharpe_ratio']])
        log_info(tabulate(rows, headers))

    ex_cr = [i for i in cr_list if i['template']]
    if len(ex_cr) > 0:
        log_info("Correlated examples:\n")
        headers = ['Name', "Coefficient", "Sharpe ratio"]
        rows = []
        for i in ex_cr:
            rows.append([i['name'], i['cofactor'], i['sharpe_ratio']])
        log_info(tabulate(rows, headers))
def check_exposure(portfolio_history,
                   soft_limit=0.05, hard_limit=0.1,
                   days_tolerance=0.02, excess_tolerance=0.02,
                   avg_period=252, check_period=252 * 5):
    """
    Checks the exposure according to the submission filters.

    :param portfolio_history: output DataArray
    :param soft_limit: soft limit for exposure
    :param hard_limit: hard limit for exposure
    :param days_tolerance: max allowed proportion of days when the exposure may be in the range 0.05..0.1
    :param excess_tolerance: max allowed average excess
    :param avg_period: period for the ratio calculation
    :param check_period: period for checking
    :return:
    """
    portfolio_history = portfolio_history.loc[{ds.TIME: np.sort(portfolio_history.coords[ds.TIME])}]

    exposure = calc_exposure(portfolio_history)
    max_exposure = exposure.max(ds.ASSET)

    max_exposure_over_limit = max_exposure.where(max_exposure > soft_limit).dropna(ds.TIME)
    if len(max_exposure_over_limit) > 0:
        max_exposure_asset = exposure.sel({ds.TIME: max_exposure_over_limit.coords[ds.TIME]}).idxmax(ds.ASSET)
        log_info("Positions with max exposure over the limit:")
        pos = xr.concat([max_exposure_over_limit, max_exposure_asset],
                        pd.Index(['exposure', 'asset'], name='field'))
        log_info(pos.to_pandas().T)

    periods = min(avg_period, len(portfolio_history.coords[ds.TIME]))

    bad_days = xr.where(max_exposure > soft_limit, 1.0, 0.0)
    bad_days_proportion = bad_days[-check_period:].rolling(dim={ds.TIME: periods}).mean()
    days_ok = xr.where(bad_days_proportion > days_tolerance, 1, 0).sum().values == 0

    excess = exposure - soft_limit
    excess = excess.where(excess > 0, 0).sum(ds.ASSET)
    excess = excess[-check_period:].rolling(dim={ds.TIME: periods}).mean()
    excess_ok = xr.where(excess > excess_tolerance, 1, 0).sum().values == 0

    hard_limit_ok = xr.where(max_exposure > hard_limit, 1, 0).sum().values == 0

    if hard_limit_ok and (days_ok or excess_ok):
        log_info("Ok. The exposure check succeed.")
        return True
    else:
        log_err("WARNING! The exposure check failed.")
        log_info("Hard limit check:", 'Ok.' if hard_limit_ok else 'Failed.')
        log_info("Days check:", 'Ok.' if days_ok else 'Failed.')
        log_info("Excess check:", 'Ok.' if excess_ok else 'Failed.')
        return False
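if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module, using synthetic weights:
    # a single-asset portfolio exceeds the 0.1 hard limit and should fail the check,
    # while weights spread over 25 assets keep the exposure near 0.04 and should pass
    # (assuming calc_exposure normalizes by the total absolute weight).
    times = pd.date_range('2020-01-01', periods=300, freq='B')
    assets = ['A%02d' % i for i in range(25)]
    concentrated = xr.DataArray(np.zeros((len(times), len(assets))),
                                dims=[ds.TIME, ds.ASSET],
                                coords={ds.TIME: times, ds.ASSET: assets})
    concentrated[:, 0] = 1.0
    diversified = xr.full_like(concentrated, 1.0 / len(assets))
    check_exposure(concentrated)  # expected: the exposure check fails
    check_exposure(diversified)   # expected: the exposure check passes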
                     forward_order=True)
    data_pd = (data.sel(field="high").to_pandas(),
               data.sel(field="low").to_pandas(),
               data.sel(field="close").to_pandas())

    t1 = time.time()
    adx1 = ADX(data, 14)
    t2 = time.time()
    _plus_di, _minus_di, _adx, _adxr = dms(data.sel(field="high"),
                                           data.sel(field="low"),
                                           data.sel(field="close"),
                                           14, 14, 14)
    t3 = time.time()
    (pd_res) = dms(data_pd[0], data_pd[1], data_pd[2], 14, 14, 14)
    t4 = time.time()

    log_info(t2 - t1, t3 - t2, t4 - t3,
             "relative delta =", abs(adx1 - _adx).mean().values,
             "t(talib)/t(this) =", (t2 - t1) / (t3 - t2))

    import matplotlib.pyplot as plt

    plt.plot(adx1.coords[ds.TIME].values, adx1.sel(asset='NASDAQ:AAPL').values, 'r')
    plt.plot(_adx.coords[ds.TIME].values, _adx.sel(asset='NASDAQ:AAPL').values, 'g')
    plt.show()
def clean(output, data, kind=None, debug=True):
    """
    Checks the output and fixes common errors:

    - liquidity
    - missed dates
    - exposure
    - normalization

    :param output:
    :param data:
    :param kind:
    :return:
    """
    import qnt.stats as qns
    import qnt.exposure as qne
    from qnt.data.common import ds, f, track_event

    if kind is None:
        kind = data.name

    output = output.drop(ds.FIELD, errors='ignore')

    with LogSettings(err2info=True):
        log_info("Output cleaning...")

        single_day = ds.TIME not in output.dims
        if not single_day:
            track_event("OUTPUT_CLEAN")

        if single_day:
            output = output.drop(ds.TIME, errors='ignore')
            output = xr.concat([output], pd.Index([data.coords[ds.TIME].values.max()], name=ds.TIME))
        else:
            log_info("ffill if the current price is None...")
            output = output.fillna(0)
            output = output.where(np.isfinite(data.sel(field='close')))
            output = output.ffill('time')
            output = output.fillna(0)

        if kind == "stocks" or kind == "stocks_long":
            log_info("Check liquidity...")
            non_liquid = qns.calc_non_liquid(data, output)
            if len(non_liquid.coords[ds.TIME]) > 0:
                log_info("WARNING! Strategy trades non-liquid assets.")
                log_info("Fix liquidity...")
                is_liquid = data.sel(field=f.IS_LIQUID)
                is_liquid = xr.align(is_liquid, output, join='right')[0]
                output = xr.where(is_liquid == 0, 0, output)
            log_info("Ok.")

        if not single_day:
            log_info("Check missed dates...")
            missed_dates = qns.find_missed_dates(output, data)
            if len(missed_dates) > 0:
                log_info("WARNING! Output contains missed dates.")
                log_info("Adding missed dates and setting them to zero...")
                add = xr.concat([output.isel(time=-1)] * len(missed_dates),
                                pd.DatetimeIndex(missed_dates, name="time"))
                add = xr.full_like(add, np.nan)
                output = xr.concat([output, add], dim='time')
                output = output.fillna(0)
                if kind == "stocks" or kind == "stocks_long":
                    output = output.where(data.sel(field='is_liquid') > 0)
                output = output.dropna('asset', 'all').dropna('time', 'all').fillna(0)
                output = normalize(output)
            else:
                log_info("Ok.")

        if kind == 'stocks_long':
            log_info("Check positive positions...")
            neg = output.where(output < 0).dropna(ds.TIME, 'all')
            if len(neg.time) > 0:
                log_info("WARNING! Output contains negative positions. Clean...")
                output = output.where(output >= 0).fillna(0)
            else:
                log_info("Ok.")

        if kind == "stocks" or kind == "stocks_long":
            log_info("Check exposure...")
            if not qns.check_exposure(output):
                log_info("Cut big positions...")
                output = qne.cut_big_positions(output)
                log_info("Check exposure...")
                if not qns.check_exposure(output):
                    log_info("Drop bad days...")
                    output = qne.drop_bad_days(output)

        if kind == "crypto":
            log_info("Check BTC...")
            if output.where(output != 0).dropna("asset", "all").coords[ds.ASSET].values.tolist() != ['BTC']:
                log_info("WARNING! Output contains not only BTC.")
                log_info("Fixing...")
                output = output.sel(asset=['BTC'])
            else:
                log_info("Ok.")

        log_info("Normalization...")
        output = normalize(output)

        log_info("Output cleaning is complete.")

    return output
    url = tracking_host + '/engine/tracklib?apiKey=' + api_key + '&event=' + event
    if 'STRATEGY_ID' in os.environ:
        url = url + '&strategyId=' + os.environ.get('STRATEGY_ID', '')
    t = threading.Thread(target=get_url_silent, args=(url,))
    t.start()


def get_url_silent(url):
    try:
        urllib.request.urlopen(url)
    except:
        pass


if __name__ == '__main__':
    log_info(parse_max_datetime_from_url('http://hl.datarelay:7070/last/2020-10-07T10/'))
    log_info(parse_max_datetime_from_url('http://hl.datarelay:7070/last/2016-10-28/'))
    # t = parse_max_datetime_from_url('http://hl.datarelay:7070/last/2020-10-07T10/')
    # print(datetime.datetime.combine(t.date(), datetime.time.min))


# TODO Strange stuff, need to check usage
def from_xarray_3d_to_dict_of_pandas_df(xarray_data):
    assets_names = xarray_data.coords[ds.ASSET].values
    pandas_df_dict = {}
    for asset_name in assets_names:
        pandas_df_dict[asset_name] = xarray_data.loc[:, :, asset_name].to_pandas()
    return pandas_df_dict
def check(output, data, kind=None):
    """
    This function checks your output and warns you if it contains errors.

    :return:
    """
    import qnt.stats as qns
    from qnt.data.common import ds, f, get_env, track_event

    if kind is None:
        kind = data.name

    single_day = ds.TIME not in output.dims
    if single_day:
        output = xr.concat([output], pd.Index([data.coords[ds.TIME].values.max()], name=ds.TIME))

    try:
        if kind == "stocks" or kind == "stocks_long":
            log_info("Check liquidity...")
            non_liquid = qns.calc_non_liquid(data, output)
            if len(non_liquid.coords[ds.TIME]) > 0:
                log_err("ERROR! Strategy trades non-liquid assets.")
                log_err("Multiply the output by data.sel(field='is_liquid') or use qnt.output.clean")
            else:
                log_info("Ok.")

        if not single_day:
            log_info("Check missed dates...")
            missed_dates = qns.find_missed_dates(output, data)
            if len(missed_dates) > 0:
                log_err("ERROR! Some dates were missed.")
                log_err("Your strategy dropped some days; it should produce a continuous series.")
            else:
                log_info("Ok.")

            track_event("OUTPUT_CHECK")

        if kind == "stocks" or kind == "stocks_long":
            log_info("Check exposure...")
            if not qns.check_exposure(output):
                log_err("Use more assets or/and use qnt.output.clean")

        if kind == "crypto":
            log_info("Check BTC...")
            if output.where(output != 0).dropna("asset", "all").coords[ds.ASSET].values.tolist() != ['BTC']:
                log_err("ERROR! Output contains not only BTC.\n")
                log_err("Remove the other assets from the output or use qnt.output.clean")
            else:
                log_info("Ok.")

        if not single_day:
            if abs(output).sum() == 0:
                log_err("ERROR! Output is empty. All positions are zero.")
            else:
                # if kind == 'crypto' or kind == 'cryptofutures' or kind == 'crypto_futures':
                #     log_info("Check holding time...")
                #     ht = qns.calc_avg_holding_time(output)
                #     ht = ht.isel(time=-1).values
                #     if ht < 4:
                #         log_err("ERROR! The holding time is too low.", ht, "<", 4)
                #     else:
                #         log_info("Ok.")
                #
                # if kind == 'stocks_long':
                #     log_info("Check holding time...")
                #     ht = qns.calc_avg_holding_time(output)
                #     ht = ht.isel(time=-1).values
                #     if ht < 15:
                #         log_err("ERROR! The holding time is too low.", ht, "<", 15)
                #     else:
                #         log_info("Ok.")

                if kind == 'stocks_long':
                    log_info("Check positive positions...")
                    neg = output.where(output < 0).dropna(ds.TIME, 'all')
                    if len(neg.time) > 0:
                        log_err("ERROR! Output contains negative positions.")
                        log_err("Drop all negative positions.")
                    else:
                        log_info("Ok.")

                log_info("Check the sharpe ratio...")
                sr = calc_sharpe_ratio_for_check(data, output, kind, True)
                log_info("Sharpe Ratio =", sr)
                if sr < 1:
                    log_err("ERROR! The Sharpe Ratio is too low.", sr, '<', 1)
                    log_err("Improve the strategy and make sure that the in-sample Sharpe Ratio is more than 1.")
                else:
                    log_info("Ok.")

                log_info("Check correlation.")
                qns.check_correlation(output, data, False)
    except Exception as e:
        log_err(e)
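if __name__ == '__main__':
    # Hedged usage sketch, not part of the original module: the typical strategy
    # notebook flow is clean(), check(), then write(). It assumes qnt.data.load_data
    # is available and uses naive liquidity-based weights for illustration only.
    import qnt.data as qndata
    data = qndata.load_data(tail=5 * 365)
    weights = data.sel(field='is_liquid')
    weights = clean(weights, data)  # kind defaults to data.name ("stocks")
    check(weights, data)
    write(weights)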
def load_origin_data(assets=None, min_date=None, max_date=None,
                     tail: tp.Union[datetime.timedelta, float, int] = 4 * 365):
    track_event("DATA_STOCKS_SERIES")
    setup_ids()

    if assets is not None:
        assets = [a['id'] if type(a) == dict else a for a in assets]

    if assets is None:
        assets_array = load_list(min_date=min_date, max_date=max_date, tail=tail)
        assets_arg = [a['id'] for a in assets_array]
    else:
        assets_arg = assets

    assets_arg = [idt.translate_user_id_to_server_id(id) for id in assets_arg]
    assets_arg = list(set(assets_arg))  # rm duplicates

    # load data from server
    if max_date is None and "LAST_DATA_PATH" in os.environ:
        whole_data_file_flag_name = get_env("LAST_DATA_PATH", "last_data.txt")
        with open(whole_data_file_flag_name, "w") as text_file:
            text_file.write("last")

    max_date = parse_date(max_date)

    if min_date is not None:
        min_date = parse_date(min_date)
    else:
        min_date = max_date - parse_tail(tail)
    # print(str(max_date))

    if min_date > max_date:
        raise Exception("min_date must be less than or equal to max_date")

    start_time = time.time()

    days = (max_date - min_date).days + 1
    chunk_asset_count = math.floor(BATCH_LIMIT / days)

    chunks = []

    assets_arg.sort()

    for offset in range(0, len(assets_arg), chunk_asset_count):
        chunk_assets = assets_arg[offset:(offset + chunk_asset_count)]
        chunk = load_origin_data_chunk(chunk_assets, min_date.isoformat(), max_date.isoformat())
        if chunk is not None:
            chunks.append(chunk)
        log_info("fetched chunk " + str(round(offset / chunk_asset_count + 1)) + "/" +
                 str(math.ceil(len(assets_arg) / chunk_asset_count)) + " " +
                 str(round(time.time() - start_time)) + "s")

    fields = [f.OPEN, f.LOW, f.HIGH, f.CLOSE, f.VOL, f.DIVS, f.SPLIT, f.SPLIT_CUMPROD, f.IS_LIQUID]

    if len(chunks) == 0:
        whole = xr.DataArray(
            [[[np.nan]]] * len(fields),
            dims=[ds.FIELD, ds.TIME, ds.ASSET],
            coords={
                ds.FIELD: fields,
                ds.TIME: pd.DatetimeIndex([max_date]),
                ds.ASSET: ['ignore']
            }
        )[:, 1:, 1:]
    else:
        whole = xr.concat(chunks, ds.ASSET)

    whole.coords[ds.ASSET] = [idt.translate_server_id_to_user_id(id) for id in whole.coords[ds.ASSET].values]

    if assets is not None:
        assets = sorted(assets)
        assets = xr.DataArray(assets, dims=[ds.ASSET], coords={ds.ASSET: assets})
        whole = whole.broadcast_like(assets)

    whole = whole.transpose(ds.FIELD, ds.TIME, ds.ASSET)
    whole = whole.loc[fields, np.sort(whole.coords[ds.TIME])[::-1], np.sort(whole.coords[ds.ASSET])]

    return whole.dropna(ds.TIME, 'all')
        return lwma(series, weights)
    if type(weights) is list:
        weights = np.array(weights, np.float64)
    return nda.nd_universal_adapter(wma_np_1d, (series,), (weights,))


def lwma(series: nda.NdType, periods: int = 20):
    return nda.nd_universal_adapter(lwma_np_1d, (series,), (periods,))


def vwma(price: nda.NdType, volume: nda.NdType, periods: int = 20):
    return nda.nd_universal_adapter(vwma_np_1d, (price, volume), (periods,))


if __name__ == '__main__':
    log_info(np.divide(1., 0.))

    d1_array = np.array([0, 1, 2, 3, 4, np.nan, 5, np.nan, 6, 7], np.double)
    d1_result_lwma = lwma(d1_array, 3)
    d1_result_wma = wma(d1_array, [3, 2, 1])
    d1_result_vwma = vwma(d1_array, d1_array, 3)
    log_info("d1_array:\n", d1_array, '\n')
    log_info('d1_result_lwma:\n', d1_result_lwma)
    log_info('d1_result_wma:\n', d1_result_wma)
    log_info('d1_result_vwma:\n', d1_result_vwma)
    log_info('---')

    np_array = np.array([[
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ], [
def tema(series: nda.NdType, periods: int = 20, warm_periods: tp.Union[int, None] = None) -> nda.NdType:
    """ Triple Exponential Moving Average """
    ma = ema(series, periods, warm_periods)
    ma = ema(ma, periods, warm_periods)
    ma = ema(ma, periods, warm_periods)
    return ma


if __name__ == '__main__':
    d1_array = np.array([0, 1, 2, 3, 4, np.nan, 5, np.nan, 6, 7], np.double)
    d1_result = ema(d1_array, 3)
    log_info("d1_array:\n", d1_array, '\n')
    log_info('d1_result:\n', d1_result)
    log_info('---')

    date_rng = pd.date_range(start='2018-01-01', end='2018-01-10', freq='D')
    series_in = pd.Series(d1_array, date_rng)
    series_out = ema(series_in, 3)
    log_info("series_in:\n", series_in, '\n')
    log_info('series_out:\n', series_out)
    log_info('---')

    np_array = np.array([[
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ], [
        [2, 3, 4, 5],
    :return:
    """
    if isinstance(prices, np.ndarray):
        return ad_line_np(prices)
    return nd_to_1d_universal_adapter(ad_line_np, (prices,), ())


if __name__ == '__main__':
    from qnt.data import load_data, load_assets, ds

    assets = load_assets()
    ids = [i['id'] for i in assets[0:2000]]

    data = load_data(assets=ids, dims=(ds.TIME, ds.ASSET, ds.FIELD), forward_order=True).sel(field="close")

    adr = ad_ratio(data)
    adr_pd = ad_ratio(data.to_pandas())
    log_info(adr.to_pandas() - adr_pd.T)

    adl = ad_line(data)

    import matplotlib.pyplot as plt

    plt.plot(adr.coords[ds.TIME].values, adr.values, 'r')
    plt.show()

    plt.plot(adl.coords[ds.TIME].values, adl.values, 'g')
    plt.show()
def evaluate_passes(data_type='stocks', passes=3, dates=None):
    log_info("Output directory is:", result_dir)
    os.makedirs(result_dir, exist_ok=True)

    log_info("Rm previous results...")
    for i in os.listdir(result_dir):
        fn = result_dir + "/" + i
        if os.path.isfile(fn):
            log_info("rm:", fn)
            os.remove(fn)

    if dates is None:
        log_info("Prepare test dates...")
        min_date = (pd.Timestamp(qnt.stats.get_default_is_start_date_for_type(data_type))).to_pydatetime()
        data = qnt.data.load_data_by_type(data_type, min_date=min_date)
        if 'is_liquid' in data.field:
            data = data.where(data.sel(field='is_liquid') > 0).dropna('time', 'all')
        data = data.time
        dates = [data.isel(time=-1).values, data.isel(time=1).values] \
            + [data.isel(time=round(len(data) * (i + 1) / (passes - 1))).values for i in range(passes - 2)]
        dates = list(set(dates))
        dates.sort()
        dates = [pd.Timestamp(i).date() for i in dates]
        del data
    else:
        dates = [qnt.data.common.parse_date(d) for d in dates]

    log_info("Dates:", *(i.isoformat() for i in dates))

    i = 0
    for date in dates:
        try:
            os.remove(fractions_fn)
        except FileNotFoundError:
            pass
        try:
            os.remove(last_data_fn)
        except FileNotFoundError:
            pass
        try:
            os.remove(html_fn)
        except FileNotFoundError:
            pass

        log_info("---")
        i += 1
        log_info("pass:", i, "/", len(dates), "max_date:", date.isoformat())

        if data_type == 'stocks' or data_type == 'stocks_long':
            timeout = 30 * 60
        if data_type == 'futures':
            timeout = 10 * 60
        if data_type == 'crypto' or data_type == 'crypto_futures' or data_type == 'cryptofutures':
            timeout = 5 * 60

        data_url = urllib.parse.urljoin(urllib.parse.urljoin(qnt.data.common.BASE_URL, 'last/'),
                                        date.isoformat()) + "/"
        cmd = "DATA_BASE_URL=" + data_url + " \\\n" + \
              "LAST_DATA_PATH=" + last_data_fn + " \\\n" + \
              "OUTPUT_PATH=" + fractions_fn + " \\\n" + \
              "SUBMISSION_ID=-1\\\n" + \
              " jupyter nbconvert --to html --ExecutePreprocessor.timeout=" + str(timeout) + \
              " --execute strategy.ipynb --output=" + html_fn
        # + "\\\n 2>&1"
        log_info("cmd:", cmd)
        log_info("output:")
        proc = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            executable='bash'
        )
        for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
            sys.stdout.write(line)
        proc.wait()
        code = proc.returncode
        log_info("return code:", code)

        if not os.path.exists(fractions_fn):
            log_err("ERROR! Output is not found.")
        if not os.path.exists(last_data_fn):
            log_err("ERROR! The strategy does not use all data.")
        if not os.path.exists(html_fn):
            log_err("ERROR! Conversion to html failed.")
        if code != 0:
            log_err("ERROR! Return code != 0.")

        if os.path.exists(fractions_fn):
            log_info("Check the output...")
            output = load_output(fractions_fn, date)

            if data_type == 'stocks' or data_type == 'stocks_long':
                qnt.stats.check_exposure(output)

            log_info("Load data...")
            data = qnt.data.load_data_by_type(
                data_type,
                assets=output.asset.values.tolist(),
                min_date=str(output.time.min().values)[:10],
                max_date=date
            )

            if data_type == 'stocks' or data_type == 'stocks_long':
                non_liquid = qnt.stats.calc_non_liquid(data, output)
                if len(non_liquid.time) > 0:
                    log_err("ERROR! The output contains illiquid positions.")

            missed = qnt.stats.find_missed_dates(output, data)
            if len(missed) > 0:
                log_err("ERROR: some dates are missed in the output.", missed)
            else:
                log_info("There are no missed dates.")

            del data

        try:
            shutil.move(fractions_fn, result_dir + "/" + date.isoformat() + ".fractions.nc.gz")
        except FileNotFoundError:
            pass
        try:
            shutil.move(last_data_fn, result_dir + "/" + date.isoformat() + ".last_data.txt")
        except FileNotFoundError:
            pass
        try:
            shutil.move(html_fn, result_dir + "/" + date.isoformat() + ".strategy.html")
        except FileNotFoundError:
            pass

    log_info("---")
    log_info("Evaluation complete.")
import numpy as np
import numba as nb

import qnt.ta.ndadapter as nda
from qnt.log import log_info, log_err


@nb.jit(nb.float64[:](nb.float64[:], nb.int64), nopython=True)
def shift_np_1d(series: np.ndarray, periods: int) -> np.ndarray:
    if periods < 0:
        return np.flip(shift_np_1d(np.flip(series), -periods))
    tail = np.empty((periods + 1,), dtype=np.double)
    not_nan_cnt = 0
    result = np.full(series.shape, np.nan, dtype=np.double)
    for i in range(series.shape[0]):
        if not np.isnan(series[i]):
            idx = not_nan_cnt % tail.shape[0]
            tail[idx] = series[i]
            if not_nan_cnt >= periods:
                result[i] = tail[idx - periods]
            not_nan_cnt += 1
    return result


def shift(series: nda.NdType, periods: int = 1) -> nda.NdType:
    return nda.nd_universal_adapter(shift_np_1d, (series,), (periods,))


if __name__ == "__main__":
    arr = np.array([1, 2, np.nan, 4, 5, np.nan, 7, 9, 0], np.double)
    sh = shift(arr, 2)
    log_info(sh)
        return None
    arr = xr.open_dataarray(raw, cache=False, decode_times=True)
    arr = arr.compute()
    return arr


FIRST = True


def setup_ids():
    global FIRST
    if idt.USE_ID_TRANSLATION and FIRST:
        js = request_with_retry('assets', None)
        js = js.decode()
        tickers = json.loads(js)
        idt.USE_ID_TRANSLATION = next((i for i in tickers if i.get('FIGI') is not None), None) is not None
        FIRST = False


if __name__ == '__main__':
    # import qnt.id_translation
    # qnt.id_translation.USE_ID_TRANSLATION = False
    assets = load_list()
    log_info(len(assets))
    ids = [i['id'] for i in assets]
    log_info(ids)
    data = load_data(min_date='1998-11-09', assets=ids[-2000:])
    log_info(data.sel(field='close').transpose().to_pandas())