forked from harvey1673/pyktrader2
/
hist_vol_tool.py
223 lines (209 loc) · 8.25 KB
/
hist_vol_tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import bsopt
import copy
import dateutil
import pandas as pd
import numpy as np
import math
import mysqlaccess
from scipy.stats import norm
from misc import *
SOLVER_ERROR_EPSILON = 1e-5
ITERATION_NUM = 100
ITERATION_STEP = 0.001
YEARLY_DAYS = 365.25
# Cash flow calculation for delta hedging.
# Inside the period, Vol is constant and hedging frequency is once per ndays
# bussinessDays is number of business days from the startD to expiryT
def delta_cashflow(df, vol, option_input, rehedge_period = 1, column = 'close'):
CF = 0.0
strike = option_input['strike']
otype = option_input.get('otype', 1)
expiry = option_input['expiry']
rd = option_input['rd']
rf = option_input.get('rf', rd)
dfunc_name = option_input.get('delta_func', 'bsopt.BSDelta')
delta_func = eval(dfunc_name)
nlen = len(df.index)
for pidx in range(int(nlen/rehedge_period)):
idx = pidx * rehedge_period
nxt_idx = min((pidx + 1) * rehedge_period, nlen)
if nxt_idx >= nlen -1:
break
tau = (expiry - df.index[idx]).days/YEARLY_DAYS
opt_delta = delta_func(otype, df[column][idx], strike, vol, tau, rd, rf)
CF = CF + opt_delta * (df[column][nxt_idx] - df[column][idx])
return CF
def realized_vol(df, option_input, calib_input, column = 'close'):
strike = option_input['strike']
otype = option_input.get('otype', 1)
expiry = option_input['expiry']
rd = option_input['rd']
rf = option_input.get('rf', rd)
ref_vol = calib_input.get('ref_vol', 0.5)
opt_payoff = calib_input.get('opt_payoff', 0.0)
rehedge_period = calib_input.get('rehedge_period', 1)
fwd = df[column][0]
is_dtime = calib_input.get('is_dtime', False)
pricer_func = eval(option_input.get('pricer_func', 'bsopt.BSOpt'))
if expiry < df.index[-1]:
raise ValueError, 'Expiry time must be no earlier than the end of the time series'
numTries = 0
diff = 1000.0
start_d = df.index[0]
if is_dtime:
start_d = start_d.date()
tau = (expiry - start_d).days/YEARLY_DAYS
vol = ref_vol
def func(x):
return pricer_func(otype, fwd, strike, x, tau, rd, rf) + delta_cashflow(df, x, option_input, rehedge_period, column) - opt_payoff
while diff >= SOLVER_ERROR_EPSILON and numTries <= ITERATION_NUM:
current = func(vol)
high = func(vol + ITERATION_STEP)
low = func(vol - ITERATION_STEP)
if high == low:
volnext = max(vol -ITERATION_STEP, 1e-2)
else:
volnext = vol - 2* ITERATION_STEP * current/(high-low)
if volnext < 1e-2:
volnext = vol/2.0
diff = abs(volnext - vol)
vol = volnext
numTries += 1
if diff >= SOLVER_ERROR_EPSILON or numTries > ITERATION_NUM:
return None
else :
return vol
def bs_delta_to_ratio(delta, vol, texp):
ys = norm.ppf(delta)
return math.exp((ys - 0.5 * vol * math.sqrt(texp)) * vol * math.sqrt(texp))
def realized_termstruct(option_input, data):
is_dtime = data.get('is_dtime', False)
column = data.get('data_column', 'close')
xs = data.get('xs', [0.5])
xs_cols = data.get('xs_names', ['atm'])
xs_func = data.get('xs_func', 'bs_delta_to_ratio')
xs_func = eval(xs_func)
term_tenor = data.get('term_tenor', '-1m')
df = data['dataframe']
calib_input = {}
calib_input['rehedge_period'] = data.get('rehedge_period', 1)
expiry = option_input['expiry']
otype = option_input.get('otype', 1)
ref_vol = option_input.get('ref_vol', 0.5)
rd = option_input['rd']
rf = option_input.get('rf', rd)
end_vol = option_input.get('end_vol', 0.0)
pricer_func = eval(option_input.get('pricer_func', 'bsopt.BSOpt'))
if is_dtime:
datelist = df['date']
dexp = expiry.date()
else:
datelist = df.index
dexp = expiry
xdf = df[datelist <= dexp]
datelist = datelist[datelist <= dexp]
end_d = datelist[-1]
final_value = 0.0
vol_ts = pd.DataFrame(columns = xs_cols )
roll_idx = 0
while end_d > datelist[0]:
roll_idx += 1
start_d = day_shift(end_d, term_tenor)
sub_df = xdf[(datelist <= end_d) & (datelist > start_d)]
if len(sub_df) < 2:
break
vols = []
for idx, x in enumerate(xs):
strike = sub_df[column][0]
texp = (expiry - start_d).days/YEARLY_DAYS
if idx > 0:
strike *= xs_func(xs[idx], vols[0], texp)
option_input['strike'] = strike
if end_vol > 0:
tau = (expiry - end_d).days/YEARLY_DAYS
final_value = pricer_func(otype, sub_df[column][-1], strike, end_vol, tau, rd, rf)
ref_vol = end_vol
elif end_vol == 0:
if otype:
final_value = max((sub_df[column][-1] - strike), 0)
else:
final_value = max((strike - sub_df[column][-1]), 0)
elif end_vol == None:
raise ValueError, 'no vol is found to match PnL'
calib_input['ref_vol'] = ref_vol
calib_input['opt_payoff'] = final_value
vol = realized_vol(sub_df, option_input, calib_input, column)
vols.append(vol)
end_vol = vol
end_d = start_d
tenor_str = str(roll_idx * int(term_tenor[-2])) + term_tenor[-1]
vol_ts.ix[tenor_str, :] = vols
return vol_ts
def hist_realized_vol_by_product(prodcode, start_d, end_d, periods = 12, tenor = '-1m', writeDB = False):
cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode)
contlist = contract_range(prodcode, exch, cont_mth, start_d, end_d)
print contlist
exp_dates = [get_opt_expiry(cont, inst2contmth(cont)) for cont in contlist]
data = {'is_dtime': False,
'data_column': 'close',
'xs': [0.5, 0.25, 0.75],
'xs_names': ['atm', 'd25', 'd75'],
'xs_func': 'bs_delta_to_ratio',
'rehedge_period': 1,
'term_tenor': tenor,
}
option_input = {'otype': 1,
'rd': 0.0,
'rf': 0.0,
'end_vol': 0.0,
'ref_vol': 0.2,
'pricer_func': 'bsopt.BSOpt',
'delta_func': 'bsopt.BSDelta',
}
for cont, expiry in zip(contlist, exp_dates):
if expiry > end_d:
break
p_str = '-' + str(int(tenor[1:-1]) * periods) + tenor[-1]
d_start = day_shift(expiry, p_str)
df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, d_start, expiry, database = 'hist_data')
option_input['expiry'] = expiry
data['dataframe'] = df
vol_df = realized_termstruct(option_input, data)
print cont, expiry, vol_df
def variance_ratio(ts, freqs):
data = ts.values
nlen = len(data)
res = {'n': [], 'ln':[]}
var1 = np.var(data[1:] - data[:-1])
lnvar1 = np.var(np.log(data[1:]/data[:-1]))
for freq in freqs:
nrow = nlen/freq
nsize = freq * nrow
shaped_arr = np.reshape(data[:nsize], (nrow, freq))
diff = shaped_arr[1:,freq-1] - shaped_arr[:-1,freq-1]
res['n'].append(np.var(diff)/freq/var1)
ln_diff = np.log(shaped_arr[1:,freq-1]/shaped_arr[:-1,freq-1])
res['ln'].append(np.var(ln_diff)/freq/lnvar1)
return res
def price_stats(df):
stats = {}
stats['20H'] = max(df['high'][-20:])
stats['20H'] = min(df['low'][-20:])
def validate_db_data(tday, filter = False):
all_insts = filter_main_cont(tday, filter)
data_count = {}
inst_list = {'min': [], 'daily': [] }
for instID in all_insts:
df = mysqlaccess.load_daily_data_to_df('fut_daily', instID, tday, tday)
if len(df) <= 0:
inst_list['daily'].append(instID)
elif (df.close[-1] == 0) or (df.high[-1] == 0) or (df.low[-1] == 0) or df.open[-1] == 0:
inst_list['daily'].append(instID)
df = mysqlaccess.load_min_data_to_df('fut_min', instID, tday, tday, minid_start=300, minid_end=2115, database='blueshale')
if len(df) <= 100:
output = instID + ':' + str(len(df))
inst_list['min'].append(output)
elif df.min_id < 2055:
output = instID + ': end earlier'
inst_list['min'].append(output)
print inst_list