def irregular_vs_regular(w=8, l=10, save=False):
    """Plot one regular and one irregular field in two stacked panels.

    The top panel shows the production of the UK field 'WELTON' (labelled
    as the regular example), the bottom panel the production of the
    Norwegian field 'VALE' (labelled as the irregular example).

    Args:
        w -> number
            Width of the figure, forwarded to plt.figure(figsize=(w, l)).
        l -> number
            Height of the figure, forwarded to plt.figure(figsize=(w, l)).
        save -> bool
            If True the figure is written to
            'article/regular_and_irregular.pdf' (relative to the current
            working directory), otherwise it is displayed with plt.show().

    Return:
        None
    """
    uk_regular = 'WELTON'
    no_irregular = 'VALE'
    uk_prod = get_production('UK')[uk_regular].dropna()
    no_prod = get_production('NO')[no_irregular].dropna()

    fig = plt.figure(figsize=(w, l))

    # Top panel: the regular field.
    ax = fig.add_subplot(211)
    uk_prod.plot(ax=ax, marker='x', ls='', color='b')
    l1 = ax.get_lines()[0]
    ax.set_ylabel('Production [barrels/day]')
    ax.set_title('Example of a regular field')
    ax.legend([l1], [uk_regular.lower().capitalize() + ' (UK)'])

    # Bottom panel: the irregular field.
    ax2 = fig.add_subplot(212)
    no_prod.plot(ax=ax2, marker='o', ls='', color='b')
    l2 = ax2.get_lines()[0]
    ax2.set_xlabel('Time')
    ax2.set_ylabel('Production [barrels/day]')
    ax2.set_title('Example of an irregular field')
    ax2.legend([l2], [no_irregular.lower().capitalize() + ' (NO)'],
               loc='upper left')

    fig.tight_layout()
    if save:
        # Write through an explicit path instead of chdir-ing in and out of
        # 'article': a failing savefig would otherwise leave the whole
        # process in the wrong working directory.
        fig.savefig(os.path.join('article', 'regular_and_irregular.pdf'))
    else:
        plt.show()
def rate_of_discoveries(w=8, l=8, save=False):
    """Prepare the data behind the rate-of-discoveries figures (UK and NO).

    Two figures are created, the pickled logistic parameters
    ('data/UK_2013_rate.pkl', 'data/NO_2013_rate.pkl') are loaded and, for
    every category returned by classify_fields_according_to_urr, the number
    of fields whose production had started is counted for each month
    between START[country] and '2013-02-01'.

    NOTE(review): the visible body never draws on the axes, never uses
    `save` and returns None - it looks unfinished; confirm.
    NOTE(review): a second function named rate_of_discoveries(country, ...)
    appears in this file; whichever is defined later shadows the other -
    confirm which one is meant to survive.

    Args:
        w -> number
            Width forwarded to plt.figure(figsize=(w, l)).
        l -> number
            Height forwarded to plt.figure(figsize=(w, l)).
        save -> bool
            Currently unused.

    Return:
        None
    """
    def load_pickle(path):
        # Close the handle even if unpickling fails.
        with open(path) as f:
            return load(f)

    def first_production_dates(prod):
        # First index entry with a non-missing production value, per field.
        return Series(dict((field, prod[field].dropna().index[0])
                           for field in prod))

    def cumulative_counts(start_of_prod, categories, activity):
        # One (months, counts) pair per category, where counts[i] is the
        # number of fields of that category started on or before months[i].
        counts = []
        for category in categories:
            starts = array(sorted(start_of_prod[category]))
            n = [(starts <= month).sum() for month in activity]
            counts.append((activity, n))
        return counts

    def logi(rkp, x):
        # Logistic curve with growth rate r, saturation k and initial
        # value p. The parameters are still passed as one (r, k, p) tuple,
        # exactly as with the former tuple-unpacking signature.
        r, k, p = rkp
        return k * p * exp(r * x) / (k + p * (exp(r * x) - 1))

    fig = plt.figure(figsize=(w, l))
    fig2 = plt.figure(figsize=(w, l))

    rkps_uk = load_pickle('data/UK_2013_rate.pkl')
    rkps_no = load_pickle('data/NO_2013_rate.pkl')

    prod_uk = get_production('UK')
    prod_no = get_production('NO')
    start_of_prod_uk = first_production_dates(prod_uk)
    start_of_prod_no = first_production_dates(prod_no)

    cats_uk = classify_fields_according_to_urr('UK', '2013-02-01', 'random')
    cats_no = classify_fields_according_to_urr('NO')

    activity_uk = date_range(START['UK'], '2013-02-01', freq='MS')
    n_vs_ts_uk = cumulative_counts(start_of_prod_uk, cats_uk, activity_uk)
    activity_no = date_range(START['NO'], '2013-02-01', freq='MS')
    n_vs_ts_no = cumulative_counts(start_of_prod_no, cats_no, activity_no)

    # The first three categories are named small / medium / big here.
    small_uk, medium_uk, big_uk = n_vs_ts_uk[0], n_vs_ts_uk[1], n_vs_ts_uk[2]
    small_no, medium_no, big_no = n_vs_ts_no[0], n_vs_ts_no[1], n_vs_ts_no[2]

    ax = fig.add_subplot(311)
    _ax = fig2.add_subplot(111)

    dates_uk = small_uk[0]
    dates_no = small_no[0]
    dates_plot_uk = date_range(START['UK'], '2025-01-01', freq='MS')
    dates_plot_no = date_range(START['NO'], '2025-01-01', freq='MS')
    # Abscissae expressed in days elapsed since START[country].
    x1 = array([float(d.toordinal()) - START['UK'].toordinal()
                for d in dates_plot_uk])
    x2 = array([float(d.toordinal()) - START['NO'].toordinal()
                for d in dates_plot_no])
def perturb_fit_parms(country, until='2013-02-01',
                      fit_style='stretched exponential', xmin='my_xmin',
                      perturbation=0.1, step=0.01, parm_names=None):
    """Perturb the parameters of the fit and look at the impact on the cost
    function.

    This is still a stub: the inputs are gathered, a message is printed and
    nothing is returned.

    Args:
        country -> str
            The string representing the country.
        until -> str
            The datelike string used for backtesting.
        fit_style -> str
            The string representing the fitting style.
        xmin -> str
            The choice for the "left cutoff" when fitting.
        perturbation -> float
            The relative amount by which the fit parameters are perturbed;
            the multiplicative factors span
            [1 - perturbation, 1 + perturbation].
        step -> float
            The resolution of the perturbation.
        parm_names -> list
            The names of the parameters we wish to perturb. Defaults to
            ['tau', 'beta'] when None.

    Return:
        None (the analysis is not implemented yet).
    """
    # A fresh list per call; a list literal in the signature would be one
    # object shared by every call.
    if parm_names is None:
        parm_names = ['tau', 'beta']

    fit_parms = get_fit_parms(country=country, until=until,
                              fit_style=fit_style, xmin=xmin)
    production = get_production(country, until)
    # Multiplicative factors to be applied to each fit parameter.
    changes = arange(1. - perturbation, 1 + perturbation + step, step)
    res = {}  # placeholder for the future results
    print('It will be implemented later.')
def extend_all(country, until='2013-02-01', fit_style='stretched exponential',
               style='urr', xmin='my_xmin'):
    """Gather the extended production of every field in one DataFrame.

    Regular fields are extended with extend_production, the "bad" ones
    with extend_the_bad_ones, and the fields flagged 'inactive' in the
    classification are taken over from the raw production unchanged.

    Args:
        country -> str
            The string representing the country we are dealing with.
            Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style.
        style -> str
            The style used for the extension of "bad" fields.
            Ex: 'random', 'urr'
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The three groups concatenated column-wise, columns sorted.
    """
    regular_part = extend_production(country, until, fit_style, xmin)
    bad_part = extend_the_bad_ones(country, until, style, fit_style, xmin)

    raw_production = get_production(country, until)
    classes = get_classification(country, until)
    # Rows of the classification whose 'inactive' flag is set.
    inactive_fields = classes[classes['inactive']].index
    inactive_part = raw_production[inactive_fields]

    pieces = (regular_part, bad_part, inactive_part)
    combined = concat(pieces, axis=1)
    return combined.sort(axis=1)
def logistic_extrap(country, field, until, start, cutoff, k_frac):
    """Extrapolate the logistic fit of a field from start, only taking data
    from cutoff into account, until the fit reaches k_frac * k.

    The (r, k, p) parameters are read from the pickle
    '<country>_logistic_production_<year of until>.pkl' in DPATH when that
    file exists; otherwise they are fitted with fit_logistic.

    Args:
        country -> str
            The string representing the country.
        field -> str
            The name of the field (a column of the production data).
        until -> str
            The datelike string used for backtesting.
        start -> datelike
            Reference date handed to the logistic routines as
            Timestamp(start).
        cutoff -> datelike
            Only production from this date on is used.
        k_frac -> float
            Fraction of k at which the extrapolation stops.

    Return:
        (x, y) -> tuple
            If the last observed value already exceeds k_frac * k, the
            observed dates and values (from cutoff on) are returned
            unchanged. Otherwise only the extrapolated dates and the
            extrapolated values lying below k_frac * k are returned.
    """
    prod = get_production(country, until)[field].dropna()[cutoff:]
    x, y = prod.index, prod.values
    nx = len(x)

    fname = os.path.join(DPATH, '%s_logistic_production_%s.pkl'
                         % (country, until[:4]))
    if os.path.exists(fname):
        # The with-block closes the handle even if unpickling fails.
        with open(fname) as f:
            rkp_dict = load(f)
        r, k, p = rkp_dict[field]
    else:
        r, k, p = fit_logistic(x, y, Timestamp(start))

    target = k_frac * k
    if y[-1] > target:
        return (x, y)

    # Lengthen the time axis by 119 months per pass until the curve
    # reaches the target.
    # NOTE(review): presumably never terminates if the curve cannot reach
    # the target (e.g. r <= 0) - confirm fit_logistic rules that out.
    while y[-1] < target:
        x = list(x)
        last_date = x[-1]
        # [1:] drops last_date itself, which is already part of x.
        future = date_range(last_date, periods=120, freq='MS')[1:]
        x.extend(future)
        y = compute_logistic(x, (r, k, p), Timestamp(start))

    # Keep the extrapolated part only, cut where the target is reached.
    _y = y[nx:]
    _y = _y[_y < target]
    _x = x[nx:]
    _x = _x[:len(_y)]
    return (_x, _y)
def stretched_exponential(w=8, l=10, save=False):
    """Plot the production of the UK field 'WELTON' together with its
    stretched exponential fit.

    The fit parameters (tau, beta, y0) come from
    get_fit_parms('UK', '2013-02-01'); the fitted curve
    y0 * exp(-(x / |tau|) ** beta) is drawn from '1996-08-01' on.

    Args:
        w -> number
            Width forwarded to plt.figure(figsize=(w, l)).
        l -> number
            Height forwarded to plt.figure(figsize=(w, l)).
        save -> bool
            If True the figure is written to
            'article/stretched_exponential.pdf' (relative to the current
            working directory), otherwise it is displayed with plt.show().

    Return:
        None
    """
    field = 'WELTON'
    prod = get_production('UK')[field].dropna()
    fit_parms = get_fit_parms('UK', '2013-02-01').ix[field]
    tau, beta, y0 = fit_parms['tau'], fit_parms['beta'], fit_parms['y0']

    x, y = prepare_xy(prod)
    yfit = y0 * exp(-(x / abs(tau)) ** beta)
    fit = Series(yfit, index=prod.index)
    lower_cutoff = '1996-08-01'

    fig = plt.figure(figsize=(w, l))
    ax = fig.gca()
    prod.plot(ax=ax, marker='x', ls='')
    l1 = ax.get_lines()[0]
    l2, = ax.plot(fit[lower_cutoff:].index, fit[lower_cutoff:], '-k')
    # The legend shows the product form, matching yfit above.
    ax.legend([l1, l2],
              [field.lower().capitalize() + ' (UK)',
               r'$y = y0 \cdot exp(-(\frac{t}{\tau})^\beta)$'])
    ax.set_xlabel('Time')
    ax.set_ylabel('Production [barrels/day]')
    fig.tight_layout()

    if save:
        # Write through an explicit path instead of chdir-ing in and out of
        # 'article': a failing savefig would otherwise leave the whole
        # process in the wrong working directory.
        fig.savefig(os.path.join('article', 'stretched_exponential.pdf'))
    else:
        plt.show()
def rate_of_discoveries(country, until='2013-02-01', fit_style='logistic',
                        confid=None, show_plot=False, style='urr'):
    """Fits a logistic curve to the number of fields that started producing
    up to time t, separately for each category of fields.

    This informs us about the underlying discovery mechanism. This
    mechanism can depend on size.

    When the pickle '<country>_<year of until>_rate.pkl' exists in DPATH,
    the parameters are read from it (indexed by the position of the
    category) instead of being fitted.

    Args:
        country -> str
            The string representing the name of the country.
        until -> str
            Datelike string useful for backtesting; production is loaded
            up to it and the monthly time axis ends at it.
        fit_style -> str
            Currently unused.
        confid
            Forwarded to fit_logistic.
        show_plot -> bool
            Forwarded to fit_logistic.
        style -> str
            Forwarded to classify_fields_according_to_urr.

    Return:
        fit_params -> list of tuples
            One (r, k, p) tuple per category, in the order returned by
            classify_fields_according_to_urr.
    """
    production = get_production(country, until)
    # First index entry with a non-missing production value, per field.
    start_of_prod = Series(dict(
        (field, production[field].dropna().index[0])
        for field in production.columns))

    categories = classify_fields_according_to_urr(country, until, style)
    activity = date_range(START[country], until, freq='MS')

    # For every category: number of fields started on or before each month.
    n_vs_ts = []
    for category in categories:
        starts = array(sorted(start_of_prod[category]))
        n = [(starts <= month).sum() for month in activity]
        n_vs_ts.append((activity, n))

    fname = os.path.join(DPATH, '%s_%s_rate.pkl' % (country, until[:4]))
    # Decide once whether cached parameters are available.
    cached = os.path.exists(fname)
    if cached:
        with open(fname) as f:
            rkp_dict = load(f)

    fit_params = []
    for i, (x, y) in enumerate(n_vs_ts):
        if cached:
            (r, k, p) = rkp_dict[i]
        else:
            (r, k, p) = fit_logistic(x, y, START[country], confid, show_plot)
        fit_params.append((r, k, p))
    return fit_params
def extend_production(country, until='2013-02-01',
                      fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the regular oil fields into the future,
    given the fit style.

    The result is cached in the file
    '<country>_extended_<year of until>_<first two letters of fit_style>'
    inside DPATH; when that file exists it is loaded and returned as is.
    NOTE(review): xmin is not part of the cache file name, so a cached
    result is returned whatever xmin is passed - confirm this is intended.

    Args:
        country -> str
            The string representing the country we are dealing with.
            Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style.
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production, one column per
            regular field.
    """
    fpath = os.path.join(DPATH, '%s_extended_%s_%s'
                         % (country, until[:4], fit_style[:2]))
    if os.path.exists(fpath):
        with open(fpath) as f:
            return load(f)

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['regular']].index
    fit_parms = get_fit_parms(country=country, until=until,
                              fit_style=fit_style, xmin=xmin)

    # The extension uses the same fit style as the parameters; it used to
    # be hard-wired to 'stretched exponential' whatever fit_style was
    # (assumes the third argument of _extend_production is the fit style).
    extended_productions = [
        _extend_production(production[field], fit_parms.ix[field], fit_style)
        for field in fields]
    extended_production = concat(extended_productions, axis=1)

    # Only touch the cache once the result exists, so a failure above
    # cannot leave an empty file that later loads would choke on.
    with open(fpath, 'w') as f:
        dump(extended_production, f)
    return extended_production
def get_fit_parms(country, until='2013-02-01',
                  fit_style='stretched exponential', xmin='my_xmin'):
    """Return a DataFrame with the fields as the index and name of the
    relevant fit parameters as columns.

    The result is cached in the file
    '<country>_fit_parms_<year of until>_<first two letters of fit_style>'
    inside DPATH; when that file exists it is loaded and returned without
    looking at the other arguments.

    Args:
        country -> str.
            The string representing the name of the country.
        until -> str.
            The string representing the date until which we take the
            monthly oil production into account. This is especially
            useful for backtesting.
        fit_style -> str.
            The string representing the fit style: 'exponential',
            'stretched exponential' or 'power law'.
        xmin -> str.
            The string representing the x-coordinate of the production
            timeseries from which the fitting is done. Only 'my_xmin'
            (the 'from' column of the classification) is supported.

    Return:
        fit_parms -> DataFrame.
            The DataFrame containing the fit parameters.

    Raise:
        ValueError when nothing is cached and fit_style or xmin is not one
        of the supported values.
    """
    fpath = os.path.join(DPATH, '%s_fit_parms_%s_%s'
                         % (country, until[:4], fit_style[:2]))
    if os.path.exists(fpath):
        with open(fpath) as f:
            return load(f)

    # Checked only after the cache lookup so that calls which used to be
    # answered from the cache keep working unchanged.
    if fit_style == 'exponential':
        fit = fit_exponential
        columns = ['tau', 'y0']
    elif fit_style == 'stretched exponential':
        fit = fit_stretched_exponential
        columns = ['tau', 'beta', 'y0']
    elif fit_style == 'power law':
        fit = fit_power_law
        columns = ['alpha', 'y0']
    else:
        raise ValueError('Unknown fit_style: %r' % (fit_style,))
    if xmin != 'my_xmin':
        raise ValueError('Unknown xmin: %r' % (xmin,))

    production = get_production(country, until)
    classification = get_classification(country, until)
    # Returns the fields that are regular (and thus fittable)
    fields = classification[classification['regular']].index

    # Filling the fit_parms DataFrame
    fit_parms = DataFrame(index=fields, columns=columns)
    for field in fields:
        # The "left cutoff value" of the fit is the field's 'from' entry.
        field_xmin = classification.ix[field]['from']
        fit_parms.ix[field] = fit(production[field], xmin=field_xmin)

    with open(fpath, 'w') as f:
        dump(fit_parms, f)
    return fit_parms
def _load_logistic_start(country, until):
    """Load '<country>_logistic_extension_<year of until>.pkl' from DPATH."""
    fname = os.path.join(DPATH, '%s_logistic_extension_%s.pkl' % (
        country, until[:4]))
    with open(fname) as f:
        return load(f)


def _max_future_months(until):
    """Number of months between until and MAX_DATE."""
    until_ts = Timestamp(until)
    return (12 * (MAX_DATE.year - until_ts.year) +
            (MAX_DATE.month - until_ts.month))


def extend_the_bad_ones(country, until='2013-02-01', style='urr',
                        fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the irregular/insufficient oil fields
    (those flagged 'bad' in the classification) into the future.

    Styles:
        'random' -> each bad field borrows the future decay of a randomly
                    chosen regular field, rescaled to its own last
                    production value.
        'urr'    -> exponential decay whose time constant is set by the
                    remaining reserves (URR estimate minus production so
                    far) and the current production.
        'const'  -> the mean of the last five production values is held
                    constant until the remaining reserves are used up.
    For 'random' and 'urr', fields listed in the logistic extension pickle
    are first prolonged with logistic_extrap (up to 95% of k) and the decay
    is appended after that.

    Args:
        country -> str
            The string representing the country we are dealing with.
            Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        style -> str
            The style used for the extension. Ex: 'random', 'urr', 'const'
        fit_style -> str
            The string representing the fit style (used by 'random' only).
        xmin -> str
            The choice for the "left cutoff" when fitting (used by
            'random' only).

    Return:
        extended_prod -> DataFrame
            The past production of the bad fields concatenated with their
            future production. None when style is not one of the three
            styles above.

    Raise:
        ValueError when country is 'UK' and style is not 'random'.
    """
    # No URR data for UK. An explicit raise (rather than an assert) so the
    # check survives python -O.
    if country == 'UK' and style != 'random':
        raise ValueError("Only style='random' is available for 'UK' "
                         "(got %r)." % (style,))

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['bad']].index

    if style == 'random':
        logistic_start = _load_logistic_start(country, until)
        # The "good fields" i.e. those who are regular enough for a fit. We
        # will sample the decay for our "bad fields" from the good ones.
        _extended_production = extend_production(country, until, fit_style,
                                                 xmin)
        samples = choice(_extended_production.columns, len(fields))
        ss = []
        for field, sample in zip(fields, samples):
            # Future part of the sampled regular field.
            tail_shape = _extended_production[sample][until:][1:]
            # A misspelt flag ('if_logistic') used to keep this branch from
            # ever running.
            if field in logistic_start:
                start = logistic_start[field][0]
                cutoff = logistic_start[field][1]
                x, y = logistic_extrap(country, field, until, start, cutoff,
                                       0.95)
                shape = tail_shape.values
                # Logistic rise first, then the sampled decay rescaled to
                # the last logistic value.
                tail = list(y) + list(y[-1] / shape[0] * shape)
                idx = _extended_production[until:][1:].index
                tail = tail[:len(idx)]
                tail = Series(tail, index=idx[:len(tail)], name=field)
            else:
                # We need to scale the tail_shape to the field we want to
                # extend.
                last_value = production[field].dropna()[-1]
                tail = last_value / tail_shape[0] * tail_shape
                tail.name = field
            ss.append(tail)
        future_prod = concat(ss, axis=1)
        extended_prod = concat((production[fields], future_prod))
        return extended_prod

    if style == 'urr':
        urr_estimates = classification['urr'].ix[fields]
        if country == 'NO':
            urr_estimates *= 1e6 * M3_TO_BARRELS
        logistic_start = _load_logistic_start(country, until)
        max_nfuture_months = _max_future_months(until)
        ss = []
        for field in fields:
            observed = production[field].dropna()
            prod_until_now = production[field].sum()
            prod_remaining = urr_estimates[field] - prod_until_now
            prod_now = observed[-1]
            is_logistic = field in logistic_start
            n_logistic = 0
            if is_logistic:
                # The country used to be hard-wired to 'NO' here.
                x, y = logistic_extrap(country, field, until,
                                       logistic_start[field][0],
                                       logistic_start[field][1], 0.95)
                prod_now = y[-1]
                prod_remaining -= sum(y)
                n_logistic = len(y)
            # comes from the constraint that sum over future equals
            # remaining
            tau = prod_remaining / prod_now
            # time from start in months.
            tnow = len(observed) + n_logistic
            # comes from the constraint that p(tnow) = pnow
            y0 = prod_now * exp(tnow/tau)
            lifetime = -tau * log(MIN_PROD/y0) + n_logistic
            nfuture_months = min(max_nfuture_months,
                                 max(0, int(lifetime-tnow) + n_logistic))
            if nfuture_months == 0:
                ss.append(production[field])
                continue
            future = date_range(Timestamp(until) + MonthBegin(),
                                periods=nfuture_months, freq='MS')
            decay = y0 * exp(
                -arange(tnow+1, tnow+1+nfuture_months-n_logistic)/tau)
            if is_logistic:
                # Logistic part first, exponential decay afterwards. The
                # slice keeps the values in step with `future` when the
                # logistic part alone is longer than the horizon.
                prod_future = array(list(y) + list(decay))[:nfuture_months]
            else:
                prod_future = decay
            ss.append(Series(prod_future, index=future, name=field,
                             dtype='float64'))
        future_prod = concat(ss, axis=1)
        extended_prod = concat((production[fields], future_prod))
        return extended_prod

    if style == 'const':
        urr_estimates = classification['urr'].ix[fields]
        if country == 'NO':
            urr_estimates *= 1e6 * M3_TO_BARRELS
        max_nfuture_months = _max_future_months(until)
        ss = []
        for field in fields:
            prod_until_now = production[field].sum()
            prod_remaining = urr_estimates[field] - prod_until_now
            prod_now = production[field].dropna()[-5:].mean()
            # Number of months the remaining reserves last at prod_now.
            n = prod_remaining / prod_now
            nfuture_months = min(max_nfuture_months, max(0, int(n)))
            if nfuture_months == 0:
                ss.append(production[field])
                continue
            future = date_range(Timestamp(until) + MonthBegin(),
                                periods=nfuture_months, freq='MS')
            prod_future = len(future) * [prod_now]
            ss.append(Series(prod_future, index=future, name=field))
        future_prod = concat(ss, axis=1)
        extended_prod = concat((production[fields], future_prod))
        return extended_prod

    # NOTE(review): an unknown style silently yields None, as before;
    # consider raising ValueError once callers have been checked.
    return None
x = arange(0., len(tot_prod)) prod_multi = doublecycle(multi_parms['NO']['2003-02-01'], x) prod_multi = Series(prod_multi, index=tot_prod.index) if 'ax' not in kwargs.keys(): fig = plt.figure(figsize=(w, l)) ax = fig.gca() else: ax = kwargs['ax'] tot_prod[:'2003-02-01'].plot(ax=ax, marker='o', ls='', color='k') prod_multi[:'2003-02-01'].plot(ax=ax, ls='-', color='r') tot_prod['2003-02-01':END_DATE].plot(ax=ax, ls='-', color='b') prod_multi['2003-02-01':END_DATE].plot(ax=ax, ls='--', color='r') prod_2003_2013 = get_production('NO').sum(axis=1)['2003-02-01':].dropna() prod_2003_2013.plot(ax=ax, marker='x', ls='', color='k') l1, l2, l3, l4, l5 = ax.get_lines() ax.legend([l1, l5, l3, l4], ['Data up to 2003', 'Data from 2003 to 2013', 'Monte-Carlo forecast', 'Hubbert forecast'], loc='upper right') print prod_multi['2013-02-01':].sum() / 1e9 print tot_prod['2013-02-01':].sum() / 1e9 ax.set_xlabel('Time') ax.set_ylabel('Norwegian oil production [barrels/day]') if 'ax' not in kwargs.keys(): fig.tight_layout()