def urr_vs_urr(country, until='2013-02-01', fit_style='stretched exponential'):
    """Compares our URR estimate with the official one, field by field.

    Our estimate is the column-wise sum of the extended production of the
    regular fields; the official estimate is the 'urr' column of the
    classification table scaled by 1e6 * M3_TO_BARRELS.

    Args:
        country -> str
            The country of the oil fields. 'UK' is rejected.
        until -> str
            The date-like string forwarded to extend_production and
            get_classification.
        fit_style -> str
            The fit style forwarded to extend_production.

    Return:
        urrs -> DataFrame
            Two columns, 'ours' and 'theirs', indexed by field.
    """
    assert country != 'UK', 'UK has no official URR estimate!'

    extended = extend_production(country, until=until, fit_style=fit_style,
                                 xmin='my_xmin')
    ours = extended.sum()
    ours.name = 'ours'

    table = get_classification(country, until)
    # Restrict the official figures to the fields we have an estimate for.
    official = Series(table.ix[ours.index]['urr'], name='theirs')
    official = 1e6 * M3_TO_BARRELS * official

    return concat((ours, official), axis=1)
def extend_all(country, until='2013-02-01', fit_style='stretched exponential',
               style='urr', xmin='my_xmin'):
    """Gathers the extended production of every field in one DataFrame.

    Regular fields come from extend_production, "bad" fields from
    extend_the_bad_ones, and inactive fields are taken as-is from the
    historical production.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style.
        style -> str
            The style used for the extension of "bad" fields.
            Ex: 'random', 'urr'
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            One column per field, columns sorted.
    """
    regular_part = extend_production(country, until=until,
                                     fit_style=fit_style, xmin=xmin)
    bad_part = extend_the_bad_ones(country, until=until, style=style,
                                   fit_style=fit_style, xmin=xmin)

    history = get_production(country, until)
    table = get_classification(country, until)

    # Fields flagged as inactive keep their historical production untouched.
    inactive_fields = table[table['inactive']].index
    inactive_part = history[inactive_fields]

    combined = concat((regular_part, bad_part, inactive_part), axis=1)
    return combined.sort(axis=1)
def extend_production(country, until='2013-02-01',
                      fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the regular oil fields into the future,
    given the fit style.

    The result is cached on disk under DPATH. The cache file name is built
    from the country, the first four characters of `until` and the first two
    characters of `fit_style`. NOTE: `xmin` and the rest of `until` are not
    part of the cache key, so calls that differ only in those share one file.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style. It selects the fit
            parameters and is forwarded to _extend_production.
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production, one column per
            regular field.
    """
    fpath = os.path.join(DPATH, '%s_extended_%s_%s' % (country, until[:4],
                                                       fit_style[:2]))
    if os.path.exists(fpath):
        with open(fpath) as f:
            return load(f)

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['regular']].index
    fit_parms = get_fit_parms(country=country, until=until,
                              fit_style=fit_style, xmin=xmin)

    # Extend with the same fit style the parameters were fitted with; the
    # parameter columns differ between styles.
    extended_productions = [
        _extend_production(production[field], fit_parms.ix[field], fit_style)
        for field in fields
    ]
    extended_production = concat(extended_productions, axis=1)

    # Only touch the cache once the result exists, so that a failure above
    # does not leave an empty cache file behind.
    with open(fpath, 'w') as f:
        dump(extended_production, f)
    return extended_production
def classify_fields_according_to_urr(country, until='2013-02-01',
                                     style='urr'):
    """Groups the fields into size categories based on their URR.

    The category edges are 0, the values of BOUNDS[country][until] (assumed
    to be in ascending order -- TODO confirm) and infinity. Each category is
    the half-open interval (lower, upper], so a field whose URR is exactly on
    a bound belongs to the category below it. Fields with a URR that is not
    strictly positive (or is NaN) belong to no category.

    Args:
        country -> str
            The string representing the country. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting; also the key
            into BOUNDS[country].
        style -> str
            'urr' uses the 'urr' column of the classification table scaled
            by 1e6 * M3_TO_BARRELS. Any other value is forwarded to
            extend_all and the URR is the summed extended production.

    Return:
        categories -> list
            One index of field names per category, smallest category first.
    """
    bounds = BOUNDS[country][until]
    edges = [0.] + list(bounds) + [inf]
    intervals = list(zip(edges[:-1], edges[1:]))

    if style == 'urr':
        cls = get_classification(country, until)
        urrs = cls['urr'] * 1e6 * M3_TO_BARRELS
    else:
        urrs = extend_all(country, until, style=style).sum()

    return [urrs[(urrs > lower) & (urrs <= upper)].index
            for lower, upper in intervals]
def get_fit_parms(country, until='2013-02-01',
                  fit_style='stretched exponential', xmin='my_xmin'):
    """Return a DataFrame with the fields as the index and name of the
    relevant fit parameters as columns.

    The result is cached on disk under DPATH. The cache file name is built
    from the country, the first four characters of `until` and the first two
    characters of `fit_style`; `xmin` is not part of the cache key. When a
    cache file exists it is returned without further checks.

    Args:
        country -> str.
            The string representing the name of the country.
        until -> str.
            The string representing the date until which we take the monthly
            oil production into account. This is especially useful for
            backtesting.
        fit_style -> str.
            The string representing the fit style. One of 'exponential',
            'stretched exponential', 'power law'.
        xmin -> str.
            The string representing the x-coordinate of the production
            timeseries from which the fitting is done. Only 'my_xmin' is
            supported; it uses the 'from' column of the classification.

    Return:
        fit_parms -> DataFrame.
            The DataFrame containing the fit parameters.

    Raises:
        ValueError
            On a cache miss, when `fit_style` or `xmin` is not supported.
    """
    fpath = os.path.join(DPATH, '%s_fit_parms_%s_%s' % (country, until[:4],
                                                        fit_style[:2]))
    if os.path.exists(fpath):
        with open(fpath) as f:
            return load(f)

    if fit_style == 'exponential':
        fit = fit_exponential
        columns = ['tau', 'y0']
    elif fit_style == 'stretched exponential':
        fit = fit_stretched_exponential
        columns = ['tau', 'beta', 'y0']
    elif fit_style == 'power law':
        fit = fit_power_law
        columns = ['alpha', 'y0']
    else:
        raise ValueError('Unknown fit_style: %r' % (fit_style,))

    if xmin != 'my_xmin':
        raise ValueError('Unknown xmin: %r' % (xmin,))

    production = get_production(country, until)
    classification = get_classification(country, until)
    # Only the regular fields are fitted.
    fields = classification[classification['regular']].index

    # Filling the fit_parms DataFrame
    fit_parms = DataFrame(index=fields, columns=columns)
    for field in fields:
        # 'my_xmin': the "left cutoff" is the field's 'from' entry.
        field_xmin = classification.ix[field]['from']
        fit_parms.ix[field] = fit(production[field], xmin=field_xmin)

    with open(fpath, 'w') as f:
        dump(fit_parms, f)
    return fit_parms
def _load_logistic_start(country, until):
    """Loads the per-field logistic extension data from its file in DPATH."""
    fname = os.path.join(DPATH, '%s_logistic_extension_%s.pkl' % (
        country, until[:4]))
    with open(fname) as f:
        return load(f)


def _max_future_months(until):
    """Returns the number of months between `until` and MAX_DATE."""
    cutoff = Timestamp(until)
    return (12 * (MAX_DATE.year - cutoff.year) +
            (MAX_DATE.month - cutoff.month))


def _urr_estimates_for(country, classification, fields):
    """Returns the 'urr' entries of `fields`, rescaled for 'NO' only."""
    urr_estimates = classification['urr'].ix[fields]
    # NOTE(review): other blocks in this file apply this factor regardless
    # of the country -- confirm that only 'NO' needs it here.
    if country == 'NO':
        urr_estimates *= 1e6 * M3_TO_BARRELS
    return urr_estimates


def _extend_bad_random(country, until, fit_style, xmin, production, fields):
    """Extends each bad field with the rescaled tail of a random good field."""
    logistic_start = _load_logistic_start(country, until)
    # The "good fields" i.e. those who are regular enough for a fit. We will
    # sample the decay for our "bad fields" from the good ones.
    good = extend_production(country, until, fit_style, xmin)
    samples = choice(good.columns, len(fields))

    tails = []
    for field, sample in zip(fields, samples):
        tail_shape = good[sample][until:][1:]
        if field in logistic_start:
            start = logistic_start[field][0]
            cutoff = logistic_start[field][1]
            _, y = logistic_extrap(country, field, until, start, cutoff, 0.95)
            # Logistic segment first, then the sampled decay rescaled so
            # that it continues from the last logistic value.
            shape = tail_shape.values
            values = list(y) + list(y[-1] / shape[0] * shape)
            idx = good[until:][1:].index
            values = values[:len(idx)]
            tail = Series(values, index=idx[:len(values)], name=field)
        else:
            # We need to scale the tail_shape to the field we want to extend.
            tail = production[field].dropna()[-1] / tail_shape[0] * tail_shape
            tail.name = field
        tails.append(tail)

    future_prod = concat(tails, axis=1)
    return concat((production[fields], future_prod))


def _extend_bad_urr(country, until, production, classification, fields):
    """Extends each bad field with an exponential decay matched to its URR."""
    urr_estimates = _urr_estimates_for(country, classification, fields)
    logistic_start = _load_logistic_start(country, until)
    max_nfuture_months = _max_future_months(until)
    first_future_month = Timestamp(until) + MonthBegin()

    ss = []
    for field in fields:
        history = production[field].dropna()
        prod_remaining = urr_estimates[field] - production[field].sum()
        prod_now = history[-1]

        is_logistic = field in logistic_start
        n_logistic = 0
        if is_logistic:
            _, y = logistic_extrap(country, field, until,
                                   logistic_start[field][0],
                                   logistic_start[field][1], 0.95)
            # The decay starts after the logistic segment.
            prod_now = y[-1]
            prod_remaining -= sum(y)
            n_logistic = len(y)

        # comes from the constraint that sum over future equals remaining
        tau = prod_remaining / prod_now
        # time from start in months.
        tnow = len(history) + n_logistic
        # comes from the constraint that p(tnow) = pnow
        y0 = prod_now * exp(tnow / tau)
        lifetime = -tau * log(MIN_PROD / y0) + n_logistic
        nfuture_months = min(max_nfuture_months,
                             max(0, int(lifetime - tnow) + n_logistic))
        if nfuture_months == 0:
            ss.append(production[field])
            continue

        future = date_range(first_future_month, periods=nfuture_months,
                            freq='MS')
        decay_months = arange(tnow + 1,
                              tnow + 1 + nfuture_months - n_logistic)
        decay = y0 * exp(-decay_months / tau)
        if is_logistic:
            # Truncate so the values never outnumber the dates when the
            # horizon is shorter than the logistic segment.
            prod_future = array(list(y) + list(decay))[:nfuture_months]
        else:
            prod_future = decay
        ss.append(Series(prod_future, index=future, name=field,
                         dtype='float64'))

    future_prod = concat(ss, axis=1)
    return concat((production[fields], future_prod))


def _extend_bad_const(country, until, production, classification, fields):
    """Extends each bad field at a constant rate until its URR is reached."""
    urr_estimates = _urr_estimates_for(country, classification, fields)
    max_nfuture_months = _max_future_months(until)
    first_future_month = Timestamp(until) + MonthBegin()

    ss = []
    for field in fields:
        prod_remaining = urr_estimates[field] - production[field].sum()
        # Constant rate: mean of the last five available values.
        prod_now = production[field].dropna()[-5:].mean()
        n = prod_remaining / prod_now
        nfuture_months = min(max_nfuture_months, max(0, int(n)))
        if nfuture_months == 0:
            ss.append(production[field])
            continue
        future = date_range(first_future_month, periods=nfuture_months,
                            freq='MS')
        prod_future = len(future) * [prod_now]
        ss.append(Series(prod_future, index=future, name=field))

    future_prod = concat(ss, axis=1)
    return concat((production[fields], future_prod))


def extend_the_bad_ones(country, until='2013-02-01', style='urr',
                        fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the "bad" oil fields (those flagged in the
    'bad' column of the classification) into the future.

    Fields listed in the logistic extension file are first continued with
    the output of logistic_extrap ('random' and 'urr' styles) before the
    decay is appended, to allow for a future rise of their production.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        style -> str
            The style used for the extension.
            'random': rescaled tail of a randomly chosen regular field.
            'urr': exponential decay constrained by the URR estimate.
            'const': constant production until the URR estimate is reached.
        fit_style -> str
            The string representing the fit style ('random' style only).
        xmin -> str
            The choice for the "left cutoff" when fitting ('random' style
            only).

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production.

    Raises:
        AssertionError
            When country is 'UK' and style is not 'random'.
        ValueError
            When style is not one of 'random', 'urr', 'const'.
    """
    # No URR data for UK
    if country == 'UK':
        assert style == 'random'
    if style not in ('random', 'urr', 'const'):
        raise ValueError('Unknown style: %r' % (style,))

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['bad']].index

    if style == 'random':
        return _extend_bad_random(country, until, fit_style, xmin,
                                  production, fields)
    if style == 'urr':
        return _extend_bad_urr(country, until, production, classification,
                               fields)
    return _extend_bad_const(country, until, production, classification,
                             fields)