Python get_classification示例，_get_data.get_classification Python示例

示例#1

0

显示文件

文件： _analysis.py 项目： LucasFievet/Tomorrow

def urr_vs_urr(country, until='2013-02-01', fit_style='stretched exponential'):
    """Puts our estimate of the ultimate recoverable resources (URR) next to
    the official one.

    Our estimate is the column-wise sum of the extended production of the
    country; the official estimate is the 'urr' column of the classification,
    multiplied by 1e6 * M3_TO_BARRELS (presumably converting millions of cubic
    metres to barrels -- confirm against the data source).

    Args:
        country -> str
            The country of the oil fields. 'UK' is rejected.
        until -> str
            The datetime like string forwarded to extend_production and
            get_classification.
        fit_style -> str
            The string representing the fit style, forwarded to
            extend_production.

    Return:
        urrs -> DataFrame
            One row per field of the extended production, with the columns
            'ours' and 'theirs'.
    """
    assert country != 'UK', 'UK has no official URR estimate!'

    # Our own estimate: total (past + extrapolated) production per field.
    ours = extend_production(country, until=until, fit_style=fit_style,
                             xmin='my_xmin').sum()
    ours.name = 'ours'

    # Official estimate, restricted to the fields we have an estimate for.
    official = get_classification(country, until).ix[ours.index]['urr']
    theirs = 1e6 * M3_TO_BARRELS * Series(official, name='theirs')

    return concat((ours, theirs), axis=1)

示例#2

0

显示文件

文件： _analysis.py 项目： LucasFievet/Tomorrow

def extend_all(country, until='2013-02-01', fit_style='stretched exponential',
    style='urr', xmin='my_xmin'):
    """Gathers the extended production of every kind of field in one
    DataFrame.

    Three pieces are put side by side: the output of extend_production, the
    output of extend_the_bad_ones, and the untouched production of the fields
    flagged 'inactive' in the classification. The columns of the result are
    then sorted.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style.
        style -> str
            The style used for the extension of "bad" fields. Ex: 'random','urr'
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production, one column per field.
    """
    regular_part = extend_production(country, until, fit_style, xmin)
    bad_part = extend_the_bad_ones(country, until, style, fit_style, xmin)
    production = get_production(country, until)
    classification = get_classification(country, until)

    # Inactive fields are not extended: their production is taken as is.
    is_inactive = classification['inactive']
    inactive_fields = classification[is_inactive].index
    inactive_part = production[inactive_fields]

    combined = concat((regular_part, bad_part, inactive_part), axis=1)
    return combined.sort(axis=1)

示例#3

0

显示文件

文件： _analysis.py 项目： LucasFievet/Tomorrow

def extend_production(country, until='2013-02-01', 
    fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the regular oil fields into the future, given
    the fit style.

    The result is cached on disk under DPATH. The cache file name is built
    from the country, the first four characters of `until` and the first two
    characters of `fit_style`; when that file exists it is loaded and returned
    without any computation.

    NOTE(review): `xmin` is not part of the cache file name, so a cached
    result is returned regardless of the `xmin` passed in.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style. It selects the fit
            parameters (via get_fit_parms) and is handed to
            _extend_production.
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production, one column per field
            flagged 'regular' in the classification.
    """
    fpath = os.path.join(DPATH, '%s_extended_%s_%s' % (country, until[:4], 
                fit_style[:2]))
    if os.path.exists(fpath):
        # `with` closes the file even if load() raises.
        with open(fpath) as f:
            return load(f)

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['regular']].index
    fit_parms = get_fit_parms(country=country, until=until, fit_style=fit_style,
                    xmin=xmin)

    # The requested fit_style is forwarded; it used to be hard-coded to
    # 'stretched exponential' even when the parameters came from another fit.
    # NOTE(review): assumes the third argument of _extend_production is the
    # fit style -- confirm against its definition.
    extended_productions = [
        _extend_production(production[field], fit_parms.ix[field], fit_style)
        for field in fields
    ]
    extended_production = concat(extended_productions, axis=1)

    # The cache file is only created once the result exists, so a failure
    # above cannot leave an empty cache file behind.
    with open(fpath, 'w') as f:
        dump(extended_production, f)
    return extended_production

示例#4

0

显示文件

文件： _analysis.py 项目： LucasFievet/Tomorrow

def classify_fields_according_to_urr(country, until='2013-02-01', style='urr'):
    """Returns a list of list of fields according to the classification
    based on their URR.

    The bounds BOUNDS[country][until] split the positive axis into the
    consecutive bins (0, b1], (b1, b2], ..., (bn, inf). With no bounds at all
    there is a single bin (0, inf).

    Each bin is open on the left and closed on the right, so a field whose URR
    is exactly equal to a bound lands in the lower bin instead of being
    dropped. Fields whose URR is not strictly positive (or is NaN) belong to
    no category.

    Args:
        country -> str
            The string representing the country we are dealing with.
        until -> str
            The datetime like string; also the key into BOUNDS[country].
        style -> str
            With 'urr' the URR is the 'urr' column of the classification
            times 1e6 * M3_TO_BARRELS. Any other value is forwarded to
            extend_all and the URR is the column sum of its result.

    Return:
        categories -> list
            One index of field names per bin, in increasing order of URR.
    """
    # Consecutive edges are paired into (low, high) bins. This also works for
    # empty bounds, where the loop variable of the former implementation was
    # never bound.
    edges = [0.] + list(BOUNDS[country][until]) + [inf]
    bins = list(zip(edges[:-1], edges[1:]))

    if style == 'urr':
        cls = get_classification(country, until)
        urrs = cls['urr'] * 1e6 * M3_TO_BARRELS
    else:
        urrs = extend_all(country, until, style=style).sum()

    return [urrs[(urrs > low) & (urrs <= high)].index for low, high in bins]

示例#5

0

显示文件

文件： _analysis.py 项目： LucasFievet/Tomorrow

def get_fit_parms(country, until='2013-02-01', 
    fit_style='stretched exponential', xmin='my_xmin'):
    """Return a DataFrame with the fields as the index and name of the relevant 
    fit parameters as columns.

    The result is cached on disk under DPATH. The cache file name is built
    from the country, the first four characters of `until` and the first two
    characters of `fit_style`; when that file exists it is loaded and returned
    as is, before any argument is validated.

    Args:
        country -> str.
            The string representing the name of the country.
        until -> str.
            The string representing the date until which we take the monthly
            oil production into account. This is especially useful for 
            backtesting.
        fit_style -> str.
            The string representing the fit style: 'exponential',
            'stretched exponential' or 'power law'.
        xmin -> str.
            The string representing the x-coordinate of the production 
            timeseries from which the fitting is done. Only 'my_xmin' is
            supported: the 'from' value of the field in the classification.

    Return:
        fit_parms -> DataFrame.
            The DataFrame containing the fit parameters, one row per field
            flagged 'regular' in the classification.

    Raises:
        ValueError
            If nothing is cached and `fit_style` or `xmin` is not one of the
            supported values.
    """
    fpath = os.path.join(DPATH, '%s_fit_parms_%s_%s' % (country, until[:4], 
                fit_style[:2]))

    if os.path.exists(fpath):
        # `with` closes the file even if load() raises.
        with open(fpath) as f:
            return load(f)

    # Validation happens only on a cache miss, so every call that used to be
    # served from the cache still is. Unsupported values used to surface
    # further down as a NameError.
    if fit_style == 'exponential':
        fit = fit_exponential
        columns = ['tau', 'y0']
    elif fit_style == 'stretched exponential':
        fit = fit_stretched_exponential
        columns = ['tau', 'beta', 'y0']
    elif fit_style == 'power law':
        fit = fit_power_law
        columns = ['alpha', 'y0']
    else:
        raise ValueError('Unknown fit_style: %r' % (fit_style,))

    if xmin != 'my_xmin':
        raise ValueError('Unknown xmin: %r' % (xmin,))

    production = get_production(country, until)
    classification = get_classification(country, until)
    #Returns the fields that are regular (and thus fittable)
    fields = classification[classification['regular']].index

    #Filling the fit_parms DataFrame
    fit_parms = DataFrame(index=fields, columns=columns)
    for field in fields:
        #The "left cutoff value" for the fit of this field.
        field_xmin = classification.ix[field]['from']
        fit_parms.ix[field] = fit(production[field], xmin=field_xmin)

    with open(fpath, 'w') as f:
        dump(fit_parms, f)
    return fit_parms

示例#6

0

显示文件

文件： _analysis.py 项目： LucasFievet/Tomorrow

def _load_logistic_start(country, until):
    """Loads the logistic extension table of a country for the year of
    `until`; it is indexed by field name and each entry holds (start, cutoff).
    """
    fname = os.path.join(DPATH, '%s_logistic_extension_%s.pkl' % (
            country, until[:4]))
    # `with` closes the file even if load() raises.
    with open(fname) as f:
        return load(f)


def _max_future_months(until):
    """Returns the number of months separating `until` from MAX_DATE."""
    start = Timestamp(until)
    return 12 * (MAX_DATE.year - start.year) + (MAX_DATE.month - start.month)


def _official_urrs(country, classification, fields):
    """Returns the 'urr' entries of `fields`, scaled by 1e6 * M3_TO_BARRELS
    when the country is 'NO'.
    """
    urr_estimates = classification['urr'].ix[fields]
    if country == 'NO':
        # Not done in place, so the classification can never be modified.
        urr_estimates = urr_estimates * (1e6 * M3_TO_BARRELS)
    return urr_estimates


def _extend_bad_random(country, until, fit_style, xmin, production, fields):
    """Extends each bad field with the rescaled decay of a randomly drawn
    regular field (style 'random').
    """
    logistic_start = _load_logistic_start(country, until)
    #The "good fields" i.e. those who are regular enough for a fit. We will
    #sample the decay for our "bad fields" from the good ones.
    _extended_production = extend_production(country, until, fit_style,
                               xmin)
    samples = choice(_extended_production.columns, len(fields))
    ss = []
    for field, sample in zip(fields, samples):
        if field in logistic_start:
            # This branch used to be dead: the flag guarding it was set
            # through a misspelled name ("if_logistic"), so the logistic
            # extrapolation was computed and then thrown away.
            start, cutoff = logistic_start[field][0], logistic_start[field][1]
            _x, y = logistic_extrap(country, field, until, start, cutoff, 0.95)
            tail_shape = _extended_production[sample][until:][1:].values
            #The sampled decay is glued to the end of the logistic part.
            tail = list(y) + list(y[-1]/tail_shape[0] * tail_shape)
            idx = _extended_production[until:][1:].index
            #Never go beyond the dates of the regular extension.
            tail = tail[:len(idx)]
            tail = Series(tail, index=idx[:len(tail)], name=field)
        else:
            tail_shape = _extended_production[sample][until:][1:]
            #We need to scale the tail_shape to the field we want to extend.
            tail = production[field].dropna()[-1]/tail_shape[0] * tail_shape
            tail.name = field
        ss.append(tail)
    future_prod = concat(ss, axis=1)
    return concat((production[fields], future_prod))


def _extend_bad_urr(country, until, production, classification, fields):
    """Extends each bad field with an exponential decay whose integral matches
    what is left of the official URR (style 'urr').
    """
    urr_estimates = _official_urrs(country, classification, fields)
    logistic_start = _load_logistic_start(country, until)
    #Does not depend on the field, hence computed once.
    max_nfuture_months = _max_future_months(until)

    ss = []
    for field in fields:
        past = production[field].dropna()
        prod_remaining = urr_estimates[field] - production[field].sum()
        prod_now = past[-1]

        is_logistic = field in logistic_start
        n_logistic = 0
        if is_logistic:
            start, cutoff = logistic_start[field][0], logistic_start[field][1]
            # `country` is passed on; it used to be hard-coded to 'NO' here.
            _x, y = logistic_extrap(country, field, until, start, cutoff, 0.95)
            #The decay starts where the logistic part ends.
            prod_now = y[-1]
            prod_remaining -= sum(y)
            n_logistic = len(y)

        #comes from the constraint that sum over future equals remaining
        tau = prod_remaining / prod_now
        #time from start in months.
        tnow = len(past) + n_logistic
        #comes from the constraint that p(tnow) = pnow
        y0 = prod_now * exp(tnow/tau)
        #time at which the decay reaches MIN_PROD
        lifetime = -tau * log(MIN_PROD/y0) + n_logistic

        nfuture_months = min(max_nfuture_months,
                             max(0, int(lifetime-tnow) + n_logistic))

        if nfuture_months == 0:
            # NOTE(review): the past production is appended in place of a
            # future one, as before -- confirm this is intended.
            ss.append(production[field])
            continue
        future = date_range(Timestamp(until) + MonthBegin(), 
                     periods=nfuture_months, freq='MS')
        decay = y0 * exp(-arange(tnow+1, tnow+1+nfuture_months-n_logistic)/tau)
        if is_logistic:
            #Truncated so that a logistic part longer than the allowed number
            #of months cannot make the data longer than its index.
            prod_future = array(list(y) + list(decay))[:nfuture_months]
        else:
            prod_future = decay
        ss.append(Series(prod_future, index=future, name=field, dtype='float64'))
    future_prod = concat(ss, axis=1)
    return concat((production[fields], future_prod))


def _extend_bad_const(country, until, production, classification, fields):
    """Extends each bad field at a constant rate until the official URR is
    used up (style 'const').
    """
    urr_estimates = _official_urrs(country, classification, fields)
    #Does not depend on the field, hence computed once.
    max_nfuture_months = _max_future_months(until)

    ss = []
    for field in fields:
        prod_remaining = urr_estimates[field] - production[field].sum()
        #The constant rate is the mean of the last five known values.
        prod_now = production[field].dropna()[-5:].mean()
        n = prod_remaining / prod_now
        nfuture_months = min(max_nfuture_months, max(0, int(n)))

        if nfuture_months == 0:
            # NOTE(review): the past production is appended in place of a
            # future one, as before -- confirm this is intended.
            ss.append(production[field])
            continue
        future = date_range(Timestamp(until) + MonthBegin(), 
                     periods=nfuture_months, freq='MS')
        prod_future = len(future) * [prod_now]
        ss.append(Series(prod_future, index=future, name=field))
    future_prod = concat(ss, axis=1)
    return concat((production[fields], future_prod))


def extend_the_bad_ones(country, until='2013-02-01', style='urr',
    fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the oil fields flagged 'bad' in the
    classification (irregular/insufficient) into the future.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        style -> str
            The style used for the extension:
            'random' reuses the rescaled decay of a randomly drawn regular
            field; 'urr' uses an exponential decay constrained by the
            official URR; 'const' keeps the production constant until the
            official URR is used up.
        fit_style -> str
            The string representing the fit style (only used by 'random').
        xmin -> str
            The choice for the "left cutoff" when fitting (only used by
            'random').

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production.

    Raises:
        AssertionError
            If country is 'UK' and style is not 'random'.
        ValueError
            If style is not 'random', 'urr' or 'const'. The function used to
            return None silently in that case.
    """
    #No URR data for UK. Raised explicitly so that the check survives
    #"python -O"; the exception type is the one `assert` raised before.
    if country == 'UK' and style != 'random':
        raise AssertionError("UK has no URR data, style must be 'random'")
    if style not in ('random', 'urr', 'const'):
        raise ValueError('Unknown style: %r' % (style,))

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['bad']].index

    if style == 'random':
        return _extend_bad_random(country, until, fit_style, xmin,
                                  production, fields)
    if style == 'urr':
        return _extend_bad_urr(country, until, production, classification,
                               fields)
    return _extend_bad_const(country, until, production, classification,
                             fields)