Example #1
 def __init__(self,
              summaryfile=None,
              inwave=None,
              indata=None,
              inerrs=None,
              inmask=None,
              smooth=None):
     self.data = indata
     self.wave = inwave
     self.errs = inerrs
     self.mask = inmask
     if summaryfile:
         self.open_summary(summaryfile)
         # Interpolate masked areas
         self.data[self.mask] = np.nan
         self.nonnanidx = np.where(~self.mask)[0]
         self.interp = np.interp(self.wave, self.wave[self.nonnanidx],
                                 self.data[self.nonnanidx])
         self.interr = np.interp(self.wave, self.wave[self.nonnanidx],
                                 self.errs[self.nonnanidx])
     if smooth == 'll':
         lle = KernelReg(self.interp, self.wave, 'c', bw=[10])
         # fit() returns (conditional mean, marginal effects); keep only the mean
         mean, _ = lle.fit()
         self.smoothed = mean
     elif smooth == 'box':
         # Boxcar smoothing; mode='same' keeps the output the same length as the input
         self.smoothed = np.convolve(self.data, np.array([1, 1, 1]) / 3, mode='same')
     else:
         self.smoothed = self.data
     self._build_plot()
Example #2
def dataSmoothing3(changes):
    length = len(changes)
    x = np.linspace(1, length, num=length, endpoint=True)
    y = np.array(changes)
    kr = KernelReg(y, x, 'c')
    r_fit = kr.r_squared()
    #plt.figure(1)
    #plt.subplot(131)
    #plt.plot(x, y, 'go-')
    #plt.title("Original",fontsize=20)
    #plt.xlabel('Periods',fontsize=20)
    #plt.ylabel('Dockerfile Size',fontsize=20)
    #plt.grid(True)
    # For short series, predict on a denser grid
    if length < 20:
        x1 = np.linspace(1, length, num=3 * length, endpoint=True)
    else:
        x1 = x
    # fit() returns (conditional mean, marginal effects), not a standard deviation
    y_pred, y_mfx = kr.fit(x1)
    #plt.subplot(132)
    #plt.plot(x1, y_pred,'bo-')
    #plt.title("Smoothing",fontsize=20)
    #plt.xlabel('Periods',fontsize=20)
    #plt.ylabel('Dockerfile Size',fontsize=20)
    #plt.grid(True)
    #plt.show()
    ynew = dataResampling(y_pred)
    xnew = np.linspace(1, 20, 20, endpoint=False)
    #plt.subplot(133)
    #plt.plot(xnew, ynew,'ro-')
    #plt.title("Resampling",fontsize=20)
    #plt.xlabel('Periods',fontsize=20)
    #plt.ylabel('Dockerfile Size',fontsize=20)
    #plt.grid(True)
    #plt.show()
    return ynew, r_fit
Example #3
File: nonlinear.py  Project: mikss/pr3
class NadarayaWatsonUNLR(UnivariateNonlinearRegressor):
    kernel: KernelReg
    bandwidth: float

    def __init__(
        self,
        bandwidth: float = 0.25,
        random_state: Optional[Union[int, np.random.RandomState]] = None,
    ):
        """Instantiates a kernel regression model.

        Args:
            bandwidth: affects the scale on which to locally average samples
            random_state: random state which affects sample bootstrapping
        """
        super().__init__(random_state)
        self.bandwidth = bandwidth

    def _fit_univariate(self, x: np.ndarray, y: np.ndarray, w: Optional[np.ndarray]) -> None:
        if w is not None:
            x, y = self.weighted_resampler(x, y, w)
        self.kernel = KernelReg(endog=y, exog=x, var_type="c", bw=[self.bandwidth])

    def predict(self, x: np.ndarray) -> np.ndarray:
        return self.kernel.fit(x)[0]

    def derivative(self, x: np.ndarray) -> np.ndarray:
        return self.kernel.fit(x)[1].ravel()
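
A minimal standalone sketch of the fit/predict/derivative pattern above (the synthetic data and the 0.25 bandwidth are illustrative assumptions; UnivariateNonlinearRegressor comes from the surrounding project, so this exercises KernelReg directly):

import numpy as np
from statsmodels.nonparametric.kernel_regression import KernelReg

rng = np.random.default_rng(0)
x = np.sort(rng.uniform(0.0, 1.0, 200))
y = np.sin(2 * np.pi * x) + rng.normal(scale=0.1, size=200)

kernel = KernelReg(endog=y, exog=x, var_type="c", bw=[0.25])
mean, mfx = kernel.fit(x)  # conditional mean and marginal effects
slope = mfx.ravel()        # per-point slope estimate, as in derivative() above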
Example #4
def integrated_calibration_index_mod(y, p):
    """
    local reg 使うバージョン
    TOOD: statsmodels.nonparametric.kernel_regression.KernReg がとても遅い. C++とかで実装したほうが良いのでは?
    """
    ll = KernelReg(endog=y, exog=p, reg_type='ll', var_type='o')
    return mean_absolute_error(y, ll.fit()[0])
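
A hedged calling sketch (it assumes mean_absolute_error comes from sklearn, as the snippet implies; the synthetic outcomes are illustrative, and the run is slow, per the TODO above):

import numpy as np
from sklearn.metrics import mean_absolute_error
from statsmodels.nonparametric.kernel_regression import KernelReg

rng = np.random.default_rng(1)
p = rng.uniform(0.05, 0.95, 100)               # predicted probabilities
y = (rng.uniform(size=100) < p).astype(float)  # simulated binary outcomes

print(integrated_calibration_index_mod(y, p))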
Example #5
def get_fitted_values(week):
    # week - for knowing which s_streams values to take for the s_spotify data

    # build the working_df that the model will work with
    working_df = pd.read_csv(get_paths()[1] + "all_spotify.csv")
    working_df = working_df.drop(working_df.columns[[0]], axis=1)
    
    # run the regression
    y = np.array(list(working_df["streams"]))
    x_r = np.array(list(working_df["rank"]))
    x_s = np.array(list(working_df["s_streams"]))

    # count continuous variable bandwidth (rule of thumb: std * n^(-1/5))
    var_cont = (np.var(x_s))**0.5
    b_c = var_cont*(len(y)**(-1/5))
    print(b_c)

    # count ordered discrete variable bandwidth
    b_o = len(y)**(-2/5)
    print(b_o)


    reg_new = KernelReg(y, [x_r, x_s], var_type="oc", reg_type = "ll", bw = [b_o, b_c]) 
    
    df_of_needed_week = working_df[working_df["week_f_show"] == week]
    last_week_sstreams = df_of_needed_week["s_streams"][-1:].values[0]
    fit_values = reg_new.fit([[i for i in range(1,201)],[last_week_sstreams for h in range(1,201) ]])[0]
    
    return fit_values
Example #6
def FWHM(wave, pertdata, mode='data', imin=False, ll_bw='cv_ls'):
    """ Mode can be data, ll, lc
    """
    fwhms = []
    imins = []
    mvels = []
    LLEs = []
    for i in tqdm(range(pertdata.shape[0])):
        data = pertdata[i, :]
        if mode in ['ll', 'lc']:
            lle = KernelReg(data, wave, 'c', reg_type=mode, bw=ll_bw)
            data = lle.fit()[0]
            LLEs.append(data)
            print('LLE bandwidth: ', lle.bw[0], end="\r")
        iplwave = np.linspace(wave.min(), wave.max(), 1000)
        ipldata = np.interp(iplwave, wave, data)
        iplidx = np.where(ipldata > ipldata.max() / 2)[0]
        vmin, vmax = iplidx.min(), iplidx.max()
        fwhms.append(iplwave[vmax] - iplwave[vmin])
        if imin:
            imins.append(1 - data.max())
            mvels.append(iplwave[ipldata.argmax()])
    if imin:
        return np.array(fwhms), np.array(mvels), np.array(imins), np.array(
            LLEs)
    return np.array(fwhms)
Example #7
File: demo.py  Project: morbult/pywafo
def kreg_demo1(hs=None, fast=True, fun='hisj'):
    """Compare KRegression to KernelReg from statsmodels.nonparametric

    Examples
    --------
    >>> kreg_demo1()
    """
    N = 100
    # ei = np.random.normal(loc=0, scale=0.075, size=(N,))
    ei = np.array([
        -0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
        0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
        -0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
        0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
        -0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
        0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
        -0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
        0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
        0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
        -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
        0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
        0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
        -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
        -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
        -0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
        -0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
        -0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
        0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
        -0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
        -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
    x = np.linspace(0, 1, N)

    va_1 = 0.3 ** 2
    va_2 = 0.7 ** 2
    y0 = np.exp(-x ** 2 / (2 * va_1)) + 1.3 * np.exp(-(x - 1) ** 2 / (2 * va_2))
    y = y0 + ei
    kernel = Kernel('gauss', fun=fun)
    hopt = kernel.hisj(x)
    kreg = KRegression(
        x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)
    if fast:
        # Note: special methods are looked up on the type, not the instance,
        # so assigning to the instance's __call__ does not change kreg(x) below.
        kreg.__call__ = kreg.eval_grid_fast

    f = kreg(x, output='plot', title='Kernel regression', plotflag=1)
    plt.figure(0)
    f.plot(label='p=0')

    kreg.p = 1
    f1 = kreg(x, output='plot', title='Kernel regression', plotflag=1)
    f1.plot(label='p=1')
    # print(f1.data)
    plt.plot(x, y, '.', label='data')
    plt.plot(x, y0, 'k', label='True model')
    from statsmodels.nonparametric.kernel_regression import KernelReg
    kreg2 = KernelReg(y, x, 'c')
    y2 = kreg2.fit(x)
    plt.plot(x, y2[0], 'm', label='statsmodels')

    plt.legend()
Example #8
def smooth_xy(x, y):
    x = np.squeeze(x)
    y = np.squeeze(y)
    #v = lowess(y, x, frac=.05)
    kernel_reg = KernelReg(y, x, var_type='c', reg_type='lc')
    kernel_reg.bw = np.asarray([.01])
    y = kernel_reg.fit(x)[0]
    return x, y
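
Note that constructing KernelReg with the default bw='cv_ls' runs least-squares cross-validation inside __init__, so overwriting kernel_reg.bw afterwards discards that work. Passing the bandwidth to the constructor skips the selection pass entirely; a sketch with the same output as smooth_xy above:

def smooth_xy_fixed_bw(x, y):
    x = np.squeeze(x)
    y = np.squeeze(y)
    # Fixed bandwidth supplied up front: no cross-validation pass in __init__
    kernel_reg = KernelReg(y, x, var_type='c', reg_type='lc', bw=[.01])
    return x, kernel_reg.fit(x)[0]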
Example #9
 def fit(self, X, y, variable_types={}):
     self.X_shape = X.shape
     self.y_shape = y.shape
     if variable_types:
         variable_type_string = "".join([variable_types[col] for col in X.columns])
         self.model = KernelReg(y, X, variable_type_string, reg_type="ll")
     else:
         self.model = KernelReg(y, X, "c" * X.shape[1], reg_type="ll")
     return self
Example #10
 def fit(self, X, y, variable_types={}):
     self.X_shape = X.shape
     self.y_shape = y.shape
     if variable_types:
         variable_type_string = ''.join([variable_types[col] for col in X.columns])
         self.model = KernelReg(y, X, variable_type_string, reg_type='ll')
     else:
         self.model = KernelReg(y, X, 'c' * X.shape[1], reg_type='ll')
     return self
Example #11
    def pred_from_loess(self, train_x, train_y, x_to_pred):
        """
    	Trains simple loess regression and returns predictions
    	"""
        kr_model = KernelReg(endog=train_y,
                             exog=train_x,
                             var_type='c',
                             bw=[self.bandwidth])

        return kr_model.fit(x_to_pred)[0]
Example #12
 def __init__(self, x, y, yerr=None):
     reg = KernelReg([y], [x], var_type='c', reg_type='ll')
     vals = reg.fit(x)[0]
     self.spline = interp.UnivariateSpline(x,
                                           vals,
                                           w=np.isfinite(vals),
                                           ext='const')
     # calculate RMS and normalize to stop normalization drifting
     xs = np.linspace(np.min(x), np.max(x), 1000)
     ys = self.spline(xs)
     self.rms = np.sqrt(np.sum(ys**2) / 1000)
Example #13
def find_extrema(s, bw='cv_ls'):
    """
    Input:
        s: prices as pd.series
        bw: bandwith as str or array like
    Returns:
        prices: with 0-based index as pd.series
        extrema: extrema of prices as pd.series
        smoothed_prices: smoothed prices using kernel regression as pd.series
        smoothed_extrema: extrema of smoothed_prices as pd.series
    """
    # Copy series so we can replace index and perform non-parametric
    # kernel regression.
    prices = s.copy()
    prices = prices.reset_index()
    prices.columns = ['date', 'price']
    prices = prices['price']

    kr = KernelReg([prices.values], [prices.index.to_numpy()],
                   var_type='c',
                   bw=bw)
    f = kr.fit([prices.index])

    # Use smoothed prices to determine local minima and maxima
    smooth_prices = pd.Series(data=f[0], index=prices.index)
    smooth_local_max = argrelextrema(smooth_prices.values, np.greater)[0]
    smooth_local_min = argrelextrema(smooth_prices.values, np.less)[0]
    local_max_min = np.sort(
        np.concatenate([smooth_local_max, smooth_local_min]))
    smooth_extrema = smooth_prices.loc[local_max_min]

    # Iterate over extrema arrays returning datetime of passed
    # prices array. Uses idxmax and idxmin to window for local extrema.
    price_local_max_dt = []
    for i in smooth_local_max:
        if (i > 1) and (i < len(prices) - 1):
            price_local_max_dt.append(prices.iloc[i - 2:i + 2].idxmax())

    price_local_min_dt = []
    for i in smooth_local_min:
        if (i > 1) and (i < len(prices) - 1):
            price_local_min_dt.append(prices.iloc[i - 2:i + 2].idxmin())

    maxima = pd.Series(prices.loc[price_local_max_dt])
    minima = pd.Series(prices.loc[price_local_min_dt])
    extrema = pd.concat([maxima, minima]).sort_index()

    # Return series for each with bar as index
    return extrema, prices, smooth_extrema, smooth_prices
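
A minimal usage sketch for find_extrema (the random-walk series and the fixed bandwidth array are assumptions; the default bw='cv_ls' also works but is slower):

import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
s = pd.Series(100.0 + np.cumsum(rng.normal(size=250)),
              index=pd.date_range('2020-01-01', periods=250))

extrema, prices, smooth_extrema, smooth_prices = find_extrema(s, bw=np.array([3.0]))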
Example #14
 def __init__(self, f, f2, pts3d, left_pts, right_pts, oldpts3d, safety_check=False):
     self.f = f
     self.f2 = f2
     self.safety_check = safety_check
     self.pts3d = np.matrix(pts3d)
     self.minimum = np.min(self.pts3d[:,2])
     self.maximum = np.max(self.pts3d[:,2])
     self.oldpts3d = oldpts3d
     self.left_pts = left_pts
     self.right_pts = right_pts
     pts2d = []
     ptsz = []
     f3 = open("../calibration_data/camera_matrix.p", "rb")
     self.cmat = pickle.load(f3)
     f3.close()
     
     for pt in pts3d:
         pts2d.append(pt[:2])
         ptsz.append(np.ceil(pt[2] * 1000000))
     self.neigh = KNeighborsClassifier(n_neighbors=2)
     self.neigh.fit(pts2d, ptsz)
     self.f = scipy.interpolate.Rbf(np.matrix(pts3d)[:,0].ravel(), np.matrix(pts3d)[:,1].ravel(), np.matrix(pts3d)[:,2].ravel(), function='linear', epsilon=.1)
     pts3d = np.array(pts3d).T
     print(pts3d.shape)
     print(pts3d[:2,:].shape, pts3d[2,:].shape)
     # Note: this replaces the Rbf interpolator assigned to self.f above
     self.f = KernelReg(pts3d[2,:], pts3d[:2,:], 'cc')
Example #15
    def __init__(self, X, causes, effects, admissable_set=[], variable_types=None, expectation=False, density=True):
        """
        We want to calculate the causal effect of X and Y through
        back-door adjustment, P(Y|do(X)) = Sum( P(Y|X,Z)P(Z), Z) 
        for some admissable set of control variables, Z.  First we 
        calculate the conditional density P(Y|X,Z), then the density
        P(Z).  We find the support of Z so we can properly sum over
        it later.  variable_types is a dictionary mapping each column name
        to an element of set(['o', 'u', 'c']), for 'ordered',
        'unordered discrete', or 'continuous'.
        """
        conditional_density_vars = causes + admissable_set
        self.causes = causes
        self.effects = effects
        self.admissable_set = admissable_set
        self.conditional_density_vars = conditional_density_vars

        if len(X) > 300 or max(len(causes+admissable_set),len(effects+admissable_set)) >= 3:
            self.defaults=EstimatorSettings(n_jobs=4, efficient=True)
        else:
            self.defaults=EstimatorSettings(n_jobs=-1, efficient=False)
        
        if variable_types:
            self.variable_types = variable_types
            dep_type      = [variable_types[var] for var in effects]
            indep_type    = [variable_types[var] for var in conditional_density_vars]
            density_types = [variable_types[var] for var in admissable_set]
        else:
            self.variable_types = self.__infer_variable_types(X)

        # assumes variable_types was provided (the inference branch above is a stub);
        # cv_ml for all-discrete data, normal_reference when any variable is continuous
        if 'c' not in variable_types.values():
            bw = 'cv_ml'
        else:
            bw = 'normal_reference'


        if admissable_set:            
            self.density = KDEMultivariate(X[admissable_set], 
                                  var_type=''.join(density_types),
                                  bw=bw,
                                  defaults=self.defaults)
        
        self.conditional_density = KDEMultivariateConditional(endog=X[effects],
                                                         exog=X[conditional_density_vars],
                                                         dep_type=''.join(dep_type),
                                                         indep_type=''.join(indep_type),
                                                         bw=bw,
                                                         defaults=self.defaults)
        if expectation:
            self.conditional_expectation = KernelReg(X[effects].values,
                                                 X[conditional_density_vars].values,
                                                 ''.join(indep_type),
                                                 bw='cv_ls')

        self.support = self.__get_support(X)
        
        self.discrete_variables = [ variable for variable, var_type in self.variable_types.items() if var_type in ['o', 'u']]
        self.discrete_Z = list(set(self.discrete_variables).intersection(set(admissable_set)))
        self.continuous_variables = [ variable for variable, var_type in self.variable_types.items() if var_type == 'c' ]
        self.continuous_Z = list(set(self.continuous_variables).intersection(set(admissable_set)))
Example #16
class local_stack:
    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        N, p = X_train.shape
        self.kernel = KernelReg(y_train, X_train, var_type=p * 'c')

    def predict(self, X):
        return self.kernel.fit(X)[0]
Example #17
class LocalRegression:
    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        # By default, this function will do a local linear regression
        self.regression = KernelReg(y_train, X_train, var_type='c')
        return self

    def predict(self, X_test):
        return self.regression.fit(X_test)[0]
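
A usage sketch (the synthetic 1-D data is an assumption; note that var_type='c' means this class as written expects a single continuous feature):

import numpy as np

rng = np.random.default_rng(6)
X_train = np.sort(rng.uniform(-3, 3, 150))
y_train = np.tanh(X_train) + rng.normal(scale=0.1, size=150)

model = LocalRegression().fit(X_train, y_train)
y_hat = model.predict(X_train)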
Example #18
def fitData(X, y, method):
    if method == "simple-lr":
        model = LinearRegression().fit(X, y)
        return model.predict(X)
    elif method == "nonpara-lr":
        model = KernelRidge(kernel='linear').fit(X, y)
        return model.predict(X)
    elif method == "nonpara-poly":
        model = KernelReg(endog=y, exog=X, var_type='c', reg_type='ll')
        x2 = np.reshape(range(600), (-1, 1))  # hard-coded grid of 600 prediction points
        return model.fit(x2)[0]
Example #19
def compute_arrival_rate(volume, duration, strikes):
    volume_duration = pd.concat([volume.sum(), duration.sum()],
                                keys=['Volume', 'Duration'],
                                axis=1)
    volume_duration_kernel = volume_duration.apply(
        lambda vd: vd.groupby('Half-spread').apply(lambda d: KernelReg(
            d.xs(d.name, level='Half-spread'),
            d.xs(d.name, level='Half-spread').index, 'c', 'lc')))
    arrival_rate = volume_duration_kernel.apply(
        lambda vd: vd.groupby('Half-spread').apply(lambda k: pd.Series(
            k.xs(k.name).fit(strikes)[0], strikes)))
    return np.log(arrival_rate['Volume'] / arrival_rate['Duration'])
Example #20
File: simple.py  Project: sunnytoes/alpha
    def apply_code(self, mqb, ctx):
        predictions = []

        ctx['iter_count'] += 1

        for duration in self.DURATIONS:

            if ctx['iter_count'] > duration * self.TIMES_IN_WINDOW:
                close_mid_values = mqb['close_mid'].last_with_duration(
                    self.TIMES_IN_WINDOW, duration)
                indexes = linspace(1., len(close_mid_values),
                                   len(close_mid_values))
                close_prices = pd.Series(index=indexes, data=close_mid_values)
                prices = close_prices.copy()

                kr = KernelReg([prices.values], [prices.index.values],
                               var_type='c',
                               bw=[1.8, 1])

                max_mins = self.find_max_min(prices, kr)

                if max_mins.shape[0] == 5:
                    e1 = max_mins.iloc[0]
                    e2 = max_mins.iloc[1]
                    e3 = max_mins.iloc[2]
                    e4 = max_mins.iloc[3]
                    e5 = max_mins.iloc[4]

                    if e1 > e2 and e3 > e2 and e5 > e2 and e1 > e4 and e3 > e4 and e5 > e4:
                        if e5 > e3 > e1 and e2 < e4:
                            if close_mid_values[-1] > e5:
                                prediction = {'duration': duration, 'value': 1}
                                predictions.append(prediction)

                elif max_mins.shape[0] == 6:
                    e1 = max_mins.iloc[0]
                    e2 = max_mins.iloc[1]
                    e3 = max_mins.iloc[2]
                    e4 = max_mins.iloc[3]
                    e5 = max_mins.iloc[4]
                    e6 = max_mins.iloc[5]

                    # Same odd-above-even pattern as the five-extrema case
                    if e1 > e2 and e3 > e2 and e5 > e2 and e1 > e4 and e3 > e4 and e5 > e4 and e1 > e6 and e3 > e6 and e5 > e6:
                        if e1 < e3 < e5 and e6 < e4 < e2:
                            if close_mid_values[-1] < e6:
                                prediction = {
                                    'duration': duration,
                                    'value': -1
                                }
                                predictions.append(prediction)

        return predictions
Example #21
def find_max_min(prices):
    """
    Get min and max of a series consisting of prices
    """

    prices_ = prices.copy()
    prices_.index = np.linspace(1., len(prices_), len(prices_))
    kr = KernelReg([prices_.values], [prices_.index.values],
                   var_type='c',
                   bw=[1.8])
    f = kr.fit([prices_.index.values])
    smooth_prices = pd.Series(data=f[0], index=prices.index)

    local_max = argrelextrema(smooth_prices.values, np.greater)[0]
    local_min = argrelextrema(smooth_prices.values, np.less)[0]

    price_local_max_dt = []
    for i in local_max:
        if (i > 1) and (i < len(prices) - 1):
            # idxmax returns the index label of the local window's maximum
            price_local_max_dt.append(prices.iloc[i - 2:i + 2].idxmax())

    price_local_min_dt = []
    for i in local_min:
        if (i > 1) and (i < len(prices) - 1):
            price_local_min_dt.append(prices.iloc[i - 2:i + 2].idxmin())

    prices.name = 'price'
    maxima = pd.DataFrame(prices.loc[price_local_max_dt])
    minima = pd.DataFrame(prices.loc[price_local_min_dt])
    max_min = pd.concat([maxima, minima]).sort_index()
    max_min.index.name = 'date'
    max_min = max_min.reset_index()
    max_min = max_min[~max_min.date.duplicated()]
    p = prices.reset_index()
    max_min['day_num'] = p[p['index'].isin(max_min.date)].index.values
    max_min = max_min.set_index('day_num').price

    return max_min
Example #22
def calc_smooth(prices: pd.Series, *, bw: Union[np.ndarray, str] = 'cv_ls', a: float = None, use_array: bool = True) -> Union[pd.Series, np.ndarray]:
    """Compute Nadaraya-Watson kernel-smoothed price data.

    Args:
        prices (pd.Series): price data
        bw (Union[np.ndarray, str]): either a user-specified bandwidth or the method for bandwidth selection. Defaults to cv_ls.
        a (float, optional): the scaling ratio described in the paper. Defaults to None.
        use_array (bool, optional): if True return an ndarray, otherwise a pd.Series. Defaults to True.

    Returns:
        Union[pd.Series, np.ndarray]
    """
    if not isinstance(prices, pd.Series):
        raise ValueError('prices must be a pd.Series')

    idx = np.arange(len(prices))

    kr = KernelReg(prices.values, idx,
                   var_type='c', reg_type='ll', bw=bw)

    if a is not None:
        kr.bw = a * kr.bw  # the paper uses 0.3 * h

    f = kr.fit(idx)[0]

    if use_array:
        return f
    return pd.Series(data=f, index=prices.index)
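
A usage sketch for calc_smooth (the synthetic series is an assumption; a=0.3 follows the ratio mentioned in the code comment):

import numpy as np
import pandas as pd

rng = np.random.default_rng(3)
prices = pd.Series(50.0 + np.cumsum(rng.normal(size=300)))

smoothed = calc_smooth(prices, a=0.3, use_array=False)  # pd.Series aligned with prices.index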
Example #23
def estimator_nw(data, est_kwargs={}, **kwargs):
    from statsmodels.nonparametric.kernel_regression import KernelReg
    #http://www.statsmodels.org/dev/generated/statsmodels.nonparametric.kernel_density.EstimatorSettings.html
    from statsmodels.nonparametric.kernel_regression import EstimatorSettings
    k = len(data['x']['Train'].T)
    #    n = len(data['x']['Train'])

    # 'reg_type' in est_kwargs allows locally linear estimation;
    # the default is local constant (Nadaraya-Watson).
    reg_type = est_kwargs.get('reg_type', 'lc')

    #Estimate model
    nw = KernelReg(
        data['y']['Train'],
        data['x']['Train'],  #Fits regression
        var_type='c' * k,  #Continuous variables
        reg_type=reg_type,
        bw='aic',  # AIC Hurvich bandwidth selection; use 'cv_ls' for least-squares cross-validation
        defaults=EstimatorSettings(
            n_jobs=1,  #No parallel
            efficient=True,
            randomize=True,  #bw estimation random subsampling
            n_res=25,  #Number of resamples
            n_sub=50,  # Size of samples 
        ),
    )
    betahat = np.array([])  #NP does not have coefficients

    # Extract results
    prob, mrgeff = {}, {}
    for split in ('Train', 'Test'):
        prob[split], mrgeff[split] = nw.fit(data_predict=data['x'][split])

    return betahat, prob, mrgeff
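
estimator_nw expects a nested data dict keyed by 'x'/'y' and 'Train'/'Test'; a hedged construction sketch with synthetic data:

import numpy as np

rng = np.random.default_rng(4)
X = rng.normal(size=(200, 2))
y = X[:, 0] ** 2 + rng.normal(scale=0.1, size=200)

data = {
    'x': {'Train': X[:150], 'Test': X[150:]},
    'y': {'Train': y[:150], 'Test': y[150:]},
}
betahat, prob, mrgeff = estimator_nw(data, est_kwargs={'reg_type': 'll'})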
Example #24
def find_max_min(prices):
    prices_ = prices.copy()
    prices_.index = linspace(1., len(prices_), len(prices_))
    #kr = KernelReg([prices_.values], [prices_.index.values], var_type='c', bw=[1.8, 1])
    kr = KernelReg([prices_.values], [prices_.index.values], var_type='c', bw=[2])  # small bw captures local structure, large captures global!
    # Either a user-specified bandwidth or the method for bandwidth selection.
    # If a string, valid values are 'cv_ls' (least-squares cross-validation) and 'aic' (AIC Hurvich bandwidth estimation).
    # Default is 'cv_ls'.
    f = kr.fit([prices_.index.values])

    smooth_prices = pd.Series(data=f[0], index=prices.index)

    local_max = argrelextrema(smooth_prices.values, np.greater)[0]
    local_min = argrelextrema(smooth_prices.values, np.less)[0]
    price_local_max_dt = []
    for i in local_max:
        if (i > 1) and (i < len(prices) - 1):
            price_local_max_dt.append(prices.iloc[i - 2:i + 2].idxmax())

    price_local_min_dt = []
    for i in local_min:
        if (i > 1) and (i < len(prices) - 1):
            price_local_min_dt.append(prices.iloc[i - 2:i + 2].idxmin())

    prices.name = 'price'
    maxima = pd.DataFrame(prices.loc[price_local_max_dt])
    minima = pd.DataFrame(prices.loc[price_local_min_dt])
    max_min = pd.concat([maxima, minima]).sort_index()
    max_min.index.name = 'date'
    max_min = max_min.reset_index()
    max_min = max_min[~max_min.date.duplicated()]
    p = prices.reset_index()
    max_min['day_num'] = p[p['index'].isin(max_min.date)].index.values
    max_min = max_min.set_index('day_num').price

    return max_min
Example #25
class KernelModelWrapper(object):
    def __init__(self):
        self.model = None
        self.variable_types = {}
        self.X_shape = None
        self.y_shape = None

    def fit(self, X, y, variable_types={}):
        self.X_shape = X.shape
        self.y_shape = y.shape
        if variable_types:
            variable_type_string = ''.join([variable_types[col] for col in X.columns])
            self.model = KernelReg(y, X, variable_type_string, reg_type='ll')
        else:
            self.model = KernelReg(y, X, 'c' * X.shape[1], reg_type='ll')
        return self

    def predict(self, X):
        if X.shape != self.X_shape:
            raise Exception("Expected shape {}, received {}".format(self.X_shape, X.shape))
        return self.model.fit(X)[0]
Example #26
class KernelModelWrapper(object):
    def __init__(self):
        self.model = None
        self.variable_types = {}
        self.X_shape = None
        self.y_shape = None

    def fit(self, X, y, variable_types={}):
        self.X_shape = X.shape
        self.y_shape = y.shape
        if variable_types:
            variable_type_string = ''.join(
                [variable_types[col] for col in X.columns])
            self.model = KernelReg(y, X, variable_type_string, reg_type='ll')
        else:
            self.model = KernelReg(y, X, 'c' * X.shape[1], reg_type='ll')
        return self

    def predict(self, X):
        if X.shape != self.X_shape:
            raise Exception("Expected shape {}, received {}".format(
                self.X_shape, X.shape))
        return self.model.fit(X)[0]
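
A usage sketch for KernelModelWrapper (the DataFrame and variable types are assumptions):

import numpy as np
import pandas as pd

rng = np.random.default_rng(7)
X = pd.DataFrame({'a': rng.normal(size=120), 'b': rng.integers(0, 3, 120)})
y = X['a'] ** 2 + X['b'] + rng.normal(scale=0.1, size=120)

wrapper = KernelModelWrapper().fit(X, y, variable_types={'a': 'c', 'b': 'o'})
preds = wrapper.predict(X)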
Example #27
class Surface:
    def __init__(self,
                 f,
                 f2,
                 pts3d,
                 left_pts,
                 right_pts,
                 oldpts3d,
                 safety_check=False):
        self.f = f
        self.f2 = f2
        self.safety_check = safety_check
        self.pts3d = np.matrix(pts3d)
        self.minimum = np.min(self.pts3d[:, 2])
        self.maximum = np.max(self.pts3d[:, 2])
        self.oldpts3d = oldpts3d
        self.left_pts = left_pts
        self.right_pts = right_pts
        pts2d = []
        ptsz = []
        f3 = open("../calibration_data/camera_matrix.p", "rb")
        self.cmat = pickle.load(f3)
        f3.close()

        for pt in pts3d:
            pts2d.append(pt[:2])
            ptsz.append(np.ceil(pt[2] * 1000000))
        self.neigh = KNeighborsClassifier(n_neighbors=2)
        self.neigh.fit(pts2d, ptsz)
        self.f = scipy.interpolate.Rbf(np.matrix(pts3d)[:, 0].ravel(),
                                       np.matrix(pts3d)[:, 1].ravel(),
                                       np.matrix(pts3d)[:, 2].ravel(),
                                       function='linear',
                                       epsilon=.1)
        pts3d = np.array(pts3d).T
        print(pts3d.shape)
        print(pts3d[:2, :].shape, pts3d[2, :].shape)
        # Note: this replaces the Rbf interpolator assigned to self.f above
        self.f = KernelReg(pts3d[2, :], pts3d[:2, :], 'cc')

    def leftpixels_to_rframe(self, x, y):
        surf = self.f2
        left_pts = self.left_pts
        right_pts = self.right_pts
        pts3d = self.oldpts3d
        xin = np.array([a[0] for a in left_pts])
        bias = np.ones(len(xin))
        yin = np.array([a[1] for a in left_pts])

        xout = np.array([a[0] for a in pts3d])
        yout = np.array([a[1] for a in pts3d])

        A = np.vstack([xin, bias]).T
        m1, c1 = np.linalg.lstsq(A, xout)[0]

        A = np.vstack([yin, bias]).T
        m2, c2 = np.linalg.lstsq(A, yout)[0]

        xnew = m1 * x + c1
        ynew = m2 * y + c2
        cpoint = np.matrix([(xnew, ynew, self.f2(xnew, ynew))])
        pt = np.ones(4)
        pt[:3] = cpoint
        pred = self.cmat * np.matrix(pt).T
        return pred

    def query(self, x, y):
        temp = self.f.fit(np.array((x, y)))[0][0]
        if not self.safety_check:
            return (x, y, temp)
        if temp < self.minimum - 0.02:
            temp = self.query_knn(x, y)[2]
        elif temp > self.maximum + 0.02:
            temp = self.query_knn(x, y)[2]
        print('asdf', temp)
        return (x, y, temp)

    def query_knn(self, x, y):
        return (x, y, (self.neigh.predict([[x, y]]) / 1000000.0)[0])

    def visualize(self):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        pts3d = np.matrix(self.pts3d)
        f = self.f
        a, b = np.ravel(np.min(pts3d, axis=0)), np.ravel(np.max(pts3d, axis=0))
        extra_range = 0.0
        #         xnew = np.arange(a[0] - extra_range,b[0] + extra_range,0.0001)
        #         ynew = np.arange(a[1] - extra_range,b[1] + extra_range,0.0001)
        X, Y = np.mgrid[a[0] + .05:b[0] - .05:100j, a[1]:b[1]:100j]
Example #28
fairK = np.array((3, 5, 9, 15, 20, 25, 30, 35, 40, 45))

event_lengths = durs_run1_new / fairK

unique_event_lengths = np.unique(event_lengths)
x = event_lengths.ravel()

test_x = np.linspace(min(x), max(x), num=100)
smooth_wva = np.zeros((len(unique_event_lengths), len(ROI_data), nBoots))

opt_bw_holder = np.zeros((nBoots, len(ROI_data)))

for ROI in range(len(ROI_data)):
    for b in range(nBoots):
        opt_bw = 0
        y = ROI_data[ROI][:, :, b].ravel()
        KR = KernelReg(y, x, var_type='c')
        opt_bw += KR.bw / len(ROI_data)
        opt_bw_holder[b, ROI] = opt_bw
        y = ROI_data[ROI][:, :, b].ravel()
        KR = KernelReg(y, x, var_type='c', bw=opt_bw)
        smooth_wva[:, ROI, b] += KR.fit(unique_event_lengths)[0]

np.save(
    datadir + 'smooth_' + suffix + '_' + save_fn +
    '_auto_independent_bandwidths', smooth_wva)
np.save(
    datadir + 'smooth_' + suffix + '_' + save_fn +
    '_auto_independent_optimal_bandwidth', opt_bw_holder)
Example #29
class CausalEffect(object):
    def __init__(self, X, causes, effects, admissable_set=[], variable_types=None, expectation=False, density=True):
        """
        We want to calculate the causal effect of X and Y through
        back-door adjustment, P(Y|do(X)) = Sum( P(Y|X,Z)P(Z), Z) 
        for some admissable set of control variables, Z.  First we 
        calculate the conditional density P(Y|X,Z), then the density
        P(Z).  We find the support of Z so we can properly sum over
        it later.  variable_types is a dictionary mapping each column name
        to an element of set(['o', 'u', 'c']), for 'ordered',
        'unordered discrete', or 'continuous'.
        """
        conditional_density_vars = causes + admissable_set
        self.causes = causes
        self.effects = effects
        self.admissable_set = admissable_set
        self.conditional_density_vars = conditional_density_vars
        
        if variable_types:
            self.variable_types = variable_types
            dep_type      = [variable_types[var] for var in effects]
            indep_type    = [variable_types[var] for var in conditional_density_vars]
            density_types = [variable_types[var] for var in admissable_set]
        else:
            self.variable_types = self.__infer_variable_types(X)

        # assumes variable_types was provided (the inference branch above is a stub);
        # cv_ml for all-discrete data, normal_reference when any variable is continuous
        if 'c' not in variable_types.values():
            bw = 'cv_ml'
        else:
            bw = 'normal_reference'


        if admissable_set:            
            self.density = KDEMultivariate(X[admissable_set], 
                                  var_type=''.join(density_types),
                                  bw=bw)
        
        self.conditional_density = KDEMultivariateConditional(endog=X[effects],
                                                         exog=X[conditional_density_vars],
                                                         dep_type=''.join(dep_type),
                                                         indep_type=''.join(indep_type),
                                                         bw=bw)
        if expectation:
            self.conditional_expectation = KernelReg(X[effects].values,
                                                 X[conditional_density_vars].values,
                                                 ''.join(indep_type),
                                                 bw='cv_ls')

        self.support = self.__get_support(X)
        
        self.discrete_variables = [ variable for variable, var_type in self.variable_types.items() if var_type in ['o', 'u']]
        self.discrete_Z = list(set(self.discrete_variables).intersection(set(admissable_set)))
        self.continuous_variables = [ variable for variable, var_type in self.variable_types.items() if var_type == 'c' ]
        self.continuous_Z = list(set(self.continuous_variables).intersection(set(admissable_set)))
       
 
    def __infer_variable_types(self,X):
        """
        fill this in later.
        """
        pass
       
 
    def __get_support(self, X):
        """
        find the smallest cube around which the densities are supported,
        allowing a little flexibility for variables with larger bandwidths.
        """
        data_support = { variable : (X[variable].min(), X[variable].max()) for variable in X.columns}
        variable_bandwidths = { variable : bw for variable, bw in zip(self.effects + self.conditional_density_vars, self.conditional_density.bw)}
        support = {}
        for variable in self.effects + self.conditional_density_vars:
            if self.variable_types[variable] == 'c':
                lower_support = data_support[variable][0] - 10. * variable_bandwidths[variable]
                upper_support = data_support[variable][1] + 10. * variable_bandwidths[variable]
                support[variable] = (lower_support, upper_support)
            else:
                support[variable] = data_support[variable]
        return support

        
    def integration_function(self,*args):
        # takes continuous z, discrete z, then x
        data = pd.DataFrame({ k : [v] for k, v in zip(self.continuous_Z + self.discrete_Z + self.causes + self.effects, args)})
        conditional = self.conditional_density.pdf(exog_predict=data[self.conditional_density_vars].values[0], 
                                                   endog_predict=data[self.effects].values[0]) 
        density = self.density.pdf(data_predict=data[self.admissable_set])
        return conditional * density

    
    def expectation_integration_function(self, *args):
        data = pd.DataFrame({ k : [v] for k, v in zip(self.continuous_Z + self.discrete_Z + self.causes, args)})
        conditional = self.conditional_expectation.fit(data_predict=data[self.conditional_density_vars].values)[0]
        density = self.density.pdf(data_predict=data[self.admissable_set])
        return conditional * density

    
    def pdf(self, x):
        """
        Currently, this does the whole sum/integral over the cube support of Z.
        We may be able to improve this by taking into account how the joint
        and conditionals factorize, and/or finding a more efficient support.
        
        This should be reasonably fast for |Z| <= 2 or 3, and small enough discrete
        variable cardinalities.  It runs in O(n_1 n_2 ... n_k) in the cardinality of
        the discrete variables, |Z_1| = n_1, etc.  It likewise runs in O(V^n) for n
        continuous Z variables.  Factorizing the joint/conditional distributions in
        the sum could linearize the runtime.
        """
        causal_effect = 0.
        x = x[self.causes + self.effects]
        if self.discrete_Z:
            discrete_variable_ranges = [ range(*(int(self.support[variable][0]), int(self.support[variable][1])+1)) for variable in self.discrete_Z]
            for z_vals in itertools.product(*discrete_variable_ranges):
                z_discrete = pd.DataFrame({k : [v] for k, v in zip(self.discrete_Z, z_vals)})
                if self.continuous_Z:
                    continuous_Z_ranges = [self.support[variable] for variable in self.continuous_Z]
                    args = z_discrete.join(x).values[0]
                    causal_effect += nquad(self.integration_function,continuous_Z_ranges,args=args)[0]
                else:
                    z_discrete = z_discrete[self.admissable_set]
                    exog_predictors = x.join(z_discrete)[self.conditional_density_vars]
                    conditional = self.conditional_density.pdf(exog_predict=exog_predictors, 
                                                               endog_predict=x[self.effects]) 
                    density = self.density.pdf(data_predict=z_discrete)
                    dc = conditional * density
                    causal_effect += dc
            return causal_effect
        elif self.continuous_Z:
            continuous_Z_ranges = [self.support[var] for var in self.continuous_Z]
            causal_effect, error = nquad(self.integration_function,continuous_Z_ranges,args=tuple(x.values[0]))
            return causal_effect
        else:
            return self.conditional_density.pdf(exog_predict=x[self.causes],endog_predict=x[self.effects])

       
 
    def expected_value(self, x):
        """
        Currently, this does the whole sum/integral over the cube support of Z.
        We may be able to improve this by taking into account how the joint
        and conditionals factorize, and/or finding a more efficient support.
        
        This should be reasonably fast for |Z| <= 2 or 3, and small enough discrete
        variable cardinalities.  It runs in O(n_1 n_2 ... n_k) in the cardinality of
        the discrete variables, |Z_1| = n_1, etc.  It likewise runs in O(V^n) for n
        continuous Z variables.  Factorizing the joint/conditional distributions in
        the sum could linearize the runtime.
        """
        causal_effect = 0.
        x = x[self.causes]
        if self.discrete_Z:
            discrete_variable_ranges = [ range(*(int(self.support[variable][0]), int(self.support[variable][1])+1)) for variable in self.discrete_Z]
            for z_vals in itertools.product(*discrete_variable_ranges):
                z_discrete = pd.DataFrame({k : [v] for k, v in zip(self.discrete_Z, z_vals)})
                if self.continuous_Z:
                    continuous_Z_ranges = [self.support[variable] for variable in self.continuous_Z]
                    args = z_discrete.join(x).values[0]
                    causal_effect += nquad(self.expectation_integration_function,continuous_Z_ranges,args=args)[0]
                else:
                    z_discrete = z_discrete[self.admissable_set]
                    exog_predictors = x.join(z_discrete)[self.conditional_density_vars]
                    causal_effect += self.conditional_expectation.fit(data_predict=exog_predictors.values)[0] * self.density.pdf(data_predict=z_discrete.values)
            return causal_effect
        elif self.continuous_Z:
            continuous_Z_ranges = [self.support[var] for var in self.continuous_Z]
            causal_effect, error = nquad(self.expectation_integration_function,continuous_Z_ranges,args=tuple(x.values[0]))
            return causal_effect
        else:
            return self.conditional_expectation.fit(data_predict=x[self.causes])[0]
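
A hedged end-to-end sketch of the back-door adjustment described in the docstring (synthetic confounded data; the variable types and sample size are assumptions, the class's own imports are presumed in scope, and the cv_ls bandwidth search makes this slow):

import numpy as np
import pandas as pd

rng = np.random.default_rng(5)
n = 200
z = rng.binomial(1, 0.5, n)           # confounder Z
x = rng.binomial(1, 0.3 + 0.4 * z)    # cause X, influenced by Z
y = 2.0 * x + z + rng.normal(size=n)  # effect Y

X = pd.DataFrame({'x': x, 'y': y, 'z': z})
ce = CausalEffect(X, ['x'], ['y'], admissable_set=['z'],
                  variable_types={'x': 'o', 'y': 'c', 'z': 'o'},
                  expectation=True)
print(ce.expected_value(pd.DataFrame({'x': [1]})))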
Example #30
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.nonparametric.kernel_regression import KernelReg

x = np.sort(np.random.rand(400) * 10 - 2)
y = x**4 - 8 * (x**3) + 14 * (x**2) - 32 * (x) + 14 + (
    (np.random.rand(len(x)) - 0.5) * 50)
y_clean = x**4 - 8 * (x**3) + 14 * (x**2) - 32 * (x) + 14

reg = KernelReg(y, x, 'c')
[mean, mfx] = reg.fit()

plt.figure()
plt.scatter(x, y)
plt.plot(x, mean, color="red")
plt.plot(x, y_clean, color="green")
plt.show()
Example #31
class Surface:

    def __init__(self, f, f2, pts3d, left_pts, right_pts, oldpts3d, safety_check=False):
        self.f = f
        self.f2 = f2
        self.safety_check = safety_check
        self.pts3d = np.matrix(pts3d)
        self.minimum = np.min(self.pts3d[:,2])
        self.maximum = np.max(self.pts3d[:,2])
        self.oldpts3d = oldpts3d
        self.left_pts = left_pts
        self.right_pts = right_pts
        pts2d = []
        ptsz = []
        f3 = open("../calibration_data/camera_matrix.p", "rb")
        self.cmat = pickle.load(f3)
        f3.close()
        
        for pt in pts3d:
            pts2d.append(pt[:2])
            ptsz.append(np.ceil(pt[2] * 1000000))
        self.neigh = KNeighborsClassifier(n_neighbors=2)
        self.neigh.fit(pts2d, ptsz)
        self.f = scipy.interpolate.Rbf(np.matrix(pts3d)[:,0].ravel(), np.matrix(pts3d)[:,1].ravel(), np.matrix(pts3d)[:,2].ravel(), function='linear', epsilon=.1)
        pts3d = np.array(pts3d).T
        print(pts3d.shape)
        print(pts3d[:2,:].shape, pts3d[2,:].shape)
        # Note: this replaces the Rbf interpolator assigned to self.f above
        self.f = KernelReg(pts3d[2,:], pts3d[:2,:], 'cc')

    def leftpixels_to_rframe(self, x, y):
        surf = self.f2
        left_pts = self.left_pts
        right_pts = self.right_pts
        pts3d = self.oldpts3d
        xin = np.array([a[0] for a in left_pts])
        bias = np.ones(len(xin))
        yin = np.array([a[1] for a in left_pts])

        xout = np.array([a[0] for a in pts3d])
        yout = np.array([a[1] for a in pts3d])

        A = np.vstack([xin, bias]).T
        m1, c1 = np.linalg.lstsq(A, xout)[0]

        A = np.vstack([yin, bias]).T
        m2, c2 = np.linalg.lstsq(A, yout)[0]

        xnew = m1 * x + c1
        ynew = m2 * y + c2
        cpoint = np.matrix([(xnew, ynew, self.f2(xnew, ynew))])
        pt = np.ones(4)
        pt[:3] = cpoint
        pred = self.cmat * np.matrix(pt).T
        return pred

    def query(self, x, y):
        temp = self.f.fit(np.array((x, y)))[0][0]
        if not self.safety_check:
            return (x, y, temp)
        if temp < self.minimum - 0.02:
            temp = self.query_knn(x, y)[2]
        elif temp > self.maximum + 0.02:
            temp = self.query_knn(x, y)[2]
        print('asdf', temp)
        return (x, y, temp)

    def query_knn(self, x, y):
        return (x, y, (self.neigh.predict([[x, y]]) / 1000000.0)[0])

    def visualize(self):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        pts3d = np.matrix(self.pts3d)
        f = self.f
        a, b =  np.ravel(np.min(pts3d, axis=0)), np.ravel(np.max(pts3d, axis=0))
        extra_range = 0.0
#         xnew = np.arange(a[0] - extra_range,b[0] + extra_range,0.0001)
#         ynew = np.arange(a[1] - extra_range,b[1] + extra_range,0.0001)
        X, Y = np.mgrid[a[0] + .05:b[0] - .05:100j, a[1]:b[1]:100j]
Example #32
 def fit(self, X_train, y_train):
     N, p = X_train.shape
     self.kernel = KernelReg(y_train, X_train, var_type=p * 'c')
Example #33
event_lengths = durs_run1_new/fairK

unique_event_lengths = np.unique(event_lengths)
x = event_lengths.ravel()

ROI_data = [a1_data, AG_data, prec_data, mpfc_data]
#ROI_data = [a1_data,AG_data,prec_data]

test_x = np.linspace(min(x), max(x), num=100)
smooth_wva = np.zeros((len(unique_event_lengths), len(ROI_data), nBoots))

for b in range(nBoots):
    # Optimize bandwidth
    opt_bw = 0
    for ROI in range(len(ROI_data)):
        y = ROI_data[ROI][:,:,b].ravel()
        KR = KernelReg(y,x,var_type='c')
        opt_bw += KR.bw/len(ROI_data)

    max_wva = np.zeros(len(ROI_data))
    for ROI in range(len(ROI_data)):
        y = ROI_data[ROI][:,:,b].ravel()
        KR = KernelReg(y,x,var_type='c', bw=opt_bw)
        max_wva[ROI] = np.argmax(KR.fit(test_x)[0])  # Find peak on fine grid
        smooth_wva[:, ROI, b] += KR.fit(unique_event_lengths)[0]

np.save(datadir + 'smooth_wva_split_merge_01_a1_prec_AG_bilmPFC',smooth_wva)


Example #34
x2 = xax2
y2 = tweetatsec2
x3 = xax3
y3 = tweetatsec3
x4 = xax4
y4 = tweetatsec4
pyplot.xlabel('Second')
pyplot.ylabel('Total tweet')


pyplot.scatter(x,y,color='cyan')
pyplot.scatter(x2,y2,color='red')
pyplot.scatter(x3,y3,color='blue')
pyplot.scatter(x4,y4,color='green')

kr = KernelReg(y,x,'o')
kr2 = KernelReg(y2,x2,'o')
kr3 = KernelReg(y3,x3,'o')
kr4 = KernelReg(y4,x4,'o')
pyplot.plot(x, y, '+')
pyplot.plot(x2,y2,'+')
pyplot.plot(x3,y3,'+')
pyplot.plot(x4,y4,'+')

# fit() returns (conditional mean, marginal effects), not a standard deviation
y_pred, y_mfx = kr.fit(x)
y2_pred, y2_mfx = kr2.fit(x2)
y3_pred, y3_mfx = kr3.fit(x3)
y4_pred, y4_mfx = kr4.fit(x4)

pyplot.plot(x, y_pred,'cyan',label='twitter')
pyplot.plot(x2,y2_pred,'red',label='facebook')
Example #35
 def fit(self, X_train, y_train):
     # By default, this function will do a local linear regression
     self.regression = KernelReg(y_train, X_train, var_type='c')
     return self
Example #36
    def __init__(self,
                 X,
                 causes,
                 effects,
                 admissable_set=[],
                 variable_types=None,
                 expectation=False,
                 density=True):
        """
        We want to calculate the causal effect of X and Y through
        back-door adjustment, P(Y|do(X)) = Sum( P(Y|X,Z)P(Z), Z) 
        for some admissable set of control variables, Z.  First we 
        calculate the conditional density P(Y|X,Z), then the density
        P(Z).  We find the support of Z so we can properly sum over
        it later.  variable_types is a dictionary mapping each column name
        to an element of set(['o', 'u', 'c']), for 'ordered',
        'unordered discrete', or 'continuous'.
        """
        conditional_density_vars = causes + admissable_set
        self.causes = causes
        self.effects = effects
        self.admissable_set = admissable_set
        self.conditional_density_vars = conditional_density_vars

        if variable_types:
            self.variable_types = variable_types
            dep_type = [variable_types[var] for var in effects]
            indep_type = [
                variable_types[var] for var in conditional_density_vars
            ]
            density_types = [variable_types[var] for var in admissable_set]
        else:
            self.variable_types = self.__infer_variable_types(X)

        # assumes variable_types was provided (the inference branch above is a stub);
        # cv_ml for all-discrete data, normal_reference when any variable is continuous
        if 'c' not in variable_types.values():
            bw = 'cv_ml'
        else:
            bw = 'normal_reference'

        if admissable_set:
            self.density = KDEMultivariate(X[admissable_set],
                                           var_type=''.join(density_types),
                                           bw=bw)

        self.conditional_density = KDEMultivariateConditional(
            endog=X[effects],
            exog=X[conditional_density_vars],
            dep_type=''.join(dep_type),
            indep_type=''.join(indep_type),
            bw=bw)
        if expectation:
            self.conditional_expectation = KernelReg(
                X[effects].values,
                X[conditional_density_vars].values,
                ''.join(indep_type),
                bw='cv_ls')

        self.support = self.__get_support(X)

        self.discrete_variables = [
            variable for variable, var_type in self.variable_types.items()
            if var_type in ['o', 'u']
        ]
        self.discrete_Z = list(
            set(self.discrete_variables).intersection(set(admissable_set)))
        self.continuous_variables = [
            variable for variable, var_type in self.variable_types.items()
            if var_type == 'c'
        ]
        self.continuous_Z = list(
            set(self.continuous_variables).intersection(set(admissable_set)))
Example #37
    def __init__(self,
                 f,
                 f2,
                 pts3d,
                 left_pts,
                 right_pts,
                 oldpts3d,
                 safety_check=False):
        self.f = f
        self.f2 = f2
        self.safety_check = safety_check
        self.pts3d = np.matrix(pts3d)
        self.minimum = np.min(self.pts3d[:, 2])
        self.maximum = np.max(self.pts3d[:, 2])
        self.oldpts3d = oldpts3d
        self.left_pts = left_pts
        self.right_pts = right_pts
        pts2d = []
        ptsz = []
        f3 = open("../calibration_data/camera_matrix.p", "rb")
        self.cmat = pickle.load(f3)
        f3.close()

        for pt in pts3d:
            pts2d.append(pt[:2])
            ptsz.append(np.ceil(pt[2] * 1000000))
        self.neigh = KNeighborsClassifier(n_neighbors=2)
        self.neigh.fit(pts2d, ptsz)
        self.f = scipy.interpolate.Rbf(np.matrix(pts3d)[:, 0].ravel(),
                                       np.matrix(pts3d)[:, 1].ravel(),
                                       np.matrix(pts3d)[:, 2].ravel(),
                                       function='linear',
                                       epsilon=.1)
        pts3d = np.array(pts3d).T
        print(pts3d.shape)
        print(pts3d[:2, :].shape, pts3d[2, :].shape)
        # Note: this replaces the Rbf interpolator assigned to self.f above
        self.f = KernelReg(pts3d[2, :], pts3d[:2, :], 'cc')