Example #1
def run_ka(train_varnames, train_labels, test_varnames, test_labels):
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(train_varnames)
    # Reuse the weights fitted on the training data; refitting on the test
    # set would draw a new feature map (here only coincidentally identical
    # because random_state is fixed).
    X_features_test = rbf_feature.transform(test_varnames)
    clf = SGDClassifier()
    result, accuracy = fit_predict(clf, "Kernel Approximation", X_features,
                                   train_labels, X_features_test, test_labels)
    return result, accuracy
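For reference, the idiomatic pattern is to fit the sampler once on the training data and reuse its weights at test time. A minimal self-contained sketch (synthetic data; all names below are illustrative, not from the snippet):

import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
X_train, X_test = rng.randn(200, 10), rng.randn(50, 10)
y_train = (X_train[:, 0] > 0).astype(int)
y_test = (X_test[:, 0] > 0).astype(int)

sampler = RBFSampler(gamma=1, random_state=1)
Z_train = sampler.fit_transform(X_train)  # fit draws the random weights
Z_test = sampler.transform(X_test)        # reuse the same weights at test time

clf = SGDClassifier(random_state=0).fit(Z_train, y_train)
print(clf.score(Z_test, y_test))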
Example #2
    def sklearn_sol(self, train_matrix, val_matrix, emb_matrix, emb_matrix_te,
                    gamma, mapping_dim, seed):

        rbf_feature = RBFSampler(gamma=gamma, n_components=mapping_dim,
                                 random_state=seed)
        emb_matrix = rbf_feature.fit_transform(emb_matrix.reshape(-1, 3072))
        rau = 0.0001  # ridge regularization strength

        # Ridge regression in the random-feature space:
        # weights = (Phi^T Phi + rau * I)^(-1) Phi^T Y
        phi_t = emb_matrix.T
        s = np.dot(phi_t, phi_t.T)
        s_inv = np.linalg.inv(s + rau * np.identity(s.shape[0]))
        weights = np.dot(np.dot(s_inv, phi_t), train_matrix)

        # Map the test embeddings with the already-fitted feature map.
        emb_matrix_te = rbf_feature.transform(emb_matrix_te.reshape(-1, 3072))
        pred = np.dot(emb_matrix_te, weights)

        mse_trace = []
        for i in range(len(self.v_data)):
            mse_trace.append(mean_squared_error(val_matrix[i].flatten(), pred[i]))
        return np.mean(mse_trace)
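For intuition, the closed-form step above is plain ridge regression in the random-feature space, W = (Phi^T Phi + rau*I)^(-1) Phi^T Y. A small self-check against sklearn's Ridge under that reading (synthetic shapes; names are illustrative):

import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
E = rng.randn(64, 3072)       # stand-in embeddings
Y = rng.randn(64, 5)          # stand-in regression targets
Phi = RBFSampler(gamma=0.1, n_components=256, random_state=0).fit_transform(E)

rau = 0.0001
W = np.linalg.solve(Phi.T @ Phi + rau * np.eye(256), Phi.T @ Y)
W_sk = Ridge(alpha=rau, fit_intercept=False).fit(Phi, Y).coef_.T
print(np.abs(W - W_sk).max())  # ~0: the two solutions coincide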
Example #3
    def __init__(self, df, validation_df, rbf_gamma, rbf_ncomponents,
                 representative_set_size, key_to_split_on, vals_to_split,
                 product_key_to_keep, with_replacement, is_categorical,
                 importance_weight_column_name):
        super().__init__(
            df=df,
            key_to_split_on=key_to_split_on,
            vals_to_split=vals_to_split,
            with_replacement=with_replacement,
            is_categorical=is_categorical,
            importance_weight_column_name=importance_weight_column_name)
        self.validation_df = deepcopy(validation_df)
        self.product_key_to_keep = product_key_to_keep

        self.gamma = rbf_gamma
        self.n_components = rbf_ncomponents
        self.representative_set_size = representative_set_size
        rbf_kernel = RBFSampler(gamma=self.gamma,
                                n_components=self.n_components)

        # Get only the features of the datasets
        # Get only the features of the datasets
        cols_to_keep = list(
            set(self.df.columns) -
            {self.importance_weight_column_name, self.product_key_to_keep})
        tr_dataset_features = pd.get_dummies(self.df[cols_to_keep],
                                             columns=[self.key_to_split_on])
        val_dataset_features = pd.get_dummies(self.validation_df[cols_to_keep],
                                              columns=[self.key_to_split_on])

        # Compute all feature maps using RBF Sampler; the validation set must
        # be mapped with the weights fitted on the training set (refitting
        # would draw different weights, since no random_state is set).
        phi_train = rbf_kernel.fit_transform(tr_dataset_features)
        phi_validation = rbf_kernel.transform(val_dataset_features)

        # Pre-computations
        T1 = phi_train @ phi_validation.T @ np.ones(len(self.validation_df))
        T2 = np.array(
            [phi_train[i, :].T @ phi_train[i, :] for i in range(len(self.df))])

        # Greedily select indices for dataset
        best_indices = []
        for i in range(1, self.representative_set_size + 1):
            phi_S = phi_train[best_indices, :]

            T3 = phi_train @ phi_S.T @ np.ones(i - 1) if len(
                phi_S) > 0 else np.zeros(len(self.df))
            objectives = 2. / (len(self.validation_df) *
                               i) * T1 - 1. / (i**2) * (T2 + 2 * T3)
            objectives[best_indices] = -np.inf
            best_indices.append(np.argmax(objectives))

        # Set our dataset as the selected indices
        self.df = self.df.iloc[sorted(best_indices)].reset_index(drop=True)
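Read as a formula, each pass of the loop above adds the candidate c that maximizes the marginal gain of an MMD-style objective between the selected set S (new size i) and the validation set of size m:

    c* = argmax_c  2/(m*i) * phi(x_c)^T Phi_val^T 1
                   - 1/i^2 * ( ||phi(x_c)||^2 + 2 * phi(x_c)^T sum_{s in S} phi(x_s) )

where phi is the RBFSampler feature map; the three terms are exactly T1, T2 and T3 in the code.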
Example #4
class RBFSamplerSGDClassifierEstimator(BaseEstimator, TransformerMixin):
    def __init__(self,
                 gamma=1.0,
                 n_components=100,
                 random_state=None,
                 **kwargs):
        kwargs['random_state'] = random_state
        self.rbf_sampler = RBFSampler(gamma=gamma,
                                      n_components=n_components,
                                      random_state=random_state)
        self.sgdclassifier = SGDClassifier(**kwargs)

    def fit(self, X, y):
        # Fit the sampler, then train on the same scaled features that
        # transform() returns, so fit and predict see identical inputs.
        self.rbf_sampler.fit(X)
        self.sgdclassifier.fit(self.transform(X), y)
        return self

    def transform(self, X, y=None):
        return np.sqrt(self.rbf_sampler.n_components) / np.sqrt(
            2.) * self.rbf_sampler.transform(X)

    def predict(self, X):
        return self.sgdclassifier.predict(self.transform(X))

    def decision_function(self, X):
        return self.sgdclassifier.decision_function(self.transform(X))
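A minimal usage sketch for the wrapper above (synthetic data; hyperparameter values are illustrative, and extra kwargs are forwarded to SGDClassifier):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

est = RBFSamplerSGDClassifierEstimator(gamma=0.5, n_components=200,
                                       random_state=0, max_iter=1000)
est.fit(X, y)
print(est.predict(X[:5]), est.decision_function(X[:5]))

The regressor variant below follows the same pattern with SGDRegressor.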
Example #5
class RBFSamplerSGDRegressorEstimator(BaseEstimator, TransformerMixin):
    def __init__(self,
                 gamma=1.0,
                 n_components=100,
                 random_state=None,
                 **kwargs):
        kwargs['random_state'] = random_state
        self.rbf_sampler = RBFSampler(gamma=gamma,
                                      n_components=n_components,
                                      random_state=random_state)
        self.sgdregressor = SGDRegressor(**kwargs)

    def fit(self, X, y):
        # Fit the sampler, then train on the same scaled features that
        # transform() returns, so fit and predict see identical inputs.
        self.rbf_sampler.fit(X)
        self.sgdregressor.fit(self.transform(X), y)
        return self

    def transform(self, X, y=None):
        return np.sqrt(self.rbf_sampler.n_components) / np.sqrt(
            2.) * self.rbf_sampler.transform(X)

    def predict(self, X):
        return self.sgdregressor.predict(self.transform(X))


# TODO: Add kernel SVM
# TODO: Add kernel ridge regressor
# TODO: Add random forests / xgboost
class ExposeDetector(AnomalyDetector):
    """ This detector is an implementation of The EXPoSE (EXPected Similarity
  Estimation) algorithm as described in Markus Schneider, Wolfgang Ertel,
  Fabio Ramos, "Expected Similarity Estimation for Lage-Scale Batch and
  Streaming Anomaly Detection", arXiv 1601.06602 (2016).

  EXPoSE calculates the likelihood of a data point being normal by using
  the inner product of its feature map with kernel embedding of previous data
  points. This measures the similarity of a data point to previous points
  without assuming an underlying data distribution.

  There are three EXPoSE variants: incremental, windowing and decay. This
  implementation is based on EXPoSE with decay. All three variants have been
  tried on NAB but decay gives the best results.Parameters for this detector
  have been tuned to give the best performance.
  """
    def __init__(self, *args, **kwargs):
        super(ExposeDetector, self).__init__(*args, **kwargs)

        self.kernel = None
        self.previousExposeModel = []
        self.decay = 0.01
        self.timestep = 0

    def initialize(self):
        """Initializes RBFSampler for the detector"""
        self.kernel = RBFSampler(gamma=0.5,
                                 n_components=20000,
                                 random_state=290)

    def handleRecord(self, inputData):
        """ Returns a list [anomalyScore] calculated using a kernel based
    similarity method described in the comments below"""

        # Transform the input by approximating the feature map of a Radial
        # Basis Function kernel using the Random Kitchen Sinks approximation.
        # (With a fixed random_state, fit_transform redraws the same weights
        # on every record, so the map is consistent across calls.)
        inputFeature = self.kernel.fit_transform(
            numpy.array([[inputData["value"]]]))

        # Compute expose model as a weighted sum of new data point's feature
        # map and previous data points' kernel embedding. Influence of older data
        # points declines with the decay factor.
        if self.timestep == 0:
            exposeModel = inputFeature
        else:
            exposeModel = ((self.decay * inputFeature) +
                           (1 - self.decay) * self.previousExposeModel)

        # Update previous expose model
        self.previousExposeModel = exposeModel

        # Compute anomaly score by calculating similarity of the new data point
        # with expose model. The similarity measure, calculated via inner
        # product, is the likelihood of data point being normal. Resulting
        # anomaly scores are in the range of -0.02 to 1.02.
        anomalyScore = (1 - numpy.inner(inputFeature, exposeModel)).item()
        self.timestep += 1

        return [anomalyScore]
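The decay update above is just an exponential moving average of feature maps. A standalone numeric sketch under the same conventions (values and sizes are illustrative):

import numpy as np
from sklearn.kernel_approximation import RBFSampler

decay = 0.01
kernel = RBFSampler(gamma=0.5, n_components=100, random_state=290)
model = None
for value in [0.1, 0.12, 0.11, 5.0]:       # the last point is an outlier
    phi = kernel.fit_transform(np.array([[value]]))
    model = phi if model is None else decay * phi + (1 - decay) * model
    score = (1 - np.inner(phi, model)).item()
    print(round(score, 3))                  # the score spikes on the outlier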
Example #7
    def compute_kernel(self, X, Y=None, *args, **kwargs):

        # initialize the random Fourier feature map; Y is accepted for API
        # compatibility but unused, since the primal map depends only on X
        rff_kernel = RBFSampler(*args, **kwargs)

        # transform data
        return rff_kernel.fit_transform(X)
def NaiveDecomposableGaussianORFF(X, A, gamma=1.,
                                  D=100, eps=1e-5, random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite)
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    # Decompose A = B^T B (B built from the singular values/vectors of A)
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    return matrix(kron(phiX, B))
def NaiveCurlFreeGaussianORFF(X, gamma=1.,
                              D=100, eps=1e-5, random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Unused; kept for signature compatibility with the decomposable case.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D,
                       random_state=random_state)
    phiX = phi_s.fit_transform(X)
    phiX = (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
            phi_s.random_weights_.reshape((1, -1, phiX.shape[1])))

    return matrix(phiX.reshape((-1, phiX.shape[2])))
Example #10
def EfficientDivergenceFreeGaussianORFF(X,
                                        gamma=1.,
                                        D=100,
                                        eps=1e-5,
                                        random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Unused; kept for signature compatibility with the decomposable case.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)
    W = phi_s.random_weights_.reshape((1, -1, 1, phiX.shape[1]))
    Wn = norm(phi_s.random_weights_, axis=0).reshape((1, 1, 1, -1))
    return LinearOperator(
        (phiX.shape[0] * X.shape[1], phiX.shape[1] * X.shape[1]),
        matvec=lambda b: dot(_rebase(phiX, W, Wn), b),
        rmatvec=lambda r: dot(_rebase(phiX, W, Wn).T, r),
        dtype=float)
Example #11
def NaiveDivergenceFreeGaussianORFF(X,
                                    gamma=1.,
                                    D=100,
                                    eps=1e-5,
                                    random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Unused; kept for signature compatibility with the decomposable case.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)

    phiX = _rebase(phi_s.fit_transform(X),
                   phi_s.random_weights_.reshape((1, -1, 1, D)),
                   norm(phi_s.random_weights_, axis=0).reshape((1, 1, 1, -1)))

    return matrix(phiX)
Example #12
def transform(x_original):
    #x_original = x_original.reshape([1, -1])
    #print x_original.shape
    rbf_features = RBFSampler(gamma=30, random_state=1, n_components=5300)
    x_trans = rbf_features.fit_transform(x_original)
    #x_trans = x_trans.reshape([-1])
    return x_trans
def kernel_estimation(df_gender):
    X_train, X_test, y_train, y_test = Utils.split_data(df_gender)
    rbf_feature = RBFSampler()
    X_features = rbf_feature.fit_transform(X_train)
    clf = SGDClassifier()
    clf.fit(X_features, y_train)
    print("Kernel approximation train acc: ", clf.score(X_features, y_train))
    print("Kernel approximation test acc: ",
          clf.score(rbf_feature.transform(X_test), y_test))
def preprocess(X_tr, X_ts, poly_degree=1):
    """
    If current directory contains RBFSampler.txt then 
    use RBFSampler, otherwise,
    Do polynomial transform
    also return the combined transform, incase needed

    features are normalized already in the source
    so, only polynomial transformation is done
    default is 1, since 561 fetures is already too many
    """
    rbf_path = os.path.join(os.getcwd(), 'RBFSampler.txt')
    if False and os.path.exists(
            rbf_path):  # disable RBFSample features again. Didn't help
        with open(rbf_path, 'rt') as f:
            kwargs = ast.literal_eval(f.read())
            transformer = RBFSampler(**kwargs)
    else:
        transformer = preprocessing.PolynomialFeatures(degree=poly_degree,
                                                       interaction_only=False)

    X_comb_tr = transformer.fit_transform(np.concatenate(X_tr, axis=0))
    X_comb_ts = transformer.transform(np.concatenate(X_ts, axis=0))

    X_tr = [transformer.transform(x) for x in X_tr]
    X_ts = [transformer.transform(x) for x in X_ts]

    return X_tr, X_ts, X_comb_tr, X_comb_ts, transformer
Example #15
    class ClassifierRBF:
        def __init__(self,
                     gamma='auto',
                     n_components=100,
                     random_state=None,
                     **kwargs):
            self.gamma = gamma
            self.n_components = n_components
            self.random_state = random_state
            self.clf = classifier(**kwargs)

        def fit(self, X, y):
            if self.gamma == 'auto':
                D = X.shape[1]
                self.gamma = 1 / D
            self.rbf = RBFSampler(gamma=self.gamma,
                                  n_components=self.n_components,
                                  random_state=self.random_state)

            self.clf.fit(self.rbf.fit_transform(X), y)
            return self

        def predict(self, X):
            p = self.clf.predict(self.rbf.transform(X))
            return p

        def predict_proba(self, X):
            p = self.clf.predict_proba(self.rbf.transform(X))
            return p
Example #16
def dim_bw_dataset(n_train, n_test, dim, prob_type='regression', embed_size=10000, name=None,
                   preprocess='standardise', bw_set=None, noise_sd=0.5, seed=23):
    n_total = n_train + n_test
    rs = check_random_state(seed)
    X = rs.normal(loc=0.0, scale=1.0, size=(n_total, dim))
    data_x = preprocessing.scale(X)
    signal_x_bw = np.divide(X, np.array(bw_set))
    #  1 / (2 * sigma^2) = gamma, i.e. sigma = 1 implies gamma = 0.5
    #  equivalently sigma = sqrt(1.0 / (2.0 * gamma))
    rbf_feature = RBFSampler(gamma=0.5, n_components=200, random_state=0)
    trans_signal_x_bw = rbf_feature.fit_transform(signal_x_bw)
    alpha = rs.normal(loc=0.0, scale=1.0, size=(200))
    y_0 = np.matmul(trans_signal_x_bw, alpha)
    if prob_type == 'regression':
        y_0 = standardise(y_0, low=0.0, high=1.0)
        y = y_0 + rs.normal(loc=0.0, scale=noise_sd, size=(n_total))
        label = standardise(y)
        dataset = data_split(data_x, label, n_train, n_test, rs)
    elif prob_type == 'classification':
        y_0 = standardise(y_0, low=-6.0, high=6.0)
        y = y_0
        prob = 1.0 / (1.0 + np.exp(-y))
        uni_values = rs.uniform(low=0.0, high=1.0, size=len(prob))
        label = (uni_values > prob).astype(int)
        train_x, test_x, train_y, test_y = train_test_split(data_x, label, stratify=label,
                                                            test_size=float(n_test)/n_total,
                                                            random_state=rs)
        dataset = data(train_x, test_x, train_y, test_y, name=name,
                       embed_size=embed_size, prob_type='classification')
    return dataset
Example #17
def build_model(train_data_path='training-data-small.txt.bz2',
                scale='small',
                C=1,
                gamma=0.1,
                kernel='rbf',
                chunksize=1024000):
    """Return the trained model
    by given training data and parameters (optimized C, gamma)
    """
    if scale == 'small':
        if 'large' in train_data_path:
            raise ValueError("You can only choose small dataset in small scale")
        else:
            model = SVC(C=C, gamma=gamma, kernel=kernel)
            # load training data
            train_X, train_y = load_data(data_path=train_data_path)
            model.fit(train_X, train_y)
    else:
        if 'small' in train_data_path:
            raise ValueError("You can only choose large dataset in large scale")
        else:
            model = SGDClassifier()
            from sklearn.kernel_approximation import RBFSampler
            # kernel approximation; with a fixed random_state the sampler
            # draws identical weights on every fit, so fit_transform per
            # chunk still yields a consistent feature map
            rbf_feature = RBFSampler(gamma=gamma, random_state=1, n_components=1000)
            # incremental learning with SGDClassifier
            with bz2.open(train_data_path, 'r') as f:
                for chunk in chunk_file(f):
                    print(time.time())
                    train_y, X = parse_lines(chunk)
                    train_X = feature_hash(X)
                    train_X_rbf = rbf_feature.fit_transform(train_X)
                    model.partial_fit(train_X_rbf, train_y, classes=np.array([0, 1]))

    return model
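The streaming pattern above generalizes; a compact, self-contained sketch of incremental learning on synthetic batches (fit the sampler once, then only transform):

import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
sampler = RBFSampler(gamma=0.1, n_components=1000, random_state=1)
model = SGDClassifier()
for step in range(10):
    Xb = rng.randn(256, 20)
    yb = (Xb[:, 0] > 0).astype(int)
    Zb = sampler.fit_transform(Xb) if step == 0 else sampler.transform(Xb)
    model.partial_fit(Zb, yb, classes=np.array([0, 1]))

Xt = rng.randn(1000, 20)
print(model.score(sampler.transform(Xt), (Xt[:, 0] > 0).astype(int)))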
Example #18
def computePolyFeatures(Feature):

    # Despite the name, this computes RBF random Fourier features,
    # not polynomial features.
    rbf_feature = RBFSampler(gamma=0.01, n_components=50, random_state=1)
    PolyFeatures = rbf_feature.fit_transform(Feature)

    return PolyFeatures
Example #19
def kernel_approximation():
    X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    y = [0, 0, 1, 1]
    rbf_feature = RBFSampler()
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier()
    clf.fit(X_features, y)
    return clf.score(X_features, y)
Example #20
class MaxvalueEntropySearch(object):
    def __init__(self, GPmodel):
        self.GPmodel = GPmodel
        self.y_max = max(GPmodel.yValues)
        self.d = GPmodel.dim

    def Sampling_RFM(self):
        #self.rbf_features = RBFSampler(gamma=1/(2*RBF(length_scale=1, length_scale_bounds=(1e-3, 1e2)).length_scale**2), n_components=1000, random_state=1)
        self.rbf_features = RBFSampler(
            gamma=1 / (2 * self.GPmodel.kernel.length_scale**2),
            n_components=1000,
            random_state=1)
        X_train_features = self.rbf_features.fit_transform(
            np.asarray(self.GPmodel.xValues))

        A_inv = np.linalg.inv((X_train_features.T).dot(X_train_features) +
                              np.eye(self.rbf_features.n_components) /
                              self.GPmodel.beta)
        self.weights_mu = A_inv.dot(X_train_features.T).dot(
            self.GPmodel.yValues)
        weights_gamma = A_inv / self.GPmodel.beta
        self.L = np.linalg.cholesky(weights_gamma)

    def weigh_sampling(self):
        random_normal_sample = np.random.normal(0, 1, np.size(self.weights_mu))
        self.sampled_weights = np.c_[self.weights_mu] + self.L.dot(
            np.c_[random_normal_sample])

    def f_regression(self, x):

        # reuse the feature map fitted in Sampling_RFM
        X_features = self.rbf_features.transform(x.reshape(1, len(x)))
        return -(X_features.dot(self.sampled_weights))

    def single_acq(self, x, maximum):
        mean, std = self.GPmodel.getPrediction(x)
        mean = mean[0]
        std = std[0]
        if maximum < max(self.GPmodel.yValues) + 5 / self.GPmodel.beta:
            maximum = max(self.GPmodel.yValues) + 5 / self.GPmodel.beta

        normalized_max = (maximum - mean) / std
        pdf = norm.pdf(normalized_max)
        cdf = norm.cdf(normalized_max)
        if (cdf == 0):
            cdf = 1e-30
        return -(normalized_max * pdf) / (2 * cdf) + np.log(cdf)
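Sampling_RFM and weigh_sampling above implement Bayesian linear regression in the random-feature space. A self-contained sketch of one posterior function draw, following the snippet's conventions (beta plays the role of the GP noise precision; all data and values here are synthetic):

import numpy as np
from sklearn.kernel_approximation import RBFSampler

rng = np.random.RandomState(0)
X = rng.uniform(-1, 1, size=(30, 2))
y = np.sin(3 * X[:, 0]) + 0.1 * rng.randn(30)
beta = 100.0                                  # assumed noise precision

feats = RBFSampler(gamma=2.0, n_components=500, random_state=1)
Phi = feats.fit_transform(X)                  # (n, D)
A_inv = np.linalg.inv(Phi.T @ Phi + np.eye(500) / beta)
mu = A_inv @ Phi.T @ y                        # posterior mean of the weights
L = np.linalg.cholesky(A_inv / beta)          # posterior covariance factor
w = mu + L @ rng.randn(500)                   # one Thompson sample
f = lambda x: feats.transform(x.reshape(1, -1)) @ w   # sampled function
print(f(np.zeros(2)))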
Example #21
def test_sgd_regressor_rbf(loss):
    rng = np.random.RandomState(0)
    transform = RBFSampler(n_components=100, gamma=10, random_state=0)
    X_trans = transform.fit_transform(X)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    _test_regressor(transform, y_train, y_test, X_trans, loss=loss)
Example #22
class CondexposeDetector(AnomalyDetector):

  """ This is a modified EXPoSE detector that integrates a conditional
  temporal relation between two consequtive inputs.
  """

  def __init__(self, *args, **kwargs):
    super(CondexposeDetector, self).__init__(*args, **kwargs)

    self.kernel = None
    self.timestep = 0

  def initialize(self, gamma=None, fourierFeatures=None):
    """Initializes RBFSampler for the detector"""
    if gamma is None:
        self.gamma = 0.1
    else:
        self.gamma = gamma
    if fourierFeatures is None:
        self.fourierFeatures = 50
    else:
        self.fourierFeatures = fourierFeatures
        
    print('parameters -- gamma={} fourierFeatures={}'.format(self.gamma, self.fourierFeatures))

    self.kernel = RBFSampler(gamma=self.gamma, n_components=self.fourierFeatures, random_state=5)
    self.r = VRLS4(self.fourierFeatures)
    self.x_t = None

  def handleRecord(self, inputData):
    """ Returns a list [anomalyScore] calculated using a kernel based
    similarity method described in the comments below"""
    
    
    # Transform the input by approximating feature map of a Radial Basis
    # Function kernel using Random Kitchen Sinks approximation
    inputData = [inputData['v_{}'.format(i)] for i in range(len(inputData)-1)]
    inputData = (inputData-self.inputMin)/(self.inputMax-self.inputMin)
    #scaling step
    #todo: take outside and normalize all columns on their own
    assert (len(self.inputMin) == len(inputData)), 'normalization error, len diff'
    
    y_t = self.kernel.fit_transform(np.asarray([inputData]))
    
    if self.timestep == 0:
        self.x_t = y_t.copy()
    
    conditional_mean = np.matmul(self.x_t, self.r.getCovar())
    if self.timestep > 1:
        conditional_mean = conditional_mean / LA.norm(conditional_mean)
    anomalyScore = (1 - np.inner(y_t, conditional_mean)).item()
    
    self.r.update(self.x_t.T,y_t.T)
    self.x_t = y_t.copy()
    
    self.timestep += 1

    return [anomalyScore]
Example #23
    def kernel_transform(self, X1, X2 = None, kernel_type = 'linear_primal', n_components = 100, gamma = 1.0):
        """
        Forms the kernel matrix using the samples X1
        Parameters:
        ----------
        X1: np.ndarray
            data (n_samples1,n_features) to form a kernel of shape (n_samples1,n_samples1)
        X2: np.ndarray
            data (n_samples2,n_features) to form a kernel of shape (n_samples1,n_samples2)
        kernel_type : str
            type of kernel to be used
        gamma: float
            kernel parameter
        Returns:
        -------
        X: np.ndarray
            the kernel of shape (n_samples,n_samples)
        """
        if(kernel_type == 'linear'):
            X = linear_kernel(X1,X2)
        elif(kernel_type == 'rbf'):  
            X = rbf_kernel(X1,X2,gamma) 
        elif(kernel_type == 'tanh'):
            X = sigmoid_kernel(X1,X2,-gamma) 
        elif(kernel_type == 'sin'):
#            X = np.sin(gamma*manhattan_distances(X1,X2))
            X = np.sin(gamma*pairwise_distances(X1,X2)**2)
        elif(kernel_type =='TL1'):                
            X = np.maximum(0,gamma - manhattan_distances(X1,X2)) 
        elif(kernel_type == 'rff_primal'):
            rbf_feature = RBFSampler(gamma=gamma, random_state=1, n_components = n_components)
            X = rbf_feature.fit_transform(X1)
        elif(kernel_type == 'nystrom_primal'):
            #cannot have n_components more than n_samples1
            if(n_components > X1.shape[0]):
                raise ValueError('n_samples should be greater than n_components')
            rbf_feature = Nystroem(gamma=gamma, random_state=1, n_components = n_components)
            X = rbf_feature.fit_transform(X1)
        elif(kernel_type == 'linear_primal'):                
            X = X1
        else:
            print('No kernel_type passed: using linear primal solver')
            X = X1
        return X
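A usage sketch contrasting the explicit kernel matrix with the primal feature map; `model` stands for an instance of the enclosing class (a hypothetical name, not from the snippet):

import numpy as np

X1 = np.random.RandomState(0).randn(50, 8)

K = model.kernel_transform(X1, X1, kernel_type='rbf', gamma=0.5)
print(K.shape)   # (50, 50): explicit kernel matrix

Z = model.kernel_transform(X1, kernel_type='rff_primal',
                           n_components=200, gamma=0.5)
print(Z.shape)   # (50, 200): primal random-feature map, with Z @ Z.T ~ K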
Example #24
    def __init__(self,
                 X,
                 y,
                 dataset,
                 policy_name,
                 scale=True,
                 n_splits=10,
                 passive=True,
                 n_jobs=-1,
                 overwrite=False,
                 gamma_percentile=90,
                 ts_sigma=0.02,
                 ts_tau=0.02,
                 ts_mu=0.5,
                 save_name=None,
                 candidate_pool_size=None):
        seed = RandomState(1234)
        self.X = np.asarray(X, dtype=np.float64)
        self.y = np.asarray(y)
        self.X = StandardScaler().fit_transform(self.X) if scale else self.X
        self.policy_name = policy_name
        self.dataset = dataset
        self.passive = passive
        self.n_jobs = n_jobs
        self.overwrite = overwrite
        self.ts_sigma = ts_sigma
        self.ts_tau = ts_tau
        self.ts_mu = ts_mu
        self.save_name = save_name
        self.candidate_pool_size = candidate_pool_size

        # estimate the kernel using the 90th percentile heuristic
        random_idx = seed.choice(X.shape[0], 1000)
        distances = pairwise_distances(self.X[random_idx], metric='l1')
        self.gamma = 1 / np.percentile(distances, 90)
        self.similarity_gamma = 1 / np.percentile(distances, gamma_percentile)
        transformer = RBFSampler(gamma=self.gamma,
                                 random_state=seed,
                                 n_components=100)
        self.X_transformed = transformer.fit_transform(self.X)

        n_samples = self.X.shape[0]
        train_size = min(10000, int(0.7 * n_samples))
        test_size = min(20000, n_samples - train_size)
        splitter = StratifiedShuffleSplit(n_splits=n_splits,
                                          train_size=train_size,
                                          test_size=test_size,
                                          random_state=seed)
        self.kfold = list(splitter.split(self.X, self.y))

        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(y)

        if policy_name == 'COMB':
            assert len(self.label_encoder.classes_
                       ) == 2, 'COMB only works with binary classification.'
Example #25
def test_regressor_rbf(normalize, loss):
    rng = np.random.RandomState(0)
    # approximate kernel mapping
    transformer = RBFSampler(n_components=100, random_state=0, gamma=10)
    X_trans = transformer.fit_transform(X)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    _test_regressor(transformer, X_train, y_train, X_test, y_test, X_trans,
                    normalize=normalize, loss=loss)
Example #26
def example():
    from sklearn.kernel_approximation import RBFSampler
    from sklearn.linear_model import SGDClassifier
    X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    y = [0, 0, 1, 1]
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier(max_iter=5)
    print(clf.fit(X_features, y))

    print(clf.score(X_features, y))
Example #27
def approx_kernel(kernel_structure, data_x, data_y):
    if kernel_structure.iloc[0].loc['kernel_type'] == 'RBF':
        rbf_feature = RBFSampler(gamma=1, n_components=10, random_state=1)
        X_features = rbf_feature.fit_transform(data_x)
    if kernel_structure.iloc[0].loc['kernel_type'] == 'ACHI2':
        chi2sampler = AdditiveChi2Sampler(sample_steps=10, sample_interval=1)
        X_features = chi2sampler.fit_transform(data_x, data_y)
    # TODO: implement the other methods
    return X_features
Example #28
def ridge_gamma(data, log_gamma):
    alpha = 5.0e-07  # sigma^2 / n (earlier trials: 2.0e-06, 6.25e-07)
    gamma = np.exp(log_gamma)
    print('Training with alpha:{}, gamma:{}'.format(alpha, gamma))
    np.random.seed(23)
    rbf_feature = RBFSampler(gamma=gamma, n_components=200)
    trans_tr_x = rbf_feature.fit_transform(data.train_x)
    trans_test_x = rbf_feature.transform(data.test_x)
    clf = Ridge(alpha=alpha)
    clf.fit(trans_tr_x, data.train_y)
    score = clf.score(trans_test_x, data.test_y)
    return max(score, -1.0)
def runSVM(pickle_file,X_test_svm):
    full_name = glob("./pickles/"+pickle_file+"/best*")[0]
    with open(full_name,"rb") as f:
        model = pickle.load(f)
    if "rbf" in pickle_file:
        number = int(re.sub(".pickle","",re.sub(r".*best_model_","",full_name)))
        df = pd.read_csv(glob("./pickles/"+pickle_file+"/log*")[0])
        g = float(df[df["model"] == number]["gamma"])
        n = int(df[df["model"] == number]["n_components"])
        # NOTE: a fresh, unseeded RBFSampler is fitted here at test time; its
        # weights will not match the ones the model was trained with unless
        # the training sampler is re-created identically. Persisting the
        # fitted sampler alongside the model would be the safer pattern.
        rbf_feature = RBFSampler(gamma=g, n_components=n)
        X_test_svm = rbf_feature.fit_transform(X_test_svm)
    out = model.decision_function(X_test_svm)
    np.save("./pickles/"+pickle_file+"/prob_map_test.npy", out)
def rbf_projection_idea_main():
    # Classic example of using an RBF kernel to increase the dimensionality
    # of the data (similar to an SVM). Taken from the sklearn docs page.

    from sklearn.linear_model import SGDClassifier
    X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    y = [0, 0, 1, 1]
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier(max_iter=5, tol=1e-3)
    clf.fit(X_features, y)
    print('Score:', clf.score(X_features, y))
def test_rbf_sampler():
    """test that RBFSampler approximates kernel on random data"""
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    assert_array_almost_equal(kernel, kernel_approx, 1)
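As a rule of thumb, the Monte Carlo error of the RBFSampler approximation shrinks at roughly O(1/sqrt(n_components)); a quick empirical check under that assumption:

import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
K = rbf_kernel(X, gamma=0.5)

for D in (100, 1000, 10000):
    Z = RBFSampler(gamma=0.5, n_components=D, random_state=42).fit_transform(X)
    print(D, round(np.abs(Z @ Z.T - K).max(), 3))   # error typically falls with D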
Example #32
def test_sgd_classifier_rbf(loss):
    rng = np.random.RandomState(0)
    transform = RBFSampler(n_components=100, gamma=10, random_state=0)
    X_trans = transform.fit_transform(X)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    _test_classifier(transform,
                     np.sign(y_train),
                     np.sign(y_test),
                     X_trans,
                     max_iter=500,
                     eta0=.01,
                     loss=loss)
Example #33
def test_rbf_sampler():
    # test that RBFSampler approximates kernel on random data
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert_less_equal(np.abs(np.mean(error)), 0.01)  # close to unbiased
    np.abs(error, out=error)
    assert_less_equal(np.max(error), 0.1)  # nothing too far off
    assert_less_equal(np.mean(error), 0.05)  # mean is fairly close
Example #34
    def trainSGD(self):
        sgd = SGDClassifier(
            loss=self.loss,
            penalty=self.reg,
            alpha=self.alpha,
            n_iter=self.epochs,
            shuffle=True,
            n_jobs=self.multicpu,
            class_weight="auto",
        )
        # print("Classifier (sklearn SGD): training the model \t(%s)" % self.dspath)
        if self.kernel_approx is True:
            # n_components must be an integer
            rbf_feature = RBFSampler(gamma=1, n_components=100, random_state=1)
            Xk = rbf_feature.fit_transform(self.X)
            self.glm = OneVsRestClassifier(sgd).fit(Xk, self.Y)
        else:
            self.glm = OneVsRestClassifier(sgd).fit(self.X, self.Y)
        print("Classifier (sklearn SGD): Done. \t(%s)" % self.dspath)
Example #35
def train_models(X_train, y_train, X_test, y_test):
    clf = linear_model.SGDClassifier(penalty='elasticnet')
    print(clf)
    print("fitting a linear elasticnet (L1+L2 regularized linear classif.) with SGD")
    clf = clf.fit(X_train, y_train)
    print("score on the training set", clf.score(X_train, y_train))
    print("score on 80/20 split", clf.score(X_test, y_test))

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_train_feats = rbf_feature.fit_transform(X_train)
    X_test_feats = rbf_feature.transform(X_test)
    print("fitting a linear elasticnet with SGD on RBF sampled features")
    clf = clf.fit(X_train_feats, y_train)
    print("score on the training set", clf.score(X_train_feats, y_train))
    print("score on 80/20 split", clf.score(X_test_feats, y_test))

    clf2 = RandomForestClassifier(max_depth=None, min_samples_split=3)
    print(clf2)
    print("fitting a random forest")
    clf2 = clf2.fit(X_train, y_train)
    print("score on the training set", clf2.score(X_train, y_train))
    print("score on 80/20 split", clf2.score(X_test, y_test))

    clf3 = svm.SVC(kernel='linear')
    print(clf3)
    print("fitting an SVM with a linear kernel")
    clf3 = clf3.fit(X_train, y_train)
    print("score on the training set", clf3.score(X_train, y_train))
    print("score on 80/20 split", clf3.score(X_test, y_test))

    clf4 = svm.SVC(kernel='rbf')
    print(clf4)
    print("fitting an SVM with an RBF-kernel")
    clf4 = clf4.fit(X_train, y_train)
    print("score on the training set", clf4.score(X_train, y_train))
    print("score on 80/20 split", clf4.score(X_test, y_test))

    clf5 = linear_model.LogisticRegression(penalty='l1', tol=0.01)
    print(clf5)
    print("fitting a logistic regression reg. with L1")
    clf5 = clf5.fit(X_train, y_train)
    print("score on the training set", clf5.score(X_train, y_train))
    print("score on 80/20 split", clf5.score(X_test, y_test))
Example #36
    def __init__(self, X, y, dataset, policy_name, scale=True, n_iter=10, passive=True):
        seed = RandomState(1234)
        self.X = np.asarray(X, dtype=np.float64)
        self.y = np.asarray(y)
        self.X = StandardScaler().fit_transform(self.X) if scale else self.X
        self.policy_name = policy_name
        self.dataset = dataset
        self.passive = passive

        # estimate the kernel using the 90th percentile heuristic
        random_idx = seed.choice(X.shape[0], 1000)
        distances = pairwise_distances(self.X[random_idx], metric='l1')
        self.gamma = 1 / np.percentile(distances, 90)
        transformer = RBFSampler(gamma=self.gamma, random_state=seed, n_components=100)
        self.X_transformed = transformer.fit_transform(self.X)

        n_samples = self.X.shape[0]
        train_size = min(10000, int(0.7 * n_samples))
        test_size = min(20000, n_samples - train_size)
        # legacy (scikit-learn < 0.18) StratifiedShuffleSplit API; newer
        # versions use the splitter-object form shown in the earlier variant
        self.kfold = StratifiedShuffleSplit(self.y, n_iter=n_iter, test_size=test_size,
                                            train_size=train_size, random_state=seed)
def EfficientDecomposableGaussianORFF(X, A, gamma=1.,
                                      D=100, eps=1e-5, random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite)
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    # Decompose A = B^T B (B built from the singular values/vectors of A)
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    cshape = (D, B.shape[0])
    rshape = (X.shape[0], B.shape[1])
    return LinearOperator((phiX.shape[0] * B.shape[1], D * B.shape[0]),
                          matvec=lambda b: dot(phiX, dot(b.reshape(cshape),
                                               B)),
                          rmatvec=lambda r: dot(phiX.T, dot(r.reshape(rshape),
                                                B.T)),
                          dtype=float)
def EfficientCurlFreeGaussianORFF(X, gamma=1.,
                                  D=100, eps=1e-5, random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Unused; kept for signature compatibility with the decomposable case.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D,
                       random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Scale each random feature by its frequency vector, then flatten to a
    # 2D map of shape (n_samples * n_features, n_components).
    phiX2 = (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
             phi_s.random_weights_.reshape((1, -1, phiX.shape[1])))
    phiX2 = phiX2.reshape((phiX.shape[0] * X.shape[1], phiX.shape[1]))

    return LinearOperator((phiX.shape[0] * X.shape[1], phiX.shape[1]),
                          matvec=lambda b: dot(phiX2, b),
                          rmatvec=lambda r: dot(phiX2.T, r),
                          dtype=float)
    def rbf_kernel(self, matrix, n_components):
        rbf = RBFSampler(n_components=n_components)
        print(rbf)
        matrix_features = rbf.fit_transform(matrix)
        return matrix_features
Example #40
 #prop['class_' + ps] = []
 # restore classifier set from file
 classifier = joblib.load('data/' + algorithm + '-' + ps + '.pkl') 
 
 # restore robust scaler from file
 robust_scaler = joblib.load('data/rs-' + algorithm + '-' + ps + '.pkl') 
 
 # restore classes from file
 classes = joblib.load('data/classes-' + algorithm + '-' + ps + '.pkl') 
 
 
 cstatus = robust_scaler.transform(cstatus_orig)
 
 if algorithm == 'kernel-approx':
     # refitting here reproduces the training-time features only because
     # gamma and random_state match the values used during training
     rbf_feature = RBFSampler(gamma=1, random_state=1)
     cstatus = rbf_feature.fit_transform(cstatus)
     
 prob = None
 if algorithm == 'one-vs-rest' or algorithm == 'linear-svm':
     f = np.vectorize(platt_func)
     raw_predictions = classifier.decision_function(cstatus)
     platt_predictions = f(raw_predictions)
     prob = platt_predictions / platt_predictions.sum(axis=1)
     #prob = prob.tolist()
     
 else:
     prob = classifier.predict_proba(cstatus).tolist()
     
 
 for i in range(0,len(classes)):
     
Example #41
def main():
    type_of_problem = ""
    split = 0.3
    su_train = []
    su_test = []
    p = optparse.OptionParser()
    # take path of training data set
    p.add_option("--path_train", "-p", default="/afs/cern.ch/user/s/sganju/private/2014_target.csv")
    # what type of problem is it? regression/classification/clustering/dimensionality reduction
    p.add_option("--type_of_problem", "-t", default="c")
    # include cross validation true/false
    p.add_option("--cross_validation", "-v", default="True")
    # take the numerical values
    # p.add_option('--numerical_values', '-n')
    # specify target column
    p.add_option("--target", "-y")
    options, arguments = p.parse_args()

    num_values = "id cpu creator dataset dbs dtype era naccess nblk	nevt nfiles nlumis nrel nsites nusers parent primds proc_evts procds rel1_0 rel1_1 rel1_2 rel1_3 rel1_4	rel1_5 rel1_6 rel1_7 rel2_0 rel2_1 rel2_10 rel2_11 rel2_2 rel2_3 rel2_4 rel2_5 rel2_6 rel2_7 rel2_8 rel2_9 rel3_0 rel3_1 rel3_10 rel3_11 rel3_12 rel3_13 rel3_14 rel3_15 rel3_16 rel3_17 rel3_18 rel3_19 rel3_2 rel3_20 rel3_21 rel3_22 rel3_23 rel3_24 rel3_25 rel3_26 rel3_3 rel3_4 rel3_5 rel3_6 rel3_7 rel3_8 rel3_9 relt_0 relt_1 relt_2 rnaccess rnusers rtotcpu s_0 s_1 s_2 s_3 s_4size tier totcpu wct"
    num_values = num_values.split()

    # load from files
    train = pd.read_csv(options.path_train)

    # load target values
    target = train["target"]

    # TRAINING DATA SET
    data = train
    print "Performing imputation."
    imp = data.dropna().mean()
    test = data.fillna(imp)
    data = data.fillna(imp)

    print "Splitting the training data with %f." % split
    features_train, features_test, target_train, target_test = train_test_split(
        data, target, test_size=split, random_state=0
    )
    print "Generating Model"
    # diffrentiate on the basis of type of problem
    # RANDOM FOREST CLASSIFIER
    rf = RandomForestClassifier(n_estimators=100)
    rf = rf.fit(features_train, target_train)
    cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test)

    # Ada boost (note: the grid search `gs` is built but never fitted;
    # only the un-tuned AdaBoost model is trained and scored)
    clf_ada = AdaBoostClassifier(n_estimators=100)
    params = {
        "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5],
        "max_features": [0.25, 0.50, 0.75, 1],
        "max_depth": [3, 4, 5],
    }
    gs = GridSearchCV(clf_ada, params, cv=5, scoring="accuracy", n_jobs=4)
    clf_ada.fit(features_train, target_train)
    cal_score("ADABOOST", clf_ada, features_test, target_test)

    # RANDOM FOREST CLASSIFIER
    rf = RandomForestClassifier(n_estimators=100)
    rf = rf.fit(features_train, target_train)
    cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test)
    # predictions = rf.predict_proba(test)
    # Gradient Boosting
    gb = GradientBoostingClassifier(n_estimators=100, subsample=0.8)
    params = {
        "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5],
        "max_features": [0.25, 0.50, 0.75, 1],
        "max_depth": [3, 4, 5],
    }
    gs = GridSearchCV(gb, params, cv=5, scoring="accuracy", n_jobs=4)
    gs.fit(features_train, target_train)
    cal_score("GRADIENT BOOSTING", gs, features_test, target_test)
    # KERNEL APPROXIMATION - RBF (X_features is computed here, but the SGD
    # classifier below is fitted on the raw features)
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(data)

    # SGD CLASSIFIER
    clf = SGDClassifier(
        alpha=0.0001,
        class_weight=None,
        epsilon=0.1,
        eta0=0.0,
        fit_intercept=True,
        l1_ratio=0.15,
        learning_rate="optimal",
        loss="hinge",
        n_iter=5,
        n_jobs=1,
        penalty="l2",
        power_t=0.5,
        random_state=None,
        shuffle=True,
        verbose=0,
        warm_start=False,
    )
    clf.fit(features_train, target_train)
    cal_score("SGD Regression", clf, features_test, target_test)

    # KN Classifier
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(features_train, target_train)
    cal_score("KN CLASSIFICATION", neigh, features_test, target_test)
    # predictions = neigh.predict_proba(test)

    # Decision Tree classifier
    clf_tree = tree.DecisionTreeClassifier(max_depth=10)
    clf_tree.fit(features_train, target_train)
    cal_score("DECISION TREE CLASSIFIER", clf_tree, features_test, target_test)
Example #42
		f.write(header)
		size = header.count(',')
		for (id, label) in zip(ids, labels):
			f.write('%d' % int(id))
			for i in range(0, size):
				if i == label:
					f.write(',1')
				else:
					f.write(',0')
			f.write('\n')


if __name__ == '__main__':
	# get X and y
	train_x, train_y = loadDataHelper('train_data.txt')
	test_x, test_id = loadDataHelper('test_data.txt')
	print('train size: %d %d' % (len(train_x), len(train_y)))
	print('test size: %d %d' % (len(test_x), len(test_id)))


	rbf_feature = RBFSampler(gamma=1, random_state=1)
	X_features = rbf_feature.fit_transform(train_x)
	model = SGDClassifier(loss="hinge", alpha=0.01, n_iter=200, fit_intercept=True)
	# model = SGDClassifier()
	model.fit(X_features, train_y)
	print(model)

	X_features = rbf_feature.transform(test_x)  # reuse the fitted weights
	predicted = model.predict(X_features)
	saveResult('result-sgd.csv', test_id, predicted)
Example #43
 
 X_test = X[0:nr_test]
 Y_test = Y[0:nr_test]
 X_train = X[nr_test:len(X)]
 Y_train = Y[nr_test:len(X)]

 X_train = robust_scaler.fit_transform(X_train)

 # save standard scaler
 joblib.dump(robust_scaler, base_path + 'data/rs-' + algorithm + '-' + str(ps[psi]) + '.pkl')

 X_test = robust_scaler.transform(X_test)

 if algorithm == 'kernel-approx':
     rbf_feature = RBFSampler(gamma=1, random_state=1)
     X_train = rbf_feature.fit_transform(X_train)
     X_test = rbf_feature.transform(X_test)
 elif algorithm == 'mlp':
     n_output = len(set(Y))
     #n_output = 2460
     n_input = len(X_train[0]) + 1
     n_neurons = int(round(sqrt(n_input * n_output)))
     print("N input", n_input)
     print("N output", n_output)
     print("N neurons", n_neurons)
     classifier = MLPClassifier(solver='adam', alpha=1e-5,
                                hidden_layer_sizes=(n_input, n_neurons, n_output),
                                random_state=1)

 if classifier is not None or exists_be_file is True:

     if cv is True:
         gs = GridSearchCV(classifier, parameters)
Example #44
		'max_features': [.25, .50, .75, 1],
		'max_depth': [3, 4, 5],
		}
gs = GridSearchCV(gb, params, cv=5, scoring='accuracy', n_jobs=4)
gs.fit(features_train, target_train)
# predictions = gs.predict_proba(test)
# print(predictions)

cal_score("GRADIENT BOOSTING", gs, features_test, target_test)
# sorted(gs.grid_scores_, key=lambda x: x.mean_validation_score)
# print(gs.best_score_)
# print(gs.best_params_)
# predictions = gs.predict_proba(test)

# KERNEL APPROXIMATIONS - RBF
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(data)

# SGD CLASSIFIER (note: fitted on the raw features, not X_features)
clf = SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
                    fit_intercept=True, l1_ratio=0.15, learning_rate='optimal',
                    loss='hinge', n_iter=5, n_jobs=1, penalty='l2', power_t=0.5,
                    random_state=None, shuffle=True, verbose=0,
                    warm_start=False)
clf.fit(features_train, target_train)
cal_score("SGD CLASSIFIER", clf, features_test, target_test)

# KN Classifier
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(features_train, target_train)
cal_score("KN CLASSIFICATION", neigh, features_test, target_test)
Example #45
    classa = [0, 1, 2, 3, 4]

    num = len(x) - 10000
    xtest, ytest = x[num:], y[num:]

    x, y = x[:num], y[:num]

    print(x[:10], y[:10])
    clf = clf.fit(x, y)

    clf2_RFC = RandomForestClassifier(random_state=0, class_weight={1: 0.25, 2: 0.56, 3: 0.17, 4: 0.02})
    clf2_RFC = clf2_RFC.fit(x, y)

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(x)
    X_test = rbf_feature.transform(xtest)

    # note: the SGD classifier below is fitted on the raw inputs; X_features
    # and X_test are computed but not used at this point in the snippet
    clfK = linear_model.SGDClassifier()
    clfK.fit(x, y)
    print("SGD classifier", clfK.score(xtest, ytest))

    # DECISION TREE
    clft = tree.DecisionTreeClassifier(max_depth=7)
    clft.fit(x, y)
    print("Tree", clft.score(xtest, ytest))

    # gen image
    fname = ["stack size: ", "num called: ", "num to call: ", "raise", "bet: ", "hand eval: ", "card info: ", "potsize: "]
Example #46
def transform(x_original, make_np=True):
    orig = x_original
    MEAN = [ 0.00213536,  0.00324656,  0.00334724,  0.00175428,  0.00349227,
          0.0035413 ,  0.00188289,  0.00216241,  0.00184026,  0.00351317,
          0.00520942,  0.00450718,  0.00346782,  0.00300477,  0.00223811,
          0.00180039,  0.00216675,  0.00381716,  0.00258565,  0.00291358,
          0.00616643,  0.00237084,  0.00440006,  0.00729192,  0.00369302,
          0.00058215,  0.00312047,  0.00629086,  0.00184585,  0.0018266 ,
          0.00329771,  0.00352135,  0.00246634,  0.00261958,  0.00357113,
          0.00307333,  0.00211512,  0.00125184,  0.00212255,  0.00307451,
          0.00171408,  0.0126576 ,  0.00252346,  0.00528872,  0.0026387 ,
          0.00283739,  0.00394586,  0.00207473,  0.00307515,  0.002017  ,
          0.00408066,  0.00185709,  0.00316201,  0.00349098,  0.00415104,
          0.00348125,  0.00069981,  0.00128145,  0.0023404 ,  0.00396659,
          0.00240324,  0.01251434,  0.00125352,  0.00266113,  0.00435828,
          0.00066137,  0.00221134,  0.00083185,  0.00278664,  0.00118505,
          0.00335414,  0.00340527,  0.0026939 ,  0.00096786,  0.00214149,
          0.0026521 ,  0.00155538,  0.00300255,  0.0040405 ,  0.00275396,
          0.00077404,  0.00257667,  0.00268743,  0.00279948,  0.0018655 ,
          0.00239569,  0.0032419 ,  0.00288355,  0.00123361,  0.00220135,
          0.0021836 ,  0.00225123,  0.00366629,  0.00279189,  0.00058814,
          0.00310452,  0.00276981,  0.00128716,  0.00074161,  0.00358908,
          0.003292  ,  0.00233592,  0.00317694,  0.00381526,  0.00269197,
          0.00098085,  0.00231831,  0.00133682,  0.00460957,  0.00387842,
          0.0004473 ,  0.0015644 ,  0.00247717,  0.00179484,  0.00281831,
          0.00053689,  0.00415889,  0.00232736,  0.00361601,  0.00192624,
          0.00224487,  0.00210838,  0.00140079,  0.00608319,  0.00211861,
          0.00230604,  0.00124033,  0.0029389 ,  0.00227564,  0.00086638,
          0.0035496 ,  0.00228789,  0.00361703,  0.00270277,  0.00196611,
          0.00206865,  0.00146788,  0.00019011,  0.00222272,  0.00351472,
          0.00305718,  0.00239471,  0.00040766,  0.00299186,  0.00368983,
          0.00244158,  0.00084154,  0.00109796,  0.00278565,  0.00135904,
          0.00424855,  0.00323784,  0.00255397,  0.00234946,  0.00210558,
          0.00291688,  0.00172516,  0.00284473,  0.00308164,  0.00316225,
          0.0041659 ,  0.00055891,  0.00303591,  0.00028217,  0.00261526,
          0.00196658,  0.00264379,  0.00018002,  0.00227361,  0.00190785,
          0.00344782,  0.00305479,  0.00057851,  0.00115452,  0.00365707,
          0.0009598 ,  0.00184313,  0.00286183,  0.00400594,  0.0003848 ,
          0.00086102,  0.00277779,  0.00214625,  0.00329827,  0.00129511,
          0.00114751,  0.00249452,  0.00236266,  0.00353646,  0.00319208,
          0.00540883,  0.00323167,  0.00299791,  0.00025745,  0.00227873,
          0.00228826,  0.0040653 ,  0.00238598,  0.00483883,  0.00054585,
          0.00091663,  0.00037232,  0.0008229 ,  0.00073563,  0.00283771,
          0.0035899 ,  0.00578833,  0.0032107 ,  0.0014048 ,  0.00401052,
          0.002748  ,  0.00229416,  0.00130351,  0.00308403,  0.00146506,
          0.00188529,  0.00236308,  0.00259649,  0.00185155,  0.00230195,
          0.00421584,  0.00231917,  0.00227335,  0.00296253,  0.00077996,
          0.0001668 ,  0.00069015,  0.00220702,  0.00238395,  0.00034903,
          0.00303323,  0.00407338,  0.00178655,  0.00456887,  0.00254606,
          0.00215019,  0.00306377,  0.00134979,  0.00112832,  0.00350681,
          0.00253643,  0.00431348,  0.00094915,  0.00150396,  0.00043838,
          0.00207101,  0.00301119,  0.00057716,  0.00062709,  0.00543404,
          0.00061686,  0.00237189,  0.00522715,  0.00321869,  0.00172645,
          0.00244482,  0.00334951,  0.00183201,  0.00038157,  0.0023022 ,
          0.00418559,  0.00329119,  0.00411452,  0.00089033,  0.00283673,
          0.00210368,  0.00222242,  0.00213262,  0.0033576 ,  0.00250707,
          0.00423595,  0.00237407,  0.00127654,  0.00387341,  0.00216695,
          0.00325004,  0.00246333,  0.00396034,  0.0031676 ,  0.00354552,
          0.00227099,  0.00205363,  0.00128859,  0.00290737,  0.00301655,
          0.00319576,  0.00072449,  0.00230528,  0.00326406,  0.00283315,
          0.00338869,  0.00212552,  0.00135612,  0.00250613,  0.00045907,
          0.0014009 ,  0.00177951,  0.00042544,  0.00073249,  0.00303487,
          0.0013664 ,  0.00248306,  0.00025601,  0.00435174,  0.00443799,
          0.00479944,  0.0009997 ,  0.00275155,  0.00286969,  0.00244896,
          0.00177604,  0.00278218,  0.00078876,  0.00142078,  0.00186949,
          0.0018215 ,  0.0027254 ,  0.00316367,  0.00192957,  0.00176559,
          0.00289111,  0.00048977,  0.00411342,  0.00130383,  0.00250934,
          0.00324275,  0.00159243,  0.00334068,  0.00324279,  0.00158259,
          0.00041714,  0.00161102,  0.00145149,  0.00222112,  0.00296289,
          0.00282892,  0.00123731,  0.00281891,  0.00016613,  0.0014267 ,
          0.00262089,  0.00367506,  0.00281706,  0.00318947,  0.00090315,
          0.00230826,  0.00310803,  0.00889549,  0.00197781,  0.00160006,
          0.00307063,  0.00176858,  0.00252353,  0.00141795,  0.00047073,
          0.00241224,  0.00165672,  0.00138939,  0.00257068,  0.00148445,
          0.00193734,  0.004368  ,  0.00247817,  0.00249266,  0.00329317,
          0.00078468,  0.00045822,  0.00259324,  0.00298367,  0.00335009,
          0.00307879,  0.00325237,  0.00254531,  0.00749495,  0.0026701 ,
          0.00100689,  0.00184948,  0.00317616,  0.00255977,  0.00112342,
          0.00165774,  0.00227449,  0.00064219,  0.00269639,  0.00114312,
          0.00203549,  0.00064574,  0.00130932,  0.00304631,  0.00131053,
          0.00174587,  0.0027975 ,  0.00461148,  0.0015227 ,  0.0027072 ,
          0.00210673,  0.00323388,  0.00028426,  0.00113429,  0.00315131]

    VAR = [  3.87111312e-06,   1.29838726e-05,   1.23895436e-05,
           5.11051819e-06,   1.87834728e-05,   5.81101229e-05,
           1.22431672e-05,   3.14238203e-06,   6.15186426e-06,
           1.16054974e-05,   2.61629851e-05,   1.51823678e-05,
           3.20501352e-05,   6.75625364e-06,   6.90383937e-06,
           7.10772563e-06,   3.93108356e-06,   1.38147699e-05,
           9.45390664e-06,   6.18869987e-06,   1.23460353e-03,
           5.15741591e-06,   1.27185867e-05,   7.62148434e-05,
           9.61369316e-06,   3.59794999e-06,   4.49714597e-05,
           1.15313013e-04,   2.51027515e-06,   3.23518027e-06,
           1.15175054e-05,   5.55007797e-05,   3.61287015e-06,
           4.24901217e-06,   1.57731133e-05,   8.83739880e-06,
           4.11832891e-06,   4.51594425e-06,   5.66233716e-06,
           2.76312055e-05,   3.10286633e-05,   2.06523833e-04,
           4.99679342e-06,   3.59423460e-05,   5.53408014e-06,
           5.02979264e-06,   2.29845095e-05,   3.52580303e-06,
           4.74110466e-06,   2.77776825e-06,   1.15279947e-05,
           4.78634098e-06,   8.24242505e-06,   1.65141090e-05,
           1.84669015e-05,   1.65851869e-05,   9.69125917e-07,
           4.07269628e-06,   4.79411492e-06,   7.95185399e-06,
           6.05491604e-06,   2.30133633e-04,   2.43045915e-06,
           9.99138675e-06,   1.61846281e-05,   1.36250194e-06,
           3.83900385e-06,   4.03501076e-06,   4.49190746e-06,
           2.20133970e-06,   1.40571788e-05,   1.23973871e-05,
           1.91642968e-05,   1.83384119e-06,   3.55110501e-06,
           6.38707023e-06,   7.58389225e-06,   9.66052931e-06,
           1.33459561e-05,   6.01834583e-06,   1.75975058e-06,
           9.93625536e-06,   5.57880408e-06,   5.20632392e-06,
           2.63891241e-06,   4.96341232e-06,   1.35361419e-05,
           5.09588225e-06,   2.13213362e-06,   3.67884149e-06,
           4.02580880e-06,   3.36118966e-06,   1.23913905e-05,
           1.19327162e-05,   1.33013390e-06,   1.56844681e-05,
           5.05235129e-06,   3.27510379e-06,   4.18496352e-06,
           1.32615022e-05,   8.00089632e-06,   5.24889508e-06,
           7.61725520e-06,   2.45732025e-05,   4.73942392e-06,
           3.26874106e-06,   4.19502445e-06,   4.67408597e-06,
           4.07529951e-05,   1.85623369e-05,   1.42640177e-06,
           9.02420306e-06,   3.99465979e-06,   2.91695819e-06,
           7.51525182e-06,   3.28339831e-06,   9.23579413e-06,
           8.82938566e-06,   1.67017625e-05,   7.18046179e-06,
           6.67502140e-06,   4.53568390e-06,   4.59241197e-06,
           9.71055426e-05,   4.06108283e-06,   3.21309715e-06,
           2.83145362e-06,   1.30979068e-05,   4.30934096e-06,
           1.33494112e-06,   1.23067054e-05,   4.55467345e-06,
           4.16151366e-05,   4.39300907e-06,   3.81081336e-06,
           3.57599046e-06,   2.44792045e-06,   1.04884156e-06,
           5.66646773e-06,   1.38454953e-05,   7.03958785e-06,
           7.96561298e-06,   1.15832827e-06,   5.34098000e-06,
           1.08664502e-05,   5.33706713e-06,   1.58029233e-06,
           4.16948014e-06,   1.10410603e-05,   3.08923185e-06,
           3.60056097e-05,   1.35575315e-05,   7.21297470e-06,
           5.46186866e-06,   3.83067878e-06,   4.93382163e-06,
           8.74249160e-06,   6.95763983e-06,   8.57639945e-06,
           1.99238085e-05,   2.06143616e-05,   4.15158574e-06,
           6.98539924e-06,   7.29978665e-07,   1.05324242e-05,
           4.03610511e-06,   4.54024757e-06,   1.12380259e-06,
           7.25149490e-06,   4.68609708e-06,   4.47583007e-05,
           5.73128000e-06,   1.55383559e-06,   6.10201277e-06,
           1.56226083e-05,   2.07417481e-06,   3.92362694e-06,
           5.07511158e-06,   1.91527526e-05,   1.23196439e-06,
           2.78105795e-06,   6.20886459e-06,   9.77619759e-06,
           4.54569998e-05,   3.69801329e-06,   3.90055801e-06,
           8.95043365e-06,   4.62714915e-06,   8.59072207e-06,
           7.93476416e-06,   2.94461267e-05,   1.27513460e-05,
           6.37168538e-06,   1.42869302e-06,   3.88169829e-06,
           3.73479924e-06,   3.41961106e-05,   5.99249536e-06,
           3.52894229e-05,   3.60535269e-06,   1.97432492e-06,
           1.08726206e-06,   6.34745318e-06,   1.85853697e-06,
           4.88355657e-06,   1.45421337e-05,   4.71209759e-05,
           9.75886239e-06,   1.92188254e-06,   2.44175182e-05,
           6.48665880e-06,   3.77833988e-06,   4.94021824e-06,
           1.11375076e-05,   2.48913056e-06,   7.50221434e-06,
           7.71706724e-06,   4.40449246e-06,   5.01260110e-06,
           7.55913298e-06,   9.61114153e-06,   4.71524238e-06,
           5.71612330e-06,   5.35067657e-06,   1.24371020e-06,
           1.05315411e-06,   3.93981671e-06,   4.10917913e-06,
           4.50131192e-06,   1.41029887e-06,   5.21404239e-06,
           3.10300539e-05,   2.86295992e-06,   3.14574375e-05,
           4.13089781e-06,   3.94511845e-06,   5.21837923e-06,
           1.86040011e-06,   4.33877122e-06,   6.79169351e-06,
           7.34233345e-06,   2.46684357e-05,   6.04518227e-06,
           3.50075336e-06,   1.22008735e-06,   3.82670787e-06,
           1.29928488e-05,   1.30317263e-06,   1.82923403e-06,
           1.68159694e-04,   1.39570985e-06,   6.82018782e-06,
           2.77705938e-05,   5.50219803e-06,   6.94297855e-06,
           5.56691651e-06,   4.40913139e-05,   8.64954832e-06,
           1.13623461e-06,   3.91895303e-06,   2.90528320e-05,
           8.95829181e-06,   2.13802762e-05,   1.45383845e-06,
           2.19748855e-05,   2.92403666e-06,   4.11580346e-06,
           3.79422424e-06,   1.01354981e-05,   1.12666398e-05,
           2.12954971e-05,   4.73278161e-06,   2.26826965e-06,
           2.45301255e-05,   5.86185180e-06,   6.92235736e-06,
           8.42678526e-06,   2.47795958e-05,   6.25412728e-06,
           1.41974527e-05,   3.95337688e-06,   7.16912125e-06,
           2.00884144e-06,   2.00349034e-05,   5.97662651e-06,
           3.01450892e-05,   4.63002816e-06,   4.09857661e-06,
           1.23373959e-05,   5.62286236e-06,   1.23868932e-05,
           7.79128188e-06,   4.02737664e-06,   4.26867074e-06,
           1.30633550e-06,   2.16092242e-06,   2.53344988e-06,
           1.55130629e-06,   1.20587686e-06,   8.47719131e-06,
           1.72865161e-06,   8.85885938e-06,   1.36250583e-06,
           3.02467214e-05,   2.85941868e-05,   1.68684969e-05,
           2.17024274e-06,   9.09429716e-06,   1.12517072e-05,
           5.39997088e-06,   3.16738113e-06,   7.44227101e-06,
           1.39521345e-06,   1.80325624e-06,   3.23437991e-06,
           4.12906812e-06,   6.51981136e-06,   7.28606378e-06,
           4.44469608e-06,   4.00705337e-06,   1.34244753e-05,
           1.34953189e-06,   3.86701616e-05,   4.30733919e-06,
           4.29618197e-06,   1.67568650e-05,   5.39451612e-06,
           8.50733433e-06,   1.04900918e-05,   4.68246794e-06,
           2.92591087e-06,   2.54589900e-06,   6.68970689e-06,
           3.68698856e-06,   5.70542637e-06,   1.57329410e-05,
           3.45199222e-06,   7.27799975e-06,   8.64176250e-07,
           5.59882582e-06,   4.16052401e-06,   1.73753080e-05,
           7.85748797e-06,   6.46626446e-06,   2.23241624e-06,
           6.79217908e-06,   6.18545939e-06,   5.41203600e-04,
           2.75355566e-06,   5.01654998e-06,   9.55004050e-06,
           3.36241075e-06,   4.95540827e-06,   4.38650100e-06,
           2.19975452e-06,   4.99878215e-06,   2.08615031e-06,
           6.57349770e-06,   6.07825138e-06,   1.82116637e-05,
           3.98356104e-06,   3.02862803e-05,   1.45275531e-05,
           1.80111343e-05,   1.81263109e-05,   1.37630960e-06,
           1.01588605e-06,   1.09961427e-05,   7.09189456e-06,
           8.63553483e-06,   1.28377215e-05,   1.15539997e-05,
           4.30247032e-06,   3.69651334e-05,   1.13411365e-05,
           1.43191945e-06,   2.76733205e-06,   7.03730009e-06,
           4.93027252e-06,   2.72768641e-06,   3.15867713e-06,
           3.51786262e-06,   1.33668414e-06,   5.15268762e-06,
           2.24808552e-06,   3.91888753e-06,   1.96848802e-06,
           5.96948656e-06,   6.72807533e-06,   2.52024742e-06,
           4.64795350e-06,   6.00152269e-06,   4.42994740e-05,
           2.59223022e-06,   4.76032620e-06,   3.15249648e-06,
           1.02942457e-05,   7.54992395e-07,   2.48130225e-06,
           5.97253972e-06]

    x_original = np.array(x_original)
    x_original -= MEAN
    x_original /= VAR
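    # x_original is now standardized feature-wise against the hard-coded
    # statistics above; note the division is by the variance, not the
    # standard deviation.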

    def extend_x(arr, additions=True, extension=True):
        # Optionally append the raw values, then eight summary statistics.
        # (np.* is used here; the scipy.std/var/mean/median aliases were
        # removed from SciPy's top-level namespace in modern releases.)
        if extension:
            x.extend(arr)
        if additions:
            x.append(np.std(arr))
            x.append(np.var(arr))
            x.append(sum(arr) / len(arr))          # arithmetic mean
            x.append(sum(np.abs(arr)) / len(arr))  # mean absolute value
            x.append(min(arr))
            x.append(max(arr))
            x.append(np.mean(arr))                 # duplicates sum/len above
            x.append(np.median(arr))

    x = []

    extend_x(x_original)
    extend_x(np.abs(x_original))
    # extend_x(np.sqrt(np.abs(x_original)))

    # sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50, random_state=1)
    # zzz1 = sampler1.fit_transform(np.abs(np.array(orig)))[0]

    # sampler2 = SkewedChi2Sampler(skewedness=8.5, n_components=50, random_state=1)
    # zzz2 = sampler2.fit_transform(np.abs(np.array(x)))[0]

    sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
    # fit_transform expects a 2-D array; reshape the feature list into one sample
    zzz3 = sampler3.fit_transform(np.array(x).reshape(1, -1))[0]

    # zzz1 and zzz2 come from the SkewedChi2Samplers commented out above,
    # so these calls are disabled as well to avoid a NameError:
    # extend_x(list(zzz1))
    # extend_x(list(zzz2))
    extend_x(list(zzz3))

    if make_np:
        return np.array(x)
    
    return x
Exemplo n.º 47
0
class ExposeDetector(AnomalyDetector):

  """ This detector is an implementation of The EXPoSE (EXPected Similarity
  Estimation) algorithm as described in Markus Schneider, Wolfgang Ertel,
  Fabio Ramos, "Expected Similarity Estimation for Lage-Scale Batch and
  Streaming Anomaly Detection", arXiv 1601.06602 (2016).

  EXPoSE calculates the likelihood of a data point being normal by using
  the inner product of its feature map with kernel embedding of previous data
  points. This measures the similarity of a data point to previous points
  without assuming an underlying data distribution.

  There are three EXPoSE variants: incremental, windowing and decay. This
  implementation is based on EXPoSE with decay. All three variants have been
  tried on NAB but decay gives the best results.Parameters for this detector
  have been tuned to give the best performance.
  """

  def __init__(self, *args, **kwargs):
    super(ExposeDetector, self).__init__(*args, **kwargs)

    self.kernel = None
    self.previousExposeModel = []
    self.decay = 0.01
    self.timestep = 0


  def initialize(self):
    """Initializes RBFSampler for the detector"""
    self.kernel = RBFSampler(gamma=0.5,
                             n_components=20000,
                             random_state=290)


  def handleRecord(self, inputData):
    """ Returns a list [anomalyScore] calculated using a kernel based
    similarity method described in the comments below"""

    # Transform the input by approximating feature map of a Radial Basis
    # Function kernel using Random Kitchen Sinks approximation
    inputFeature = self.kernel.fit_transform(
      numpy.array([[inputData["value"]]]))

    # Compute expose model as a weighted sum of new data point's feature
    # map and previous data points' kernel embedding. Influence of older data
    # points declines with the decay factor.
    if self.timestep == 0:
      exposeModel = inputFeature
    else:
      exposeModel = ((self.decay * inputFeature) + (1 - self.decay) *
                     self.previousExposeModel)

    # Update previous expose model
    self.previousExposeModel = exposeModel

    # Compute anomaly score by calculating similarity of the new data point
    # with expose model. The similarity measure, calculated via inner
    # product, is the likelihood of data point being normal. Resulting
    # anomaly scores are in the range of -0.02 to 1.02.
    # .item() extracts the scalar (numpy.asscalar is deprecated)
    anomalyScore = (1 - numpy.inner(inputFeature, exposeModel)).item()
    self.timestep += 1

    return [anomalyScore]
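
A minimal standalone sketch of the decay update that handleRecord implements, shown here with a smaller, hypothetical n_components and a toy value stream in place of NAB data:

import numpy as np
from sklearn.kernel_approximation import RBFSampler

kernel = RBFSampler(gamma=0.5, n_components=200, random_state=290)
decay = 0.01
model = None
for value in [0.10, 0.11, 0.09, 5.0]:  # toy stream; 5.0 should look anomalous
    # feature map of the new point (the fixed random_state keeps the maps
    # consistent across repeated fit_transform calls, as in the detector)
    phi = kernel.fit_transform(np.array([[value]]))
    # decay-weighted kernel embedding of all points seen so far
    model = phi if model is None else decay * phi + (1 - decay) * model
    # low similarity to the running embedding => high anomaly score
    print(1 - np.inner(phi, model).item())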
Exemplo n.º 48
0
def transform(x_original, make_np=True):
    orig = x_original
    variances_str = "0.0021246993507595866 0.0032713784391997795 0.0033522806931598247 0.0017432450192796278 0.0034743692038798537 0.003637888546929857 0.0019210039127597624 0.0021841610994196136 0.0018762718393396005 0.0034590054363498003 0.0052604099446999682 0.004508790286140099 0.0035272400244497799 0.0030404807453598324 0.0022447918038096385 0.0017851536926196112 0.0021643550482296344 0.0037976255097098874 0.0025753731081197833 0.0029230906247597055 0.0060828219621099217 0.0023575999971396813 0.0043864294801700945 0.0071589655821691772 0.0036986840015399082 0.00057556662468004468 0.0030184163825898096 0.0062797556933995476 0.0018388575003994976 0.0018222650139394971 0.0032805952842698042 0.0035132540814598752 0.0024659598304896477 0.0026319448493497136 0.003572205969799843 0.0030648003435798008 0.0021365654833496528 0.0012356635529695108 0.0021261889005796605 0.0030134591283298012 0.0016100815367798148 0.012523000339860027 0.002519218599329652 0.0052571679389798714 0.0026606913287896975 0.0028296754183797139 0.0039323969569099605 0.0020691205227195992 0.0030826525382697508 0.0020232189983895653 0.0040679867872599708 0.0018371556472196301 0.0031808009477497599 0.0034889724135098699 0.0041241983089198644 0.003466312111199805 0.00070525738208999413 0.0012962120699994075 0.0023748498468496439 0.0039468429845199238 0.0024428431670496745 0.012215355168679928 0.0012535008249493743 0.0026764566235297597 0.0043243784063398552 0.00065200872076008631 0.0022265717804095869 0.00081018893256987797 0.0027757838127496974 0.0011937874021293784 0.0033124457059298595 0.0033779817461398022 0.0026583629339898352 0.00096654598538961438 0.0021773139189896237 0.002624655562289701 0.0015705430665195477 0.0030252402714297136 0.0040940954038199478 0.0027594978981697318 0.00079096095234988185 0.0026036506797997572 0.0027190828795197546 0.0027920414767097406 0.0018699793252895133 0.002401434445989645 0.0031948320317497989 0.0028928477797297309 0.001254727068959367 0.0022096979193596154 0.0021709718136396741 0.0022725767293796106 0.0036734258169697923 0.0028088068982497589 0.00058128786511008252 0.0030860261422598389 0.0028005311404197221 0.0013144850578592786 0.00075680244248994735 0.003594669478579891 0.0032807255223097792 0.0023280524667396774 0.00318162350717981 0.0038591178877899067 0.0027019215482496691 0.00097254474824969451 0.0023080437106096615 0.0013613457456093684 0.0045951612643399054 0.0038485342457099387 0.00043219164003003777 0.001528150938759669 0.0024822021413396867 0.0018061700621995042 0.0028432498431096936 0.00055539853847006056 0.004173783897349969 0.0023134058954397316 0.0035923805665898493 0.001944158411359583 0.0022174885522996423 0.0021200232347196586 0.0014086675440495285 0.0060588732600395838 0.0020999206563196006 0.002311535350179601 0.0012081675861494046 0.0029662122591298679 0.0023064668532896651 0.00086526146860972403 0.0035453290259598483 0.0022721631862096265 0.003677016888759915 0.0027193153269396897 0.0019698620481495626 0.002072663196939612 0.0014700221401894075 0.00017158202360999703 0.0022463464680696336 0.0035194326419099174 0.0030686680423197867 0.002374867405639663 0.00042710055163003362 0.0030035550561797468 0.0037270432987298683 0.0024282900953096712 0.00086048954793975898 0.0011186456857096038 0.0027912283038996942 0.0013746583237494142 0.0043072999357398533 0.0032034503423598666 0.0025760441755196838 0.0023421858856196836 0.002131599313139612 0.0029099423010796777 0.0016998768135196812 0.0028229397603697181 0.0030535556897598208 0.003180828002529861 
0.0041489816552998261 0.00056885910910004086 0.0030288286590998306 0.0002859839918500021 0.0025907458249397565 0.0019840401991995621 0.0026709580203396733 0.00018365706286999837 0.0023102737736697076 0.0019214511389595858 0.0035872736249698512 0.0030397738456597189 0.00058895044087008347 0.0011302665188195724 0.0036135427626998772 0.00096930456685965713 0.0018706273234795688 0.0028471338214996859 0.0040263350593498478 0.00041504772780003257 0.00088363138039978097 0.0027967429290597077 0.0021579785680196756 0.0032100556617598404 0.0012821952431594156 0.0011697489935395071 0.0024514963691797428 0.0024098468797296444 0.0035879574826698079 0.003169685177989759 0.0053416716965498916 0.0031958328667698248 0.003017081933489743 0.00025151594039000199 0.0022886760678696417 0.0022956408480896266 0.0041254424031998971 0.0023694221563096735 0.0047916681473398276 0.00051616048678002784 0.0009364954557196728 0.00036740167022002141 0.00078959433233993142 0.0007410161818699483 0.0028233597298397656 0.0035765694441198263 0.0057271246152496317 0.0031925037529198339 0.0014168537242193022 0.0040282638127298667 0.0027408330144697043 0.0022817147531596685 0.0013110019340695283 0.0031049698000498423 0.0014794847673093696 0.0019060075812395761 0.0023860511557697102 0.0025873434738996485 0.0018797211826496064 0.0022561836261797042 0.0041991871207300085 0.0023698767044296855 0.0022702994190196093 0.0029535219055797368 0.00079702808800984168 0.00017141315798999718 0.00071072931258999632 0.0022027503444296218 0.0023522028982396696 0.00034261818457001714 0.0030124186968896794 0.0040563730303498731 0.0018014168708095377 0.0045389503904098493 0.0025631905209596659 0.0021709940360196437 0.0031014667275497628 0.0013724805472092871 0.0011206960384995625 0.0035493743115597959 0.0025190975770797062 0.0042803605014598489 0.0009058812431398496 0.0015261301214595528 0.00043206103726003953 0.002057161621769605 0.0029676093005998037 0.00059245340563008307 0.00060303803797007931 0.0055309290333298089 0.0006063130560400961 0.0024001375326397033 0.0051570050648799921 0.003216390780179791 0.001684353076369542 0.0024379539857596923 0.0033096221900098537 0.001808786421229587 0.00036056760674001951 0.0022999638755596282 0.0042300521607298008 0.0033374014801298532 0.0041061077925497727 0.00090300806356967953 0.0027771389140698217 0.0020966763969595594 0.0022364965134396191 0.0021630341014396426 0.003362866027789783 0.0025631540862897312 0.004191926116449857 0.0023811464991296992 0.0013004726735392649 0.0038548863857898333 0.0021571933421396868 0.0032544925816697214 0.0024967996225797357 0.0039128733433798774 0.0032033546653597454 0.0035349716580698469 0.0022774309789496266 0.0020827816616296431 0.001280163236199224 0.0029566993924298487 0.0030278382394197082 0.0031458574724698 0.00073484413224997748 0.0023053594018396508 0.0032629178035998552 0.0028317322999097433 0.0033847674035998084 0.0021507182045496622 0.0013635142890994728 0.0025417732184397166 0.00046798538031004748 0.0014196850140693168 0.001775496716359453 0.00041823802366003517 0.00072539019745996431 0.0030393665008997704 0.0013936213581092793 0.0024650105378997201 0.0002448311107500009 0.0043274930097698871 0.0045290280761799487 0.0047295668273101684 0.0010168427077595955 0.0027754963934396339 0.0028934546900597821 0.0024947583902996968 0.0017947966152195337 0.002808371739829744 0.00080562592018981933 0.0014184058297892733 0.0018558152750695453 0.0018534208896895739 0.0027403346575797425 0.0031581041628497997 0.0019250669095596151 0.0017553527272695774 0.002912743471719791 
0.00051881062016005577 0.0041509390442198381 0.0013269250644194269 0.002515913493569724 0.0032034703723998357 0.0015867479873494805 0.0033147417203898185 0.0032343107633697474 0.0016084849715195411 0.00041333437351003248 0.0015982072633194113 0.0014028860576195891 0.0022158183125796393 0.0029487353931697447 0.0028615529172198303 0.0012540566466694289 0.0028261495420197243 0.00017822631116999813 0.0014531231202394163 0.0025906615127396855 0.0036318312786498171 0.002825987395589701 0.0032132990932597881 0.00093148496318973544 0.0022986618991797251 0.0031201742482197584 0.0088757592945090114 0.0019739854059195429 0.0015964743898695729 0.0030620168350797899 0.0017549143672195243 0.0025403744949397296 0.0013998610671793503 0.00050658872377004334 0.0024219329259397276 0.0016578000335194041 0.0014255931402395057 0.0025947821308797258 0.0015455710208097471 0.0019424337106196282 0.0043638276133198444 0.0024791513534598046 0.002471546965979776 0.0032594199180097532 0.00081732890395981583 0.00047798563291005168 0.0026265644132597047 0.0029957660721997665 0.0033466747844698567 0.0030434931783497998 0.0032186603864098446 0.0025580746428896777 0.0074381240438289309 0.0026177068932397522 0.0010374525766094667 0.0018484145568895259 0.0032105816832397539 0.0025588880273796702 0.0011027058149395553 0.00165028316301944 0.0022621210840096185 0.00063843135713010388 0.002677249425599694 0.0011529594838495104 0.0020757956716295806 0.00063164132836008679 0.0012984328854694727 0.0030668599805997697 0.0013209850432293402 0.0017350537225995246 0.0027999960618096992 0.0045968238896799086 0.0015396509469794125 0.0026842448170297231 0.0020969214423495791 0.0032249556936598013 0.00029111348006000424 0.0011551860431694666 0.0031812251568797824"
    #mn = np.array(map(float, means_str.split(" ")))
    #mn = np.fromstring(means_str)
    variances = np.fromstring(variances_str, sep=' ')
    means_str = "3.8753948237858108e-06 1.2972946111794674e-05 1.2594051521366083e-05 5.0841523278404734e-06 1.8774317409263048e-05 6.2913210996917487e-05 1.269807222669888e-05 3.2193349475262057e-06 6.5226200570272061e-06 1.1473588836338628e-05 2.7180466935587737e-05 1.4762302565458717e-05 3.3722317512532468e-05 6.8216505240041436e-06 7.1028116499628903e-06 6.5493827073439618e-06 3.80367131264172e-06 1.4028847130371071e-05 9.3773632055309283e-06 6.493323349342037e-06 0.0012533506935897218 4.9911335763841195e-06 1.2793399333055094e-05 7.251611930188133e-05 9.5489822043414659e-06 3.8895300628186868e-06 4.173457402556971e-05 0.00011347419063456421 2.5715278760111459e-06 3.2518257183024889e-06 1.1746203655396577e-05 5.564016383146592e-05 3.6296631509353909e-06 4.3289811407316681e-06 1.6025500646546836e-05 8.7246747361516438e-06 4.2410327327645271e-06 4.3732089713098806e-06 5.9073865563619062e-06 2.4944097977347468e-05 2.6986158170267078e-05 0.00019357426874984057 5.1764074423215301e-06 3.5213588425492417e-05 5.6548098935816624e-06 4.9935937088475483e-06 2.3828362907972465e-05 3.521023866293484e-06 4.9870702736337188e-06 2.7658266039366798e-06 1.1424139609302174e-05 4.6380793952958809e-06 8.1857174384998292e-06 1.6642225648910047e-05 1.8268643132929127e-05 1.5473118685259949e-05 9.7616078787441458e-07 4.1097607144367696e-06 5.0459663323074957e-06 8.1752036387080678e-06 6.2517426726346483e-06 0.00021128251533625498 2.4441154311918049e-06 1.0193291769369655e-05 1.6000078417860217e-05 1.3360615760691735e-06 3.9318274983244583e-06 3.7424801978201094e-06 4.5859948912655592e-06 2.1863893895928264e-06 1.4465960374765088e-05 1.226800721873276e-05 1.8464105024954982e-05 1.6648636202068534e-06 3.6936226607579947e-06 6.5624020308052344e-06 8.1339303452353934e-06 9.5047711128428641e-06 1.4246167594118415e-05 6.0140973294197884e-06 1.8256200156735017e-06 1.0903757639504039e-05 5.5080914174679564e-06 5.2142169736994904e-06 2.6292604236996645e-06 4.9623024158512934e-06 1.3171420269231491e-05 5.1064782563443342e-06 2.2201233797532346e-06 3.5523146873797785e-06 4.0447453033151591e-06 3.4393314844283629e-06 1.2283374778942664e-05 1.2292876875817127e-05 1.3500473667799135e-06 1.5982740863426082e-05 5.1149263226338105e-06 3.6545265412690049e-06 4.4324293930103502e-06 1.3464151551507424e-05 8.2607323905827565e-06 5.3487969307959027e-06 7.699747933440781e-06 2.6028092053793074e-05 4.6160336251911396e-06 3.4679078250202434e-06 4.1733322591036512e-06 5.3685295356327671e-06 4.1690461279070458e-05 1.8175584863744415e-05 1.4529974714941822e-06 8.9646541680474962e-06 3.8638936584656166e-06 2.9622882868516527e-06 7.0496709821419259e-06 3.1582263769680431e-06 9.405912339046591e-06 9.0755581225100531e-06 1.6325319116706371e-05 7.4249528783198223e-06 6.4142049677004635e-06 4.5308256388559377e-06 4.3379101302365048e-06 0.00010082767573262403 3.8073220474859233e-06 3.2462395975613701e-06 2.7311928376618711e-06 1.3798802536934602e-05 4.3141812822167945e-06 1.3418830948478911e-06 1.2429912124862659e-05 4.5075176921294976e-06 5.1646366657811792e-05 4.5044907401523191e-06 3.8984503442526084e-06 3.5443432542494581e-06 2.4525978397502771e-06 9.3143290305042167e-07 5.5977615024444758e-06 1.4190797086073543e-05 6.9561233764939789e-06 8.0114861452901582e-06 1.2454920191746878e-06 5.5587154982870272e-06 1.0799672251505274e-05 5.2959102834492533e-06 1.5685688647449261e-06 4.0529428722210623e-06 1.1678512895855624e-05 3.2192802988981066e-06 3.7209970472627806e-05 1.3342539819491425e-05 7.8622903069455567e-06 5.2192321914900928e-06 
3.9052134579505441e-06 5.0680769571043553e-06 7.9552828837898563e-06 6.7762118492538826e-06 8.5875102642240075e-06 2.0992545616427373e-05 2.0487505271743291e-05 4.3745997535029968e-06 7.1046977878669946e-06 7.7167495498190023e-07 1.0141932308464567e-05 4.2219873766408028e-06 4.5710190852658248e-06 1.1970402654479661e-06 7.6102614732724262e-06 4.6239298630603015e-06 4.9995946799371758e-05 5.7956634809724437e-06 1.5024720589152287e-06 6.0635032731039673e-06 1.5391627780641011e-05 2.178652052162647e-06 4.2030056647134055e-06 5.0822379579415565e-06 1.9836303495641017e-05 1.8930994307717652e-06 3.0604158961858623e-06 6.5280625603827021e-06 1.0265727137904331e-05 4.0302422231094213e-05 3.7750836192671517e-06 4.0367914908354297e-06 8.0446362665366717e-06 4.7656248380853414e-06 8.6978972436276061e-06 7.9679700766206762e-06 2.9451374286812033e-05 1.3111273739035649e-05 6.6028118897700181e-06 1.4941804584231896e-06 3.9528326512917906e-06 3.847295383196301e-06 3.5756600152130488e-05 6.10565382283349e-06 3.5891435340776665e-05 3.6066217532076844e-06 2.0888559126779404e-06 1.0755002920858641e-06 5.8998610038911923e-06 1.9512692088167549e-06 5.0713400804749472e-06 1.4585512351101608e-05 4.822908984311966e-05 9.8016096252778945e-06 1.9911328814957375e-06 2.4764204976600043e-05 6.4805250037636707e-06 3.7935478658080509e-06 5.1083549212952252e-06 1.1189053457458745e-05 2.5200508287861594e-06 7.8373349366817099e-06 7.9847294470099685e-06 4.3095275213819756e-06 5.0268163315597379e-06 7.4832981742681862e-06 9.2408501776852945e-06 4.784135850487231e-06 5.6532252724841891e-06 5.3930817570733614e-06 1.2687973462442569e-06 1.0372124095824449e-06 4.1435096417718113e-06 3.9981959056867675e-06 4.3520178967986713e-06 1.4659748060826231e-06 5.3366902864809163e-06 3.1416765924193689e-05 2.9749844077512922e-06 3.1381515491784522e-05 4.3260417959669591e-06 4.197030498717592e-06 5.306570430382929e-06 1.8883854746421685e-06 4.1937519548496871e-06 6.9194038197555032e-06 7.1767073252994241e-06 2.4833484439498967e-05 5.8383610252210572e-06 3.6243330253608428e-06 1.18902799300137e-06 3.8963636200265115e-06 1.2883918919165478e-05 1.3605525456692033e-06 1.7407965336936251e-06 0.00016857768522088627 1.4100686994311071e-06 7.1668903489840609e-06 2.7108318215380169e-05 5.4590558436375845e-06 6.2033647867466643e-06 5.6859033955868132e-06 4.3241078188076546e-05 9.0432098151017242e-06 1.0594888618529579e-06 4.0484870451845699e-06 2.9548153849811755e-05 9.410471996079331e-06 2.1009809505791367e-05 1.4939978216919125e-06 2.1026313371938338e-05 2.912760631269843e-06 4.1130865661336849e-06 4.0964425120045752e-06 1.0334704132812778e-05 1.1639088987295558e-05 2.0866544215744135e-05 4.7665503013673322e-06 2.4282885077105844e-06 2.4696946110127049e-05 5.8943453758772547e-06 7.0559765519393299e-06 8.6495232917104309e-06 2.4674527585132413e-05 6.5466440985476235e-06 1.4291488938382783e-05 4.0363838996778781e-06 7.5171096440058871e-06 1.7659216070078882e-06 2.3552682868282767e-05 6.0075484731317116e-06 2.9678689121826856e-05 4.5688281985000224e-06 4.2587818969459276e-06 1.2282850125910679e-05 5.6981633973611215e-06 1.2193919548692016e-05 7.7909581862542261e-06 4.1995999932004883e-06 4.2310001927379966e-06 1.4034983645177226e-06 2.2253775626039904e-06 2.5484625453534006e-06 1.5024773624760737e-06 1.1886813960082901e-06 8.943485028332714e-06 1.802211533446878e-06 8.7804607030574995e-06 1.4714171056899874e-06 3.039182778117474e-05 2.9469599285561173e-05 1.6190782721728404e-05 2.1980748656966054e-06 9.1492500963304843e-06 1.3139192984142854e-05 
5.5841754669416901e-06 3.2663084979403296e-06 7.8300182408015622e-06 1.4650747681293603e-06 1.8418132244867557e-06 3.1634249051793445e-06 4.2879811205541378e-06 6.821776038991282e-06 7.2547994800721606e-06 4.5762861000866325e-06 4.0033741553487421e-06 1.3944663969273685e-05 1.5205123797572826e-06 3.7950333845819879e-05 4.6914603422440762e-06 4.3642212832058213e-06 1.6888537402380868e-05 5.3097299301474431e-06 8.5974973592752354e-06 1.0183715675617148e-05 4.4233012671049924e-06 2.8020268713604479e-06 2.4903519176724564e-06 6.0367933560789913e-06 3.6066482258866671e-06 5.5465358638439433e-06 1.6406145480373579e-05 3.6475034103942783e-06 7.545922378704344e-06 1.0510117913470496e-06 6.383917613657175e-06 4.1469930879045612e-06 1.5104979761103841e-05 7.9249357960338965e-06 6.6303162793237734e-06 2.3058946881412919e-06 6.93384276789908e-06 6.2217404008410318e-06 0.00053927751612010478 2.7688222907463807e-06 5.1593082062395665e-06 9.4327080926443393e-06 3.3336519843947502e-06 5.1198323130590842e-06 4.5094438342118166e-06 2.6237274608190453e-06 5.1693775448212788e-06 2.1082108591551617e-06 6.8329929120308474e-06 6.2018823452726071e-06 2.1240415994925091e-05 4.0243827456115514e-06 3.0522891049621393e-05 1.4011920974680818e-05 1.7239640547533074e-05 1.7993086639426091e-05 1.4355334226673438e-06 1.1012319919274514e-06 1.0614538321433708e-05 7.2890435254277739e-06 8.5872764781091643e-06 1.3084966706891505e-05 1.1094006709484758e-05 4.2456925142930984e-06 3.6872244517667462e-05 1.0859154502284048e-05 1.5319903891298572e-06 2.7727900163087534e-06 7.2483213769211959e-06 5.1159362377455894e-06 2.6822480525986132e-06 2.889767166323531e-06 3.55288675821463e-06 1.3380456162305463e-06 5.2278105015869388e-06 2.3031150972921671e-06 4.1508531796520333e-06 1.8528326040776206e-06 6.3815646996712558e-06 6.9338240811962186e-06 2.5793558575700516e-06 4.3737400474318956e-06 6.0837447954729297e-06 4.7903414469400619e-05 2.8013740155544375e-06 4.7622560053896967e-06 3.250652556381526e-06 9.5664014501971676e-06 8.2542503434926804e-07 2.5912870572853299e-06 6.0526418572379129e-06"
    #variances = np.array(map(float, variances_str.split(" ")))
    means = np.fromstring(means_str, sep=' ')

    x_original = np.array(x_original)
    #x_original -= means
    #x_original /= variances
    x_original -= means
    x_original /= variances

    #x_original = np.delete(xxxx_original, features_ordered_by_importance2[-1:])
    #most_important_features1 = np.delete(x_original, features_ordered_by_importance2[5:])

    x = []

    def sqr(x):
        return x * x

    def sqr3(x):
        return x * x * x

    def e_pow(x):
        return math.exp(x)

    def me_pow(x):
        return math.exp(-x)

    def fred(x):
        return round(math.fabs(x) * 1000)
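    # Note: sqr, sqr3, e_pow, me_pow and fred are referenced only by the
    # commented-out feature experiments further down in this function.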

    def extend_x(arr, additions=True, extension=True):
        # Optionally append the raw values, then eight summary statistics
        # (np.* replaces the removed scipy.std/var/mean/median aliases).
        if extension:
            x.extend(arr)
        if additions:
            x.append(np.std(arr))
            x.append(np.var(arr))
            x.append(sum(arr) / len(arr))          # arithmetic mean
            x.append(sum(np.abs(arr)) / len(arr))  # mean absolute value
            x.append(min(arr))
            x.append(max(arr))
            x.append(np.mean(arr))                 # duplicates sum/len above
            x.append(np.median(arr))


    def count_smaller_ratio(arr, delta):
        return sum(1 if el <= delta else 0 for el in arr) / len(arr)
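    # e.g. count_smaller_ratio([1, 2, 3, 4], 2) == 0.5 (two of the four
    # elements are <= 2); used only in the commented-out blocks below.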


    if True:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))

        #rbf_feature = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
        #zzz = rbf_feature.fit_transform(np.array(x))[0]
        #extend_x(list(zzz))

    if False:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))

        sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50, random_state=1)
        # reshape to a single 2-D sample, as fit_transform expects
        zzz1 = sampler1.fit_transform(np.array(orig).reshape(1, -1))[0]

        #sampler2 = SkewedChi2Sampler(skewedness=8.5, n_components=50, random_state=1)
        #zzz2 = sampler2.fit_transform(np.array([i + 1.0 for i in x]))[0]

        sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
        zzz3 = sampler3.fit_transform(np.array(x).reshape(1, -1))[0]
        x = []
        extend_x(x_original)
        #extend_x(np.abs(x_original))
        #extend_x(np.sqrt(np.abs(x_original)))

        extend_x(list(zzz1))
        #extend_x(list(zzz2))
        extend_x(list(zzz3))

    if False:
        #rbf_feature = RBFSampler(gamma=0.0025, random_state=2, n_components=100)
        #zzz = rbf_feature.fit_transform(np.array(x_original))[0]
        #extend_x(list(zzz))
        pass

    if False:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))


    #for i in x_original:
    #    print i
    #
    # x.append(count_smaller_ratio(x_original, 0.1))
    # x.append(count_smaller_ratio(x_original, 0.2))
    # x.append(count_smaller_ratio(x_original, 0.3))
    # x.append(count_smaller_ratio(x_original, 0.4))
    # x.append(count_smaller_ratio(x_original, 0.5))
    # x.append(count_smaller_ratio(x_original, 0.6))
    # x.append(count_smaller_ratio(x_original, 0.7))
    # x.append(count_smaller_ratio(x_original, 0.8))
    # x.append(count_smaller_ratio(x_original, 0.9))
    # x.append(count_smaller_ratio(x_original, 1.0))
    # x.append(count_smaller_ratio(x_original, -0.1))
    # x.append(count_smaller_ratio(x_original, -0.2))
    # x.append(count_smaller_ratio(x_original, -0.3))
    # x.append(count_smaller_ratio(x_original, -0.4))
    # x.append(count_smaller_ratio(x_original, -0.5))
    # x.append(count_smaller_ratio(x_original, -0.6))
    # x.append(count_smaller_ratio(x_original, -0.7))
    # x.append(count_smaller_ratio(x_original, -0.8))
    # x.append(count_smaller_ratio(x_original, -0.9))
    # x.append(count_smaller_ratio(x_original, -1.0))

    #x.append(count_smaller_ratio(x_original, 0.01))
    #x.append(count_smaller_ratio(x_original, 0.001))
    #x.append(count_smaller_ratio(x_original, 0.0001))
    #x.append(count_smaller_ratio(x_original, 0.00001))
    #x.append(count_smaller_ratio(x_original, 0.000001))
    #x.append(count_smaller_ratio(x_original, 0.00000000001))

    # Do something with most_important_features1

    #extend_x(np.expm1(x_original))
    #extend_x(np.square(x_original))
    #extend_x(map(me_pow, x_original))
    #extend_x(np.sqrt(np.sqrt(np.abs(x_original))))
    #extend_x((np.sqrt(np.sqrt(orig)) - np.sqrt(np.sqrt(means))) / np.sqrt(np.sqrt(variances)))

    #extend_x([(-1 if i < 0 else (0 if i == 0 else 1)) for i in x_original])
    #x.append(sum([i if i > 0 else 0 for i in x_original]) / len(x_original))
    #x.append(sum([i if i < 0 else 0 for i in x_original]) / len(x_original))
    #extend_x(np.tanh(x_original))
    #extend_x(np.cos(x_original))

    #extend_x(map(e_pow, x_original))

    #extend_x(np.sqrt())
    #extend_x(np.sqrt(np.abs(x_original)))
    #extend_x((np.sqrt(orig) - np.sqrt(means)) / np.sqrt(variances))
    #extend_x(map(e_pow, x_original))
    #extend_x(map(sqr, map(e_pow, x_original)))

    #x.append(sum(np.abs(x_original)) / len(x_original))
    #x.append(1.)


    #x.extend(map(math.sin, x_original))
    #x.extend(map(math.sin, map(math.sqrt, x_original)))

    #extend_x(map(math.sqrt, map(e_pow, x_original)))
    #extend_x(map(math.sqrt, map(math.sqrt, x_original)))


    #x.extend(map(fred, x_original))
    #x.extend(map(sqr3, x_original))
    #x.extend(map(me_pow, x_original))
    #x.extend(map(math.log, x_original))

    if make_np:
        return np.array(x)

    return x
Exemplo n.º 49
0
#	K(x, y) = exp(-gamma ||x-y||^2)
#	sigma = sqrt( 1/(2*gamma) )
#	gamma = 1/(2*sigma^2)
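#	For example (illustrative numbers): sigma = 10 gives
#	gamma = 1/(2*10^2) = 0.005, and sqrt(1/(2*0.005)) = sqrt(100) = 10
#	recovers sigma.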

# imports added so this snippet runs standalone
import time

import numpy as np
import sklearn.metrics.pairwise
from sklearn.kernel_approximation import Nystroem, RBFSampler

num_of_samples = 14000
X = np.random.random((num_of_samples, 5))
sampling_percentage = 0.05

start_time = time.time()
RFF = RBFSampler(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = RFF.fit_transform(X)
# Gram matrix of the random features approximates the exact RBF kernel
RFF_estimated_kernel = V.dot(V.T)
print("--- RFF Time : %s seconds ---" % (time.time() - start_time))

start_time = time.time()
N = Nystroem(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = N.fit_transform(X)
estimated_kernel = V.dot(V.T)
print("--- Nystroem Time : %s seconds ---" % (time.time() - start_time))


start_time = time.time()
real_kernel = sklearn.metrics.pairwise.rbf_kernel(X, gamma=1)