def NaiveDecomposableGaussianORFF(X, A, gamma=1.,
                                  D=100, eps=1e-5, random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite)
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    # Decompose A=BB^T
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    return matrix(kron(phiX, B))
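# A minimal usage sketch (an assumption, not part of the original snippet): with
# the imports the function relies on in scope (svd, dot, diag, sqrt, kron, matrix
# from numpy/scipy and RBFSampler from sklearn), the product Phi * Phi.T should
# approximate the Gram matrix of the decomposable Gaussian kernel. The check below
# uses the identity operator, so the exact Gram is simply kron(rbf_kernel(X, X), I).
import numpy as np
from numpy import kron
from sklearn.metrics.pairwise import rbf_kernel

X_demo = np.random.randn(30, 4)
A_demo = np.eye(2)
Phi = NaiveDecomposableGaussianORFF(X_demo, A_demo, gamma=.5, D=2000)
K_approx = np.asarray(Phi * Phi.T)                            # (n_samples * 2, n_samples * 2)
K_exact = kron(rbf_kernel(X_demo, X_demo, gamma=.5), A_demo)
print(np.abs(K_approx - K_exact).max())                       # small for large D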
def NaiveCurlFreeGaussianORFF(X, gamma=1.,
                              D=100, eps=1e-5, random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for singular values (not used by the curl-free map).
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D,
                       random_state=random_state)
    phiX = phi_s.fit_transform(X)
    phiX = (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
            phi_s.random_weights_.reshape((1, -1, phiX.shape[1])))

    return matrix(phiX.reshape((-1, phiX.shape[2])))
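# Quick shape check (a sketch, assuming numpy and the imports used above are in
# scope): for X with n samples and d features, the returned matrix stacks one
# d x D block per sample, i.e. it has shape (n * d, D).
import numpy as np

X_demo = np.random.randn(20, 3)
Phi = NaiveCurlFreeGaussianORFF(X_demo, gamma=.5, D=64)
print(Phi.shape)  # expected: (20 * 3, 64)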
def test_rbf_sampler():
    """test that RBFSampler approximates kernel on random data"""
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    assert_array_almost_equal(kernel, kernel_approx, 1)
def test_rbf_sampler():
    # test that RBFSampler approximates kernel on random data
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert_less_equal(np.abs(np.mean(error)), 0.01)  # close to unbiased
    np.abs(error, out=error)
    assert_less_equal(np.max(error), 0.1)  # nothing too far off
    assert_less_equal(np.mean(error), 0.05)  # mean is fairly close
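# Both versions of test_rbf_sampler above assume module-level arrays X and Y and
# the usual test helpers. A hypothetical stand-in for running them in isolation
# (names and data are assumptions, not the original fixtures):
import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel


def assert_less_equal(a, b):
    # hypothetical helper mirroring the assertion style used in the tests
    assert a <= b, "%r is not less than or equal to %r" % (a, b)


rng = np.random.RandomState(0)
X = rng.random_sample(size=(300, 50))
Y = rng.random_sample(size=(300, 50))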
Example #5
 def trainSGD(self):
     sgd = SGDClassifier(
         loss=self.loss,
         penalty=self.reg,
         alpha=self.alpha,
         n_iter=self.epochs,
         shuffle=True,
         n_jobs=self.multicpu,
         class_weight="auto",
     )
     # print "Classifier (sklearn SGD): training the model \t(%s)"%self.dspath
     if self.kernel_approx is True:
         rbf_feature = RBFSampler(gamma=1, n_components=100, random_state=1)
         Xk = rbf_feature.fit_transform(self.X)
         self.glm = OneVsRestClassifier(sgd).fit(Xk, self.Y)
     else:
         self.glm = OneVsRestClassifier(sgd).fit(self.X, self.Y)
     print "Classifier (sklearn SGD): Done. \t(%s)" % self.dspath
Example #6
def train_models(X_train, y_train, X_test, y_test):
    clf = linear_model.SGDClassifier(penalty='elasticnet')
    print(clf)
    print("fitting a linear elasticnet (L1+L2 regularized linear classif.) with SGD")
    clf = clf.fit(X_train, y_train)
    print("score on the training set", clf.score(X_train, y_train))
    print("score on 80/20 split", clf.score(X_test, y_test))

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_train_feats = rbf_feature.fit_transform(X_train)
    X_test_feats = rbf_feature.transform(X_test)
    print("fitting a linear elasticnet with SGD on RBF sampled features")
    clf = clf.fit(X_train_feats, y_train)
    print("score on the training set", clf.score(X_train_feats, y_train))
    print("score on 80/20 split", clf.score(X_test_feats, y_test))

    clf2 = RandomForestClassifier(max_depth=None, min_samples_split=3)
    print(clf2)
    print("fitting a random forest")
    clf2 = clf2.fit(X_train, y_train)
    print("score on the training set", clf2.score(X_train, y_train))
    print("score on 80/20 split", clf2.score(X_test, y_test))

    clf3 = svm.SVC(kernel='linear')
    print(clf3)
    print("fitting an SVM with a linear kernel")
    clf3 = clf3.fit(X_train, y_train)
    print("score on the training set", clf3.score(X_train, y_train))
    print("score on 80/20 split", clf3.score(X_test, y_test))

    clf4 = svm.SVC(kernel='rbf')
    print(clf4)
    print("fitting an SVM with an RBF-kernel")
    clf4 = clf4.fit(X_train, y_train)
    print("score on the training set", clf4.score(X_train, y_train))
    print("score on 80/20 split", clf4.score(X_test, y_test))

    clf5 = linear_model.LogisticRegression(penalty='l1', tol=0.01)
    print(clf5)
    print("fitting a logistic regression reg. with L1")
    clf5 = clf5.fit(X_train, y_train)
    print("score on the training set", clf5.score(X_train, y_train))
    print("score on 80/20 split", clf5.score(X_test, y_test))
Example #7
    def __init__(self, X, y, dataset, policy_name, scale=True, n_iter=10, passive=True):
        seed = RandomState(1234)
        self.X = np.asarray(X, dtype=np.float64)
        self.y = np.asarray(y)
        self.X = StandardScaler().fit_transform(self.X) if scale else self.X
        self.policy_name = policy_name
        self.dataset = dataset
        self.passive = passive

        # estimate the kernel using the 90th percentile heuristic
        random_idx = seed.choice(X.shape[0], 1000)
        distances = pairwise_distances(self.X[random_idx], metric='l1')
        self.gamma = 1 / np.percentile(distances, 90)
        transformer = RBFSampler(gamma=self.gamma, random_state=seed, n_components=100)
        self.X_transformed = transformer.fit_transform(self.X)

        n_samples = self.X.shape[0]
        train_size = min(10000, int(0.7 * n_samples))
        test_size = min(20000, n_samples - train_size)
        self.kfold = StratifiedShuffleSplit(self.y, n_iter=n_iter, test_size=test_size,
                                            train_size=train_size, random_state=seed)
def EfficientDecomposableGaussianORFF(X, A, gamma=1.,
                                      D=100, eps=1e-5, random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite)
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    # Decompose A=BB^T
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    cshape = (D, B.shape[0])
    rshape = (X.shape[0], B.shape[1])
    return LinearOperator((phiX.shape[0] * B.shape[1], D * B.shape[0]),
                          matvec=lambda b: dot(phiX, dot(b.reshape(cshape),
                                               B)),
                          rmatvec=lambda r: dot(phiX.T, dot(r.reshape(rshape),
                                                B.T)),
                          dtype=float)
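# Usage sketch (an assumption, not part of the original snippet): the efficient
# variant returns a scipy LinearOperator, so the feature map is applied lazily
# instead of being materialized as a dense Kronecker product.
import numpy as np

X_demo = np.random.randn(50, 4)
A_demo = np.eye(3)
Phi = EfficientDecomposableGaussianORFF(X_demo, A_demo, gamma=.5, D=100)
theta = np.random.randn(Phi.shape[1])  # coefficients in the random-feature space
y_pred = Phi * theta                   # matvec; length n_samples * n_targets
print(Phi.shape, y_pred.shape)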
def EfficientCurlFreeGaussianORFF(X, gamma=1.,
                                  D=100, eps=1e-5, random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for singular values (not used by the curl-free map).
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D,
                       random_state=random_state)
    phiX = phi_s.fit_transform(X)

    phiXw = (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
             phi_s.random_weights_.reshape((1, -1, phiX.shape[1])))
    phiXw = phiXw.reshape(phiX.shape[0] * X.shape[1], phiX.shape[1])

    return LinearOperator((phiX.shape[0] * X.shape[1], phiX.shape[1]),
                          matvec=lambda b: dot(phiXw, b),
                          rmatvec=lambda r: dot(phiXw.T, r),
                          dtype=float)
Example #10
    def __init__(self, name: str, input_size: int, filter_size: int,
                 gamma: float, m: int, R: float, r: int, lr: float):

        self.name = name
        self.input_size = input_size    
        self.filter_size = filter_size
        self.patch_size = filter_size ** 2
        self.output_size = self.input_size - self.filter_size + 1
        self.n_patchs = self.output_size ** 2        
        self.m = m
        self.R = R
        self.lr = lr
        
        self.rbf_feature = RBFSampler(gamma=gamma, n_components=m, random_state=1)
        self.svd = TruncatedSVD(n_components=r)
Example #11
    def get_orff_map(self, X, D=100, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        self.r = 1
        if not hasattr(self, 'Xb_'):
            self.phi_ = RBFSampler(gamma=self.gamma,
                                   n_components=D, random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X)
            self.Xb_ = (self.Xb_.reshape((self.Xb_.shape[0],
                                          1, self.Xb_.shape[1])) *
                        self.phi_.random_weights_.reshape((1, -1,
                                                           self.Xb_.shape[1])))
            self.Xb_ = self.Xb_.reshape((-1, self.Xb_.shape[2]))

        D = self.phi_.n_components
        if X is self.Xb_:
            return LinearOperator(self.Xb_.shape,
                                  matvec=lambda b: dot(self.Xb_, b),
                                  rmatvec=lambda r: dot(self.Xb_.T, r))
        else:
            Xb = self.phi_.transform(X)
            # TODO:
            # w = self.phi_.random_weights_.reshape((1, -1, Xb.shape[1]))
            # wn = np.linalg.norm(w)
            # Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
            #       wn * np.eye()w np.dot(w.T, w) / wn)
            Xb = Xb.reshape((-1, Xb.shape[2]))
            return LinearOperator(Xb.shape,
                                  matvec=lambda b: dot(Xb, b),
                                  rmatvec=lambda r: dot(Xb.T, r))
# (Reconstructed; the start of this statement was truncated in the original
# snippet.) A character n-gram SimilarityEncoder is assumed for the drug-name
# column, keeping the two keyword arguments that survived the truncation:
similarity_encoder = SimilarityEncoder(similarity='ngram',
                                       random_state=42,
                                       ngram_range=(2, 4))

# Fit the rbf_sampler with the similarity matrix.
column_transformer = make_column_transformer(
    (similarity_encoder, ['NONPROPRIETARYNAME']),
    (OneHotEncoder(handle_unknown='ignore'), ['DOSAGEFORMNAME', 'ROUTENAME']),
    sparse_threshold=1)

transformed_categories = column_transformer.fit_transform(X_encoder)

# gamma is a parameter of the rbf function, that sets how fast the similarity
# between two points should decrease as the distance between them rises. It
# is data-specific, and needs to be chosen carefully, for example using
# cross-validation.
rbf_sampler = RBFSampler(gamma=0.5, n_components=n_out_rbf, random_state=42)
rbf_sampler.fit(transformed_categories)
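# As the comment above says, gamma is data-specific and is best tuned by
# cross-validation. A self-contained sketch on synthetic data (values and
# estimator choices are illustrative assumptions, not the drug-name pipeline):
import numpy as np
from sklearn.datasets import make_classification
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

X_demo, y_demo = make_classification(n_samples=200, n_features=20, random_state=0)
pipe = make_pipeline(RBFSampler(random_state=42), LogisticRegression(max_iter=1000))
search = GridSearchCV(pipe, {'rbfsampler__gamma': [0.01, 0.1, 0.5, 1.0]}, cv=3)
search.fit(X_demo, y_demo)
print(search.best_params_)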


def encode(X, y_int, one_hot_encoder, column_transformer, rbf_sampler):
    X_sim_encoded = column_transformer.transform(X)

    X_highdim = rbf_sampler.transform(X_sim_encoded.toarray())

    y_onehot = one_hot_encoder.transform(y_int.reshape(-1, 1))

    return X_highdim, y_onehot


# The inputs and labels of the val and test sets have to be pre-processed the
# same way the training set was processed:
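# For example (hypothetical split names; X_val/y_val_int and X_test/y_test_int
# are assumptions standing in for the actual hold-out sets):
# X_val_highdim, y_val_onehot = encode(X_val, y_val_int, one_hot_encoder,
#                                      column_transformer, rbf_sampler)
# X_test_highdim, y_test_onehot = encode(X_test, y_test_int, one_hot_encoder,
#                                        column_transformer, rbf_sampler)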
def run(args):
#if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    #method = args[0]
    #RBF_components = args[1]
    #MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(process_index+100)
    #process_index = 0
    #np.random.seed(process_index + 100)
    #vel_var = args[5]
    #num_targets = args[6]

    method = 0
    RBF_components = 20
    MLP_neurons = 50
    vel_var = .001
    num_targets = min(6,max(2,np.random.poisson(3)))
    num_targets = np.random.randint(2,10)
    #num_targets = 4


    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight2"] = np.random.normal(0, .3, [2, num_states_layer2])
        #params[0]["weight2"] = np.array([[  3.97573312,   0.4639474 ,   2.27280486,  12.9085868 ,
         #   3.45722461,   6.36735166],
         #[-11.87940874,   2.59549414,  -5.68556954,   2.87746786,
          #  7.08059984,   5.5631133 ]])

        params[0]["weight"] = np.array([[7.18777985, -13.68815256, 1.69010242, -5.62483187,
                           -4.30451483, 10.09592853],
                         [13.33104057, 13.60537864, 3.46939294, 0.8446329,
                         -14.79733566, -4.78599648]])

        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    weight_saver2_1 = []
    weight_saver2_2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    init_max_target = 3
    num_targets = init_max_target
    while episode_counter<N_max:
        if episode_counter%1000==0 and episode_counter>0:
            init_max_target +=1
            init_max_target = min(20,init_max_target)
        
        if episode_counter%100==0 and episode_counter>0:
            num_targets = np.random.randint(3,init_max_target+1)
        sigma = gen_learning_rate(episode_counter,sigma_max,.1,5000)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*np.random.random([num_targets])-5000#initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000#initial y-location
        xdot = 10*np.random.random([num_targets])-5#initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5#initial ydot-value

        #TEMP
        #x = [2000,-2000]
        #y = [2000,2000]
        #xdot = [1,1]
        #ydot = [-1,-1]

        init_target_state = []
        init_for_smc = []
        for target_counter in range(0,num_targets):
            init_target_state.append([x[target_counter],y[target_counter],xdot[target_counter],ydot[target_counter]])#initialize target state
            init_for_smc.append([x[target_counter]+np.random.normal(0,5),y[target_counter]
                                 +np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)])#init state for the tracker (tracker doesn't know about the initial state)


        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation
        t = []
        for i in range(0,num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var, "CONS_V"))#constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)#Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0,num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A,B,x_var,y_var,bearing_var))#create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = [];
        y_truth = [];
        x_vel_truth = [];
        y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0,num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_state_out_layer = []
        episode_MLP_state = []
        episode_actions = []
        avg_uncertainty= []
        max_uncertainty = []

        while episode_condition:
            temp_m = []
            input_state_temp = []
            for i in range(0,num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,s.current_location))

            m.append(temp_m)
            temp_reward = []
            target_actions = []
            for i in range(0,num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1])/tracker_object[i].innovation_var[-1]
                #print(normalized_innovation)
                #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)

                #print(current_state)
                #state normalization
                x_slope = 2.0/(scen.x_max-scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)

                x_slope_sensor = 2.0 / (40000)
                y_slope_sensor = 2.0 / (40000)

                vel_slope = 2.0/(scen.vel_max-scen.vel_min)
                #normalization
                current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)


                #Refactor states based on the usage
                if method==0 or method==2:
                    input_state = current_state
                    input_state_temp.append(input_state) #store input-sates
                elif method==1:
                    #Generate states for the RBF input
                    input_state =  featurizer.transform(np.array(current_state).reshape(1,len(current_state)))
                    input_state = list(input_state[0])


                target_actions.append(s.generate_action(params,input_state,.01))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state) ####Neeed to get modified
                if method==2: episode_MLP_state.append(extra_information) #need to get modified
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4]-np.array([t[i].current_velocity[0],t[i].current_velocity[1]]).reshape(2,1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                #if unormalized_uncertainty>MAX_UNCERTAINTY:
                #   normalized_uncertainty = 1
                #else:
                #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                #if len(uncertainty[i])<window_size+window_lag:
                 #   temp_reward.append(0)
                #else:
                 #   current_avg = np.mean(uncertainty[i][-window_size:])
                  #  prev_avg = np.mean(uncertainty[i][-(window_size+window_lag):-window_lag])
                   # if current_avg<prev_avg or uncertainty[i][-1]<.1:
                    #if current_avg < prev_avg:
                    #    temp_reward.append(1)
                    #else:
                     #   temp_reward.append(0)

            this_uncertainty = []
            [this_uncertainty.append(uncertainty[x][-1]) for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    # if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)

            #voting
            #if np.mean(temp_reward)>.5:
             #   reward.append(np.mean(temp_reward))
            #else:
             #   reward.append(np.mean(temp_reward))

            #if sum(reward)>1100 and num_targets>2: sys.exit(1)

            #Do something on target_actions
            #Create feature-vector from generated target actions

            normalized_state,index_matrix1,index_matrix2,slope = s.update_location_decentralized(target_actions,sigma,params) #Update the sensor location based on all individual actions
            #index_matrix: an n_s \times T matrix that shows the derivative of state in the output layer to the action space in the internal-layer

            backpropagated_to_internal_1 = index_matrix1.dot(np.array(input_state_temp))#8 by 6
            backpropagated_to_internal_2 = index_matrix2.dot(np.array(input_state_temp))# 8 by 6

            episode_state_out_layer.append(normalized_state)
            episode_state.append([backpropagated_to_internal_1,backpropagated_to_internal_2]) #each entry would be a T \times 6 matrix with T being the number of targets
            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])

        prev_params = dict(params)
        condition = True
        for i in range(0,num_targets):
            if np.mean(pos_error[i])>10000:
                condition = False
                break
                episode_condition = False
                episode_counter-=1

        if not condition:
            #print("OOPSSSS...")
            continue
        #if episode_counter%100==0 and training:
            #print("Starting the evaluation phase...")
            #training = False
            #episode_condition = False


        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-12,20000)
            internal_rate = gen_learning_rate(episode_counter, 3*1E-5, 1E-15, 20000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                #calculate gradiant
                #state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]),1)
                backpropagated_terms = episode_state[e]

                #calculate gradient
                if method==0:
                    deriv_with_out_state = (episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).transpose().dot(params[0]['weight2']) #1 by n_s==> derivative of F with respect to the output state-vector
                    internal_gradiant1 = deriv_with_out_state.dot(backpropagated_terms[0]) #1 by 6
                    internal_gradiant2 = deriv_with_out_state.dot(backpropagated_terms[1]) #1 by 6
                    internal_gradiant = np.concatenate([internal_gradiant1,internal_gradiant2])

                    #gradiant = ((episode_actions[e].reshape(2,1)-params[0]['weight'].dot(state)).dot(state.transpose()))/sigma**2#This is the gradiant
                    gradiant_out_layer = ((episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).dot(
                        out_state.transpose())) / sigma ** 2  # This is the gradiant
                elif method==1:
                    gradiant = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2  # This is the gradiant
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradiant_out_layer))>1E2 or np.max(np.abs(internal_gradiant))>1E2:
                    #print("OOPPSSSS...")
                    continue #clip large gradients

                if method==0:
                    adjustment_term_out_layer = gradiant_out_layer*normalized_discounted_return[e]#an unbiased sample of return
                    adjustment_term_internal_layer = internal_gradiant*normalized_discounted_return[e]
                    params[0]['weight2'] += rate * adjustment_term_out_layer
                    params[0]['weight'] += internal_rate* adjustment_term_internal_layer
                elif method==1:
                    adjustment_term = gradiant * normalized_discounted_return[e]  # an unbiased sample of return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            flatted_weights1 = list(params[0]['weight'][0, :]) + list(params[0]['weight'][1, :])
            flatted_weights2 = list(params[0]['weight2'][0, :]) + list(params[0]['weight2'][1, :])
            temp1 = []
            [temp1.append(str(x)) for x in flatted_weights1]
            temp2 = []
            [temp2.append(str(x)) for x in flatted_weights2]

            weight_file.write("\t".join(temp1)+"$$$"+"\t".join(temp2)+"\n")
            #flatted_weights = list(weight[0, :]) + list(weight[1, :])
            #temp = []
            #[temp.append(str(x)) for x in flatted_weights]
            #weight_file.write("\t".join(temp)+"\n")
            weight_saver1.append(params[0]['weight'][0][0])
            weight_saver2.append(params[0]['weight'][1][0])

            weight_saver2_1.append(params[0]['weight2'][0][0])
            weight_saver2_2.append(params[0]['weight2'][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            # if episode_counter%100==0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)
Example #14
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=11)

# Average CV score on the training set was:0.6341268075639599
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.9),
    XGBClassifier(learning_rate=0.01,
                  max_depth=4,
                  min_child_weight=7,
                  n_estimators=100,
                  nthread=1,
                  subsample=0.55))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
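# One might then score the exported pipeline on the held-out split (a sketch;
# accuracy is just one reasonable choice of metric here):
from sklearn.metrics import accuracy_score
print(accuracy_score(testing_target, results))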
Example #15
def approximate_smooth_weight_kernel_ridge_regression(
        data, labels, ind_mat, opts):  #, ITERS=100, THRESH=1e-6):
    """
        Define a multitask regression problem in which tasks are locally smooth (e.g. bin size prediction), and introduce a penalty in which weights of regressors of related tasks are encouraged to be similar. Instead of optimizing the dual and using explicit kernel matrices, random kitchen sinks style kernel approximations are used.
    """

    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.metrics.pairwise import rbf_kernel
    from sklearn.kernel_approximation import RBFSampler
    D = opts['APPROXIMATION_DIM']
    sm = RBFSampler(gamma=1. / opts['kpar']**2,
                    n_components=D,
                    random_state=666)

    def retrieve_neigh_norm(W, ind_w, ss):
        L = W.shape[1]
        hs = int(np.floor(ss / 2))
        if ind_w < hs:
            W_subs = W[:, 0:(ind_w + hs + 1)]
        elif ind_w > (L - hs - 1):
            W_subs = W[:, (ind_w - hs):L]
        else:
            W_subs = W[:, (ind_w - hs):(ind_w + hs + 1)]
        return 1 / (ss) * np.sum(W_subs, axis=1)

    W = np.random.rand(D, opts['TASKS'])

    # init stuff
    k = 0
    epsi = np.Inf
    loss = []
    while (epsi > opts['THRESH']) & (k < opts['ITERS']):
        # schedule = np.random.permutation(range(T))
        schedule = range(opts['TASKS'])
        W_old = W.copy()
        t_loss = 0
        t_norm_w = 0

        for ind_w in schedule:
            X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
            trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
            trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]
            N = np.sum(ind_mat[:, ind_w] == 1)

            W_mt_n = retrieve_neigh_norm(W, ind_w, opts['WIN_SIZE'])

            A = np.dot(
                trn_X.T,
                trn_X) + opts['par1'] * np.eye(D) + opts['par2'] * np.eye(D)
            A = np.linalg.inv(A)
            B = np.dot(trn_X.T, trn_Y) + opts['par2'] * W_mt_n
            W[:, ind_w] = np.dot(A, B)

            t_loss += 1 / N * np.sum(
                (trn_Y.values - np.dot(trn_X, W[:, ind_w]))**
                2)  # + np.linalg.norm(W[:,ind_w])
            t_norm_w += np.linalg.norm(W[:, ind_w])

        emp_err = 1 / opts['TASKS'] * t_loss
        epsi = np.abs(np.sum(np.sum(W - W_old)))
        loss.append(emp_err)
        # print(f'iter {k}, size W {W.shape}, conv {epsi}, emp error {emp_err}, mean norm w {1/opts["TASKS"] * t_norm_w}')

        k += 1

    if k == opts['ITERS']:
        print(
            f'stopped at the iteration cap: iter {k}, size W {W.shape}, conv {epsi}, '
            f'emp error {emp_err}, mean norm w {1/opts["TASKS"] * t_norm_w}'
        )

    y_hat = np.zeros((opts['DATA_SIZE'][0], opts['TASKS']))
    pred_stats = {}
    pred_stats['tr_RMSE'] = []  #np.inf*np.ones((T))
    pred_stats['tr_R2'] = []  #np.inf*np.ones((T))
    pred_stats['va_RMSE'] = []  #np.inf*np.ones((T))
    pred_stats['va_R2'] = []  #np.inf*np.ones((T))

    # if opts['VAL_MODE']:
    for ind_w in range(opts['TASKS']):
        X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
        trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
        trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]

        pred_tr_Y = np.dot(trn_X, W[:, ind_w])

        pred_stats['tr_RMSE'].append(
            np.sqrt(mean_squared_error(trn_Y, pred_tr_Y)))
        pred_stats['tr_R2'].append(r2_score(trn_Y, pred_tr_Y))

        y_hat[ind_mat[:, ind_w] == 1, ind_w] = pred_tr_Y

        if np.any(ind_mat[:, ind_w] == 2):
            val_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 2, :-1])
            val_Y = X_Y.iloc[ind_mat[:, ind_w] == 2, -1]

            pred_va_Y = np.dot(val_X, W[:, ind_w])

            pred_stats['va_RMSE'].append(
                np.sqrt(mean_squared_error(val_Y, pred_va_Y)))
            pred_stats['va_R2'].append(r2_score(val_Y, pred_va_Y))

            y_hat[ind_mat[:, ind_w] == 2, ind_w] = pred_va_Y

    return W, loss, ind_mat, pred_stats, y_hat
Example #16
def rbk_RFF(X, σ, ɲ=1000):
    γ = 1.0 / (2 * σ * σ)
    rff = RBFSampler(gamma=γ, n_components=ɲ, random_state=None)
    Φₓ = rff.fit_transform(X)
    Ƙ = Φₓ.dot(Φₓ.T)
    return Ƙ
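# Sanity check for rbk_RFF (a sketch; assumes numpy and scikit-learn are
# available): the Monte-Carlo Gram matrix should approach the exact RBF kernel
# with gamma = 1 / (2 * sigma^2) as the number of components grows.
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

X_check = np.random.randn(30, 5)
sigma = 1.0
K_approx = rbk_RFF(X_check, sigma, 20000)
K_exact = rbf_kernel(X_check, gamma=1.0 / (2 * sigma * sigma))
print(np.abs(K_approx - K_exact).max())  # shrinks as the number of components grows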
Example #17
def main():
    type_of_problem = ""
    split = 0.3
    su_train = []
    su_test = []
    p = optparse.OptionParser()
    # take path of training data set
    p.add_option("--path_train", "-p", default="/afs/cern.ch/user/s/sganju/private/2014_target.csv")
    # what type of problem is it? regression/classification/clustering/dimensionality reduction
    p.add_option("--type_of_problem", "-t", default="c")
    # include cross validation true/false
    p.add_option("--cross_validation", "-v", default="True")
    # take the numerical values
    # p.add_option('--numerical_values', '-n')
    # specify target column
    p.add_option("--target", "-y")
    options, arguments = p.parse_args()

    num_values = "id cpu creator dataset dbs dtype era naccess nblk	nevt nfiles nlumis nrel nsites nusers parent primds proc_evts procds rel1_0 rel1_1 rel1_2 rel1_3 rel1_4	rel1_5 rel1_6 rel1_7 rel2_0 rel2_1 rel2_10 rel2_11 rel2_2 rel2_3 rel2_4 rel2_5 rel2_6 rel2_7 rel2_8 rel2_9 rel3_0 rel3_1 rel3_10 rel3_11 rel3_12 rel3_13 rel3_14 rel3_15 rel3_16 rel3_17 rel3_18 rel3_19 rel3_2 rel3_20 rel3_21 rel3_22 rel3_23 rel3_24 rel3_25 rel3_26 rel3_3 rel3_4 rel3_5 rel3_6 rel3_7 rel3_8 rel3_9 relt_0 relt_1 relt_2 rnaccess rnusers rtotcpu s_0 s_1 s_2 s_3 s_4size tier totcpu wct"
    num_values = num_values.split()

    # load from files
    train = pd.read_csv(options.path_train)

    # load target values
    target = train["target"]

    # TRAINING DATA SET
    data = train
    print "Performing imputation."
    imp = data.dropna().mean()
    test = data.fillna(imp)
    data = data.fillna(imp)

    print "Splitting the training data with %f." % split
    features_train, features_test, target_train, target_test = train_test_split(
        data, target, test_size=split, random_state=0
    )
    print "Generating Model"
    # diffrentiate on the basis of type of problem
    # RANDOM FOREST CLASSIFIER
    rf = RandomForestClassifier(n_estimators=100)
    rf = rf.fit(features_train, target_train)
    cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test)

    # Ada boost
    clf_ada = AdaBoostClassifier(n_estimators=100)
    params = {
        "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5],
        "max_features": [0.25, 0.50, 0.75, 1],
        "max_depth": [3, 4, 5],
    }
    gs = GridSearchCV(clf_ada, params, cv=5, scoring="accuracy", n_jobs=4)
    clf_ada.fit(features_train, target_train)
    cal_score("ADABOOST", clf_ada, features_test, target_test)

    # RANDOM FOREST CLASSIFIER
    rf = RandomForestClassifier(n_estimators=100)
    rf = rf.fit(features_train, target_train)
    cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test)
    # predictions = rf.predict_proba(test)
    # Gradient Boosting
    gb = GradientBoostingClassifier(n_estimators=100, subsample=0.8)
    params = {
        "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5],
        "max_features": [0.25, 0.50, 0.75, 1],
        "max_depth": [3, 4, 5],
    }
    gs = GridSearchCV(gb, params, cv=5, scoring="accuracy", n_jobs=4)
    gs.fit(features_train, target_train)
    cal_score("GRADIENT BOOSTING", gs, features_test, target_test)
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(data)

    # SGD CLASSIFIER
    clf = SGDClassifier(
        alpha=0.0001,
        class_weight=None,
        epsilon=0.1,
        eta0=0.0,
        fit_intercept=True,
        l1_ratio=0.15,
        learning_rate="optimal",
        loss="hinge",
        n_iter=5,
        n_jobs=1,
        penalty="l2",
        power_t=0.5,
        random_state=None,
        shuffle=True,
        verbose=0,
        warm_start=False,
    )
    clf.fit(features_train, target_train)
    cal_score("SGD Regression", clf, features_test, target_test)

    # KN Classifier
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(features_train, target_train)
    cal_score("KN CLASSIFICATION", neigh, features_test, target_test)
    # predictions = neigh.predict_proba(test)

    # Decision Tree classifier
    clf_tree = tree.DecisionTreeClassifier(max_depth=10)
    clf_tree.fit(features_train, target_train)
    cal_score("DECISION TREE CLASSIFIER", clf_tree, features_test, target_test)
# Imports restored (the top of this snippet was truncated); these are the
# modules the code below actually uses.
import gym
import numpy as np
import sklearn.pipeline
import sklearn.preprocessing
import matplotlib.style
from sklearn.kernel_approximation import RBFSampler

matplotlib.style.use('ggplot')

env = gym.envs.make("MountainCar-v0")

# Feature Preprocessing: Normalize to zero mean and unit variance# Featu
# We use a few samples from the observation space to do this
observation_examples = np.array([env.observation_space.sample() for x in range(10000)])
scaler = sklearn.preprocessing.StandardScaler()
scaler.fit(observation_examples)

# Used to convert a state to a featurized representation.
# We use RBF kernels with different variances to cover different parts of the space
featurizer = sklearn.pipeline.FeatureUnion([
        ("rbf1", RBFSampler(gamma=5.0, n_components=100)),
        ("rbf2", RBFSampler(gamma=2.0, n_components=100)),
        ("rbf3", RBFSampler(gamma=1.0, n_components=100)),
        ("rbf4", RBFSampler(gamma=0.5, n_components=100))
        ])
featurizer.fit(scaler.transform(observation_examples))
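# The two fitted transformers are typically composed into a small helper like
# the following (a sketch consistent with the comments above; featurize_state
# is an assumed name, not necessarily the original function):
def featurize_state(state):
    """Scale a raw observation, then map it through the RBF feature union."""
    scaled = scaler.transform([state])
    featurized = featurizer.transform(scaled)
    return featurized[0]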

class Estimator():
    """
    Value Function approximator.
    """

    def __init__(self):
        # We create a separate model for each action in the environment's
        # action space. Alternatively we could somehow encode the action
        # into the features, but this way it's easier to code up.
Example #19
     elapTimeNys[i] = timeit.default_timer() - startTime
     XtrainT = kpls.transform(ktrain)
     XtestT = kpls.transform(ktest)
     
     if n==573:
         kplsScoresNys[:,0] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1073:
         kplsScoresNys[:,1] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1573:
         kplsScoresNys[:,2] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
 
 # RBF sampler method
 elapTimeRBFS = np.zeros(np.shape(nComponents))
 kplsScoresRBFS = np.zeros((2,3))
 for i,n in enumerate(nComponents):
     rbfs = RBFSampler(n_components=n,gamma=gamma)
     rbfs.fit(Xtrain)
     ktrain = rbfs.transform(Xtrain)
     ktest = rbfs.transform(Xtest)
     startTime = timeit.default_timer()
     kpls.fit(ktrain,Ytrain)
     elapTimeRBFS[i] = timeit.default_timer() - startTime
     XtrainT = kpls.transform(ktrain)
     XtestT = kpls.transform(ktest)
     
     if n==573:
         kplsScoresRBFS[:,0] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1073:
         kplsScoresRBFS[:,1] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1573:
         kplsScoresRBFS[:,2] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
Example #20
import sys
import os
import numpy as np
import itertools
from sklearn.svm import LinearSVC
from sklearn.kernel_approximation import RBFSampler
from sklearn.kernel_approximation import AdditiveChi2Sampler

pycharm_mode = True
N_FEATURES = 400  # Dimension of the original data.
BATCH_SIZE = 30000

chi = AdditiveChi2Sampler()
chi.fit(np.zeros(N_FEATURES).ravel())
rbf = RBFSampler(gamma=1, random_state=1337, n_components=5500)
rbf.fit(np.zeros(1200).ravel())

def transform(x_original):
    return rbf.transform(chi.transform(x_original)).ravel()

def lines(source):
    for line in source:
        line = line.strip()
        (label, x_string) = line.split(" ", 1)
        label = int(label)
        x_original = np.fromstring(x_string, sep=' ')
        yield label, transform(x_original)

def main():
    if pycharm_mode:
Example #21
class RBFDivFreeKernel(object):
    r"""
    Divergence-free Operator-Valued Kernel of the form:

    .. math::
        X \mapsto K_X(Y) = exp(-\gamma||X-Y||^2)A_{X,Y},

    where,

    .. math::
        A_{X,Y} = 2\gamma(X-Y)(X-Y)^T+((d-1)-2\gamma||X-Y||^2)I.

    Attributes
    ----------
    gamma : {float}
        RBF kernel parameter.

    References
    ----------

    See also
    --------

    RBFDivFreeKernelMap
        Divergence-free Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 2)
    >>> K = ovk.RBFDivFreeKernel(1.)
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, gamma):
        """Initialize the Decomposable Operator-Valued Kernel.

        Parameters
        ----------
        gamma : {float}, shape = [n_targets, n_targets]
            RBF kernel parameter.
        """
        self.gamma = gamma

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable

        .. math::
            K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import RBFDivFreeKernelMap
        return RBFDivFreeKernelMap(X, self.gamma)

    def get_orff_map(self, X, D=100, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        self.r = 1
        if not hasattr(self, 'Xb_'):
            self.phi_ = RBFSampler(gamma=self.gamma,
                                   n_components=D, random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X)
            self.Xb_ = (self.Xb_.reshape((self.Xb_.shape[0],
                                          1, self.Xb_.shape[1])) *
                        self.phi_.random_weights_.reshape((1, -1,
                                                           self.Xb_.shape[1])))
            self.Xb_ = self.Xb_.reshape((-1, self.Xb_.shape[2]))

        D = self.phi_.n_components
        if X is self.Xb_:
            return LinearOperator(self.Xb_.shape,
                                  matvec=lambda b: dot(self.Xb_, b),
                                  rmatvec=lambda r: dot(self.Xb_.T, r))
        else:
            Xb = self.phi_.transform(X)
            # TODO:
            # w = self.phi_.random_weights_.reshape((1, -1, Xb.shape[1]))
            # wn = np.linalg.norm(w)
            # Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
            #       wn * np.eye()w np.dot(w.T, w) / wn)
            Xb = Xb.reshape((-1, Xb.shape[2]))
            return LinearOperator(Xb.shape,
                                  matvec=lambda b: dot(Xb, b),
                                  rmatvec=lambda r: dot(Xb.T, r))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: \begin{cases}
               Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
               K(X, Y) \enskip\text{otherwise.}
               \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
                                          default = None
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable or LinearOperator

        .. math::
            K_x: \begin{cases}
            Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
            K(X, Y) \enskip\text{otherwise}
            \end{cases}
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)
Example #22
    def get_orff_map(self, X, D=100, eps=1e-5, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        u, s, v = svd(self.A, full_matrices=False, compute_uv=True)
        self.B_ = dot(diag(sqrt(s[s > eps])), v[s > eps, :])
        self.r = self.B_.shape[0]

        if (self.scalar_kernel is rbf_kernel) and not hasattr(self, 'Xb_'):
            if self.scalar_kernel_params is None:
                gamma = 1.
            else:
                gamma = self.scalar_kernel_params['gamma']
            self.phi_ = RBFSampler(gamma=gamma,
                                   n_components=D, random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif (self.scalar_kernel == 'skewed_chi2') and not hasattr(self,
                                                                   'Xb_'):
            if self.scalar_kernel_params is None:
                skew = 1.
            else:
                skew = self.scalar_kernel_params['skew']
            self.phi_ = SkewedChi2Sampler(skewedness=skew,
                                          n_components=D,
                                          random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif not hasattr(self, 'Xb_'):
            raise NotImplementedError('ORFF map for kernel is not '
                                      'implemented yet')

        D = self.phi_.n_components
        if X is self.Xb_:
            cshape = (D, self.r)
            rshape = (self.Xb_.shape[0], self.p)
            oshape = (self.Xb_.shape[0] * self.p, D * self.r)
            return LinearOperator(oshape,
                                  dtype=self.Xb_.dtype,
                                  matvec=lambda b: dot(dot(self.Xb_,
                                                           b.reshape(cshape)),
                                                       self.B_),
                                  rmatvec=lambda r: dot(self.Xb_.T,
                                                        dot(r.reshape(rshape),
                                                            self.B_.T)))
        else:
            Xb = self.phi_.transform(X)
            cshape = (D, self.r)
            rshape = (X.shape[0], self.p)
            oshape = (Xb.shape[0] * self.p, D * self.r)
            return LinearOperator(oshape,
                                  dtype=self.Xb_.dtype,
                                  matvec=lambda b: dot(dot(Xb,
                                                           b.reshape(cshape)),
                                                       self.B_),
                                  rmatvec=lambda r: dot(Xb.T,
                                                        dot(r.reshape(rshape),
                                                            self.B_.T)))
Example #23
class DecomposableKernel(object):
    r"""
    Decomposable Operator-Valued Kernel of the form:

    .. math::
        X, Y \mapsto K(X, Y) = k_s(X, Y) A

    where A is a symmetric positive semidefinite operator acting on the
    outputs.

    Attributes
    ----------
    A : {array, LinearOperator}, shape = [n_targets, n_targets]
        Linear operator acting on the outputs

    scalar_kernel : {callable}
        Callable which associate to the training points X the Gram matrix.

    scalar_kernel_params : {mapping of string to any}
        Additional parameters (keyword arguments) for kernel function passed as
        callable object.

    References
    ----------

    See also
    --------

    DecomposableKernelMap
        Decomposable Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 10)
    >>> K = ovk.DecomposableKernel(np.eye(2))
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, A, scalar_kernel=rbf_kernel, scalar_kernel_params=None):
        """Initialize the Decomposable Operator-Valued Kernel.

        Parameters
        ----------

        A : {array, LinearOperator}, shape = [n_targets, n_targets]
            Linear operator acting on the outputs

        scalar_kernel : {callable}
            Callable which associate to the training points X the Gram matrix.

        scalar_kernel_params : {mapping of string to any}, optional
            Additional parameters (keyword arguments) for kernel function
            passed as callable object.
        """
        self.A = A
        self.scalar_kernel = scalar_kernel
        self.scalar_kernel_params = scalar_kernel_params
        self.p = A.shape[0]

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable

        .. math::
            K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import DecomposableKernelMap
        return DecomposableKernelMap(X, self.A,
                                     self.scalar_kernel,
                                     self.scalar_kernel_params)

    def get_orff_map(self, X, D=100, eps=1e-5, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        u, s, v = svd(self.A, full_matrices=False, compute_uv=True)
        self.B_ = dot(diag(sqrt(s[s > eps])), v[s > eps, :])
        self.r = self.B_.shape[0]

        if (self.scalar_kernel is rbf_kernel) and not hasattr(self, 'Xb_'):
            if self.scalar_kernel_params is None:
                gamma = 1.
            else:
                gamma = self.scalar_kernel_params['gamma']
            self.phi_ = RBFSampler(gamma=gamma,
                                   n_components=D, random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif (self.scalar_kernel == 'skewed_chi2') and not hasattr(self,
                                                                    'Xb_'):
            if self.scalar_kernel_params is None:
                skew = 1.
            else:
                skew = self.scalar_kernel_params['skew']
            self.phi_ = SkewedChi2Sampler(skewedness=skew,
                                          n_components=D,
                                          random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif not hasattr(self, 'Xb_'):
            raise NotImplementedError('ORFF map for kernel is not '
                                      'implemented yet')

        D = self.phi_.n_components
        if X is self.Xb_:
            cshape = (D, self.r)
            rshape = (self.Xb_.shape[0], self.p)
            oshape = (self.Xb_.shape[0] * self.p, D * self.r)
            return LinearOperator(oshape,
                                  dtype=self.Xb_.dtype,
                                  matvec=lambda b: dot(dot(self.Xb_,
                                                           b.reshape(cshape)),
                                                       self.B_),
                                  rmatvec=lambda r: dot(self.Xb_.T,
                                                        dot(r.reshape(rshape),
                                                            self.B_.T)))
        else:
            Xb = self.phi_.transform(X)
            cshape = (D, self.r)
            rshape = (X.shape[0], self.p)
            oshape = (Xb.shape[0] * self.p, D * self.r)
            return LinearOperator(oshape,
                                  dtype=self.Xb_.dtype,
                                  matvec=lambda b: dot(dot(Xb,
                                                           b.reshape(cshape)),
                                                       self.B_),
                                  rmatvec=lambda r: dot(Xb.T,
                                                        dot(r.reshape(rshape),
                                                            self.B_.T)))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: \begin{cases}
               Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
               K(X, Y) \enskip\text{otherwise.}
               \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
                                          default = None
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable or LinearOperator

            .. math::
               K_x: \begin{cases}
               Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
               K(X, Y) \enskip\text{otherwise}
               \end{cases}
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)
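
# --- Added usage sketch (not part of the original example) ---
# Rough illustration of how the class above could be used; it assumes the same
# module-level imports the class itself relies on (numpy's dot/diag/sqrt,
# scipy's svd and LinearOperator, sklearn's rbf_kernel and RBFSampler) plus
# numpy imported as np, as in the doctest of the class docstring.
X_demo = np.random.randn(10, 4)                  # 10 samples, 4 input features
K_demo = DecomposableKernel(np.eye(2))           # identity operator on 2 outputs
Phi_demo = K_demo.get_orff_map(X_demo, D=50)     # LinearOperator, shape (10*2, 50*2)
theta_demo = np.random.randn(Phi_demo.shape[1])  # random coefficient vector
y_stacked = Phi_demo.matvec(theta_demo)          # stacked predictions, shape (20,)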
Example #24
 #X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
 
 X_test = X[0:nr_test]
 Y_test = Y[0:nr_test]
 X_train = X[nr_test:]  # start right after the test block (nr_test+1 skipped a sample)
 Y_train = Y[nr_test:]
  
 X_train = robust_scaler.fit_transform(X_train)
 
 # save the robust scaler
 joblib.dump(robust_scaler, base_path + 'data/rs-' + algorithm + '-' + str(ps[psi]) + '.pkl')    
 
 X_test = robust_scaler.transform(X_test)
 
 if algorithm == 'kernel-approx':
     rbf_feature = RBFSampler(gamma=1, random_state=1)
     X_train = rbf_feature.fit_transform(X_train)
     X_test = rbf_feature.transform(X_test)  # reuse the map fitted on X_train
 elif algorithm == 'mlp':
     n_output = len(set(Y))
     #n_output = 2460
     n_input = len(X_train[0]) + 1
     n_neurons = int(round(sqrt(n_input * n_output)))
     print("N input", n_input)
     print("N output", n_output)
     print("N neurons", n_neurons)
     classifier = MLPClassifier(solver='adam', alpha=1e-5,
                                hidden_layer_sizes=(n_input, n_neurons, n_output),
                                random_state=1)
     
 if classifier is not None or exists_be_file is True:
     
     if cv is True:
    # Create the Gym environment
    env = gym.make('TurtleBot3ObstacleAvoidance-v1')

    # Loads parameters from the ROS param server. Parameters are stored in a
    # .yaml file inside the /config directory. They are loaded at runtime by
    # the launch file:
    lr = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/learning_rate")
    epsilon = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/epsilon")
    gamma = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/gamma")
    epsilon_discount = rospy.get_param(
        "/turtlebot3_obstacle_avoidance_v1/epsilon_discount")
    min_epsilon = rospy.get_param(
        "/turtlebot3_obstacle_avoidance_v1/min_epsilon")
    nepisodes = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/nepisodes")

    rbf_samplers = [("rbf1", RBFSampler(gamma=0.05, n_components=1000)),
                    ("rbf2", RBFSampler(gamma=0.1, n_components=1000)),
                    ("rbf3", RBFSampler(gamma=0.5, n_components=1000)),
                    ("rbf4", RBFSampler(gamma=1.0, n_components=1000)),
                    ("rbf5", RBFSampler(gamma=2.0, n_components=1000)),
                    ("rbf6", RBFSampler(gamma=5.0, n_components=1000))]
    observation_examples = np.array(
        [env.observation_space.sample() for x in range(20000)])

    # Initialises Q-Learning
    qlearn = qlearnRBF.QLearnRBF(n_actions=env.action_space.n,
                                 epsilon=epsilon,
                                 lr=lr,
                                 gamma=gamma,
                                 rbf_samplers=rbf_samplers,
                                 observation_examples=observation_examples)
    elif trans_name == 'pca':
        from sklearn.decomposition import PCA

        qt = PCA()
    elif trans_name == 'nystronem':
        from sklearn.kernel_approximation import Nystroem

        qt = Nystroem()
    elif trans_name == 'kernel_pca':
        from solnml.components.feature_engineering.transformations.utils import KernelPCA

        qt = KernelPCA()
    elif trans_name == 'kitchen_sink':
        from sklearn.kernel_approximation import RBFSampler

        qt = RBFSampler()
    elif trans_name == 'lda':
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

        qt = LinearDiscriminantAnalysis()
    else:
        raise ValueError('Unsupported transformation name: %s!' % trans_name)

    qt.fit(X, y)
    print(X.shape)

    # Case1: transform and split.
    x1 = qt.transform(X)
    _, x1_, _, _ = train_valid_split_X(x1, y)

    # Case2: split and transform.
    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='sparse'
        )

    train_data_n = len(os.listdir(train_data_dir + '/1')) + len(os.listdir(train_data_dir + '/0')) + len(os.listdir(train_data_dir + '/2'))

    chi_feature = AdditiveChi2Sampler()

    clf = SGDClassifier(class_weight={0:1.0, 1:1.2, 2:1.0})

    classes_ = np.array([0, 1, 2])

    rbf_feature = RBFSampler(gamma=4.0, n_components=3000)
    #rbf_feature = Nystroem(n_components=100, gamma=1.0, random_state=1)

    """
    feature_train_stack = np.zeros((100, 2048)) - 1
    label_train_stack = np.zeros((100, 1)) - 1
    for i in range(train_data_n // batch_size):
    #for i in range(2):
        print("======= data reading! =======")
        print("batch No." + str(i) )
        feature_train, label_train = get_feature_and_label(model, train_generator, i)
        feature_train  = normalize(feature_train)
        #print(feature_train.shape)
        
        #feature_train = rbf_feature.fit_transform(feature_train)
        #feature_train = chi_feature.fit_transform(feature_train, label_train)
Example #28
    return X_train, X_test, y_train, y_test


ESTIMATORS = {
    "dummy":
    DummyClassifier(),
    "CART":
    DecisionTreeClassifier(),
    "ExtraTrees":
    ExtraTreesClassifier(),
    "RandomForest":
    RandomForestClassifier(),
    "Nystroem-SVM":
    make_pipeline(Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100)),
    "SampledRBF-SVM":
    make_pipeline(RBFSampler(gamma=0.015, n_components=1000),
                  LinearSVC(C=100)),
    "LogisticRegression-SAG":
    LogisticRegression(solver="sag", tol=1e-1, C=1e4),
    "LogisticRegression-SAGA":
    LogisticRegression(solver="saga", tol=1e-1, C=1e4),
    "MultilayerPerceptron":
    MLPClassifier(
        hidden_layer_sizes=(100, 100),
        max_iter=400,
        alpha=1e-4,
        solver="sgd",
        learning_rate_init=0.2,
        momentum=0.9,
        verbose=1,
        tol=1e-4,
Example #29
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=2)

# Average CV score on the training set was: 0.6341119762699294
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.1),
    XGBClassifier(learning_rate=0.001,
                  max_depth=2,
                  min_child_weight=13,
                  n_estimators=100,
                  nthread=1,
                  subsample=0.9500000000000001))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #30
 def initialize(self):
   """Initializes RBFSampler for the detector"""
   self.kernel = RBFSampler(gamma=0.5,
                            n_components=20000,
                            random_state=290)
 def rbf_kernel(self, matrix, n_components):
     rbf = RBFSampler(n_components=n_components)
     print(rbf)
     matrix_features = rbf.fit_transform(matrix)
     return matrix_features
Example #32
		'learning_rate': [.05, .1,.2,.3,2,3, 5],
		'max_features': [.25,.50,.75,1],
		'max_depth': [3,4,5],
		}
gs = GridSearchCV(gb, params, cv=5, scoring ='accuracy', n_jobs=4)
gs.fit(features_train, target_train)
#predictions = gs.predict_proba(test)
#print predictions

cal_score("GRADIENT BOOSTING",gs, features_test, target_test)
		#sorted(gs.grid_scores_, key = lambda x: x.mean_validation_score)
		#print gs.best_score_
		#print gs.best_params_
		#predictions = gs.predict_proba(test)
		#KERNEL APPROXIMATIONS - RBF 		
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(data)
		
#SGD CLASSIFIER		
clf = SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
                    fit_intercept=True, l1_ratio=0.15, learning_rate='optimal',
                    loss='hinge', n_iter=5, n_jobs=1, penalty='l2', power_t=0.5,
                    random_state=None, shuffle=True, verbose=0,
                    warm_start=False)
clf.fit(features_train, target_train)
cal_score("SGD Regression",clf, features_test, target_test)

#KN Classifier
neigh = KNeighborsClassifier(n_neighbors = 1)
neigh.fit(features_train, target_train)
cal_score("KN CLASSIFICATION",neigh, features_test, target_test)
Example #33
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
import pandas as pd

data = pd.read_csv('crop_tsc_balanced_imputed_2015.csv',
                   index_col=None,
                   header=None)
X = data.iloc[:, 0:8]
y = data.iloc[:, 9]

rbf_feature = RBFSampler(gamma=10, random_state=1)
X_features = rbf_feature.fit_transform(X)
clf = SGDClassifier(max_iter=1000)
clf.fit(X_features, y)
print(clf.score(X_features, y))

df = pd.DataFrame(X_features)
df.insert(100, 100, y)
print(df.head())
df.to_csv("crop_tsc_balanced_imputed_rbf_2015.csv", header=False, index=False)
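
# --- Added sketch (not in the original snippet): evaluate on held-out data ---
# The score printed above is computed on the very features the classifier was
# trained on, so it is optimistic; one possible held-out check, reusing the
# RBFSampler defined above:
from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=1)
clf_holdout = SGDClassifier(max_iter=1000)
clf_holdout.fit(rbf_feature.fit_transform(X_tr), y_tr)
print(clf_holdout.score(rbf_feature.transform(X_te), y_te))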
Example #34
    ytest = None

    classa = [0, 1, 2, 3, 4]

    num = len(x) - 10000
    xtest, ytest = x[num:], y[num:]

    x, y = x[:num], y[:num]

    print(x[:10], y[:10])
    clf = clf.fit(x, y)

    clf2_RFC = RandomForestClassifier(random_state=0, class_weight=({1:0.25, 2:0.56, 3:0.17, 4:0.02}))
    clf2_RFC = clf2_RFC.fit(x, y)

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(x)
    X_test = rbf_feature.transform(xtest)  # reuse the map fitted on the training set


    clfK = linear_model.SGDClassifier()
    clfK.fit(x, y)
    print "SGD classifier", clfK.score(xtest, ytest)

    #DECISION TREEE
    clft = tree.DecisionTreeClassifier( max_depth= 7)
    clft.fit(x, y)
    print "Tree", clft.score(xtest, ytest)


    #gen image
Example #35
 def __init__(self, state_dim, action_dim, number_of_features):
     Policy.__init__(self, state_dim, action_dim)
     self.rbf_feature = RBFSampler(gamma=25.,
                                   n_components=number_of_features)
     self.rbf_feature.fit(np.random.randn(action_dim, state_dim))
#	Calculation of a Kernel

#	K(x, y) = exp(-gamma ||x-y||^2)
#	sigma = sqrt( 1/(2*gamma) )
#	gamma = 1/(2*sigma^2)
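
#	Added worked example of the relations above (illustrative values only,
#	not used by the timing code below):
sigma_example = 2.0
gamma_example = 1.0 / (2.0 * sigma_example ** 2)    # gamma = 1/(2*sigma^2) = 0.125
sigma_check = (1.0 / (2.0 * gamma_example)) ** 0.5  # recovers sigma = 2.0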

num_of_samples = 14000
X = np.random.random((num_of_samples,5))
sampling_percentage = 0.05





start_time = time.time() 
RFF = RBFSampler(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = RFF.fit_transform(X)
RFF_estimated_kernel = V.dot(V.T)
print("--- RFF Time : %s seconds ---" % (time.time() - start_time))




start_time = time.time() 
N = Nystroem(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = N.fit_transform(X)
estimated_kernel = V.dot(V.T)
print("--- Nystrom Time : %s seconds ---" % (time.time() - start_time))


start_time = time.time() 
Example #37
def compute_approximate_HSIC(X,
                             Y,
                             ncom=100,
                             gamma=[None, None],
                             ntrials=100,
                             random_state=1,
                             sigma_prior=1):
    """
        Using approximations, computes the HSIC score between two different data series. Apprixmations are subsampling for the RBF kernel bandwidth selection and random kitchen sinks to approximate kernels (adn therefore directly using inner products to estimate the cross-covariance operator in the approximate RKHS)

        :param X, Y: (mutlivariate) data series
        :param ncom: number of components to use in the random kernel approximation
        :param gamma: bandwidth for the RBF kernels
        :param ntrials: number of trials, on which HSIC is averaged
        :param random_state: set initial random state for reproducibility
        :param sigma_prior: scaling for the sigmas
    """

    from sklearn.kernel_approximation import RBFSampler

    if random_state is not None:
        np.random.seed(random_state)

    # Work on a copy so the (mutable) default bandwidth list is never modified
    # in place across calls.
    gamma = list(gamma)

    def centering(K):
        """
            center kernel matrix
        """
        n = K.shape[0]
        unit = np.ones([n, n])
        I = np.eye(n)
        Q = I - unit / n
        return np.dot(np.dot(Q, K), Q)

    def rbf(X, sigma=None):
        """
            define RBF kernel + its parameter
        """
        GX = np.dot(X, X.T)
        KX = np.diag(GX) - GX + (np.diag(GX) - GX).T
        if sigma is None:
            mdist = np.median(KX[KX != 0])
            sigma = np.sqrt(mdist)
        KX *= -0.5 / sigma / sigma
        np.exp(KX, KX)
        return KX

    if gamma[0] is None:

        if X.shape[0] > 1000:
            yy = np.random.choice(len(X), 1000)
            x_ = X[yy]
            del yy
        else:
            x_ = X

        GX = np.dot(x_, x_.T)
        KX = np.diag(GX) - GX + (np.diag(GX) - GX).T
        mdist = np.median(KX[KX != 0])
        gamma[0] = 1 / (np.sqrt(sigma_prior * mdist)**2)
        del GX, KX, mdist

    if gamma[1] is None:
        if Y.shape[0] > 1000:
            yy = np.random.choice(len(Y), 1000)
            y_ = Y[yy]
            del yy
        else:
            y_ = Y

        GY = np.dot(y_, y_.T)
        KY = np.diag(GY) - GY + (np.diag(GY) - GY).T
        mdist = np.median(KY[KY != 0])
        gamma[1] = 1 / (np.sqrt(sigma_prior * mdist)**2)
        del GY, KY, mdist

    hs_a = 0
    rbf_feature_x = RBFSampler(gamma=gamma[0],
                               random_state=random_state,
                               n_components=ncom)
    rbf_feature_y = RBFSampler(gamma=gamma[1],
                               random_state=random_state,
                               n_components=ncom)

    for trial in range(ntrials):
        if (X.shape[0] < 1) | (Y.shape[0] < 1):
            continue

        X_f = rbf_feature_x.fit_transform(X)
        X_f -= np.mean(X_f, axis=0)
        Y_f = rbf_feature_y.fit_transform(Y)
        Y_f -= np.mean(Y_f, axis=0)

        A = X_f.T.dot(Y_f)
        B = Y_f.T.dot(X_f)
        C = A.dot(B)
        hs_a += 1 / X_f.shape[0]**2 * np.trace(C)

    return hs_a / ntrials
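
# --- Added usage sketch (illustrative data, not part of the original function) ---
# Assumes numpy is imported as np at module level, as the function body above does.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X_demo = rng.randn(500, 3)
    Y_demo = X_demo + 0.1 * rng.randn(500, 3)   # dependent series -> larger HSIC
    Z_demo = rng.randn(500, 3)                  # independent series -> smaller HSIC
    print(compute_approximate_HSIC(X_demo, Y_demo, ncom=50, ntrials=5))
    print(compute_approximate_HSIC(X_demo, Z_demo, ncom=50, ntrials=5))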
Example #38
		f.write(header)
		size = header.count(',')
		for (id, label) in zip(ids, labels):
			f.write('%d' % int(id))
			for i in range(0, size):
				if i == label:
					f.write(',1')
				else:
					f.write(',0')
			f.write('\n')


if __name__ == '__main__':
	# get X and y
	train_x, train_y = loadDataHelper('train_data.txt')
	test_x, test_id = loadDataHelper('test_data.txt')
	print('train size: %d %d' % (len(train_x), len(train_y)))
	print('test size: %d %d' % (len(test_x), len(test_id)))


	rbf_feature = RBFSampler(gamma=1, random_state=1)
	X_features = rbf_feature.fit_transform(train_x)
	model = SGDClassifier(loss="hinge", alpha=0.01, n_iter=200, fit_intercept=True)
	# model = SGDClassifier()
	model.fit(X_features, train_y)
	print(model)

	X_features = rbf_feature.fit_transform(test_x)
	predicted = model.predict(X_features)
	saveResult('result-sgd.csv', test_id, predicted)
Example #39
def smooth_weight_approximate_gaussian_process_regression(
        data, labels, ind_mat, opts):  #, ITERS=100, THRESH=1e-6):
    """
        Define a Bayesian multitask regression problem in which tasks are locally smooth (e.g. bin size prediction), and introduce a penalty in which weights of regressors of related tasks are encouraged to be similar.
        See https://icml.cc/Conferences/2005/proceedings/papers/128_GaussianProcesses_YuEtAl.pdf
    """

    import scipy.stats as stats
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.kernel_approximation import RBFSampler

    D = opts['approximation_dim']

    sm = RBFSampler(gamma=1. / opts['kpar']**2,
                    n_components=D,
                    random_state=666)

    T = opts['TASKS']
    W = np.random.rand(D, T)
    tau = opts['par1']
    pi = opts['par2']

    C_w = stats.invwishart.rvs(tau, np.eye(D))
    mu_w = np.random.multivariate_normal(np.zeros((D)),
                                         1 / pi * C_w)  # np.ones((D,1))

    sigma = D / T
    k = 0
    epsi = 1000
    loss = []

    while (epsi > opts['THRESH']) & (k < opts['ITERS']):
        # E-step:
        schedule = range(T)
        W_old = W.copy()
        C_w_temp = 0
        w_l_ce_temp = 0
        sigma_l = 0
        n_l = 0
        t_loss = 0
        t_norm_w = 0
        for ind_w in schedule:
            # print(ind_w)
            X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
            trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
            trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]

            N = trn_X.shape[0]
            A = 1 / sigma * np.dot(trn_X.T, trn_X) + np.linalg.inv(C_w)
            A = np.linalg.inv(A)
            # print(A)
            C_w_temp += A

            B = 1 / sigma * np.dot(trn_X.T, trn_Y).T + np.dot(
                mu_w.T, np.linalg.inv(C_w))
            W[:, ind_w] = np.dot(A, np.squeeze(B))

            w_ = W[:, ind_w] - mu_w
            w_l_ce_temp += np.dot(w_, w_.T)

            n_l += N
            sigma_l += np.sum((trn_Y - trn_X.dot(W[:, ind_w]))**2) + np.trace(
                np.dot(trn_X, np.dot(C_w, trn_X.T)))

            t_loss += 1 / N * np.sum(
                (trn_Y.values - np.dot(trn_X, W[:, ind_w]))**
                2)  # + np.linalg.norm(W[:,ind_w])
            t_norm_w += np.linalg.norm(W[:, ind_w])

        # M-step
        mu_w = 1 / (pi + T) * np.sum(W, axis=1)
        C_w = 1 / (tau + T) * (pi * np.dot(mu_w, mu_w.T) + tau * np.eye(D) +
                               w_l_ce_temp)  # + tau*np.eye(D)
        sigma = 1 / n_l * sigma_l

        emp_err = 1 / T * t_loss
        epsi = np.abs(np.sum(np.sum(W - W_old)))
        loss.append(emp_err)
        # print(f'iter {k}, size W {W.shape}, conv {epsi}, emp error {emp_err}, mean norm w {1/T * t_norm_w}')
        k += 1

        # print(f'iter {k}, size W {W.shape}, conv {epsi}, emp error {emp_err}, mean norm w {1/T * t_norm_w}')

    print(
        f'iter {k}, size W {W.shape}, conv {epsi}, emp error {emp_err}, mean norm w {1/T * t_norm_w}'
    )

    y_hat = np.zeros((opts['DATA_SIZE'][0], T))
    pred_stats = {}
    pred_stats['tr_RMSE'] = []  #np.inf*np.ones((T))
    pred_stats['tr_R2'] = []  #np.inf*np.ones((T))
    pred_stats['va_RMSE'] = []  #np.inf*np.ones((T))
    pred_stats['va_R2'] = []  #np.inf*np.ones((T))

    # if opts['VAL_MODE']:
    for ind_w in range(T):
        X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
        trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
        trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]
        pred_tr_Y = np.dot(trn_X, W[:, ind_w])

        pred_stats['tr_RMSE'].append(
            np.sqrt(mean_squared_error(trn_Y, pred_tr_Y)))
        pred_stats['tr_R2'].append(r2_score(trn_Y, pred_tr_Y))

        y_hat[ind_mat[:, ind_w] == 1, ind_w] = pred_tr_Y

        if np.any(ind_mat[:, ind_w] == 2):

            val_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 2, :-1])
            val_Y = X_Y.iloc[ind_mat[:, ind_w] == 2, -1]
            pred_va_Y = np.dot(val_X, W[:, ind_w])

            pred_stats['va_RMSE'].append(
                np.sqrt(mean_squared_error(val_Y, pred_va_Y)))
            pred_stats['va_R2'].append(r2_score(val_Y, pred_va_Y))

            y_hat[ind_mat[:, ind_w] == 2, ind_w] = pred_va_Y

    return W, loss, ind_mat, pred_stats, y_hat
Example #40
def transform(x_original, make_np=True):
    orig = x_original
    MEAN = [ 0.00213536,  0.00324656,  0.00334724,  0.00175428,  0.00349227,
          0.0035413 ,  0.00188289,  0.00216241,  0.00184026,  0.00351317,
          0.00520942,  0.00450718,  0.00346782,  0.00300477,  0.00223811,
          0.00180039,  0.00216675,  0.00381716,  0.00258565,  0.00291358,
          0.00616643,  0.00237084,  0.00440006,  0.00729192,  0.00369302,
          0.00058215,  0.00312047,  0.00629086,  0.00184585,  0.0018266 ,
          0.00329771,  0.00352135,  0.00246634,  0.00261958,  0.00357113,
          0.00307333,  0.00211512,  0.00125184,  0.00212255,  0.00307451,
          0.00171408,  0.0126576 ,  0.00252346,  0.00528872,  0.0026387 ,
          0.00283739,  0.00394586,  0.00207473,  0.00307515,  0.002017  ,
          0.00408066,  0.00185709,  0.00316201,  0.00349098,  0.00415104,
          0.00348125,  0.00069981,  0.00128145,  0.0023404 ,  0.00396659,
          0.00240324,  0.01251434,  0.00125352,  0.00266113,  0.00435828,
          0.00066137,  0.00221134,  0.00083185,  0.00278664,  0.00118505,
          0.00335414,  0.00340527,  0.0026939 ,  0.00096786,  0.00214149,
          0.0026521 ,  0.00155538,  0.00300255,  0.0040405 ,  0.00275396,
          0.00077404,  0.00257667,  0.00268743,  0.00279948,  0.0018655 ,
          0.00239569,  0.0032419 ,  0.00288355,  0.00123361,  0.00220135,
          0.0021836 ,  0.00225123,  0.00366629,  0.00279189,  0.00058814,
          0.00310452,  0.00276981,  0.00128716,  0.00074161,  0.00358908,
          0.003292  ,  0.00233592,  0.00317694,  0.00381526,  0.00269197,
          0.00098085,  0.00231831,  0.00133682,  0.00460957,  0.00387842,
          0.0004473 ,  0.0015644 ,  0.00247717,  0.00179484,  0.00281831,
          0.00053689,  0.00415889,  0.00232736,  0.00361601,  0.00192624,
          0.00224487,  0.00210838,  0.00140079,  0.00608319,  0.00211861,
          0.00230604,  0.00124033,  0.0029389 ,  0.00227564,  0.00086638,
          0.0035496 ,  0.00228789,  0.00361703,  0.00270277,  0.00196611,
          0.00206865,  0.00146788,  0.00019011,  0.00222272,  0.00351472,
          0.00305718,  0.00239471,  0.00040766,  0.00299186,  0.00368983,
          0.00244158,  0.00084154,  0.00109796,  0.00278565,  0.00135904,
          0.00424855,  0.00323784,  0.00255397,  0.00234946,  0.00210558,
          0.00291688,  0.00172516,  0.00284473,  0.00308164,  0.00316225,
          0.0041659 ,  0.00055891,  0.00303591,  0.00028217,  0.00261526,
          0.00196658,  0.00264379,  0.00018002,  0.00227361,  0.00190785,
          0.00344782,  0.00305479,  0.00057851,  0.00115452,  0.00365707,
          0.0009598 ,  0.00184313,  0.00286183,  0.00400594,  0.0003848 ,
          0.00086102,  0.00277779,  0.00214625,  0.00329827,  0.00129511,
          0.00114751,  0.00249452,  0.00236266,  0.00353646,  0.00319208,
          0.00540883,  0.00323167,  0.00299791,  0.00025745,  0.00227873,
          0.00228826,  0.0040653 ,  0.00238598,  0.00483883,  0.00054585,
          0.00091663,  0.00037232,  0.0008229 ,  0.00073563,  0.00283771,
          0.0035899 ,  0.00578833,  0.0032107 ,  0.0014048 ,  0.00401052,
          0.002748  ,  0.00229416,  0.00130351,  0.00308403,  0.00146506,
          0.00188529,  0.00236308,  0.00259649,  0.00185155,  0.00230195,
          0.00421584,  0.00231917,  0.00227335,  0.00296253,  0.00077996,
          0.0001668 ,  0.00069015,  0.00220702,  0.00238395,  0.00034903,
          0.00303323,  0.00407338,  0.00178655,  0.00456887,  0.00254606,
          0.00215019,  0.00306377,  0.00134979,  0.00112832,  0.00350681,
          0.00253643,  0.00431348,  0.00094915,  0.00150396,  0.00043838,
          0.00207101,  0.00301119,  0.00057716,  0.00062709,  0.00543404,
          0.00061686,  0.00237189,  0.00522715,  0.00321869,  0.00172645,
          0.00244482,  0.00334951,  0.00183201,  0.00038157,  0.0023022 ,
          0.00418559,  0.00329119,  0.00411452,  0.00089033,  0.00283673,
          0.00210368,  0.00222242,  0.00213262,  0.0033576 ,  0.00250707,
          0.00423595,  0.00237407,  0.00127654,  0.00387341,  0.00216695,
          0.00325004,  0.00246333,  0.00396034,  0.0031676 ,  0.00354552,
          0.00227099,  0.00205363,  0.00128859,  0.00290737,  0.00301655,
          0.00319576,  0.00072449,  0.00230528,  0.00326406,  0.00283315,
          0.00338869,  0.00212552,  0.00135612,  0.00250613,  0.00045907,
          0.0014009 ,  0.00177951,  0.00042544,  0.00073249,  0.00303487,
          0.0013664 ,  0.00248306,  0.00025601,  0.00435174,  0.00443799,
          0.00479944,  0.0009997 ,  0.00275155,  0.00286969,  0.00244896,
          0.00177604,  0.00278218,  0.00078876,  0.00142078,  0.00186949,
          0.0018215 ,  0.0027254 ,  0.00316367,  0.00192957,  0.00176559,
          0.00289111,  0.00048977,  0.00411342,  0.00130383,  0.00250934,
          0.00324275,  0.00159243,  0.00334068,  0.00324279,  0.00158259,
          0.00041714,  0.00161102,  0.00145149,  0.00222112,  0.00296289,
          0.00282892,  0.00123731,  0.00281891,  0.00016613,  0.0014267 ,
          0.00262089,  0.00367506,  0.00281706,  0.00318947,  0.00090315,
          0.00230826,  0.00310803,  0.00889549,  0.00197781,  0.00160006,
          0.00307063,  0.00176858,  0.00252353,  0.00141795,  0.00047073,
          0.00241224,  0.00165672,  0.00138939,  0.00257068,  0.00148445,
          0.00193734,  0.004368  ,  0.00247817,  0.00249266,  0.00329317,
          0.00078468,  0.00045822,  0.00259324,  0.00298367,  0.00335009,
          0.00307879,  0.00325237,  0.00254531,  0.00749495,  0.0026701 ,
          0.00100689,  0.00184948,  0.00317616,  0.00255977,  0.00112342,
          0.00165774,  0.00227449,  0.00064219,  0.00269639,  0.00114312,
          0.00203549,  0.00064574,  0.00130932,  0.00304631,  0.00131053,
          0.00174587,  0.0027975 ,  0.00461148,  0.0015227 ,  0.0027072 ,
          0.00210673,  0.00323388,  0.00028426,  0.00113429,  0.00315131]

    VAR = [  3.87111312e-06,   1.29838726e-05,   1.23895436e-05,
           5.11051819e-06,   1.87834728e-05,   5.81101229e-05,
           1.22431672e-05,   3.14238203e-06,   6.15186426e-06,
           1.16054974e-05,   2.61629851e-05,   1.51823678e-05,
           3.20501352e-05,   6.75625364e-06,   6.90383937e-06,
           7.10772563e-06,   3.93108356e-06,   1.38147699e-05,
           9.45390664e-06,   6.18869987e-06,   1.23460353e-03,
           5.15741591e-06,   1.27185867e-05,   7.62148434e-05,
           9.61369316e-06,   3.59794999e-06,   4.49714597e-05,
           1.15313013e-04,   2.51027515e-06,   3.23518027e-06,
           1.15175054e-05,   5.55007797e-05,   3.61287015e-06,
           4.24901217e-06,   1.57731133e-05,   8.83739880e-06,
           4.11832891e-06,   4.51594425e-06,   5.66233716e-06,
           2.76312055e-05,   3.10286633e-05,   2.06523833e-04,
           4.99679342e-06,   3.59423460e-05,   5.53408014e-06,
           5.02979264e-06,   2.29845095e-05,   3.52580303e-06,
           4.74110466e-06,   2.77776825e-06,   1.15279947e-05,
           4.78634098e-06,   8.24242505e-06,   1.65141090e-05,
           1.84669015e-05,   1.65851869e-05,   9.69125917e-07,
           4.07269628e-06,   4.79411492e-06,   7.95185399e-06,
           6.05491604e-06,   2.30133633e-04,   2.43045915e-06,
           9.99138675e-06,   1.61846281e-05,   1.36250194e-06,
           3.83900385e-06,   4.03501076e-06,   4.49190746e-06,
           2.20133970e-06,   1.40571788e-05,   1.23973871e-05,
           1.91642968e-05,   1.83384119e-06,   3.55110501e-06,
           6.38707023e-06,   7.58389225e-06,   9.66052931e-06,
           1.33459561e-05,   6.01834583e-06,   1.75975058e-06,
           9.93625536e-06,   5.57880408e-06,   5.20632392e-06,
           2.63891241e-06,   4.96341232e-06,   1.35361419e-05,
           5.09588225e-06,   2.13213362e-06,   3.67884149e-06,
           4.02580880e-06,   3.36118966e-06,   1.23913905e-05,
           1.19327162e-05,   1.33013390e-06,   1.56844681e-05,
           5.05235129e-06,   3.27510379e-06,   4.18496352e-06,
           1.32615022e-05,   8.00089632e-06,   5.24889508e-06,
           7.61725520e-06,   2.45732025e-05,   4.73942392e-06,
           3.26874106e-06,   4.19502445e-06,   4.67408597e-06,
           4.07529951e-05,   1.85623369e-05,   1.42640177e-06,
           9.02420306e-06,   3.99465979e-06,   2.91695819e-06,
           7.51525182e-06,   3.28339831e-06,   9.23579413e-06,
           8.82938566e-06,   1.67017625e-05,   7.18046179e-06,
           6.67502140e-06,   4.53568390e-06,   4.59241197e-06,
           9.71055426e-05,   4.06108283e-06,   3.21309715e-06,
           2.83145362e-06,   1.30979068e-05,   4.30934096e-06,
           1.33494112e-06,   1.23067054e-05,   4.55467345e-06,
           4.16151366e-05,   4.39300907e-06,   3.81081336e-06,
           3.57599046e-06,   2.44792045e-06,   1.04884156e-06,
           5.66646773e-06,   1.38454953e-05,   7.03958785e-06,
           7.96561298e-06,   1.15832827e-06,   5.34098000e-06,
           1.08664502e-05,   5.33706713e-06,   1.58029233e-06,
           4.16948014e-06,   1.10410603e-05,   3.08923185e-06,
           3.60056097e-05,   1.35575315e-05,   7.21297470e-06,
           5.46186866e-06,   3.83067878e-06,   4.93382163e-06,
           8.74249160e-06,   6.95763983e-06,   8.57639945e-06,
           1.99238085e-05,   2.06143616e-05,   4.15158574e-06,
           6.98539924e-06,   7.29978665e-07,   1.05324242e-05,
           4.03610511e-06,   4.54024757e-06,   1.12380259e-06,
           7.25149490e-06,   4.68609708e-06,   4.47583007e-05,
           5.73128000e-06,   1.55383559e-06,   6.10201277e-06,
           1.56226083e-05,   2.07417481e-06,   3.92362694e-06,
           5.07511158e-06,   1.91527526e-05,   1.23196439e-06,
           2.78105795e-06,   6.20886459e-06,   9.77619759e-06,
           4.54569998e-05,   3.69801329e-06,   3.90055801e-06,
           8.95043365e-06,   4.62714915e-06,   8.59072207e-06,
           7.93476416e-06,   2.94461267e-05,   1.27513460e-05,
           6.37168538e-06,   1.42869302e-06,   3.88169829e-06,
           3.73479924e-06,   3.41961106e-05,   5.99249536e-06,
           3.52894229e-05,   3.60535269e-06,   1.97432492e-06,
           1.08726206e-06,   6.34745318e-06,   1.85853697e-06,
           4.88355657e-06,   1.45421337e-05,   4.71209759e-05,
           9.75886239e-06,   1.92188254e-06,   2.44175182e-05,
           6.48665880e-06,   3.77833988e-06,   4.94021824e-06,
           1.11375076e-05,   2.48913056e-06,   7.50221434e-06,
           7.71706724e-06,   4.40449246e-06,   5.01260110e-06,
           7.55913298e-06,   9.61114153e-06,   4.71524238e-06,
           5.71612330e-06,   5.35067657e-06,   1.24371020e-06,
           1.05315411e-06,   3.93981671e-06,   4.10917913e-06,
           4.50131192e-06,   1.41029887e-06,   5.21404239e-06,
           3.10300539e-05,   2.86295992e-06,   3.14574375e-05,
           4.13089781e-06,   3.94511845e-06,   5.21837923e-06,
           1.86040011e-06,   4.33877122e-06,   6.79169351e-06,
           7.34233345e-06,   2.46684357e-05,   6.04518227e-06,
           3.50075336e-06,   1.22008735e-06,   3.82670787e-06,
           1.29928488e-05,   1.30317263e-06,   1.82923403e-06,
           1.68159694e-04,   1.39570985e-06,   6.82018782e-06,
           2.77705938e-05,   5.50219803e-06,   6.94297855e-06,
           5.56691651e-06,   4.40913139e-05,   8.64954832e-06,
           1.13623461e-06,   3.91895303e-06,   2.90528320e-05,
           8.95829181e-06,   2.13802762e-05,   1.45383845e-06,
           2.19748855e-05,   2.92403666e-06,   4.11580346e-06,
           3.79422424e-06,   1.01354981e-05,   1.12666398e-05,
           2.12954971e-05,   4.73278161e-06,   2.26826965e-06,
           2.45301255e-05,   5.86185180e-06,   6.92235736e-06,
           8.42678526e-06,   2.47795958e-05,   6.25412728e-06,
           1.41974527e-05,   3.95337688e-06,   7.16912125e-06,
           2.00884144e-06,   2.00349034e-05,   5.97662651e-06,
           3.01450892e-05,   4.63002816e-06,   4.09857661e-06,
           1.23373959e-05,   5.62286236e-06,   1.23868932e-05,
           7.79128188e-06,   4.02737664e-06,   4.26867074e-06,
           1.30633550e-06,   2.16092242e-06,   2.53344988e-06,
           1.55130629e-06,   1.20587686e-06,   8.47719131e-06,
           1.72865161e-06,   8.85885938e-06,   1.36250583e-06,
           3.02467214e-05,   2.85941868e-05,   1.68684969e-05,
           2.17024274e-06,   9.09429716e-06,   1.12517072e-05,
           5.39997088e-06,   3.16738113e-06,   7.44227101e-06,
           1.39521345e-06,   1.80325624e-06,   3.23437991e-06,
           4.12906812e-06,   6.51981136e-06,   7.28606378e-06,
           4.44469608e-06,   4.00705337e-06,   1.34244753e-05,
           1.34953189e-06,   3.86701616e-05,   4.30733919e-06,
           4.29618197e-06,   1.67568650e-05,   5.39451612e-06,
           8.50733433e-06,   1.04900918e-05,   4.68246794e-06,
           2.92591087e-06,   2.54589900e-06,   6.68970689e-06,
           3.68698856e-06,   5.70542637e-06,   1.57329410e-05,
           3.45199222e-06,   7.27799975e-06,   8.64176250e-07,
           5.59882582e-06,   4.16052401e-06,   1.73753080e-05,
           7.85748797e-06,   6.46626446e-06,   2.23241624e-06,
           6.79217908e-06,   6.18545939e-06,   5.41203600e-04,
           2.75355566e-06,   5.01654998e-06,   9.55004050e-06,
           3.36241075e-06,   4.95540827e-06,   4.38650100e-06,
           2.19975452e-06,   4.99878215e-06,   2.08615031e-06,
           6.57349770e-06,   6.07825138e-06,   1.82116637e-05,
           3.98356104e-06,   3.02862803e-05,   1.45275531e-05,
           1.80111343e-05,   1.81263109e-05,   1.37630960e-06,
           1.01588605e-06,   1.09961427e-05,   7.09189456e-06,
           8.63553483e-06,   1.28377215e-05,   1.15539997e-05,
           4.30247032e-06,   3.69651334e-05,   1.13411365e-05,
           1.43191945e-06,   2.76733205e-06,   7.03730009e-06,
           4.93027252e-06,   2.72768641e-06,   3.15867713e-06,
           3.51786262e-06,   1.33668414e-06,   5.15268762e-06,
           2.24808552e-06,   3.91888753e-06,   1.96848802e-06,
           5.96948656e-06,   6.72807533e-06,   2.52024742e-06,
           4.64795350e-06,   6.00152269e-06,   4.42994740e-05,
           2.59223022e-06,   4.76032620e-06,   3.15249648e-06,
           1.02942457e-05,   7.54992395e-07,   2.48130225e-06,
           5.97253972e-06]

    x_original = np.array(x_original)
    x_original -= MEAN
    x_original /= VAR

    def extend_x(arr, additions=True, extension=True):
        if extension:
            x.extend(arr)
        if additions:
            x.append(scipy.std(arr))
            x.append(scipy.var(arr))
            x.append(sum(arr) / len(arr))
            x.append(sum(np.abs(arr)) / len(arr))
            x.append(min(arr))
            x.append(max(arr))
            x.append(scipy.mean(arr))
            x.append(scipy.median(arr))

    x = []

    extend_x(x_original)
    extend_x(np.abs(x_original))
    # extend_x(np.sqrt(np.abs(x_original)))

    # sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50, random_state=1)
    # zzz1 = sampler1.fit_transform(np.abs(np.array(orig)))[0]

    # sampler2 = SkewedChi2Sampler(skewedness=8.5, n_components=50, random_state=1)
    # zzz2 = sampler2.fit_transform(np.abs(np.array(x)))[0]

    sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
    zzz3 = sampler3.fit_transform(np.array(x))[0]


    # zzz1 and zzz2 would come from the SkewedChi2Samplers commented out above,
    # so these calls are commented out as well to keep the function runnable.
    # extend_x(list(zzz1))
    # extend_x(list(zzz2))
    extend_x(list(zzz3))

    if make_np:
        return np.array(x)
    
    return x
Example #41
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=28)

# Average CV score on the training set was: 0.6521171672228402
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.55),
    RandomForestClassifier(bootstrap=False,
                           criterion="gini",
                           max_features=0.4,
                           min_samples_leaf=19,
                           min_samples_split=13,
                           n_estimators=100))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #42
class CCNNLayer:

    def __init__(self, name: str, input_size: int, filter_size: int,
                 gamma: float, m: int, R: float, r: int, lr: float):

        self.name = name
        self.input_size = input_size    
        self.filter_size = filter_size
        self.patch_size = filter_size ** 2
        self.output_size = self.input_size - self.filter_size + 1
        self.n_patchs = self.output_size ** 2        
        self.m = m
        self.R = R
        self.lr = lr
        
        self.rbf_feature = RBFSampler(gamma=gamma, n_components=m, random_state=1)
        self.svd = TruncatedSVD(n_components=r)


    def initPars(self, n_classes: int, batch_size: int):

        self.n_classes = n_classes        
        self.batch_size = batch_size
        self.lr /= batch_size
        
        self.A = np.random.normal(0, 0.1, size=(n_classes, self.n_patchs, self.m))
        
        
    def getZMatrix(self, X):
        """
        Input: (n_instances, n_channels, input_size, input_size)
        
        Output: (n_instances, n_patchs, m)
        """
        
        Z = view_as_windows(X, (1, X.shape[1], self.filter_size, self.filter_size))
        Z = Z.reshape(np.prod(Z.shape[:4]), np.prod(Z.shape[4:]))
        Q = self.rbf_feature.transform(Z).astype(np.float16)
        
        return Q.reshape(X.shape[0], self.n_patchs, -1)


    def predict(self, X, transform: bool=False):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        
        Transformed input: (batch_size, n_patchs, m)
        
        Output: (batch_size, n_classes)
        """

        Z = self.getZMatrix(X) if transform else X
        p = np.exp(np.tensordot(Z, self.A, axes=[(1, 2), (1, 2)]))

        return (p.T / np.sum(p, axis=1)).T


    def fit(self, X, ylabel, n_epoch: int):

        assert X.shape[2] == X.shape[3] == self.input_size
        
        n = X.shape[0]
        self.rbf_feature.fit(np.zeros((1, X.shape[1] * self.filter_size ** 2)))
        
        print("Preparing patches...")
        
        Z_batches = [self.getZMatrix(X[i: i + self.batch_size]) 
                     for i in range(0, n, self.batch_size)]
        y_batches = ylabel.reshape(-1, self.batch_size)
        
        print("Starting PSGD...")
        
        loss = np.inf
        rhat = self.m

        for epoch in range(n_epoch):
            print("{0}: Epoch {1}: loss = {2}, r_hat = {3}".format(self.name, epoch + 1, loss / n, rhat))
            loss = 0
            for i, (Z_batch, y_batch) in enumerate(zip(Z_batches, y_batches)):
                p_batch = self.predict(Z_batch)
                loss += np.sum(-np.log(p_batch[np.arange(self.batch_size), y_batch]))
                dL_batch = -p_batch
                dL_batch[np.arange(self.batch_size), y_batch] += 1

                self.A += self.lr * np.tensordot(dL_batch, Z_batch, axes=[0, 0])
  
            A_unfold = self.A.reshape(-1, self.A.shape[2]).T
            U = self.svd.fit_transform(A_unfold)
            self.U = U.copy()
            d = np.linalg.norm(U, axis=0)
            U *= 1 / d            
            d_cum = np.cumsum(d) 
            rhat = np.searchsorted(d_cum - self.R > np.append(d[1:] * np.arange(1, d.size), 0), True) + 1

            if rhat >= d.size:
                print("Warning: Hard-thresholding applied")
                                
            if rhat <= d.size:                
                scale = np.maximum(0, d - (d_cum[rhat - 1] - self.R) / rhat)
                U = U[:, :rhat]
                d = d[:rhat]                  
                self.U = U * scale[:rhat] 

            self.A = ((self.U * (1 / d)) @ (U.T @ A_unfold)).T.reshape(*self.A.shape)
        
        Z_batches = None
        y_batches = None
        
        
            
    def transform(self, X):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        
        Output: (batch_size, n_output_channels, output_size, output_size)
        """
        
        Z = np.rollaxis(np.tensordot(self.U, self.getZMatrix(X), axes=[0, 2]), 0, 2)

        return Z.reshape(Z.shape[0], Z.shape[1], self.output_size, self.output_size)
            train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: -3.6343523092856613
exported_pipeline = make_pipeline(
    StackingEstimator(
        estimator=KNeighborsRegressor(n_neighbors=48, p=1, weights="uniform")),
    RobustScaler(), MinMaxScaler(),
    StackingEstimator(estimator=LinearSVR(C=25.0,
                                          dual=True,
                                          epsilon=0.01,
                                          loss="epsilon_insensitive",
                                          tol=0.0001)),
    StackingEstimator(estimator=DecisionTreeRegressor(
        max_depth=8, min_samples_leaf=17, min_samples_split=9)),
    FeatureAgglomeration(affinity="l2", linkage="average"),
    RBFSampler(gamma=0.75),
    StackingEstimator(estimator=LinearSVR(C=1.0,
                                          dual=True,
                                          epsilon=1.0,
                                          loss="squared_epsilon_insensitive",
                                          tol=0.1)),
    StackingEstimator(
        estimator=KNeighborsRegressor(n_neighbors=9, p=1, weights="uniform")),
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    SelectPercentile(score_func=f_regression, percentile=26), StandardScaler(),
    PCA(iterated_power=7, svd_solver="randomized"),
    StackingEstimator(estimator=LinearSVR(C=10.0,
                                          dual=True,
                                          epsilon=0.01,
                                          loss="squared_epsilon_insensitive",
                                          tol=1e-05)), ZeroCount(),
Example #44
from sklearn.kernel_approximation import RBFSampler
from torch.distributions import MultivariateNormal
from scipy.stats import multivariate_normal
import numpy as np
import linear_trpo_config as C
import torch

rbf_feature = RBFSampler(gamma=1, n_components = C.extracted_feature_size, random_state=12345)


def extract_features(state, num_actions):
    """ This function computes the RFF features for a state for all the discrete actions
    :param state: column vector of the state we want to compute phi(s,a) of (shape |S|x1)
    :param num_actions: number of discrete actions you want to compute the RFF features for
    :return: phi(s,a) for all the actions (shape 100x|num_actions|)
    """
    s = state.reshape(1, -1)
    s = np.repeat(s, num_actions, 0)
    a = np.arange(0, num_actions).reshape(-1, 1)
    sa = np.concatenate([s,a], -1)
    feats = rbf_feature.fit_transform(sa)
    feats = feats.T
    return feats
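
# --- Added usage sketch (illustrative shapes; C.extracted_feature_size comes from
# the linear_trpo_config module imported above) ---
demo_state = np.random.randn(4, 1)               # |S| = 4, column vector
demo_phis = extract_features(demo_state, num_actions=3)
print(demo_phis.shape)                           # (C.extracted_feature_size, 3)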


def compute_action_distribution(theta, phis, mode):
    """ compute probability distrubtion over actions
    :param theta: model parameter (shape d x 1)
    :param phis: RFF features of the state and actions (shape d x |A|)
    """
Example #45
scaler_action = sklearn.preprocessing.StandardScaler()
scaler_action.fit(action_examples)


# featurizer_action = sklearn.pipeline.FeatureUnion([
#         ("rbf1", RBFSampler(gamma=5.0, n_components=100)),
#         ("rbf2", RBFSampler(gamma=2.0, n_components=100)),
#         ("rbf3", RBFSampler(gamma=1.0, n_components=100)),
#         ("rbf4", RBFSampler(gamma=0.5, n_components=100))
#         ])
# featurizer_action.fit(scaler_action.transform(action_examples))



featurizer_action = sklearn.pipeline.FeatureUnion([
        ("rbf1", RBFSampler(gamma=5.0, n_components=1)),
        ("rbf2", RBFSampler(gamma=2.0, n_components=1))
        ])
featurizer_action.fit(scaler_action.transform(action_examples))



def featurize_action(action):
    # action = np.array([action])
    scaled = scaler_action.transform([action])
    featurized_action = featurizer_action.transform(scaled)
    return featurized_action[0]
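
# --- Added usage sketch (not in the original snippet) ---
# Reuses the first row of action_examples, so the input matches what the scaler
# and featurizer above were fitted on; with the two RBFSampler components
# (n_components=1 each) the result has exactly two features.
demo_featurized = featurize_action(action_examples[0])
print(demo_featurized.shape)   # (2,)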



Example #46
def real_data_error_profile(data_name, sketch_size):
    '''
    Use a polynomial feature map, which generates C(feature_size + degree, degree)
    new features (an RBF random-feature map is used instead for the w8a dataset).
    '''
    # * Experimental parameters
    n = 20000
    trials = 5
    fd_iterations = 15
    rp_iterations = 45
    # ds = DataFactory(n=n)
    if data_name == 'CoverType':
        _ = np.load('../../datasets/covertype.npy')
        _X, _y = _[:, :-1], _[:, -1]
        feature_expansion = 'Polynomial'
        features = [2]
    elif data_name == 'w8a':
        _ = np.load('../../datasets/w8a.npy')
        _X, _y = _[:, :-1], _[:, -1]
        feature_expansion = 'RBF'
        features = [2500]

    # * Results data structures
    exact_results = {
        'solve time': {_: np.zeros(trials, dtype=float)
                       for _ in features}
    }
    fd_results = {
        'errors': {
            _: np.zeros((fd_iterations + 1, trials), dtype=float)
            for _ in features
        },
        'build times': {_: np.zeros(trials, dtype=float)
                        for _ in features},
        'iteration times':
        {_: np.zeros(trials, dtype=float)
         for _ in features},
        'all_times': {
            _: np.zeros((fd_iterations + 1, trials), dtype=float)
            for _ in features
        }
    }
    rfd_results = {
        'errors': {
            _: np.zeros((fd_iterations + 1, trials), dtype=float)
            for _ in features
        },
        'build times': {_: np.zeros(trials, dtype=float)
                        for _ in features},
        'iteration times':
        {_: np.zeros(trials, dtype=float)
         for _ in features},
        'all_times': {
            _: np.zeros((fd_iterations + 1, trials), dtype=float)
            for _ in features
        }
    }
    rp_srht_results = {
        'errors': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        },
        'build times': {_: np.zeros(trials, dtype=float)
                        for _ in features},
        'iteration times':
        {_: np.zeros(trials, dtype=float)
         for _ in features},
        'all_times': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        }
    }
    rp_cntsk_results = {
        'errors': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        },
        'build times': {_: np.zeros(trials, dtype=float)
                        for _ in features},
        'iteration times':
        {_: np.zeros(trials, dtype=float)
         for _ in features},
        'all_times': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        }
    }
    ihs_srht_results = {
        'errors': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        },
        'build times':
        {_: np.zeros((rp_iterations, trials), dtype=float)
         for _ in features},
        'iteration times':
        {_: np.zeros(trials, dtype=float)
         for _ in features},
        'all_times': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        }
    }

    ihs_sjlt_results = {
        'errors': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        },
        'build times':
        {_: np.zeros((rp_iterations, trials), dtype=float)
         for _ in features},
        'iteration times':
        {_: np.zeros(trials, dtype=float)
         for _ in features},
        'all_times': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        }
    }

    ihs_countsketch_results = {
        'errors': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        },
        'build times':
        {_: np.zeros((rp_iterations, trials), dtype=float)
         for _ in features},
        'iteration times':
        {_: np.zeros(trials, dtype=float)
         for _ in features},
        'all_times': {
            _: np.zeros((rp_iterations + 1, trials), dtype=float)
            for _ in features
        }
    }

    mean_iter_time_single = lambda a: np.mean(a['all_times'][1:] - a[
        'sketch time'])
    mean_iter_time_multi = lambda a, its: np.mean(a['all_times'][1:] - a[
        'sketch time'] / its)

    for t in range(trials):
        print('*' * 10, '\t TRIAL ', t, '\t', '*' * 10)
        np.random.seed(t)
        sample = np.random.choice(_X.shape[0], size=n, replace=False)
        X_sample, y = _X[sample], _y[sample]

        for i, feature_hyper in enumerate(features):

            print('######### FEATURIZING #########')
            if feature_expansion == 'Polynomial':
                X_poly = PolynomialFeatures(
                    degree=feature_hyper).fit_transform(X_sample)
                if X_poly.shape[1] > X_poly.shape[0]:
                    nkeep = int(1.5 * (X_poly.shape[0] - X_poly.shape[1]))
                    X_poly = X_poly[:, :nkeep]
            else:
                X_poly = RBFSampler(
                    gamma=0.0001, random_state=t,
                    n_components=feature_hyper).fit_transform(X_sample)
            X = StandardScaler().fit_transform(X_poly)
            N, D = X.shape
            X_train_sparse = coo_matrix(X)
            g = np.linalg.norm(X, ord='fro')**2 / sketch_size
            print('#' * 10, f'\t GAMMA: G={g}, i={i} d={feature_hyper}\t',
                  '#' * 10)

            # # ! Optimal solution
            print('#' * 60)
            print('Solving exactly: Data shape: ', X.shape)
            solve_start = timer()
            x_opt = svd_ridge_solve(X, y, g)
            solve_time = timer() - solve_start
            exact_results['solve time'][feature_hyper][t] = solve_time

            # ! FD Sketching
            print('#' * 10, '\t FREQUENT DIRECTIONS \t', '#' * 10)
            #fdr = FDRidge(fd_dim=sketch_size,gamma=g)
            fdr = IterativeRidge(N,
                                 D,
                                 sk_dim=sketch_size,
                                 sk_mode='FD',
                                 gamma=g)
            _, all_x, fd_measured = fdr.fit(X, y, fd_iterations)

            fd_results['errors'][feature_hyper][:, t] = \
                get_euclidean_errors(all_x, x_opt)
            fd_results['build times'][feature_hyper][t] = \
                fd_measured['sketch time']
            fd_results['iteration times'][feature_hyper][t] = \
                np.mean(fd_measured['update time'])  # mean_iter_time_single(fd_measured)
            fd_results['all_times'][feature_hyper][:, t] = \
                fd_measured['all_times']

            # # ! RFD Sketching
            print('#' * 10, '\t ROBUST FREQUENT DIRECTIONS \t', '#' * 10)
            #rfdr = FDRidge(fd_dim=sketch_size,fd_mode='RFD',gamma=g)
            rfdr = IterativeRidge(N,
                                  D,
                                  sk_dim=sketch_size,
                                  sk_mode='RFD',
                                  gamma=g)
            _, rfd_all_x, rfd_measured = rfdr.fit(X, y, fd_iterations)

            rfd_results['errors'][feature_hyper][:, t] = \
                get_euclidean_errors(rfd_all_x, x_opt)
            rfd_results['build times'][feature_hyper][t] = \
                rfd_measured['sketch time']
            rfd_results['iteration times'][feature_hyper][t] = \
                np.mean(rfd_measured['update time'])  # mean_iter_time_single(rfd_measured)
            rfd_results['all_times'][feature_hyper][:, t] = \
                rfd_measured['all_times']

            # # ! Single Random sketches
            print('#' * 10, '\t SRHT SINGLE \t', '#' * 10)
            srht_single = IterativeRidge(
                N, D, sk_dim=sketch_size, sk_mode='SRHT', gamma=g
            )  # RPRidge(rp_dim=sketch_size,rp_mode='Gaussian',gamma=g)
            _, srht_single_all_x, srht_single_measured = srht_single.fit(
                X, y, rp_iterations, seed=i)

            rp_srht_results['errors'][feature_hyper][:, t] = \
                get_euclidean_errors(srht_single_all_x, x_opt)
            rp_srht_results['build times'][feature_hyper][t] = \
                srht_single_measured['sketch time']
            rp_srht_results['iteration times'][feature_hyper][t] = \
                np.mean(srht_single_measured['update time'])  # mean_iter_time_single(srht_single_measured)
            rp_srht_results['all_times'][feature_hyper][:, t] = \
                srht_single_measured['all_times']

            print('#' * 10, '\t CountSketch SINGLE \t', '#' * 10)
            # ! Sparse methods using NUMBA need to compile the sketch so let's do that ahead of time
            # ! so that the timing experiment is not compromised.
            cntsk_single = IterativeRidge(
                N,
                D,
                sk_dim=sketch_size,
                sk_mode='CountSketch',
                gamma=g,
                sparse_data=X_train_sparse
            )  #RPRidge(rp_dim=sketch_size,rp_mode='SJLT',gamma=g)
            if t == 0:
                _, cntsk_single_all_x, cntsk_single_measured = cntsk_single.fit(
                    X, y)
            _, cntsk_single_all_x, cntsk_single_measured = cntsk_single.fit(
                X, y, rp_iterations, seed=i)

            rp_cntsk_results['errors'][feature_hyper][:, t] = \
                get_euclidean_errors(cntsk_single_all_x, x_opt)
            rp_cntsk_results['build times'][feature_hyper][t] = \
                cntsk_single_measured['sketch time']
            rp_cntsk_results['iteration times'][feature_hyper][t] = \
                np.mean(cntsk_single_measured['update time'])  # mean_iter_time_single(cntsk_single_measured)
            rp_cntsk_results['all_times'][feature_hyper][:, t] = \
                cntsk_single_measured['all_times']

            # ! Multi Random sketches
            print('#' * 10, '\t CountSketch IHS \t', '#' * 10)
            ihs_cntsk = IterativeRidge(
                N,
                D,
                sk_dim=sketch_size,
                sk_mode='CountSketch',
                gamma=g,
                sparse_data=X_train_sparse,
                ihs_mode='multi'
            )  #RPRidge(rp_dim=sketch_size,rp_mode='SJLT',gamma=g)
            if t == 0:
                # ! Sparse methods using NUMBA need to compile the sketch so let's do that ahead of time
                # ! so that the timing experiment is not compromised.
                _, ihs_cntsk_all_x, ihs_cntsk_measured = ihs_cntsk.fit(X, y)
            _, ihs_cntsk_all_x, ihs_cntsk_measured = ihs_cntsk.fit(
                X, y, rp_iterations, seed=i)

            ihs_countsketch_results['errors'][feature_hyper][:, t] = \
                get_euclidean_errors(ihs_cntsk_all_x, x_opt)
            ihs_countsketch_results['build times'][feature_hyper][:, t] = \
                ihs_cntsk_measured['sketch time']
            ihs_countsketch_results['iteration times'][feature_hyper][t] = \
                np.mean(ihs_cntsk_measured['update time'])  # mean_iter_time_multi(ihs_cntsk_measured, rp_iterations)
            ihs_countsketch_results['all_times'][feature_hyper][:, t] = \
                ihs_cntsk_measured['all_times']

            print('#' * 10, '\t SJLT IHS \t', '#' * 10)
            ihs_sjlt = IterativeRidge(
                N,
                D,
                sk_dim=sketch_size,
                sk_mode='SJLT',
                sjlt_sparsity=5,
                gamma=g,
                sparse_data=X_train_sparse,
                ihs_mode='multi'
            )  #RPRidge(rp_dim=sketch_size,rp_mode='SJLT',gamma=g)
            if t == 0:
                # ! Sparse methods using NUMBA need to compile the sketch so let's do that ahead of time
                # ! so that the timing experiment is not compromised.
                _, _, _ = ihs_sjlt.fit(X, y)
            _, ihs_sjlt_all_x, ihs_sjlt_measured = ihs_sjlt.fit(X,
                                                                y,
                                                                rp_iterations,
                                                                seed=i)

            ihs_sjlt_results['errors'][feature_hyper][:, t] = \
                get_euclidean_errors(ihs_sjlt_all_x, x_opt)
            ihs_sjlt_results['build times'][feature_hyper][:, t] = \
                ihs_sjlt_measured['sketch time']
            ihs_sjlt_results['iteration times'][feature_hyper][t] = \
                np.mean(ihs_sjlt_measured['update time'])  # mean_iter_time_multi(ihs_sjlt_measured, rp_iterations)
            ihs_sjlt_results['all_times'][feature_hyper][:, t] = \
                ihs_sjlt_measured['all_times']

            print('#' * 10, '\t SRHT IHS \t', '#' * 10)
            ihs_srht = IterativeRidge(
                N,
                D,
                sk_dim=sketch_size,
                sk_mode='SRHT',
                gamma=g,
                ihs_mode='multi'
            )  #  RPRidge(rp_dim=sketch_size,rp_mode='Gaussian',gamma=g)
            _, ihs_srht_all_x, ihs_srht_measured = ihs_srht.fit(X,
                                                                y,
                                                                rp_iterations,
                                                                seed=i)

            ihs_srht_results['errors'][feature_hyper][:, t] = \
                get_euclidean_errors(ihs_srht_all_x, x_opt)
            ihs_srht_results['build times'][feature_hyper][:, t] = \
                ihs_srht_measured['sketch time']
            ihs_srht_results['iteration times'][feature_hyper][t] = \
                np.mean(ihs_srht_measured['update time'])  # mean_iter_time_multi(ihs_srht_measured, rp_iterations)
            ihs_srht_results['all_times'][feature_hyper][:, t] = \
                ihs_srht_measured['all_times']

    # ! Prepare and save the results in json format
    pp = pprint.PrettyPrinter(indent=4)
    # print('FD')
    # pp.pprint(fd_results['errors'])
    # print('SRHT-Single')
    # pp.pprint(rp_srht_results['errors'])
    # print('SRHT-Multi')
    # pp.pprint(ihs_g_results['errors'])
    # # print('Gauss')
    # # pp.pprint(rpg_results)
    # print('ihs:SRHT')
    # pp.pprint(ihs_srht_results)
    # print('SJLT')
    # pp.pprint(rp_cntsk_results)
    # print('ihs:SJLT')
    # pp.pprint(ihs_countsketch_results)

    results_file_name = 'results/real_data/error_profile-' + data_name + '.json'
    for d in [
            exact_results, fd_results, rfd_results, rp_srht_results,
            rp_cntsk_results, ihs_srht_results, ihs_sjlt_results,
            ihs_countsketch_results
    ]:
        for k, v in d.items():
            for v_key, v_val in v.items():
                if type(v_val) == np.ndarray:
                    d[k][v_key] = v_val.tolist()
    all_results = {
        'Exact': exact_results,
        'FD': fd_results,
        'RFD': rfd_results,
        'SRHT': rp_srht_results,
        'CountSketch': rp_cntsk_results,
        'ihs:SRHT': ihs_srht_results,
        'ihs:SJLT': ihs_sjlt_results,
        'ihs:CountSketch': ihs_countsketch_results
    }

    with open(results_file_name, 'w') as fp:
        json.dump(all_results, fp, sort_keys=True, indent=4)
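The dump above stores every result array as a nested list keyed by method name, metric, and feature setting. A minimal sketch of reading the file back and summarising it; the path pattern and the 'errors' layout follow the code above, while the choice of summary (mean over trials) is my own:

import json

import numpy as np

data_name = 'DATASET'  # placeholder: the same data_name used when the file was written
with open('results/real_data/error_profile-' + data_name + '.json') as fp:
    all_results = json.load(fp)

# Every sketching method stores 'errors' as {feature_setting: (iterations + 1) x trials};
# turn the lists back into arrays and report the trial-averaged error per iteration.
for method in ['FD', 'RFD', 'SRHT', 'CountSketch',
               'ihs:SRHT', 'ihs:SJLT', 'ihs:CountSketch']:
    for feature_setting, errs in all_results[method]['errors'].items():
        errs = np.asarray(errs)
        print(method, feature_setting, errs.mean(axis=1))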
Example #47
    def __init__(self,
                 dataset,
                 obs_dim,
                 act_dim,
                 gamma,
                 horizon,
                 policy_net,
                 value_reg,
                 hidden_layers,
                 activation,
                 output_transform,
                 default_length_scale=0.1,
                 random_feature_per_obs_dim=250,
                 norm='std',
                 scale_length_adjustment='median',
                 input_mode='sa',
                 seed=1):
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.horizon = horizon
        self.norm = norm
        self.policy_net = policy_net
        # self.model_reg = model_reg
        # self.reward_reg = reward_reg
        self.value_reg = value_reg
        self.input_mode = input_mode

        self.n_samples = dataset['obs'].shape[0]
        self.n_episode = dataset['init_obs'].shape[0]
        if self.policy_net is not None:
            self.pi_current = self.policy_net.get_probabilities(dataset['obs'])
            self.pi_next = self.policy_net.get_probabilities(
                dataset['next_obs'])
            self.pi_init = self.policy_net.get_probabilities(
                dataset['init_obs'])
            self.pi_term = self.policy_net.get_probabilities(
                dataset['term_obs'])
        else:
            self.pi_current = dataset['target_prob_obs']
            self.pi_next = dataset['target_prob_next_obs']
            self.pi_init = dataset['target_prob_init_obs']
            self.pi_term = dataset['target_prob_term_obs']
        if self.norm == 'std':
            self.obs_mean = np.mean(dataset['obs'], axis=0, keepdims=True)
            self.obs_std = np.std(dataset['obs'], axis=0, keepdims=True)
            self.obs = (dataset['obs'] - self.obs_mean) / self.obs_std
            self.next_obs = (dataset['next_obs'] -
                             self.obs_mean) / self.obs_std
            self.init_obs = (dataset['init_obs'] -
                             self.obs_mean) / self.obs_std
            self.term_obs = (dataset['term_obs'] -
                             self.obs_mean) / self.obs_std
        elif self.norm is None:
            self.obs = dataset['obs']
            self.next_obs = dataset['next_obs']
            self.init_obs = dataset['init_obs']
            self.term_obs = dataset['term_obs']
        else:
            raise NotImplementedError
        if scale_length_adjustment == 'median':
            sample_num = 5000
            idx1 = np.random.choice(self.n_samples, sample_num)
            idx2 = np.random.choice(self.n_samples, sample_num)
            med_dist = np.median(np.square(self.obs[None, idx1, :] -
                                           self.obs[idx2, None, :]),
                                 axis=(0, 1))
            med_dist[med_dist < 0.01] = 0.01  # floor the median distance, which caps the scale length (1/med_dist)
            scale_length_vector = 1.0 / med_dist
        else:
            scale_length_vector = np.ones(self.obs_dim)
        # import pdb; pdb.set_trace()
        #* set the fourier feature
        transformer_list = []
        self.z_dim = random_feature_per_obs_dim * self.obs_dim
        models = [
            RBFSampler(n_components=random_feature_per_obs_dim,
                       gamma=default_length_scale * dist)
            for dist in scale_length_vector
        ]
        for model in models:
            model.fit([self.obs[0]])
            transformer_list.append((str(model), model))
        self.rff = FeatureUnion(transformer_list)

        #* separate action set indexing
        act_idx = []
        for i in range(self.act_dim):
            act_idx.append(np.where(dataset['acts'] == i)[0])
        #* apply transformation
        Z = self.rff.transform(self.obs)
        Z_prime = self.rff.transform(self.next_obs)
        Z_init = self.rff.transform(self.init_obs)
        Z_term = self.rff.transform(self.term_obs)
        assert self.z_dim == Z.shape[1]
        self.Phi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim))
        self.Phi_pi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim))
        self.Phi_prime_pi = np.zeros(
            (Z_prime.shape[0], Z_prime.shape[1] * self.act_dim))
        self.Phi_init_pi = np.zeros(
            (Z_init.shape[0], Z_init.shape[1] * self.act_dim))
        self.Phi_term_pi = np.zeros(
            (Z_term.shape[0], Z_term.shape[1] * self.act_dim))
        for i in range(self.act_dim):
            self.Phi[act_idx[i],
                     i * self.z_dim:(i + 1) * self.z_dim] = Z[act_idx[i]]
            self.Phi_pi[:, i * self.z_dim:(i + 1) *
                        self.z_dim] = self.pi_current[:, i][:, None] * Z
            self.Phi_prime_pi[:, i * self.z_dim:(i + 1) *
                              self.z_dim] = self.pi_next[:, i][:,
                                                               None] * Z_prime
            self.Phi_init_pi[:, i * self.z_dim:(i + 1) *
                             self.z_dim] = self.pi_init[:, i][:, None] * Z_init
            self.Phi_term_pi[:, i * self.z_dim:(i + 1) *
                             self.z_dim] = self.pi_term[:, i][:, None] * Z_term

        #* Some commonly used variables
        self.I_sa = np.eye(self.act_dim * self.z_dim)
        self.rews = dataset['rews']
        self.init_idx = np.arange(0, self.n_samples, self.horizon)
        self.end_idx = np.arange(self.horizon - 1, self.n_samples,
                                 self.horizon)

        #* make sure that the importance weights are already calculated
        self.rho = dataset['ratio']

        #* set-up network
        #! consider representing the actions better
        if self.input_mode == 'sa':
            if self.act_dim == 2:
                # turn the actions into [-1, 1] for the binary action case
                acts = dataset['acts'] * 2 - 1
                self.x = torch.tensor(np.concatenate((self.obs, acts), axis=1))
                self.w_net = Simple_MLP(input_dim=self.obs_dim + 1,
                                        output_dim=1,
                                        hidden_layers=hidden_layers,
                                        activation=activation,
                                        output_transform=output_transform)
            else:
                raise NotImplementedError
        elif self.input_mode == 's':
            self.x = torch.tensor(self.obs)
            self.w_net = Simple_MLP(input_dim=self.obs_dim,
                                    output_dim=1,
                                    hidden_layers=hidden_layers,
                                    activation=activation,
                                    output_transform=output_transform)

        self.form_td_ball()
        self.prepare_torch_tensor()
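The constructor above chooses one bandwidth per observation dimension with a median heuristic (median squared pairwise distance, floored at 0.01) and stacks one RBFSampler per resulting scale length into a FeatureUnion, each sampler seeing the full observation vector. A self-contained sketch of that recipe; `make_rff_union` is a made-up helper name, and drawing index pairs instead of the full 5000 x 5000 cross product is a simplification:

import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.pipeline import FeatureUnion


def make_rff_union(obs, n_components_per_dim=250, default_length_scale=0.1,
                   n_pairs=5000, seed=0):
    """Median-heuristic scale lengths, one RBFSampler per observation dimension."""
    rng = np.random.default_rng(seed)
    idx1 = rng.integers(0, obs.shape[0], n_pairs)
    idx2 = rng.integers(0, obs.shape[0], n_pairs)
    # median squared distance per dimension, floored so 1/med_dist stays bounded
    med_dist = np.maximum(np.median(np.square(obs[idx1] - obs[idx2]), axis=0), 0.01)
    scale_length_vector = 1.0 / med_dist
    union = FeatureUnion([
        ('rff_%d' % d, RBFSampler(n_components=n_components_per_dim,
                                  gamma=default_length_scale * s))
        for d, s in enumerate(scale_length_vector)
    ])
    union.fit(obs[:1])  # fit only fixes the random weights and output dimension
    return union


obs = np.random.randn(1000, 4)
rff = make_rff_union(obs)
Z = rff.transform(obs)  # shape (1000, 4 * 250)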
Example #48
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector
from xgboost import XGBClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=42)

# Average CV score on the training set was:0.7117612161661105
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=4, subset_list="subsets.csv"),
    RBFSampler(gamma=0.8),
    XGBClassifier(learning_rate=0.1, max_depth=7, min_child_weight=4, n_estimators=100, nthread=1, subsample=1.0)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #49
import numpy as np
from sklearn import pipeline
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import LinearRegression

n = 100  # assumed training-set size; the original snippet uses n before defining it
train_X = np.random.rand(n) * 0.5 + 0.1
train_X = np.expand_dims(train_X, axis=1)
train_y = np.sqrt(train_X * (1 - train_X)) * np.sin(1.1 * np.pi / (train_X + 0.05))


n = 100
test_X = np.random.rand(n) * 0.5 + 0.1
test_X = np.expand_dims(test_X, axis=1)
test_y = np.sqrt(test_X * (1 - test_X)) * np.sin(2.1 * np.pi / (test_X + 0.05))





for gamm in [500000]:
    feature_map_fourier = RBFSampler(gamma=gamm, random_state=1)
    fourier_approx_linear = pipeline.Pipeline([
        ("feature_map", feature_map_fourier),
        ("svm", LinearRegression()),
    ])
    train_acc = []
    test_acc = []
    features = np.arange(1, 301)  # number of random features D to try
    for D in features:
        fourier_approx_linear.set_params(feature_map__n_components=D)
        fourier_approx_linear.fit(train_X, train_y)
        # X_plot = np.expand_dims(np.linspace(0.1, 0.6, 10000), axis=1)
        # y_plot = fourier_approx_linear.predict(X_plot)
        # plt.plot(X_plot, y_plot, label=str(D))
        # plt.scatter(train_X, train_y, label=str(D))
        # plt.show()
        train_acc.append(np.sqrt(np.mean((fourier_approx_linear.predict(train_X) - train_y) ** 2)))
        test_acc.append(np.sqrt(np.mean((fourier_approx_linear.predict(test_X) - test_y) ** 2)))
Example #50
    def __init__(self,
                 dataset,
                 obs_dim,
                 act_dim,
                 gamma,
                 horizon,
                 value_reg,
                 default_length_scale=0.2,
                 random_feature_per_obs_dim=250,
                 norm=None,
                 scale_length_adjustment='median',
                 dtype=np.float32,
                 policy_net=None,
                 separate_action_indexing=False,
                 action_encoding_scheme='continuous'):
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.horizon = horizon
        self.norm = norm
        self.policy_net = policy_net
        self.value_reg = value_reg
        self.dtype = dtype
        self.separate_action_indexing = separate_action_indexing
        self.action_encoding_scheme = action_encoding_scheme

        self.n_samples = dataset['obs'].shape[0]
        self.n_episode = dataset['init_obs'].shape[0]

        self.non_terminal_idx = (dataset['info'] == False)[:, 0]
        self.n_samples_non_terminal = self.non_terminal_idx.sum()
        self.data_acts = dataset['acts'][self.non_terminal_idx]

        if self.policy_net is not None:
            self.pi_current = self.policy_net.get_probabilities(dataset['obs'])
            self.pi_next = self.policy_net.get_probabilities(
                dataset['next_obs'])
            self.pi_init = self.policy_net.get_probabilities(
                dataset['init_obs'])
            self.pi_term = self.policy_net.get_probabilities(
                dataset['term_obs'])
        else:
            self.pi_current = dataset['target_prob_obs'][self.non_terminal_idx]
            self.pi_next = dataset['target_prob_next_obs'][
                self.non_terminal_idx]
            self.pi_init = dataset['target_prob_init_obs']
            self.pi_term = dataset['target_prob_term_obs']
        if self.norm is None:
            self.obs = dataset['obs'][self.non_terminal_idx]
            self.next_obs = dataset['next_obs'][self.non_terminal_idx]
            self.init_obs = dataset['init_obs']
            self.term_obs = dataset['term_obs']
        elif self.norm == 'std':
            self.obs_mean = np.mean(dataset['obs'], axis=0, keepdims=True)
            self.obs_std = np.std(dataset['obs'], axis=0, keepdims=True)
            self.obs = (dataset['obs'] - self.obs_mean) / self.obs_std
            self.next_obs = (dataset['next_obs'] -
                             self.obs_mean) / self.obs_std
            self.init_obs = (dataset['init_obs'] -
                             self.obs_mean) / self.obs_std
            self.term_obs = (dataset['term_obs'] -
                             self.obs_mean) / self.obs_std
        else:
            raise NotImplementedError
        # pdb.set_trace()
        #* what if we only whiten over the non-terminal tuples
        non_terminal_idx = (dataset['info'] == False)[:, 0]
        obs_mean = np.mean(dataset['obs'][non_terminal_idx],
                           axis=0,
                           keepdims=True)
        obs_std = np.std(dataset['obs'][non_terminal_idx],
                         axis=0,
                         keepdims=True)
        # #* re-whiten the observations:
        self.obs = (self.obs - obs_mean) / obs_std
        self.next_obs = (self.next_obs - obs_mean) / obs_std
        self.init_obs = (self.init_obs - obs_mean) / obs_std
        self.term_obs = (self.term_obs - obs_mean) / obs_std

        #* if not separate action indexing, we are concatenating (s,a) as input
        if not self.separate_action_indexing:
            if self.action_encoding_scheme == 'continuous':
                encoded_actions = np.linspace(-1, 1, self.act_dim)
                # mean_action = np.mean(encoded_actions[self.data_acts[non_terminal_idx]])
                # std_action = np.std(encoded_actions[self.data_acts[non_terminal_idx]])
                mean_action = np.mean(encoded_actions[self.data_acts])
                std_action = np.std(encoded_actions[self.data_acts])

                self.encoded_actions = (encoded_actions -
                                        mean_action) / std_action

                # self.act = (self.data_acts / (self.act_dim-1)) * 2 -1
                # self.act = (self.act - np.mean(self.act, axis=0, keepdims=True))/np.std(self.act, axis=0, keepdims=True)
                self.act = self.encoded_actions[self.data_acts]

                self.input = np.concatenate((self.obs, self.act), axis=1)
                self.input_dim = self.input.shape[1]
            else:
                raise NotImplementedError
        else:
            self.input = self.obs
            self.input_dim = self.obs.shape[1]

        if scale_length_adjustment == 'median':
            sample_num = 5000
            # idx1 = np.random.choice(self.n_samples, sample_num); idx2 = np.random.choice(self.n_samples, sample_num)
            # idx1 = np.random.choice(np.arange(self.n_samples)[non_terminal_idx], sample_num); idx2 = np.random.choice(np.arange(self.n_samples)[non_terminal_idx], sample_num)
            idx1 = np.random.choice(self.n_samples_non_terminal, sample_num)
            idx2 = np.random.choice(self.n_samples_non_terminal, sample_num)
            # med_dist = np.median(np.square(self.obs[None, idx1, :] - self.obs[idx2, None, :]), axis = (0,1))
            med_dist = np.median(np.square(self.input[None, idx1, :] -
                                           self.input[idx2, None, :]),
                                 axis=(0, 1))
            med_dist[med_dist < 0.01] = 0.01  # floor the median distance, which caps the scale length (1/med_dist)
            self.scale_length_vector = 1.0 / med_dist
        else:
            # scale_length_vector = np.ones(self.obs_dim)
            self.scale_length_vector = np.ones(self.input_dim)

        # self.scale_length_vector = np.linspace(1,2,5)
        # NOTE: this overrides the median-based scale lengths computed above
        self.scale_length_vector = np.ones(self.input_dim)
        self.z_dim = random_feature_per_obs_dim * self.input_dim
        self.rff = RBFSampler(n_components=self.z_dim,
                              gamma=default_length_scale)
        self.rff.fit([self.input[0]])
        # #* set the fourier feature
        # transformer_list = []
        # # self.z_dim = random_feature_per_obs_dim * self.obs_dim
        # self.z_dim = random_feature_per_obs_dim * self.input_dim
        # models = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale*dist) for dist in self.scale_length_vector]
        # for model in models:
        #     # model.fit([self.obs[0]])
        #     model.fit([self.input[0]])
        #     transformer_list.append((str(model), model))
        # self.rff = FeatureUnion(transformer_list)

        # models = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale*dist) for dist in self.scale_length_vector]
        # for model in models:
        #     # model.fit([self.obs[0]])
        #     model.fit([self.input[0]])
        #     transformer_list.append((str(model), model))
        # self.rff = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale)]
        # self.rff.fit([self.input[0]])

        #* Some commonly used variables
        # self.I_sa = np.eye(self.act_dim*self.z_dim)
        self.rews = dataset['rews'][self.non_terminal_idx]
        # self.init_idx = np.arange(0, self.n_samples, self.horizon)
        # self.end_idx = np.arange(self.horizon-1, self.n_samples, self.horizon)

        #* make sure that the importance weights are already calculated
        self.rho = dataset['ratio'][self.non_terminal_idx]
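When `separate_action_indexing` is off, the block above encodes the discrete action set as evenly spaced values in [-1, 1], standardises those codes against the empirical action distribution, and concatenates them with the observations. A tiny worked sketch of just that encoding step, with invented numbers:

import numpy as np

act_dim = 3
data_acts = np.array([[0], [2], [2], [1]])     # logged discrete actions, shape (n, 1)
obs = np.random.randn(4, 2)                    # matching observations

encoded_actions = np.linspace(-1, 1, act_dim)  # [-1., 0., 1.]
mean_action = np.mean(encoded_actions[data_acts])
std_action = np.std(encoded_actions[data_acts])
encoded_actions = (encoded_actions - mean_action) / std_action

act = encoded_actions[data_acts]               # standardised codes, shape (n, 1)
inputs = np.concatenate((obs, act), axis=1)    # (s, a) features, shape (n, 3)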
Example #51
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=39)

# Average CV score on the training set was:0.6592436040044494
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=4, subset_list="subsets.csv"),
    RBFSampler(gamma=0.30000000000000004),
    DecisionTreeClassifier(criterion="gini",
                           max_depth=1,
                           min_samples_leaf=15,
                           min_samples_split=20))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #52
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

env = gym.envs.make('MountainCar-v0')

env.observation_space.sample()

observation_examples = np.array(
    [env.observation_space.sample() for x in range(10000)])

scaler = sklearn.preprocessing.StandardScaler()

featurizer = sklearn.pipeline.FeatureUnion([
    ('rbf1', RBFSampler(gamma=5.0, n_components=100)),
    ('rbf2', RBFSampler(gamma=2.0, n_components=100)),
    ('rbf3', RBFSampler(gamma=1.0, n_components=100)),
    ('rbf4', RBFSampler(gamma=0.5, n_components=100))
])

featurizer.fit(scaler.fit_transform(observation_examples))


class FunctionApproximator():
    def __init__(self):

        self.models = []
        for i in range(env.action_space.n):
            model = SGDRegressor(learning_rate="constant")
            model.partial_fit([self.featurize_state(env.reset())], [0])
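`featurize_state` is called in `partial_fit` above but the snippet cuts off before it is defined; in this kind of setup it usually just scales the raw observation and maps it through the fitted FeatureUnion. A hedged sketch of what it might look like, not the original implementation:

    def featurize_state(self, state):
        # scale the raw observation with the module-level scaler, then apply
        # the four RBFSampler maps (4 x 100 = 400 features)
        scaled = scaler.transform([state])
        featurized = featurizer.transform(scaled)
        return featurized[0]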
Example #53
class LSTDQ_Kernel():
    def __init__(self,
                 dataset,
                 obs_dim,
                 act_dim,
                 gamma,
                 horizon,
                 value_reg,
                 default_length_scale=0.2,
                 random_feature_per_obs_dim=250,
                 norm=None,
                 scale_length_adjustment='median',
                 dtype=np.float32,
                 policy_net=None,
                 separate_action_indexing=False,
                 action_encoding_scheme='continuous'):
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.horizon = horizon
        self.norm = norm
        self.policy_net = policy_net
        self.value_reg = value_reg
        self.dtype = dtype
        self.separate_action_indexing = separate_action_indexing
        self.action_encoding_scheme = action_encoding_scheme

        self.n_samples = dataset['obs'].shape[0]
        self.n_episode = dataset['init_obs'].shape[0]

        self.non_terminal_idx = (dataset['info'] == False)[:, 0]
        self.n_samples_non_terminal = self.non_terminal_idx.sum()
        self.data_acts = dataset['acts'][self.non_terminal_idx]

        if self.policy_net is not None:
            self.pi_current = self.policy_net.get_probabilities(dataset['obs'])
            self.pi_next = self.policy_net.get_probabilities(
                dataset['next_obs'])
            self.pi_init = self.policy_net.get_probabilities(
                dataset['init_obs'])
            self.pi_term = self.policy_net.get_probabilities(
                dataset['term_obs'])
        else:
            self.pi_current = dataset['target_prob_obs'][self.non_terminal_idx]
            self.pi_next = dataset['target_prob_next_obs'][
                self.non_terminal_idx]
            self.pi_init = dataset['target_prob_init_obs']
            self.pi_term = dataset['target_prob_term_obs']
        if self.norm is None:
            self.obs = dataset['obs'][self.non_terminal_idx]
            self.next_obs = dataset['next_obs'][self.non_terminal_idx]
            self.init_obs = dataset['init_obs']
            self.term_obs = dataset['term_obs']
        elif self.norm == 'std':
            self.obs_mean = np.mean(dataset['obs'], axis=0, keepdims=True)
            self.obs_std = np.std(dataset['obs'], axis=0, keepdims=True)
            self.obs = (dataset['obs'] - self.obs_mean) / self.obs_std
            self.next_obs = (dataset['next_obs'] -
                             self.obs_mean) / self.obs_std
            self.init_obs = (dataset['init_obs'] -
                             self.obs_mean) / self.obs_std
            self.term_obs = (dataset['term_obs'] -
                             self.obs_mean) / self.obs_std
        else:
            raise NotImplementedError
        # pdb.set_trace()
        #* what if we only whiten over the non-terminal tuples
        non_terminal_idx = (dataset['info'] == False)[:, 0]
        obs_mean = np.mean(dataset['obs'][non_terminal_idx],
                           axis=0,
                           keepdims=True)
        obs_std = np.std(dataset['obs'][non_terminal_idx],
                         axis=0,
                         keepdims=True)
        # #* re-whiten the observations:
        self.obs = (self.obs - obs_mean) / obs_std
        self.next_obs = (self.next_obs - obs_mean) / obs_std
        self.init_obs = (self.init_obs - obs_mean) / obs_std
        self.term_obs = (self.term_obs - obs_mean) / obs_std

        #* if not separate action indexing, we are concatenating (s,a) as input
        if not self.separate_action_indexing:
            if self.action_encoding_scheme == 'continuous':
                encoded_actions = np.linspace(-1, 1, self.act_dim)
                # mean_action = np.mean(encoded_actions[self.data_acts[non_terminal_idx]])
                # std_action = np.std(encoded_actions[self.data_acts[non_terminal_idx]])
                mean_action = np.mean(encoded_actions[self.data_acts])
                std_action = np.std(encoded_actions[self.data_acts])

                self.encoded_actions = (encoded_actions -
                                        mean_action) / std_action

                # self.act = (self.data_acts / (self.act_dim-1)) * 2 -1
                # self.act = (self.act - np.mean(self.act, axis=0, keepdims=True))/np.std(self.act, axis=0, keepdims=True)
                self.act = self.encoded_actions[self.data_acts]

                self.input = np.concatenate((self.obs, self.act), axis=1)
                self.input_dim = self.input.shape[1]
            else:
                raise NotImplementedError
        else:
            self.input = self.obs
            self.input_dim = self.obs.shape[1]

        if scale_length_adjustment == 'median':
            sample_num = 5000
            # idx1 = np.random.choice(self.n_samples, sample_num); idx2 = np.random.choice(self.n_samples, sample_num)
            # idx1 = np.random.choice(np.arange(self.n_samples)[non_terminal_idx], sample_num); idx2 = np.random.choice(np.arange(self.n_samples)[non_terminal_idx], sample_num)
            idx1 = np.random.choice(self.n_samples_non_terminal, sample_num)
            idx2 = np.random.choice(self.n_samples_non_terminal, sample_num)
            # med_dist = np.median(np.square(self.obs[None, idx1, :] - self.obs[idx2, None, :]), axis = (0,1))
            med_dist = np.median(np.square(self.input[None, idx1, :] -
                                           self.input[idx2, None, :]),
                                 axis=(0, 1))
            med_dist[med_dist < 0.01] = 0.01  # floor the median distance, which caps the scale length (1/med_dist)
            self.scale_length_vector = 1.0 / med_dist
        else:
            # scale_length_vector = np.ones(self.obs_dim)
            self.scale_length_vector = np.ones(self.input_dim)

        # self.scale_length_vector = np.linspace(1,2,5)
        # NOTE: this overrides the median-based scale lengths computed above
        self.scale_length_vector = np.ones(self.input_dim)
        self.z_dim = random_feature_per_obs_dim * self.input_dim
        self.rff = RBFSampler(n_components=self.z_dim,
                              gamma=default_length_scale)
        self.rff.fit([self.input[0]])
        # #* set the fourier feature
        # transformer_list = []
        # # self.z_dim = random_feature_per_obs_dim * self.obs_dim
        # self.z_dim = random_feature_per_obs_dim * self.input_dim
        # models = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale*dist) for dist in self.scale_length_vector]
        # for model in models:
        #     # model.fit([self.obs[0]])
        #     model.fit([self.input[0]])
        #     transformer_list.append((str(model), model))
        # self.rff = FeatureUnion(transformer_list)

        # models = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale*dist) for dist in self.scale_length_vector]
        # for model in models:
        #     # model.fit([self.obs[0]])
        #     model.fit([self.input[0]])
        #     transformer_list.append((str(model), model))
        # self.rff = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale)]
        # self.rff.fit([self.input[0]])

        #* Some commonly used variables
        # self.I_sa = np.eye(self.act_dim*self.z_dim)
        self.rews = dataset['rews'][self.non_terminal_idx]
        # self.init_idx = np.arange(0, self.n_samples, self.horizon)
        # self.end_idx = np.arange(self.horizon-1, self.n_samples, self.horizon)

        #* make sure that the importance weights are already calculated
        self.rho = dataset['ratio'][self.non_terminal_idx]
        # pdb.set_trace()

    def estimate(self):
        if self.separate_action_indexing:
            value_est = self.estimate_LSTDQ_separate_action_indexing()
        else:
            value_est = self.estimate_LSTDQ_concat_sa_input()
        return value_est

    def estimate_LSTDQ_concat_sa_input(self):
        # transformed_action = np.linspace(-1,1, self.act_dim)
        # n_samples = self.non_terminal_idx.sum()
        a_prime = np.tile(self.encoded_actions,
                          self.n_samples_non_terminal)[:, np.newaxis]
        # a_prime = np.tile(self.encoded_actions, self.n_samples)[:,np.newaxis]
        x_prime = np.concatenate(
            (np.repeat(self.next_obs, self.act_dim, axis=0), a_prime), axis=1)
        # a0_expanded = np.tile(transformed_action,self.n_episode)[:,np.newaxis]
        a0_expanded = np.tile(self.encoded_actions, self.n_episode)[:,
                                                                    np.newaxis]
        x0 = np.concatenate(
            (np.repeat(self.init_obs, self.act_dim, axis=0), a0_expanded),
            axis=1)
        # aterm_expanded = np.tile(transformed_action, self.n_episode)[:,np.newaxis]
        aterm_expanded = np.tile(self.encoded_actions,
                                 self.n_episode)[:, np.newaxis]
        xterm = np.concatenate(
            (np.repeat(self.term_obs, self.act_dim, axis=0), aterm_expanded),
            axis=1)

        Z = self.rff.transform(self.input).astype(self.dtype)
        Z_prime = self.rff.transform(x_prime).astype(self.dtype)
        aprime_probs = self.pi_next.flatten()[:, np.newaxis]
        Z_prime = Z_prime * aprime_probs
        Z_prime = Z_prime.reshape((self.n_samples_non_terminal, self.act_dim,
                                   self.z_dim)).sum(axis=1)

        reg = self.value_reg

        regularized_inverse = np.linalg.inv(
            np.matmul(Z.T, Z - self.gamma * Z_prime) +
            reg * np.eye(self.z_dim))
        featurized_reward = np.matmul(Z.T, self.rews)
        value_coef = np.matmul(regularized_inverse, featurized_reward)

        Z0 = self.rff.transform(x0)
        Q0 = np.matmul(Z0, value_coef)

        Z_term = self.rff.transform(xterm)
        Q_term = np.matmul(Z_term, value_coef)

        V_init = (Q0 * self.pi_init.flatten()[:, np.newaxis]).reshape(
            (self.n_episode, self.act_dim)).sum(axis=1)
        V_term = (Q_term * self.pi_term.flatten()[:, np.newaxis]).reshape(
            (self.n_episode, self.act_dim)).sum(axis=1)
        V_traj = V_init - V_term * self.gamma**self.horizon
        value_est = np.mean(V_traj)
        # pdb.set_trace()
        return value_est

    def estimate_LSTDQ_separate_action_indexing(self):
        #* separate action set indexing
        act_idx = []
        for i in range(self.act_dim):
            act_idx.append(np.where(self.data_acts == i)[0])
        #* apply transformation
        Z = self.rff.transform(self.obs).astype(self.dtype)
        Z_prime = self.rff.transform(self.next_obs).astype(self.dtype)
        Z_init = self.rff.transform(self.init_obs).astype(self.dtype)
        Z_term = self.rff.transform(self.term_obs).astype(self.dtype)
        # import pdb; pdb.set_trace()
        assert self.z_dim == Z.shape[1]
        Phi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim),
                       dtype=self.dtype)
        Phi_pi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim),
                          dtype=self.dtype)
        Phi_prime_pi = np.zeros(
            (Z_prime.shape[0], Z_prime.shape[1] * self.act_dim),
            dtype=self.dtype)
        Phi_init_pi = np.zeros(
            (Z_init.shape[0], Z_init.shape[1] * self.act_dim),
            dtype=self.dtype)
        Phi_term_pi = np.zeros(
            (Z_term.shape[0], Z_term.shape[1] * self.act_dim),
            dtype=self.dtype)
        for i in range(self.act_dim):
            Phi[act_idx[i],
                i * self.z_dim:(i + 1) * self.z_dim] = Z[act_idx[i]]
            Phi_pi[:, i * self.z_dim:(i + 1) *
                   self.z_dim] = self.pi_current[:, i][:, None] * Z
            Phi_prime_pi[:, i * self.z_dim:(i + 1) *
                         self.z_dim] = self.pi_next[:, i][:, None] * Z_prime
            Phi_init_pi[:, i * self.z_dim:(i + 1) *
                        self.z_dim] = self.pi_init[:, i][:, None] * Z_init
            Phi_term_pi[:, i * self.z_dim:(i + 1) *
                        self.z_dim] = self.pi_term[:, i][:, None] * Z_term

        I_sa = np.eye(self.act_dim * self.z_dim, dtype=self.dtype)

        regularized_inverse = np.linalg.inv(
            np.matmul(Phi.T, Phi - self.gamma * Phi_prime_pi) +
            self.value_reg * I_sa)
        featurized_reward = np.matmul(Phi.T, self.rews)
        reward_coef = np.matmul(regularized_inverse, featurized_reward)
        V_init = Phi_init_pi @ reward_coef
        V_term = Phi_term_pi @ reward_coef
        V_traj = V_init - V_term * self.gamma**self.horizon
        value_est = np.mean(V_traj)
        # import pdb; pdb.set_trace()
        return value_est
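Driving the class above only requires a dataset dictionary with the keys it reads ('obs', 'next_obs', 'init_obs', 'term_obs', 'acts', 'rews', 'info', 'ratio' and the 'target_prob_*' entries) plus the MDP constants. A minimal usage sketch with random stand-in data; every shape and value below is invented purely to exercise the API, and the median scale-length adjustment is switched off to keep the toy example light:

import numpy as np

n_ep, H, obs_dim, act_dim = 5, 20, 2, 3    # episodes, horizon, state and action sizes
n = n_ep * H
rng = np.random.RandomState(0)

dataset = {
    'obs': rng.randn(n, obs_dim),
    'next_obs': rng.randn(n, obs_dim),
    'init_obs': rng.randn(n_ep, obs_dim),
    'term_obs': rng.randn(n_ep, obs_dim),
    'acts': rng.randint(act_dim, size=(n, 1)),
    'rews': rng.randn(n, 1),
    'info': np.zeros((n, 1), dtype=bool),   # no terminal transitions in this toy batch
    'ratio': np.ones((n, 1)),               # importance weights, assumed precomputed
    'target_prob_obs': rng.dirichlet(np.ones(act_dim), size=n),
    'target_prob_next_obs': rng.dirichlet(np.ones(act_dim), size=n),
    'target_prob_init_obs': rng.dirichlet(np.ones(act_dim), size=n_ep),
    'target_prob_term_obs': rng.dirichlet(np.ones(act_dim), size=n_ep),
}

estimator = LSTDQ_Kernel(dataset, obs_dim=obs_dim, act_dim=act_dim,
                         gamma=0.99, horizon=H, value_reg=1e-3,
                         random_feature_per_obs_dim=50,
                         scale_length_adjustment=None)
print(estimator.estimate())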
Example #54
 #prop['probability_' + ps] = []
 #prop['class_' + ps] = []
 # restore classifier set from file
 classifier = joblib.load('data/' + algorithm + '-' + ps + '.pkl') 
 
 # restore robust scaler from file
 robust_scaler = joblib.load('data/rs-' + algorithm + '-' + ps + '.pkl') 
 
 # restore classes from file
 classes = joblib.load('data/classes-' + algorithm + '-' + ps + '.pkl') 
 
 
 cstatus = robust_scaler.transform(cstatus_orig)
 
 if algorithm == 'kernel-approx':
     rbf_feature = RBFSampler(gamma=1, random_state=1)
     cstatus = rbf_feature.fit_transform(cstatus)
     
 prob = None
 if algorithm == 'one-vs-rest' or algorithm == 'linear-svm':
     f = np.vectorize(platt_func)
     raw_predictions = classifier.decision_function(cstatus)
     platt_predictions = f(raw_predictions)
     prob = platt_predictions / platt_predictions.sum(axis=1)
     #prob = prob.tolist()
     
 else:
     prob = classifier.predict_proba(cstatus).tolist()
     
 
 for i in range(0,len(classes)):
Example #55
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=87)

# Average CV score on the training set was:0.6941861327400816
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=17, subset_list="subsets.csv"),
    RBFSampler(gamma=0.65),
    RandomForestClassifier(bootstrap=True,
                           criterion="gini",
                           max_features=0.5,
                           min_samples_leaf=1,
                           min_samples_split=11,
                           n_estimators=100))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #56
import sys
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn import model_selection
from sklearn import svm
from sklearn.kernel_approximation import RBFSampler
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.model_selection import GridSearchCV


DIMENSION = 400  # Dimension of the original data.
CLASSES = (-1, +1)   # The classes that we are trying to predict.

chi_feature = AdditiveChi2Sampler(sample_steps=1)
chi_feature.fit(np.zeros([1, 400]))
rbf = RBFSampler(n_components=15 * DIMENSION, random_state=1)
rbf.fit(np.zeros([1, 400]))

def transform(x_original):
    out = np.concatenate(([1], rbf.transform(chi_feature.transform(x_original)[0])[0]))
    return out

if __name__ == "__main__":
    X = []
    Y = []
    # initialize stochastic gradient descent classifier
    cls = SGDClassifier(alpha=0.0001, fit_intercept=False, max_iter=15,
                        penalty="l2", warm_start=True)
    for line in sys.stdin:
        line = line.strip()
        (label, x_string) = line.split(" ", 1)
        label = int(label)
Example #57
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=63)

# Average CV score on the training set was:0.7008379681127179
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=1, subset_list="subsets.csv"),
    RBFSampler(gamma=0.2),
    DecisionTreeClassifier(criterion="gini", max_depth=6, min_samples_leaf=3, min_samples_split=8)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #58
class ExposeDetector(AnomalyDetector):

  """ This detector is an implementation of The EXPoSE (EXPected Similarity
  Estimation) algorithm as described in Markus Schneider, Wolfgang Ertel,
  Fabio Ramos, "Expected Similarity Estimation for Large-Scale Batch and
  Streaming Anomaly Detection", arXiv 1601.06602 (2016).

  EXPoSE calculates the likelihood of a data point being normal by using
  the inner product of its feature map with kernel embedding of previous data
  points. This measures the similarity of a data point to previous points
  without assuming an underlying data distribution.

  There are three EXPoSE variants: incremental, windowing and decay. This
  implementation is based on EXPoSE with decay. All three variants have been
  tried on NAB, but decay gives the best results. Parameters for this detector
  have been tuned to give the best performance.
  """

  def __init__(self, *args, **kwargs):
    super(ExposeDetector, self).__init__(*args, **kwargs)

    self.kernel = None
    self.previousExposeModel = []
    self.decay = 0.01
    self.timestep = 0


  def initialize(self):
    """Initializes RBFSampler for the detector"""
    self.kernel = RBFSampler(gamma=0.5,
                             n_components=20000,
                             random_state=290)


  def handleRecord(self, inputData):
    """ Returns a list [anomalyScore] calculated using a kernel based
    similarity method described in the comments below"""

    # Transform the input by approximating feature map of a Radial Basis
    # Function kernel using Random Kitchen Sinks approximation
    inputFeature = self.kernel.fit_transform(
      numpy.array([[inputData["value"]]]))

    # Compute expose model as a weighted sum of new data point's feature
    # map and previous data points' kernel embedding. Influence of older data
    # points declines with the decay factor.
    if self.timestep == 0:
      exposeModel = inputFeature
    else:
      exposeModel = ((self.decay * inputFeature) + (1 - self.decay) *
                     self.previousExposeModel)

    # Update previous expose model
    self.previousExposeModel = exposeModel

    # Compute anomaly score by calculating similarity of the new data point
    # with expose model. The similarity measure, calculated via inner
    # product, is the likelihood of data point being normal. Resulting
    # anomaly scores are in the range of -0.02 to 1.02.
    anomalyScore = (1 - numpy.inner(inputFeature, exposeModel)).item()
    self.timestep += 1

    return [anomalyScore]
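Stripped of the NAB detector plumbing, the decay variant above is just an exponentially weighted running kernel embedding. A self-contained sketch of that loop on a toy stream; the RBFSampler settings are copied from initialize above, while the stream and printout are invented:

import numpy as np
from sklearn.kernel_approximation import RBFSampler

kernel = RBFSampler(gamma=0.5, n_components=20000, random_state=290)
decay = 0.01

model = None
for value in [0.10, 0.11, 0.12, 5.0]:  # toy stream; the jump to 5.0 should score high
    # fixed random_state, so refitting per record always yields the same feature map
    feature = kernel.fit_transform(np.array([[value]]))
    model = feature if model is None else decay * feature + (1 - decay) * model
    score = 1 - np.inner(feature, model).item()
    print(value, score)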
Example #59
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:-0.6922854322689991
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.8), LogisticRegression(C=0.01, dual=False,
                                              penalty="l1"))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #60
def transform(x_original, make_np=True):
    orig = x_original
    variances_str = "0.0021246993507595866 0.0032713784391997795 0.0033522806931598247 0.0017432450192796278 0.0034743692038798537 0.003637888546929857 0.0019210039127597624 0.0021841610994196136 0.0018762718393396005 0.0034590054363498003 0.0052604099446999682 0.004508790286140099 0.0035272400244497799 0.0030404807453598324 0.0022447918038096385 0.0017851536926196112 0.0021643550482296344 0.0037976255097098874 0.0025753731081197833 0.0029230906247597055 0.0060828219621099217 0.0023575999971396813 0.0043864294801700945 0.0071589655821691772 0.0036986840015399082 0.00057556662468004468 0.0030184163825898096 0.0062797556933995476 0.0018388575003994976 0.0018222650139394971 0.0032805952842698042 0.0035132540814598752 0.0024659598304896477 0.0026319448493497136 0.003572205969799843 0.0030648003435798008 0.0021365654833496528 0.0012356635529695108 0.0021261889005796605 0.0030134591283298012 0.0016100815367798148 0.012523000339860027 0.002519218599329652 0.0052571679389798714 0.0026606913287896975 0.0028296754183797139 0.0039323969569099605 0.0020691205227195992 0.0030826525382697508 0.0020232189983895653 0.0040679867872599708 0.0018371556472196301 0.0031808009477497599 0.0034889724135098699 0.0041241983089198644 0.003466312111199805 0.00070525738208999413 0.0012962120699994075 0.0023748498468496439 0.0039468429845199238 0.0024428431670496745 0.012215355168679928 0.0012535008249493743 0.0026764566235297597 0.0043243784063398552 0.00065200872076008631 0.0022265717804095869 0.00081018893256987797 0.0027757838127496974 0.0011937874021293784 0.0033124457059298595 0.0033779817461398022 0.0026583629339898352 0.00096654598538961438 0.0021773139189896237 0.002624655562289701 0.0015705430665195477 0.0030252402714297136 0.0040940954038199478 0.0027594978981697318 0.00079096095234988185 0.0026036506797997572 0.0027190828795197546 0.0027920414767097406 0.0018699793252895133 0.002401434445989645 0.0031948320317497989 0.0028928477797297309 0.001254727068959367 0.0022096979193596154 0.0021709718136396741 0.0022725767293796106 0.0036734258169697923 0.0028088068982497589 0.00058128786511008252 0.0030860261422598389 0.0028005311404197221 0.0013144850578592786 0.00075680244248994735 0.003594669478579891 0.0032807255223097792 0.0023280524667396774 0.00318162350717981 0.0038591178877899067 0.0027019215482496691 0.00097254474824969451 0.0023080437106096615 0.0013613457456093684 0.0045951612643399054 0.0038485342457099387 0.00043219164003003777 0.001528150938759669 0.0024822021413396867 0.0018061700621995042 0.0028432498431096936 0.00055539853847006056 0.004173783897349969 0.0023134058954397316 0.0035923805665898493 0.001944158411359583 0.0022174885522996423 0.0021200232347196586 0.0014086675440495285 0.0060588732600395838 0.0020999206563196006 0.002311535350179601 0.0012081675861494046 0.0029662122591298679 0.0023064668532896651 0.00086526146860972403 0.0035453290259598483 0.0022721631862096265 0.003677016888759915 0.0027193153269396897 0.0019698620481495626 0.002072663196939612 0.0014700221401894075 0.00017158202360999703 0.0022463464680696336 0.0035194326419099174 0.0030686680423197867 0.002374867405639663 0.00042710055163003362 0.0030035550561797468 0.0037270432987298683 0.0024282900953096712 0.00086048954793975898 0.0011186456857096038 0.0027912283038996942 0.0013746583237494142 0.0043072999357398533 0.0032034503423598666 0.0025760441755196838 0.0023421858856196836 0.002131599313139612 0.0029099423010796777 0.0016998768135196812 0.0028229397603697181 0.0030535556897598208 0.003180828002529861 
0.0041489816552998261 0.00056885910910004086 0.0030288286590998306 0.0002859839918500021 0.0025907458249397565 0.0019840401991995621 0.0026709580203396733 0.00018365706286999837 0.0023102737736697076 0.0019214511389595858 0.0035872736249698512 0.0030397738456597189 0.00058895044087008347 0.0011302665188195724 0.0036135427626998772 0.00096930456685965713 0.0018706273234795688 0.0028471338214996859 0.0040263350593498478 0.00041504772780003257 0.00088363138039978097 0.0027967429290597077 0.0021579785680196756 0.0032100556617598404 0.0012821952431594156 0.0011697489935395071 0.0024514963691797428 0.0024098468797296444 0.0035879574826698079 0.003169685177989759 0.0053416716965498916 0.0031958328667698248 0.003017081933489743 0.00025151594039000199 0.0022886760678696417 0.0022956408480896266 0.0041254424031998971 0.0023694221563096735 0.0047916681473398276 0.00051616048678002784 0.0009364954557196728 0.00036740167022002141 0.00078959433233993142 0.0007410161818699483 0.0028233597298397656 0.0035765694441198263 0.0057271246152496317 0.0031925037529198339 0.0014168537242193022 0.0040282638127298667 0.0027408330144697043 0.0022817147531596685 0.0013110019340695283 0.0031049698000498423 0.0014794847673093696 0.0019060075812395761 0.0023860511557697102 0.0025873434738996485 0.0018797211826496064 0.0022561836261797042 0.0041991871207300085 0.0023698767044296855 0.0022702994190196093 0.0029535219055797368 0.00079702808800984168 0.00017141315798999718 0.00071072931258999632 0.0022027503444296218 0.0023522028982396696 0.00034261818457001714 0.0030124186968896794 0.0040563730303498731 0.0018014168708095377 0.0045389503904098493 0.0025631905209596659 0.0021709940360196437 0.0031014667275497628 0.0013724805472092871 0.0011206960384995625 0.0035493743115597959 0.0025190975770797062 0.0042803605014598489 0.0009058812431398496 0.0015261301214595528 0.00043206103726003953 0.002057161621769605 0.0029676093005998037 0.00059245340563008307 0.00060303803797007931 0.0055309290333298089 0.0006063130560400961 0.0024001375326397033 0.0051570050648799921 0.003216390780179791 0.001684353076369542 0.0024379539857596923 0.0033096221900098537 0.001808786421229587 0.00036056760674001951 0.0022999638755596282 0.0042300521607298008 0.0033374014801298532 0.0041061077925497727 0.00090300806356967953 0.0027771389140698217 0.0020966763969595594 0.0022364965134396191 0.0021630341014396426 0.003362866027789783 0.0025631540862897312 0.004191926116449857 0.0023811464991296992 0.0013004726735392649 0.0038548863857898333 0.0021571933421396868 0.0032544925816697214 0.0024967996225797357 0.0039128733433798774 0.0032033546653597454 0.0035349716580698469 0.0022774309789496266 0.0020827816616296431 0.001280163236199224 0.0029566993924298487 0.0030278382394197082 0.0031458574724698 0.00073484413224997748 0.0023053594018396508 0.0032629178035998552 0.0028317322999097433 0.0033847674035998084 0.0021507182045496622 0.0013635142890994728 0.0025417732184397166 0.00046798538031004748 0.0014196850140693168 0.001775496716359453 0.00041823802366003517 0.00072539019745996431 0.0030393665008997704 0.0013936213581092793 0.0024650105378997201 0.0002448311107500009 0.0043274930097698871 0.0045290280761799487 0.0047295668273101684 0.0010168427077595955 0.0027754963934396339 0.0028934546900597821 0.0024947583902996968 0.0017947966152195337 0.002808371739829744 0.00080562592018981933 0.0014184058297892733 0.0018558152750695453 0.0018534208896895739 0.0027403346575797425 0.0031581041628497997 0.0019250669095596151 0.0017553527272695774 0.002912743471719791 
0.00051881062016005577 0.0041509390442198381 0.0013269250644194269 0.002515913493569724 0.0032034703723998357 0.0015867479873494805 0.0033147417203898185 0.0032343107633697474 0.0016084849715195411 0.00041333437351003248 0.0015982072633194113 0.0014028860576195891 0.0022158183125796393 0.0029487353931697447 0.0028615529172198303 0.0012540566466694289 0.0028261495420197243 0.00017822631116999813 0.0014531231202394163 0.0025906615127396855 0.0036318312786498171 0.002825987395589701 0.0032132990932597881 0.00093148496318973544 0.0022986618991797251 0.0031201742482197584 0.0088757592945090114 0.0019739854059195429 0.0015964743898695729 0.0030620168350797899 0.0017549143672195243 0.0025403744949397296 0.0013998610671793503 0.00050658872377004334 0.0024219329259397276 0.0016578000335194041 0.0014255931402395057 0.0025947821308797258 0.0015455710208097471 0.0019424337106196282 0.0043638276133198444 0.0024791513534598046 0.002471546965979776 0.0032594199180097532 0.00081732890395981583 0.00047798563291005168 0.0026265644132597047 0.0029957660721997665 0.0033466747844698567 0.0030434931783497998 0.0032186603864098446 0.0025580746428896777 0.0074381240438289309 0.0026177068932397522 0.0010374525766094667 0.0018484145568895259 0.0032105816832397539 0.0025588880273796702 0.0011027058149395553 0.00165028316301944 0.0022621210840096185 0.00063843135713010388 0.002677249425599694 0.0011529594838495104 0.0020757956716295806 0.00063164132836008679 0.0012984328854694727 0.0030668599805997697 0.0013209850432293402 0.0017350537225995246 0.0027999960618096992 0.0045968238896799086 0.0015396509469794125 0.0026842448170297231 0.0020969214423495791 0.0032249556936598013 0.00029111348006000424 0.0011551860431694666 0.0031812251568797824"
    #mn = np.array(map(float, means_str.split(" ")))
    #mn = np.fromstring(means_str)
    variances = np.fromstring(variances_str, sep=' ')
    means_str = "3.8753948237858108e-06 1.2972946111794674e-05 1.2594051521366083e-05 5.0841523278404734e-06 1.8774317409263048e-05 6.2913210996917487e-05 1.269807222669888e-05 3.2193349475262057e-06 6.5226200570272061e-06 1.1473588836338628e-05 2.7180466935587737e-05 1.4762302565458717e-05 3.3722317512532468e-05 6.8216505240041436e-06 7.1028116499628903e-06 6.5493827073439618e-06 3.80367131264172e-06 1.4028847130371071e-05 9.3773632055309283e-06 6.493323349342037e-06 0.0012533506935897218 4.9911335763841195e-06 1.2793399333055094e-05 7.251611930188133e-05 9.5489822043414659e-06 3.8895300628186868e-06 4.173457402556971e-05 0.00011347419063456421 2.5715278760111459e-06 3.2518257183024889e-06 1.1746203655396577e-05 5.564016383146592e-05 3.6296631509353909e-06 4.3289811407316681e-06 1.6025500646546836e-05 8.7246747361516438e-06 4.2410327327645271e-06 4.3732089713098806e-06 5.9073865563619062e-06 2.4944097977347468e-05 2.6986158170267078e-05 0.00019357426874984057 5.1764074423215301e-06 3.5213588425492417e-05 5.6548098935816624e-06 4.9935937088475483e-06 2.3828362907972465e-05 3.521023866293484e-06 4.9870702736337188e-06 2.7658266039366798e-06 1.1424139609302174e-05 4.6380793952958809e-06 8.1857174384998292e-06 1.6642225648910047e-05 1.8268643132929127e-05 1.5473118685259949e-05 9.7616078787441458e-07 4.1097607144367696e-06 5.0459663323074957e-06 8.1752036387080678e-06 6.2517426726346483e-06 0.00021128251533625498 2.4441154311918049e-06 1.0193291769369655e-05 1.6000078417860217e-05 1.3360615760691735e-06 3.9318274983244583e-06 3.7424801978201094e-06 4.5859948912655592e-06 2.1863893895928264e-06 1.4465960374765088e-05 1.226800721873276e-05 1.8464105024954982e-05 1.6648636202068534e-06 3.6936226607579947e-06 6.5624020308052344e-06 8.1339303452353934e-06 9.5047711128428641e-06 1.4246167594118415e-05 6.0140973294197884e-06 1.8256200156735017e-06 1.0903757639504039e-05 5.5080914174679564e-06 5.2142169736994904e-06 2.6292604236996645e-06 4.9623024158512934e-06 1.3171420269231491e-05 5.1064782563443342e-06 2.2201233797532346e-06 3.5523146873797785e-06 4.0447453033151591e-06 3.4393314844283629e-06 1.2283374778942664e-05 1.2292876875817127e-05 1.3500473667799135e-06 1.5982740863426082e-05 5.1149263226338105e-06 3.6545265412690049e-06 4.4324293930103502e-06 1.3464151551507424e-05 8.2607323905827565e-06 5.3487969307959027e-06 7.699747933440781e-06 2.6028092053793074e-05 4.6160336251911396e-06 3.4679078250202434e-06 4.1733322591036512e-06 5.3685295356327671e-06 4.1690461279070458e-05 1.8175584863744415e-05 1.4529974714941822e-06 8.9646541680474962e-06 3.8638936584656166e-06 2.9622882868516527e-06 7.0496709821419259e-06 3.1582263769680431e-06 9.405912339046591e-06 9.0755581225100531e-06 1.6325319116706371e-05 7.4249528783198223e-06 6.4142049677004635e-06 4.5308256388559377e-06 4.3379101302365048e-06 0.00010082767573262403 3.8073220474859233e-06 3.2462395975613701e-06 2.7311928376618711e-06 1.3798802536934602e-05 4.3141812822167945e-06 1.3418830948478911e-06 1.2429912124862659e-05 4.5075176921294976e-06 5.1646366657811792e-05 4.5044907401523191e-06 3.8984503442526084e-06 3.5443432542494581e-06 2.4525978397502771e-06 9.3143290305042167e-07 5.5977615024444758e-06 1.4190797086073543e-05 6.9561233764939789e-06 8.0114861452901582e-06 1.2454920191746878e-06 5.5587154982870272e-06 1.0799672251505274e-05 5.2959102834492533e-06 1.5685688647449261e-06 4.0529428722210623e-06 1.1678512895855624e-05 3.2192802988981066e-06 3.7209970472627806e-05 1.3342539819491425e-05 7.8622903069455567e-06 5.2192321914900928e-06 
3.9052134579505441e-06 5.0680769571043553e-06 7.9552828837898563e-06 6.7762118492538826e-06 8.5875102642240075e-06 2.0992545616427373e-05 2.0487505271743291e-05 4.3745997535029968e-06 7.1046977878669946e-06 7.7167495498190023e-07 1.0141932308464567e-05 4.2219873766408028e-06 4.5710190852658248e-06 1.1970402654479661e-06 7.6102614732724262e-06 4.6239298630603015e-06 4.9995946799371758e-05 5.7956634809724437e-06 1.5024720589152287e-06 6.0635032731039673e-06 1.5391627780641011e-05 2.178652052162647e-06 4.2030056647134055e-06 5.0822379579415565e-06 1.9836303495641017e-05 1.8930994307717652e-06 3.0604158961858623e-06 6.5280625603827021e-06 1.0265727137904331e-05 4.0302422231094213e-05 3.7750836192671517e-06 4.0367914908354297e-06 8.0446362665366717e-06 4.7656248380853414e-06 8.6978972436276061e-06 7.9679700766206762e-06 2.9451374286812033e-05 1.3111273739035649e-05 6.6028118897700181e-06 1.4941804584231896e-06 3.9528326512917906e-06 3.847295383196301e-06 3.5756600152130488e-05 6.10565382283349e-06 3.5891435340776665e-05 3.6066217532076844e-06 2.0888559126779404e-06 1.0755002920858641e-06 5.8998610038911923e-06 1.9512692088167549e-06 5.0713400804749472e-06 1.4585512351101608e-05 4.822908984311966e-05 9.8016096252778945e-06 1.9911328814957375e-06 2.4764204976600043e-05 6.4805250037636707e-06 3.7935478658080509e-06 5.1083549212952252e-06 1.1189053457458745e-05 2.5200508287861594e-06 7.8373349366817099e-06 7.9847294470099685e-06 4.3095275213819756e-06 5.0268163315597379e-06 7.4832981742681862e-06 9.2408501776852945e-06 4.784135850487231e-06 5.6532252724841891e-06 5.3930817570733614e-06 1.2687973462442569e-06 1.0372124095824449e-06 4.1435096417718113e-06 3.9981959056867675e-06 4.3520178967986713e-06 1.4659748060826231e-06 5.3366902864809163e-06 3.1416765924193689e-05 2.9749844077512922e-06 3.1381515491784522e-05 4.3260417959669591e-06 4.197030498717592e-06 5.306570430382929e-06 1.8883854746421685e-06 4.1937519548496871e-06 6.9194038197555032e-06 7.1767073252994241e-06 2.4833484439498967e-05 5.8383610252210572e-06 3.6243330253608428e-06 1.18902799300137e-06 3.8963636200265115e-06 1.2883918919165478e-05 1.3605525456692033e-06 1.7407965336936251e-06 0.00016857768522088627 1.4100686994311071e-06 7.1668903489840609e-06 2.7108318215380169e-05 5.4590558436375845e-06 6.2033647867466643e-06 5.6859033955868132e-06 4.3241078188076546e-05 9.0432098151017242e-06 1.0594888618529579e-06 4.0484870451845699e-06 2.9548153849811755e-05 9.410471996079331e-06 2.1009809505791367e-05 1.4939978216919125e-06 2.1026313371938338e-05 2.912760631269843e-06 4.1130865661336849e-06 4.0964425120045752e-06 1.0334704132812778e-05 1.1639088987295558e-05 2.0866544215744135e-05 4.7665503013673322e-06 2.4282885077105844e-06 2.4696946110127049e-05 5.8943453758772547e-06 7.0559765519393299e-06 8.6495232917104309e-06 2.4674527585132413e-05 6.5466440985476235e-06 1.4291488938382783e-05 4.0363838996778781e-06 7.5171096440058871e-06 1.7659216070078882e-06 2.3552682868282767e-05 6.0075484731317116e-06 2.9678689121826856e-05 4.5688281985000224e-06 4.2587818969459276e-06 1.2282850125910679e-05 5.6981633973611215e-06 1.2193919548692016e-05 7.7909581862542261e-06 4.1995999932004883e-06 4.2310001927379966e-06 1.4034983645177226e-06 2.2253775626039904e-06 2.5484625453534006e-06 1.5024773624760737e-06 1.1886813960082901e-06 8.943485028332714e-06 1.802211533446878e-06 8.7804607030574995e-06 1.4714171056899874e-06 3.039182778117474e-05 2.9469599285561173e-05 1.6190782721728404e-05 2.1980748656966054e-06 9.1492500963304843e-06 1.3139192984142854e-05 
5.5841754669416901e-06 3.2663084979403296e-06 7.8300182408015622e-06 1.4650747681293603e-06 1.8418132244867557e-06 3.1634249051793445e-06 4.2879811205541378e-06 6.821776038991282e-06 7.2547994800721606e-06 4.5762861000866325e-06 4.0033741553487421e-06 1.3944663969273685e-05 1.5205123797572826e-06 3.7950333845819879e-05 4.6914603422440762e-06 4.3642212832058213e-06 1.6888537402380868e-05 5.3097299301474431e-06 8.5974973592752354e-06 1.0183715675617148e-05 4.4233012671049924e-06 2.8020268713604479e-06 2.4903519176724564e-06 6.0367933560789913e-06 3.6066482258866671e-06 5.5465358638439433e-06 1.6406145480373579e-05 3.6475034103942783e-06 7.545922378704344e-06 1.0510117913470496e-06 6.383917613657175e-06 4.1469930879045612e-06 1.5104979761103841e-05 7.9249357960338965e-06 6.6303162793237734e-06 2.3058946881412919e-06 6.93384276789908e-06 6.2217404008410318e-06 0.00053927751612010478 2.7688222907463807e-06 5.1593082062395665e-06 9.4327080926443393e-06 3.3336519843947502e-06 5.1198323130590842e-06 4.5094438342118166e-06 2.6237274608190453e-06 5.1693775448212788e-06 2.1082108591551617e-06 6.8329929120308474e-06 6.2018823452726071e-06 2.1240415994925091e-05 4.0243827456115514e-06 3.0522891049621393e-05 1.4011920974680818e-05 1.7239640547533074e-05 1.7993086639426091e-05 1.4355334226673438e-06 1.1012319919274514e-06 1.0614538321433708e-05 7.2890435254277739e-06 8.5872764781091643e-06 1.3084966706891505e-05 1.1094006709484758e-05 4.2456925142930984e-06 3.6872244517667462e-05 1.0859154502284048e-05 1.5319903891298572e-06 2.7727900163087534e-06 7.2483213769211959e-06 5.1159362377455894e-06 2.6822480525986132e-06 2.889767166323531e-06 3.55288675821463e-06 1.3380456162305463e-06 5.2278105015869388e-06 2.3031150972921671e-06 4.1508531796520333e-06 1.8528326040776206e-06 6.3815646996712558e-06 6.9338240811962186e-06 2.5793558575700516e-06 4.3737400474318956e-06 6.0837447954729297e-06 4.7903414469400619e-05 2.8013740155544375e-06 4.7622560053896967e-06 3.250652556381526e-06 9.5664014501971676e-06 8.2542503434926804e-07 2.5912870572853299e-06 6.0526418572379129e-06"
    #variances = np.array(map(float, variances_str.split(" ")))
    means = np.fromstring(means_str, sep=' ')

    x_original = np.array(x_original)
    # Centre by the stored per-feature means and scale by the stored variances
    x_original -= means
    x_original /= variances

    #x_original = np.delete(xxxx_original, features_ordered_by_importance2[-1:])
    #most_important_features1 = np.delete(x_original, features_ordered_by_importance2[5:])

    x = []

    # Element-wise transforms kept for the feature-engineering experiments
    # below (most are only referenced in commented-out code).
    def sqr(v):
        return v * v

    def sqr3(v):
        return v * v * v

    def e_pow(v):
        return math.exp(v)

    def me_pow(v):
        return math.exp(-v)

    def fred(v):
        # |v| expressed in thousandths and rounded to an integer value
        return round(math.fabs(v) * 1000)

    def extend_x(arr, additions=True, extension=True):
        # Append the raw values of arr and/or its summary statistics to x
        # (np.* replaces the removed scipy.std / scipy.var / scipy.mean /
        # scipy.median aliases, with identical results).
        if extension:
            x.extend(arr)
        if additions:
            x.append(np.std(arr))
            x.append(np.var(arr))
            x.append(sum(arr) / len(arr))
            x.append(sum(np.abs(arr)) / len(arr))
            x.append(min(arr))
            x.append(max(arr))
            x.append(np.mean(arr))
            x.append(np.median(arr))

    def count_smaller_ratio(arr, delta):
        # Fraction of elements that are <= delta; float division avoids
        # integer truncation under Python 2.
        return sum(1 if el <= delta else 0 for el in arr) / float(len(arr))
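    # e.g. count_smaller_ratio([-0.2, 0.1, 0.5], 0.0) -> 1/3, since only -0.2
    # is <= 0.0.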


    # Active feature set: the standardised values plus two element-wise
    # transforms, each followed by their summary statistics.
    if True:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))

        #rbf_feature = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
        #zzz = rbf_feature.fit_transform(np.array(x))[0]
        #extend_x(list(zzz))

    # Disabled experiment: add random-feature expansions on top of the basic
    # feature set.
    if False:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))

        # Random-feature expansions of the feature vectors; scikit-learn
        # samplers expect a 2-D array, hence the reshape of the single sample.
        sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50, random_state=1)
        zzz1 = sampler1.fit_transform(np.array(orig).reshape(1, -1))[0]

        #sampler2 = SkewedChi2Sampler(skewedness=8.5, n_components=50, random_state=1)
        #zzz2 = sampler2.fit_transform(np.array([i + 1.0 for i in x]))[0]

        sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
        zzz3 = sampler3.fit_transform(np.array(x).reshape(1, -1))[0]
        x = []
        extend_x(x_original)
        #extend_x(np.abs(x_original))
        #extend_x(np.sqrt(np.abs(x_original)))

        extend_x(list(zzz1))
        #extend_x(list(zzz2))
        extend_x(list(zzz3))

    if False:
        #rbf_feature = RBFSampler(gamma=0.0025, random_state=2, n_components=100)
        #zzz = rbf_feature.fit_transform(np.array(x_original))[0]
        #extend_x(list(zzz))
        pass

    if False:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))


    #for i in x_original:
    #    print i
    #
    # x.append(count_smaller_ratio(x_original, 0.1))
    # x.append(count_smaller_ratio(x_original, 0.2))
    # x.append(count_smaller_ratio(x_original, 0.3))
    # x.append(count_smaller_ratio(x_original, 0.4))
    # x.append(count_smaller_ratio(x_original, 0.5))
    # x.append(count_smaller_ratio(x_original, 0.6))
    # x.append(count_smaller_ratio(x_original, 0.7))
    # x.append(count_smaller_ratio(x_original, 0.8))
    # x.append(count_smaller_ratio(x_original, 0.9))
    # x.append(count_smaller_ratio(x_original, 1.0))
    # x.append(count_smaller_ratio(x_original, -0.1))
    # x.append(count_smaller_ratio(x_original, -0.2))
    # x.append(count_smaller_ratio(x_original, -0.3))
    # x.append(count_smaller_ratio(x_original, -0.4))
    # x.append(count_smaller_ratio(x_original, -0.5))
    # x.append(count_smaller_ratio(x_original, -0.6))
    # x.append(count_smaller_ratio(x_original, -0.7))
    # x.append(count_smaller_ratio(x_original, -0.8))
    # x.append(count_smaller_ratio(x_original, -0.9))
    # x.append(count_smaller_ratio(x_original, -1.0))

    #x.append(count_smaller_ratio(x_original, 0.01))
    #x.append(count_smaller_ratio(x_original, 0.001))
    #x.append(count_smaller_ratio(x_original, 0.0001))
    #x.append(count_smaller_ratio(x_original, 0.00001))
    #x.append(count_smaller_ratio(x_original, 0.000001))
    #x.append(count_smaller_ratio(x_original, 0.00000000001))

    # Do something with most_important_features1

    #extend_x(np.expm1(x_original))
    #extend_x(np.square(x_original))
    #extend_x(map(me_pow, x_original))
    #extend_x(np.sqrt(np.sqrt(np.abs(x_original))))
    #extend_x((np.sqrt(np.sqrt(orig)) - np.sqrt(np.sqrt(means))) / np.sqrt(np.sqrt(variances)))

    #extend_x([(-1 if i < 0 else (0 if i == 0 else 1)) for i in x_original])
    #x.append(sum([i if i > 0 else 0 for i in x_original]) / len(x_original))
    #x.append(sum([i if i < 0 else 0 for i in x_original]) / len(x_original))
    #extend_x(np.tanh(x_original))
    #extend_x(np.cos(x_original))

    #extend_x(map(e_pow, x_original))

    #extend_x(np.sqrt())
    #extend_x(np.sqrt(np.abs(x_original)))
    #extend_x((np.sqrt(orig) - np.sqrt(means)) / np.sqrt(variances))
    #extend_x(map(e_pow, x_original))
    #extend_x(map(sqr, map(e_pow, x_original)))

    #x.append(sum(np.abs(x_original)) / len(x_original))
    #x.append(1.)


    #x.extend(map(math.sin, x_original))
    #x.extend(map(math.sin, map(math.sqrt, x_original)))

    #extend_x(map(math.sqrt, map(e_pow, x_original)))
    #extend_x(map(math.sqrt, map(math.sqrt, x_original)))


    #x.extend(map(fred, x_original))
    #x.extend(map(sqr3, x_original))
    #x.extend(map(me_pow, x_original))
    #x.extend(map(math.log, x_original))

    if make_np:
        return np.array(x)

    return x
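

# ---------------------------------------------------------------------------
# Minimal, hypothetical usage sketch (not part of the original project): a
# feature vector like the one assembled above is typically expanded with
# RBFSampler before fitting a linear model.  The data below is synthetic and
# every name in this block is a placeholder, not taken from the original code.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np
    from sklearn.kernel_approximation import RBFSampler
    from sklearn.linear_model import SGDClassifier

    rng = np.random.RandomState(0)
    X_demo = rng.randn(200, 50)            # stand-in for stacked feature vectors
    y_demo = rng.randint(0, 2, size=200)   # stand-in binary labels

    # Explicit random Fourier features approximating an RBF kernel
    rbf = RBFSampler(gamma=0.0025, n_components=100, random_state=2)
    X_rbf = rbf.fit_transform(X_demo)

    clf = SGDClassifier(loss="hinge", penalty="l2", alpha=1e-4, random_state=0)
    clf.fit(X_rbf, y_demo)
    print(clf.score(X_rbf, y_demo))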