Example #1
    def get_shogun_statistics(self):
        # turn data into Shogun representation (column vectors)
        feat_p = sg.RealFeatures(self._x.reshape(1, len(self._x)))
        feat_q = sg.RealFeatures(self._y.reshape(1, len(self._y)))

        # choose kernel for testing. Here: Gaussian
        kernel_width = 1
        kernel = sg.GaussianKernel(10, kernel_width)

        # create mmd instance of test-statistic
        self._mmd = sg.QuadraticTimeMMD()
        self._mmd.set_kernel(kernel)
        self._mmd.set_p(feat_p)
        self._mmd.set_q(feat_q)

        # compute biased and unbiased test statistic (default is unbiased)
        self._mmd.set_statistic_type(sg.ST_BIASED_FULL)
        biased_statistic = self._mmd.compute_statistic()

        self._mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
        unbiased_statistic = self._mmd.compute_statistic()

        self._statistic = unbiased_statistic

        print("\nShogun tests statistics:")
        print(
            f"biased test statistic {len(self._x)} x MMD_b[X,Y]^2={biased_statistic:.2f}"
        )
        print(
            f"unbiased test statistic {len(self._x)} x MMD_u[X,Y]^2={unbiased_statistic:.2f}"
        )
        return self
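A hedged follow-up sketch: the raw statistic is hard to interpret on its own, so a p-value can be estimated by sampling the null distribution, reusing the `set_num_null_samples`/`sample_null` calls that appear in Example #15 (the sample count here is an arbitrary choice):

        # sketch: permutation-based p-value from the MMD instance configured above
        self._mmd.set_num_null_samples(1000)
        null_samples = self._mmd.sample_null()
        p_value = np.mean(self._statistic <= null_samples)
        print(f"estimated p-value: {p_value:.3f}")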
Example #2
        def RunQDAShogun():
            totalTimer = Timer()

            Log.Info("Loading dataset", self.verbose)
            try:
                # Load train and test dataset.
                trainData = np.genfromtxt(self.dataset[0], delimiter=',')
                trainFeat = shogun.RealFeatures(trainData[:, :-1].T)

                if len(self.dataset) == 2:
                    testData = np.genfromtxt(self.dataset[1], delimiter=',')
                    testFeat = shogun.RealFeatures(testData.T)

                # `options` comes from the enclosing method's scope
                if len(options) > 0:
                    Log.Fatal("Unknown parameters: " + str(options))
                    raise Exception("unknown parameters")

                # Labels are the last row of the training set.
                labels = shogun.MulticlassLabels(
                    trainData[:, (trainData.shape[1] - 1)])

                with totalTimer:

                    model = shogun.QDA(trainFeat, labels)
                    model.train()
                    if len(self.dataset) == 2:
                        model.apply_multiclass(testFeat).get_labels()
            except Exception:
                return -1

            return totalTimer.ElapsedTime()
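For a self-contained view of the Shogun calls being timed above, a minimal sketch on hypothetical toy data (the data and names are illustrative, not part of the benchmark):

import numpy as np
import shogun

# hypothetical toy data: rows are samples, columns are features
X = np.array([[0.0, 0.1], [0.2, 0.0], [3.0, 3.1], [3.2, 2.9]])
y = np.array([0.0, 0.0, 1.0, 1.0])

feats = shogun.RealFeatures(X.T)        # Shogun expects samples as columns
labels = shogun.MulticlassLabels(y)
model = shogun.QDA(feats, labels)
model.train()
predictions = model.apply_multiclass(feats).get_labels()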
Example #3
def _read_toy_data(request):
    y_set = []
    x_set = []
    x_set_induc = []
    points = []
    points_induc = []
    model_sel_error = False
    toy_data = json.loads(request.POST['point_set'])
    for pt in toy_data:
        if int(pt['label']) == 1:
            points.append(pt)
        elif int(pt['label']) == -1:
            points_induc.append(pt)

    for pt in points:
        y_set.append(float(pt["y"]))
        x_set.append(float(pt["x"]))

    for pt in points_induc:
        x_set_induc.append(float(pt["x"]))

    noise_level = float(request.POST['noise_level'])
    scale = float(request.POST['scale'])
    inf = request.POST['inf']
    domain = json.loads(request.POST['axis_domain'])

    labels = np.array(y_set, dtype=np.float64)
    num = len(x_set)
    if num == 0:
        raise Http404
    examples = np.zeros((1, num))
    for i in xrange(num):
        examples[0, i] = x_set[i]
    feat_train = sg.RealFeatures(examples)
    labels = sg.RegressionLabels(labels)

    # Get inducing points
    num_induc = len(x_set_induc)

    if num_induc != 0:
        examples_induc = np.zeros((1, num_induc))
        for i in xrange(num_induc):
            examples_induc[0, i] = x_set_induc[i]
        feat_train_induc = sg.RealFeatures(examples_induc)
    else:
        feat_train_induc = None

    kernel = get_kernel(request, feat_train)
    try:
        learn = request.POST["learn"]
    except KeyError:
        raise ValueError("Argument Error")

    if int(feat_train.get_num_vectors()) > 100 and learn == "ML2":
        model_sel_error = True

    return (feat_train, labels, noise_level, scale, kernel, domain, learn,
            feat_train_induc, inf), model_sel_error
Example #4
    def visualise_distribution_test_statistic(self, alpha=0.05):
        num_samples = 500

        # we first sample the null distribution
        self._mmd.set_num_null_samples(num_samples)
        null_samples = self._mmd.sample_null()

        # we then sample alternative distribution, generate new data for that
        alt_samples = np.zeros(num_samples)
        for i in range(num_samples):
            x = norm.rvs(size=self._n, loc=self._mu, scale=self._sigma_squared)
            y = laplace.rvs(size=self._n, loc=self._mu, scale=self._b)
            feat_p = sg.RealFeatures(np.reshape(x, (1, len(x))))
            feat_q = sg.RealFeatures(np.reshape(y, (1, len(y))))

            kernel_width = 1
            kernel = sg.GaussianKernel(10, kernel_width)

            mmd = sg.QuadraticTimeMMD()
            mmd.set_kernel(kernel)

            mmd.set_p(feat_p)
            mmd.set_q(feat_q)
            alt_samples[i] = mmd.compute_statistic()

        plt.figure(figsize=(18, 5))

        plt.subplot(131)
        plt.hist(null_samples, 50, color='blue')
        plt.title('Null distribution')
        plt.subplot(132)
        plt.title('Alternative distribution')
        plt.hist(alt_samples, 50, color='green')

        plt.subplot(133)
        plt.hist(null_samples, 50, color='blue')
        plt.hist(alt_samples, 50, color='green', alpha=0.5)
        plt.title('Null and alternative distributions')

        # find (1-alpha) element of null distribution
        null_samples_sorted = np.sort(null_samples)
        quantile_idx = int(len(null_samples) * (1 - alpha))
        quantile = null_samples_sorted[quantile_idx]
        plt.axvline(x=quantile,
                    ymin=0,
                    ymax=1,
                    color='red',
                    label=str(int(round(
                        (1 - alpha) * 100))) + '% quantile of null')
        plt.legend()
        plt.show()
        return self
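The quantile computed above doubles as the rejection threshold; a short sketch of the decision rule, assuming `self._statistic` was set as in Example #1:

        # reject H0 at level alpha when the observed statistic exceeds
        # the (1 - alpha) quantile of the sampled null distribution
        reject_h0 = self._statistic > quantile
        print("reject H0:", reject_h0)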
Example #5
def get_binary_features(request):
    try:
        point_set_raw = json.loads(request.POST['point_set'])
    except (KeyError, ValueError):
        raise ValueError("cannot read click pts")
    class_a_point_set = []
    class_b_point_set = []
    for point in point_set_raw:
        if point['label'] == 1:
            class_a_point_set.append([point['x'], point['y']])
        else:
            class_b_point_set.append([point['x'], point['y']])
    class_a = np.transpose(np.array(class_a_point_set, dtype=float))
    class_b = np.transpose(np.array(class_b_point_set, dtype=float))

    if not (len(class_a_point_set) and len(class_b_point_set)):
        raise ValueError("labels not enough")
    else:
        features = np.concatenate((class_a, class_b), axis=1)
        labels = np.concatenate(
            (np.ones(class_a.shape[1]), -np.ones(class_b.shape[1])), axis=1)

    features = sg.RealFeatures(features)
    labels = sg.BinaryLabels(labels)

    return features, labels
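A hedged sketch of consuming the returned pair with a kernel SVM; the classifier, kernel width, and C value are illustrative assumptions, not part of this view:

# hypothetical consumer of get_binary_features
features, labels = get_binary_features(request)
kernel = sg.GaussianKernel(features, features, 1.0)  # width chosen arbitrarily
svm = sg.LibSVM(1.0, kernel, labels)                 # C = 1.0
svm.train()
predictions = svm.apply(features).get_labels()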
Example #6
def get_multi_features(request):
    try:
        point_set_raw = json.loads(request.POST['point_set'])
    except (KeyError, ValueError):
        raise ValueError("cannot read click pts")

    x = []
    y = []
    labels = []
    for pt in point_set_raw:
        x.append(float(pt['x']))
        y.append(float(pt['y']))
        labels.append(float(pt['label']))

    n = len(set(labels))

    if not n:
        raise ValueError("0-labels")
    elif n == 1:
        raise ValueError("1-class-labels")
    else:
        features = np.array([x, y])

    features = sg.RealFeatures(features)
    labels = sg.MulticlassLabels(np.array(labels))

    return features, labels
Example #7
    def __init__(self,
                 X,
                 y,
                 n_importance,
                 prior_log_pdf,
                 ridge=0.,
                 num_shogun_threads=1):
        self.n_importance = n_importance
        self.prior_log_pdf = prior_log_pdf
        self.ridge = ridge
        self.X = X
        self.y = y

        self.num_shogun_threads = num_shogun_threads

        # limit the number of threads Shogun uses
        logger.debug("Using Shogun with %d threads" % self.num_shogun_threads)
        sg.ZeroMean().parallel.set_num_threads(self.num_shogun_threads)

        # shogun representation of data
        self.sg_labels = sg.BinaryLabels(self.y)
        self.sg_feats_train = sg.RealFeatures(self.X.T)

        # ARD: set theta, which is in log-scale, as kernel weights
        D = X.shape[1]
        theta_start = np.ones(D)

        self.sg_mean = sg.ZeroMean()
        self.sg_likelihood = sg.LogitLikelihood()
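For context, a sketch of how these pieces typically combine into a GP binary classifier, reusing the `sg.EPInferenceMethod` call from Example #22; the kernel below is a placeholder assumption for the ARD kernel this class goes on to build:

        # hypothetical continuation of __init__: wire mean, likelihood,
        # kernel and data into an EP-based GP binary classifier
        kernel = sg.GaussianKernel(10, 1.0)  # placeholder, not the ARD kernel
        inf = sg.EPInferenceMethod(kernel, self.sg_feats_train, self.sg_mean,
                                   self.sg_labels, self.sg_likelihood)
        gp = sg.GaussianProcessBinaryClassification(inf)
        gp.train()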
Example #8
def regress_dump(request):
    try:
        data_set = request.POST['data_set']
        feature = request.POST['feature']

        temp_feats = sg.RealFeatures(
            sg.CSVFile(REGRESS_DATA_DIR + REGRESS_DATA_SET[data_set]))
        labels = sg.RegressionLabels(
            sg.CSVFile(REGRESS_DATA_DIR + REGRESS_LABELS[data_set]))
        lab = labels.get_labels()

        # rescale features to the 0...1 range
        preproc = sg.RescaleFeatures()
        preproc.init(temp_feats)
        temp_feats.add_preprocessor(preproc)
        temp_feats.apply_preprocessor(True)
        mat = temp_feats.get_feature_matrix()

        if feature == 'CRIM':
            feat = mat[0]
        elif feature == 'DIS':
            feat = mat[7]
        elif feature == 'INDUS':
            feat = mat[2]
        elif feature == 'LSTAT':
            feat = mat[12]
    except:
        raise Http404

    toy_data = []
    for i in xrange(len(feat)):
        toy_data.append({'x': feat[i], 'y': lab[i], 'label': float(0)})
    return HttpResponse(json.dumps(toy_data))
Example #9
def _train_clustering(point_set, distance_name, k):
    labels = np.array([0]*len(point_set))
    features = np.zeros((2, len(point_set)))

    for i in xrange(len(point_set)):
        features[0, i] = point_set[i]['x']
        features[1, i] = point_set[i]['y']
        labels[i] = point_set[i]['label']

    lab = sg.BinaryLabels(labels)
    train = sg.RealFeatures(features)
             
    if distance_name == "EuclideanDistance":
        distance = sg.EuclideanDistance(train, train)
    elif distance_name == "ManhattanMetric":
        distance = sg.ManhattanMetric(train, train)
    elif distance_name == "JensenMetric":
        distance = sg.JensenMetric(train, train)
    else:
        raise TypeError
                  
    kmeans = sg.KMeans(k, distance)
    kmeans.train()

    return kmeans
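Reading the results back follows the same calls as Example #18; a short usage sketch with an illustrative `point_set`:

# usage sketch: cluster clicked points into two groups
kmeans = _train_clustering(point_set, "EuclideanDistance", 2)
centers = kmeans.get_cluster_centers()   # one centroid per column
assignments = kmeans.apply()             # cluster label per input point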
Example #10
def shogunProcess(clustersNumber, dataLessTarget, datasetName, runinfo = None, initialClusters = None):
    import shogun

    outputFile = datasetOutFile(datasetName, SHOGUN_ALGO, runinfo=runinfo)

    if os.path.exists(outputFile):
        print("shogun skipped")
        return

    train_features = shogun.RealFeatures(dataLessTarget.values.astype("float64").transpose())
    # distance metric over feature matrix - Euclidean distance
    distance = shogun.EuclideanDistance(train_features, train_features)

    hierarchical = shogun.Hierarchical(clustersNumber, distance)

    # train before querying the dendrogram
    hierarchical.train()

    # merge distances and the cluster pairs merged at each step
    d = hierarchical.get_merge_distances()
    cp = hierarchical.get_cluster_pairs()

    with open(outputFile, 'w') as csvfile:
        filewriter = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)

        # Hierarchical yields a dendrogram rather than flat labels,
        # so persist the merge structure itself
        for step in range(len(d)):
            filewriter.writerow([step, d[step]])
Example #11
    def run_kmeans_base(self,
                        nb_clusters,
                        src_file,
                        data_without_target,
                        dataset_name,
                        run_number,
                        config_function,
                        run_info=None,
                        nb_iterations=None):
        self._init()
        output_file, centroids_file = self._prepare_files(
            dataset_name, run_info, True)

        train_features = shogun.RealFeatures(
            data_without_target.values.astype("float64").transpose())
        # distance metric over feature matrix - Euclidean distance
        distance = shogun.EuclideanDistance(train_features, train_features)

        # KMeans object created
        kmeans = shogun.KMeans(nb_clusters, distance)
        if config_function is not None:
            config_function(kmeans)

        if nb_iterations is not None:
            kmeans.set_max_iter(nb_iterations)

        centers, result = Shogun._kmeans_process(kmeans)
        ClusteringToolkit._save_clustering(
            Shogun._clustering_to_list(data_without_target, result),
            output_file)
        ClusteringToolkit._save_centroids(Shogun._centroids_to_list(centers),
                                          centroids_file)

        return output_file, {"centroids": centroids_file}
Example #12
def regression(request):
    try:
        domain = json.loads(request.POST['axis_domain'])
        X = np.linspace(domain['horizontal'][0], domain['horizontal'][1], 100)
        x = np.array([X])
        feat = sg.RealFeatures(x)

        arguments = _read_data(request)

        tool = request.POST['regression']
        if (tool == 'LeastSquaresRegression'):
            ls = _train_ls(*arguments)
            y = _apply_ls(feat, ls)

        elif (tool == 'LinearRidgeRegression'):
            lrr = _train_lrr(*arguments)
            y = _apply_lrr(feat, lrr)

        elif (tool == 'KernelRidgeRegression'):
            krr, kernel, train = _train_krr(*arguments)
            y = _apply_krr(kernel, train, feat, krr)

        line_dot = []
        for i in xrange(len(X)):
            line_dot.append({'x': X[i], 'y': y[i]})
        return HttpResponse(json.dumps(line_dot))
    except:
        raise Http404
Example #13
    def load_mult_data(self, x_train, z_train):
        '''
        This function re-configures the training data according to the library requirement
        '''
        self.input_dim = x_train.shape[1]
        self.z_train = shogun.RegressionLabels(z_train)
        # Shogun expects a (num_features, num_samples) matrix, so transpose
        # rather than reshape to avoid scrambling samples across features
        self.x_train = shogun.RealFeatures(np.array(x_train).T)
Example #14
    def feature_prepare(self, X):
        features = shogun.CombinedFeatures()
        X = X.astype(np.float64)
        for kernel_type in self.kernel_dict.keys():
            for kernel_feature in self.kernel_dict[kernel_type].values():
                features.append_feature_obj(
                    shogun.RealFeatures(X[:, kernel_feature].T))

        return features
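`CombinedFeatures` is normally paired with a `CombinedKernel` holding one sub-kernel per appended feature block; a hedged sketch under that assumption (the widths are arbitrary):

# hypothetical pairing: one sub-kernel per feature block appended above
combined_kernel = shogun.CombinedKernel()
combined_kernel.append_kernel(shogun.GaussianKernel(10, 1.0))
combined_kernel.append_kernel(shogun.GaussianKernel(10, 2.0))
combined_kernel.init(features, features)  # `features` from feature_prepare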
Example #15
def shogun_mmd(X,
               Y,
               kernel_width,
               null_samples=1000,
               median_samples=1000,
               cache_size=32):
    '''
    Run an MMD test using a Gaussian kernel.

    Parameters
    ----------
    X : row-instance feature array

    Y : row-instance feature array

    kernel_width : float
        The bandwidth of the RBF kernel (sigma).

    null_samples : int
        How many times to sample from the null distribution.

    median_samples : int
        Present in the signature but unused in this function.

    cache_size : int
        Kernel cache size passed to sg.GaussianKernel.

    Returns
    -------
    p_val : float
        The obtained p value of the test.

    stat : float
        The test statistic.

    null_samples : array of length null_samples
        The samples from the null distribution.
    '''
    import shogun as sg
    mmd = sg.QuadraticTimeMMD()
    mmd.set_p(sg.RealFeatures(X.T.astype(np.float64)))
    mmd.set_q(sg.RealFeatures(Y.T.astype(np.float64)))
    mmd.set_kernel(sg.GaussianKernel(cache_size, float(kernel_width)))

    mmd.set_num_null_samples(null_samples)
    samps = mmd.sample_null()
    stat = mmd.compute_statistic()

    p_val = np.mean(stat <= samps)
    return p_val, stat, samps
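A usage sketch on synthetic data (assumes `numpy` is imported as `np`):

rng = np.random.RandomState(0)
X = rng.randn(200, 3)          # 200 samples from N(0, 1)
Y = rng.randn(200, 3) + 0.5    # shifted distribution
p_val, stat, null = shogun_mmd(X, Y, kernel_width=1.0)
print("p-value:", p_val, "statistic:", stat)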
Example #16
    def dftoxz(self, dataframe, data_type):

        # z is filled only for training data
        z = None

        x = shogun.RealFeatures(
            np.array(dataframe["x"]).reshape(1, len(dataframe["x"])))
        if data_type == 'train':
            z = shogun.RegressionLabels(np.array(dataframe['z_train']))

        return x, z
Example #17
    def load_data(self, dataframe):
        '''
        This function re-configures the training data according to the library requirement
        '''

        self.train_dataframe = dataframe
        # Re-configuration of the data
        self.z = shogun.RealFeatures(
            np.array(self.train_dataframe['z_train']).reshape(
                1, len(self.train_dataframe["z_train"])))
        self.x_train, self.z_train = self.dftoxz(self.train_dataframe, 'train')
Example #18
def shogunProcess(clustersNumber,
                  dataLessTarget,
                  datasetName,
                  runinfo=None,
                  initialClusters=None):
    import shogun

    outputFile = datasetOutFile(datasetName, SHOGUN_ALGO, runinfo=runinfo)
    clustersOutputFile = datasetOutFile(datasetName,
                                        centroidFor(SHOGUN_ALGO),
                                        runinfo=runinfo)

    if os.path.exists(outputFile) and os.path.exists(clustersOutputFile):
        print("shogun skipped")
        return

    train_features = shogun.RealFeatures(
        dataLessTarget.values.astype("float64").transpose())
    # distance metric over feature matrix - Euclidean distance
    distance = shogun.EuclideanDistance(train_features, train_features)

    # KMeans object created
    kmeans = shogun.KMeans(clustersNumber, distance)

    if initialClusters is None:
        # set KMeans++ flag
        kmeans.set_use_kmeanspp(True)
    else:
        # set new initial centers
        kmeans.set_initial_centers(
            initialClusters.astype("float64").transpose())

    # KMeans training
    kmeans.train()

    # cluster centers
    centers = kmeans.get_cluster_centers()

    # Labels for data points
    result = kmeans.apply()

    with open(outputFile, 'w') as csvfile:
        filewriter = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)

        for index, row in dataLessTarget.iterrows():
            filewriter.writerow([index, result[index].item(0)])

    with open(clustersOutputFile, 'w') as clusterFile:
        filewriter = csv.writer(clusterFile, quoting=csv.QUOTE_MINIMAL)

        for row in centers.transpose():
            filewriter.writerow(row.tolist())
Example #19
def support_vector_regression(request):
    try:
        arguments = _read_data(request)
        svm = _train_svr(*arguments)
        domain = json.loads(request.POST['axis_domain'])
        x = np.linspace(domain['horizontal'][0], domain['horizontal'][1], 100)
        y = np.array(svm.apply(sg.RealFeatures(np.array([x]))).get_labels(),
                     dtype=np.float64)
        line_dot = []
        for i in xrange(len(x)):
            line_dot.append({'x': x[i], 'y': y[i]})
        return HttpResponse(json.dumps(line_dot))
    except:
        raise Http404
Example #20
def mmd_test(Sample1, Sample2):
    statistics = []
    for i in range(Sample1.shape[1]):
        x = Sample1[:, i]
        y = Sample2[:, i]

        feat_p = sg.RealFeatures(x.reshape(1, len(x)))
        feat_q = sg.RealFeatures(y.reshape(1, len(y)))

        # choose kernel for testing. Here: Gaussian
        kernel_width = 1
        kernel = sg.GaussianKernel(10, kernel_width)

        # create mmd instance of test-statistic
        mmd = sg.QuadraticTimeMMD()
        mmd.set_kernel(kernel)
        mmd.set_p(feat_p)
        mmd.set_q(feat_q)

        # compute the unbiased test statistic for this dimension
        mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
        statistics.append(mmd.compute_statistic())

    return statistics
Example #21
    def RunMetrics(self, options):
        Log.Info("Perform QDA.", self.verbose)

        results = self.QDAShogun(options)
        if results < 0:
            return results

        metrics = {'Runtime': results}

        if len(self.dataset) >= 3:
            trainData, labels = SplitTrainData(self.dataset)
            testData = LoadDataset(self.dataset[1])
            truelabels = LoadDataset(self.dataset[2])

            model = shogun.QDA(shogun.RealFeatures(trainData.T),
                               shogun.MulticlassLabels(labels))
            model.train()
            predictions = model.apply_multiclass(
                shogun.RealFeatures(testData.T)).get_labels()

            confusionMatrix = Metrics.ConfusionMatrix(truelabels, predictions)

            metrics['Avg Accuracy'] = Metrics.AverageAccuracy(confusionMatrix)
            metrics['MultiClass Precision'] = Metrics.AvgPrecision(
                confusionMatrix)
            metrics['MultiClass Recall'] = Metrics.AvgRecall(confusionMatrix)
            metrics['MultiClass FMeasure'] = Metrics.AvgFMeasure(
                confusionMatrix)
            metrics['MultiClass Lift'] = Metrics.LiftMultiClass(
                confusionMatrix)
            metrics['MultiClass MCC'] = Metrics.MCCMultiClass(confusionMatrix)
            metrics['MultiClass Information'] = Metrics.AvgMPIArray(
                confusionMatrix, truelabels, predictions)
            metrics['Simple MSE'] = Metrics.SimpleMeanSquaredError(
                truelabels, predictions)

        return metrics
Example #22
def classify_gp(features,
                labels,
                kernel,
                domain,
                lik,
                learn,
                scale,
                returnValues=True):
    mean = sg.ZeroMean()
    inf = sg.EPInferenceMethod(kernel, features, mean, labels, lik)
    inf.set_scale(scale)
    gp = sg.GaussianProcessBinaryClassification(inf)
    best_width = 0.0
    best_param = 0
    best_degree = 0
    best_scale = 0.0

    if learn == 'ML2':
        inf.set_scale(1)
        if kernel.get_name() == 'GaussianKernel':
            kernel.set_width(1)
        grad = sg.GradientEvaluation(gp, features, labels,
                                     sg.GradientCriterion(), False)
        grad.set_function(inf)
        grad_search = sg.GradientModelSelection(grad)
        best_combination = grad_search.select_model()
        best_combination.apply_to_machine(gp)
        try:
            best_width = sg.GaussianKernel.obtain_from_generic(
                inf.get_kernel()).get_width()
        except:
            pass
        best_scale = inf.get_scale()
    gp.train()

    size = 50
    x1 = np.linspace(domain['horizontal'][0], domain['horizontal'][1], size)
    y1 = np.linspace(domain['vertical'][0], domain['vertical'][1], size)
    x, y = np.meshgrid(x1, y1)

    test = sg.RealFeatures(np.array((np.ravel(x), np.ravel(y))))

    if returnValues:
        out = gp.apply(test).get_values()
    else:
        out = gp.apply(test).get_labels()
    z = out.reshape((size, size))
    z = np.transpose(z)
    return x, y, z, best_width, best_param, best_scale
Example #23
    def run_hierarchical(self,
                         nb_clusters,
                         src_file,
                         data_without_target,
                         dataset_name,
                         run_number,
                         run_info=None):
        output_file, = self._prepare_files(dataset_name, run_info, False)

        train_features = shogun.RealFeatures(
            data_without_target.values.astype("float64").transpose())
        # distance metric over feature matrix - Euclidean distance
        distance = shogun.EuclideanDistance(train_features, train_features)

        hierarchical = shogun.Hierarchical(nb_clusters, distance)
        hierarchical.train()
Example #24
    def predict_mult(self, x_test):
        '''
        This function predicts for the test data
        '''
        self.x_test = shogun.RealFeatures(
            np.array(x_test).reshape(self.input_dim, len(x_test)))

        if isinstance(self.model, str):
            return None

        self.z_postmean = self.model.apply_regression(self.x_test)
        self.z_postvar = np.sqrt(
            self.model.get_variance_vector(self.x_test))

        return self.z_postmean, self.z_postvar
Example #25
def _predictive_process(feat_train, labels, noise_level, scale, kernel, domain,
                        learn, feat_induc, inf_select):
    variances, means, best_width, best_scale, best_sigma = _process(
        feat_train, labels, noise_level, scale, kernel, domain, learn,
        feat_induc, inf_select, True)
    size = 75
    x_test = np.array(
        [np.linspace(domain['horizontal'][0], domain['horizontal'][1], size)])
    feat_test = sg.RealFeatures(x_test)
    y1 = np.linspace(domain['vertical'][0], domain['vertical'][1], 50)
    D = np.zeros((len(y1), size))

    # evaluate normal distribution at every prediction point (column)
    for j in range(np.shape(D)[1]):
        # create Gaussian distribution instance; it expects a mean vector and covariance matrix, hence the reshapes
        gauss = sg.GaussianDistribution(
            np.array(means[j]).reshape(1, ),
            np.array(variances[j]).reshape(1, 1))

        # evaluate predictive distribution for test point, method expects matrix
        D[:, j] = np.exp(gauss.log_pdf_multiple(y1.reshape(1, len(y1))))

    z = np.transpose(D)
    z_max = np.nanmax(z)
    z_min = np.nanmin(z)
    z_delta = 0.1 * (np.nanmax(z) - np.nanmin(z))

    result = []
    for i in xrange(len(feat_test.get_feature_matrix()[0])):
        result.append({
            'x': feat_test.get_feature_matrix()[0][i],
            'y': means[i],
            'range_upper': means[i] + 2 * np.sqrt(variances[i]),
            'range_lower': means[i] - 2 * np.sqrt(variances[i]),
            'best_width': float(best_width),
            'best_scale': float(best_scale),
            'best_sigma': float(best_sigma),
            "status": "ok",
            "domain": [z_min - z_delta, z_max + z_delta],
            "max": z_max + z_delta,
            "min": z_min - z_delta,
            "z": z.tolist()
        })

    return result
Example #26
    def run_gaussian(self,
                     nb_clusters,
                     src_file,
                     data_without_target,
                     dataset_name,
                     run_number,
                     run_info=None):
        output_file, = self._prepare_files(dataset_name, run_info, False)

        train_features = shogun.RealFeatures(
            data_without_target.values.astype("float64").transpose())
        # distance metric over feature matrix - Euclidean distance
        # distance = shogun.EuclideanDistance(train_features, train_features)

        gmm = shogun.GMM(nb_clusters)
        gmm.set_features(train_features)
        gmm.train_em()

        print(gmm)
Example #27
def _process(x1_set, x2_set, kernel_width, kernel_name, degree):
    num = len(x1_set)
    if num == 0:
        raise Http404
    examples = np.zeros((2, num))
    for i in xrange(num):
        examples[0, i] = x1_set[i]
        examples[1, i] = x2_set[i]
    feat_train = sg.RealFeatures(examples)

    # construct covariance function
    if kernel_name == "LinearKernel":
        kernel = sg.LinearKernel(feat_train, feat_train)
    elif kernel_name == "PolynomialKernel":
        kernel = sg.PolyKernel(feat_train, feat_train, degree, True)
    elif kernel_name == "GaussianKernel":
        kernel = sg.GaussianKernel(feat_train, feat_train, kernel_width)
    kernel_matrix = kernel.get_kernel_matrix()
    return kernel_matrix.tolist()
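A usage sketch with illustrative inputs:

# kernel matrix for three 2-D points under a Gaussian kernel of width 2.0
matrix = _process([0.0, 1.0, 2.0], [0.0, 1.0, 0.0],
                  kernel_width=2.0, kernel_name="GaussianKernel", degree=0)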
Example #28
def _read_data(request):
    labels = []
    features = []
    data = json.loads(request.POST['point_set'])
    cost = float(request.POST['C'])
    tubeeps = float(request.POST['tube'])
    kernel_name = request.POST['kernel']
    for pt in data:
        labels.append(float(pt["y"]))
        features.append(float(pt["x"]))
    labels = np.array(labels, dtype=np.float64)
    num = len(features)
    if num == 0:
        raise TypeError
    examples = np.zeros((1, num))

    for i in xrange(num):
        examples[0, i] = features[i]
    
    lab = sg.RegressionLabels(labels)
    train = sg.RealFeatures(examples)
    kernel = get_kernel(request, train)
    return (cost, tubeeps, lab, kernel)
Example #29
def _read_data(request):
    labels = []
    features = []
    data = json.loads(request.POST['point_set'])
    tau = float(request.POST['Tau'])
    for pt in data:
        labels.append(float(pt["y"]))
        features.append(float(pt["x"]))
    labels = np.array(labels, dtype=np.float64)
    num = len(features)
    if num == 0:
        raise TypeError
    examples = np.zeros((1, num))

    for i in xrange(num):
        examples[0, i] = features[i]

    lab = sg.RegressionLabels(labels)
    train = sg.RealFeatures(examples)

    sigma = float(request.POST["sigma"])
    kernel = sg.GaussianKernel(train, train, sigma)

    return (tau, lab, kernel, train)
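A hedged sketch of the likely consumer of this tuple, kernel ridge regression; the `sg.KernelRidgeRegression` constructor signature is assumed from the classic Shogun API rather than shown in this listing:

# hypothetical consumer of _read_data
tau, lab, kernel, train = _read_data(request)
krr = sg.KernelRidgeRegression(tau, kernel, lab)
krr.train()
predictions = krr.apply_regression(train).get_labels()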
Example #30
import shogun as sg
import data

# load data
feature_matrix = data.swissroll()
# create features instance
features = sg.RealFeatures(feature_matrix)

# create Linear Local Tangent Space Alignment converter instance
converter = sg.LinearLocalTangentSpaceAlignment()

# set target dimensionality
converter.set_target_dim(2)
# set number of neighbors
converter.set_k(10)
# set number of threads
converter.parallel.set_num_threads(2)
# set nullspace shift (optional)
converter.set_nullspace_shift(-1e-6)

# compute embedding with Linear Local Tangent Space Alignment method
embedding = converter.embed(features)
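A short usage sketch for inspecting the result (assumes matplotlib is available):

import matplotlib.pyplot as plt

embedded = embedding.get_feature_matrix()   # shape (2, num_points)
plt.scatter(embedded[0], embedded[1], s=5)
plt.title('LLTSA embedding of the Swiss roll')
plt.show()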