示例#1
0
        def RunAllKnnShogun(q):
            """Run Shogun's k-nearest-neighbours classifier and report timing.

            Reads ``self.dataset``, ``self.verbose`` and ``options`` from the
            enclosing scope. The last column of the reference data is used as
            the class labels; the remaining columns are the features.

            Args:
                q: Object with a ``put`` method. It receives the value of
                    ``totalTimer.ElapsedTime()`` on success, or -1 when a
                    required option is missing/invalid or any exception is
                    raised while loading or classifying.

            Returns:
                The same value that was put on ``q``.
            """
            totalTimer = Timer()

            # Load input dataset.
            # If the dataset contains two files then the second file is the query
            # file.
            try:
                Log.Info("Loading dataset", self.verbose)
                if len(self.dataset) == 2:
                    referenceData = np.genfromtxt(self.dataset[0],
                                                  delimiter=',')
                    queryData = np.genfromtxt(self.dataset[1], delimiter=',')
                    # Build the query features from the loaded query matrix.
                    # Reading the not-yet-assigned queryFeat here would raise
                    # and make every two-file run report -1.
                    # NOTE(review): the query matrix is used as-is, i.e. no
                    # label column is stripped from it - confirm the format.
                    queryFeat = RealFeatures(queryData.T)
                else:
                    referenceData = np.genfromtxt(self.dataset, delimiter=',')

                # Labels are the last column of the dataset.
                labels = MulticlassLabels(referenceData[:, -1])
                referenceData = referenceData[:, :-1]

                with totalTimer:
                    # Get all the parameters.
                    kMatch = re.search(r"-k (\d+)", options)
                    if not kMatch:
                        Log.Fatal(
                            "Required option: Number of furthest neighbors to find."
                        )
                        q.put(-1)
                        return -1

                    # Keep the match object separate from the parsed integer
                    # so the error message below can still quote the raw text.
                    k = int(kMatch.group(1))
                    if k < 1 or k > referenceData.shape[0]:
                        Log.Fatal("Invalid k: " + kMatch.group(1) +
                                  "; must be greater than 0" +
                                  " and less or equal than " +
                                  str(referenceData.shape[0]))
                        q.put(-1)
                        return -1

                    referenceFeat = RealFeatures(referenceData.T)
                    distance = EuclideanDistance(referenceFeat, referenceFeat)

                    # Perform All K-Nearest-Neighbors.
                    model = SKNN(k, distance, labels)
                    model.train()

                    # The predictions are computed only so that the call is
                    # timed; they are not returned.
                    if len(self.dataset) == 2:
                        out = model.apply(queryFeat).get_labels()
                    else:
                        out = model.apply(referenceFeat).get_labels()
            except Exception as e:
                # Any failure is reported to the caller as -1.
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
示例#2
0
def assign_labels(data, centroids):
    """Classify ``data`` with a 1-nearest-neighbour model built on ``centroids``.

    The centroids are given the labels 1.0 through 10.0 (presumably one per
    centroid, so ten centroids are expected - confirm against the caller).

    Returns whatever ``KNN.apply`` yields for the features built from ``data``.
    """
    from shogun.Classifier import KNN
    from numpy import arange

    centroid_labels = Labels(arange(1., 11.))
    query_features = RealFeatures(data)
    centroid_features = RealFeatures(centroids)
    # Distances are measured among the centroids themselves for training.
    classifier = KNN(
        1,
        EuclidianDistance(centroid_features, centroid_features),
        centroid_labels,
    )
    classifier.train()
    return classifier.apply(query_features)
def assign_labels(data, centroids):
    """Assign each item in ``data`` the label of its nearest centroid.

    A 1-nearest-neighbour classifier is trained on ``centroids`` using the
    labels 1.0 through 10.0 and then applied to ``data``. The fixed label
    range presumably means ten centroids are expected - confirm with callers.

    Returns the result of ``KNN.apply`` on the features built from ``data``.
    """
    from shogun.Classifier import KNN
    from numpy import arange

    label_values = arange(1., 11.)
    centroid_labels = Labels(label_values)
    data_feats = RealFeatures(data)
    centroid_feats = RealFeatures(centroids)
    centroid_distance = EuclidianDistance(centroid_feats, centroid_feats)

    nearest = KNN(1, centroid_distance, centroid_labels)
    nearest.train()
    predictions = nearest.apply(data_feats)
    return predictions
示例#4
0
def knn_train(train_data=None, train_label=None, k=1):
    """Build and train a Shogun KNN model using Euclidean distance.

    Args:
        train_data: Training matrix, passed straight to ``RealFeatures``.
        train_label: Object exposing ``tolist()``; its values are converted
            to a float64 array and wrapped in ``Labels``.
        k: Number of neighbours handed to ``KNN``.

    Returns:
        The trained ``KNN`` model.

    Raises:
        Exception: Whatever error occurred while converting ``train_label``
            (for example ``AttributeError`` when it is left as ``None``).
            The error is printed and then re-raised.
    """
    train_data = RealFeatures(train_data)
    distance = EuclidianDistance(train_data, train_data)
    try:
        train_label = Labels(array(train_label.tolist(), dtype=float64))
    except Exception as e:
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3; the bare raise keeps the original error type and
        # traceback instead of replacing it with an empty Exception.
        print(e)
        raise
    knn_model = KNN(k, distance, train_label)
    knn_model.train()
    return knn_model
示例#5
0
    def RunAllKnnShogun(q):
      """Run Shogun's k-nearest-neighbours classifier and report timing.

      Reads ``self.dataset``, ``self.verbose`` and ``options`` from the
      enclosing scope. The last column of the reference data is used as the
      class labels; the remaining columns are the features.

      Args:
        q: Object with a ``put`` method. It receives the value of
          ``totalTimer.ElapsedTime()`` on success, or -1 when a required
          option is missing/invalid or any exception is raised while loading
          or classifying.

      Returns:
        The same value that was put on ``q``.
      """
      totalTimer = Timer()

      # Load input dataset.
      # If the dataset contains two files then the second file is the query
      # file.
      try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
          referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
          queryData = np.genfromtxt(self.dataset[1], delimiter=',')
          # Build the query features from the loaded query matrix. Reading
          # the not-yet-assigned queryFeat here would raise and make every
          # two-file run report -1.
          # NOTE(review): the query matrix is used as-is, i.e. no label
          # column is stripped from it - confirm the format.
          queryFeat = RealFeatures(queryData.T)
        else:
          referenceData = np.genfromtxt(self.dataset, delimiter=',')

        # Labels are the last column of the dataset.
        labels = MulticlassLabels(referenceData[:, -1])
        referenceData = referenceData[:, :-1]

        with totalTimer:
          # Get all the parameters.
          kMatch = re.search(r"-k (\d+)", options)
          if not kMatch:
            Log.Fatal("Required option: Number of furthest neighbors to find.")
            q.put(-1)
            return -1

          # Keep the match object separate from the parsed integer so the
          # error message below can still quote the raw text.
          k = int(kMatch.group(1))
          if k < 1 or k > referenceData.shape[0]:
            Log.Fatal("Invalid k: " + kMatch.group(1)
              + "; must be greater than 0"
              + " and less or equal than " + str(referenceData.shape[0]))
            q.put(-1)
            return -1

          referenceFeat = RealFeatures(referenceData.T)
          distance = EuclideanDistance(referenceFeat, referenceFeat)

          # Perform All K-Nearest-Neighbors.
          model = SKNN(k, distance, labels)
          model.train()

          # The predictions are computed only so that the call is timed;
          # they are not returned.
          if len(self.dataset) == 2:
            out = model.apply(queryFeat).get_labels()
          else:
            out = model.apply(referenceFeat).get_labels()
      except Exception as e:
        # Any failure is reported to the caller as -1.
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
def assign_labels(data, centroids, ncenters):
	"""Classify ``data`` with a 1-nearest-neighbour model built on ``centroids``.

	The centroids are labelled with ``arange(0., ncenters)``, i.e. 0.0 up to
	(but not including) ``ncenters``.

	Returns whatever ``KNN.apply`` yields for the features built from ``data``.
	"""
	from shogun.Distance import EuclideanDistance
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import KNN
	from numpy import arange

	centroid_labels = MulticlassLabels(arange(0.,ncenters))
	query_features = RealFeatures(data)
	centroid_features = RealFeatures(centroids)
	# Distances are measured among the centroids themselves for training.
	classifier = KNN(
		1,
		EuclideanDistance(centroid_features, centroid_features),
		centroid_labels,
	)
	classifier.train()
	return classifier.apply(query_features)
示例#7
0
def assign_labels(data, centroids, ncenters):
    """Assign each item in ``data`` the label of its nearest centroid.

    A 1-nearest-neighbour classifier is trained on ``centroids`` using the
    labels ``arange(0., ncenters)`` and then applied to ``data``.

    Returns the result of ``KNN.apply`` on the features built from ``data``.
    """
    from shogun.Distance import EuclideanDistance
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import KNN
    from numpy import arange

    label_values = arange(0., ncenters)
    centroid_labels = MulticlassLabels(label_values)
    data_feats = RealFeatures(data)
    centroid_feats = RealFeatures(centroids)
    centroid_distance = EuclideanDistance(centroid_feats, centroid_feats)

    nearest = KNN(1, centroid_distance, centroid_labels)
    nearest.train()
    predictions = nearest.apply(data_feats)
    return predictions
示例#8
0
def classifier_knn_modular(fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat, k=3 ):
	"""Train a Shogun KNN classifier and label the test data with it.

	The classifier uses Euclidean distance computed on the training features.

	Returns a tuple ``(knn, train_result, output)``: the classifier, the
	value returned by its ``train()`` call, and the labels obtained by
	applying it to the test features.
	"""
	from shogun.Features import RealFeatures, Labels
	from shogun.Classifier import KNN
	from shogun.Distance import EuclidianDistance

	train_features = RealFeatures(fm_train_real)
	test_features = RealFeatures(fm_test_real)
	train_distance = EuclidianDistance(train_features, train_features)
	train_labels = Labels(label_train_multiclass)

	classifier = KNN(k, train_distance, train_labels)
	train_result = classifier.train()
	predicted = classifier.apply(test_features).get_labels()
	return classifier, train_result, predicted
def classifier_knn_modular(fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat, k=3 ):
	"""Train a Shogun KNN classifier and label the test data with it.

	The classifier uses Euclidean distance computed on the training features.

	Returns a tuple ``(knn, train_result, output, multiple_k)``: the
	classifier, the value returned by its ``train()`` call, the labels from
	applying it to the test features, and the result of
	``classify_for_multiple_k()``.
	"""
	from shogun.Features import RealFeatures, Labels
	from shogun.Classifier import KNN
	from shogun.Distance import EuclidianDistance

	train_features = RealFeatures(fm_train_real)
	test_features = RealFeatures(fm_test_real)
	train_distance = EuclidianDistance(train_features, train_features)
	train_labels = Labels(label_train_multiclass)

	classifier = KNN(k, train_distance, train_labels)
	train_result = classifier.train()
	predicted = classifier.apply(test_features).get_labels()
	per_k_result = classifier.classify_for_multiple_k()
	return classifier, train_result, predicted, per_k_result
def knn():
	"""Train a 3-nearest-neighbour Shogun classifier and classify the test set.

	Reads ``fm_train_real``, ``fm_test_real`` and ``label_train_multiclass``
	from the enclosing module scope. Prints ``KNN`` first. The predicted
	labels are computed but not returned.
	"""
	# print(...) with a single argument behaves the same on Python 2 and 3.
	print('KNN')

	from shogun.Features import RealFeatures, Labels
	from shogun.Classifier import KNN
	from shogun.Distance import EuclidianDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	distance=EuclidianDistance(feats_train, feats_train)

	k=3
	labels=Labels(label_train_multiclass)

	# Named "classifier" so the local no longer shadows this function's name.
	classifier=KNN(k, distance, labels)
	classifier.train()
	# The call is kept for its effect; its result was never used.
	classifier.classify(feats_test).get_labels()
示例#11
0
trainData = trainData.reshape(-1, 10000)
f.close()

# Directory holding the arcene files, resolved relative to this script.
# os.path.join supplies the separator that plain string concatenation drops:
# dirname() has no trailing slash, so "<dir>" + "../data/x" gives "<dir>../data/x".
dataDir = os.path.join(os.path.dirname(__file__), '..', 'data')

# The with-blocks close each file even if reading it fails.
with open(os.path.join(dataDir, 'arcene_train.label')) as f:
    trainLabel = np.fromfile(f, dtype=np.int32, sep=' ')

# Load test data.
with open(os.path.join(dataDir, 'arcene_test.data')) as f:
    testData = np.fromfile(f, dtype=np.float64, sep=' ')
testData = testData.reshape(-1, 10000)

with open(os.path.join(dataDir, 'arcene_test.label')) as f:
    testLabel = np.fromfile(f, dtype=np.float64, sep=' ')

# Construct a KNN classifier with a neighborhood size of 9.
feat = RealFeatures(trainData.T)
distance = EuclideanDistance(feat, feat)
labels = BinaryLabels(trainLabel.astype(np.float64))
testFeat = RealFeatures(testData.T)
knn = KNN(9, distance, labels)
knn.train()

# Predict the classification.
output = knn.apply(testFeat).get_labels()

# Validate the classification: print the element-wise comparison of the
# predicted labels with the expected ones.
print(output == testLabel)