Пример #1
0
 def test_unique_allmasked(self):
     # Test all masked
     data = masked_array([1, 1, 1], mask=True)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, ], mask=[True]))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0, 0, 0])
     #
     # Test masked
     data = masked
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array(masked))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0])
Пример #2
0
 def test_unique_allmasked(self):
     # Test all masked
     data = masked_array([1, 1, 1], mask=True)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, ], mask=[True]))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0, 0, 0])
     #
     # Test masked
     data = masked
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array(masked))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0])
Пример #3
0
 def test_unique_onmaskedarray(self):
     # Test unique on masked data w/use_mask=True
     data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0])
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
     #
     data.fill_value = 3
     data = masked_array(data=[1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0], fill_value=3)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
Пример #4
0
 def test_unique_onmaskedarray(self):
     # Test unique on masked data w/use_mask=True
     data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0])
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
     #
     data.fill_value = 3
     data = masked_array(data=[1, 1, 1, 2, 2, 3],
                         mask=[0, 0, 1, 0, 1, 0], fill_value=3)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
Пример #5
0
 def adapt(self, mcmc_chain, step_output):
     """Fit the proposal once, when the chain reaches the switch iteration.

     On the iteration equal to ``self.num_samples_when_to_switch``, a set
     of sample indices at or after ``self.num_sample_discard`` is drawn with
     repetition, de-duplicated, and the selected chain samples are passed to
     ``self.fit_gmm``; the result is stored in ``self.proposal``.
     ``step_output`` is accepted but not used.
     """
     current = mcmc_chain.iteration
     # the proposal is learned only once, at a pre-specified iteration
     if current != self.num_samples_when_to_switch:
         return

     span = current - self.num_sample_discard
     drawn = randint(span, size=self.num_samples_gmm) + self.num_sample_discard
     chosen = mcmc_chain.samples[unique(drawn)]
     self.proposal = self.fit_gmm(chosen)
Пример #6
0
def calcN(classKernels, trainLabels):
    """Accumulate the per-class terms ``K_i (I - 1/n_i) K_i^T`` and regularise.

    Parameters:
        classKernels: sequence indexed by class position (the order of
            ``unique(trainLabels)``); each entry must be shaped so that
            ``K_i . Idiff . K_i^T`` is ``len(trainLabels)`` square, where
            ``Idiff`` has one row/column per example of that class.
        trainLabels: array of labels; it is compared element-wise with each
            distinct label to count the examples per class.

    Returns:
        The ``len(trainLabels) x len(trainLabels)`` matrix ``N`` with NaNs
        replaced via ``nan_to_num`` and a multiple of the identity added
        until ``inv(N)`` succeeds (at most 1000 extra additions).
    """
    numExamples = len(trainLabels)
    N = zeros((numExamples, numExamples))
    for i, l in enumerate(unique(trainLabels)):
        numExamplesWithLabel = len(where(trainLabels == l)[0])
        # The 1-D ones vector broadcasts across rows, so this is the identity
        # with 1/n subtracted from every entry.
        Idiff = (identity(numExamplesWithLabel, Float64)
                 - (1.0 / numExamplesWithLabel) * ones(numExamplesWithLabel, Float64))
        firstDot = dot(classKernels[i], Idiff)
        N += dot(firstDot, transpose(classKernels[i]))
    N = nan_to_num(N)

    # Make N more numerically stable. The scale factor is a fixed heuristic;
    # it is not tuned.
    additionToN = ((mean(diag(N)) + 1) / 100.0) * identity(N.shape[0], Float64)
    N += additionToN

    # Make sure N is invertible: keep adding the regulariser while inversion
    # fails (see "numerical issues and regularization" in the paper the
    # original comment refers to).
    for _ in range(1000):
        try:
            inv(N)
        except LinAlgError:
            N += additionToN
        else:
            # N is unchanged after a successful inversion, so further
            # attempts would only repeat the same work.
            break

    return N
Пример #7
0
    def adapt(self, mcmc_chain, step_output):
        """
        Refresh ``self.Z``, the window of chain samples in use.

        Nothing changes while the iteration has not passed
        ``self.sample_discard``. While the iteration is below
        ``self.sample_discard + self.num_samples_Z`` every post-discard sample
        is used; afterwards, and only before ``self.stop_adapt``, a random
        subset with duplicates removed is used. ``step_output`` is not used.
        """
        now = mcmc_chain.iteration
        history = mcmc_chain.samples[0:(now + 1)]
        burn_in = self.sample_discard

        # only adapt after discard has passed
        if now <= burn_in:
            return

        # not yet enough samples: use everything after the discard point
        if now < burn_in + self.num_samples_Z:
            self.Z = history[burn_in:(now + 1)]
            return

        # stop adapting at some point
        if now >= self.stop_adapt:
            return

        # Draw indices with repetition and drop the duplicates; sampling
        # without repetition is too expensive.
        picks = randint(now - burn_in, size=self.num_samples_Z) + burn_in
        self.Z = history[unique(picks)]
Пример #8
0
 def test_unique_onlist(self):
     # Test unique on list
     data = [1, 1, 1, 2, 2, 3]
     test = unique(data, return_index=True, return_inverse=True)
     self.assertTrue(isinstance(test[0], MaskedArray))
     assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0]))
     assert_equal(test[1], [0, 3, 5])
     assert_equal(test[2], [0, 0, 0, 1, 1, 2])
Пример #9
0
 def test_unique_onlist(self):
     # Test unique on list
     data = [1, 1, 1, 2, 2, 3]
     test = unique(data, return_index=True, return_inverse=True)
     self.assertTrue(isinstance(test[0], MaskedArray))
     assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0]))
     assert_equal(test[1], [0, 3, 5])
     assert_equal(test[2], [0, 0, 0, 1, 1, 2])
Пример #10
0
 def adapt(self, mcmc_chain, step_output):
     """Fit the proposal once, when the chain reaches the switch iteration.

     On the iteration equal to ``self.num_samples_when_to_switch``, a set
     of sample indices at or after ``self.num_sample_discard`` is drawn with
     repetition, de-duplicated, and the selected chain samples are passed to
     ``self.fit_gmm``; the result is stored in ``self.proposal``.
     ``step_output`` is accepted but not used.
     """
     current = mcmc_chain.iteration
     # the proposal is learned only once, at a pre-specified iteration
     if current != self.num_samples_when_to_switch:
         return

     span = current - self.num_sample_discard
     drawn = randint(span, size=self.num_samples_gmm) + self.num_sample_discard
     chosen = mcmc_chain.samples[unique(drawn)]
     self.proposal = self.fit_gmm(chosen)
Пример #11
0
    def __init__(self, X, format=None, class_column=None, classes='auto'):
        '''
        Initialise the data object analysed by an AnomalyDetector.

        Behaviour depends on the type of X:

        * pyisc._DataObject: the base class is initialised from
          X.get_isc_data_object().
        * pyisc.Format: X is stored as the format and passed to the base class;
          per the original documentation, content is then added with the
          add2Darray or add1Darray methods.
        * numpy array with format=None: a Format is generated with one column
          per entry of X.T. The column at class_column is a Symbol column, all
          others are Continuous. The values of the class column are replaced by
          their index in the classes_ list (-1 when a value is not in the list)
          and the data is passed on as floats.
        * numpy array with a pyisc.Format given as format: format and X are
          passed on unchanged.
        * anything else: X alone is passed to the base class.

        If classes is 'auto', classes_ is the sorted list of unique values found
        in the class column; otherwise classes itself is used.

        :param X: a _DataObject, a Format instance or a numpy array
        :param format: None or a pyisc Format instance
        :param class_column: None or an integer column index
        :param classes: 'auto' or a list of elements in X[class_column]
        :return:
        '''
        self.class_column = class_column
        if isinstance(X, pyisc._DataObject):
            # Re-wrap the isc data object held by an existing data object.
            pyisc._DataObject.__init__(self,X.get_isc_data_object())
            return
        elif isinstance(X, pyisc.Format):
            # Only a format was given; no data is passed at construction time.
            self._format = X
            pyisc._DataObject.__init__(self,X)
            return
        elif isinstance(X, ndarray):
                if format is None:
                    # Auto-generate a format from the array layout.
                    format = Format()
                    # NOTE(review): for a 1-D X, X.T is X itself, so this is the
                    # number of elements rather than 1 - confirm this is intended.
                    num_cols = len(X.T)
                    if class_column is not None:
                        assert class_column >= 0 and class_column < num_cols
                    for col in range(num_cols):
                        if col != class_column:
                            format.addColumn("Column %i"%col, Format.Continuous)
                        else:
                            format.addColumn("Column %i"%col, Format.Symbol)
                            # Work on a transposed copy so the caller's array is
                            # not modified when the class values are re-encoded.
                            A =  X.T.copy()
                            if classes == 'auto':
                                self.classes_ =  list(sorted(unique(A[class_column])))
                            else:
                                self.classes_ = classes
                            class_col = format.get_nth_column(class_column)
                            for c in self.classes_:
                                # Label: "Class <n>" for ints, "Class <ch>" for
                                # one-character strings, str(c) otherwise.
                                class_col.add("Class %i"%c if isinstance(c, int) else "Class %s"%c if isinstance(c, str) and len(c) == 1 else str(c))
                            # Replace each class value by its index in classes_;
                            # values not in the list become -1.
                            A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1 for v in A[class_column]]
                            X = A.T
                    self._format = format
                    if X.ndim == 1: # This fixes a problem of converting it to c++ data object
                        X = array([X.copy()]).T

                    pyisc._DataObject.__init__(self,format,X.astype(float))
                    return
                elif isinstance(format, pyisc.Format):
                    # Caller supplied the format; the data is passed on as is.
                    self._format = format
                    pyisc._DataObject.__init__(self,format,X)
                    return
        # Fallback: X is none of the above, or format is neither None nor a
        # pyisc.Format.
        pyisc._DataObject.__init__(self,X)
Пример #12
0
    def __init__(self, X, format=None, class_column=None, classes='auto'):
        '''
        Initialise the data object analysed by an AnomalyDetector.

        Behaviour depends on the type of X:

        * pyisc.Format: X is stored as the format and passed to the base class;
          per the original documentation, content is then added with the
          add2Darray or add1Darray methods.
        * numpy array with format=None: a Format is generated with one column
          per entry of X.T. The column at class_column is a Symbol column, all
          others are Continuous. The values of the class column are replaced by
          their index in the classes_ list (-1 when a value is not in the list)
          and the data is passed on as floats.
        * numpy array with a pyisc.Format given as format: format and X are
          passed on unchanged.
        * anything else: X alone is passed to the base class.

        If classes is 'auto', classes_ is the sorted list of unique values found
        in the class column; otherwise classes itself is used.

        :param X: a Format instance or a numpy array
        :param format: None or a pyisc Format instance
        :param class_column: None or an integer column index
        :param classes: 'auto' or a list of elements in X[class_column]
        :return:
        '''
        self.class_column = class_column
        if isinstance(X, pyisc.Format):
            # Only a format was given; no data is passed at construction time.
            self._format = X
            pyisc._DataObject.__init__(self,X)
            return
        elif isinstance(X, ndarray):
            if format is None:
                # Auto-generate a format from the array layout.
                format = Format()
                # NOTE(review): for a 1-D X, X.T is X itself, so this is the
                # number of elements rather than 1 - confirm this is intended.
                num_cols = len(X.T)
                if class_column is not None:
                    assert class_column >= 0 and class_column < num_cols
                for col in range(num_cols):
                    if col != class_column:
                        format.addColumn("Column %i"%col, Format.Continuous)
                    else:
                        format.addColumn("Column %i"%col, Format.Symbol)
                        # Work on a transposed copy so the caller's array is
                        # not modified when the class values are re-encoded.
                        A =  X.T.copy()
                        if classes == 'auto':
                            self.classes_ =  list(sorted(unique(A[class_column])))
                        else:
                            self.classes_ = classes
                        class_col = format.get_nth_column(class_column)
                        for c in self.classes_:
                            # Label: "Class <n>" for ints, "Class <ch>" for
                            # one-character strings, str(c) otherwise.
                            class_col.add("Class %i"%c if isinstance(c, int) else "Class %s"%c if isinstance(c, str) and len(c) == 1 else str(c))
                        # Replace each class value by its index in classes_;
                        # values not in the list become -1.
                        A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1 for v in A[class_column]]
                        X = A.T
                self._format = format
                if X.ndim == 1: # This fixes a problem of converting it to c++ data object
                    X = array([X.copy()]).T

                pyisc._DataObject.__init__(self,format,X.astype(float))
                return
            elif isinstance(format, pyisc.Format):
                # Caller supplied the format; the data is passed on as is.
                self._format = format
                pyisc._DataObject.__init__(self,format,X)
                return
        # Fallback: X is neither a Format nor an ndarray, or format is neither
        # None nor a pyisc.Format.
        pyisc._DataObject.__init__(self,X)
Пример #13
0
def getClassKernels(fullKernelMatrix, trainLabels):
    """Split the full kernel matrix into one column block per class.

    With l examples in total and lj examples in class j, the block for class
    j is the l x lj matrix made of the columns of ``fullKernelMatrix`` that
    belong to the examples labelled j.

    Parameters:
        fullKernelMatrix: 2-D array or nested sequence; rows are all examples,
            columns are indexed by example position.
        trainLabels: one label per example; array or plain sequence.

    Returns:
        A list of float arrays, one per distinct label, in the order given by
        ``unique(trainLabels)``.
    """
    # Local import so the block does not rely on how the module imports numpy.
    from numpy import asarray

    # zeros() in the element-wise version produced float blocks; keep that dtype.
    kernel = asarray(fullKernelMatrix, dtype=float)
    # Coerce to an array so ``labels == l`` is element-wise even for a list.
    labels = asarray(trainLabels)
    # Column selection replaces the per-element copy loop.
    return [kernel[:, where(labels == l)[0]] for l in unique(labels)]
Пример #14
0
    def adapt(self, mcmc_chain, step_output):
        """
        Refresh ``self.Z``, the window of chain samples in use.

        Nothing changes while the iteration has not passed
        ``self.sample_discard``. While the iteration is below
        ``self.sample_discard + self.num_samples_Z`` every post-discard sample
        is used; afterwards, and only before ``self.stop_adapt``, a random
        subset with duplicates removed is used. ``step_output`` is not used.
        """
        now = mcmc_chain.iteration
        history = mcmc_chain.samples[0:(now + 1)]
        burn_in = self.sample_discard

        # only adapt after discard has passed
        if now <= burn_in:
            return

        # not yet enough samples: use everything after the discard point
        if now < burn_in + self.num_samples_Z:
            self.Z = history[burn_in:(now + 1)]
            return

        # stop adapting at some point
        if now >= self.stop_adapt:
            return

        # Draw indices with repetition and drop the duplicates; sampling
        # without repetition is too expensive.
        picks = randint(now - burn_in, size=self.num_samples_Z) + burn_in
        self.Z = history[unique(picks)]
Пример #15
0
def calcM(classKernelList, trainLabels):
    """Return the outer product of the difference of the first two class M terms.

    ``calcClassM`` is evaluated for every (class kernel, label) pair, pairing
    ``classKernelList`` with ``unique(trainLabels)`` in order. Only the first
    two results enter the difference, so at least two classes are required.
    """
    classLabels = unique(trainLabels)
    perClassM = [
        calcClassM(kernel, trainLabels, label)
        for kernel, label in zip(classKernelList, classLabels)
    ]
    delta = perClassM[0] - perClassM[1]
    return outer(delta, delta)