def test_unique_allmasked(self):
    # Test all masked
    data = masked_array([1, 1, 1], mask=True)
    test = unique(data, return_index=True, return_inverse=True)
    assert_equal(test[0], masked_array([1, ], mask=[True]))
    assert_equal(test[1], [0])
    assert_equal(test[2], [0, 0, 0])
    #
    # Test masked
    data = masked
    test = unique(data, return_index=True, return_inverse=True)
    assert_equal(test[0], masked_array(masked))
    assert_equal(test[1], [0])
    assert_equal(test[2], [0])
def test_unique_onmaskedarray(self):
    # Test unique on masked data w/use_mask=True
    data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0])
    test = unique(data, return_index=True, return_inverse=True)
    assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
    assert_equal(test[1], [0, 3, 5, 2])
    assert_equal(test[2], [0, 0, 3, 1, 3, 2])
    #
    data.fill_value = 3
    data = masked_array(data=[1, 1, 1, 2, 2, 3],
                        mask=[0, 0, 1, 0, 1, 0], fill_value=3)
    test = unique(data, return_index=True, return_inverse=True)
    assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
    assert_equal(test[1], [0, 3, 5, 2])
    assert_equal(test[2], [0, 0, 3, 1, 3, 2])
def adapt(self, mcmc_chain, step_output):
    # only learn the proposal once, at a pre-specified iteration
    if mcmc_chain.iteration == self.num_samples_when_to_switch:
        iter_no = mcmc_chain.iteration
        inds = randint(iter_no - self.num_sample_discard,
                       size=self.num_samples_gmm) + self.num_sample_discard
        unique_inds = unique(inds)
        self.proposal = self.fit_gmm(mcmc_chain.samples[unique_inds])
def calcN(classKernels, trainLabels):
    N = zeros((len(trainLabels), len(trainLabels)))
    for i, l in enumerate(unique(trainLabels)):
        numExamplesWithLabel = len(where(trainLabels == l)[0])
        Idiff = identity(numExamplesWithLabel, Float64) \
            - (1.0 / numExamplesWithLabel) * ones(numExamplesWithLabel, Float64)
        firstDot = dot(classKernels[i], Idiff)
        labelTerm = dot(firstDot, transpose(classKernels[i]))
        N += labelTerm
    N = nan_to_num(N)  # make N more numerically stable
    # if I had more time, I would train this parameter, but I don't
    additionToN = ((mean(diag(N)) + 1) / 100.0) * identity(N.shape[0], Float64)
    N += additionToN  # make sure N is invertible
    for i in range(1000):
        try:
            inv(N)
            break  # N is invertible, stop adding regularization
        except LinAlgError:
            # doing this to make sure the matrix is invertible;
            # large value supported by the section titled
            # "numerical issues and regularization" in the paper
            N += additionToN
    return N
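# The loop above keeps adding a small, data-scaled multiple of the identity
# until the matrix inverts. A minimal standalone sketch of the same idea;
# the function name regularize_until_invertible and the max_tries argument
# are illustrative assumptions, not part of the original code.
import numpy as np
from numpy.linalg import inv, LinAlgError

def regularize_until_invertible(N, max_tries=1000):
    # ridge term scaled to the matrix, mirroring additionToN in calcN above
    ridge = ((np.mean(np.diag(N)) + 1) / 100.0) * np.identity(N.shape[0])
    N = N + ridge
    for _ in range(max_tries):
        try:
            inv(N)
            return N          # invertible, stop regularizing
        except LinAlgError:
            N = N + ridge     # still singular, add more ridge
    return N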
def adapt(self, mcmc_chain, step_output): """ Updates the sliding window of samples to use """ iter_no = mcmc_chain.iteration samples = mcmc_chain.samples[0:(iter_no + 1)] # only adapt after discard has passed if iter_no > self.sample_discard: if iter_no < self.sample_discard + self.num_samples_Z: # use all samples after discard if not yet enough self.Z = samples[self.sample_discard:(iter_no + 1)] else: # stop adapting at some point if iter_no < self.stop_adapt: # once enough samples, use random subset with repetition # and remove duplicates. Sampling without repetition is too expensive inds = randint( iter_no - self.sample_discard, size=self.num_samples_Z) + self.sample_discard unique_inds = unique(inds) # print len(inds) - len(unique_inds), "collisions and", len(unique_inds), "unique samples" self.Z = samples[unique_inds]
def test_unique_onlist(self):
    # Test unique on list
    data = [1, 1, 1, 2, 2, 3]
    test = unique(data, return_index=True, return_inverse=True)
    self.assertTrue(isinstance(test[0], MaskedArray))
    assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0]))
    assert_equal(test[1], [0, 3, 5])
    assert_equal(test[2], [0, 0, 0, 1, 1, 2])
def __init__(self, X, format=None, class_column=None, classes='auto'):
    '''
    The DataObject class represents the data analysed using an AnomalyDetector.

    X can be a Format instance or a numpy array. In the former case, we assume
    it is used to describe the content that is added to the object using the
    add2Darray or add1Darray methods. In the latter case, we automatically
    generate a Format instance, unless the format argument is provided.
    If class_column is specified, we use it to generate a column in the
    auto-generated format whose elements are indices into the classes_ list.
    If classes is set to 'auto', the elements in X's class_column are used
    to auto-create the classes_ list.

    :param X: a Format instance or a numpy array
    :param format: None or a pyisc Format instance
    :param class_column: None or an integer
    :param classes: 'auto' or a list of elements in X[class_column]
    :return:
    '''
    self.class_column = class_column

    if isinstance(X, pyisc._DataObject):
        pyisc._DataObject.__init__(self, X.get_isc_data_object())
        return
    elif isinstance(X, pyisc.Format):
        self._format = X
        pyisc._DataObject.__init__(self, X)
        return
    elif isinstance(X, ndarray):
        if format is None:
            format = Format()
            num_cols = len(X.T)
            if class_column is not None:
                assert class_column >= 0 and class_column < num_cols
            for col in range(num_cols):
                if col != class_column:
                    format.addColumn("Column %i" % col, Format.Continuous)
                else:
                    format.addColumn("Column %i" % col, Format.Symbol)
                    A = X.T.copy()
                    if classes == 'auto':
                        self.classes_ = list(sorted(unique(A[class_column])))
                    else:
                        self.classes_ = classes
                    class_col = format.get_nth_column(class_column)
                    for c in self.classes_:
                        class_col.add("Class %i" % c if isinstance(c, int) else
                                      "Class %s" % c if isinstance(c, str) and len(c) == 1 else
                                      str(c))
                    A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1
                                       for v in A[class_column]]
                    X = A.T
        self._format = format
        if X.ndim == 1:
            # This fixes a problem of converting it to a c++ data object
            X = array([X.copy()]).T
        pyisc._DataObject.__init__(self, format, X.astype(float))
        return
    elif isinstance(format, pyisc.Format):
        self._format = format
        pyisc._DataObject.__init__(self, format, X)
        return

    pyisc._DataObject.__init__(self, X)
def __init__(self, X, format=None, class_column=None, classes='auto'):
    '''
    The DataObject class represents the data analysed using an AnomalyDetector.

    X can be a Format instance or a numpy array. In the former case, we assume
    it is used to describe the content that is added to the object using the
    add2Darray or add1Darray methods. In the latter case, we automatically
    generate a Format instance, unless the format argument is provided.
    If class_column is specified, we use it to generate a column in the
    auto-generated format whose elements are indices into the classes_ list.
    If classes is set to 'auto', the elements in X's class_column are used
    to auto-create the classes_ list.

    :param X: a Format instance or a numpy array
    :param format: None or a pyisc Format instance
    :param class_column: None or an integer
    :param classes: 'auto' or a list of elements in X[class_column]
    :return:
    '''
    self.class_column = class_column

    if isinstance(X, pyisc.Format):
        self._format = X
        pyisc._DataObject.__init__(self, X)
        return
    elif isinstance(X, ndarray):
        if format is None:
            format = Format()
            num_cols = len(X.T)
            if class_column is not None:
                assert class_column >= 0 and class_column < num_cols
            for col in range(num_cols):
                if col != class_column:
                    format.addColumn("Column %i" % col, Format.Continuous)
                else:
                    format.addColumn("Column %i" % col, Format.Symbol)
                    A = X.T.copy()
                    if classes == 'auto':
                        self.classes_ = list(sorted(unique(A[class_column])))
                    else:
                        self.classes_ = classes
                    class_col = format.get_nth_column(class_column)
                    for c in self.classes_:
                        class_col.add("Class %i" % c if isinstance(c, int) else
                                      "Class %s" % c if isinstance(c, str) and len(c) == 1 else
                                      str(c))
                    A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1
                                       for v in A[class_column]]
                    X = A.T
        self._format = format
        if X.ndim == 1:
            # This fixes a problem of converting it to a c++ data object
            X = array([X.copy()]).T
        pyisc._DataObject.__init__(self, format, X.astype(float))
        return
    elif isinstance(format, pyisc.Format):
        self._format = format
        pyisc._DataObject.__init__(self, format, X)
        return

    pyisc._DataObject.__init__(self, X)
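# A usage sketch for the constructor above, assuming the surrounding pyisc
# package exposes this class as pyisc.DataObject (an assumption); the array
# values below are made up for illustration.
import numpy as np
import pyisc

X = np.array([[1.0, 2.0, 0],
              [1.5, 2.5, 0],
              [9.0, 0.5, 1]])

# last column holds class labels; with classes='auto' they are collected,
# sorted, and mapped to indices in the auto-generated Format
obj = pyisc.DataObject(X, class_column=2, classes='auto')
# obj.classes_ would then be [0.0, 1.0]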
def getClassKernels(fullKernelMatrix, trainLabels):
    # create a matrix where rows correspond to all examples
    # and columns correspond to examples of a specific class,
    # so if l is the total number of examples and lj is the number of
    # examples in class j, then we're creating an l x lj matrix
    uniqueLabels = unique(trainLabels)
    ret = []
    for l in uniqueLabels:
        labelIndexes = where(trainLabels == l)[0]
        k = zeros((len(fullKernelMatrix), len(labelIndexes)))
        for r in range(len(k)):
            for c in range(len(k[r])):
                k[r][c] = fullKernelMatrix[r][labelIndexes[c]]
        ret.append(k)
    return ret
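# Side note: the double loop above copies one entry at a time; with numpy
# fancy indexing the same l x lj matrices can be taken in one slice. A
# sketch of an equivalent helper (the name getClassKernelsVectorized is
# an illustrative assumption, not part of the original code).
import numpy as np

def getClassKernelsVectorized(fullKernelMatrix, trainLabels):
    K = np.asarray(fullKernelMatrix)
    labels = np.asarray(trainLabels)
    # for each label, keep all rows and only the columns of that class
    return [K[:, np.where(labels == l)[0]] for l in np.unique(labels)]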
def adapt(self, mcmc_chain, step_output): """ Updates the sliding window of samples to use """ iter_no = mcmc_chain.iteration samples = mcmc_chain.samples[0 : (iter_no + 1)] # only adapt after discard has passed if iter_no > self.sample_discard: if iter_no < self.sample_discard + self.num_samples_Z: # use all samples after discard if not yet enough self.Z = samples[self.sample_discard : (iter_no + 1)] else: # stop adapting at some point if iter_no < self.stop_adapt: # once enough samples, use random subset with repetition # and remove duplicates. Sampling without repetition is too expensive inds = randint(iter_no - self.sample_discard, size=self.num_samples_Z) + self.sample_discard unique_inds = unique(inds) # print len(inds) - len(unique_inds), "collisions and", len(unique_inds), "unique samples" self.Z = samples[unique_inds]
def calcM(classKernelList, trainLabels):
    # two classes are assumed: collect the per-class statistics from
    # calcClassM, take their difference, and return its rank-one outer product
    Mlist = []
    for (classKernel, label) in zip(classKernelList, unique(trainLabels)):
        Mlist.append(calcClassM(classKernel, trainLabels, label))
    Mdiff = Mlist[0] - Mlist[1]
    return outer(Mdiff, Mdiff)