def fit(self, x_train, y_train):
    """Fit the LDF (linear discriminant function) model.

    A QDF model is fitted first and its ``prior``, ``mean`` and
    ``cov_matrix`` attributes are reused. The class covariances are
    combined into one shared covariance (prior-weighted sum), which is
    inverted and used to derive one weight column and one bias per class.

    Sets ``num_class``, ``label_map``, ``inverse_cov``, ``weight`` (one
    column per class) and ``w0`` (list with one bias per class).

    Args:
        x_train: training features; must expose ``.shape[1]`` as the
            number of features.
        y_train: training labels, remapped through
            ``QDF.transform_label`` before fitting.

    Returns:
        self, so calls can be chained.
    """
    self.num_class, y_train, self.label_map = QDF.transform_label(y_train)

    qdf_model = QDF().fit(x_train, y_train)
    class_prior = qdf_model.prior
    class_mean = qdf_model.mean
    class_cov = qdf_model.cov_matrix

    # Shared covariance: sum of the class covariances weighted by prior.
    shared_cov = np.matlib.zeros(class_cov[0].shape)
    for k in range(self.num_class):
        shared_cov += (class_prior[k] * class_cov[k])
    self.inverse_cov = shared_cov.getI()

    n_features = x_train.shape[1]
    # The first column is only a placeholder that fixes the row count; it
    # is sliced off again once all class columns have been stacked.
    columns = [np.matrix([0] * n_features).T]
    self.w0 = []
    inv_cov_t = self.inverse_cov.T
    for k in range(self.num_class):
        # NOTE(review): class_mean[k] is presumably a column np.matrix so
        # that the matrix products below are well-formed -- confirm in QDF.
        mu = class_mean[k]
        columns.append(2 * inv_cov_t * mu)
        log_prior_term = 2 * math.log(class_prior[k])
        mean_quadratic = (mu.T * self.inverse_cov * mu)[0, 0]
        self.w0.append(log_prior_term - mean_quadratic)

    self.weight = np.hstack(columns)[:, 1:]
    return self
def twostep(train_feature, train_label, test_feature, Classifier, kwargs):
    """Train the two models of the two-step method and predict with them.

    The training data is split by ``make_twostep_dataset`` into one
    dataset per step, a separate ``Classifier`` instance is fitted on
    each, and the fitted pair is applied to the training and test features
    through ``twostep_predict``. A classification report for each step,
    evaluated on that step's own training data, is printed.

    Args:
        train_feature: training features.
        train_label: training labels.
        test_feature: test features.
        Classifier: the classifier class; it is instantiated twice and
            must provide ``fit`` (returning the fitted model) and
            ``predict``.
        kwargs: dict of additional model parameters, unpacked into the
            ``Classifier`` constructor. ``None`` means no extra
            parameters.

    Returns:
        (train_pred, test_pred):
            train_pred: result of ``twostep_predict`` on the whole
                training set, used to check whether overfitting occurred.
            test_pred: result of ``twostep_predict`` on the test set,
                i.e. the final result.
            NOTE(review): the original note called these accuracies, but
            they are whatever ``twostep_predict`` returns -- presumably
            predicted labels; confirm.
    """
    # Accept None so callers without extra parameters need not pass {}.
    params = kwargs or {}

    (model1_feature, model1_label,
     model2_feature, model2_label) = make_twostep_dataset(train_feature,
                                                          train_label)

    # Build the two models.
    clf1 = Classifier(**params).fit(model1_feature, model1_label)
    clf2 = Classifier(**params).fit(model2_feature, model2_label)

    train_pred = twostep_predict(clf1, clf2, train_feature)
    test_pred = twostep_predict(clf1, clf2, test_feature)

    # Report the accuracy of each step of the two-step model.
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Step 1 acc:')
    step1_pred = clf1.predict(model1_feature)
    print(classification_report(model1_label, step1_pred))
    print('Step 2 acc:')
    step2_pred = clf2.predict(model2_feature)
    print(classification_report(model2_label, step2_pred))

    return train_pred, test_pred
def twostep(train_feature, train_label, test_feature, Classifier, kwargs):
    """Train the two models of the two-step method and predict with them.

    The training data is split by ``make_twostep_dataset`` into one
    dataset per step, a separate ``Classifier`` instance is fitted on
    each, and the fitted pair is applied to the training and test features
    through ``twostep_predict``. A classification report for each step,
    evaluated on that step's own training data, is printed.

    Args:
        train_feature: training features.
        train_label: training labels.
        test_feature: test features.
        Classifier: the classifier class; it is instantiated twice and
            must provide ``fit`` (returning the fitted model) and
            ``predict``.
        kwargs: dict of additional model parameters, unpacked into the
            ``Classifier`` constructor. ``None`` means no extra
            parameters.

    Returns:
        (train_pred, test_pred):
            train_pred: result of ``twostep_predict`` on the whole
                training set, used to check whether overfitting occurred.
            test_pred: result of ``twostep_predict`` on the test set,
                i.e. the final result.
            NOTE(review): the original note called these accuracies, but
            they are whatever ``twostep_predict`` returns -- presumably
            predicted labels; confirm.
    """
    # Accept None so callers without extra parameters need not pass {}.
    params = kwargs or {}

    (model1_feature, model1_label,
     model2_feature, model2_label) = make_twostep_dataset(train_feature,
                                                          train_label)

    # Build the two models.
    clf1 = Classifier(**params).fit(model1_feature, model1_label)
    clf2 = Classifier(**params).fit(model2_feature, model2_label)

    train_pred = twostep_predict(clf1, clf2, train_feature)
    test_pred = twostep_predict(clf1, clf2, test_feature)

    # Report the accuracy of each step of the two-step model.
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Step 1 acc:')
    step1_pred = clf1.predict(model1_feature)
    print(classification_report(model1_label, step1_pred))
    print('Step 2 acc:')
    step2_pred = clf2.predict(model2_feature)
    print(classification_report(model2_label, step2_pred))

    return train_pred, test_pred
def fit(self, x_train, y_train):
    """Fit the LDF (linear discriminant function) model.

    A QDF model is fitted first and its ``prior``, ``mean`` and
    ``cov_matrix`` attributes are reused. The class covariances are
    combined into one shared covariance (prior-weighted sum), which is
    inverted and used to derive one weight column and one bias per class.

    Sets ``num_class``, ``label_map``, ``inverse_cov``, ``weight`` (one
    column per class) and ``w0`` (list with one bias per class).

    Args:
        x_train: training features; must expose ``.shape[1]`` as the
            number of features.
        y_train: training labels, remapped through
            ``QDF.transform_label`` before fitting.

    Returns:
        self, so calls can be chained.
    """
    self.num_class, y_train, self.label_map = QDF.transform_label(y_train)

    qdf_model = QDF().fit(x_train, y_train)
    class_prior = qdf_model.prior
    class_mean = qdf_model.mean
    class_cov = qdf_model.cov_matrix

    # Shared covariance: sum of the class covariances weighted by prior.
    shared_cov = np.matlib.zeros(class_cov[0].shape)
    for k in range(self.num_class):
        shared_cov += (class_prior[k] * class_cov[k])
    self.inverse_cov = shared_cov.getI()

    n_features = x_train.shape[1]
    # The first column is only a placeholder that fixes the row count; it
    # is sliced off again once all class columns have been stacked.
    columns = [np.matrix([0] * n_features).T]
    self.w0 = []
    inv_cov_t = self.inverse_cov.T
    for k in range(self.num_class):
        # NOTE(review): class_mean[k] is presumably a column np.matrix so
        # that the matrix products below are well-formed -- confirm in QDF.
        mu = class_mean[k]
        columns.append(2 * inv_cov_t * mu)
        log_prior_term = 2 * math.log(class_prior[k])
        mean_quadratic = (mu.T * self.inverse_cov * mu)[0, 0]
        self.w0.append(log_prior_term - mean_quadratic)

    self.weight = np.hstack(columns)[:, 1:]
    return self
def predict(self, x_test):
    """Predict a class for every row of ``x_test``.

    For each row the discriminant score
    ``-x'Cx + weight[:, i]'x + w0[i]`` (``C`` being ``self.inverse_cov``)
    is evaluated for every class index ``i`` and the index with the
    largest score is chosen. Ties keep the lowest index because only a
    strictly greater score replaces the current best.

    Args:
        x_test: iterable of feature rows; each row is converted to a
            float64 column ``np.matrix``.

    Returns:
        The result of ``QDF.map_class_index`` applied to the list of
        winning class indices and ``self.label_map``. An index of -1 is
        recorded for a row when no score exceeds ``-inf`` (for example
        when every score is NaN).
    """
    predicted_labels = []
    for row in x_test:
        x = np.matrix(row, np.float64).T
        # This term does not depend on the class index, so it is computed
        # once per sample instead of once per class.
        quadratic_term = -1 * (x.T * self.inverse_cov * x)

        max_posteriori = -float('inf')
        prediction = -1
        for i in range(self.num_class):
            p = (quadratic_term
                 + self.weight[:, i].T * x
                 + self.w0[i])[0, 0]
            if p > max_posteriori:
                max_posteriori = p
                prediction = i
        predicted_labels.append(prediction)
    return QDF.map_class_index(predicted_labels, self.label_map)
def predict(self, x_test):
    """Predict a class for every row of ``x_test``.

    For each row the discriminant score
    ``-x'Cx + weight[:, i]'x + w0[i]`` (``C`` being ``self.inverse_cov``)
    is evaluated for every class index ``i`` and the index with the
    largest score is chosen. Ties keep the lowest index because only a
    strictly greater score replaces the current best.

    Args:
        x_test: iterable of feature rows; each row is converted to a
            float64 column ``np.matrix``.

    Returns:
        The result of ``QDF.map_class_index`` applied to the list of
        winning class indices and ``self.label_map``. An index of -1 is
        recorded for a row when no score exceeds ``-inf`` (for example
        when every score is NaN).
    """
    predicted_labels = []
    for row in x_test:
        x = np.matrix(row, np.float64).T
        # This term does not depend on the class index, so it is computed
        # once per sample instead of once per class.
        quadratic_term = -1 * (x.T * self.inverse_cov * x)

        max_posteriori = -float('inf')
        prediction = -1
        for i in range(self.num_class):
            p = (quadratic_term
                 + self.weight[:, i].T * x
                 + self.w0[i])[0, 0]
            if p > max_posteriori:
                max_posteriori = p
                prediction = i
        predicted_labels.append(prediction)
    return QDF.map_class_index(predicted_labels, self.label_map)