Example #1
def main():
    print('--- Adaboost ---')
    data = datasets.load_digits()
    X, y = data.data, data.target

    digit1 = 1
    digit2 = 8

    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]

    y[y == digit1] = 1
    y[y == digit2] = -1

    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = Adaboost(n_estimators=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_pred, y_test)

    clf_tree = ClassificationTree()
    clf_tree.fit(X_train, y_train)
    y_pred_tree = clf_tree.predict(X_test)
    acc_tree = accuracy_score(y_pred_tree, y_test)

    print("Adaboost_Accuracy:", acc)
    print("Tree_Accuracy:", acc_tree)
Example #2
    def fit(self, X, y):
        n_samples, n_features = X.shape[0], X.shape[1]

        # Initialize every sample with an equal weight
        w = np.full(n_samples, (1 / n_samples))
        # Store each weak classifier
        self.clfs = []
        # Per-classifier vote weights and prediction-flip flags
        self.alphas = np.zeros(self.n_clfs)
        self.polarity = np.ones(self.n_clfs)

        for i in tqdm(range(self.n_clfs)):
            # Instantiate a weak classifier
            clf = ClassificationTree()
            # Train on a bootstrap sample drawn according to the current
            # weights, so previously misclassified samples gain influence
            sample_idx = np.random.choice(n_samples, size=n_samples, p=w)
            clf.fit(X[sample_idx], y[sample_idx])
            # Predict on the full training set
            y_pred = clf.predict(X)
            # Weighted training error
            error = sum(w[y != y_pred])
            # If the error rate exceeds 0.5, flip the classifier's predictions;
            # since this is binary classification (Adaboost only handles the
            # two-class case), the flipped error rate becomes 1 - error < 0.5
            if error > 0.5:
                self.polarity[i] = -1
                y_pred *= -1
                error = 1 - error
            self.alphas[i] = 0.5 * np.log((1.0 - error) / (error + 1e-10))
            predictions = np.array(self.polarity[i] * y_pred)
            w *= np.exp(-self.alphas[i] * y * predictions)
            w /= sum(w)
            self.clfs.append(clf)
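
The matching predict method is not shown in this example; here is a minimal sketch, assuming only the clfs, alphas, and polarity attributes populated by fit above:

    def predict(self, X):
        # Weighted vote of the weak classifiers; polarity undoes the
        # prediction flip applied to high-error learners during fit
        y_pred = np.zeros(X.shape[0])
        for i, clf in enumerate(self.clfs):
            y_pred += self.alphas[i] * self.polarity[i] * np.asarray(clf.predict(X))
        # The sign of the weighted vote gives the final label in {-1, 1}
        return np.sign(y_pred)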
Example #3
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(n_estimators=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)
Example #4
def main():
    
    print ("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, seed=3)  

    clf = XGBoost(n_estimators=20)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)
Example #5
def main():

    print("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)
Example #6
def main():

    data = datasets.load_digits()
    X = data.data
    y = data.target

    y = to_categorical(y.astype("int"))
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    clf = MultilayerPerceptron(n_hidden)
    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(data.target))
Example #7
def main_classifier():

    print("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(n_estimators=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)
Example #8
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # data preprocess: One-hot encoding of nominal y-values
    y = to_categorical(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = Perceptron(n_iterations=5000, learning_rate=0.001, loss=CrossEntropy, activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    Plot().plot_in_2d(X_test, y_pred, title="perceptron", accuracy=accuracy, legend_labels=np.unique(data.target))
Example #9
    def evaluate(self, sess, data_manager, id_to_tag):  # score both slots and intents
        """
        :param sess: session to run the model
        :param data_manager: manager that yields batches of data
        :param id_to_tag: index to tag name
        :return: slot results and per-batch intent accuracy
        """
        slot_results = []
        intent_results = []
        trans = self.trans.eval()
        for batch in data_manager.iter_batch():
            strings = batch[0]
            tags = batch[-2]  # ground-truth slot labels
            intents = np.asarray(batch[-1])[:, 1]  # ground-truth intent labels

            lengths, scores_slot, intent_idx, intent_rank = self.run_step(
                sess, False, batch)

            batch_paths = self.decode(scores_slot, lengths,
                                      trans)  # Viterbi decoding of the best tag path

            for i in range(len(strings)):
                result = []
                string = strings[i][:lengths[i]]

                gold = iobes_iob(
                    [id_to_tag[int(x)] for x in tags[i][:lengths[i]]])
                pred = iobes_iob(
                    [id_to_tag[int(x)] for x in batch_paths[i][:lengths[i]]])

                for char, g, p in zip(string, gold, pred):
                    result.append(" ".join([char, g, p]))

                slot_results.append(result)

            intent_acc = accuracy_score(intents, intent_idx)
            intent_results.append(intent_acc)

        return slot_results, intent_results
Example #10
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost_1(n_clfs=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
Example #11
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

from deep.activations_functions import Sigmoid
from deep.loss_functions import CrossEntropy
from deep.perceptron import Perceptron
from utils.utils import accuracy_score


np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)

clf = Perceptron(n_iterations=5000, learning_rate=0.001, loss=CrossEntropy, activation_function=Sigmoid)
clf.fit(X, y)


x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.01
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict over the grid to draw the decision surface
grid = np.c_[xx.ravel(), yy.ravel()]
Z = np.argmax(clf.predict(grid), axis=1)

# Accuracy is computed on the training points, not on the grid
y_pred = np.argmax(clf.predict(X), axis=1)
accuracy = accuracy_score(y, y_pred)
print(f"Accuracy: {accuracy}")

plt.contourf(xx, yy, Z.reshape(xx.shape), cmap=plt.cm.get_cmap("Spectral"))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.get_cmap("Spectral"))
plt.show()
Example #12
    def accuracy(self, y, p):
        # y and p are one-hot / probability matrices; compare argmax class indices
        return accuracy_score(np.argmax(y, axis=1), np.argmax(p, axis=1))
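
A minimal illustrative call showing the same computation standalone (the arrays below are invented for demonstration):

import numpy as np
from utils.utils import accuracy_score

y = np.array([[1, 0], [0, 1], [1, 0]])              # one-hot truth -> classes [0, 1, 0]
p = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])  # probabilities -> classes [0, 1, 1]
# Same computation as the method body: 2 of 3 argmax indices match -> ~0.667
print(accuracy_score(np.argmax(y, axis=1), np.argmax(p, axis=1)))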
Example #13
                    self.parameters[c][feature_i]['var'], feature_value)
                    posterior *= likelihood
                else:
                    posterior *= self.parameters[c][feature_i]
            # Store the probability that x belongs to class c
            posteriors.append(posterior)
        # Return the class with the highest posterior probability
        return self.classes[np.argmax(posteriors)]

    def predict(self, X_test):
        y_pred = [self.get_label(x) for x in X_test]
        return y_pred


if __name__ == '__main__':

    print("-- Navie-Bayes --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = Navie_Bayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)
Example #14
def main(args):
    torch.manual_seed(17)  # Seed PyTorch for reproducibility

    # Load train, dev, test iterators with auto-batching and pretrained vectors
    (train, dev, test, code_vecs,
     comm_vecs) = utils.load_all_data(args.batch_size, args.corpus)

    # Create model
    model = cl.CodeCommClassifier(code_vecs,
                                  comm_vecs,
                                  gpu=args.use_gpu,
                                  model_type=args.model)

    # Load a saved model
    if args.load != "":
        model.load_state_dict(torch.load(args.load))

    # Discover available CUDA devices and use DataParallelism if possible
    devices = list(range(torch.cuda.device_count()))
    if len(devices) > 1:
        print("Using {} GPUs!!".format(len(devices)))
        model = nn.DataParallel(model)

    # Send model to GPU for processing
    if args.use_gpu:
        model.cuda()

    # Do training
    if args.epochs > 0:
        print("Training {} model w/ batch_sz={} on {} dataset".format(
            args.model, args.batch_size, args.corpus))
        # Adam optimizer works, SGD fails to train the network for any batch size
        optimizer = optim.Adam(model.parameters(), args.learning_rate)
        class_weights = torch.tensor([10.])
        if args.use_gpu:
            class_weights = class_weights.cuda()
        for epoch in range(args.epochs):
            model.train()  # Set the model to training mode
            with tqdm(total=len(train),
                      desc="Epoch {}/{}".format(epoch + 1,
                                                args.epochs)) as pbar:
                total_loss = 0
                for b_idx, batch in enumerate(train):
                    optimizer.zero_grad()  # Clear the current gradient

                    # Get the label into a tensor for loss prop
                    truth = torch.autograd.Variable(batch.label).float()
                    if args.use_gpu:
                        truth = truth.cuda()

                    # Run the model using the batch
                    outputs = model((batch.code, batch.comm))

                    # Weighted binary cross-entropy computed on the raw logits
                    loss = F.binary_cross_entropy_with_logits(
                        outputs.view(-1),
                        truth.view(-1),
                        pos_weight=class_weights)

                    loss.backward()  # Propagate loss
                    optimizer.step()  # Update the optimizer

                    new_loss = loss.item()
                    total_loss += new_loss
                    curr_loss = total_loss / (b_idx + 1)
                    pbar.set_postfix(batch_loss=curr_loss)
                    pbar.update()

            # Check DEV accuracy after every epoch
            scores = score_dataset(model, dev)
            acc = utils.accuracy_score(scores)
            sys.stdout.write("Epoch {} -- dev acc: {}%\n".format(
                epoch + 1, acc))

    # Save the model weights
    if args.save != "":
        torch.save(model.state_dict(), args.save)

    # Save the DEV set evaluations
    if args.eval_dev:
        print(
            "Evaluating Dev for {} model w/ batch_sz={} on {} dataset".format(
                args.model, args.batch_size, args.corpus))
        scores = score_dataset(model, dev)
        dev_score_path = utils.CODE_CORPUS / "results" / "{}_{}_{}_{}_gpus_scores_dev.pkl".format(
            args.model, args.batch_size, args.corpus, len(devices))
        utils.save_scores(scores, dev_score_path)

    # Save the TEST set evaluations
    if args.eval_test:
        print(
            "Evaluating Test for {} model w/ batch_sz={} on {} dataset".format(
                args.model, args.batch_size, args.corpus))
        scores = score_dataset(model, test)
        test_score_path = utils.CODE_CORPUS / "results" / "{}_{}_{}_{}_gpus_scores_test.pkl".format(
            args.model, args.batch_size, args.corpus, len(devices))
        utils.save_scores(scores, test_score_path)
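
score_dataset is called throughout but not shown here. A minimal sketch under the assumption that it returns (score, label) pairs consumable by utils.accuracy_score and utils.save_scores (the exact return format is not confirmed by the source):

def score_dataset(model, data_iter):
    # Hypothetical helper: run the model over a dataset and collect
    # (sigmoid score, gold label) pairs without tracking gradients
    model.eval()
    scores = []
    with torch.no_grad():
        for batch in data_iter:
            outputs = model((batch.code, batch.comm)).view(-1)
            probs = torch.sigmoid(outputs).cpu().tolist()
            labels = batch.label.view(-1).cpu().tolist()
            scores.extend(zip(probs, labels))
    return scores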