Example #1
    def get(self):
        """Gets the Sentiment Labelled Sentences dataset (sparse).

    Returns:
      x_train: scipy.sparse.*matrix
        array of features of training data
      y_train: np.array
        1-D array of class labels of training data
      x_test: scipy.sparse.*matrix
        array of features of test data
      y_test: np.array
          1-D array of class labels of the test data
    """
        data_path = '{}/{}'.format(FILE_PATH, 'sentiment_sentences')
        with tf.gfile.GFile(
                '{}/{}'.format(data_path, 'amazon_cells_labelled.txt'),
                'r') as f:
            amazon_df = pd.read_csv(f,
                                    sep='\t',
                                    header=None,
                                    quoting=csv.QUOTE_NONE)
        with tf.gfile.GFile('{}/{}'.format(data_path, 'imdb_labelled.txt'),
                            'r') as f:
            imdb_df = pd.read_csv(f,
                                  sep='\t',
                                  header=None,
                                  quoting=csv.QUOTE_NONE)
        with tf.gfile.GFile('{}/{}'.format(data_path, 'yelp_labelled.txt'),
                            'r') as f:
            yelp_df = pd.read_csv(f,
                                  sep='\t',
                                  header=None,
                                  quoting=csv.QUOTE_NONE)

        df = pd.concat([amazon_df, imdb_df, yelp_df])

        x = df[0].values
        y = df[1].values

        x, y = shuffle_arrays(x, y, random_state=RANDOM_STATE)

        # Hold out the first 1000 shuffled examples as the test set.
        train_test_split = 1000
        x_train = x[train_test_split:]
        y_train = y[train_test_split:]
        x_test = x[:train_test_split]
        y_test = y[:train_test_split]

        x_train, x_test = vectorize_text(x_train,
                                         x_test,
                                         method=self.vectorizer)

        return x_train, y_train, x_test, y_test
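
The snippet leans on two helpers that are not shown, shuffle_arrays and vectorize_text. A minimal sketch of what they plausibly look like, assuming a shared random permutation for the shuffle and an sklearn TF-IDF vectorizer (the 'tfidf' method name is hypothetical):

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

def shuffle_arrays(x, y, random_state=None):
    # One shared permutation keeps features and labels aligned.
    rng = np.random.default_rng(random_state)
    perm = rng.permutation(len(y))
    return x[perm], y[perm]

def vectorize_text(x_train, x_test, method='tfidf'):
    # Hypothetical helper: fit on the training split only, then transform
    # both splits; returns scipy.sparse matrices as the docstring promises.
    if method != 'tfidf':
        raise ValueError(f'unsupported method: {method}')
    vectorizer = TfidfVectorizer()
    return vectorizer.fit_transform(x_train), vectorizer.transform(x_test)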
Example #2
import numpy as np
import matplotlib.pyplot as plt

def train(X_train,
          y_train,
          epochs=5,
          learning_rate=0.001,
          shuffle=True,
          verbose=True,
          plotting=True):
    """
    Train a perceptron using Widrow-Hoff

    :param X_train: training data
    :param y_train: training labels
    :param epochs: number of epochs
    :param learning_rate: the learning rate
    :param shuffle: if we want to shuffle the data at the beginning of each epoch
    :param verbose: if we want to show the accuracy on the training data after each epoch
    :param plotting: if we want to plot the decision boundary at each epoch
    :return: return trained W and b
    """
    num_weights = X_train.shape[1]
    mu = 0.0
    sigma = 0.15
    W = np.random.normal(mu, sigma, num_weights)
    b = np.zeros(1)

    for e in range(epochs):
        if shuffle:
            X_train, y_train = shuffle_arrays(X_train, y_train)
        for x, y in zip(X_train, y_train):
            y_hat = np.dot(x, W) + b
            # LMS update: gradient of the squared-error loss
            # (y - y_hat)^2 / 2 with respect to W and b.
            W = W - learning_rate * (y_hat - y) * x
            b = b - learning_rate * (y_hat - y)

        if plotting:
            plt.title("Epoch " + str(e + 1))
            # x, y here are the last sample from the loop above, presumably
            # highlighted by plot_decision_boundary.
            plot_decision_boundary(X_train, y_train, W, b, x, y)
        if verbose:
            print(f'accuracy {e}:',
                  (y_train == np.round(sigmoid(X_train.dot(W) + b))).mean())

    if verbose:
        print('accuracy',
              (y_train == np.round(sigmoid(X_train.dot(W) + b))).mean())

    return W, b
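
A quick smoke test of train on a toy linearly separable problem. sigmoid is assumed to be the standard logistic function used by the verbose accuracy check; plot_decision_boundary is not sketched here, so plotting stays off, and shuffle_arrays is the helper sketched under Example #1:

import numpy as np

def sigmoid(z):
    # Assumed: the standard logistic function.
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(-1.0, 0.3, (50, 2)),
               rng.normal(1.0, 0.3, (50, 2))])
y = np.concatenate([np.zeros(50), np.ones(50)])
W, b = train(X, y, epochs=10, plotting=False)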
Example #3
import numpy as np

def train_nn(X_train,
             y_train,
             epochs=5,
             learning_rate=0.01,
             shuffle=True,
             verbose=True,
             num_hidden_neurons=5):
    """Train a one-hidden-layer network with full-batch gradient descent."""
    mu = 0.0

    # Hidden-layer weights: (input_dim, num_hidden_neurons), drawn from a
    # normal distribution with mean mu and standard deviation
    # 1/num_hidden_neurons (a Xavier-style initialization).
    W_1 = np.random.normal(mu, 1.0 / num_hidden_neurons,
                           (X_train.shape[1], num_hidden_neurons))
    b_1 = np.zeros(num_hidden_neurons)  # hidden bias initialized to 0

    # Output-layer weights: (num_hidden_neurons, 1) for the single output
    # neuron, drawn with mean mu and standard deviation 1.0 (= 1/fan_out).
    W_2 = np.random.normal(mu, 1.0 / 1, (num_hidden_neurons, 1))
    b_2 = np.zeros(1)  # output bias initialized to 0

    for e in range(epochs):
        if shuffle:
            X_train, y_train = shuffle_arrays(X_train, y_train)

        # Full-batch forward pass and backpropagation.
        z_1, a_1, z_2, a_2 = forward(X_train, W_1, b_1, W_2, b_2)
        dw_1, db_1, dw_2, db_2 = backward(a_1, a_2, z_1, W_2, X_train, y_train,
                                          X_train.shape[0])

        W_1 -= learning_rate * dw_1
        b_1 -= learning_rate * db_1
        W_2 -= learning_rate * dw_2
        b_2 -= learning_rate * db_2

        if verbose:
            # Training accuracy; ravel() guards against (m, 1) vs (m,) shapes
            # when comparing the predictions with y_train.
            print(f'accuracy {e}:',
                  (np.round(a_2).ravel() == y_train).mean())

    return W_1, b_1, W_2, b_2
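
forward and backward are not shown. One plausible pair that matches the shapes used above, assuming a tanh hidden layer and a sigmoid output trained with binary cross-entropy (which gives dL/dz_2 = a_2 - y):

import numpy as np

def sigmoid(z):
    # Same logistic-function assumption as in Example #2.
    return 1.0 / (1.0 + np.exp(-z))

def forward(X, W_1, b_1, W_2, b_2):
    z_1 = X @ W_1 + b_1      # (m, hidden) hidden pre-activation
    a_1 = np.tanh(z_1)       # assumed hidden activation
    z_2 = a_1 @ W_2 + b_2    # (m, 1) output pre-activation
    a_2 = sigmoid(z_2)       # predicted probability
    return z_1, a_1, z_2, a_2

def backward(a_1, a_2, z_1, W_2, X, y, m):
    # Cross-entropy with a sigmoid output collapses to dL/dz_2 = a_2 - y.
    dz_2 = a_2 - y.reshape(-1, 1)                        # (m, 1)
    dw_2 = a_1.T @ dz_2 / m                              # (hidden, 1)
    db_2 = dz_2.sum(axis=0) / m                          # (1,)
    # Backprop through tanh: d tanh(z)/dz = 1 - tanh(z)^2.
    dz_1 = (dz_2 @ W_2.T) * (1.0 - np.tanh(z_1) ** 2)    # (m, hidden)
    dw_1 = X.T @ dz_1 / m                                # (in_dim, hidden)
    db_1 = dz_1.sum(axis=0) / m                          # (hidden,)
    return dw_1, db_1, dw_2, db_2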