Example #1
def generate_2D(N, corners):
    if corners:
        print("Generating %dx%d 2-D adjacency system with corners..." %
              (N**2, N**2))
        A = np.zeros((N**2, N**2)) + 8 * np.eye(N**2)
    else:
        print("Generating %dx%d 2-D adjacency system without corners..." %
              (N**2, N**2))
        A = np.zeros((N**2, N**2)) + 4 * np.eye(N**2)
    # These are the same for both cases
    off_one = np.full(N**2 - 1, -1, dtype=np.float64)
    A += np.diag(off_one, k=1)
    A += np.diag(off_one, k=-1)
    off_N = np.full(N * (N - 1), -1, dtype=np.float64)
    A += np.diag(off_N, k=N)
    A += np.diag(off_N, k=-N)
    # If we have corners then we have four more cases
    if corners:
        off_N_plus = np.full(N * (N - 1) - 1, -1, dtype=np.float64)
        A += np.diag(off_N_plus, k=N + 1)
        A += np.diag(off_N_plus, k=-(N + 1))
        off_N_minus = np.full(N * (N - 1) + 1, -1, dtype=np.float64)
        A += np.diag(off_N_minus, k=N - 1)
        A += np.diag(off_N_minus, k=-(N - 1))
    # Then we can generate a random right-hand-side vector b
    b = np.random.rand(N**2)
    return A, b
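A quick usage sketch for generate_2D, assuming numpy is imported as np as
throughout these examples; the grid size here is arbitrary:

A, b = generate_2D(4, corners=False)
assert A.shape == (16, 16) and b.shape == (16,)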
Example #2
def cross_correlate(x, y, C, K, R, S, B, H, W):
    dw = np.zeros(shape=(R, S, C, K))
    # cross-correlate images to compute weight gradients
    y_pad = np.zeros(shape=(K, B, H + R - 1, W + S - 1))
    y_pad[:, :, R // 2:-(R // 2), S // 2:-(S // 2)] = y
    for r in range(R):
        for s in range(S):
            y_shift = y_pad[:, :, r:r + H, s:s + W]
            for c in range(C):
                for k in range(K):
                    dw[r, s, c,
                       k] = np.sum(x[c, :, :, :] * y_shift[k, :, :, :])
    return dw
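A shape sketch for cross_correlate; all sizes here are hypothetical (C input
channels, K output channels, an RxS filter, batch size B, HxW images):

x = np.random.randn(3, 2, 8, 8)  # activations, shape (C, B, H, W)
y = np.random.randn(4, 2, 8, 8)  # output gradients, shape (K, B, H, W)
dw = cross_correlate(x, y, C=3, K=4, R=3, S=3, B=2, H=8, W=8)
assert dw.shape == (3, 3, 3, 4)  # weight gradients, shape (R, S, C, K)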
Example #3
def solve(A, b, conv_iters, max_iters, verbose):
    print("Solving system...")
    x = np.zeros(A.shape[1])
    r = b - A.dot(x)
    p = r
    rsold = r.dot(r)
    converged = -1
    # Should always converge in fewer iterations than this
    max_iters = (min(max_iters, b.shape[0])
                 if max_iters is not None else b.shape[0])
    for i in range(max_iters):
        Ap = A.dot(p)
        alpha = rsold / (p.dot(Ap))
        x = x + alpha * p
        r = r - alpha * Ap
        rsnew = r.dot(r)
        # We only do the convergence test every conv_iters or on the last
        # iteration
        if (i % conv_iters == 0
                or i == (max_iters - 1)) and np.sqrt(rsnew) < 1e-10:
            converged = i
            break
        if verbose:
            print("Residual: " + str(rsnew))
        beta = rsnew / rsold
        p = r + beta * p
        rsold = rsnew
    if converged < 0:
        print("Convergence FAILURE!")
    else:
        print("Converged in %d iterations" % (converged))
    return x
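The conjugate gradient solver pairs naturally with generate_2D from Example
#1; a minimal run (grid size and conv_iters are arbitrary choices):

A, b = generate_2D(8, corners=True)
x = solve(A, b, conv_iters=8, max_iters=None, verbose=False)
print("residual norm:", np.linalg.norm(b - A.dot(x)))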
Example #4
def logistic_regression(
    T, features, target, steps, learning_rate, sample, add_intercept=False
):
    if add_intercept:
        intercept = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1], dtype=T)

    for step in range(steps):
        scores = np.dot(features, weights)
        predictions = sigmoid(scores)

        error = target - predictions
        gradient = np.dot(error, features)
        weights += learning_rate * gradient

        if step % sample == 0:
            print(
                "Log Likelihood of step "
                + str(step)
                + ": "
                + str(log_likelihood(features, target, weights))
            )

    return weights
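logistic_regression relies on sigmoid and log_likelihood helpers that are not
shown in this section; a minimal sketch of the standard definitions:

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def log_likelihood(features, target, weights):
    # Bernoulli log-likelihood: sum(y * s - log(1 + e^s)) over the samples
    scores = np.dot(features, weights)
    return np.sum(target * scores - np.log(1.0 + np.exp(scores)))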
Example #5
    def forward(X, WLSTM, c0=None, h0=None):
        """
        X should be of shape (n,b,input_size), where n = length of sequence, b
        = batch size
        """
        n, b, input_size = X.shape
        d = int(WLSTM.shape[1] / 4)  # hidden size
        if c0 is None:
            c0 = np.zeros((b, d))
        if h0 is None:
            h0 = np.zeros((b, d))

        # Perform the LSTM forward pass with X as the input
        xphpb = WLSTM.shape[0]  # x plus h plus bias
        # input [1, xt, ht-1] to each tick of the LSTM
        Hin = np.zeros((n, b, xphpb))
        # hidden representation of the LSTM (gated cell content)
        Hout = np.zeros((n, b, d))
        IFOG = np.zeros((n, b, d * 4))  # input, forget, output, gate (IFOG)
        IFOGf = np.zeros((n, b, d * 4))  # after nonlinearity
        C = np.zeros((n, b, d))  # cell content
        Ct = np.zeros((n, b, d))  # tanh of cell content

        for t in range(n):
            # concat [x,h] as input to the LSTM
            prevh = Hout[t - 1] if t > 0 else h0
            Hin[t, :, 0] = 1  # bias
            Hin[t, :, 1:input_size + 1] = X[t]
            Hin[t, :, input_size + 1:] = prevh
            # compute all gate activations; this matrix multiply is most of the work
            IFOG[t] = Hin[t].dot(WLSTM)
            # non-linearities
            # sigmoids; these are the gates
            IFOGf[t, :, :3 * d] = 1.0 / (1.0 + np.exp(-IFOG[t, :, :3 * d]))
            IFOGf[t, :, 3 * d:] = np.tanh(IFOG[t, :, 3 * d:])  # tanh
            # compute the cell activation
            prevc = C[t - 1] if t > 0 else c0
            C[t] = (IFOGf[t, :, :d] * IFOGf[t, :, 3 * d:] +
                    IFOGf[t, :, d:2 * d] * prevc)
            Ct[t] = np.tanh(C[t])
            Hout[t] = IFOGf[t, :, 2 * d:3 * d] * Ct[t]

        cache = {}
        cache["WLSTM"] = WLSTM
        cache["Hout"] = Hout
        cache["IFOGf"] = IFOGf
        cache["IFOG"] = IFOG
        cache["C"] = C
        cache["Ct"] = Ct
        cache["Hin"] = Hin
        cache["c0"] = c0
        cache["h0"] = h0

        # also return the final cell and hidden states so the LSTM can be
        # continued from this state if needed
        return Hout, C[t], Hout[t], cache
Example #6
def initialize(N):
    print("Initializing stencil grid...")
    grid = np.zeros((N + 2, N + 2))
    grid[:, 0] = -273.15
    grid[:, -1] = -273.15
    grid[-1, :] = -273.15
    grid[0, :] = 40.0
    return grid
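One relaxation sweep over the interior of the grid from initialize(); the 0.2
factor averages each cell with its four neighbors, mirroring the stencil in
Example #14 below:

grid = initialize(10)
center = grid[1:-1, 1:-1]
north, south = grid[0:-2, 1:-1], grid[2:, 1:-1]
west, east = grid[1:-1, 0:-2], grid[1:-1, 2:]
center[:] = 0.2 * (center + north + south + west + east)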
Example #7
def solve(A, b, iters, verbose):
    print("Solving system...")
    x = np.zeros(A.shape[1])
    d = np.diag(A)
    R = A - np.diag(d)
    for i in range(iters):
        x = (b - np.dot(R, x)) / d
    return x
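Jacobi iteration is guaranteed to converge when A is strictly diagonally
dominant; the corner-free system from generate_2D is only weakly dominant, so
this sketch adds a small diagonal shift (an illustrative choice, not part of
the original benchmark):

A, b = generate_2D(8, corners=False)
A += 0.1 * np.eye(A.shape[0])  # make the diagonal dominance strict
x = solve(A, b, iters=200, verbose=False)
print("residual norm:", np.linalg.norm(b - A.dot(x)))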
Example #8
    def __init__(self, H_size, X_size, z_size, weight_sd):
        self.W_f = Param(
            "W_f", np.random.randn(H_size, z_size) * weight_sd + 0.5
        )
        self.b_f = Param("b_f", np.zeros((H_size, 1)))

        self.W_i = Param(
            "W_i", np.random.randn(H_size, z_size) * weight_sd + 0.5
        )
        self.b_i = Param("b_i", np.zeros((H_size, 1)))

        self.W_C = Param("W_C", np.random.randn(H_size, z_size) * weight_sd)
        self.b_C = Param("b_C", np.zeros((H_size, 1)))

        self.W_o = Param(
            "W_o", np.random.randn(H_size, z_size) * weight_sd + 0.5
        )
        self.b_o = Param("b_o", np.zeros((H_size, 1)))

        # For final layer to predict the next character
        self.W_v = Param("W_v", np.random.randn(X_size, H_size) * weight_sd)
        self.b_v = Param("b_v", np.zeros((X_size, 1)))
Example #9
def run_kmeans(C, D, T, I, N, S, benchmarking):  # noqa: E741
    print("Running kmeans...")
    print("Number of data points: " + str(N))
    print("Number of dimensions: " + str(D))
    print("Number of centroids: " + str(C))
    print("Max iterations: " + str(I))
    start = datetime.datetime.now()
    data, centroids = initialize(N, D, C, T)

    data_dots = np.square(np.linalg.norm(data, ord=2, axis=1))
    zero_point = np.zeros((1, data.shape[1]), dtype=data.dtype)

    labels = None
    iteration = 0
    prior_distance_sum = None
    # We run for max iterations or until we converge
    # We only test convergence every S iterations
    while iteration < I:
        pairwise_distances = calculate_distances(data, centroids, data_dots)

        new_labels = relabel(pairwise_distances)

        distance_sum = find_centroids(centroids, data, new_labels,
                                      pairwise_distances, zero_point, C, D)

        if iteration > 0 and iteration % S == 0:
            changes = np.not_equal(labels, new_labels)
            total_changes = np.sum(changes)
            delta = distance_sum / prior_distance_sum
            print("Iteration " + str(iteration) + " produced " +
                  str(total_changes) + " changes, and total distance is " +
                  str(distance_sum))
            # We ignore the result of the threshold test in the case
            # that we are running performance benchmarks to measure
            # performance for a certain number of iterations
            if delta > 1 - 0.000001 and not benchmarking:
                print("Threshold triggered, terminating iterations early")
                break
        prior_distance_sum = distance_sum
        labels = new_labels
        iteration += 1
    # This final distance sum also synchronizes the results
    print("Final distance sum at iteration " + str(iteration) + ": " +
          str(prior_distance_sum))
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    print("Elapsed Time: " + str(total) + " ms")
    return total
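run_kmeans relies on helpers defined elsewhere; a plausible sketch of
calculate_distances and relabel, using the precomputed data_dots (the squared
norm of each point) and the expansion ||x - c||^2 = ||x||^2 - 2 x.c + ||c||^2
(the exact signatures are assumptions):

def calculate_distances(data, centroids, data_dots):
    centroid_dots = np.square(np.linalg.norm(centroids, ord=2, axis=1))
    # (N, C) matrix of squared point-to-centroid distances
    return (data_dots[:, np.newaxis] - 2.0 * data.dot(centroids.T) +
            centroid_dots[np.newaxis, :])


def relabel(pairwise_distances):
    # assign each point to its nearest centroid
    return np.argmin(pairwise_distances, axis=1)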
Example #10
def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing):
    start = datetime.datetime.now()

    X = np.random.randn(sentence_length, batch_size, hidden_size)
    h0 = np.random.randn(1, hidden_size)
    WLSTM = np.random.randn(
        word_size + hidden_size, 4 * hidden_size
    ) / np.sqrt(word_size + hidden_size)

    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size

    Hin = np.zeros((n, b, xphpb))
    Hout = np.zeros((n, b, d))
    IFOG = np.zeros((n, b, d * 4))
    IFOGf = np.zeros((n, b, d * 4))
    C = np.zeros((n, b, d))
    Ct = np.zeros((n, b, d))

    for t in range(0, n):
        if t == 0:
            prev = np.tile(h0, (b, 1))
        else:
            prev = Hout[t - 1]

        Hin[t, :, :word_size] = X[t]
        Hin[t, :, word_size:] = prev
        # compute all gate activations; this matrix multiply dominates the work
        IFOG[t] = Hin[t].dot(WLSTM)
        # non-linearities
        IFOGf[t, :, : 3 * d] = 1.0 / (
            1.0 + np.exp(-IFOG[t, :, : 3 * d])
        )  # sigmoids; these are the gates
        IFOGf[t, :, 3 * d :] = np.tanh(IFOG[t, :, 3 * d :])  # tanh
        # compute the cell activation
        C[t] = IFOGf[t, :, :d] * IFOGf[t, :, 3 * d :]
        if t > 0:
            C[t] += IFOGf[t, :, d : 2 * d] * C[t - 1]
        Ct[t] = np.tanh(C[t])
        Hout[t] = IFOGf[t, :, 2 * d : 3 * d] * Ct[t]

    # Do a little sum of the outputs to synchronize and check for NaNs
    total = np.sum(Hout)
    assert not math.isnan(total)

    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    if timing:
        print("Elapsed Time: " + str(total) + " ms")
    return total
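The examples that follow exercise np against a drop-in NumPy replacement
bound to lg; a typical preamble (the exact lg package name is an assumption,
and math/datetime are the imports the benchmark functions above rely on):

import datetime
import math

import numpy as np
import cunumeric as lg  # any NumPy-compatible drop-in would do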
Example #11
def test():
    np.random.seed(50)
    datanp = np.random.randn(2000000, 3)
    data = lg.array(datanp)
    pointsnp = np.random.choice(np.arange(len(datanp)), 4, False)
    points = lg.array(pointsnp)

    centroids = data[points]
    centroidsnp = datanp[pointsnp]
    # these hold Euclidean distances, not squared ones (argmin is unaffected)
    dists = lg.zeros((4, len(data)))
    distsnp = np.zeros((4, len(datanp)))
    for i in range(4):
        vec = data - centroids[i]
        vecnp = datanp - centroidsnp[i]
        dists[i] = lg.linalg.norm(vec, axis=1)
        distsnp[i] = np.linalg.norm(vecnp, axis=1)

    clusters = lg.argmin(dists, axis=0)
    clustersnp = np.argmin(distsnp, axis=0)
    assert lg.array_equal(lg.where(clusters == 0), np.where(clustersnp == 0))
Example #12
def test():
    x = lg.array([[1, 2], [3, 4], [5, 6]])
    assert lg.array_equal(x[[0, 1, 2], [0, 1, 0]], [1, 4, 5])

    x = lg.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])
    rows = lg.array([0, 3])
    columns = lg.array([0, 2])
    assert lg.array_equal(x[rows[:, np.newaxis], columns], [[0, 2], [9, 11]])

    zg = lg.array([[-1.2 + 0.5j, 1.2 - 2j], [-2.2 + 3.5j, 4.2 - 6.2j]])
    m = lg.array([[True, False], [False, True]])
    assert lg.array_equal(zg[m], [-1.2 + 0.5j, 4.2 - 6.2j])

    anp = np.array([[[2, 1], [3, 2]], [[2, 4], [4, 1]]])
    a = lg.array(anp)
    nznp = anp < 3
    nzgp = a < 3
    assert lg.array_equal(anp[nznp], a[nzgp])

    y = lg.array(
        [[[True, True], [False, True]], [[True, False], [False, True]]]
    )
    z = lg.nonzero(y)
    assert lg.array_equal(a[z], lg.array([2, 1, 2, 2, 1]))

    np.random.seed(42)
    anp = np.random.randn(10, 10, 4)
    a = lg.array(anp)
    bnp = np.array([3, 4, 6])
    cnp = np.array([1, 4, 5])
    b = lg.array(bnp)
    c = lg.array(cnp)

    assert lg.array_equal(a[b], anp[bnp])
    assert lg.array_equal(a[(b, c)], anp[(b, c)])

    # all-zero masks, so where() yields empty index arrays
    zerosnp = np.zeros(10, int)
    zeros = lg.zeros(10, int)

    dnp = np.random.randn(20, 4)
    d = lg.array(dnp)
    assert lg.array_equal(dnp[np.where(zerosnp)], d[lg.where(zeros)])
Example #13
def test():
    x = lg.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    x[0:5] = lg.array([11, 12, 13, 14, 15])
    x[5:10] = lg.array([16, 17, 18, 19, 20])
    x[4:8] = lg.array([21, 22, 23, 24])
    assert np.array_equal(x[5:10], [22, 23, 24, 19, 20])
    assert np.array_equal(x, [11, 12, 13, 14, 21, 22, 23, 24, 19, 20])

    anp = np.zeros((5, 6))
    bnp = np.random.random((5, 4))
    cnp = np.random.random((5, 2))
    a = lg.zeros((5, 6))
    b = lg.array(bnp)
    c = lg.array(cnp)
    a[:, :4] = b
    a[:, 0] = 1
    a[:, 3:5] = c
    anp[:, :4] = bnp
    anp[:, 0] = 1
    anp[:, 3:5] = cnp
    assert np.array_equal(a, anp)

    dnp = np.random.random((2, 3, 4))
    enp = np.random.random((2, 3, 4))
    fnp = np.random.random((3, 2))
    d = lg.array(dnp)
    e = lg.array(enp)
    f = lg.array(fnp)
    d[1, :, 0] = 1
    d[1, :, 1:3] = f
    d[0] = e[1]
    dnp[1, :, 0] = 1
    dnp[1, :, 1:3] = fnp
    dnp[0] = enp[1]
    assert np.array_equal(d, dnp)

    return
Example #14
def test():

    height = 10
    width = 10
    grid = lg.zeros((height + 2, width + 2), np.float32)
    grid[:, 0] = -273.15
    grid[:, -1] = -273.15
    grid[-1, :] = -273.15
    grid[0, :] = 40.0
    center = grid[1:-1, 1:-1]
    north = grid[0:-2, 1:-1]
    east = grid[1:-1, 2:]
    west = grid[1:-1, 0:-2]
    south = grid[2:, 1:-1]
    for i in range(2):
        average = center + north + east + west + south
        work = 0.2 * average
        delta = lg.sum(lg.absolute(work - center))
        center[:] = work
    npGrid = np.zeros((height + 2, width + 2), np.float32)
    npGrid[:, 0] = -273.15
    npGrid[:, -1] = -273.15
    npGrid[-1, :] = -273.15
    npGrid[0, :] = 40.0
    npcenter = npGrid[1:-1, 1:-1]
    npnorth = npGrid[0:-2, 1:-1]
    npeast = npGrid[1:-1, 2:]
    npwest = npGrid[1:-1, 0:-2]
    npsouth = npGrid[2:, 1:-1]
    for i in range(2):
        npaverage = npcenter + npnorth + npeast + npwest + npsouth
        npwork = 0.2 * npaverage
        nptemp = np.absolute(npwork - npcenter)
        npdelta = np.sum(nptemp)
        npcenter[:] = npwork
    assert np.allclose(delta, npdelta)
    return
Example #15
def test():
    word_size = 10
    hidden_size = 10
    sentence_length = 2
    batch_size = 3
    X = np.random.randn(sentence_length, batch_size, hidden_size)
    h0 = np.random.randn(1, hidden_size)
    WLSTM = np.random.randn(word_size + hidden_size,
                            4 * hidden_size) / np.sqrt(word_size + hidden_size)

    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size

    Hin = np.zeros((n, b, xphpb))
    Hout = np.zeros((n, b, d))
    IFOG = np.zeros((n, b, d * 4))
    IFOGf = np.zeros((n, b, d * 4))
    C = np.zeros((n, b, d))
    Ct = np.zeros((n, b, d))

    for t in range(0, n):
        if t == 0:
            prev = np.tile(h0, (b, 1))
        else:
            prev = Hout[t - 1]

        Hin[t, :, :word_size] = X[t]
        Hin[t, :, word_size:] = prev
        # compute all gate activations; this matrix multiply dominates the work
        IFOG[t] = Hin[t].dot(WLSTM)
        # non-linearities
        # sigmoids; these are the gates
        IFOGf[t, :, :3 * d] = 1.0 / (1.0 + np.exp(-IFOG[t, :, :3 * d]))
        IFOGf[t, :, 3 * d:] = np.tanh(IFOG[t, :, 3 * d:])  # tanh
        # compute the cell activation
        C[t] = IFOGf[t, :, :d] * IFOGf[t, :, 3 * d:]
        if t > 0:
            C[t] += IFOGf[t, :, d:2 * d] * C[t - 1]
        Ct[t] = np.tanh(C[t])
        Hout[t] = IFOGf[t, :, 2 * d:3 * d] * Ct[t]

    return
Example #16
def linear_regression(T,
                      features,
                      target,
                      steps,
                      learning_rate,
                      sample,
                      add_intercept=False):
    if add_intercept:
        intercept = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1], dtype=T)

    for step in range(steps):
        scores = np.dot(features, weights)
        error = scores - target
        gradient = -(1.0 / len(features)) * error.dot(features)
        weights += learning_rate * gradient

        if step % sample == 0:
            print("Error of step " + str(step) + ": " +
                  str(np.sum(np.power(error, 2))))

    return weights
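A hedged usage sketch for linear_regression on synthetic data; all sizes and
hyperparameters here are arbitrary:

features = np.random.randn(100, 3)
true_weights = np.array([1.5, -2.0, 0.5])
target = features.dot(true_weights)
weights = linear_regression(np.float64, features, target, steps=500,
                            learning_rate=0.5, sample=100)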
Example #17
def test():

    word_size = 10
    hidden_size = 10
    sentence_length = 5
    batch_size = 3
    lg.random.seed(42)

    WLSTM = lg.random.randn(word_size + hidden_size,
                            4 * hidden_size) / lg.sqrt(word_size + hidden_size)

    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size

    dHout = lg.random.randn(n, b, d)
    IFOGf = lg.random.randn(n, b, d * 4)
    C = lg.random.randn(n, b, d)
    Ct = lg.random.randn(n, b, d)
    Hin = lg.random.randn(n, b, xphpb)

    dIFOG = lg.zeros((n, b, d * 4))
    dIFOGf = lg.zeros(IFOGf.shape)
    dHin = lg.zeros(Hin.shape)
    dC = lg.zeros(C.shape)
    dh0 = lg.zeros((1, d))

    for t in reversed(range(n)):
        tanhCt = Ct[t]
        dIFOGf[t, :, 2 * d:3 * d] = tanhCt * dHout[t]
        # backprop tanh non-linearity first then continue backprop
        dC[t] += (1 - tanhCt**2) * (IFOGf[t, :, 2 * d:3 * d] * dHout[t])

        if t > 0:
            dIFOGf[t, :, d:2 * d] = C[t - 1] * dC[t]
            dC[t - 1] += IFOGf[t, :, d:2 * d] * dC[t]

        dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d:] * dC[t]
        dIFOGf[t, :, 3 * d:] = IFOGf[t, :, :d] * dC[t]

        # backprop activation functions
        dIFOG[t, :,
              3 * d:] = (1 - IFOGf[t, :, 3 * d:]**2) * dIFOGf[t, :, 3 * d:]
        y = IFOGf[t, :, :3 * d]
        dIFOG[t, :, :3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, :3 * d]

        # backprop matrix multiply
        dHin[t] = dIFOG[t].dot(WLSTM.transpose())

        # backprop the identity transforms into Hin
        if t > 0:
            dHout[t - 1, :] += dHin[t, :, word_size:]
        else:
            dh0[0] += lg.sum(dHin[t, :, word_size:], 0)

    np.random.seed(42)

    WLSTM = np.random.randn(word_size + hidden_size,
                            4 * hidden_size) / np.sqrt(word_size + hidden_size)

    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size

    dHout = np.random.randn(n, b, d)
    IFOGf = np.random.randn(n, b, d * 4)
    C = np.random.randn(n, b, d)
    Ct = np.random.randn(n, b, d)
    Hin = np.random.randn(n, b, xphpb)

    dIFOG = np.zeros((n, b, d * 4))
    dIFOGf = np.zeros(IFOGf.shape)
    dHin = np.zeros(Hin.shape)
    dC = np.zeros(C.shape)
    dhnp0 = np.zeros((1, d))

    for t in reversed(range(n)):
        tanhCt = Ct[t]
        dIFOGf[t, :, 2 * d:3 * d] = tanhCt * dHout[t]
        # backprop tanh non-linearity first then continue backprop
        dC[t] += (1 - tanhCt**2) * (IFOGf[t, :, 2 * d:3 * d] * dHout[t])

        if t > 0:
            dIFOGf[t, :, d:2 * d] = C[t - 1] * dC[t]
            dC[t - 1] += IFOGf[t, :, d:2 * d] * dC[t]

        dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d:] * dC[t]
        dIFOGf[t, :, 3 * d:] = IFOGf[t, :, :d] * dC[t]

        # backprop activation functions
        dIFOG[t, :,
              3 * d:] = (1 - IFOGf[t, :, 3 * d:]**2) * dIFOGf[t, :, 3 * d:]
        y = IFOGf[t, :, :3 * d]
        dIFOG[t, :, :3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, :3 * d]

        # backprop matrix multiply
        dHin[t] = dIFOG[t].dot(WLSTM.transpose())

        # backprop the identity transforms into Hin
        if t > 0:
            dHout[t - 1, :] += dHin[t, :, word_size:]
        else:
            dhnp0[0] += np.sum(dHin[t, :, word_size:], 0)

    assert np.allclose(dh0[0], dhnp0[0])
Example #18
    def backward(dHout_in, cache, dcn=None, dhn=None):

        WLSTM = cache["WLSTM"]
        Hout = cache["Hout"]
        IFOGf = cache["IFOGf"]
        IFOG = cache["IFOG"]
        C = cache["C"]
        Ct = cache["Ct"]
        Hin = cache["Hin"]
        c0 = cache["c0"]
        # h0 = cache["h0"]
        n, b, d = Hout.shape
        input_size = WLSTM.shape[0] - d - 1  # -1 due to bias

        # backprop the LSTM
        dIFOG = np.zeros(IFOG.shape)
        dIFOGf = np.zeros(IFOGf.shape)
        dWLSTM = np.zeros(WLSTM.shape)
        dHin = np.zeros(Hin.shape)
        dC = np.zeros(C.shape)
        dX = np.zeros((n, b, input_size))
        dh0 = np.zeros((b, d))
        dc0 = np.zeros((b, d))
        # copy so we don't mutate the caller's gradient array
        dHout = dHout_in.copy()
        if dcn is not None:
            dC[n - 1] += dcn.copy()  # carry over gradients from later
        if dhn is not None:
            dHout[n - 1] += dhn.copy()
        for t in reversed(range(n)):

            tanhCt = Ct[t]
            dIFOGf[t, :, 2 * d:3 * d] = tanhCt * dHout[t]
            # backprop tanh non-linearity first then continue backprop
            dC[t] += (1 - tanhCt**2) * (IFOGf[t, :, 2 * d:3 * d] * dHout[t])
            if t > 0:
                dIFOGf[t, :, d:2 * d] = C[t - 1] * dC[t]
                dC[t - 1] += IFOGf[t, :, d:2 * d] * dC[t]
            else:
                dIFOGf[t, :, d:2 * d] = c0 * dC[t]
                dc0 = IFOGf[t, :, d:2 * d] * dC[t]
            dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d:] * dC[t]
            dIFOGf[t, :, 3 * d:] = IFOGf[t, :, :d] * dC[t]

            # backprop activation functions
            dIFOG[t, :,
                  3 * d:] = (1 - IFOGf[t, :, 3 * d:]**2) * dIFOGf[t, :, 3 * d:]
            y = IFOGf[t, :, :3 * d]
            dIFOG[t, :, :3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, :3 * d]

            # backprop matrix multiply
            dWLSTM += np.dot(Hin[t].transpose(), dIFOG[t])
            dHin[t] = dIFOG[t].dot(WLSTM.transpose())

            # backprop the identity transforms into Hin
            dX[t] = dHin[t, :, 1:input_size + 1]
            if t > 0:
                dHout[t - 1, :] += dHin[t, :, input_size + 1:]
            else:
                dh0 += dHin[t, :, input_size + 1:]

        return dX, dWLSTM, dc0, dh0
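Example #19 below also calls LSTM.init, which is not shown in this section; a
minimal sketch consistent with the forward pass's weight layout (a bias row
first, then x, then h), including the common trick of starting the forget
gate bias high (the default of 3 is an assumption):

    @staticmethod
    def init(input_size, hidden_size, fancy_forget_bias_init=3):
        # rows are [bias; x; h], columns are the 4 * hidden_size gates
        WLSTM = np.random.randn(
            input_size + hidden_size + 1, 4 * hidden_size
        ) / np.sqrt(input_size + hidden_size)
        WLSTM[0, :] = 0  # start all biases at zero
        if fancy_forget_bias_init != 0:
            # bias the forget gates toward remembering early in training
            WLSTM[0, hidden_size:2 * hidden_size] = fancy_forget_bias_init
        return WLSTM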
Example #19
def checkSequentialMatchesBatch():
    """ check LSTM I/O forward/backward interactions """

    n, b, d = (5, 3, 4)  # sequence length, batch size, hidden size
    input_size = 10
    WLSTM = LSTM.init(input_size, d)  # input size, hidden size
    X = np.random.randn(n, b, input_size)
    h0 = np.random.randn(b, d)
    c0 = np.random.randn(b, d)

    # sequential forward
    cprev = c0
    hprev = h0
    caches = [{} for t in range(n)]
    Hcat = np.zeros((n, b, d))

    for t in range(n):
        xt = X[t:t + 1]
        _, cprev, hprev, cache = LSTM.forward(xt, WLSTM, cprev, hprev)
        caches[t] = cache
        Hcat[t] = hprev

    # sanity check: perform batch forward to check that we get the same thing
    H, _, _, batch_cache = LSTM.forward(X, WLSTM, c0, h0)

    assert np.allclose(H, Hcat), "Sequential and Batch forward don't match!"

    # eval loss
    wrand = np.random.randn(*Hcat.shape)
    # loss = np.sum(Hcat * wrand)
    dH = wrand

    # get the batched version gradients
    BdX, BdWLSTM, Bdc0, Bdh0 = LSTM.backward(dH, batch_cache)

    # now perform sequential backward
    dX = np.zeros_like(X)
    dWLSTM = np.zeros_like(WLSTM)
    dc0 = np.zeros_like(c0)
    dh0 = np.zeros_like(h0)
    dcnext = None
    dhnext = None
    for t in reversed(range(n)):
        dht = dH[t].reshape((1, b, d))
        # print("dht")
        # print(dht.shape)
        # print(dht[0])
        dx, dWLSTMt, dcprev, dhprev = LSTM.backward(dht, caches[t], dcnext,
                                                    dhnext)
        dhnext = dhprev
        dcnext = dcprev

        dWLSTM += dWLSTMt  # accumulate LSTM gradient
        dX[t] = dx[0]
        if t == 0:
            dc0 = dcprev
            dh0 = dhprev

    # and make sure the gradients match
    print(
        "Making sure batched version agrees with sequential version: (should "
        "all be True)")
    print(np.allclose(BdX, dX))
    print(np.allclose(BdWLSTM, dWLSTM))
    print(np.allclose(Bdc0, dc0))
    print(np.allclose(Bdh0, dh0))
Example #20
def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing):
    start = datetime.datetime.now()

    WLSTM = np.random.randn(word_size + hidden_size,
                            4 * hidden_size) / np.sqrt(word_size + hidden_size)

    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size

    dHout = np.random.randn(n, b, d)
    IFOGf = np.random.randn(n, b, d * 4)
    C = np.random.randn(n, b, d)
    Ct = np.random.randn(n, b, d)
    Hin = np.random.randn(n, b, xphpb)

    dIFOG = np.zeros((n, b, d * 4))
    dIFOGf = np.zeros(IFOGf.shape)
    dHin = np.zeros(Hin.shape)
    dC = np.zeros(C.shape)
    dh0 = np.zeros((1, d))

    for t in reversed(range(n)):
        tanhCt = Ct[t]
        dIFOGf[t, :, 2 * d:3 * d] = tanhCt * dHout[t]
        # backprop tanh non-linearity first then continue backprop
        dC[t] += (1 - tanhCt**2) * (IFOGf[t, :, 2 * d:3 * d] * dHout[t])

        if t > 0:
            dIFOGf[t, :, d:2 * d] = C[t - 1] * dC[t]
            dC[t - 1] += IFOGf[t, :, d:2 * d] * dC[t]

        dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d:] * dC[t]
        dIFOGf[t, :, 3 * d:] = IFOGf[t, :, :d] * dC[t]

        # backprop activation functions
        dIFOG[t, :,
              3 * d:] = (1 - IFOGf[t, :, 3 * d:]**2) * dIFOGf[t, :, 3 * d:]
        y = IFOGf[t, :, :3 * d]
        dIFOG[t, :, :3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, :3 * d]

        # backprop matrix multiply
        dHin[t] = dIFOG[t].dot(WLSTM.transpose())

        # backprop the identity transforms into Hin
        if t > 0:
            dHout[t - 1, :] += dHin[t, :, word_size:]
        else:
            dh0[0] += np.sum(dHin[t, :, word_size:], 0)

    # Do a little sum to synchronize and check for NaNs
    total = np.sum(dh0)
    assert not math.isnan(total)

    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    if timing:
        print("Elapsed Time: " + str(total) + " ms")
    return total
Example #21
def test():

    word_size = 10
    hidden_size = 10
    sentence_length = 5
    batch_size = 3

    np.random.seed(42)

    WLSTM_np = np.random.randn(
        word_size + hidden_size, 4 * hidden_size
    ) / np.sqrt(word_size + hidden_size)

    xphpb = WLSTM_np.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size

    WLSTM_lg = lg.array(WLSTM_np)

    dHout_np = np.random.randn(n, b, d)
    IFOGf_np = np.random.randn(n, b, d * 4)
    C_np = np.random.randn(n, b, d)
    Ct_np = np.random.randn(n, b, d)
    Hin_np = np.random.randn(n, b, xphpb)

    dIFOG_np = np.zeros((n, b, d * 4))
    dIFOGf_np = np.zeros(IFOGf_np.shape)
    dHin_np = np.zeros(Hin_np.shape)
    dC_np = np.zeros(C_np.shape)
    dh0_np = np.zeros((1, d))

    dHout_lg = lg.array(dHout_np)
    IFOGf_lg = lg.array(IFOGf_np)
    C_lg = lg.array(C_np)
    Ct_lg = lg.array(Ct_np)
    Hin_lg = lg.array(Hin_np)

    dIFOG_lg = lg.zeros((n, b, d * 4))
    dIFOGf_lg = lg.zeros(IFOGf_lg.shape)
    dHin_lg = lg.zeros(Hin_lg.shape)
    dC_lg = lg.zeros(C_lg.shape)
    dh0_lg = lg.zeros((1, d))

    for t in reversed(range(n)):
        tanhCt_np = Ct_np[t]
        tanhCt_lg = Ct_lg[t]
        # assert lg.allclose(tanhCt_np, tanhCt_lg)

        dIFOGf_np[t, :, 2 * d : 3 * d] = tanhCt_np * dHout_np[t]
        dIFOGf_lg[t, :, 2 * d : 3 * d] = tanhCt_lg * dHout_lg[t]
        # assert lg.allclose(dIFOGf_np[t,:,2*d:3*d], dIFOGf_lg[t,:,2*d:3*d])

        # backprop tanh non-linearity first then continue backprop
        dC_np[t] += (1 - tanhCt_np ** 2) * (
            IFOGf_np[t, :, 2 * d : 3 * d] * dHout_np[t]
        )
        dC_lg[t] += (1 - tanhCt_lg ** 2) * (
            IFOGf_lg[t, :, 2 * d : 3 * d] * dHout_lg[t]
        )
        # assert lg.allclose(dC_np[t], dC_lg[t])

        if t > 0:
            dIFOGf_np[t, :, d : 2 * d] = C_np[t - 1] * dC_np[t]
            dIFOGf_lg[t, :, d : 2 * d] = C_lg[t - 1] * dC_lg[t]
            # assert lg.allclose(dIFOGf_np[t,:,d:2*d], dIFOGf_lg[t,:,d:2*d])

            dC_np[t - 1] += IFOGf_np[t, :, d : 2 * d] * dC_np[t]
            dC_lg[t - 1] += IFOGf_lg[t, :, d : 2 * d] * dC_lg[t]
            # assert lg.allclose(dC_np[t-1], dC_lg[t-1])

        dIFOGf_np[t, :, :d] = IFOGf_np[t, :, 3 * d :] * dC_np[t]
        dIFOGf_lg[t, :, :d] = IFOGf_lg[t, :, 3 * d :] * dC_lg[t]
        # assert lg.allclose(dIFOGf_np[t,:,:d], dIFOGf_lg[t,:,:d])

        dIFOGf_np[t, :, 3 * d :] = IFOGf_np[t, :, :d] * dC_np[t]
        dIFOGf_lg[t, :, 3 * d :] = IFOGf_lg[t, :, :d] * dC_lg[t]
        # assert lg.allclose(dIFOGf_np, dIFOGf_lg)

        # backprop activation functions
        dIFOG_np[t, :, 3 * d :] = (
            1 - IFOGf_np[t, :, 3 * d :] ** 2
        ) * dIFOGf_np[t, :, 3 * d :]
        dIFOG_lg[t, :, 3 * d :] = (
            1 - IFOGf_lg[t, :, 3 * d :] ** 2
        ) * dIFOGf_lg[t, :, 3 * d :]
        # assert lg.allclose(dIFOG_np[t,:,3*d:], dIFOG_lg[t,:,3*d:])

        y_np = IFOGf_np[t, :, : 3 * d]
        y_lg = IFOGf_lg[t, :, : 3 * d]
        # assert lg.allclose(y_np, y_lg)

        dIFOG_np[t, :, : 3 * d] = (y_np * (1.0 - y_np)) * dIFOGf_np[
            t, :, : 3 * d
        ]
        dIFOG_lg[t, :, : 3 * d] = (y_lg * (1.0 - y_lg)) * dIFOGf_lg[
            t, :, : 3 * d
        ]
        # assert lg.allclose(dIFOG_np[t,:,:3*d], dIFOG_lg[t,:,:3*d])

        # backprop matrix multiply
        dHin_np[t] = dIFOG_np[t].dot(WLSTM_np.transpose())
        dHin_lg[t] = dIFOG_lg[t].dot(WLSTM_lg.transpose())
        # assert lg.allclose(dHin_np[t], dHin_lg[t])

        # backprop the identity transforms into Hin
        if t > 0:
            dHout_np[t - 1, :] += dHin_np[t, :, word_size:]
            dHout_lg[t - 1, :] += dHin_lg[t, :, word_size:]
            # assert lg.allclose(dHout_np[t-1,:], dHout_lg[t-1,:])
        else:
            dh0_np[0] += np.sum(dHin_np[t, :, word_size:], 0)
            dh0_lg[0] += lg.sum(dHin_lg[t, :, word_size:], 0)
            # Check this one at the end
    # print(dh0_np[0])
    # print(dh0_lg[0])
    assert np.allclose(dh0_np[0], dh0_lg[0])
Example #22
def run_lstm(
    file_name,
    H_size,
    T_steps,
    max_iters,
    learning_rate,
    weight_sd,
    dump,
    timing,
):
    with open(file_name, "r") as f:
        data = f.read()
        chars = list(set(data))
        data_size, X_size = len(data), len(chars)
        print("data has %d characters, %d unique" % (data_size, X_size))
        char_to_idx = {ch: i for i, ch in enumerate(chars)}

    z_size = H_size + X_size  # Size of concatenate(H, X) vector

    parameters = Parameters(H_size, X_size, z_size, weight_sd)

    # Exponential average of loss
    # Initialize to the error of a random model
    smooth_loss = -np.log(1.0 / X_size) * T_steps

    pointer = 0

    start = datetime.datetime.now()

    for iteration in range(max_iters):
        # Reset
        if pointer + T_steps >= len(data) or iteration == 0:
            g_h_prev = np.zeros((H_size, 1))
            g_C_prev = np.zeros((H_size, 1))
            pointer = 0

        inputs = [char_to_idx[ch] for ch in data[pointer : pointer + T_steps]]
        targets = [
            char_to_idx[ch] for ch in data[pointer + 1 : pointer + T_steps + 1]
        ]

        loss, g_h_prev, g_C_prev = forward_backward(
            inputs,
            targets,
            g_h_prev,
            g_C_prev,
            T_steps,
            H_size,
            X_size,
            parameters,
        )
        smooth_loss = smooth_loss * 0.999 + loss * 0.001

        # Print status every dump iterations
        if iteration % dump == 0:
            update_status(iteration, smooth_loss)

        update_parameters(learning_rate, parameters)

        pointer += T_steps
    update_status(max_iters, smooth_loss)

    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    if timing:
        print("Elapsed Time: " + str(total) + " ms")
    return total
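run_lstm above references update_status and update_parameters, which are not
shown; plausible sketches, assuming the Param objects from Example #8 expose
a value v, a gradient d, and an Adagrad accumulator m (all assumptions):

def update_status(iteration, smooth_loss):
    print("iteration %d, smooth loss %f" % (iteration, smooth_loss))


def update_parameters(learning_rate, parameters):
    for p in parameters.all():  # Parameters.all() is assumed to list Params
        p.m += p.d * p.d  # Adagrad: accumulate squared gradients
        p.v += -(learning_rate * p.d / np.sqrt(p.m + 1e-8))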
Example #23
def forward_backward(
    inputs, targets, h_prev, C_prev, T_steps, H_size, X_size, parameters
):
    # To store the values for each time step
    x_s, z_s, f_s, i_s = {}, {}, {}, {}
    C_bar_s, C_s, o_s, h_s = {}, {}, {}, {}
    v_s, y_s = {}, {}

    # Values at t - 1
    h_s[-1] = np.copy(h_prev)
    C_s[-1] = np.copy(C_prev)

    loss = 0
    # Loop through time steps
    assert len(inputs) == T_steps
    for t in range(len(inputs)):
        x_s[t] = np.zeros((X_size, 1))
        x_s[t][inputs[t]] = 1  # Input character

        (
            z_s[t],
            f_s[t],
            i_s[t],
            C_bar_s[t],
            C_s[t],
            o_s[t],
            h_s[t],
            v_s[t],
            y_s[t],
        ) = forward(
            x_s[t], h_s[t - 1], C_s[t - 1], H_size, X_size, parameters
        )  # Forward pass

        loss += -np.log(y_s[t][targets[t], 0])  # Cross-entropy loss at step t

    clear_gradients(parameters)

    dh_next = np.zeros_like(h_s[0])  # dh from the next character
    dC_next = np.zeros_like(C_s[0])  # dC from the next character

    for t in reversed(range(len(inputs))):
        # Backward pass
        dh_next, dC_next = backward(
            target=targets[t],
            dh_next=dh_next,
            dC_next=dC_next,
            C_prev=C_s[t - 1],
            H_size=H_size,
            X_size=X_size,
            z=z_s[t],
            f=f_s[t],
            i=i_s[t],
            C_bar=C_bar_s[t],
            C=C_s[t],
            o=o_s[t],
            h=h_s[t],
            v=v_s[t],
            y=y_s[t],
            p=parameters,
        )

    clip_gradients(parameters)

    return loss, h_s[len(inputs) - 1], C_s[len(inputs) - 1]
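clear_gradients and clip_gradients are likewise defined elsewhere; common
sketches under the same assumed Param layout (the [-1, 1] clipping range is
an assumption):

def clear_gradients(parameters):
    for p in parameters.all():
        p.d.fill(0)


def clip_gradients(parameters):
    for p in parameters.all():
        np.clip(p.d, -1, 1, out=p.d)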
Example #24
def test():
    x = lg.array([1, 2, 3])
    y = np.array([1, 2, 3])
    z = lg.array(y)
    assert np.array_equal(x, z)
    assert x.dtype == z.dtype

    xe = lg.empty((2, 3))
    ye = np.empty((2, 3))
    assert lg.shape(xe) == np.shape(ye)
    assert xe.dtype == ye.dtype

    xz = lg.zeros((2, 3))
    yz = np.zeros((2, 3))
    assert np.array_equal(xz, yz)
    assert xz.dtype == yz.dtype

    xo = lg.ones((2, 3))
    yo = np.ones((2, 3))
    assert np.array_equal(xo, yo)
    assert xo.dtype == yo.dtype

    xf = lg.full((2, 3), 3)
    yf = np.full((2, 3), 3)
    assert np.array_equal(xf, yf)
    assert xf.dtype == yf.dtype

    xel = lg.empty_like(x)
    yel = np.empty_like(y)
    assert lg.shape(xel) == np.shape(yel)
    assert xel.dtype == yel.dtype

    xzl = lg.zeros_like(x)
    yzl = np.zeros_like(y)
    assert np.array_equal(xzl, yzl)
    assert xzl.dtype == yzl.dtype

    xol = lg.ones_like(x)
    yol = np.ones_like(y)
    assert np.array_equal(xol, yol)
    assert xol.dtype == yol.dtype

    xfl = lg.full_like(x, 3)
    yfl = np.full_like(y, 3)
    assert np.array_equal(xfl, yfl)
    assert xfl.dtype == yfl.dtype

    x = lg.arange(10)
    y = np.arange(10)
    assert np.array_equal(x, y)
    assert x.dtype == y.dtype

    x = lg.arange(10, dtype=np.int32)
    y = np.arange(10, dtype=np.int32)
    assert np.array_equal(x, y)
    assert x.dtype == y.dtype

    x = lg.arange(2.0, 10.0)
    y = np.arange(2.0, 10.0)
    assert np.array_equal(x, y)
    assert x.dtype == y.dtype

    x = lg.arange(2, 30, 3)
    y = np.arange(2, 30, 3)
    assert np.array_equal(x, y)
    assert x.dtype == y.dtype

    # xfls = lg.full_like(x, '3', dtype=np.str_)
    # yfls = np.full_like(y, '3', dtype=np.str_)
    # assert(lg.array_equal(xfls, yfls))
    # assert(xfls.dtype == yfls.dtype)

    return
Example #25
def initialize(M, N, K, ft):
    A = np.random.rand(M, K).astype(ft)
    B = np.random.rand(K, N).astype(ft)
    C = np.zeros((M, N), dtype=ft)
    return A, B, C
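A usage sketch for the GEMM initialization; the matrix sizes are arbitrary:

A, B, C = initialize(64, 32, 128, np.float32)
C[:] = A.dot(B)  # (M, K) x (K, N) -> (M, N)
assert C.shape == (64, 32)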