示例#1
0
文件: lstm.py 项目: pombredanne/minpy
 def print_training_prediction(weights):
     print("Training text                         Predicted text")
     logprobs = np.asarray(pred_fun(weights, train_inputs))
     for t in range(logprobs.shape[1]):
         training_text  = one_hot_to_string(train_inputs[:,t,:])
         predicted_text = one_hot_to_string(logprobs[:,t,:])
         print(training_text.replace('\n', ' ') + "|" + predicted_text.replace('\n', ' '))
示例#2
0
 def print_training_prediction(weights):
     print("Training text                         Predicted text")
     logprobs = np.asarray(pred_fun(weights, train_inputs))
     for t in range(logprobs.shape[1]):
         training_text = one_hot_to_string(train_inputs[:, t, :])
         predicted_text = one_hot_to_string(logprobs[:, t, :])
         print(
             training_text.replace('\n', ' ') + "|" +
             predicted_text.replace('\n', ' '))
示例#3
0
def get_training_data():
    """
    Reads input data from the 'data' directory.
    :return: A matrix of form 50000 x 3072
    """
    x_data = None
    y_data = []
    for nr in range(1, 6):
        batch = unpickle('data/data_batch_' + str(nr))
        if x_data is None:
            x_data = np.asarray(batch[b'data'])
            y_data = batch[b'labels']
        else:
            x_data = np.concatenate(
                [np.asarray(x_data),
                 np.asarray(batch[b'data'])], axis=0)
            y_data += batch[b'labels']
    return x_data, y_data
示例#4
0
    def run(self):
        lase_te = 0.0
        lase_pe = 0.0
        te = 0.0
        pe = 0.0

        for i in range(self._iters):
            theta_mk = np.asarray([[0.0] * self._k for _ in range(self._n_docs)])
            psi_kj = np.asarray([[0.0] * self._vocab_n for _ in range(self._k)])
            mj = self._p_dm * np.matmul(self._theta_m_k, self._psi_k_j)
            for m in range(self._n_docs):
                mk = np.matmul(self._n_mj[m, :], self.qz_mjk_m(mj, m))
                p = mk / self._Nm[m]
                theta_mk[m] = np.asarray(p).reshape(self._k)

            den = [0.0] * self._k
            for k in range(self._k):
                p = np.multiply(self._n_mj, np.mat(self.qz_mjk_k(mj, k)))
                dk = np.sum(p)
                den[k] = dk

            for k in range(self._k):
                p = np.multiply(self._n_mj, self.qz_mjk_k(mj, k))
                p = np.sum(p, axis=0) / den[k]
                psi_kj[k] = p

            lase_pe = pe
            lase_te = te
            pe = PLSA.cross_entropy(psi_kj, self._psi_k_j)
            te = PLSA.cross_entropy(theta_mk, self._theta_m_k)

            if (abs(lase_pe - pe) < self._min_delta and abs(lase_te - te) < self._min_delta) or np.isnan(pe) or np.isnan(te):
                break

            self._psi_k_j = psi_kj
            self._theta_m_k = theta_mk
            yield i, te, pe
示例#5
0
    def __init__(self, docs, K=10, iters=10, min_delta=0.01):
        vocabulary = Counter(chain(*docs))
        vocab_idx = sorted(vocabulary.items(), key=lambda x: x[1], reverse=True)
        vocab_idx = dict([(w, idx) for idx, (w, n) in enumerate(vocab_idx)])

        self._k = K
        self._iters = iters
        self._vocab_n = len(vocab_idx)
        self._n_docs = len(docs)
        self._min_delta = min_delta

        _docs = [[vocab_idx[w] for w in doc] for doc in docs]
        self._theta_m_k = PLSA.normalize(np.random.random([self._n_docs, self._k]))
        self._psi_k_j = PLSA.normalize(np.random.random([self._k, self._vocab_n]))
        self._Nm = np.asarray([len(doc) for doc in docs])
        self._p_dm = 1.0 / self._n_docs
        self._n_mj = pd.DataFrame([Counter(doc) for doc in _docs]).fillna(0).values
        self._vocab = dict([(idx, w) for w, idx in vocab_idx.items()])
示例#6
0
def test_numeric():
    # 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc',
    # 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast',
    # 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer',
    # 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose',
    # 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types',
    # 'min_scalar_type', 'result_type', 'asarray', 'asanyarray',
    # 'ascontiguousarray', 'asfortranarray', 'isfortran', 'empty_like',
    # 'zeros_like', 'ones_like', 'correlate', 'convolve', 'inner', 'dot',
    # 'einsum', 'outer', 'vdot', 'alterdot', 'restoredot', 'roll',
    # 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'array2string',
    # 'get_printoptions', 'set_printoptions', 'array_repr', 'array_str',
    # 'set_string_function', 'little_endian', 'require', 'fromiter',
    # 'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'load',
    # 'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones', 'identity',
    # 'allclose', 'compare_chararrays', 'putmask', 'seterr', 'geterr',
    # 'setbufsize', 'getbufsize', 'seterrcall', 'geterrcall', 'errstate',
    # 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_',
    # 'True_', 'bitwise_not', 'full', 'full_like', 'matmul'
    x = np.arange(6)
    x = x.reshape((2, 3))
    np.zeros_like(x)
    y = np.arange(3, dtype=np.float)
    np.zeros_like(y)
    np.ones(5)
    np.ones((5, ), dtype=np.int)
    np.ones((2, 1))
    s = (2, 2)
    np.ones(s)
    x = np.arange(6)
    x = x.reshape((2, 3))
    np.ones_like(x)
    y = np.arange(3, dtype=np.float)
    np.ones_like(y)
    np.full((2, 2), np.inf)
    x = np.arange(6, dtype=np.int)
    np.full_like(x, 1)
    np.full_like(x, 0.1)
    np.full_like(y, 0.1)
    np.count_nonzero(np.eye(4))
    np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]])
    np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=0)
    np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=1)
    a = [1, 2]
    np.asarray(a)
    a = np.array([1, 2])
    np.asarray(a) is a
    a = np.array([1, 2], dtype=np.float32)
    np.asarray(a, dtype=np.float32) is a
    np.asarray(a, dtype=np.float64) is a
    np.asarray(a) is a
    np.asanyarray(a) is a
    a = [1, 2]
    np.asanyarray(a)
    np.asanyarray(a) is a
    x = np.arange(6).reshape(2, 3)
    np.ascontiguousarray(x, dtype=np.float32)
    x = np.arange(6).reshape(2, 3)
    y = np.asfortranarray(x)
    x = np.arange(6).reshape(2, 3)
    y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F'])
    a = np.array([[1, 2, 3], [4, 5, 6]], order='C')
    np.isfortran(a)
    b = np.array([[1, 2, 3], [4, 5, 6]], order='FORTRAN')
    np.isfortran(b)
    a = np.array([[1, 2, 3], [4, 5, 6]], order='C')
    np.isfortran(a)
    b = a.T
    np.isfortran(b)
    np.isfortran(np.array([1, 2], order='FORTRAN'))
    x = np.arange(6).reshape(2, 3)
    np.argwhere(x > 1)
    x = np.arange(-2, 3)
    np.flatnonzero(x)
    np.correlate([1, 2, 3], [0, 1, 0.5])
    np.correlate([1, 2, 3], [0, 1, 0.5], "same")
    np.correlate([1, 2, 3], [0, 1, 0.5], "full")
    np.correlate([1 + 1j, 2, 3 - 1j], [0, 1, 0.5j], 'full')
    np.correlate([0, 1, 0.5j], [1 + 1j, 2, 3 - 1j], 'full')
    np.convolve([1, 2, 3], [0, 1, 0.5])
    np.convolve([1, 2, 3], [0, 1, 0.5], 'same')
    np.convolve([1, 2, 3], [0, 1, 0.5], 'valid')
    rl = np.outer(np.ones((5, )), np.linspace(-2, 2, 5))
    # im = np.outer(1j*np.linspace(2, -2, 5), np.ones((5,)))
    # grid = rl + im
    x = np.array(['a', 'b', 'c'], dtype=object)
    np.outer(x, [1, 2, 3])
    a = np.arange(60.).reshape(3, 4, 5)
    b = np.arange(24.).reshape(4, 3, 2)
    c = np.tensordot(a, b, axes=([1, 0], [0, 1]))
    c.shape
    # A slower but equivalent way of computing the same...
    d = np.zeros((5, 2))
    a = np.array(range(1, 9))
    A = np.array(('a', 'b', 'c', 'd'), dtype=object)
    x = np.arange(10)
    np.roll(x, 2)
    x2 = np.reshape(x, (2, 5))
    np.roll(x2, 1)
    np.roll(x2, 1, axis=0)
    np.roll(x2, 1, axis=1)
    a = np.ones((3, 4, 5, 6))
    np.rollaxis(a, 3, 1).shape
    np.rollaxis(a, 2).shape
    np.rollaxis(a, 1, 4).shape
    x = np.zeros((3, 4, 5))
    np.moveaxis(x, 0, -1).shape
    np.moveaxis(x, -1, 0).shape
    np.transpose(x).shape
    np.moveaxis(x, [0, 1], [-1, -2]).shape
    np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape
    x = [1, 2, 3]
    y = [4, 5, 6]
    np.cross(x, y)
    x = [1, 2]
    y = [4, 5, 6]
    np.cross(x, y)
    x = [1, 2, 0]
    y = [4, 5, 6]
    np.cross(x, y)
    x = [1, 2]
    y = [4, 5]
    np.cross(x, y)
    x = np.array([[1, 2, 3], [4, 5, 6]])
    y = np.array([[4, 5, 6], [1, 2, 3]])
    np.cross(x, y)
    np.cross(x, y, axisc=0)
    x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y = np.array([[7, 8, 9], [4, 5, 6], [1, 2, 3]])
    np.cross(x, y)
    np.cross(x, y, axisa=0, axisb=0)
    # np.array_repr(np.array([1,2]))
    # np.array_repr(np.ma.array([0.]))
    # np.array_repr(np.array([], np.int32))
    x = np.array([1e-6, 4e-7, 2, 3])
    # np.array_repr(x, precision=6, suppress_small=True)
    # np.array_str(np.arange(3))
    a = np.arange(10)
    x = np.arange(4)
    np.set_string_function(lambda x: 'random', repr=False)
    grid = np.indices((2, 3))
    grid.shape
    grid[0]  # row indices
    grid[1]  # column indices
    x = np.arange(20).reshape(5, 4)
    row, col = np.indices((2, 3))
    x[row, col]
    np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int)
    np.fromfunction(lambda i, j: i + j, (3, 3), dtype=int)
    np.isscalar(3.1)
    np.isscalar([3.1])
    np.isscalar(False)
    # np.binary_repr(3)
    # np.binary_repr(-3)
    # np.binary_repr(3, width=4)
    # np.binary_repr(-3, width=3)
    # np.binary_repr(-3, width=5)
    # np.base_repr(5)
    # np.base_repr(6, 5)
    # np.base_repr(7, base=5, padding=3)
    # np.base_repr(10, base=16)
    # np.base_repr(32, base=16)
    np.identity(3)
    np.allclose([1e10, 1e-7], [1.00001e10, 1e-8])
    np.allclose([1e10, 1e-8], [1.00001e10, 1e-9])
    np.allclose([1e10, 1e-8], [1.0001e10, 1e-9])
    # np.allclose([1.0, np.nan], [1.0, np.nan])
    # np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
    np.isclose([1e10, 1e-7], [1.00001e10, 1e-8])
    np.isclose([1e10, 1e-8], [1.00001e10, 1e-9])
    np.isclose([1e10, 1e-8], [1.0001e10, 1e-9])
    # np.isclose([1.0, np.nan], [1.0, np.nan])
    # np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
    np.array_equal([1, 2], [1, 2])
    np.array_equal(np.array([1, 2]), np.array([1, 2]))
    np.array_equal([1, 2], [1, 2, 3])
    np.array_equal([1, 2], [1, 4])
    np.array_equiv([1, 2], [1, 2])
    np.array_equiv([1, 2], [1, 3])
    np.array_equiv([1, 2], [[1, 2], [1, 2]])
    np.array_equiv([1, 2], [[1, 2, 1, 2], [1, 2, 1, 2]])
    np.array_equiv([1, 2], [[1, 2], [1, 3]])
def test_numeric():
    # 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc',
    # 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast',
    # 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer',
    # 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose',
    # 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types',
    # 'min_scalar_type', 'result_type', 'asarray', 'asanyarray',
    # 'ascontiguousarray', 'asfortranarray', 'isfortran', 'empty_like',
    # 'zeros_like', 'ones_like', 'correlate', 'convolve', 'inner', 'dot',
    # 'einsum', 'outer', 'vdot', 'alterdot', 'restoredot', 'roll',
    # 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'array2string',
    # 'get_printoptions', 'set_printoptions', 'array_repr', 'array_str',
    # 'set_string_function', 'little_endian', 'require', 'fromiter',
    # 'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'load',
    # 'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones', 'identity',
    # 'allclose', 'compare_chararrays', 'putmask', 'seterr', 'geterr',
    # 'setbufsize', 'getbufsize', 'seterrcall', 'geterrcall', 'errstate',
    # 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_',
    # 'True_', 'bitwise_not', 'full', 'full_like', 'matmul'
    x = np.arange(6)
    x = x.reshape((2, 3))
    np.zeros_like(x)
    y = np.arange(3, dtype=np.float)
    np.zeros_like(y)
    np.ones(5)
    np.ones((5,), dtype=np.int)
    np.ones((2, 1))
    s = (2,2)
    np.ones(s)
    x = np.arange(6)
    x = x.reshape((2, 3))
    np.ones_like(x)
    y = np.arange(3, dtype=np.float)
    np.ones_like(y)
    np.full((2, 2), np.inf)
    x = np.arange(6, dtype=np.int)
    np.full_like(x, 1)
    np.full_like(x, 0.1)
    np.full_like(y, 0.1)
    np.count_nonzero(np.eye(4))
    np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]])
    np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=0)
    np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=1)
    a = [1, 2]
    np.asarray(a)
    a = np.array([1, 2])
    np.asarray(a) is a
    a = np.array([1, 2], dtype=np.float32)
    np.asarray(a, dtype=np.float32) is a
    np.asarray(a, dtype=np.float64) is a
    np.asarray(a) is a
    np.asanyarray(a) is a
    a = [1, 2]
    np.asanyarray(a)
    np.asanyarray(a) is a
    x = np.arange(6).reshape(2,3)
    np.ascontiguousarray(x, dtype=np.float32)
    x = np.arange(6).reshape(2,3)
    y = np.asfortranarray(x)
    x = np.arange(6).reshape(2,3)
    y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F'])
    a = np.array([[1, 2, 3], [4, 5, 6]], order='C')
    np.isfortran(a)
    b = np.array([[1, 2, 3], [4, 5, 6]], order='FORTRAN')
    np.isfortran(b)
    a = np.array([[1, 2, 3], [4, 5, 6]], order='C')
    np.isfortran(a)
    b = a.T
    np.isfortran(b)
    np.isfortran(np.array([1, 2], order='FORTRAN'))
    x = np.arange(6).reshape(2,3)
    np.argwhere(x>1)
    x = np.arange(-2, 3)
    np.flatnonzero(x)
    np.correlate([1, 2, 3], [0, 1, 0.5])
    np.correlate([1, 2, 3], [0, 1, 0.5], "same")
    np.correlate([1, 2, 3], [0, 1, 0.5], "full")
    np.correlate([1+1j, 2, 3-1j], [0, 1, 0.5j], 'full')
    np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full')
    np.convolve([1, 2, 3], [0, 1, 0.5])
    np.convolve([1,2,3],[0,1,0.5], 'same')
    np.convolve([1,2,3],[0,1,0.5], 'valid')
    rl = np.outer(np.ones((5,)), np.linspace(-2, 2, 5))
    # im = np.outer(1j*np.linspace(2, -2, 5), np.ones((5,)))
    # grid = rl + im
    x = np.array(['a', 'b', 'c'], dtype=object)
    np.outer(x, [1, 2, 3])
    a = np.arange(60.).reshape(3,4,5)
    b = np.arange(24.).reshape(4,3,2)
    c = np.tensordot(a,b, axes=([1,0],[0,1]))
    c.shape
    # A slower but equivalent way of computing the same...
    d = np.zeros((5,2))
    a = np.array(range(1, 9))
    A = np.array(('a', 'b', 'c', 'd'), dtype=object)
    x = np.arange(10)
    np.roll(x, 2)
    x2 = np.reshape(x, (2,5))
    np.roll(x2, 1)
    np.roll(x2, 1, axis=0)
    np.roll(x2, 1, axis=1)
    a = np.ones((3,4,5,6))
    np.rollaxis(a, 3, 1).shape
    np.rollaxis(a, 2).shape
    np.rollaxis(a, 1, 4).shape
    x = np.zeros((3, 4, 5))
    np.moveaxis(x, 0, -1).shape
    np.moveaxis(x, -1, 0).shape
    np.transpose(x).shape
    np.moveaxis(x, [0, 1], [-1, -2]).shape
    np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape
    x = [1, 2, 3]
    y = [4, 5, 6]
    np.cross(x, y)
    x = [1, 2]
    y = [4, 5, 6]
    np.cross(x, y)
    x = [1, 2, 0]
    y = [4, 5, 6]
    np.cross(x, y)
    x = [1,2]
    y = [4,5]
    np.cross(x, y)
    x = np.array([[1,2,3], [4,5,6]])
    y = np.array([[4,5,6], [1,2,3]])
    np.cross(x, y)
    np.cross(x, y, axisc=0)
    x = np.array([[1,2,3], [4,5,6], [7, 8, 9]])
    y = np.array([[7, 8, 9], [4,5,6], [1,2,3]])
    np.cross(x, y)
    np.cross(x, y, axisa=0, axisb=0)
    # np.array_repr(np.array([1,2]))
    # np.array_repr(np.ma.array([0.]))
    # np.array_repr(np.array([], np.int32))
    x = np.array([1e-6, 4e-7, 2, 3])
    # np.array_repr(x, precision=6, suppress_small=True)
    # np.array_str(np.arange(3))
    a = np.arange(10)
    x = np.arange(4)
    np.set_string_function(lambda x:'random', repr=False)
    grid = np.indices((2, 3))
    grid.shape
    grid[0]        # row indices
    grid[1]        # column indices
    x = np.arange(20).reshape(5, 4)
    row, col = np.indices((2, 3))
    x[row, col]
    np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int)
    np.fromfunction(lambda i, j: i + j, (3, 3), dtype=int)
    np.isscalar(3.1)
    np.isscalar([3.1])
    np.isscalar(False)
    # np.binary_repr(3)
    # np.binary_repr(-3)
    # np.binary_repr(3, width=4)
    # np.binary_repr(-3, width=3)
    # np.binary_repr(-3, width=5)
    # np.base_repr(5)
    # np.base_repr(6, 5)
    # np.base_repr(7, base=5, padding=3)
    # np.base_repr(10, base=16)
    # np.base_repr(32, base=16)
    np.identity(3)
    np.allclose([1e10,1e-7], [1.00001e10,1e-8])
    np.allclose([1e10,1e-8], [1.00001e10,1e-9])
    np.allclose([1e10,1e-8], [1.0001e10,1e-9])
    # np.allclose([1.0, np.nan], [1.0, np.nan])
    # np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
    np.isclose([1e10,1e-7], [1.00001e10,1e-8])
    np.isclose([1e10,1e-8], [1.00001e10,1e-9])
    np.isclose([1e10,1e-8], [1.0001e10,1e-9])
    # np.isclose([1.0, np.nan], [1.0, np.nan])
    # np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
    np.array_equal([1, 2], [1, 2])
    np.array_equal(np.array([1, 2]), np.array([1, 2]))
    np.array_equal([1, 2], [1, 2, 3])
    np.array_equal([1, 2], [1, 4])
    np.array_equiv([1, 2], [1, 2])
    np.array_equiv([1, 2], [1, 3])
    np.array_equiv([1, 2], [[1, 2], [1, 2]])
    np.array_equiv([1, 2], [[1, 2, 1, 2], [1, 2, 1, 2]])
    np.array_equiv([1, 2], [[1, 2], [1, 3]])
示例#8
0
 def normalize(vec):
     amax = np.sum(vec, axis=1)
     normalized = np.mat(vec) / np.mat(amax).T
     return np.asarray(normalized)