Exemplo n.º 1
0
def nearest_word(vector, vector_lib, index2word, n=5, skip=0,
                 chunk_size=100000, use_ne=False, use_shortdot=True,
                 thresh=0.0):
    """Return the ``n`` rows of ``vector_lib`` scoring highest against ``vector``.

    Rows are ranked by descending score. The score comes from one of three
    backends, tried in this order:

    * ``use_ne``: ``sum(vector_lib * vector, axis=1)`` evaluated through
      ``ne.evaluate``.
    * ``use_shortdot``: ``shortdot.shortdot`` is called with a zero-filled
      float32 buffer, which is then ranked. Presumably the call writes one
      score per row into that buffer -- confirm against ``shortdot``.
    * otherwise: ``similarity`` is applied to each piece yielded by
      ``chunks(vector_lib, chunk_size)``; the best ``n`` of every chunk are
      kept as candidates and the best ``n`` candidates overall are returned.

    Args:
        vector: query vector.
        vector_lib: library of vectors; indexed by row and, for the
            shortdot backend, read via ``.shape[0]``.
        index2word: lookup from a row index of ``vector_lib`` to its word.
        n: number of results to return.
        skip: accepted for interface compatibility; not used by this
            function.
        chunk_size: chunk length handed to ``chunks`` (fallback backend
            only). Row offsets advance by this amount per chunk, so
            ``chunks`` is assumed to yield consecutive pieces of exactly
            this many rows (the last one may be shorter).
        use_ne: select the ``ne.evaluate`` backend.
        use_shortdot: select the ``shortdot`` backend (only consulted when
            ``use_ne`` is false).
        thresh: forwarded unchanged to ``shortdot.shortdot``.

    Returns:
        A tuple ``(words, vectors, sim)`` of equal-length lists, best match
        first: the words, the matching rows of ``vector_lib`` and their
        scores.
    """
    if use_ne:
        # The expression string refers to `vector_lib` and `vector` by
        # name, so these parameters must keep their names.
        d = ne.evaluate('sum(vector_lib * vector, axis=1)')
        top = np.argsort(d)[::-1][:n]
        sim = [d[i] for i in top]
    elif use_shortdot:
        d = np.zeros(vector_lib.shape[0], dtype='f4')
        shortdot.shortdot(vector_lib, vector, d, 100, thresh)
        top = np.argsort(d)[::-1][:n]
        sim = [d[i] for i in top]
    else:
        # Keep the *global* row index and the score of every per-chunk
        # candidate. Ranking positions in the candidate list are not row
        # indices of vector_lib, and `d` only holds the last chunk's
        # scores, so neither may be used to build the final result.
        cand_rows = []
        cand_sims = []
        offset = 0
        for vl in chunks(vector_lib, chunk_size):
            d = similarity(vl, vector)
            for j in np.argsort(d)[::-1][:n]:
                cand_rows.append(j + offset)
                cand_sims.append(d[j])
            offset += chunk_size
        order = np.argsort(cand_sims)[::-1][:n]
        top = [cand_rows[k] for k in order]
        sim = [cand_sims[k] for k in order]
    words = [index2word[i] for i in top]
    vectors = [vector_lib[i] for i in top]
    return words, vectors, sim
Exemplo n.º 2
0
def nearest_word(vector,
                 vector_lib,
                 index2word,
                 n=5,
                 skip=0,
                 chunk_size=100000,
                 use_ne=False,
                 use_shortdot=True,
                 thresh=0.0):
    """Return the ``n`` rows of ``vector_lib`` scoring highest against ``vector``.

    Rows are ranked by descending score. The score comes from one of three
    backends, tried in this order:

    * ``use_ne``: ``sum(vector_lib * vector, axis=1)`` evaluated through
      ``ne.evaluate``.
    * ``use_shortdot``: ``shortdot.shortdot`` is called with a zero-filled
      float32 buffer, which is then ranked. Presumably the call writes one
      score per row into that buffer -- confirm against ``shortdot``.
    * otherwise: ``similarity`` is applied to each piece yielded by
      ``chunks(vector_lib, chunk_size)``; the best ``n`` of every chunk are
      kept as candidates and the best ``n`` candidates overall are returned.

    Args:
        vector: query vector.
        vector_lib: library of vectors; indexed by row and, for the
            shortdot backend, read via ``.shape[0]``.
        index2word: lookup from a row index of ``vector_lib`` to its word.
        n: number of results to return.
        skip: accepted for interface compatibility; not used by this
            function.
        chunk_size: chunk length handed to ``chunks`` (fallback backend
            only). Row offsets advance by this amount per chunk, so
            ``chunks`` is assumed to yield consecutive pieces of exactly
            this many rows (the last one may be shorter).
        use_ne: select the ``ne.evaluate`` backend.
        use_shortdot: select the ``shortdot`` backend (only consulted when
            ``use_ne`` is false).
        thresh: forwarded unchanged to ``shortdot.shortdot``.

    Returns:
        A tuple ``(words, vectors, sim)`` of equal-length lists, best match
        first: the words, the matching rows of ``vector_lib`` and their
        scores.
    """
    if use_ne:
        # The expression string refers to `vector_lib` and `vector` by
        # name, so these parameters must keep their names.
        d = ne.evaluate('sum(vector_lib * vector, axis=1)')
        best = np.argsort(d)[::-1][:n]
        sim = [d[i] for i in best]
    elif use_shortdot:
        d = np.zeros(vector_lib.shape[0], dtype='f4')
        shortdot.shortdot(vector_lib, vector, d, 100, thresh)
        best = np.argsort(d)[::-1][:n]
        sim = [d[i] for i in best]
    else:
        # Record the *global* row index and score of each per-chunk
        # candidate. A position in the candidate list is not a row index
        # of vector_lib, and `d` is overwritten on every chunk, so the
        # final result must be assembled from these two lists instead.
        candidate_rows = []
        candidate_sims = []
        offset = 0
        for vl in chunks(vector_lib, chunk_size):
            d = similarity(vl, vector)
            for j in np.argsort(d)[::-1][:n]:
                candidate_rows.append(j + offset)
                candidate_sims.append(d[j])
            offset += chunk_size
        ranking = np.argsort(candidate_sims)[::-1][:n]
        best = [candidate_rows[k] for k in ranking]
        sim = [candidate_sims[k] for k in ranking]
    words = [index2word[i] for i in best]
    vectors = [vector_lib[i] for i in best]
    return words, vectors, sim
Exemplo n.º 3
0
    # Tail of a branch whose opening condition lies outside this excerpt.
    thresh = -1.0
    print "created matrix"
else:
    # Use vectors stored on disk as the benchmark matrix, cast to float32.
    A = np.load("/nobackupp5/cmoody3/data/ids/trained/vectors.fullwiki.1000.s50.num.npy")
    A = A.astype('f4')
    # Query vector: the row at index shape[0]/2 (the print statements show
    # this is Python 2, where `/` on ints is integer division).
    B = A[A.shape[0]/2]
    C = np.zeros(A.shape[0]).astype('f8')
    thresh = 0.0
    rows = A.shape[0]
    dims = A.shape[1]
C = C.astype('f4')
# Number of timed repetitions for each implementation.
n = 20
start = time.time()
for i in range(n):
    # Fresh zeroed float32 output buffer for every call.
    C = np.zeros(rows).astype('f4')
    skipped = shortdot.shortdot(A, B, C, 50, thresh)
stop = time.time()
# `skipped` (from the last call) as a fraction of rows * dims.
frac = skipped * 1.0 / (rows * dims)
print "finished cython, skipped %i, %1.1f%%"  % (skipped, frac * 100.0)
# Mean wall-clock time per shortdot call, in microseconds.
cy = (stop - start) * 1.0 / n * 1e6
print 'cython top 5:', np.sort(C)[-5:], np.argsort(C)[-5:]

start = time.time()
for i in range(n):
    # `x` is never read; presumably allocated so this loop pays the same
    # per-iteration allocation cost as the loop above -- confirm.
    x = np.zeros(rows).astype('f4')
    D = np.dot(A, B)
stop = time.time()
# Mean wall-clock time per np.dot call, in microseconds.
py = (stop - start) * 1.0 / n * 1e6
print 'numpy  top 5:', np.sort(D)[-5:], np.argsort(D)[-5:]
# Rank positions (descending order) at which the two results disagree.
comp = np.where(np.argsort(D)[::-1] != np.argsort(C)[::-1])[0]
print "first inequal indices", comp[:10]
Exemplo n.º 4
0
else:
    # Alternative to a branch whose `if` lies outside this excerpt: use
    # vectors stored on disk as the benchmark matrix, cast to float32.
    A = np.load(
        "/nobackupp5/cmoody3/data/ids/trained/vectors.fullwiki.1000.s50.num.npy"
    )
    A = A.astype('f4')
    # Query vector: the row at index shape[0] / 2 (the print statements show
    # this is Python 2, where `/` on ints is integer division).
    B = A[A.shape[0] / 2]
    C = np.zeros(A.shape[0]).astype('f8')
    thresh = 0.0
    rows = A.shape[0]
    dims = A.shape[1]
C = C.astype('f4')
# Number of timed repetitions for each implementation.
n = 20
start = time.time()
for i in range(n):
    # Fresh zeroed float32 output buffer for every call.
    C = np.zeros(rows).astype('f4')
    skipped = shortdot.shortdot(A, B, C, 50, thresh)
stop = time.time()
# `skipped` (from the last call) as a fraction of rows * dims.
frac = skipped * 1.0 / (rows * dims)
print "finished cython, skipped %i, %1.1f%%" % (skipped, frac * 100.0)
# Mean wall-clock time per shortdot call, in microseconds.
cy = (stop - start) * 1.0 / n * 1e6
print 'cython top 5:', np.sort(C)[-5:], np.argsort(C)[-5:]

start = time.time()
for i in range(n):
    # `x` is never read; presumably allocated so this loop pays the same
    # per-iteration allocation cost as the loop above -- confirm.
    x = np.zeros(rows).astype('f4')
    D = np.dot(A, B)
stop = time.time()
# Mean wall-clock time per np.dot call, in microseconds.
py = (stop - start) * 1.0 / n * 1e6
print 'numpy  top 5:', np.sort(D)[-5:], np.argsort(D)[-5:]
# Rank positions (descending order) at which the two results disagree.
comp = np.where(np.argsort(D)[::-1] != np.argsort(C)[::-1])[0]
print "first inequal indices", comp[:10]