# Example #1
def predict():
    """Classify the text of a JSON request and return {"code", "label"}.

    Reads {"text": ...} from the request body, embeds the text with the
    module-level word vectors, and classifies it with the module-level
    model.  NOTE(review): relies on globals `request`, `util`,
    `wordvectors`, `stop_words`, `model` and `np` defined elsewhere.
    """
    payload = request.get_json(silent=True)
    # NOTE(review): `payload` is None when the body is not valid JSON;
    # the subscript below would then raise — behavior kept as-is.
    features = util.get_vectors(payload["text"], wordvectors, stop_words)
    sample = np.array([features]).reshape(1, -1)
    probabilities = model.predict_proba(sample)[0]
    category = model.predict(sample)[0]
    code, label = util.get_result(probabilities, category)
    return {"code": code, "label": label}
# Example #2
import os
import random
import sys
from urllib import urlretrieve  # Python 2 location (urllib.request in Py3)

import annoy
import numpy
import PIL.Image, PIL.ImageOps

import util

annoy_fn = 'mnist.annoy'
data_fn = 'mnist.pkl.gz'

if not os.path.exists(annoy_fn):
    if not os.path.exists(data_fn):
        print 'downloading'
        urlretrieve('http://deeplearning.net/data/mnist/mnist.pkl.gz', data_fn)

    a = annoy.AnnoyIndex(784, 'euclidean')
    for i, pic in util.get_vectors(data_fn):
        a.add_item(i, pic)

    print 'building'
    a.build(10)
    a.save(annoy_fn)

a = annoy.AnnoyIndex(784, 'euclidean')
a.load(annoy_fn)

pics = 5
nns = 10
img_size = 100
margin = 16

main_image = PIL.Image.new('RGB', (img_size * nns + margin, img_size * pics), 'white')
from util import get_vectors

# Build an Annoy index plus an LMDB word <-> id mapping for a word-vector
# file given on the command line.
# NOTE(review): Python 2 snippet (`print` statement, generator `.next()`);
# relies on `sys`, `os`, `annoy`, and `lmdb` imported elsewhere.
if len(sys.argv) < 2:
    print __doc__
    # NOTE(review): bare `raise` with no active exception — this aborts
    # with a TypeError/RuntimeError rather than a clean usage error.
    raise

fn = sys.argv[1]
search_k = 100000  # default Annoy search breadth; overridable via argv[2]
if len(sys.argv) > 2:
    search_k = int(sys.argv[2])

fn_annoy = fn + '.annoy'
fn_lmdb = fn + '.lmdb'  # stores word <-> id mapping

# Read only the first vector to learn the embedding dimensionality.
word, vec = get_vectors(fn).next()
size = len(vec)
env = lmdb.open(fn_lmdb, map_size=int(1e9))

# Build both stores only if either is missing.
if not os.path.exists(fn_annoy) or not os.path.exists(fn_lmdb):
    i = 0
    a = annoy.AnnoyIndex(size)
    with env.begin(write=True) as txn:
        for word, vec in get_vectors(sys.argv[1]):
            a.add_item(i, vec)
            # 'i'/'w' prefixes let ids and words share one LMDB keyspace.
            id = 'i%d' % i
            word = 'w' + word
            txn.put(id, word)
            txn.put(word, id)
            i += 1
            if i % 1000 == 0:
# NOTE(review): the snippet is truncated here — the body of the `if`
# above (presumably a progress message) is missing from the source.
# Example #4
from util import get_vectors

# Duplicate of the previous snippet: build an Annoy index plus an LMDB
# word <-> id mapping for a word-vector file named on the command line.
# NOTE(review): Python 2 code; relies on `sys`, `os`, `annoy`, `lmdb`
# imported elsewhere, and is truncated at the end (dangling `if`).
if len(sys.argv) < 2:
    print __doc__
    # NOTE(review): bare `raise` outside an except block — not a clean
    # usage error.
    raise

fn = sys.argv[1]
search_k = 100000  # default Annoy search breadth; overridable via argv[2]
if len(sys.argv) > 2:
    search_k = int(sys.argv[2])

fn_annoy = fn + '.annoy'
fn_lmdb = fn + '.lmdb' # stores word <-> id mapping

# Read only the first vector to learn the embedding dimensionality.
word, vec = get_vectors(fn).next()
size = len(vec)
env = lmdb.open(fn_lmdb, map_size=int(1e9))

# Build both stores only if either is missing.
if not os.path.exists(fn_annoy) or not os.path.exists(fn_lmdb):
    i = 0
    a = annoy.AnnoyIndex(size)
    with env.begin(write=True) as txn:
        for word, vec in get_vectors(sys.argv[1]):
            a.add_item(i, vec)
            # 'i'/'w' prefixes let ids and words share one LMDB keyspace.
            id = 'i%d' % i
            word = 'w' + word
            txn.put(id, word)
            txn.put(word, id)
            i += 1
            if i % 1000 == 0:
# NOTE(review): snippet truncated here — the `if` body is missing.
# Example #5
fs_synt = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
ts_synt = []
for f in fs_synt:
    print f, '...'
    dataset = get_gaussian(n, f)
    avgs = get_avgs(dataset)
    ts_synt.append(avgs)

fs_real = []
ts_real = []
es_real = []
for fn in sys.argv[1:]:
    print fn, '...'
    es_real.append(fn.split('/')[-1])
    dataset = [numpy.array(x) for item, x in get_vectors(fn, n)]
    f = len(dataset[0])
    avgs = get_avgs(dataset, d=distance.cosine)
    fs_real.append(f)
    ts_real.append(avgs)

print fs_real, ts_real, es_real

def configure_ax():
    """Create a log-log matplotlib axes for the distance-vs-dimension plot.

    Returns the ``(figure, axes)`` pair from ``plt.subplots()`` with both
    scales logarithmic, a full grid, and axis labels applied.
    NOTE(review): depends on a module-level ``plt`` import that is not
    visible in this excerpt.
    """
    figure, axes = plt.subplots()
    axes.set_xlabel('Number of dimensions')
    axes.set_ylabel('Euclidean/cosine distance')
    axes.set_xscale('log')
    axes.set_yscale('log')
    axes.grid(True, 'both')
    return figure, axes