def predict():
    """Classify the text carried in the current request's JSON body.

    Reads the ``"text"`` field of the request JSON, turns it into a feature
    vector with ``util.get_vectors``, runs the module-level ``model`` on it
    and maps the outcome through ``util.get_result``.

    Returns:
        dict: ``{"code": ..., "label": ...}`` holding the two values
        returned by ``util.get_result``.
    """
    # NOTE(review): with silent=True the JSON accessor presumably returns
    # None on a missing/invalid body, so the lookup below would raise
    # TypeError -- confirm how callers expect bad requests to be reported.
    payload = request.get_json(silent=True)
    features = util.get_vectors(payload["text"], wordvectors, stop_words)

    # The model is fed a single-row 2-D array.
    sample = np.array([features]).reshape(1, -1)

    probabilities = model.predict_proba(sample)[0]
    category = model.predict(sample)[0]

    code, label = util.get_result(probabilities, category)
    return {"code": code, "label": label}
import annoy
import random
import PIL.Image, PIL.ImageOps
import numpy
import util

# On-disk locations of the Annoy index and of the raw MNIST archive.
# NOTE(review): `os` and `urlretrieve` are used below but are not imported
# in this part of the file -- they must be brought into scope elsewhere.
annoy_fn = 'mnist.annoy'
data_fn = 'mnist.pkl.gz'

# Build the index only when no saved copy exists yet.
if not os.path.exists(annoy_fn):
    # Fetch the dataset archive first if it is not available locally.
    if not os.path.exists(data_fn):
        # Single-argument print(...) yields the same output on Python 2 and
        # is also valid syntax on Python 3.
        print('downloading')
        urlretrieve('http://deeplearning.net/data/mnist/mnist.pkl.gz', data_fn)

    # 784-dimensional vectors compared with the Euclidean metric.
    a = annoy.AnnoyIndex(784, 'euclidean')
    for i, pic in util.get_vectors(data_fn):
        a.add_item(i, pic)

    print('building')
    a.build(10)
    a.save(annoy_fn)

# Always work from the index file on disk, whether it was just built or
# already existed.
a = annoy.AnnoyIndex(784, 'euclidean')
a.load(annoy_fn)

# Layout parameters for the output canvas: the image is
# (img_size * nns + margin) pixels wide and (img_size * pics) pixels tall.
pics = 5
nns = 10
img_size = 100
margin = 16
main_image = PIL.Image.new('RGB', (img_size * nns + margin, img_size * pics), 'white')
from util import get_vectors if len(sys.argv) < 2: print __doc__ raise fn = sys.argv[1] search_k = 100000 if len(sys.argv) > 2: search_k = int(sys.argv[2]) fn_annoy = fn + '.annoy' fn_lmdb = fn + '.lmdb' # stores word <-> id mapping word, vec = get_vectors(fn).next() size = len(vec) env = lmdb.open(fn_lmdb, map_size=int(1e9)) if not os.path.exists(fn_annoy) or not os.path.exists(fn_lmdb): i = 0 a = annoy.AnnoyIndex(size) with env.begin(write=True) as txn: for word, vec in get_vectors(sys.argv[1]): a.add_item(i, vec) id = 'i%d' % i word = 'w' + word txn.put(id, word) txn.put(word, id) i += 1 if i % 1000 == 0:
# Dimensionalities used for the synthetic (Gaussian) datasets.
fs_synt = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
ts_synt = []
for f in fs_synt:
    # %-formatting into a single print(...) argument emits the same text as
    # the Python 2 statement `print f, '...'` and also parses on Python 3.
    print('%s ...' % (f,))
    dataset = get_gaussian(n, f)
    avgs = get_avgs(dataset)
    ts_synt.append(avgs)

# Per-file results for the datasets named on the command line:
#   fs_real -- length of the first vector of each dataset
#   ts_real -- value returned by get_avgs (cosine distance) for each dataset
#   es_real -- base name of each input path
fs_real = []
ts_real = []
es_real = []
for fn in sys.argv[1:]:
    print('%s ...' % (fn,))
    es_real.append(fn.split('/')[-1])
    dataset = [numpy.array(x) for item, x in get_vectors(fn, n)]
    # NOTE(review): raises IndexError when get_vectors yields nothing.
    f = len(dataset[0])
    avgs = get_avgs(dataset, d=distance.cosine)
    fs_real.append(f)
    ts_real.append(avgs)

print('%s %s %s' % (fs_real, ts_real, es_real))


def configure_ax():
    """Create a figure whose axes are prepared for a log-log distance plot.

    Both axes are switched to logarithmic scale, the grid is enabled and
    the axis labels are set.

    Returns:
        tuple: the ``(fig, ax)`` pair produced by ``plt.subplots()``.
    """
    fig, ax = plt.subplots()
    ax.set_xscale('log')
    ax.set_yscale('log')
    # The second positional argument of Axes.grid is `which`.
    ax.grid(True, 'both')
    ax.set_xlabel('Number of dimensions')
    ax.set_ylabel('Euclidean/cosine distance')
    return fig, ax