def main():
    script_dir = os.path.dirname(os.path.realpath(__file__))
    # Sanic route URIs must start with '/'; serve the SPA entry point at the root.
    app.static('/', os.path.join(script_dir, 'static', 'index.html'))
    app.static('/assets', os.path.join(script_dir, 'static', 'assets'))
    app.static('/images', str(get_data_home() / 'images'))
    app.static('/thumbnails', str(get_data_home() / 'thumbnails'))
    app.run(host='0.0.0.0', port=8080)
def get_config_single_host(initialize=False):
    # The query string rides along inside the path; SQLAlchemy splits it back
    # out. check_same_thread=False lets the SQLite connection be shared across
    # threads.
    config = 'sqlite:///' + str(get_data_home() / 'index.db?check_same_thread=False')
    engine = create_engine(config)
    session = sessionmaker(bind=engine)()
    if initialize:
        Base.metadata.create_all(engine)
    return session
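# A minimal usage sketch (not part of the original module): open a session
# against the on-disk SQLite index, creating the schema on first run.
from binah.model import Image, get_config_single_host

session = get_config_single_host(initialize=True)
print(session.query(Image).count(), 'images registered')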
def Neighborhood(session):
    index = AnnoyIndex(VEC_SPACE_DIMENSIONS)
    log('Enrolling vectors in vector index.')
    query = session.query(Vector.id, Vector.vec).join(Example).filter(Example.type == Type.JPEG)
    for vec_id, vec in tqdm(query, desc='Vectors enrolled', total=query.count(), unit=' vecs'):
        vec = np.frombuffer(vec, dtype=np.float32)
        index.add_item(vec_id, vec)
    log('Building and saving vector index to disk.')
    index.build(10)
    index.save(str(get_data_home() / 'vectors.tree'))
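# A quick spot-check sketch (assumed, not in the original): Annoy memory-maps
# the file written by Neighborhood(), so reloading it is cheap. The item id 1
# is an arbitrary example.
from annoy import AnnoyIndex

from binah.config import VEC_SPACE_DIMENSIONS
from binah.util import get_data_home

index = AnnoyIndex(VEC_SPACE_DIMENSIONS)
index.load(str(get_data_home() / 'vectors.tree'))
print(index.get_nns_by_item(1, 10))  # ten nearest neighbours of item 1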
def ImageVectors(session):
    log('Inferring image vectors of registered images.')
    model_fn, input_fn, _ = get_img2vec_fns(session, mode=ModeKeys.PREDICT)
    estimator = Estimator(model_fn, model_dir=str(get_models_home() / 'imgvecs'))
    # Filenames are the integer image ids; sort numerically so the order
    # matches the input_fn's order-by-id query.
    ids = sorted(int(i) for i in os.listdir(get_data_home() / 'images'))
    for imgid, vec in tqdm(zip(ids, estimator.predict(input_fn)),
                           unit=' vecs', desc='Image vectors', total=len(ids)):
        row = Vector(id=imgid, vec=vec)
        session.merge(row)
        # Flush periodically to keep the session's pending set small.
        if imgid % 1800 == 0:
            session.flush()
    session.commit()
from sanic import Sanic
from sanic.response import json
from urllib.parse import unquote
from annoy import AnnoyIndex
from binah.model import Image, get_config_single_host
from binah.util import get_data_home
import tensorflow_hub as hub
import tensorflow as tf
from binah.config import SENT_ENCODER, VEC_SPACE_DIMENSIONS
import os

app = Sanic(__name__)

tf.logging.set_verbosity(tf.logging.ERROR)

space = AnnoyIndex(VEC_SPACE_DIMENSIONS)
space.load(str(get_data_home() / 'vectors.tree'))

graph = tf.Graph()
with graph.as_default():
    sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))
    str2vec = hub.Module(SENT_ENCODER)
    # Build the embedding op once; re-calling the module per request would
    # keep adding ops to the graph of this long-running server.
    query_text = tf.placeholder(tf.string, shape=[1])
    embed = str2vec(query_text)
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])


@app.route('/q/<query>')
async def text_query(request, query):
    res = sess.run(embed, feed_dict={query_text: [unquote(query)]})
    res = space.get_nns_by_vector(res[0], 60)
    return json(res)
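# Hypothetical client call against the running service; the host and port
# match the main() entry point above, and the response body is the JSON list
# of nearest image ids returned by text_query().
import requests
from urllib.parse import quote

resp = requests.get('http://localhost:8080/q/' + quote('a dog on a beach'))
print(resp.json())  # up to 60 image ids, nearest first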
# Import the Image submodule explicitly: `import PIL` alone does not expose
# PIL.Image, and `from PIL import Image` would shadow binah's Image model.
import PIL.Image

from binah.util import get_data_home, log
from binah.model import Image
from multiprocessing import cpu_count, Pool

THUMBNAIL_SIZE = (360, 203)

THUMBNAIL_DIR = get_data_home() / 'thumbnails'
if not THUMBNAIL_DIR.is_dir():
    THUMBNAIL_DIR.mkdir(parents=True)

IMAGES_DIR = get_data_home() / 'images'


def thumbnail(imgid):
    fname = str(imgid)
    path = str(IMAGES_DIR / fname)
    with PIL.Image.open(path) as img:
        img.thumbnail(THUMBNAIL_SIZE)
        img.save(str(THUMBNAIL_DIR / fname), "JPEG")


def Thumbnails(session):
    files = [imgid for (imgid,) in session.query(Image.id)]
    log('Creating thumbnails')
    pool = Pool(cpu_count())
    pool.map(thumbnail, files)
def __init__(self, session, has_files=False):
    self.home = get_data_home()
    if has_files:
        self.home = self.home / 'images'
        self.home.mkdir(parents=True, exist_ok=True)
    self.session = session
from pytube import Playlist, YouTube as PyYouTube
from binah.util import get_data_home
from binah.model import Video, Example, Type, Lifecycle, License, Dataset
import os

PLAYLISTS = ['https://www.youtube.com/watch?v=-yOPQN19c98&list=PL9uNqONsJ8Q9hmgft3ZsDiObSAaoJG6e0']

VIDEO_DIR = get_data_home() / 'videos'
VIDEO_DIR.mkdir(parents=True, exist_ok=True)


def _create_or_use_youtube_license(session):
    query = session.query(License.id, License.name).filter(License.name == 'YouTube EULA').first()
    if query:
        lic_id = query.id
    else:
        row = License(name='YouTube EULA', url='https://www.youtube.com/static?template=terms')
        session.add(row)
        session.flush()
        lic_id = row.id
    return lic_id


def _create_or_use_youtube_dataset(session):
    query = session.query(Dataset.id, Dataset.name).filter(Dataset.name == 'YouTube').first()
    if query:
        dataset_id = query.id
    else:
        row = Dataset(name='YouTube',
                      desc="The world's most popular video sharing website.",
                      url='https://www.youtube.com/')
        session.add(row)
        session.flush()
        dataset_id = row.id
    return dataset_id
def get_img2vec_fns(session, mode=ModeKeys.TRAIN):
    # Ready the image embedding for fine-tuning on the vector space.
    img2vec = hub.Module(IMG_ENCODER, trainable=True)
    _, out_dimensions = img2vec.get_output_info_dict()['default'].get_shape()
    out_dimensions = int(out_dimensions)
    # hub.get_expected_image_size() returns [height, width].
    height, width = hub.get_expected_image_size(img2vec)
    images_path = get_data_home() / 'images'

    def infer_input_fn():
        query = session.query(Image.id).order_by(Image.id)

        def generate_example():
            for example, in query:
                yield str(images_path / str(example))

        string_shape = tf.TensorShape([])
        dataset = tf.data.Dataset.from_generator(generate_example, tf.string, string_shape)

        def decode_and_resize(fd):
            out = tf.read_file(fd)
            # In TensorFlow, "decode_jpeg" can decode PNG files too.
            # "decode_image" will not work at all, because it doesn't return
            # the tensor's shape.
            out = tf.image.decode_jpeg(out, channels=3, try_recover_truncated=True)
            out = tf.image.resize_images(out, [height, width], align_corners=True)
            return {'x': out}

        dataset = dataset.map(decode_and_resize)
        dataset = dataset.batch(1)
        features = dataset.make_one_shot_iterator().get_next()
        return features

    def train_input_fn():
        sql = text(
            'select captions.image_id, vectors.vec from captions '
            'inner join vectors on captions.text_id=vectors.id'
        )
        query = session.query(Caption.image_id, Vector.vec).from_statement(sql)

        def generate_example_pair():
            for example, vec in query:
                example = str(images_path / str(example))
                vec = np.frombuffer(vec, dtype=np.float32)
                yield example, vec

        string_shape = tf.TensorShape([])
        vector_shape = tf.TensorShape(tf.Dimension(VEC_SPACE_DIMENSIONS))
        dataset = tf.data.Dataset.from_generator(generate_example_pair,
                                                 (tf.string, tf.float32),
                                                 (string_shape, vector_shape))

        def decode_and_resize(fd, vecs):
            out = tf.read_file(fd)
            # See the note in infer_input_fn: "decode_jpeg" also handles PNG,
            # while "decode_image" doesn't return the tensor's shape.
            out = tf.image.decode_jpeg(out, channels=3, try_recover_truncated=True)
            out = tf.image.resize_images(out, [height, width], align_corners=True)
            return {'x': out}, vecs

        dataset = dataset.map(decode_and_resize)
        dataset = dataset.batch(IMG_BATCH_SIZE)
        dataset = dataset.repeat(IMG_EPOCHS)
        features, labels = dataset.make_one_shot_iterator().get_next()
        return features, labels

    def model_fn(features, labels, mode):
        tf.logging.set_verbosity(tf.logging.WARN)
        model = hub.Module(IMG_ENCODER, trainable=True)
        tf.logging.set_verbosity(tf.logging.INFO)
        model = model(features['x'])
        # Chain the projection head; feeding `model` to every layer would
        # silently discard all but the last one.
        output = tf.layers.dense(model, VEC_SPACE_DIMENSIONS, activation=tf.nn.relu)
        output = tf.layers.dense(output, VEC_SPACE_DIMENSIONS, activation=tf.nn.relu)
        output = tf.layers.dense(output, VEC_SPACE_DIMENSIONS, activation=tf.nn.tanh)
        if mode == ModeKeys.TRAIN or mode == ModeKeys.EVAL:
            loss = mean_squared_error(labels, output)
            regularizer = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            loss = loss + 0.25 * sum(regularizer)
        if mode == ModeKeys.TRAIN:
            train_op = AdamOptimizer(learning_rate=0.00001).minimize(
                loss=loss, global_step=get_global_step())
            return EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
        elif mode == ModeKeys.EVAL:
            eval_metric_ops = {
                'accuracy': tf.metrics.mean_cosine_distance(labels, output, 0)
            }
            return EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
        elif mode == ModeKeys.PREDICT:
            return EstimatorSpec(mode=mode, predictions=output)

    def receiver_fn():
        feature_spec = {'image': tf.FixedLenFeature([], dtype=tf.string)}
        tfexample = tf.placeholder(dtype=tf.string, name='input_image_tensor', shape=[])
        received_tensors = {'image': tfexample}
        parsed_example = tf.parse_example([tfexample], feature_spec)
        # parse_example yields a batch of one, so pull out the scalar string
        # before decoding, then restore the batch dimension for the model.
        out = tf.image.decode_jpeg(parsed_example['image'][0], channels=3,
                                   try_recover_truncated=True)
        out = tf.image.resize_images(out, [height, width], align_corners=True)
        out = {'x': tf.expand_dims(out, 0)}
        return tf.estimator.export.ServingInputReceiver(out, received_tensors)

    if mode == ModeKeys.TRAIN:
        return model_fn, train_input_fn, receiver_fn
    return model_fn, infer_input_fn, receiver_fn
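# A minimal driver sketch (assumed, mirroring ImageVectors above) showing how
# the functions returned by get_img2vec_fns, defined above, wire into a
# tf.estimator.Estimator for fine-tuning. `get_models_home` is taken to live
# in binah.util alongside get_data_home.
from tensorflow.estimator import Estimator, ModeKeys

from binah.model import get_config_single_host
from binah.util import get_models_home

session = get_config_single_host()
model_fn, train_input_fn, receiver_fn = get_img2vec_fns(session, mode=ModeKeys.TRAIN)
estimator = Estimator(model_fn, model_dir=str(get_models_home() / 'imgvecs'))
estimator.train(train_input_fn)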
import cv2
import PIL
from pathlib import Path
from binah.util import get_data_home, log
from binah.model import Video, Image, Type, Lifecycle, Example
import piexif
import ujson as json

THRESHOLD = 0.1

VIDEO_DIR = get_data_home() / 'videos'
IMAGES_DIR = get_data_home() / 'images'


def _preprocess_frame(frame):
    """
    Preprocess frame to be more invariant to noise and other irrelevant
    differences.
    """
    frame = cv2.resize(frame, None, fx=0.2, fy=0.2, interpolation=cv2.INTER_AREA)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.GaussianBlur(frame, (9, 9), 0.0)
    return frame


def _emit_keyframes(path):
    last_frame = []
    frame_idx = 0