import sys

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_addons as tfa

# CohenKappaLoss is a project-specific loss assumed to be defined elsewhere.


def get_model(latest_checkpoint, strategy, model_source, model_name, image_size,
              trainable_layer, fc_size, category_labels, loss_function, tfa_metrics):
    with strategy.scope():
        if model_source == 'keras':
            include_top = False

            if model_name == 'VGG16':
                base_model = tf.keras.applications.VGG16(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('VGG16')
            elif model_name == 'VGG19':
                base_model = tf.keras.applications.VGG19(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('VGG19')
            elif model_name == 'MobileNetV2':
                base_model = tf.keras.applications.MobileNetV2(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('MobileNetV2')
            elif model_name == 'InceptionV3':
                base_model = tf.keras.applications.InceptionV3(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('InceptionV3')
            elif model_name == 'Xception':
                base_model = tf.keras.applications.Xception(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('Xception')
            elif model_name == 'ResNet50':
                base_model = tf.keras.applications.ResNet50(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('ResNet50')
            elif model_name == 'DenseNet201':
                base_model = tf.keras.applications.densenet.DenseNet201(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('DenseNet201')
            elif model_name == 'NASNetLarge':
                base_model = tf.keras.applications.nasnet.NASNetLarge(
                    input_shape=(image_size, image_size, 3), include_top=include_top, weights='imagenet')
                print('NASNetLarge')
            else:
                sys.exit("Wrong model name")

            if trainable_layer > 0:
                for layer in base_model.layers[:-trainable_layer]:
                    layer.trainable = False
            else:
                for layer in base_model.layers:
                    layer.trainable = False

        elif model_source == 'tfhub':
            if model_name == 'MobileNetV2':
                base_model_folder = '/mnt/AI/tfhub/7d894117f08a295a627d24c65df048e34e7ac7d4/'
                print('MobileNetV2')
            elif model_name == 'InceptionV3':
                base_model_folder = '/mnt/AI/tfhub/3f675e18714cfa891d083a31557195a0508e560d/'
                print('InceptionV3')
            elif model_name == 'ResNet50':
                base_model_folder = '/mnt/AI/tfhub/5e690529696a1ca5ff36a5e9c7f7255180ef2364/'
                print('ResNet50')
            elif model_name == 'NASNetLarge':
                base_model_folder = '/mnt/AI/tfhub/c57f54b3f7d0ff4ab1eba180075fb0afe4101034/'
                print('NASNetLarge')
            else:
                sys.exit("Wrong model name")

            if trainable_layer == 0:
                base_model_trainable = False
            else:
                base_model_trainable = True

            base_model = tf.keras.Sequential([
                hub.KerasLayer(hub.load(base_model_folder),
                               trainable=base_model_trainable,
                               input_shape=(image_size, image_size, 3))
            ])

        else:
            sys.exit("Wrong model source")

        x = base_model.output
        if model_source == 'keras':
            x = tf.keras.layers.GlobalAveragePooling2D()(x)
        if fc_size > 0:
            x = tf.keras.layers.Dense(fc_size, activation='relu')(x)
            x = tf.keras.layers.Dropout(0.25)(x)
        predictions = tf.keras.layers.Dense(
            len(category_labels), activation=tf.nn.softmax, name='predictions')(x)
        model_created = tf.keras.Model(inputs=base_model.input, outputs=predictions)

        # model_created.summary()

        # for layer in model_created.layers:
        #     print(layer, layer.trainable)

        if loss_function == 'fl':
            loss = tfa.losses.SigmoidFocalCrossEntropy()
        elif loss_function in ('ce', 'wce'):
            loss = tf.keras.losses.CategoricalCrossentropy()
            if 2 == len(category_labels):
                loss = tf.keras.losses.BinaryCrossentropy()
        elif loss_function == 'ck':
            loss = CohenKappaLoss(len(category_labels))
        else:
            sys.exit("Wrong loss function")

        metrics = [tf.keras.metrics.CategoricalAccuracy()]
        if 2 == len(category_labels):
            metrics = [tf.keras.metrics.BinaryAccuracy()]

        metrics += [tf.keras.metrics.AUC(name='auc'),
                    tf.keras.metrics.Precision(name='precision'),
                    tf.keras.metrics.Recall(name='recall'),
                    # tf.keras.metrics.TruePositives(name='true_positives'),
                    # tf.keras.metrics.FalsePositives(name='false_positives'),
                    # tf.keras.metrics.TrueNegatives(name='true_negatives'),
                    # tf.keras.metrics.FalseNegatives(name='false_negatives'),
                    # tfa.metrics.CohenKappa(num_classes=len(category_labels)),
                    tfa.metrics.F1Score(num_classes=len(category_labels)),
                    tfa.metrics.FBetaScore(num_classes=len(category_labels))]

        if tfa_metrics != 0:
            metrics += [tfa.metrics.CohenKappa(num_classes=len(category_labels))]

        model_created.compile(optimizer=tf.keras.optimizers.Adam(), loss=loss, metrics=metrics)
        # model_created.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001), loss=loss, metrics=metrics)

        if latest_checkpoint:
            model_created.load_weights(latest_checkpoint)

        return model_created
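
# A usage sketch (hypothetical arguments) for the factory above; downloads
# ImageNet weights on first use.
strategy = tf.distribute.get_strategy()
model = get_model(latest_checkpoint=None, strategy=strategy,
                  model_source='keras', model_name='ResNet50', image_size=224,
                  trainable_layer=0, fc_size=256,
                  category_labels=['cat', 'dog'], loss_function='ce',
                  tfa_metrics=0)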
Example No. 2
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from PIL import Image

img = Image.open('test.jpg')
np_array = np.array(img)
tf_img = tf.convert_to_tensor(np_array, dtype=tf.float32)
# The localizer's default signature expects a float32 batch in [0, 1] at
# 192x192, so normalize and resize before adding the batch axis.
tf_img = tf.image.resize(tf_img / 255.0, (192, 192))
tf_img = tf_img[tf.newaxis, ...]

localizer = hub.load("https://tfhub.dev/google/object_detection/mobile_object_localizer_v1/1")

print(localizer.signatures['default'](tf_img))
"""More detailed information about installing Tensorflow can be found at [https://www.tensorflow.org/install/](https://www.tensorflow.org/install/)."""

#@title Load the Universal Sentence Encoder's TF Hub module
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns

module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"]
model = hub.load(module_url)
print ("module %s loaded" % module_url)
def embed(input):
  return model(input)

#@title Compute a representation for each message, showing various lengths supported.
word = "Elephant"
sentence = "I am a sentence for which I would like to get its embedding."
paragraph = (
    "Universal Sentence Encoder embeddings also support short paragraphs. "
    "There is no hard limit on how long the paragraph is. Roughly, the longer "
    "the more 'diluted' the embedding will be.")
messages = [word, sentence, paragraph]

# Reduce logging output.
logging.set_verbosity(logging.ERROR)
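
# A minimal usage sketch (assumed): embed the three messages above; USE
# returns one 512-dimensional vector per input string.
message_embeddings = embed(messages)
for msg, emb in zip(messages, message_embeddings):
  print("%s... -> %s" % (msg[:30], emb.shape))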
Example No. 4
import tensorflow_hub as hub
import numpy as np
import tensorflow_text
import re

# Load model from tensorflow hub
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/2")
# Load model from local
# embed = hub.KerasLayer('./model/')

def text_preprocess(text, lang):
    if lang == 'en':
        # TEXT CLEANING
        TEXT_CLEANING_RE = "[^A-Za-z0-9]"
        # Remove links, user mentions, and special characters
        ptext = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower())
        # Collapse extra whitespace
        ptext = re.sub(r'[\s]{2,}', ' ', ptext).strip()
        return ptext
    elif lang == 'zh':
        # TEXT CLEANING
        TEXT_CLEANING_RE = "[^\u4E00-\u9FFF0-9]"
        # Remove links, user mentions, and special characters
        ptext = re.sub(TEXT_CLEANING_RE, ' ', text)
        # Collapse extra whitespace
        ptext = re.sub(r'[\s]{2,}', ' ', ptext).strip()
        return ptext
    else:
        return text
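
# A usage sketch (hypothetical inputs) for the cleaner above:
print(text_preprocess("Check https://t.co/abc NOW!!", 'en'))  # -> 'check https t co abc now'
print(text_preprocess("招标公告:项目123", 'zh'))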
        
def evaluate(sentences1, sentences2, trans_type):
    ...  # body not included in the source listing


for lang in train_dataset_by_lang:
    print(lang)
    print("  train: " + str(len(train_dataset_by_lang[lang])))
    print("  dev: " + str(len(dev_dataset_by_lang[lang])))

train_set = [
    data for data_by_lang in train_dataset_by_lang.values()
    for data in data_by_lang
]
dev_set = [
    data for data_by_lang in dev_dataset_by_lang.values()
    for data in data_by_lang
]

print(len(train_set), len(dev_set))

preprocessor = hub.load(
    "https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/2")
tokenize = hub.KerasLayer(preprocessor.tokenize)
bert_pack_inputs = hub.KerasLayer(preprocessor.bert_pack_inputs,
                                  arguments=dict(seq_length=128))
single_bert_input = hub.KerasLayer(preprocessor)


def take_first(item):
    return {
        "input_mask": item["input_mask"][0],
        "input_type_ids": item["input_type_ids"][0],
        "input_word_ids": item["input_word_ids"][0],
    }


@tf.function(input_signature=[
        tf.TensorSpec(shape=[None], dtype=tf.string)])
def preprocess_sentences(sentences):
    # (completion assumed; the decorator was truncated in the source listing)
    # Tokenize a batch of strings and pack it as a single BERT segment.
    return bert_pack_inputs([tokenize(sentences)])
Example No. 6
    return filtered_dic


def run_detector(detector, path):
    img = load_img(path)

    converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
    start_time = time.time()
    result = detector(converted_img)
    end_time = time.time()

    result = {key: value.numpy() for key, value in result.items()}
    filtered_dic = create_filtered_dic(filter_result(result, 0.3, 10), result)

    print("Found %d objects." % len(result["detection_scores"]))
    print("Inference time: ", end_time - start_time)

    image_with_boxes = draw_boxes(
        img.numpy(), filtered_dic["detection_boxes"],
        filtered_dic["detection_class_entities"], filtered_dic["detection_scores"])

    display_image(image_with_boxes)

    crop_out_boxes(img.numpy(), filtered_dic["detection_boxes"])


module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"

detector = hub.load(module_handle).signatures['default']
run_detector(detector, "img_19.jpg")
Example No. 7
import tensorflow_hub as hub
import cv2
import numpy as np
import tensorflow as tf

detector = hub.load("/home/grandpadzb/tfhub_modules/ssd_mobilenet_v2_2")
print("Complete loading")

camera = cv2.VideoCapture(0)

while cv2.waitKey(1) != 113:  # 113 == ord('q'), i.e. quit on 'q'
    _, src = camera.read()
    # src = cv2.imread("/home/grandpadzb/MathscriptsLib/selfDifineNetwork/party.jpg")
    src = cv2.resize(src, dsize=(320, 320))
    src = src[np.newaxis, :]
    img = tf.convert_to_tensor(src, dtype="uint8")

    # ==========================
    output = detector(img)
    figure_num = 0
    for i in range(int(output["num_detections"].numpy()[0])):
        class_index = output["detection_classes"].numpy()[0][i]
        if class_index == 1.0:
            # cv2.rectangle needs integer pixel coordinates
            box = np.fix(output["detection_boxes"].numpy()[0][i] * 320).astype(int)
            cv2.rectangle(src[0, :], (box[1], box[0]), (box[3], box[2]),
                          (255, 255, 255), 2)
            figure_num += 1
        if figure_num >= 1:
            break
    cv2.imshow("result", src[0, :])
    src = src[0, :]
Example No. 8
 def __init__(self, model: EmbeddingModel):
     self.model = hub.load(model.model_name)
Example No. 9
# %decor_header Fast Examples Of Neural Networks

get_file('01_00_002.png', 'paintings/')
content_image = load_img('01_00_002.png')
x = tf.keras.applications.vgg19.preprocess_input(content_image * 255)
x = tf.image.resize(x, (224, 224))
vgg19 = tf.keras.applications.VGG19(include_top=True, weights='imagenet')
prediction_probabilities = vgg19(x)
predicted_top5 = tf.keras.applications.vgg19.decode_predictions(
    prediction_probabilities.numpy())[0]
for number, class_name, prob in predicted_top5:
    print([class_name, prob])
tensor_to_image(content_image)

hub_module = hub.load(tfhub_path)
get_file('00_03_002.png', 'paintings/')
get_file('00_00_001.png', 'patterns/')
content_image = load_img('00_03_002.png')
style_image = load_img('00_00_001.png')
stylized_image = hub_module(tf.constant(content_image),
                            tf.constant(style_image))[0]
tensor_to_image(stylized_image)

# Commented out IPython magic to ensure Python compatibility.
# %decor_header Some Variants Of Keras Models


def mlp_model(img_size, num_classes):
    model = tf.keras.models.Sequential([
        tkl.Flatten(input_shape=(img_size, img_size, 3)),
Example No. 10
import json

import numpy as np
import PIL.Image
import tensorflow as tf
import tensorflow_hub as hub
from flask import Flask


class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, float):
            return str(obj)
        elif isinstance(obj, PIL.Image.Image):
            return str(obj.size)
        return json.JSONEncoder.default(self, obj)

ssd = hub.load("https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1")

model_yn1 = tf.keras.models.load_model('yn1_model.h5')
model_eye = tf.keras.models.load_model('eye_crop_model.h5')

app = Flask(__name__)

def sharpness(img):
    im = img.convert('L') # to grayscale
    array = np.asarray(im, dtype=np.int32)

    gy, gx = np.gradient(array)
    gnorm = np.sqrt(gx**2 + gy**2)
    return np.average(gnorm)

def bird_eye(oc):
Example No. 11
  if category not in categories:
    categories[category] = []
  categories[category].append(video)
print("Found %d videos in %d categories." % (len(ucf_videos), len(categories)))

for category, sequences in categories.items():
  summary = ", ".join(sequences[:2])
  print("%-20s %4d videos (%s, ...)" % (category, len(sequences), summary))

# Get a sample cricket video.
video_path = fetch_ucf_video("v_CricketShot_g04_c02.avi")
sample_video = load_video(video_path)

sample_video.shape

i3d = hub.load("https://tfhub.dev/deepmind/i3d-kinetics-400/1").signatures['default']

def predict(sample_video):
  # Add a batch axis to the sample video.
  model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]

  logits = i3d(model_input)['default'][0]
  probabilities = tf.nn.softmax(logits)

  print("Top 5 actions:")
  for i in np.argsort(probabilities)[::-1][:5]:
    print(f"  {labels[i]:22}: {probabilities[i] * 100:5.2f}%")

predict(sample_video)

## https://commons.wikimedia.org/wiki/Category
Example No. 12
async def setup_model():
    model = hub.load(model_file_url)
    return model
Example No. 13
def load_model():
    #model = tf.keras.applications.MobileNetV2(weights="imagenet")
    model = hub.load(
        'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'
    )
    return model
Example No. 14
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import pyautogui
import win32api, win32con, win32gui
import cv2
import math
import time

detector = hub.load(
    "https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1")
size_scale = 3

while True:
    # Get rect of Window
    hwnd = win32gui.FindWindow(None, 'Counter-Strike: Global Offensive')
    #hwnd = win32gui.FindWindow("UnrealWindow", None) # Fortnite
    rect = win32gui.GetWindowRect(hwnd)
    region = rect[0], rect[1], rect[2] - rect[0], rect[3] - rect[1]

    # Get image of screen
    ori_img = np.array(pyautogui.screenshot(region=region))
    ori_img = cv2.resize(
        ori_img,
        (ori_img.shape[1] // size_scale, ori_img.shape[0] // size_scale))
    image = np.expand_dims(ori_img, 0)
    img_w, img_h = image.shape[2], image.shape[1]

    # Detection
    result = detector(image)
    result = {key: value.numpy() for key, value in result.items()}
Example No. 15
# (assumed imports for this snippet; ObjectDetectorFromBase64 is project-specific
# and defined elsewhere)
import os
import time
import logging as log

import tensorflow as tf
import tensorflow_hub as hub
from fastapi import FastAPI, Depends

app = FastAPI()

# Print TensorFlow version
log.info("TensorFlow version: %s", tf.__version__)

# Check available GPU devices.
log.info("The following GPU devices are available: %s" %
         tf.test.gpu_device_name())

# Object detection module
module_env = os.environ.get("MODULE")
log.info("MODULE env: %s", module_env)
module_path = "/model_faster_rcnn" if module_env == "FASTER_RCNN" else "/model_ssd"

log.info("Loading module_env from: %s", module_path)

start_time = time.time()
tf_hub_module = hub.load(module_path).signatures["default"]
end_time = time.time()

log.info("Loading module time: %.2f", end_time - start_time)

object_detector = ObjectDetectorFromBase64(tf_hub_module)


@app.get("/healthcheck")
def healthcheck():
    return "OK"


@app.post("/predict")
def detect_objects_base64(results: dict = Depends(object_detector)):
    return results
Example No. 16
        senta = row[0].strip()
        sentb = row[1].strip()
        senta = re.sub(pattern, '', senta)
        sentb = re.sub(pattern, '', sentb)
        veca = embed(senta)["outputs"]
        vecb = embed(sentb)["outputs"]
        score = np.inner(veca, vecb)[0]
        if str(row[-1]) == '1':
            yes_thresholds.append(score)
        else:
            no_thresholds.append(score)

    if len(yes_thresholds) == 0 or len(no_thresholds) == 0:
        print("thresholds == []")
        return

    print('yes_thresholds = avg:{} max:{} min:{}'.format(
        sum(yes_thresholds) / len(yes_thresholds), max(yes_thresholds),
        min(yes_thresholds)))
    print('no_thresholds = avg:{} max:{} min:{}'.format(
        sum(no_thresholds) / len(no_thresholds), max(no_thresholds),
        min(no_thresholds)))


if __name__ == "__main__":
    pattern = r'、|《|》|~|`|!|@|#|¥|%|…|&|(|)|;|;|×|—|-|=|\(|\)|>|<|\\|/|_|。|,|"|”|【|】|\[|\]|{|}|'
    pattern += r'正常|等|关于|设计|竞争性系统|项目|公告|.标|工标|单一|工程|合同|来源|失败|公告|流标|废标|终止|暂停|中止|更改|变更|更正|补遗|补充|澄清|延期|交易|结果|公示|成交|中选|中标|比选|比价|竞标|竞价|限制价|控制价|控价|限价|询价|询比|预公告|预公示|预审|抽签|选人|采购|邀请|需求|招标|磋商|谈判|竞争性磋商'
    embed = hub.load("./universal-sentence-encoder-multilingual_2")
    # get_new_annotated_data()
    main()
Example No. 17
# -*- coding: utf-8 -*-
"""
https://towardsdatascience.com/use-cases-of-googles-universal-sentence-encoder-in-production-dd5aaab4fc15
Created on Sun Dec  1 14:26:45 2019

@author: Amaan
"""


import tensorflow_hub as hub
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# %matplotlib inline
#embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/3")
embed = hub.load("model/tf2/universal-sentence-encoder-v3")

# Sentence Similarity
messages = [
    "EPS estimates for APPL is expected to increase by 20 bps",
    "ROI estimates for APPL is expected to increase by 9%",
    "ROI estimates for GOOG is expected to increase by 9%",
    "ROI estimates for GOOG is expected to increase by 2%"

]

encoding_matrix = embed(messages)["outputs"]
corr = np.inner(encoding_matrix, encoding_matrix)
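
# A follow-up sketch (assumed): visualise the similarity matrix just computed;
# inner products of USE vectors approximate semantic similarity.
sns.heatmap(corr, xticklabels=messages, yticklabels=messages,
            vmin=0, vmax=1, cmap="YlOrRd")
plt.show()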
Example No. 18
import io
import os

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
# (assumed imports for this snippet)
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image

app = FastAPI(root_path="/api/")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)

# loading hub module
hub_module = hub.load(
    "https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/1")


def tensor_to_image(tensor):
    tensor = tensor * 255
    tensor = np.array(tensor, dtype=np.uint8)
    if np.ndim(tensor) > 3:
        assert tensor.shape[0] == 1
        tensor = tensor[0]
    return Image.fromarray(tensor)


def load_img(path_to_img):
    max_dim = 512
    img = tf.io.read_file(path_to_img)
    img = tf.image.decode_image(img, channels=3)
Example No. 19
from typing import List

import tensorflow_hub as hub


def get_use_vectors(source_texts: List, target_texts: List, model_path: str):
    use_embeddings = hub.load(model_path)
    source_vecs = use_embeddings(source_texts)['outputs'].numpy()
    target_vecs = use_embeddings(target_texts)['outputs'].numpy()
    return source_vecs, target_vecs
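
# A usage sketch (model path reused from the multilingual-USE listing above):
import numpy as np
src_vecs, tgt_vecs = get_use_vectors(["how to cancel my order"],
                                     ["cancel an order"],
                                     "./universal-sentence-encoder-multilingual_2")
print(np.inner(src_vecs, tgt_vecs))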
Example No. 20
import joblib  # sklearn.externals.joblib was removed from scikit-learn; use the standalone package

knn = joblib.load('/home/thanaphat_phetkrow/API/knn.pkl')
from sklearn.decomposition import PCA

pca = joblib.load('/home/thanaphat_phetkrow/API/pca.pkl')
from sklearn.preprocessing import StandardScaler

scaler = joblib.load('/home/thanaphat_phetkrow/API/scaler.pkl')

import tensorflow_hub as hub
import numpy as np
import tensorflow_text

pre = pretext.pretextprocessing()
embed = hub.load("/home/thanaphat_phetkrow/API/model3")

import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from firebase_admin import storage

cred = credentials.Certificate(
    '/home/thanaphat_phetkrow/API/serviceAccountKey.json')
firebase_admin.initialize_app(cred, {'storageBucket': 'fir-c1ec0.appspot.com'})
db = firestore.client()
bucket = storage.bucket()
batch = db.batch()
increment = firestore.Increment(1)

from linebot import LineBotApi

# ### 6.2. USE Embeddings

# 1. Loading USE vector representation of all questions in the dataset
use_embeddings = np.load('use_embeddings.npy')


# 2. Loading pretrained USE model
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")


# 3. A function to create vector representation of query
def USE_sentence_vec(query):
  clean_query = text_preprocessor([query],  stop_word = False, remove_digits = False)
  use_out = use_model(clean_query)

  return use_out
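
# A follow-up sketch (assumed shapes): rank the stored questions against a
# query by inner product between its USE vector and the precomputed matrix.
query_vec = USE_sentence_vec("how do I reset my password").numpy()
scores = np.inner(query_vec, use_embeddings)[0]
print("closest question indices:", np.argsort(scores)[::-1][:5])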


Example No. 22
from flask import Flask
import numpy as np
from tensorflow import keras
import tensorflow_hub as hub
import tensorflow as tf
from tensorflow.python.keras import backend as K

model = keras.models.load_model('/app/next_word_predictor')
vocab_arr = np.load('/app/vocab_arr.npy')
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

app = Flask(__name__)


@app.route('/')
def hello_world():
    return 'hello!'


@app.route('/getNextWord/<sent>')
def getNextWord(sent):
    return vocab_arr[np.argmax(model.predict(embed([sent]).numpy())[-1])]


# if __name__ == '__main__':
#   app.run(host='0.0.0.0', port=80, debug=False)
Example No. 23
def create_data(se_path,
                excluded_ids_path,
                target_folder,
                n_train_queries,
                n_dev_queries,
                n_dev_queries_max_percentage,
                n_max_questions,
                pool_size,
                pooling,
                gpu=False):

    if excluded_ids_path and os.path.exists(excluded_ids_path):
        with open(excluded_ids_path, 'r') as f:
            excluded_q_ids = set([l.strip() for l in f])
    else:
        logger.info(
            'Either no excluded ids path given, or path does not exist! {}'.
            format(excluded_ids_path))
        excluded_q_ids = set()

    questiondict, qids = read_questions(se_path, excluded_ids=excluded_q_ids)
    qids = set(qids)

    accepted_answer_ids = set(
        [q['ANSWER'] for q in questiondict.values() if q['ANSWER']])
    answerdict, aids = read_answers(se_path, accepted_answer_ids)
    duplicatesdict = read_duplicates(se_path, qids)

    if not path.exists(target_folder):
        os.makedirs(target_folder, exist_ok=True)

    logger.info('len(excluded_ids)={}'.format(len(excluded_q_ids)))
    logger.info('Now validating that no excluded ids were returned')
    for qid in qids:
        assert qid not in excluded_q_ids
    logger.info('[ok] did not include any excluded ids')

    # the number of included questions in the dataset. this can be more than the number of queries so that we have
    # more variation for random sampling while not including too many queries (for large forums)
    #
    # we make sure to prefer ones with correct answers and duplicates before adding ones without
    q_with_duplicates = set(duplicatesdict.keys())
    q_with_only_a = set([k for (k, q) in questiondict.items() if q['ANSWER']
                         ]) - q_with_duplicates
    rest = list(qids - (q_with_duplicates | q_with_only_a))

    sampled_qids = list(q_with_duplicates)
    if len(sampled_qids) < n_max_questions:
        q_with_only_a = list(q_with_only_a)
        random.shuffle(q_with_only_a)
        sampled_qids += q_with_only_a[:n_max_questions - len(sampled_qids)]

        if len(sampled_qids) < n_max_questions:
            random.shuffle(rest)
            sampled_qids += rest[:n_max_questions - len(sampled_qids)]
    else:
        sampled_qids = random.sample(sampled_qids, n_max_questions)

    logger.info('N sampled_qids = {}'.format(len(sampled_qids)))

    # adjust number of dev queries, if needed
    n_dev_queries = min(int(len(sampled_qids) * n_dev_queries_max_percentage),
                        n_dev_queries)
    logger.info(
        'n_dev_queries_max_percentage={}'.format(n_dev_queries_max_percentage))
    logger.info('n_questions(all)={}'.format(len(qids)))
    logger.info('n_questions(sampled)={}'.format(len(sampled_qids)))
    logger.info('n_train_queries={}'.format(n_train_queries))
    logger.info('n_dev_queries={}'.format(n_dev_queries))

    n_sample_train_dev = n_train_queries + n_dev_queries
    if len(qids) < n_sample_train_dev:
        logger.info(
            'Number of questions in SE dump less than train+dev (={})'.format(
                n_sample_train_dev))
        n_sample_train_dev = len(qids)

    sampled_qids_train_dev = random.sample(sampled_qids, n_sample_train_dev)
    sampled_qids_train = sampled_qids_train_dev[:-n_dev_queries]
    sampled_qids_dev = sampled_qids_train_dev[-n_dev_queries:]

    logger.info('N sampled_qids_train = {}'.format(len(sampled_qids_train)))
    logger.info('N sampled_qids_dev = {}'.format(len(sampled_qids_dev)))

    with gzip.open(target_folder + "/questions.tsv.gz", 'wt',
                   encoding='utf-8') as f:
        for qid in sampled_qids:
            title = _clean_text(questiondict[qid]['TITLE'])
            body = _clean_text(questiondict[qid]['BODY'])

            answer = ''
            answer_id = questiondict[qid]['ANSWER']
            if answer_id and answer_id in answerdict:
                answer = _clean_text(answerdict[answer_id]['BODY'])

            duplicates = ','.join([d for d in duplicatesdict[qid]])

            f.write('{}\t{}\t{}\t{}\t{}\n'.format(qid, title, body, answer,
                                                  duplicates))

    with gzip.open(target_folder + "/train.tsv.gz", 'wt',
                   encoding='utf-8') as f:
        for qid in sampled_qids_train:
            f.write('{}\n'.format(qid))

    with gzip.open(target_folder + "/dev.tsv.gz", 'wt', encoding='utf-8') as f:
        if pooling != "none":
            # pooling with USE sentence embeddings over question titles and similarity search
            logger.info(
                'Building FAISS index with sentence embeddings of Q titles')
            if pooling != 'use':
                raise Exception('Unknown pooling method "{}"'.format(pooling))

            module = hub.load(
                'https://tfhub.dev/google/universal-sentence-encoder-qa/3')
            dim = 512

            titles = [questiondict[qid]['TITLE'] for qid in sampled_qids]
            titles = [t.lower() for t in titles]

            logger.info('Computing all embeddings...')
            embeddings = np.empty((0, dim)).astype('float32')
            for i in tqdm(range(0, len(sampled_qids), 128)):
                e = module.signatures['question_encoder'](tf.constant(
                    titles[i:i + 128]))['outputs'].numpy()
                embeddings = np.vstack((embeddings, e))

            logger.info('Normalizing embeddings...')
            # normalize each embedding to unit length (row-wise, axis=1) so
            # that the inner-product index computes cosine similarity
            embeddings = embeddings / LA.norm(embeddings, axis=1, keepdims=True)

            logger.info('Adding embeddings to FAISS index...')
            logger.info('embeddings shape: {}'.format(embeddings.shape))
            index = faiss.IndexFlatIP(512)
            index.add(embeddings)
            if gpu:
                res = faiss.StandardGpuResources()  # use a single GPU
                index = faiss.index_cpu_to_gpu(res, 0, index)

            logger.info('Querying FAISS...')
            for i, qid in tqdm(enumerate(sampled_qids_dev)):
                embedding = index.reconstruct(sampled_qids.index(qid))
                _, similar_items = index.search(np.reshape(embedding, [1, -1]),
                                                pool_size)
                similar_items_qids = [
                    sampled_qids[j] for j in similar_items[0]
                ]
                neg = ' '.join(similar_items_qids)
                f.write('{}\t{}\n'.format(qid, neg))

                # print some examples
                if i < 3:
                    logger.info('Query: {}'.format(questiondict[qid]['TITLE']))
                    for qid_similar in similar_items_qids[:3]:
                        logger.info('=>: {}'.format(
                            questiondict[qid_similar]['TITLE']))
                    logger.info('-' * 10)
        else:
            # pooling with random sampling
            for qid in sampled_qids_dev:
                neg = ' '.join([
                    str(i)
                    for i in random.sample(sampled_qids_train_dev, pool_size)
                ])
                f.write('{}\t{}\n'.format(qid, neg))
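
# A usage sketch (hypothetical paths and sizes) for the entry point above:
# create_data(se_path='dumps/travel.stackexchange.com',
#             excluded_ids_path=None, target_folder='data/travel',
#             n_train_queries=1000, n_dev_queries=200,
#             n_dev_queries_max_percentage=0.2, n_max_questions=50000,
#             pool_size=50, pooling='use', gpu=False)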
Example No. 24
def get_elmo():
    module_url = "https://tfhub.dev/google/elmo/2"
    return hub.load(module_url)
Example No. 25
import os
import tensorflow as tf
# Load compressed models from tensorflow_hub
os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED'

import numpy as np
import PIL.Image
from uuid import uuid4
import time
import functools
import tensorflow_hub as hub
import shutil
import requests

hub_model = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')

def tensor_to_image(tensor):
  tensor = tensor*255
  tensor = np.array(tensor, dtype=np.uint8)
  if np.ndim(tensor)>3:
    assert tensor.shape[0] == 1
    tensor = tensor[0]
  return PIL.Image.fromarray(tensor)
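
# A usage sketch (assumed local files): the stylization model takes float32
# image batches scaled to [0, 1] and returns the stylized batch.
content = np.array(PIL.Image.open('content.jpg'), dtype=np.float32)[np.newaxis, ...] / 255.0
style = np.array(PIL.Image.open('style.jpg'), dtype=np.float32)[np.newaxis, ...] / 255.0
stylized = hub_model(tf.constant(content), tf.constant(style))[0]
tensor_to_image(stylized).save(f'{uuid4()}.png')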

def save_url_to_database(img_url):
  
  url = 'https://go-deployment.herokuapp.com/img_url'
  x = requests.post(url, json={"firebase_url": img_url})

  if x.status_code == 200:
Example No. 26
 def __init__(self, model_url: str = 'https://tfhub.dev/google/yamnet/1'):
     self.model_url = model_url
     self.model = hub.load(self.model_url)
     self.model_name = self.model_url.replace('https://tfhub.dev/google/',
                                              '').replace('/', '_')
     self.vector_length = 1024
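
# A standalone usage sketch (assumed I/O contract): YAMNet takes a mono
# 16 kHz float32 waveform and returns per-frame class scores, 1024-d
# embeddings, and a log-mel spectrogram.
import numpy as np
import tensorflow_hub as hub

yamnet = hub.load('https://tfhub.dev/google/yamnet/1')
waveform = np.zeros(16000, dtype=np.float32)  # one second of silence
scores, embeddings, spectrogram = yamnet(waveform)
print(embeddings.shape)  # (num_frames, 1024)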
Example No. 27
def main():
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    args = get_args()
    print_args(args)
    initialization.init_output_dir(args)
    initialization.save_args(args)
    task = get_task(args.task_name, args.data_dir)

    # load model
    print("loading Universal Sentence Encoder......")
    USE = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

    # prepare dataset
    label_list = task.get_labels()
    label_map = {v: i for i, v in enumerate(label_list)}

    print("loading raw data ... ")
    train_examples = task.get_train_examples()
    val_examples = task.get_dev_examples()
    test_examples = task.get_test_examples()

    print("converting to data loader ... ")
    train_loader = get_dataloader(train_examples, label_map,
                                  args.train_batch_size)
    val_loader = get_dataloader(val_examples, label_map, args.eval_batch_size)
    test_loader = get_dataloader(test_examples, label_map,
                                 args.test_batch_size)

    # run embedding for train set
    print("Run embedding for train set")
    for _ in trange(1, desc="Epoch"):
        run_encoding(loader=train_loader, model=USE, args=args, mode='train')

    print("Run embedding for dev set")
    for _ in trange(1, desc="Epoch"):
        run_encoding(loader=val_loader, model=USE, args=args, mode='dev')

    print("Run embedding for test set")
    for _ in trange(1, desc="Epoch"):
        run_encoding(loader=test_loader, model=USE, args=args, mode='test')

    # HACK FOR MNLI mis-matched
    if args.task_name == 'mnli':
        print("Run Embedding for MNLI Mis-Matched Datasets")
        print("loading raw data ... ")
        mm_val_example = MnliMismatchedProcessor().get_dev_examples(
            args.data_dir)
        mm_test_examples = MnliMismatchedProcessor().get_test_examples(
            args.data_dir)
        print("converting to data loader ... ")
        mm_val_loader = get_dataloader(mm_val_example, label_map,
                                       args.eval_batch_size)
        mm_test_loader = get_dataloader(mm_test_examples, label_map,
                                        args.test_batch_size)

        print("Run embedding for mm_dev set")
        for _ in trange(1, desc="Epoch"):
            run_encoding(loader=mm_val_loader,
                         model=USE,
                         args=args,
                         mode='mm_dev')

        print("Run embedding for test set")
        for _ in trange(1, desc="Epoch"):
            run_encoding(loader=mm_test_loader,
                         model=USE,
                         args=args,
                         mode='mm_test')
Example No. 28
def preprocess(path_interactions: str = "interactions.csv",
               path_items: str = "items.csv",
               path_serialised: str = ".",
               embeds_use_url: str = EMBEDS_USE_URL,
               embeds_mf_dim: int = 8):
    """
    Read data, fit the data, convert title to USE & MF embeddings and
    serialise these embeddings.

    Args:
        path_interactions (str, optional): Path to interactions file.
            Defaults to "interactions.csv".
        path_items (str, optional): Path to items file.
            Defaults to "items.csv".
        path_serialised (str, optional): Directory to save the serialised data.
            Defaults to ".".
        embeds_use_url (str, optional): The URL of the USE encoder model to use
            from TF Hub. Defaults to EMBEDS_USE_URL (version 4).
        embeds_mf_dim (int, optional): The no. of dimensions of the MF embedding.
            Defaults to 8.

    Raises:
        FileNotFoundError: If "path_interactions" is invalid.
        FileNotFoundError: If "path_serialised" is invalid.
        ColumnNotFoundError: If "user", "item", and "interaction"
            columns are not found in the interactions file
        ColumnNotFoundError: If "title" column not found in the
            items file
    """
    # Check file paths
    path_interactions = Path(path_interactions)
    path_items = Path(path_items)
    path_serialised = Path(path_serialised)
    if not path_interactions.exists():
        raise FileNotFoundError("Specify a file for interactions")
    if not path_items.exists():
        raise FileNotFoundError("Specify a file for items")
    if not path_serialised.exists():
        path_serialised.mkdir()

    # Read data
    df_intxn = pd.read_csv(path_interactions)
    df_items = pd.read_csv(path_items, index_col="id")
    if set(["interaction", "item", "user"]) - set(df_intxn.columns):
        raise ColumnNotFoundError(
            "These columns must be present in interactions: "
            f"{str(['interaction', 'item', 'user'])}")
    if "title" not in df_items.columns:
        raise ColumnNotFoundError("`title` must be present in items")

    # Aggregate interactions data
    df_intxn = df_intxn.groupby(["user", "item"]).sum().reset_index()

    # Format to usable data
    # Titles are batched before being passed to the USE encoder
    # CSR matrix is the sparse format used by implicit
    titles = df_items["title"].tolist()
    batched_titles = batch(titles)
    mat = csr_matrix(
        (df_intxn["interaction"], (df_intxn["item"], df_intxn["user"])))

    # USE model
    model_use = hub.load(embeds_use_url)

    # MF Model
    model_mf = implicit.als.AlternatingLeastSquares(factors=embeds_mf_dim)
    model_mf.fit(mat)

    # Item representations: USE title embeddings and MF item factors, respectively
    embeds_use = [model_use(batched).numpy() for batched in batched_titles]
    embeds_use = np.vstack(embeds_use)
    embeds_mf = model_mf.item_factors.copy()

    # Serialise embeddings
    np.save(path_serialised / "embeds_use.npy", embeds_use)
    np.save(path_serialised / "embeds_mf.npy", embeds_mf)
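
# A usage sketch (hypothetical paths): fit on the raw CSVs and write the
# serialised embeddings into ./artifacts.
# preprocess("interactions.csv", "items.csv", path_serialised="artifacts")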
Example No. 29
 def __init__(self):
     self.model_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3"
     self.model = hub.load(self.model_url)
     self.model_name = self.model_url.replace('https://tfhub.dev/google/',
                                              '').replace('/', '_')
     self.vector_length = 512
Example No. 30
def eval_and_report():
    """Eval on voxceleb."""
    tf.logging.info('samples_key: %s', FLAGS.samples_key)
    logging.info('Logdir: %s', FLAGS.logdir)
    logging.info('Batch size: %s', FLAGS.batch_size)

    writer = tf.summary.create_file_writer(FLAGS.eval_dir)
    model = models.get_keras_model(
        bottleneck_dimension=FLAGS.bottleneck_dimension,
        output_dimension=FLAGS.output_dimension,
        alpha=FLAGS.alpha,
        mobilenet_size=FLAGS.mobilenet_size,
        frontend=not FLAGS.precomputed_frontend_and_targets,
        avg_pool=FLAGS.average_pool)
    checkpoint = tf.train.Checkpoint(model=model)

    for ckpt in tf.train.checkpoints_iterator(FLAGS.logdir,
                                              timeout=FLAGS.timeout):
        assert 'ckpt-' in ckpt, ckpt
        step = ckpt.split('ckpt-')[-1]
        logging.info('Starting to evaluate step: %s.', step)

        checkpoint.restore(ckpt)

        logging.info('Loaded weights for eval step: %s.', step)

        reader = tf.data.TFRecordDataset
        ds = get_data.get_data(file_pattern=FLAGS.file_pattern,
                               teacher_fn=get_data.savedmodel_to_func(
                                   hub.load(FLAGS.teacher_model_hub),
                                   FLAGS.output_key),
                               output_dimension=FLAGS.output_dimension,
                               reader=reader,
                               samples_key=FLAGS.samples_key,
                               min_length=FLAGS.min_length,
                               batch_size=FLAGS.batch_size,
                               loop_forever=False,
                               shuffle=False)
        logging.info('Got dataset for eval step: %s.', step)
        if FLAGS.take_fixed_data:
            ds = ds.take(FLAGS.take_fixed_data)

        mse_m = tf.keras.metrics.MeanSquaredError()
        mae_m = tf.keras.metrics.MeanAbsoluteError()

        logging.info('Starting the ds loop...')
        count, ex_count = 0, 0
        s = time.time()
        for wav_samples, targets in ds:
            wav_samples.shape.assert_is_compatible_with(
                [None, FLAGS.min_length])
            targets.shape.assert_is_compatible_with(
                [None, FLAGS.output_dimension])

            logits = model(wav_samples, training=False)
            logits.shape.assert_is_compatible_with(targets.shape)

            mse_m.update_state(y_true=targets, y_pred=logits)
            mae_m.update_state(y_true=targets, y_pred=logits)
            ex_count += logits.shape[0]
            count += 1
            logging.info('Saw %i examples after %i iterations as %.2f secs...',
                         ex_count, count,
                         time.time() - s)
        with writer.as_default():
            tf.summary.scalar('mse', mse_m.result().numpy(), step=int(step))
            tf.summary.scalar('mae', mae_m.result().numpy(), step=int(step))
        logging.info('Done with eval step: %s in %.2f secs.', step,
                     time.time() - s)