def parse_sentence(record):
    import redisAI
    import numpy as np
    global tokenizer
    if not tokenizer:
        tokenizer = loadTokeniser()
    # Curly braces make this a Redis Cluster hash tag, so the set below lands
    # on the same shard as the keys written by this function
    hash_tag = "{%s}" % hashtag()

    for idx, value in sorted(record['value'].items(), key=lambda item: int(item[0])):
        # Tokenize to at most 511 ids, then append the separator token for a
        # maximum total length of 512
        tokens = tokenizer.encode(value, add_special_tokens=False, max_length=511, truncation=True, return_tensors="np")
        tokens = np.append(tokens, tokenizer.sep_token_id).astype(np.int64)
        tensor = redisAI.createTensorFromBlob('INT64', tokens.shape, tokens.tobytes())

        key_prefix = 'sentence:'
        sentence_key = remove_prefix(record['key'], key_prefix)
        token_key = f"tokenized:bert:qa:{sentence_key}:{idx}"
        # execute('SET', token_key, tokens)
        redisAI.setTensorInKey(token_key, tensor)
        execute('SADD', f'processed_docs_stage3_tokenized{hash_tag}', token_key)
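# `loadTokeniser` and `remove_prefix` are helpers defined elsewhere, while
# `hashtag`, `execute` and `log` come from the RedisGears runtime. A minimal
# sketch of `loadTokeniser`, assuming the Hugging Face `transformers` package
# and a BERT checkpoint (the model name below is an assumption, not taken from
# the source):
def loadTokeniser():
    # Hypothetical helper: load the tokenizer once per worker and return it;
    # the caller caches it in the `tokenizer` global
    from transformers import AutoTokenizer
    return AutoTokenizer.from_pretrained('bert-base-uncased')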
def parse_sentence(record):
    import redisAI
    import numpy as np
    global tokenizer
    global model
    if not tokenizer:
        tokenizer, model = loadTokeniser()

    # Reassemble the article from its numerically ordered sentence fields
    article_text = []
    for _, value in sorted(record['value'].items(), key=lambda item: int(item[0])):
        article_text.append(value)
    full_text = " ".join(article_text)

    inputs = tokenizer.encode_plus(full_text, max_length=512, add_special_tokens=True, return_tensors="np")
    # T5 token ids fit into int16 (the vocabulary is smaller than 2^15)
    input_ids = inputs['input_ids'].astype(np.int16)
    log(str(input_ids.shape))
    log(str(input_ids))
    # attention_mask = inputs['attention_mask']
    # token_type_ids = inputs['token_type_ids']

    key_prefix = 'sentence:'
    article_key = remove_prefix(record['key'], key_prefix)
    token_key = f"tokenized:T5:sum:{article_key}"
    tensor = redisAI.createTensorFromBlob('INT16', input_ids.shape, input_ids.tobytes())
    redisAI.setTensorInKey(token_key, tensor)
    execute('SADD', 'processed_docs_stage3_sum', token_key)
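# A minimal sketch of how this step could be wired into a RedisGears pipeline
# (the reader, key pattern and execution mode are assumptions, not taken from
# the source):
gb = GB('KeysReader')
gb.foreach(parse_sentence)
gb.register('sentence:*', mode='async_local')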
def hashToTensor(record):
    import redisAI
    import numpy as np

    hash_key = record['key']
    hash_fields = record['value']
    # Pack the 30 numeric hash fields into a single-row float32 tensor
    values = np.empty((1, 30), dtype=np.float32)
    for i, key in enumerate(hash_fields.keys()):
        values[0][i] = hash_fields[key]
    tensor = redisAI.createTensorFromBlob('FLOAT', values.shape, values.tobytes())
    redisAI.setTensorInKey(hash_key + '_tensor', tensor)
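# `record['value']` is a plain dict, so the iteration order of the fields
# determines the tensor layout. A minimal sketch that reads the fields in an
# explicit order instead (the 'f0'..'f29' field names are an assumption):
def hashToTensorOrdered(record):
    # Hypothetical variant of hashToTensor with a fixed field order
    import redisAI
    import numpy as np
    fields = record['value']
    values = np.array([[np.float32(fields[f'f{i}']) for i in range(30)]], dtype=np.float32)
    tensor = redisAI.createTensorFromBlob('FLOAT', values.shape, values.tobytes())
    redisAI.setTensorInKey(record['key'] + '_tensor', tensor)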
def addTensors(x):
    import redisAI

    # Fetch one tensor with the bulk getter and another with the single-key getter
    tensors = redisAI.mgetTensorsFromKeyspace(['tensor_a'])
    tensors.append(redisAI.getTensorFromKey('tensor_b'))
    log(str(redisAI.tensorGetDims(tensors[0])))
    log(str(redisAI.tensorGetDims(tensors[1])))

    # Run the 'concat_tensors' entry point of the stored TorchScript 'my_script'
    scriptRunner = redisAI.createScriptRunner('my_script', 'concat_tensors')
    redisAI.scriptRunnerAddInputList(scriptRunner, tensors)
    redisAI.scriptRunnerAddOutput(scriptRunner)
    script_reply = redisAI.scriptRunnerRun(scriptRunner)

    # Store the result both with the single-key setter and the bulk setter
    redisAI.setTensorInKey('script_reply', script_reply[0])
    redisAI.msetTensorsInKeyspace({'script_reply_1': script_reply[0]})
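# addTensors assumes that 'my_script' is already stored in RedisAI (for
# example via AI.SCRIPTSET) with a 'concat_tensors' entry point. A minimal
# sketch of what that TorchScript source could look like (the concatenation
# axis is an assumption):
def concat_tensors(tensors: List[Tensor]):
    # Concatenate all input tensors along the first dimension
    return torch.cat(tensors, dim=0)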
def is_fraud(record):
    import redisAI
    import numpy as np

    # Retrieve reference tensor keys from the keyspace.
    # Range query with LIMIT 100: without the limit it can return 100-150K
    # results, which hurts performance.
    ref_data_keys = execute("ZRANGEBYSCORE", "references", record[1], record[2], "LIMIT", "0", "100")
    # Add the "_tensor" suffix to every returned key
    keys = [x + "_tensor" for x in ref_data_keys]
    # Append the new transaction tensor key
    keys.append(record[3])
    # Fetch all tensors from the keyspace in one call
    tensors = redisAI.mgetTensorsFromKeyspace(keys)
    # Split into the reference data tensors and the new sample (the last entry)
    ref_data = tensors[:-1]
    new_sample = tensors[-1]

    # Build a single reference tensor out of the reference data with a TorchScript
    scriptRunner = redisAI.createScriptRunner('concat_script', 'concat_tensors')
    redisAI.scriptRunnerAddInputList(scriptRunner, ref_data)
    redisAI.scriptRunnerAddOutput(scriptRunner)
    ref_data = redisAI.scriptRunnerRun(scriptRunner)[0]

    # Run two models over the reference data and the transaction
    modelRunner = redisAI.createModelRunner('model_1')
    redisAI.modelRunnerAddInput(modelRunner, 'transaction', new_sample)
    redisAI.modelRunnerAddInput(modelRunner, 'reference', ref_data)
    redisAI.modelRunnerAddOutput(modelRunner, 'output')
    output_1 = redisAI.modelRunnerRun(modelRunner)[0]

    modelRunner = redisAI.createModelRunner('model_2')
    redisAI.modelRunnerAddInput(modelRunner, 'transaction', new_sample)
    redisAI.modelRunnerAddInput(modelRunner, 'reference', ref_data)
    redisAI.modelRunnerAddOutput(modelRunner, 'output')
    output_2 = redisAI.modelRunnerRun(modelRunner)[0]

    # Average the two model outputs with numpy and store the result in the keyspace
    shape = redisAI.tensorGetDims(output_1)
    reply_ndarray_0 = np.frombuffer(redisAI.tensorGetDataAsBlob(output_1), dtype=np.float32).reshape(shape)
    reply_ndarray_1 = np.frombuffer(redisAI.tensorGetDataAsBlob(output_2), dtype=np.float32).reshape(shape)
    res = (reply_ndarray_0 + reply_ndarray_1) / 2.0
    output = redisAI.createTensorFromBlob('FLOAT', res.shape, res.tobytes())
    redisAI.setTensorInKey('model_result', output)
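# A minimal sketch of exposing is_fraud as an on-demand command through a
# CommandReader trigger (the trigger name is an assumption); record[0] is then
# the trigger name, record[1] and record[2] the score range, and record[3] the
# new transaction tensor key:
gb = GB('CommandReader')
gb.foreach(is_fraud)
gb.register(trigger='is_fraud')
# Invoked from a client as:
#   RG.TRIGGER is_fraud <min_score> <max_score> <transaction_tensor_key>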