Example #1
def parse_sentence(record):
    import redisAI
    import numpy as np
    # The tokenizer is loaded lazily on first use; 'tokenizer' is expected to
    # be declared (e.g. as None) at module level of the gear script
    global tokenizer
    if not tokenizer:
        tokenizer = loadTokeniser()
    # The hash tag keeps all derived keys in the same cluster slot as this shard
    hash_tag = "{%s}" % hashtag()

    # Hash fields are numbered sentence indices; process them in numeric order
    for idx, value in sorted(record['value'].items(),
                             key=lambda item: int(item[0])):
        tokens = tokenizer.encode(value,
                                  add_special_tokens=False,
                                  max_length=511,
                                  truncation=True,
                                  return_tensors="np")
        # Append the separator token and convert to the dtype RedisAI expects
        tokens = np.append(tokens, tokenizer.sep_token_id).astype(np.int64)
        tensor = redisAI.createTensorFromBlob('INT64', tokens.shape,
                                              tokens.tobytes())

        # Store the tensor under a deterministic key and track it in a set
        key_prefix = 'sentence:'
        sentence_key = remove_prefix(record['key'], key_prefix)
        token_key = f"tokenized:bert:qa:{sentence_key}:{idx}"
        # execute('SET', token_key, tokens)
        redisAI.setTensorInKey(token_key, tensor)
        execute('SADD', f'processed_docs_stage3_tokenized{hash_tag}',
                token_key)
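
The function above is meant to run inside RedisGears. A minimal registration sketch, assuming a KeysReader gear over keys with the sentence: prefix and a module-level tokenizer variable (the prefix pattern and the loadTokeniser/remove_prefix helpers are assumptions not shown in the example):

tokenizer = None  # initialised lazily inside parse_sentence

gb = GB('KeysReader')
gb.foreach(parse_sentence)
gb.register('sentence:*')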
Example #2

def parse_sentence(record):
    import redisAI
    import numpy as np
    # Both the tokenizer and the T5 model handle are loaded lazily and are
    # expected to be declared at module level
    global tokenizer
    global model
    if not tokenizer:
        tokenizer, model = loadTokeniser()

    # Re-assemble the article from its numbered hash fields, then tokenize the
    # whole text in one pass for summarization
    article_text = []
    for _, value in sorted(record['value'].items(),
                           key=lambda item: int(item[0])):
        article_text.append(value)
    full_text = " ".join(article_text)
    inputs = tokenizer.encode_plus(full_text,
                                   max_length=512,
                                   add_special_tokens=True,
                                   return_tensors="np")

    # Keep only the input ids; the 16-bit dtype matches the INT16 tensor below
    input_ids = inputs['input_ids'].astype(np.int16)
    log(str(input_ids.shape))
    log(str(input_ids))
    # attention_mask = inputs['attention_mask']
    # token_type_ids = inputs['token_type_ids']
    key_prefix = 'sentence:'
    article_key = remove_prefix(record['key'], key_prefix)
    token_key = f"tokenized:T5:sum:{article_key}"
    tensor = redisAI.createTensorFromBlob('INT16', input_ids.shape,
                                          input_ids.tobytes())
    redisAI.setTensorInKey(token_key, tensor)
    execute('SADD', 'processed_docs_stage3_sum', token_key)
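
Both parse_sentence variants call a remove_prefix helper that is not part of the snippet. A plausible implementation, assuming it only strips a leading key prefix:

def remove_prefix(text, prefix):
    # Strip 'prefix' from the start of 'text' if it is present
    return text[len(prefix):] if text.startswith(prefix) else text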
Example #3
def hashToTensor(record):
    import redisAI
    import numpy as np
    hash_key = record['key']
    hash_fields = record['value']
    # The hash is assumed to contain exactly 30 numeric fields
    values = np.empty((1, 30), dtype=np.float32)
    for i, key in enumerate(hash_fields.keys()):
        values[0][i] = hash_fields[key]
    tensor = redisAI.createTensorFromBlob('FLOAT', values.shape,
                                          values.tobytes())
    redisAI.setTensorInKey(hash_key + '_tensor', tensor)
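
A quick way to exercise hashToTensor, assuming it is registered over hash keys: write a hash with exactly 30 numeric fields and read back the <key>_tensor key once the gear has run. The key and field names below are hypothetical:

import redis

r = redis.Redis()
# Hypothetical sample hash with 30 numeric fields, as hashToTensor expects
r.hset('sample:1', mapping={f'f{i}': float(i) for i in range(30)})
# After the gear processes the key, the tensor is stored under 'sample:1_tensor'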
Example #4

def addTensors(x):
    import redisAI
    # Fetch one tensor in bulk from the keyspace and one individually
    tensors = redisAI.mgetTensorsFromKeyspace(['tensor_a'])
    tensors.append(redisAI.getTensorFromKey('tensor_b'))
    log(str(redisAI.tensorGetDims(tensors[0])))
    log(str(redisAI.tensorGetDims(tensors[1])))
    # Run the 'concat_tensors' entry point of the script stored as 'my_script'
    scriptRunner = redisAI.createScriptRunner('my_script', 'concat_tensors')
    redisAI.scriptRunnerAddInputList(scriptRunner, tensors)
    redisAI.scriptRunnerAddOutput(scriptRunner)
    script_reply = redisAI.scriptRunnerRun(scriptRunner)
    # Store the single output tensor, both directly and via the MSET-style call
    redisAI.setTensorInKey('script_reply', script_reply[0])
    redisAI.msetTensorsInKeyspace({'script_reply_1': script_reply[0]})
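
The script runner above assumes a TorchScript stored under 'my_script' whose concat_tensors entry point accepts the input list. A minimal sketch of such a script, in the form RedisAI script sources usually take (the concatenation dimension is an assumption; torch, Tensor and List are resolved by the TorchScript compiler, so no imports appear in the source):

def concat_tensors(tensors: List[Tensor]):
    # Concatenate every tensor in the input list along the first dimension
    return torch.cat(tensors, dim=0)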
Example #5
def is_fraud(record):
    import redisAI
    import numpy as np
    # Retrieve tensors from keyspace
    # Range query with limit 100 (without a limit it can return 100-150K
    # results, which hurts performance)
    ref_data_keys = execute("ZRANGEBYSCORE", "references", record[1],
                            record[2], "LIMIT", "0", "100")
    # Set "_tensor" suffix for every returned key
    keys = [x + "_tensor" for x in ref_data_keys]
    # Append the new transaction tensor key
    keys.append(record[3])
    # Do mgetTensors from the keyspace
    tensors = redisAI.mgetTensorsFromKeyspace(keys)

    # Split the reference data tensors from the new sample (the last key
    # appended above is the transaction tensor)
    ref_data = tensors[:-1]
    new_sample = tensors[-1]

    # Create a new reference tensor out of the reference data from the
    # keyspace, with a TorchScript
    scriptRunner = redisAI.createScriptRunner('concat_script',
                                              'concat_tensors')
    redisAI.scriptRunnerAddInputList(scriptRunner, ref_data)
    redisAI.scriptRunnerAddOutput(scriptRunner)

    # Run two models over the reference data and the transaction
    ref_data = redisAI.scriptRunnerRun(scriptRunner)[0]
    modelRunner = redisAI.createModelRunner('model_1')
    redisAI.modelRunnerAddInput(modelRunner, 'transaction', new_sample)
    redisAI.modelRunnerAddInput(modelRunner, 'reference', ref_data)
    redisAI.modelRunnerAddOutput(modelRunner, 'output')
    output_1 = redisAI.modelRunnerRun(modelRunner)[0]
    modelRunner = redisAI.createModelRunner('model_2')
    redisAI.modelRunnerAddInput(modelRunner, 'transaction', new_sample)
    redisAI.modelRunnerAddInput(modelRunner, 'reference', ref_data)
    redisAI.modelRunnerAddOutput(modelRunner, 'output')
    output_2 = redisAI.modelRunnerRun(modelRunner)[0]

    # Average the results with numpy and set in keyspace
    shape = redisAI.tensorGetDims(output_1)
    reply_ndarray_0 = np.frombuffer(redisAI.tensorGetDataAsBlob(output_1),
                                    dtype=np.float32).reshape(shape)
    reply_ndarray_1 = np.frombuffer(redisAI.tensorGetDataAsBlob(output_2),
                                    dtype=np.float32).reshape(shape)
    res = (reply_ndarray_0 + reply_ndarray_1) / 2.0
    output = redisAI.createTensorFromBlob('FLOAT', res.shape, res.tobytes())
    redisAI.setTensorInKey('model_result', output)
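
Here record is a list of command arguments rather than a keyspace record: record[1] and record[2] bound the reference score range and record[3] is the key of the new transaction tensor. A registration sketch under the assumption that the gear is exposed as an explicit command via a CommandReader trigger (the trigger name is hypothetical):

gb = GB('CommandReader')
gb.foreach(is_fraud)
gb.register(trigger='is_fraud')

# Invoked from a client as, e.g.:
#   RG.TRIGGER is_fraud <min_score> <max_score> <new_transaction_tensor_key>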