Example #1
import tensorflow as tf  # TF 1.x API (InteractiveSession, global_variables_initializer)

# io_helper and the CNN class are project-specific modules assumed to be
# imported alongside this function.
def load_model(path, embeddings, loss_function, just_predict=True):
    """Deserialize a saved CNN classifier and restore its variables into a TF session."""
    parameters, model = io_helper.deserialize(path)

    print("Defining and initializing model...")
    classifier = CNN(embeddings=(parameters["embedding_size"], embeddings),
                     num_conv_layers=parameters["num_convolutions"],
                     filters=parameters["filters"],
                     k_max_pools=parameters["k_max_pools"],
                     manual_features_size=parameters["manual_features_size"])
    classifier.define_model(parameters["max_text_length"],
                            parameters["num_classes"],
                            loss_function,
                            -1,
                            l2_reg_factor=parameters["reg_factor"],
                            update_embeddings=parameters["upd_embs"])
    if not just_predict:
        classifier.define_optimization(
            learning_rate=parameters["learning_rate"])

    print("Initializing session...", flush=True)
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())

    classifier.set_variable_values(session, model)
    classifier.set_distinct_labels(parameters["dist_labels"])

    return classifier, session
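
A minimal usage sketch for load_model follows; the model path, the loss_function value, and the randomly generated embedding matrix are placeholders, not values from the project.

import numpy as np

# Hypothetical usage: `embeddings` stands in for a pre-loaded word-embedding
# matrix (e.g. built with text_embeddings.Embeddings as in Example #2), and
# the path and loss-function value below are placeholders.
embeddings = np.random.rand(200000, 300).astype(np.float64)
classifier, session = load_model("models/cnn_classifier.model",
                                 embeddings,
                                 loss_function="softmax_cross_entropy",
                                 just_predict=True)
# ... run predictions with the restored classifier, then release the session
session.close()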
Example #2
import os
import sys

import numpy as np

# io_helper and text_embeddings are project-specific modules; `args` comes
# from an argument parser defined earlier in the script.
if not os.path.isfile(args.evaldata):
    print("Error: File with the evaluation dataset not found.")
    sys.exit(1)

if not os.path.isfile(args.modelpath):
    print("Error: Model file not found.")
    sys.exit(1)

embs_path = args.embs
simlex_path = args.evaldata
model_path = args.modelpath

# deserializing the model

hyps, variables = io_helper.deserialize(model_path)  # `variables` avoids shadowing the built-in vars()
print(hyps)
same_encoder, hidden_layer_sizes, distance_measure = hyps

# loading/merging word embeddings
t_embeddings = text_embeddings.Embeddings()
t_embeddings.load_embeddings(embs_path,
                             200000,
                             language='en',
                             print_loading=True,
                             skip_first_line=True)
t_embeddings.inverse_vocabularies()
vocabulary_size = len(t_embeddings.lang_vocabularies["en"])
embeddings = t_embeddings.lang_embeddings["en"].astype(np.float64)
embedding_size = t_embeddings.emb_sizes["en"]
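
Every io_helper.deserialize call in these examples returns a (parameters-or-hyperparameters, variables) pair. A pickle-based helper along the following lines would be consistent with that usage; this is an assumed sketch, not the project's actual implementation.

import pickle

# Assumed shape of io_helper: a thin pickle wrapper around a
# (hyperparameters, variable_values) tuple. Hypothetical, for illustration.
def serialize(obj, path):
    with open(path, "wb") as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def deserialize(path):
    with open(path, "rb") as f:
        return pickle.load(f)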
Example #3
def load_labels_and_max_length(path):
    """Read only the distinct labels and maximum text length from a serialized model."""
    parameters, _ = io_helper.deserialize(path)
    return parameters["dist_labels"], parameters["max_text_length"]
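
A short usage sketch; the model path is a placeholder.

# Hypothetical call: fetch only what is needed to pre-process evaluation texts.
dist_labels, max_text_length = load_labels_and_max_length("models/cnn_classifier.model")
print("Labels: " + str(dist_labels))
print("Maximum text length: " + str(max_text_length))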
Example #4
import os
import pickle

import numpy as np

# `dirname`, `config`, and `io_helper` are project-specific and assumed to be
# defined/imported earlier in the script.

########################################################################################
# loading the pre-trained model
########################################################################################

print("Forwarded arguments: ")
model_name = os.path.join(dirname, config.MODEL)
print("Model name: " + str(model_name))

lang_query = config.QUERY_LANG
lang_doc = config.DOCS_LANG
preds_path = config.PREDS_PATH

print("Prediction language pair: " + lang_query + " " + lang_doc)

print("Deserializing the model...")
model_serialization_path = model_name
hyperparams, variables = io_helper.deserialize(model_serialization_path)
print("Hyperparameters: ")
print(hyperparams)

(hyp_first_enc, hyp_second_encoder, batch_size, same_encoder, cross_attention,
 self_attention, share_cross_attention, share_intra_attention,
 bilinear_product_score) = hyperparams
state_size, max_len, forward_cell_type, backward_cell_type = hyp_first_enc

########################################################################################
# loading/merging word embeddings
########################################################################################

with open(os.path.join(dirname, config.QUERY_LANG_VOCAB), "rb") as f:
    vocab_q = pickle.load(f)
vectors_q = np.load(os.path.join(dirname, config.QUERY_LANG_EMBS))
# L2-normalize each embedding row (divide by the per-row Euclidean norm)
norms_q = vectors_q / np.transpose([np.linalg.norm(vectors_q, 2, 1)])
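
The last line divides every row of vectors_q by its Euclidean norm, producing unit-length query embeddings. An equivalent, more explicit formulation uses keepdims instead of the transpose trick:

# Same row-wise L2 normalization, written with keepdims for clarity.
norms_q_alt = vectors_q / np.linalg.norm(vectors_q, ord=2, axis=1, keepdims=True)
assert np.allclose(norms_q, norms_q_alt)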