import time
# WordEmbeddings, Tokenizer, Vectorizer, pad_tensor and SiameseModel are
# project-local helpers, imported elsewhere in the script.

# initialize objects
print('Initializing objects ...')
print('Initializing word embeddings ...')
t1 = time.time()
word_embeddings = WordEmbeddings(word_embeddings_file_path)
t2 = time.time()
print('\tTook %f seconds' % (t2 - t1))
print('Initializing tokenizer ...')
tokenizer = Tokenizer()
print('Initializing vectorizer ...')
vectorizer = Vectorizer(word_embeddings, tokenizer)

#### testing dataset ####
print('Vectorizing testing dataset ...')
ids, test_a_vectors, test_b_vectors, test_gold = vectorizer.vectorize_df(test_df)
test_max_a_length = len(max(test_a_vectors, key=len))  # longest sentence A, in tokens
test_max_b_length = len(max(test_b_vectors, key=len))  # longest sentence B, in tokens
print('maximum number of tokens per sentence A in testing set is %d' % test_max_a_length)
print('maximum number of tokens per sentence B in testing set is %d' % test_max_b_length)
max_len = max([test_max_a_length, test_max_b_length])

# padding
print('Padding testing dataset ...')
test_a_vectors = pad_tensor(test_a_vectors, max_len)
test_b_vectors = pad_tensor(test_b_vectors, max_len)

print('Loading the model ...')
siamese = SiameseModel(False)  # False presumably selects load mode; the training snippet below uses SiameseModel()
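# --- illustrative sketch, not part of the original script ---
# pad_tensor is not defined in this excerpt. The sketch below shows one
# plausible implementation, assuming each entry of *_vectors is a list of
# fixed-size embedding vectors and that padding appends zero vectors up to
# max_len. The name pad_tensor_sketch and the NumPy approach are assumptions.
import numpy as np

def pad_tensor_sketch(vectors, max_len):
    dim = len(vectors[0][0])                         # embedding dimensionality
    padded = np.zeros((len(vectors), max_len, dim))  # (samples, max_len, dim)
    for i, seq in enumerate(vectors):
        padded[i, :len(seq), :] = seq                # post-pad with zero rows
    return padded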
# initialize objects
print('Initializing objects ...')
print('Initializing word embeddings ...')
t1 = time.time()
word_embeddings = WordEmbeddings(word_embeddings_file_path)
t2 = time.time()
print('\tTook %f seconds' % (t2 - t1))
print('Initializing tokenizer ...')
tokenizer = Tokenizer()
print('Initializing vectorizer ...')
vectorizer = Vectorizer(word_embeddings, tokenizer)

#### training dataset ####
# vectorizing
ids, train_a_vectors, train_b_vectors, train_gold = vectorizer.vectorize_df(df)
train_max_a_length = len(max(train_a_vectors, key=len))
train_max_b_length = len(max(train_b_vectors, key=len))
print('maximum number of tokens per sentence A in training set is %d' % train_max_a_length)
print('maximum number of tokens per sentence B in training set is %d' % train_max_b_length)
max_len = max([train_max_a_length, train_max_b_length])

# padding
train_a_vectors = pad_tensor(train_a_vectors, max_len)
train_b_vectors = pad_tensor(train_b_vectors, max_len)

print('Training the model ...')
siamese = SiameseModel()
validation_data = None
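# --- illustrative sketch, not part of the original script ---
# Vectorizer.vectorize_df is likewise not shown. A hypothetical sketch of the
# idea: tokenize both sentences of each pair and look up their pre-trained
# vectors, skipping out-of-vocabulary tokens. The column names ('id',
# 'sentence_A', 'sentence_B', 'score') and the dict-like word_embeddings
# interface are assumptions, not the project's actual schema.
def vectorize_df_sketch(df, word_embeddings, tokenizer):
    def vectorize(sentence):
        return [word_embeddings[token]
                for token in tokenizer.tokenize(sentence)
                if token in word_embeddings]         # drop OOV tokens
    a_vectors = [vectorize(s) for s in df['sentence_A']]
    b_vectors = [vectorize(s) for s in df['sentence_B']]
    return df['id'].tolist(), a_vectors, b_vectors, df['score'].tolist()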
# initialize objects
print('Initializing objects ...')
print('Initializing word embeddings ...')
t1 = time.time()
word_embeddings = WordEmbeddings(word_embeddings_file_path)
t2 = time.time()
print('\tTook %f seconds' % (t2 - t1))
print('Initializing tokenizer ...')
tokenizer = Tokenizer()
print('Initializing vectorizer ...')
vectorizer = Vectorizer(word_embeddings, tokenizer)

#### training dataset ####
# vectorizing
train_a_vectors, train_b_vectors, train_gold = vectorizer.vectorize_df(train_df)
train_max_a_length = len(max(train_a_vectors, key=len))
train_max_b_length = len(max(train_b_vectors, key=len))
print('maximum number of tokens per sentence A in training set is %d' % train_max_a_length)
print('maximum number of tokens per sentence B in training set is %d' % train_max_b_length)
max_len = max([train_max_a_length, train_max_b_length])

# padding
train_a_vectors = pad_tensor(train_a_vectors, max_len)
train_b_vectors = pad_tensor(train_b_vectors, max_len)

#### development dataset ####
# vectorizing
dev_a_vectors, dev_b_vectors, dev_gold = vectorizer.vectorize_df(dev_df)
dev_max_a_length = len(max(dev_a_vectors, key=len))