def _load_vector_space_mapper(model_1_path, model_2_path, bilingual_path):
    """Return a mapped VectorSpaceMapper built from two saved models.

    Both models are read with ``Word2Vec.load`` (first ``model_1_path``,
    then ``model_2_path``), the bilingual dictionary is read with
    ``bg.load_bilingual_dictionary`` and ``map_vector_spaces()`` is invoked
    on the mapper before it is handed back.
    """
    # The generator is consumed left to right by the unpacking, so the
    # models are loaded in the same order as their paths are listed.
    first_model, second_model = (
        Word2Vec.load(path) for path in (model_1_path, model_2_path)
    )
    translations = bg.load_bilingual_dictionary(bilingual_path)
    mapper = VectorSpaceMapper(first_model, second_model, translations)
    mapper.map_vector_spaces()
    return mapper
def obtain_mean_square_error_from_dataset(self, dataset_path):
    """
    Obtain Mean Square Error from bilingual dataset.

    For every word pair of the dataset two errors are measured against the
    ``model_2`` vector of the second word: one for the raw ``model_1``
    vector of the first word and one for the vector returned by
    ``self._predict_vec_from_word`` for the first word. Pairs for which
    any of these lookups raises ``KeyError`` are skipped.

    API Documentation:
        :param dataset_path: Path for the test bilingual dictionary.
        :type dataset_path: :class:`String`
        :return: Percentage reduction of Mean Square Error after
            transformation.
        :rtype: :class:`Float`
        :raises ValueError: If no word pair of the dataset could be
            evaluated.
    """
    self.logger.info(
        'Obtain mean square error from dataset: %s', dataset_path
    )
    bilingual_dictionary = bg.load_bilingual_dictionary(dataset_path)
    source_vectors = []
    predicted_vectors = []
    # The same model_2 vectors are the reference for both measurements,
    # so a single list is enough.
    target_vectors = []
    skipped = 0
    for tup in bilingual_dictionary:
        word_1 = tup[0]
        word_2 = tup[1]
        try:
            pr_vector_1 = self._predict_vec_from_word(word_1)
            vector_1 = self.model_1[word_1]
            vector_2 = self.model_2[word_2]
        except KeyError:
            # Best effort: a pair that cannot be looked up is left out,
            # but it is counted so the omission is visible in the log.
            skipped += 1
            continue
        # Append only after all three lookups succeeded, which keeps the
        # three lists aligned index by index.
        source_vectors.append(vector_1)
        predicted_vectors.append(pr_vector_1)
        target_vectors.append(vector_2)
    self.logger.info(
        'Evaluated %s word pairs, skipped %s pairs (KeyError)',
        len(target_vectors), skipped
    )
    if not target_vectors:
        # Fail with an explicit message instead of passing empty
        # sequences on to the metrics call.
        raise ValueError(
            'No word pair from dataset %s could be evaluated'
            % (dataset_path,)
        )
    score = metrics.mean_squared_error(source_vectors, target_vectors)
    score_with_tr = metrics.mean_squared_error(
        predicted_vectors, target_vectors
    )
    self.logger.info(
        'Mean Square Error for Dataset without transformation: %s', score
    )
    self.logger.info(
        'Mean Square Error for Dataset'
        ' with transformation: %s', score_with_tr
    )
    # NOTE(review): a baseline error of exactly 0 makes this ratio
    # undefined; left unguarded to keep the original result - confirm
    # whether it needs handling.
    error_reduction = ((score - score_with_tr) / score) * 100
    self.logger.info(
        'Reduction in Mean Square Error'
        ' with transformation: %s %%', error_reduction
    )
    return error_reduction
def obtain_mean_square_error_from_dataset(self, dataset_path):
    """
    Obtain Mean Square Error from bilingual dataset.

    For every word pair of the dataset two errors are measured against the
    ``model_2`` vector of the second word: one for the raw ``model_1``
    vector of the first word and one for the vector returned by
    ``self._predict_vec_from_word`` for the first word. Pairs for which
    any of these lookups raises ``KeyError`` are skipped.

    API Documentation:
        :param dataset_path: Path for the test bilingual dictionary.
        :type dataset_path: :class:`String`
        :return: Percentage reduction of Mean Square Error after
            transformation.
        :rtype: :class:`Float`
        :raises ValueError: If no word pair of the dataset could be
            evaluated.
    """
    self.logger.info('Obtain mean square error from dataset: %s', dataset_path)
    bilingual_dictionary = bg.load_bilingual_dictionary(dataset_path)
    source_vectors = []
    predicted_vectors = []
    # The same model_2 vectors are the reference for both measurements,
    # so a single list is enough.
    target_vectors = []
    skipped = 0
    for tup in bilingual_dictionary:
        word_1 = tup[0]
        word_2 = tup[1]
        try:
            pr_vector_1 = self._predict_vec_from_word(word_1)
            vector_1 = self.model_1[word_1]
            vector_2 = self.model_2[word_2]
        except KeyError:
            # Best effort: a pair that cannot be looked up is left out,
            # but it is counted so the omission is visible in the log.
            skipped += 1
            continue
        # Append only after all three lookups succeeded, which keeps the
        # three lists aligned index by index.
        source_vectors.append(vector_1)
        predicted_vectors.append(pr_vector_1)
        target_vectors.append(vector_2)
    self.logger.info(
        'Evaluated %s word pairs, skipped %s pairs (KeyError)',
        len(target_vectors), skipped)
    if not target_vectors:
        # Fail with an explicit message instead of passing empty
        # sequences on to the metrics call.
        raise ValueError(
            'No word pair from dataset %s could be evaluated'
            % (dataset_path,))
    score = metrics.mean_squared_error(source_vectors, target_vectors)
    score_with_tr = metrics.mean_squared_error(predicted_vectors,
                                               target_vectors)
    self.logger.info(
        'Mean Square Error for Dataset without transformation: %s', score)
    self.logger.info(
        'Mean Square Error for Dataset'
        ' with transformation: %s', score_with_tr)
    # NOTE(review): a baseline error of exactly 0 makes this ratio
    # undefined; left unguarded to keep the original result - confirm
    # whether it needs handling.
    error_reduction = ((score - score_with_tr) / score) * 100
    self.logger.info(
        'Reduction in Mean Square Error'
        ' with transformation: %s %%', error_reduction)
    return error_reduction
# Interactive script: builds a VectorSpaceMapper from a saved English model
# and a saved Bengali model, maps the two vector spaces and then prompts for
# an English word. Everything runs at import time (no ``__main__`` guard).
from tvecs.bilingual_generator import bilingual_generator as bg
from tvecs.vector_space_mapper import vector_space_mapper as vm
from gensim.models import Word2Vec
import os
from gensim.models import KeyedVectors

# Disabled train/test corpus paths; only needed by the disabled MSE report
# further down.
# train_bilingual_corpus = 'data/bilingual_dictionary/en-fr.txt'
# test_bilingual_corpus = 'data/bilingual_dictionary/en-fr.txt'
#
# Dictionary passed to the mapper; the path is relative to the current
# working directory.
bilingual_dict = bg.load_bilingual_dictionary(
    'data/bilingual_dictionary/en-bn.txt')

# The two models are read through different loaders: ``Word2Vec.load`` for
# the English model and ``KeyedVectors.load_word2vec_format`` for the
# Bengali one.
# NOTE(review): presumably the files were saved in different formats -
# confirm against how the models were produced.
vector_space_mapper = vm.VectorSpaceMapper(
    model_1=Word2Vec.load(os.path.join('data', 'models',
                                       't-vex-english-model')),
    model_2=KeyedVectors.load_word2vec_format(
        'data/models/t-vex-bengali-model'),
    bilingual_dict=bilingual_dict)

vector_space_mapper.map_vector_spaces()

# Disabled MSE report; re-enabling it also requires the two corpus paths
# commented out above.
# print("Training MSE: {} %".format(vector_space_mapper.obtain_mean_square_error_from_dataset(
#     dataset_path=train_bilingual_corpus
# )))
#
# print("Testing MSE: {} %".format(vector_space_mapper.obtain_mean_square_error_from_dataset(
#     dataset_path=test_bilingual_corpus
# )))

# ``raw_input`` is the Python 2 builtin; this script does not run unchanged
# on Python 3.
word = raw_input("Please input the english word :")
# Interactive script: builds a VectorSpaceMapper from a saved English model
# and a saved French model, maps the two vector spaces, prints the MSE
# reduction for the train and test corpora and then prompts for two English
# words. Everything runs at import time (no ``__main__`` guard).
from tvecs.bilingual_generator import bilingual_generator as bg
from tvecs.vector_space_mapper import vector_space_mapper as vm
from gensim.models import KeyedVectors
import os

# NOTE(review): both names point to the same file, so the "Testing MSE"
# printed below is measured on the data the mapper was built from - confirm
# this is intended.
train_bilingual_corpus = 'data/bilingual_dictionary/en-fr.txt'
test_bilingual_corpus = 'data/bilingual_dictionary/en-fr.txt'

bilingual_dict = bg.load_bilingual_dictionary(train_bilingual_corpus)

# Both models are read with ``KeyedVectors.load_word2vec_format``; the paths
# are relative to the current working directory.
vector_space_mapper = vm.VectorSpaceMapper(
    model_1=KeyedVectors.load_word2vec_format(
        'data/models/t-vex-english-fb-model'),
    model_2=KeyedVectors.load_word2vec_format(
        'data/models/t-vex-french-model'),  # original marker: "change"
    bilingual_dict=bilingual_dict)

vector_space_mapper.map_vector_spaces()

# The value formatted into each message is whatever
# obtain_mean_square_error_from_dataset returns for that corpus.
print("Training MSE: {} %".format(
    vector_space_mapper.obtain_mean_square_error_from_dataset(
        dataset_path=train_bilingual_corpus)))
print("Testing MSE: {} %".format(
    vector_space_mapper.obtain_mean_square_error_from_dataset(
        dataset_path=test_bilingual_corpus)))

# ``raw_input`` is the Python 2 builtin; this script does not run unchanged
# on Python 3.
word1 = raw_input("Please input the english word :")
word2 = raw_input("Please input another english word :")
from tvecs.bilingual_generator import bilingual_generator as bg from tvecs.vector_space_mapper import vector_space_mapper as vm from gensim.models import Word2Vec import os # train_bilingual_corpus = 'data/bilingual_dictionary/en-fr.txt' # test_bilingual_corpus = 'data/bilingual_dictionary/en-fr.txt' # bilingual_dict = bg.load_bilingual_dictionary( 'data/bilingual_dictionary/english_hindi_bd') vector_space_mapper = vm.VectorSpaceMapper( model_1=Word2Vec.load(os.path.join('data', 'models', 't-vex-english-model')), model_2=Word2Vec.load(os.path.join('data', 'models', 't-vex-hindi-model')), bilingual_dict=bilingual_dict) vector_space_mapper.map_vector_spaces() # print("Training MSE: {} %".format(vector_space_mapper.obtain_mean_square_error_from_dataset( # dataset_path=train_bilingual_corpus # ))) # # print("Testing MSE: {} %".format(vector_space_mapper.obtain_mean_square_error_from_dataset( # dataset_path=test_bilingual_corpus # ))) word = raw_input("Please input the english word :") while word != '///': print vector_space_mapper.get_recommendations_from_word(word.lower(),
score_with_tr = metrics.mean_squared_error(expected_with_tr, actual_with_tr) self.logger.info( 'Mean Square Error for Dataset without transformation: %s', score) self.logger.info( 'Mean Square Error for Dataset' ' with transformation: %s', score_with_tr) error_reduction = ((score - score_with_tr) / score) * 100 self.logger.info( 'Reduction in Mean Square Error' ' with transformation: %s %%', error_reduction) return error_reduction if __name__ == '__main__': log.set_logger_normal(LOGGER) model_1 = Word2Vec.load( os.path.join('data', 'models', 't-vex-english-model')) model_2 = Word2Vec.load(os.path.join('data', 'models', 't-vex-hindi-model')) bilingual_dict = bg.load_bilingual_dictionary( os.path.join('data', 'bilingual_dictionary', 'english_hindi_train_bd')) vm = VectorSpaceMapper(model_1, model_2, bilingual_dict) vm.map_vector_spaces() LOGGER.info('Evaluation of Testing Dataset') vm.obtain_mean_square_error_from_dataset(dataset_path=os.path.join( 'data', 'bilingual_dictionary', 'english_hindi_test_bd')) LOGGER.info('Evaluation of Training Dataset') vm.obtain_mean_square_error_from_dataset(dataset_path=os.path.join( 'data', 'bilingual_dictionary', 'english_hindi_train_bd'))
' with transformation: %s %%', error_reduction ) return error_reduction if __name__ == '__main__': log.set_logger_normal(LOGGER) model_1 = Word2Vec.load( os.path.join('data', 'models', 't-vex-english-model') ) model_2 = Word2Vec.load( os.path.join('data', 'models', 't-vex-hindi-model') ) bilingual_dict = bg.load_bilingual_dictionary( os.path.join( 'data', 'bilingual_dictionary', 'english_hindi_train_bd' ) ) vm = VectorSpaceMapper(model_1, model_2, bilingual_dict) vm.map_vector_spaces() LOGGER.info( 'Evaluation of Testing Dataset' ) vm.obtain_mean_square_error_from_dataset(dataset_path=os.path.join( 'data', 'bilingual_dictionary', 'english_hindi_test_bd' )) LOGGER.info( 'Evaluation of Training Dataset' ) vm.obtain_mean_square_error_from_dataset(dataset_path=os.path.join( 'data', 'bilingual_dictionary', 'english_hindi_train_bd'
def bilingual_generator(lang1, lang2, bilingual_dict):
    """Return a previously generated bilingual dictionary.

    ``bilingual_dict`` is handed unchanged to
    ``bg.load_bilingual_dictionary`` and its result is returned.
    ``lang1`` and ``lang2`` are accepted but not used here.
    """
    return bg.load_bilingual_dictionary(bilingual_dict)
def setup_class(cls):
    """
    Setup Unit Testing for :class:`VectorSpaceMapper`.

    | *Test Suite ID* : V
    |
    | *Test Case Number* : 01
    |
    | *Description* : Create an instance of
    |                 :mod:`tvecs.vector_space_mapper.vector_space_mapper`.
    |
    | *Preconditions* : Corpus data for both languages
    |                   and bilingual dictionary exists.
    |
    | *Test Parameters* : model_1, model_2, bilingual_dict
    |
    | *Test Data* : model_1 = English, model_2 = Hindi, bilingual_dict =
    |               'data/bilingual_dictionary/english_hindi_train_bd'
    |
    | *Expected Result* : Vector Space Mapping created
    |
    | *Actual Result* : Vector Space Mapping created
    |
    | **Status : Pass**
    |

    - Learns transformation between two models
        - :mod:`tvecs.model_generator.model_generator`
        - :mod:`tvecs.model_generator.model_generator`

    The mapped :class:`VectorSpaceMapper` is stored on ``cls.testing_obj``.
    Any failure while building the models, loading the dictionary or
    mapping the vector spaces is reported through ``pytest.fail``.
    """
    resources_dir = os.path.join('tests', 'resources')
    try:
        model_1 = mod.generate_model(
            preprocessor_type='HcCorpusPreprocessor',
            language='english',
            corpus_fname='test_english',
            corpus_dir_path=resources_dir,
            output_dir_path=resources_dir,
            need_preprocessing=True,
            output_fname='model_1'
        )
        model_2 = mod.generate_model(
            preprocessor_type='HcCorpusPreprocessor',
            language='hindi',
            corpus_fname='test_hindi',
            corpus_dir_path=resources_dir,
            output_dir_path=resources_dir,
            need_preprocessing=True,
            output_fname='model_2'
        )
    except Exception as err:
        # Format the exception itself: ``err.message`` is not available on
        # Python 3 exceptions, and reading it there would replace the real
        # failure with an AttributeError.
        pytest.fail(
            'Model construction failed: %s' % (err,)
        )
    try:
        bilingual_dict = bg.load_bilingual_dictionary(
            os.path.join(
                'data', 'bilingual_dictionary', 'english_hindi_train_bd'
            )
        )
    except Exception as err:
        pytest.fail(
            'Bilingual Dictionary Construction failed: %s' % (err,)
        )
    try:
        cls.testing_obj = VectorSpaceMapper(
            model_1, model_2, bilingual_dict
        )
        cls.testing_obj.map_vector_spaces()
    except Exception as err:
        # ``Exception`` (not ``BaseException``) so that KeyboardInterrupt
        # and SystemExit propagate, matching the two handlers above.
        pytest.fail(
            'Vector Space Mapping failed : %s' % (err,)
        )