def test_feature_generator(self):
    """Generate feature lambdas from the toy dataset and pickle them.

    Loads ``../data/toy_dataset.txt`` through ``PreprocessTags``, builds the
    tuple corpus from the first 10,000 entries of ``data.x`` / ``data.y``,
    runs ``generate_lambdas`` for every template in ``feat.templates_dict``,
    prints how many lambdas were produced and writes them to
    ``../training/report_lambdas_dict.p``.

    Paths are relative to the current working directory.
    """
    # Local imports keep this block self-contained regardless of what the
    # module header imports.
    import os
    import pickle

    # os.path.join yields the original '..\\data\\...' on Windows and a
    # valid '../data/...' elsewhere, so the test is no longer Windows-only.
    data_path = os.path.join('..', 'data', 'toy_dataset.txt')
    data = PreprocessTags(True).load_data(data_path)

    feat_generator = Features()
    feat_generator.generate_tuple_corpus(data.x[0:10000], data.y[0:10000])

    # NOTE: loading the last saved version (feat_generator.get_tests()) was
    # already disabled in the original; the empty try/except around it was
    # dead code and has been removed.
    for template in feat.templates_dict.values():
        feat_generator.generate_lambdas(template['func'], template['tuples'])

    result = feat_generator.lambdas
    print(len(result))

    with open("../training/report_lambdas_dict.p", 'wb') as stream:
        pickle.dump(result, stream)
def test_predict(self):
    """Fit a ``Model`` on a small training slice and dump its confusion matrix.

    Uses the first ``word_num`` (30) entries of ``../data/train.wtag`` both
    for fitting and for prediction, prints the predictions, and writes the
    result of ``Model.confusion`` to ``../training/confusion_matrix.csv``.

    Paths are relative to the current working directory.
    """
    import os

    # Load data. os.path.join reproduces the original '..\\data\\train.wtag'
    # on Windows and produces a valid path on other platforms.
    data = PreprocessTags(True).load_data(
        os.path.join('..', 'data', 'train.wtag'))
    word_num = 30
    x = data.x[0:word_num]
    y = data.y[0:word_num]

    # Regenerate the saved feature tests (can be skipped when the saved
    # file is already up to date).
    feat_generator = Features()
    feat_generator.generate_tuple_corpus(x, y)
    for template in feat.templates_dict.values():
        feat_generator.generate_lambdas(template['func'], template['tuples'])
    feat_generator.save_tests()

    model1 = Model()
    model1.fit(x, y)  # return value was assigned but never used

    # Predict on the same slice the model was fitted on.
    x_test = x
    y_hat = model1.predict(x_test)
    print(y_hat)

    cm = model1.confusion(y_hat=y_hat, y=y)
    cm.to_csv(r'../training/confusion_matrix.csv')
def test_create_tuples(self):
    """Run the ``FinkMos`` pipeline end to end on a 1,000-entry slice.

    Loads ``../data/train.wtag``, regenerates the saved feature tests,
    then on a ``FinkMos`` instance calls ``create_tuples``,
    ``create_feature_sparse_list_v2`` and ``minimize_loss`` in that order,
    printing intermediate state along the way. Finally dumps ``fm.v`` to a
    file named ``values`` in the current working directory.
    """
    import os

    # os.path.join reproduces the original '..\\data\\train.wtag' on Windows
    # and produces a valid path on other platforms.
    data = PreprocessTags(True).load_data(
        os.path.join('..', 'data', 'train.wtag'))
    word_num = 1_000

    # Distinct labels found in the slice, passed on to FinkMos.
    tag_corp = pd.Series(data.y[0:word_num]).unique()

    # Regenerate the saved feature tests (can be skipped when the saved
    # file is already up to date).
    feat_generator = Features()
    feat_generator.generate_tuple_corpus(data.x[0:word_num],
                                         data.y[0:word_num])
    for template in feat.templates_dict.values():
        feat_generator.generate_lambdas(template['func'], template['tuples'])
    feat_generator.save_tests()

    # Fresh slices are taken here (as in the original) so FinkMos does not
    # share sequence objects with the feature generator.
    fm = FinkMos(data.x[0:word_num], data.y[0:word_num], tag_corp)
    fm.create_tuples()
    print("fm.weight_mat")
    print(fm.weight_mat)
    print("fm.tuple_5_list")
    print(fm.tuple_5_list)

    fm.create_feature_sparse_list_v2()
    print(fm.f_matrix_list[0].shape)

    fm.minimize_loss()
    fm.v.dump('values')
import os
import unittest

import numpy as np
import pandas as pd

from models.model import Model
import models.features as feat
from models.features import Features
from models.prerocesing import PreprocessTags
from models.sentence_processor import FinkMos

# NOTE(review): machine-specific absolute path. All relative data paths
# below depend on this being the working directory; adjust it when running
# on another machine.
os.chdir(r'C:\Users\amoscoso\Documents\Technion\nlp\nlp_hw\tests')

# %%
# Cell 1: load the training data, regenerate the saved feature tests from
# its first `word_num` entries, and load the test data.
data = PreprocessTags(True).load_data(r'..\data\train.wtag')
word_num = 500

# generate tests - (comment out if file is updated)
feat_generator = Features()
feat_generator.generate_tuple_corpus(data.x[0:word_num], data.y[0:word_num])
for template in feat.templates_dict.values():
    feat_generator.generate_lambdas(template['func'], template['tuples'])
feat_generator.save_tests()

test_data = PreprocessTags(True).load_data(r'..\data\test.wtag')

# %%
# Cell 2: fit on the training slice, predict on the test slice.
# `word_num` is re-assigned so this cell does not depend on the value
# still being set from the previous cell.
word_num = 500
test_number = 50

model1 = Model()
model1.fit(data.x[0:word_num], data.y[0:word_num])
y_hat = model1.predict(test_data.x[:test_number])

# Score the predictions against the labels of the data they were made on.
# The original compared them with the *training* labels (data.y), which
# describe different inputs than the test inputs that were predicted.
model1.confusion(y_hat, test_data.y[:test_number])