def test_read_csv(self):
    from utils.file_utils import read_csv
    import numpy

    m = read_csv(data_file_path)
    m2 = numpy.asarray(m)
    self.assertGreater(len(m), 0)
    self.assertIsNotNone(m2)

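# A minimal sketch of what utils.file_utils.read_csv presumably does for these
# tests: return the file's rows as lists of floats. The real helper may differ
# (the ML-pipeline variant further below takes split_ratio/header/ignore_cols).
import csv


def read_csv_sketch(path):
    with open(path) as f:
        return [[float(v) for v in row] for row in csv.reader(f) if row]
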
def test_normal_matrix(self):
    from utils.file_utils import read_csv
    from ex1_linear_regression.normal import norm_matrix
    import numpy

    m = read_csv("resource/ex1data2.txt")
    m2 = numpy.asarray(m)
    normed_matrix = norm_matrix(m2)
    self.assertIsNotNone(normed_matrix)

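# A minimal sketch of the feature scaling norm_matrix presumably performs
# (zero mean, unit standard deviation per column); the actual implementation
# in ex1_linear_regression.normal may differ in detail.
import numpy


def norm_matrix_sketch(m):
    mu = m.mean(axis=0)     # per-column mean
    sigma = m.std(axis=0)   # per-column standard deviation
    return (m - mu) / sigma
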
def test_normal_eq_2(self):
    from utils.file_utils import read_csv
    from ex1_linear_regression.normalEqn import normal_eq_n
    import numpy

    m = read_csv("resource/ex1data2.txt")
    m2 = numpy.asarray(m)
    ret = normal_eq_n(m2)
    self.assertAlmostEqual(ret[0], 89597, delta=1)
    self.assertAlmostEqual(ret[1], 139, delta=1)
    self.assertAlmostEqual(ret[2], -8738, delta=1)

def test_normal_eq_1(self):
    from utils.file_utils import read_csv
    from ex1_linear_regression.normalEqn import normal_eq_n
    import numpy

    m = read_csv("resource/ex1data1.txt")
    m2 = numpy.asarray(m)
    ret = normal_eq_n(m2)
    delta = 0.1  # assertion tolerance (the normal equation has no learning rate)
    self.assertAlmostEqual(ret[0], -3.84, delta=delta)
    self.assertAlmostEqual(ret[1], 1.18, delta=delta)

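# A minimal sketch of the closed-form normal equation that normal_eq_n
# presumably implements, theta = (X^T X)^{-1} X^T y, assuming the last column
# of the input matrix is the target. Names here are illustrative only.
import numpy


def normal_eq_sketch(m):
    X = numpy.hstack([numpy.ones((m.shape[0], 1)), m[:, :-1]])  # prepend bias column
    y = m[:, -1]
    return numpy.linalg.pinv(X.T @ X) @ X.T @ y  # pinv for numerical stability
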
def test_gcd_multi_vars_1(self):
    from utils.file_utils import read_csv
    from ex1_linear_regression.multi_vars import gcd_m
    import numpy

    m = read_csv("resource/ex1data1.txt")
    m2 = numpy.asarray(m)
    self.assertIsNotNone(m2)

    alpha = 0.01  # learning rate, reused here as the assertion tolerance
    ret = gcd_m(m2, alpha)
    self.assertAlmostEqual(ret[0], -3.84, delta=alpha)
    self.assertAlmostEqual(ret[1], 1.18, delta=alpha)

def test_gcd_multi_vars_2(self):
    from utils.file_utils import read_csv
    from ex1_linear_regression.normal import norm_matrix
    from ex1_linear_regression.multi_vars import gcd_m
    import numpy

    m = read_csv("resource/ex1data2.txt")
    m2 = numpy.asarray(m)
    normed_matrix = norm_matrix(m2)
    self.assertIsNotNone(normed_matrix)

    ret = gcd_m(normed_matrix, 0.001)
    self.assertAlmostEqual(ret[0], 89597, delta=1)
    self.assertAlmostEqual(ret[1], 139, delta=1)
    self.assertAlmostEqual(ret[2], -8738, delta=1)

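# A minimal sketch of the batch gradient descent that gcd_m presumably runs;
# the iteration count and initialization are assumptions, not the repo's code.
import numpy


def gcd_sketch(m, alpha, iterations=1500):
    X = numpy.hstack([numpy.ones((m.shape[0], 1)), m[:, :-1]])
    y = m[:, -1]
    theta = numpy.zeros(X.shape[1])
    for _ in range(iterations):
        grad = X.T @ (X @ theta - y) / len(y)  # gradient of the mean-squared cost
        theta -= alpha * grad
    return theta
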
# "TPP_INSURED_INDUSTRY_missing", # "TPP_INSURED_INDUSTRY_high", # "TPP_INSURED_INDUSTRY_medium", # "TPP_INSURED_INDUSTRY_low", # "TPP_INSURED_INDUSTRY_others", # "TPP_INSURED_EDU_(A)Illiterate", # "TPP_INSURED_EDU_(D)Others", # "TPP_INSURED_EDU_(C)Grad & above", # "TPP_INSURED_EDU_(B)Schooling", "TPP_INSURED_INCOME"] ignore_col_list_lstm = ["POL_ID", "DATA_MONTH"] print("Reading the data...") ffn_train_data, ffn_train_label, _, _, _, _ = read_csv( ffn_train_path, split_ratio=split_ratio, header=True, ignore_cols=ignore_col_list_ffn, output_label="Lapse_Flag") lstm_train_data, _, _, _, _, _ = read_csv(lstm_train_path, split_ratio=split_ratio, header=True, ignore_cols=ignore_col_list_lstm, output_label="Lapse_Flag") print("ffn data") print(ffn_train_data[0]) print(len(ffn_train_data[0])) print(ffn_train_label[0]) print(len(ffn_train_label[0])) print("lstm data")
trans_train_path = sys.argv[1]
# trans_test_path = "../data/trans_new_test.csv"
trans_test_path = sys.argv[2]
model_name = sys.argv[3]

learning_rate = 0.001
epochs = 100
batch_size = 512
display_count = 1000
split_ratio = [100, 0, 0]

print("Reading the data...")
trans_train_data, trans_train_label, _, _, _, _ = read_csv(
    trans_train_path, split_ratio=split_ratio, header=True,
    ignore_cols=["POL_ID", "DATA_MONTH", "TB_POL_BILL_MODE_CD", "MI"],
    output_label="Lapse_Flag")
trans_test_data, trans_test_label, _, _, _, _ = read_csv(
    trans_test_path, split_ratio=split_ratio, header=True,
    ignore_cols=["POL_ID", "DATA_MONTH", "TB_POL_BILL_MODE_CD", "MI"],
    output_label="Lapse_Flag")

print(trans_train_data[0])
print("Train Data Size - ", len(trans_train_data))
print("Test Data Size - ", len(trans_test_data))

print("Splitting the data...")
import os
import pandas as pd

# infer_path = "/Users/vivek/sample.csv"
infer_path = sys.argv[1]
model_name = sys.argv[2]
output_file = sys.argv[3]

batch_size = 512
display_count = 1000
split_ratio = [100, 0, 0]

print("Reading the data...")
inference_data, inference_label, _, _, _, _ = read_csv(
    infer_path, split_ratio=split_ratio, header=True,
    ignore_cols=["POL_ID", "DATA_MONTH"], output_label="Lapse_Flag")

print(inference_data[0])
print("Infer Data Size - ", len(inference_data))

print("Splitting the data...")
infer_y = divide_batches(inference_label, batch_size)
infer_batch_size = len(infer_y)

saved_model_dir = "../maxlife_models/"
if not os.path.isdir(saved_model_dir):
    os.mkdir(saved_model_dir)
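# A minimal sketch of the divide_batches helper used above, assuming it slices
# a list into consecutive batch_size-sized chunks (the last may be shorter),
# so len(infer_y) is the number of batches.
def divide_batches_sketch(items, batch_size):
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
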
# "TPP_INSURED_GENDER_MALE", # "TPP_INSURED_GENDER_null", # "TPP_INSURED_INDUSTRY_missing", # "TPP_INSURED_INDUSTRY_high", # "TPP_INSURED_INDUSTRY_medium", # "TPP_INSURED_INDUSTRY_low", # "TPP_INSURED_INDUSTRY_others", # "TPP_INSURED_EDU_(A)Illiterate", # "TPP_INSURED_EDU_(D)Others", # "TPP_INSURED_EDU_(C)Grad & above", # "TPP_INSURED_EDU_(B)Schooling", "TPP_INSURED_INCOME"] print("Reading the data...") trans_train_data, trans_train_label, _, _, _, _ = read_csv( trans_train_path, split_ratio=split_ratio, header=True, ignore_cols=ignore_col_list, output_label="Lapse_Flag") trans_test_data, trans_test_label, _, _, _, _ = read_csv( trans_test_path, split_ratio=split_ratio, header=True, ignore_cols=ignore_col_list, output_label="Lapse_Flag") print(trans_train_data[0]) print(trans_train_label[0]) pos_weight = len(trans_train_label) / sum(trans_train_label) print("Train Data Size - ", len(trans_train_data))