def test_titanic(self):
    """Smoke-test ``self.class_`` on the titanic train set.

    Trains for one epoch, exercises every inference endpoint, then saves,
    reloads into a fresh instance, and exercises the same endpoints again
    to confirm the persisted model behaves like the live one.
    """
    class_ = self.class_
    dataset = DatasetPackLoader().load_dataset("titanic")
    dataset = dataset.train_set
    Xs, Ys = dataset.full_batch(['Xs', 'Ys'])
    sample_X = Xs[:2]
    sample_Y = Ys[:2]

    def check_model(model):
        # Exercise each inference endpoint once; output is for eyeballing only.
        # Identical statement/print order to the original duplicated block.
        code = model.code(sample_X)
        print("code {code}".format(code=code))
        recon = model.recon(sample_X, sample_Y)
        print("recon {recon}".format(recon=recon))
        loss = model.metric(sample_X, sample_Y)
        print("loss {:}".format(loss))
        # generate(self, zs, Ys)
        proba = model.proba(sample_X)
        print("proba {}".format(proba))
        predict = model.predict(sample_X)
        print("predict {}".format(predict))
        score = model.score(sample_X, sample_Y)
        print("score {}".format(score))

    model = class_(dataset.input_shapes)
    model.build()
    model.train(Xs, Ys, epoch=1)
    check_model(model)

    path = model.save()
    model = class_()
    model.load(path)
    print('model reloaded')
    check_model(model)
def exp_titanic_id_static(print, pprint):
    """Average per-classifier predictions over repeated shuffled splits.

    Runs ``n`` rounds of: shuffle the titanic train set, fit a fresh
    ClassifierPack on a 70/30 split, predict on the (re-sorted) full set,
    and accumulate each classifier's predictions weighted by 1/n.  The
    averaged predictions plus every non-X/Y batch column are written to csv.
    """
    dataset = DatasetPackLoader().load_dataset("titanic")
    dataset = dataset.set['train']

    n = 100
    ret_dict = {}
    for _ in range(n):
        clf_pack = ClassifierPack()
        dataset.shuffle()
        train_set, valid_set = dataset.split((7, 3))
        train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys'])
        clf_pack.fit(train_Xs, train_Ys)

        # restore deterministic row order before predicting on the full set
        dataset.sort()
        full_Xs, full_Ys = dataset.full_batch(['Xs', 'Ys'])
        predict = clf_pack.predict(full_Xs)
        for key in predict:
            contribution = predict[key] / float(n)
            if key in ret_dict:
                ret_dict[key] += contribution
            else:
                ret_dict[key] = contribution

    import pandas as pd

    df = pd.DataFrame()
    for key in ret_dict:
        df[key] = ret_dict[key]

    # carry along every batch column except the feature/label arrays
    for key in dataset.BATCH_KEYS:
        if key in ['Xs', 'Ys']:
            continue
        print(key, type(key))
        df[key] = dataset.full_batch([key])

    df.to_csv('./exp_titianic_id_result.csv', )
def test_titanic(self):
    """Smoke-test the DAE model on the titanic train set.

    Trains for one epoch, checks code/recon/metric, then saves, reloads
    into a fresh instance, and repeats the same checks on the reloaded model.
    """
    class_ = DAE
    dataset = DatasetPackLoader().load_dataset("titanic")
    dataset = dataset.train_set
    model = class_(dataset.input_shapes)
    model.build()
    Xs = dataset.full_batch(['Xs'])
    model.train(Xs, epoch=1)

    def check_model(model):
        # Same statement/print order as the original duplicated block.
        sample_X = Xs[:2]
        code = model.code(sample_X)
        print("code {code}".format(code=code))
        recon = model.recon(sample_X)
        print("recon {recon}".format(recon=recon))
        loss = model.metric(Xs)
        loss = np.mean(loss)
        print("loss {:.4}".format(loss))

    check_model(model)

    path = model.save()
    model = class_()
    model.load(path)
    print('model reloaded')
    check_model(model)
def exp_stackingCV_metaclf(print, pprint):
    """Try each non-mlx classifier in the pack as the stackingCV meta-clf.

    Builds a stackingCV ensemble per candidate meta classifier, fits it on
    the 70/30 titanic train split, and reports its score pack on the
    validation split.
    """
    dataset = DatasetPackLoader().load_dataset("titanic")
    train_set, valid_set = dataset.split('train', 'train', 'valid', (7, 3))
    train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys'])
    valid_Xs, valid_Ys = valid_set.full_batch(['Xs', 'Ys'])

    clf = ClassifierPack()
    # drop classifiers excluded from this experiment
    for name in ('mlxMLP', 'mlxAdaline', 'mlxSoftmaxRegressionClf',
                 'skGaussian_NB', 'skQDA'):
        clf.drop_clf(name)

    for key, meta_clf in clf.pack.items():
        if 'mlx' in key:
            continue
        pprint(f'meta clf = {key}')
        stacking = clf.make_stackingCVClf(meta_clf)
        stacking.fit(train_Xs, train_Ys)
        score = stacking.score_pack(valid_Xs, valid_Ys)
        pprint(f'score {score}')
def setup(self):
    """Per-test fixture: move cwd one level up, then cache a titanic split.

    Loads the titanic dataset, splits its train set 70/30, and stores the
    dataset plus train/valid batches on the test instance for the tests
    to reuse.
    """
    print('reset current dir')
    print('cur dir')
    print(os.getcwd())
    # tests are launched from a subdirectory; hop up to the project root
    head, tail = os.path.split(os.getcwd())
    os.chdir(head)
    print(os.getcwd())

    # imported lazily: these modules resolve only after the chdir above
    from data_handler.DatasetPackLoader import DatasetPackLoader
    from sklearn_like_toolkit.ClassifierPack import ClassifierPack

    self.cls = ClassifierPack

    dataset = DatasetPackLoader().load_dataset("titanic")
    train_set, valid_set = dataset.split('train', 'train', 'valid', (7, 3))
    self.dataset = dataset
    self.train_Xs, self.train_Ys = train_set.full_batch(['Xs', 'Ys'])
    self.valid_Xs, self.valid_Ys = valid_set.full_batch(['Xs', 'Ys'])
def test_mnist(self):
    """Smoke-test ``self.class_`` on the MNIST train set.

    Trains for one epoch, checks code/recon/metric (note: this model's
    ``code``/``recon`` take both X and Y samples), then saves, reloads into
    a fresh instance, and repeats the same checks.
    """
    class_ = self.class_
    dataset = DatasetPackLoader().load_dataset("MNIST")
    dataset = dataset.train_set
    Xs, Ys = dataset.full_batch(['Xs', 'Ys'])
    sample_X = Xs[:2]
    sample_Y = Ys[:2]

    def check_model(model):
        # Same statement/print order as the original duplicated block.
        code = model.code(sample_X, sample_Y)
        print("code {code}".format(code=code))
        recon = model.recon(sample_X, sample_Y)
        print("recon {recon}".format(recon=recon))
        loss = model.metric(sample_X, sample_Y)
        loss = np.mean(loss)
        print("loss {:.4}".format(loss))

    model = class_(dataset.input_shapes)
    model.build()
    model.train(Xs, Ys, epoch=1)
    check_model(model)

    path = model.save()
    model = class_()
    model.load(path)
    print('model reloaded')
    check_model(model)
def test(self):
    """Smoke-test MLPClassifier on titanic: train, evaluate, save, reload.

    Trains one epoch on the full train batch, then runs
    predict/metric/proba/score on a 5-row batch, both before and after a
    save/load round-trip.
    """
    dataset = DatasetPackLoader().load_dataset("titanic")
    input_shapes = dataset.train_set.input_shapes
    Xs, Ys = dataset.train_set.full_batch(
        batch_keys=["Xs", "Ys"],
    )
    model = MLPClassifier(input_shapes)
    model.build()
    model.train(Xs, Ys, epoch=1)

    # small batch used for evaluation below
    Xs, Ys = dataset.train_set.next_batch(
        5,
        batch_keys=["Xs", "Ys"],
    )

    def check_model(model):
        # Same statement/print order as the original duplicated block.
        predict = model.predict(Xs)
        print("predict {}".format(predict))
        loss = model.metric(Xs, Ys)
        print("loss {}".format(loss))
        proba = model.proba(Xs)
        print('prob {}'.format(proba))
        score = model.score(Xs, Ys)
        print('score {}'.format(score))

    check_model(model)

    path = model.save()
    model = MLPClassifier()
    model.load(path)
    check_model(model)
def exp_titanic_statistic(print, pprint):
    """Compare ensemble strategies over the titanic classifier pack.

    Scores the pack with default and tuned params, then builds hard-voting,
    stacking, and stackingCV ensembles from packs of size 1/10/100 drawn
    from the default pack and the top-1 / top-5 tuned clones, reporting a
    score pack for each combination on the validation split.
    """
    dataset = DatasetPackLoader().load_dataset("titanic")
    train_set, valid_set = dataset.split('train', 'train', 'valid', (7, 3))
    train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys'])
    valid_Xs, valid_Ys = valid_set.full_batch(['Xs', 'Ys'])

    clf_pack = ClassifierPack()
    clf_pack.drop_clf('mlxMLP')
    clf_pack.drop_clf('mlxAdaline')
    clf_pack.drop_clf('mlxSoftmaxRegressionClf')
    clf_pack.drop_clf('skGaussian_NB')
    clf_pack.drop_clf('skQDA')
    pack = clf_pack.pack
    pprint(f'pack list {pack}')

    meta_clf = pack['skBernoulli_NB']
    pprint(f'metaclf = {meta_clf}')

    clf_pack.fit(train_Xs, train_Ys)
    score_pack = clf_pack.score_pack(valid_Xs, valid_Ys)
    pprint('default param clf pack')
    pprint(score_pack)

    clf_pack.param_search(train_Xs, train_Ys)
    score_pack = clf_pack.score_pack(valid_Xs, valid_Ys)
    pprint('optimize param clf pack top1')
    pprint(score_pack)

    # only sklearn-compatible estimators (those exposing get_params) can be
    # cloned into the ensembles below
    pack = [clf for k, clf in clf_pack.pack.items() if hasattr(clf, 'get_params')]
    pack1_default = pack
    pack10_default = pack * 10
    pack100_default_ = pack * 100

    pack1_top1 = clf_pack.clone_top_k_tuned(k=1)
    pack1_top1 = [clf for k, clf in pack1_top1.items() if hasattr(clf, 'get_params')]
    pack10_top1 = pack1_top1 * 10
    pack100_top1 = pack1_top1 * 100

    pack1_top5 = clf_pack.clone_top_k_tuned(k=5)
    pack1_top5 = [clf for k, clf in pack1_top5.items() if hasattr(clf, 'get_params')]
    pack10_top5 = pack1_top5 * 10
    pack100_top5 = pack1_top5 * 100

    def voting_stacking_stackingCV(pack, param_type, pack_n, top):
        # Fit and score all three ensemble flavors on the same split.
        pprint(f'param_type={param_type}, pack_n={pack_n}, top={top}')

        voting = FoldingHardVote(pack)
        voting.fit(train_Xs, train_Ys)
        score_pack = voting.score_pack(valid_Xs, valid_Ys)
        pprint(f'{param_type} param clf pack * {pack_n}, {top} to hard voting')
        pprint(score_pack)

        stacking = mlxStackingClf(pack, meta_clf)
        stacking.fit(train_Xs, train_Ys)
        score_pack = stacking.score_pack(valid_Xs, valid_Ys)
        pprint(f'{param_type} param clf pack * {pack_n}, {top} to stacking')
        pprint(score_pack)

        stackingCV = mlxStackingCVClf(pack, meta_clf)
        # BUG FIX: original called fit(train_Xs, train_Xs), passing features
        # as labels; labels must be train_Ys like the other ensembles above.
        stackingCV.fit(train_Xs, train_Ys)
        score_pack = stackingCV.score_pack(valid_Xs, valid_Ys)
        pprint(f'{param_type} param clf pack * {pack_n}, {top} to stackingCV')
        pprint(score_pack)

    voting_stacking_stackingCV(pack1_default, 'default', 1, None)
    voting_stacking_stackingCV(pack10_default, 'default', 10, None)
    voting_stacking_stackingCV(pack100_default_, 'default', 100, None)
    voting_stacking_stackingCV(pack1_top1, 'optimize', 1, 'top1')
    voting_stacking_stackingCV(pack10_top1, 'optimize', 10, 'top1')
    voting_stacking_stackingCV(pack100_top1, 'optimize', 100, 'top1')
    voting_stacking_stackingCV(pack1_top5, 'optimize', 1, 'top5')
    voting_stacking_stackingCV(pack10_top5, 'optimize', 10, 'top5')
    voting_stacking_stackingCV(pack100_top5, 'optimize', 100, 'top5')
# usage

    from data_handler.DatasetPackLoader import DatasetPackLoader
    from InstanceManger import InstanceManager

    # load dataset by calling DatasetLoader
    # input_shapes is for tensorflow.PlaceHolder's shape
    # needed to build instance
    dataset, input_shapes = DatasetPackLoader().load_dataset("dataset_name")

    # apply to train model
    instanceManager = InstanceManager()
    instanceManager.train_instance(epoch_time, dataset=dataset, check_point_interval=check_point_interval)

# 1. add dataset folder path in **env_setting.py**

    EXAMPLE_DATASET_PATH = os.path.join(DATA_PATH, 'example_dataset')

# 2. add dataset_batch_keys in **dict_keys.dataset_batch_keys.py**

    INPUT_SHAPE_KEY_DATA_X = "INPUT_SHAPE_KEY_DATA_X"
    INPUT_SHAPE_KEY_DATA_X = "INPUT_SHAPE_KEY_DATA_X"
    INPUT_SHAPE_KEY_LABEL = "INPUT_SHAPE_KEY_LABEL"
    INPUT_SHAPE_KEY_LABEL_SIZE = "INPUT_SHAPE_KEY_LABEL_SIZE"

# 3. add input_shapes_keys in **dict_keys.dataset_batch_keys.py**

    BATCH_KEY_EXAMPLE_TRAIN_X = "BATCH_KEY_EXAMPLE_TRAIN_X"
    BATCH_KEY_EXAMPLE_TEST_X = "BATCH_KEY_EXAMPLE_TEST_X"
    BATCH_KEY_EXAMPLE_TRAIN_LABEL = "BATCH_KEY_EXAMPLE_TRAIN_LABEL"
    BATCH_KEY_EXAMPLE_TEST_LABEL = "BATCH_KEY_EXAMPLE_TEST_LABEL"

# 4. implement dataset class in **data_handler.dataset_name.py**