def exp_titanic_id_static(print, pprint): dataset = DatasetPackLoader().load_dataset("titanic") dataset = dataset.set['train'] ret_dict = {} n = 100 for i in range(n): clf_pack = ClassifierPack() dataset.shuffle() train_set, valid_set = dataset.split((7, 3)) train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys']) clf_pack.fit(train_Xs, train_Ys) dataset.sort() full_Xs, full_Ys = dataset.full_batch(['Xs', 'Ys']) predict = clf_pack.predict(full_Xs) for key in predict: if key in ret_dict: ret_dict[key] += predict[key] / float(n) else: ret_dict[key] = predict[key] / float(n) import pandas as pd df = pd.DataFrame() for key in ret_dict: df[key] = ret_dict[key] for key in dataset.BATCH_KEYS: if key in ['Xs', 'Ys']: continue print(key, type(key)) df[key] = dataset.full_batch([key]) df.to_csv('./exp_titianic_id_result.csv', )
def exp_stackingCV_metaclf(print, pprint): dataset = DatasetPackLoader().load_dataset("titanic") train_set, valid_set = dataset.split('train', 'train', 'valid', (7, 3)) train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys']) valid_Xs, valid_Ys = valid_set.full_batch(['Xs', 'Ys']) clf = ClassifierPack() clf.drop_clf('mlxMLP') clf.drop_clf('mlxAdaline') clf.drop_clf('mlxSoftmaxRegressionClf') clf.drop_clf('skGaussian_NB') clf.drop_clf('skQDA') pack = clf.pack for key, meta_clf in pack.items(): if 'mlx' in key: continue pprint(f'meta clf = {key}') stacking = clf.make_stackingCVClf(meta_clf) stacking.fit(train_Xs, train_Ys) score = stacking.score_pack(valid_Xs, valid_Ys) pprint(f'score {score}')
def setup(self): print('reset current dir') print('cur dir') print(os.getcwd()) head, tail = os.path.split(os.getcwd()) os.chdir(head) print(os.getcwd()) from data_handler.DatasetPackLoader import DatasetPackLoader from sklearn_like_toolkit.ClassifierPack import ClassifierPack self.cls = ClassifierPack dataset = DatasetPackLoader().load_dataset("titanic") train_set, valid_set = dataset.split('train', 'train', 'valid', (7, 3)) train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys']) valid_Xs, valid_Ys = valid_set.full_batch(['Xs', 'Ys']) self.dataset = dataset self.train_Xs = train_Xs self.train_Ys = train_Ys self.valid_Xs = valid_Xs self.valid_Ys = valid_Ys
def exp_titanic_statistic(print, pprint): dataset = DatasetPackLoader().load_dataset("titanic") train_set, valid_set = dataset.split('train', 'train', 'valid', (7, 3)) train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys']) valid_Xs, valid_Ys = valid_set.full_batch(['Xs', 'Ys']) clf_pack = ClassifierPack() clf_pack.drop_clf('mlxMLP') clf_pack.drop_clf('mlxAdaline') clf_pack.drop_clf('mlxSoftmaxRegressionClf') clf_pack.drop_clf('skGaussian_NB') clf_pack.drop_clf('skQDA') pack = clf_pack.pack pprint(f'pack list {pack}') meta_clf = pack['skBernoulli_NB'] pprint(f'metaclf = {meta_clf}') clf_pack.fit(train_Xs, train_Ys) score_pack = clf_pack.score_pack(valid_Xs, valid_Ys) pprint('default param clf pack') pprint(score_pack) clf_pack.param_search(train_Xs, train_Ys) score_pack = clf_pack.score_pack(valid_Xs, valid_Ys) pprint('optimize param clf pack top1') pprint(score_pack) pack = [clf for k, clf in clf_pack.pack.items() if hasattr(clf, 'get_params')] pack1_default = pack pack10_default = pack * 10 pack100_default_ = pack * 100 pack1_top1 = clf_pack.clone_top_k_tuned(k=1) pack1_top1 = [clf for k, clf in pack1_top1.items() if hasattr(clf, 'get_params')] pack10_top1 = pack1_top1 * 10 pack100_top1 = pack1_top1 * 100 pack1_top5 = clf_pack.clone_top_k_tuned(k=5) pack1_top5 = [clf for k, clf in pack1_top5.items() if hasattr(clf, 'get_params')] pack10_top5 = pack1_top5 * 10 pack100_top5 = pack1_top5 * 100 def voting_stacking_stackingCV(pack, param_type, pack_n, top): pprint(f'param_type={param_type}, pack_n={pack_n}, top={top}') voting = FoldingHardVote(pack) voting.fit(train_Xs, train_Ys) score_pack = voting.score_pack(valid_Xs, valid_Ys) pprint(f'{param_type} param clf pack * {pack_n}, {top} to hard voting') pprint(score_pack) stacking = mlxStackingClf(pack, meta_clf) stacking.fit(train_Xs, train_Ys) score_pack = stacking.score_pack(valid_Xs, valid_Ys) pprint(f'{param_type} param clf pack * {pack_n}, {top} to stacking') pprint(score_pack) stackingCV = mlxStackingCVClf(pack, meta_clf) stackingCV.fit(train_Xs, train_Xs) score_pack = stackingCV.score_pack(valid_Xs, valid_Ys) pprint(f'{param_type} param clf pack * {pack_n}, {top} to stackingCV') pprint(score_pack) voting_stacking_stackingCV(pack1_default, 'default', 1, None) voting_stacking_stackingCV(pack10_default, 'default', 10, None) voting_stacking_stackingCV(pack100_default_, 'default', 100, None) voting_stacking_stackingCV(pack1_top1, 'optimize', 1, 'top1') voting_stacking_stackingCV(pack10_top1, 'optimize', 10, 'top1') voting_stacking_stackingCV(pack100_top1, 'optimize', 100, 'top1') voting_stacking_stackingCV(pack1_top5, 'optimize', 1, 'top5') voting_stacking_stackingCV(pack10_top5, 'optimize', 10, 'top5') voting_stacking_stackingCV(pack100_top5, 'optimize', 100, 'top5')