Example #1
    def save_params(self, path=None):
        if path is None:
            path = os.path.join(self.params_save_path, time_stamp())

        params = self.export_params()

        pickle_path = path + '.pkl'
        dump_pickle(params, pickle_path)

        self.log.info('saved params at {}'.format(pickle_path))

        return pickle_path
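Every example here leans on a handful of small I/O helpers from the project that the snippets do not show. A minimal sketch of what `dump_pickle`, `load_pickle`, `dump_json`, and `time_stamp` might look like (hypothetical reconstructions, not the project's actual code):

# hypothetical reconstructions of the helpers the examples assume
import json
import pickle
from datetime import datetime


def dump_pickle(obj, path):
    # serialize obj to path with pickle
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def load_pickle(path):
    # deserialize a pickled object from path
    with open(path, 'rb') as f:
        return pickle.load(f)


def dump_json(obj, path):
    # serialize obj to path as JSON
    with open(path, 'w') as f:
        json.dump(obj, f)


def time_stamp():
    # filesystem-safe timestamp string, e.g. '2018-10-01_12-30-59'
    return datetime.now().strftime('%Y-%m-%d_%H-%M-%S')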
Example #2
    def train_models(self, cache=True, path='./models.pkl'):
        p_types = self.p_types

        if os.path.exists(path) and cache:
            print('models cache found, use cache')
            clfs = load_pickle(path)
            return clfs

        print('train_model')

        print('load data')
        full_df = self.full_set.to_DataFrame()
        full_df.info()

        p_types_str = [str(val) for val in p_types]
        pprint(p_types)

        clf_dict = {}
        for p_type, p_type_str in zip(p_types, p_types_str):
            print(f'train type: {p_type_str}')

            x_cols = list(self.x_cols)

            for y_col in p_type:
                x_cols.remove(y_col)

            clfs = {}
            for y_col in p_type:
                print(f'train label: {y_col}')
                print(x_cols, y_col)

                x_df = full_df[x_cols]
                y_df = full_df[[y_col]]
                dataset = BaseDataset(x=x_df, y=y_df)
                dataset.shuffle()
                train_set, test_set = dataset.split()
                train_xs, train_ys = train_set.full_batch(out_type='df')
                test_xs, test_ys = test_set.full_batch(out_type='df')
                # print(train_xs.info())

                # other available classifiers: 'skMLPClf', 'skRandomForestClf',
                # 'skExtraTreesClf', 'skAdaBoostClf', 'skGradientBoostingClf',
                # 'skLinear_SVCClf', 'skBaggingClf', 'LightGBMClf', 'skRidgeCVClf'
                clf_name = 'XGBoostClf'
                clf = ClassifierPack([clf_name])

                # opt = ParamOpt(cv=3, n_jobs=6, n_iter=10)
                # clf.pack[clf_name] = opt.fit(clf[clf_name], train_xs, train_ys)
                clf.fit(train_xs, train_ys)

                train_score = clf.score(train_xs, train_ys)
                test_score = clf.score(test_xs, test_ys)
                if len(train_score) == 0:
                    raise ValueError(f'{y_col} in {p_type} fail')
                pprint(train_score)
                pprint(test_score)
                score_pack = clf.score_pack(test_xs, test_ys)
                pprint(score_pack)
                print(clf.feature_importance)
                print(f'score train = {train_score},\n test = {test_score}')

                predict = clf.predict(train_xs[:1])[clf_name]
                print(f'predict = {predict}, train_ys = {train_ys[:1]}')

                clfs[y_col] = clf

            clf_dict[p_type_str] = clfs

        dump_pickle(clf_dict, path)

        return clf_dict
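The cache check at the top of `train_models` is a general memoize-to-disk pattern. It can be factored into a reusable helper (a sketch, assuming the `dump_pickle`/`load_pickle` helpers sketched under Example #1; `pickle_cache` is a hypothetical name):

import os


def pickle_cache(path, fn, *args, use_cache=True, **kwargs):
    # return the pickled result if it exists, otherwise compute, cache, and return it
    if use_cache and os.path.exists(path):
        return load_pickle(path)

    result = fn(*args, **kwargs)
    dump_pickle(result, path)
    return result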
Example #3
 def _save_params(self, path):
     dump_pickle(self.params, path)
Example #4
    def save_fail_list(self, path=None):
        if path is None:
            path = os.path.join('.', 'fail_list', time_stamp())

        dump_pickle(self.fail_list, path + ".pkl")
        dump_json(list(map(str, self.fail_list)), path + ".json")
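Writing both formats keeps a byte-exact binary copy alongside a human-readable one. Reading them back might look like this (a sketch, assuming the helper definitions above and the same `path`):

import json

# exact Python objects, restored via pickle
fail_list = load_pickle(path + '.pkl')

# human-readable string forms, restored via the standard json module
with open(path + '.json') as f:
    fail_list_str = json.load(f)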
Example #5
def make_data_pkl():
    print('collect train images')
    train_images, train_image_names, train_ids = collect_images(TRAIN_IMAGE_PATH)

    print('collect train mask images')
    train_mask, train_mask_names, train_mask_ids = collect_images(TRAIN_MASK_PATH)

    print('collect test images')
    test_images, test_image_names, test_ids = collect_images(TEST_IMAGE_PATH)

    print('collect csv files')
    df_depths = pd.read_csv(DEPTHS_CSV_PATH)
    df_train = pd.read_csv(TRAIN_CSV_PATH)
    df_train.fillna('none', inplace=True)

    df_merge = pd.merge(df_depths, df_train, how='outer', on='id')
    df_merge.to_csv(MERGE_CSV_PATH, index=False)

    print('collect train depth')
    train_depths = df_merge[df_merge['rle_mask'].notna()]
    train_depths = train_depths.sort_values('id')
    train_depths = train_depths.reset_index(drop=True)
    train_depths = train_depths['z']

    print('collect test depth')
    test_depths = df_merge[df_merge['rle_mask'].isna()]
    test_depths = test_depths.sort_values('id')
    test_depths = test_depths.reset_index(drop=True)
    test_depths = test_depths['z']

    print('collect train mask rate')
    train_mask_rate = get_feature_mask_rate(train_mask)

    print('collect train empty mask')
    train_empty_mask = get_feature_empty_mask(train_mask)

    print('collect train weird mask')
    train_weired_mask = get_feature_weired_mask(train_mask)

    print('collect train depth_image')
    train_depths_image = depth_to_image(train_depths)

    print('collect test depth_image')
    test_depths_image = depth_to_image(test_depths)

    print('dump train pickle')
    train_pkl = {
        'image': train_images,
        'mask': train_mask,
        'id': train_ids,
        'depths': train_depths,
        'mask_rate': train_mask_rate,
        'empty_mask': train_empty_mask,
        'is_weired_mask': train_weired_mask,
        'depth_image': train_depths_image,
    }
    dump_pickle(train_pkl, TRAIN_PKL_PATH)

    print('dump test pickle')
    test_pkl = {
        'image': test_images,
        'id': test_ids,
        'depths': test_depths,
        'depth_image': test_depths_image
    }
    dump_pickle(test_pkl, TEST_PKL_PATH)
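On the consumer side, loading these pickles back mirrors the dicts built above (a sketch using the same keys and path constants):

# reload the dumped datasets; keys mirror train_pkl/test_pkl above
train_pkl = load_pickle(TRAIN_PKL_PATH)
train_images = train_pkl['image']
train_mask = train_pkl['mask']
train_depths = train_pkl['depths']

test_pkl = load_pickle(TEST_PKL_PATH)
test_images = test_pkl['image']
test_depths = test_pkl['depths']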
Example #6
File: MixIn.py  Project: demetoir/MLtools
 def to_pickle(obj, path):
     dump_pickle(obj, path)
Example #7
File: MixIn.py  Project: demetoir/MLtools
 def to_pickle(self, path, **kwargs):
     dump_pickle(self, path)
Example #8
File: MixIn.py  Project: demetoir/MLtools
 def dump(self, path):
     dump_pickle(self, path)
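Examples #6 through #8 are thin wrappers meant to live in a mixin, so any class that inherits them can be pickled to a path in one call. A minimal sketch of how that composition might look (`PickleMixIn` and `Model` are hypothetical names):

class PickleMixIn:
    # hypothetical mixin bundling the wrappers from Examples #7 and #8
    def to_pickle(self, path, **kwargs):
        dump_pickle(self, path)

    def dump(self, path):
        dump_pickle(self, path)


class Model(PickleMixIn):
    def __init__(self, params):
        self.params = params


model = Model({'lr': 0.01})
model.to_pickle('./model.pkl')  # serializes the whole object via dump_pickle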