def save_params(self, path=None):
    """Export this object's parameters and pickle them to disk.

    Args:
        path: Destination path *without* extension. Defaults to
            ``os.path.join(self.params_save_path, time_stamp())``.

    Returns:
        str: The full path of the written ``.pkl`` file.
    """
    if path is None:
        path = os.path.join(self.params_save_path, time_stamp())
    params = self.export_params()
    pickle_path = path + '.pkl'
    dump_pickle(params, pickle_path)
    # Fix: log the plain path string. The original passed a one-element
    # list to format(), so the log line read "save params at ['...']"
    self.log.info('save params at {}'.format(pickle_path))
    return pickle_path
def train_models(self, cache=True, path='./models.pkl'):
    """Train one classifier per label for every label-group in ``self.p_types``.

    For each label group (``p_type``) the group's own columns are removed
    from the feature columns, then one classifier is fit per label in the
    group on a shuffled train/test split of the full dataset.

    Args:
        cache: If True and ``path`` exists, load and return the pickled
            models instead of retraining.
        path: Pickle file used both as the cache to read and as the
            destination for the trained models.

    Returns:
        dict: ``{str(p_type): {y_col: ClassifierPack}}`` — when the cache is
            hit, whatever object ``load_pickle(path)`` returns.

    Raises:
        ValueError: If ``clf.score`` returns an empty result for a label
            (training is considered to have failed for that label).
    """
    p_types = self.p_types
    # Cache hit: skip training entirely and return the pickled models.
    if os.path.exists(path) and cache:
        print('models cache found, use cache')
        clfs = load_pickle(path)
        return clfs
    print('train_model')
    full_df = self.full_set.to_DataFrame()
    print(full_df.info())
    print('load data')
    # Stringified group names are used as the outer dict keys.
    p_types_str = [str(val) for val in p_types]
    pprint(p_types)
    clf_dict = {}
    for p_type, p_type_str in list(zip(p_types, p_types_str)):
        print(f'train type : {p_type_str }')
        # Exclude this group's label columns from the feature set so the
        # targets never leak into the inputs.
        x_cols = list(self.x_cols)
        for y_col in p_type:
            x_cols.remove(y_col)
        clfs = {}
        for y_col in p_type:
            print(f'train label : {y_col}')
            print(x_cols, y_col)
            x_df = full_df[x_cols]
            y_df = full_df[[y_col]]
            dataset = BaseDataset(x=x_df, y=y_df)
            dataset.shuffle()
            train_set, test_set = dataset.split()
            train_xs, train_ys = train_set.full_batch(out_type='df')
            test_xs, test_ys = test_set.full_batch(out_type='df')
            # print(train_xs.info())
            # Candidate classifier names kept as commented toggles for
            # experimentation; this list is overwritten below so only
            # 'XGBoostClf' is actually trained.
            class_pack_names = [
                'skMLPClf',
                # 'skRandomForestClf',
                # 'skExtraTreesClf',
                # 'skAdaBoostClf',
                # 'skGradientBoostingClf',
                # 'skLinear_SVCClf',
                # 'skBaggingClf',
                # 'XGBoostClf',
                # 'LightGBMClf',
                # 'skRidgeCVClf',
            ]
            clf_name = 'XGBoostClf'
            # NOTE: overrides the list above — only XGBoost is trained.
            class_pack_names = [clf_name]
            clf = ClassifierPack(class_pack_names)
            # opt = ParamOpt(cv=3, n_jobs=6, n_iter=10)
            # clf.pack[clf_name] = opt.fit(clf[clf_name], train_xs, train_ys)
            clf.fit(train_xs, train_ys)
            train_score = clf.score(train_xs, train_ys)
            test_score = clf.score(test_xs, test_ys)
            # An empty score result means nothing was trained for this label.
            if len(train_score) == 0:
                raise ValueError(f'{y_col} in {p_type} fail')
            pprint(train_score)
            pprint(test_score)
            score_pack = clf.score_pack(test_xs, test_ys)
            pprint(score_pack)
            print(clf.feature_importance)
            pprint(f'score train = {train_score},\n test = {test_score}')
            # Sanity check: predict on a single training row.
            predict = clf.predict(train_xs[:1])[clf_name]
            print(f'predict = {predict}, test_ys= {test_ys[:1]}')
            clfs[y_col] = clf
        clf_dict[p_type_str] = clfs
        # exit()
    # Persist the trained models so the next call can use the cache path.
    dump_pickle(clf_dict, path)
    return clf_dict
def _save_params(self, path):
    """Pickle this object's ``params`` attribute to *path*."""
    current_params = self.params
    dump_pickle(current_params, path)
def save_fail_list(self, path=None):
    """Persist ``self.fail_list`` as both a pickle and a JSON file.

    Args:
        path: Base path *without* extension; ``.pkl`` and ``.json`` are
            appended. Defaults to ``./fail_list/<time_stamp()>``.
    """
    if path is None:
        path = os.path.join('.', 'fail_list', time_stamp())
    pkl_path = path + ".pkl"
    json_path = path + ".json"
    dump_pickle(self.fail_list, pkl_path)
    # JSON can't hold arbitrary objects, so store their string forms.
    stringified = [str(item) for item in self.fail_list]
    dump_json(stringified, json_path)
def make_data_pkl():
    """Build the train/test pickle files for the salt-identification data.

    Collects images, masks and depth info, joins the depth and RLE-mask
    CSVs, derives mask features for the training set, and dumps two
    dictionaries to ``TRAIN_PKL_PATH`` and ``TEST_PKL_PATH``.

    Side effects: writes the merged CSV to ``MERGE_CSV_PATH`` and the two
    pickle files; reads from the path constants defined at module level.
    """
    print(f'collect train images')
    train_images, train_image_names, train_ids = collect_images(TRAIN_IMAGE_PATH)
    print(f'collect train mask images')
    train_mask, train_mask_names, train_mask_ids = collect_images(TRAIN_MASK_PATH)
    print(f'collect test images')
    test_images, test_image_names, test_ids = collect_images(TEST_IMAGE_PATH)
    print(f'collect csv files')
    df_depths = pd.read_csv(DEPTHS_CSV_PATH)
    df_train = pd.read_csv(TRAIN_CSV_PATH)
    # Missing RLE masks are filled so the training rows keep a sentinel value.
    df_train.fillna('none', inplace=True)
    # Outer join keeps every id: rows with an rle_mask are train, rows
    # without (NaN) are test — that split is used below.
    df_merge = pd.merge(left=df_depths, right=df_train, how='outer', left_on='id', right_on='id')
    df_merge.to_csv(MERGE_CSV_PATH, index=False)
    print(f'collect train depth')
    # Train rows: rle_mask present after the merge.
    train_depths = df_merge[df_merge['rle_mask'].notna()]
    train_depths = pd.DataFrame(train_depths).sort_values('id')
    train_depths = train_depths.reset_index(drop=True)
    train_depths = train_depths['z']
    print(f'collect test depth')
    # Test rows: rle_mask absent (NaN) after the outer merge.
    test_depths = df_merge[df_merge['rle_mask'].isna()]
    test_depths = pd.DataFrame(test_depths).sort_values('id')
    test_depths = test_depths.reset_index(drop=True)
    test_depths = test_depths['z']
    print(f'collect train mask rate')
    train_mask_rate = get_feature_mask_rate(train_mask)
    print(f'collect train empty mask')
    train_empty_mask = get_feature_empty_mask(train_mask)
    print(f'collect train weired mask')
    train_weired_mask = get_feature_weired_mask(train_mask)
    print(f'collect train depth_image')
    train_depths_image = depth_to_image(train_depths)
    print(f'collect test depth_image')
    test_depths_image = depth_to_image(test_depths)
    print('dump train pickle')
    train_pkl = {
        'image': train_images,
        'mask': train_mask,
        'id': train_ids,
        'depths': train_depths,
        'mask_rate': train_mask_rate,
        'empty_mask': train_empty_mask,
        'is_weired_mask': train_weired_mask,
        'depth_image': train_depths_image,
    }
    dump_pickle(train_pkl, TRAIN_PKL_PATH)
    print('dump test pickle')
    # Test set has no masks, so only image/id/depth features are stored.
    test_pkl = {
        'image': test_images,
        'id': test_ids,
        'depths': test_depths,
        'depth_image': test_depths_image
    }
    dump_pickle(test_pkl, TEST_PKL_PATH)
def to_pickle(obj, path):
    """Serialize *obj* to the file at *path* via the module pickle helper."""
    target = path
    dump_pickle(obj, target)
def to_pickle(self, path, **kwargs):
    """Pickle this instance to *path*.

    Extra keyword arguments are accepted for interface compatibility
    but are intentionally ignored.
    """
    del kwargs  # unused by design
    dump_pickle(self, path)
def dump(self, path):
    """Write a pickled copy of this object to *path*."""
    destination = path
    dump_pickle(self, destination)