def dump_clf(self):
    """Pickle this fitter's state (top loss/win samples plus the fit frame
    and feature matrix) to the path given by self.dump_file_fn()."""
    payload = {
        'top_loss_ss': self.top_loss_ss,
        'top_win_ss': self.top_win_ss,
        'fiter_df': self.fiter.df,
        'fiter_x': self.fiter.x,
    }
    ZCommonUtil.dump_pickle(payload, self.dump_file_fn())
def dump_clf(self, llps):
    """
    Pickle the (rt, sub index) pairs selected by *llps*.

    :param llps: filtered cprs frame, e.g.
                 cprs[(so.cprs['lps'] < 0) & (so.cprs['lms'] < -0.0)];
                 its index entries look like '<component>_<sub_ind>'
    :return: None; writes the mapping to self.dump_file_fn()
    """
    selected = {}
    for rt_ind in llps.index:
        # index key encodes '<component>_<sub_ind>'
        component_str, sub_str = rt_ind.split('_')
        selected[rt_ind] = (self.rts[int(component_str)][0], int(sub_str))
    ZCommonUtil.dump_pickle(selected, self.dump_file_fn())
def dump_verify_result(cls, order_has_ret):
    """
    Fit a decision tree on the verify/result columns and pickle it.

    The columns matched by the regex form [label, feature...]: the first
    matched column is the target, the rest are features. The fitted tree is
    also rendered via graphviz and dumped to K_GOLDEN_VERIFY_DEC_TREE.

    :param order_has_ret: DataFrame containing the result/ret columns
    """
    regex = 'result|d_ret|v_ret|dm_ret|vm_ret|dp_ret|vp_ret'
    ttr = order_has_ret.filter(regex=regex)
    # .values replaces DataFrame.as_matrix(), which was deprecated in
    # pandas 0.23 and removed in 1.0; behavior is identical here.
    matrix = ttr.values
    y = matrix[:, 0]
    x = matrix[:, 1:]
    feature_names = ttr.columns[1:]
    dec_tree = MlFiterCreater.MlFiterCreaterClass().decision_tree_classifier(
        max_depth=len(feature_names))
    dec_tree.fit(x, y)
    MlFiterExcute.graphviz_tree(dec_tree, feature_names, x, y)
    ZCommonUtil.dump_pickle(dec_tree, K_GOLDEN_VERIFY_DEC_TREE)
def get_ump(self, ump):
    """Return the pickled dump for *ump*, loading and caching it on first use.

    The cache key is the ump's own dump file name, so repeated requests for
    the same ump hit self._cache instead of re-reading the pickle.
    """
    cache_key = ump.dump_file_fn()
    if cache_key in self._cache:
        return self._cache[cache_key]
    loaded = ZCommonUtil.load_pickle(cache_key)
    self._cache[cache_key] = loaded
    return loaded
def __init__(self, train_path, test_path, n_classes, img_root_dir,
             one_hot_offset=0, channel_cnt=3, img_size=K_CNN_IMG_SIZE_M,
             batch_size=256, learning_rate=0.01, training_iters=100000,
             data_provide=None):
    """
    Set up CNN training hyper-parameters, TF placeholders and the batch source.

    :param train_path: training-set index path, forwarded to TensorBatchGen
    :param test_path: test-set index path, forwarded to TensorBatchGen
    :param n_classes: number of output classes
    :param img_root_dir: image root directory, forwarded to TensorBatchGen
    :param one_hot_offset: label offset for one-hot encoding in TensorBatchGen
    :param channel_cnt: image channel count (default 3)
    :param img_size: square input image edge length
    :param batch_size: mini-batch size
    :param learning_rate: optimizer learning rate
    :param training_iters: total number of training samples to consume
    :param data_provide: optional custom batch provider; must expose a
                         next_batch function, otherwise RuntimeError is raised
    """
    self.channel_cnt = channel_cnt
    """
    TensorBatchGener also uses K_CNN_IMG_SIZE; keep the two in sync if this changes.
    """
    self.img_size = img_size
    # NOTE(review): presumably the dropout keep-probability fed into
    # self.keep_prob during training — confirm at the use site.
    self.dropout = 0.8
    # self.n_classes = len(np.unique(y_train))
    self.n_classes = n_classes
    self.checkpoint_path = ZEnv.g_project_root + '/data/tensor/tf_model.ckpt'
    ZCommonUtil.ensure_dir(self.checkpoint_path)
    # flattened input dimension: H * W * C
    self.n_input = self.img_size * self.img_size * self.channel_cnt
    self.learning_rate = learning_rate
    self.training_iters = training_iters
    self.batch_size = batch_size
    """
    Display progress 20 times over the whole training run.
    """
    self.display_step = int(training_iters / batch_size / 20)
    """
    Save the model 3 times over the whole training run.
    """
    self.save_step = int(training_iters / batch_size / 3)
    self.x = tf.placeholder(tf.float32, [None, self.n_input])
    self.y = tf.placeholder(tf.float32, [None, self.n_classes])
    self.keep_prob = tf.placeholder(tf.float32)
    if data_provide is None:
        self.batch_gen = TensorBatchGen(train_path, test_path,
                                        n_classes=n_classes,
                                        img_root_dir=img_root_dir,
                                        resize=self.img_size,
                                        one_hot_offset=one_hot_offset)
    else:
        if not hasattr(data_provide, 'next_batch'):
            raise RuntimeError('data_provide need only one func next_batch!')
        self.batch_gen = data_provide
def down_load_img(self, url_dict, thread_lock):
    """Download one image, retrying over the shuffled backing proxies.

    Skips (but still counts as collected) images already on disk; each proxy
    attempt is best-effort — any exception just moves on to the next proxy.
    """
    img_url = url_dict['url']
    img_url_thumb = url_dict['url_thumb']
    file_name = self.img_dir + Md5Helper.mkmd5frombinary(img_url) + '.jpg'

    if ZCommonUtil.file_exist(file_name):
        ZLog.debug('{} has already exist'.format(img_url))
        # already on disk: still counts as a successful collection
        with thread_lock:
            self.collect_cnt += 1
        return

    def attempt(candidate):
        # one best-effort request through a single proxy
        headers = {"User-Agent": K_UA}
        if self.proxy['type'] == 'HTTP':
            proxy_dict = dict(http='http://{}'.format(candidate['proxy']),
                              https='http://{}'.format(candidate['proxy']))
        else:
            proxy_dict = dict(http='socks5://{}'.format(candidate['proxy']),
                              https='socks5://{}'.format(candidate['proxy']))
        downloader = (self._down_load_img_stream if g_enable_stream
                      else self._down_load_img_direct)
        try:
            return downloader(img_url, file_name, headers, proxy_dict,
                              thread_lock, img_url_thumb=img_url_thumb)
        except Exception:
            return False

    candidates = self.back_proxys[:]
    random.shuffle(candidates)
    for candidate in candidates:
        if attempt(candidate):
            break
def load_estimator(self):
    """
    Restore previously serialized attributes onto self.

    Loads the pickled dict written earlier, sets every key/value pair as an
    attribute of this instance, and returns the dict.

    :return: the loaded dict, or None when no serialized file exists
    """
    fn = self._serialize_file_name()
    if not ZCommonUtil.file_exist(fn):
        return None
    # open in binary mode: pickle data is bytes — text mode fails on
    # Python 3 and can corrupt reads on Windows; the context manager
    # guarantees the handle is closed even if pickle.load raises.
    with open(fn, 'rb') as fr:
        ret = pickle.load(fr)
    for key, value in ret.items():
        # re-attach every persisted attribute onto this instance
        setattr(self, key, value)
    return ret
def save_csv(self):
    """Materialize the collected proxy list as a DataFrame and write it to csv."""
    fn = '../gen/proxy/proxy_df'
    self.proxy_df = pd.DataFrame(self.proxy_list)
    ZCommonUtil.ensure_dir(fn)
    self.proxy_df.to_csv(fn, columns=self.proxy_df.columns, index=True)
def phantomjs_screen_html(self):
    """Dump the driver's current page source, utf-8 encoded, to ./hj.html."""
    ZCommonUtil.save_file(self.driver.page_source.encode('utf-8'), './hj.html')
def do_prisma_img(parm_product, mask_func_str, n1, n2, n3, dd_factor,
                  std_factor, loop_factor, convd_median_factor,
                  convd_big_factor, cb, all_mask, save_dir):
    """
    Render one prisma image job described by *parm_product*.

    Fix: replaced the Python-2-only ``<>`` operator with ``!=`` (twice);
    behavior is identical and the function now parses on Python 3 as well.

    :param parm_product: sequence (org_file, gd_file, nbk, enhance, rb_rate)
    :param mask_func_str: mask selector; None renders all three basic masks,
        otherwise one of 'otsu_func', 'features_func', 'stdmean_func' or a
        ' + '-joined combination of them
    :param n1: convolution parameter forwarded to mix_mask_with_convd
    :param n2: convolution parameter forwarded to mix_mask_with_convd
    :param n3: convolution parameter forwarded to mix_mask_with_convd
    :param dd_factor: otsu mask parameter
    :param std_factor: stdmean mask parameter
    :param loop_factor: features mask parameter
    :param convd_median_factor: convolution threshold forwarded on
    :param convd_big_factor: convolution threshold forwarded on
    :param cb: forwarded to mix_mask_with_convd
    :param all_mask: when True and the guide image name ends with '_allmask',
        additionally renders a batch of guide-feature masks over several layers
    :param save_dir: output root directory
    :raises RuntimeError: when the original file is missing
    :raises ValueError: when mask_func_str matches no known selector
    """
    org_file = parm_product[0]
    gd_file = parm_product[1]
    nbk = parm_product[2]
    enhance = parm_product[3]
    rb_rate = parm_product[4]
    if org_file is None:
        raise RuntimeError('do_prisma_img org_file is None!')
    gd_file_name = 'none_gd' if gd_file is None else os.path.basename(
        gd_file).split('.')[0]
    enhance_name = '' if enhance is None else enhance
    rb_rate_name = str(rb_rate)
    nbk_name = nbk.replace('/', '_')
    # output layout: save_dir/input/guide/_enhance_nbk_rbrate_%s.jpg;
    # gd_file_name.split('_allmask')[0] keeps masks in their own directory
    fn = '{}/{}/{}/_{}_{}_{}_%s.jpg'.format(
        save_dir, os.path.basename(org_file).split('.')[0],
        gd_file_name.split('_allmask')[0], enhance_name, nbk_name,
        rb_rate_name)
    ZCommonUtil.ensure_dir(fn)
    # NOTE(review): fn still contains the '%s' placeholder here while files
    # are actually saved as fn % <mask name>, so this existence check can
    # apparently never fire — confirm intent before relying on the skip.
    if ZCommonUtil.file_exist(fn):
        return
    if g_skip_no_guide_rb and gd_file is None and rb_rate != 1:
        # skip rb_rate variants for guide-less jobs (rb_rate only picks iter_n there)
        return
    # initialize the worker (one per call)
    pw = PrismaWorkerClass()
    # partial() gives every mask function the same call signature
    mask_stdmean_func = partial(pw.do_stdmean, std_factor=std_factor)
    mask_features_func = partial(pw.do_features, loop_factor=loop_factor)
    mask_otsu_func = partial(pw.do_otsu, dd=dd_factor)
    if gd_file is None:
        # no feature rendering requested: reuse rb_rate to choose iter_n
        iter_n = int((1 / rb_rate) * 10)
        ret_img = pw.cp.fit_img(org_file, nbk=nbk, iter_n=iter_n,
                                enhance=enhance, resize=True, save=False)
        PrismaHelper.save_array_img(ret_img, fn % 'mask')
    else:
        if mask_func_str is None:
            # no selector: render each of the three basic masks
            for mask_name, mask_func in zip(
                    ['otsu_func', 'features_func', 'stdmean_func'],
                    [mask_otsu_func, mask_features_func, mask_stdmean_func]):
                ret_img = pw.mix_mask_with_convd(
                    mask_func, org_file, gd_file, nbk, enhance=enhance,
                    rb_rate=rb_rate, cb=cb, n1=n1, n2=n2, n3=n3,
                    convd_median_factor=convd_median_factor,
                    convd_big_factor=convd_big_factor, all_mask=all_mask,
                    save=False, show=False)
                PrismaHelper.save_array_img(ret_img, fn % mask_name)
        else:
            if mask_func_str == 'otsu_func':
                mask_func = mask_otsu_func
            elif mask_func_str == 'features_func':
                mask_func = mask_features_func
            elif mask_func_str == 'stdmean_func':
                mask_func = mask_stdmean_func
            elif mask_func_str == 'otsu_func + stdmean_func':
                mask_func = partial(
                    pw.together_mask_func,
                    func_list=[mask_otsu_func, mask_stdmean_func])
            elif mask_func_str == 'otsu_func + features_func':
                mask_func = partial(
                    pw.together_mask_func,
                    func_list=[mask_otsu_func, mask_features_func])
            elif mask_func_str == 'stdmean_func + features_func':
                mask_func = partial(
                    pw.together_mask_func,
                    func_list=[mask_stdmean_func, mask_features_func])
            elif mask_func_str == 'otsu_func + stdmean_func + features_func':
                mask_func = partial(pw.together_mask_func, func_list=[
                    mask_otsu_func, mask_stdmean_func, mask_features_func
                ])
            else:
                raise ValueError('do_prisma_img mask_func_str MATCH ERROR!!')
            if not os.path.basename(gd_file).split('.')[0].endswith(
                    '_allmask') or nbk != 'conv2/3x3_reduce':
                # all_mask only applies to guide feature images whose name
                # ends with '_allmask', and only once per job
                # (nbk == 'conv2/3x3_reduce') since the branch below loops
                # over nbk layers itself
                all_mask = False
            if all_mask is False:
                ret_img = pw.mix_mask_with_convd(
                    mask_func, org_file, gd_file, nbk, enhance=enhance,
                    rb_rate=rb_rate, cb=cb, n1=n1, n2=n2, n3=n3,
                    convd_median_factor=convd_median_factor,
                    convd_big_factor=convd_big_factor, all_mask=all_mask,
                    save=False, show=False)
                PrismaHelper.save_array_img(ret_img, fn % mask_func_str)
            else:
                # render the all-mask batch at two rb rates across several
                # layers, restoring the global rate afterwards
                tmp_mask_rb = PrismaWorker.g_all_mask_rb_rate
                for prb in [PrismaWorker.g_all_mask_rb_rate, 1.0]:
                    PrismaWorker.g_all_mask_rb_rate = prb
                    for mask_nbk in [
                            'conv2/3x3_reduce', 'conv2/3x3', 'conv2/norm2',
                            'pool2/3x3_s2', 'inception_3a/1x1',
                            'inception_3a/5x5_reduce', 'inception_3a/5x5',
                            'inception_3b/5x5_reduce', 'inception_3b/5x5'
                    ]:
                        mask_nbk_name = mask_nbk.replace('/', '_')
                        mask_fn = '{}/{}/{}/_{}_{}_{}_%s.jpg'.format(
                            save_dir,
                            os.path.basename(org_file).split('.')[0],
                            gd_file_name, enhance_name, mask_nbk_name, prb)
                        ZCommonUtil.ensure_dir(mask_fn)
                        ret_img = pw.mix_mask_with_convd(
                            mask_func, org_file, gd_file, mask_nbk,
                            enhance=enhance, rb_rate=rb_rate, cb=cb,
                            n1=n1, n2=n2, n3=n3,
                            convd_median_factor=convd_median_factor,
                            convd_big_factor=convd_big_factor,
                            all_mask=all_mask, save=False, show=False)
                        PrismaHelper.save_array_img(
                            ret_img, mask_fn % 'allmask_func')
                PrismaWorker.g_all_mask_rb_rate = tmp_mask_rb
                # with the all-mask batch done, produce the normal output too
                ret_img = pw.mix_mask_with_convd(
                    mask_func, org_file, gd_file, nbk, enhance=enhance,
                    rb_rate=rb_rate, cb=cb, n1=n1, n2=n2, n3=n3,
                    convd_median_factor=convd_median_factor,
                    convd_big_factor=convd_big_factor, all_mask=False,
                    save=False, show=False)
                PrismaHelper.save_array_img(ret_img, fn % mask_func_str)
    if g_show_msg_tip:
        ShowMsg.show_msg('pid %s' % os.getpid(), os.path.basename(fn))