def set_conf(path):
    global cf
    with open(path, encoding='utf-8') as f:
        content = f.read()
    cf = load(content)
    from pyduyp.logger.log import log
    log.debug("customized config {} loaded ok!".format(path))
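# Usage sketch for set_conf. Hedged: the path below is hypothetical, and
# `load` is assumed to be the YAML/JSON loader this module imports.
def set_conf_demo():
    set_conf("pyduyp/config/config.yaml")  # hypothetical config path
    log.debug("loaded config: {}".format(cf))  # parsed config lands in the module-global `cf`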
def fc_variable_scope_v5():
    with tf.variable_scope("foo"):
        v = tf.get_variable("v", [1])
        w = tf.get_variable("w", [1])
        log.debug("{}, {}".format(v, w))
    # Re-entering "foo" with reuse=False still allows a *new* variable ("u")
    # to be created; only an existing name would raise.
    with tf.variable_scope("foo", reuse=False):
        v1 = tf.get_variable("u", [1])
        log.debug("{}".format(v1))
def read_hf2arr(inputs_list):
    out = []
    for image in inputs_list:
        image = cv2.imread(image)
        image = crop(image, 128, 128)
        out.append(image)
    out2arr = np.array(out)
    log.debug("{}".format(out2arr.shape))
    return out2arr
def create_index(index, settings, mapping, doc_type='doc'):
    url = "{}://{}:{}/{}".format(es_args.get('schema'), es_args.get('host'),
                                 es_args.get('port'), index)
    # Send the settings with the index-creation request; the original built a
    # combined settings/mapping dict here but never sent it.
    ret = requests.put(url, data=json.dumps({"settings": settings}))
    log.debug("create index request url: {} result: {}".format(url, str(ret)))
    # Then PUT the mapping for the given doc type.
    mapstr = json.dumps(mapping)
    url += '/{}/_mapping'.format(doc_type)
    log.debug("put {} body: {}".format(url, mapstr))
    ret = requests.put(url, data=mapstr)
    # ret = es().indices.create(index=index, ignore=400, body=settings)
    return ret
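# Minimal usage sketch for create_index (assumes a reachable ES node behind
# es_args; the index name and mapping below are illustrative only):
def create_index_demo():
    demo_setting = {"number_of_shards": 1, "number_of_replicas": 0}
    demo_mapping = {"properties": {"q": {"type": "text"}}}
    ret = create_index("demo_index", demo_setting, demo_mapping, doc_type='doc')
    log.debug("create_index_demo result: {}".format(ret))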
def fc_variable_scope_v6():
    with tf.variable_scope("foo"):
        v1 = tf.Variable(tf.random_normal(shape=[2, 3], mean=0., stddev=1.),
                         dtype=tf.float32, name='v1')
        v2 = tf.get_variable("v2", [1])
        log.debug("{}, {}".format(v1, v2))
    with tf.variable_scope("foo", reuse=True):
        v3 = tf.get_variable('v2')
        # Note: v1 was created with tf.Variable, not tf.get_variable, so it is
        # not in the variable store and this lookup raises a ValueError.
        v4 = tf.get_variable('v1')
        log.debug("{}, {}".format(v3, v4))
def save_question_45_to_es(index_name="question_cd_update"):
    # Bulk insert.
    try:
        es.indices.delete(index_name)
        log.info("{} has been deleted".format(index_name))
        setting = {"number_of_shards": 6, "number_of_replicas": 0}
        mapping = {
            "timestamp": {"enabled": "true"},
            "properties": {
                "logdate": {"type": "date", "format": "dd/MM/yyyy HH:mm:ss"}
            }
        }
        # The create body expects "mappings" keyed by doc type, not "mapping".
        settings = {"settings": setting, "mappings": {"post": mapping}}
        es.indices.create(index=index_name, ignore=400, body=settings)
    except Exception:
        pass
    file_dir = "antbot/datasets/city_questions_740432.csv"
    if not os.path.isfile(file_dir):
        raise FileNotFoundError("data file not found")
    data = pd.read_csv(file_dir).values.tolist()
    line_number = 0
    all_data = []
    for m in tqdm(data):
        body = {
            '_index': '{}'.format(index_name),
            '_type': 'post',
            # The original used the builtin `id` and a never-assigned `source`;
            # the row number and the row itself (under a hypothetical 'content'
            # field) are stand-ins.
            '_id': line_number,
            '_source': {'content': m},
        }
        all_data.append(body)
        line_number += 1
        if line_number % 10000 == 0:
            try:
                success, _ = bulk(es, all_data, index=index_name, raise_on_error=True)
                all_data = []
                log.info("==================== success: {}/{} ====================".format(
                    line_number, len(data)))
            except Exception as e:
                log.debug("store failed: {}".format(e))
    # Flush the final partial batch (the original dropped it).
    if all_data:
        bulk(es, all_data, index=index_name, raise_on_error=True)
def create_entity_tmp():
    setting = {"number_of_shards": 6, "number_of_replicas": 0}
    mapping = {
        "properties": {
            "q": {"type": "text"},
            "a": {"type": "text"},
            "roomId": {"type": "text"},
            "tenantId": {"type": "text", "analyzer": "ik_smart",
                         "search_analyzer": "ik_smart"},
            "lanlordId": {"type": "text", "analyzer": "ik_smart",
                          "search_analyzer": "ik_smart"},
            "id": {"type": "text"}
        }
    }
    index_name = 'bot_entity_tmp_new'
    try:
        es.indices.delete(index_name)
    except Exception:
        pass
    ret_entity = create_index(index_name, setting, mapping, doc_type='fulltext')
    log.debug(ret_entity)
    data = pd.read_csv("antbot/datasets/question_45/import_new.csv")
    for message in tqdm(data.values):
        if isinstance(message[0], str) and isinstance(message[1], str):
            body = {
                'q': message[0],
                'a': message[1],
                'roomId': message[2],
                'tenantId': message[3],
                'lanlordId': message[4],  # field name kept as-is to match the mapping
                'id': str(curlmd5(message[0]))
            }
            es.index(index_name, doc_type="fulltext", id=body['id'], body=body)
def inference_losses(x, imitation, true_output, fake_output):
    log.debug("inference_losses inputs x: {}, imitation: {}".format(x, imitation))
    content_loss = inference_content_loss(x, imitation)
    log.debug("content_loss: {}".format(content_loss))
    generator_loss, discriminator_loss = inference_adversarial_loss(
        true_output, fake_output, true_output)
    log.debug("{}, {}".format(generator_loss, discriminator_loss))
    # generator_loss, discriminator_loss = inference_adversarial_loss_with_sigmoid(true_output, fake_output)
    g_loss = content_loss + generator_loss
    log.debug("g_loss: {}".format(g_loss))
    d_loss = discriminator_loss
    log.debug("d_loss: {}".format(d_loss))
    return g_loss, d_loss
def preprocess(path, scale=4):
    """
    Preprocess a single image file:
      (1) read the original image (YCbCr format, grayscale by default),
      (2) normalize to [0, 1],
      (3) downsample then upsample with bicubic interpolation.

    Args:
        path: file path of the desired image
        scale: super-resolution scale factor

    Returns:
        input_: image after bicubic interpolation (low-resolution)
        label_: image at the original resolution (high-resolution)
    """
    image = imread(path, is_grayscale=False)
    log.debug("image shape: {}".format(image.shape))
    label_ = modcrop(image, scale)
    log.debug("label_ shape: {}".format(label_.shape))
    # Must be normalized.
    image = image / 255.
    label_ = label_ / 255.
    input_ = scipy.ndimage.interpolation.zoom(label_, (1. / scale), prefilter=False)
    log.debug("inputs shape: {}".format(input_.shape))
    input_ = scipy.ndimage.interpolation.zoom(input_, (scale / 1.), prefilter=False)
    log.debug("inputs shape: {}".format(input_.shape))
    return input_, label_
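# Usage sketch for preprocess (the image path follows the Train/t*.bmp layout
# mentioned in prepare_data below; imread and modcrop come from this module):
def preprocess_demo():
    input_, label_ = preprocess("Train/t1.bmp", scale=4)
    log.debug("low-res {} vs high-res {}".format(input_.shape, label_.shape))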
def getespages(total, pagesize):
    pagecount = math.ceil(total / pagesize)
    log.debug("total {} pagesize: {} count: {}".format(total, pagesize, pagecount))
    ret = []
    if pagecount == 0:
        ret.append("0, {}".format(pagesize))
    for i in range(0, pagecount):
        if i * pagesize + pagesize >= total:
            last = total
        else:
            last = i * pagesize + pagesize
        # Emit "offset, size"; the final page shrinks to the remaining count
        # (the original computed `last` but never used it).
        ret.append("{}, {}".format(i * pagesize, last - i * pagesize))
    log.debug(ret)
    return ret
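# Quick self-contained check of getespages' pagination math, assuming the
# last-page size fix above: each entry is an "offset, size" pair.
def getespages_demo():
    assert getespages(25, 10) == ["0, 10", "10, 10", "20, 5"]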
def makedict():
    jiebadict = []
    path = 'word2vector_code/datasets/datesdict.csv'
    with open(path, 'r') as fr:
        lines = fr.readlines()
    for line in lines:
        word = line.split(":")[0]
        if len(word) == 1:
            continue
        if word.isnumeric():
            continue
        jiebadict.append(word)
    df = pd.Series(jiebadict)
    log.debug(len(df))
    df.to_csv("word2vector_code/datasets/jiebadict.csv", index=None)
def sqlresponse(sql):
    sql = quote(sql)
    log.debug("get encoded sql {}".format(sql))
    url = "{}://{}:{}/_sql?sql={}".format(es_args.get('schema'), es_args.get('host'),
                                          es_args.get('port'), sql)
    log.debug("request sql url: {}".format(url))
    res = []
    content = ''  # initialize so a failed request cannot leave it undefined
    try:
        response = requests.get(url)
        content = response.content.decode('utf8')
    except Exception:
        log.error("request get {} error!".format(url))
    if len(content) > 2:
        res = json.loads(content)['hits']
    return res
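# Usage sketch for sqlresponse (assumes the ES SQL plugin and a live node;
# the query targets the bot_entity_tmp_new index created elsewhere here):
def sqlresponse_demo():
    hits = sqlresponse("SELECT q, a FROM bot_entity_tmp_new LIMIT 10")
    if hits:
        log.debug("got {} hit records".format(len(hits.get('hits', []))))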
def sqldatacount(sql):
    sql = quote(sql)
    log.debug("get encoded sql {}".format(sql))
    url = "{}://{}:{}/_sql?sql={}".format(es_args.get('schema'), es_args.get('host'),
                                          es_args.get('port'), sql)
    log.debug("request sql url: {}".format(url))
    response = requests.get(url)
    content = response.content.decode('utf8')
    res = []
    count = 0
    if len(content) > 2:
        ret = json.loads(content)['hits']
        count = ret['count']
        if ret and len(ret) > 0 and ret['hits']:
            for hit in ret['hits']:
                res.append(hit['_source'])
    return res, count
def discriminator(x, is_training, reuse):
    log.debug("discriminator inputs x: {}".format(x))
    with tf.variable_scope('discriminator', reuse=reuse):
        with tf.variable_scope('conv1'):
            x = conv_layer(x, [3, 3, 3, 64], 1)
            x = lrelu(x)
        with tf.variable_scope('conv2'):
            x = conv_layer(x, [3, 3, 64, 64], 2)
            x = lrelu(x)
            x = batch_normalize(x, is_training)
        with tf.variable_scope('conv3'):
            x = conv_layer(x, [3, 3, 64, 128], 1)
            x = lrelu(x)
            x = batch_normalize(x, is_training)
        with tf.variable_scope('conv4'):
            x = conv_layer(x, [3, 3, 128, 128], 2)
            x = lrelu(x)
            x = batch_normalize(x, is_training)
        with tf.variable_scope('conv5'):
            x = conv_layer(x, [3, 3, 128, 256], 1)
            x = lrelu(x)
            x = batch_normalize(x, is_training)
        with tf.variable_scope('conv6'):
            x = conv_layer(x, [3, 3, 256, 256], 2)
            x = lrelu(x)
            x = batch_normalize(x, is_training)
        with tf.variable_scope('conv7'):
            x = conv_layer(x, [3, 3, 256, 512], 1)
            x = lrelu(x)
            x = batch_normalize(x, is_training)
        with tf.variable_scope('conv8'):
            x = conv_layer(x, [3, 3, 512, 512], 2)
            x = lrelu(x)
            x = batch_normalize(x, is_training)
        x = flatten_layer(x)
        with tf.variable_scope('fc'):
            x = full_connection_layer(x, 1024)
            x = lrelu(x)
        with tf.variable_scope('softmax'):
            x = full_connection_layer(x, 1)
    d_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='discriminator')
    log.debug("discriminator outputs x: {}, variables: {}".format(x, d_variables))
    return x, d_variables
def prepare_data(sess, dataset):
    """
    Args:
        dataset: choose the train dataset or the test dataset.
            For the train dataset, the output would be
            ['.../t1.bmp', '.../t2.bmp', ..., '.../t99.bmp']
    """
    if FLAGS.is_train:
        data_dir = os.path.join(os.getcwd(), dataset)
        data = glob.glob(os.path.join(data_dir, "*.png"))
    else:
        data_dir = os.path.join(os.sep, (os.path.join(os.getcwd(), dataset)), "Set5")
        log.debug("data dir: {}".format(data_dir))
        data = glob.glob(os.path.join(data_dir, "*.png"))
    return data
def is_shengpizi(inputstring):
    """Strip rare characters (生僻字) from the input string."""
    try:
        p1 = os.path.join('pyduyp', "dictionary", "shengpizi.csv")
        if isinstance(inputstring, str) and len(inputstring) > 5:
            data = pd.read_csv(p1)['name'].tolist()
            # Remove every rare character; the original returned after the
            # first character it inspected.
            for w in list(inputstring):
                if w in data:
                    log.debug("rare character: {}".format(w))
                    inputstring = inputstring.replace(w, "")
        return inputstring
    except Exception as e:
        log.debug("{}: current working directory is not under pyduyp".format(e))
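# Usage sketch for is_shengpizi (assumes pyduyp/dictionary/shengpizi.csv is
# reachable from the working directory; the sample sentence is illustrative):
def is_shengpizi_demo():
    cleaned = is_shengpizi("这是一条超过五个字的测试消息")
    log.debug("cleaned string: {}".format(cleaned))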
def generator(x, is_training, reuse):
    log.debug("generator inputs x: {}".format(x))
    with tf.variable_scope('generator', reuse=reuse):
        with tf.variable_scope('deconv1'):
            x = deconv_layer(x, [3, 3, 64, 3], [batch_size, 24, 24, 64], 1)
            x = tf.nn.relu(x)
        shortcut = x
        for i in range(5):
            mid = x
            with tf.variable_scope('block{}a'.format(i + 1)):
                x = deconv_layer(x, [3, 3, 64, 64], [batch_size, 24, 24, 64], 1)
                x = batch_normalize(x, is_training)
                x = tf.nn.relu(x)
            with tf.variable_scope('block{}b'.format(i + 1)):
                x = deconv_layer(x, [3, 3, 64, 64], [batch_size, 24, 24, 64], 1)
                x = batch_normalize(x, is_training)
            x = tf.add(x, mid)
        with tf.variable_scope('deconv2'):
            x = deconv_layer(x, [3, 3, 64, 64], [batch_size, 24, 24, 64], 1)
            x = batch_normalize(x, is_training)
            x = tf.add(x, shortcut)
        with tf.variable_scope('deconv3'):
            x = deconv_layer(x, [3, 3, 256, 64], [batch_size, 24, 24, 256], 1)
            x = pixel_shuffle_layer(x, 2, 64)  # n_split = 256 / 2 ** 2
            x = tf.nn.relu(x)
        with tf.variable_scope('deconv4'):
            x = deconv_layer(x, [3, 3, 64, 64], [batch_size, 48, 48, 64], 1)
            x = pixel_shuffle_layer(x, 2, 16)
            x = tf.nn.relu(x)
        with tf.variable_scope('deconv5'):
            x = deconv_layer(x, [3, 3, 3, 16],
                             [batch_size, image_size, image_size, 3], 1)
    g_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='generator')
    log.debug("generator outputs: {}, variables: {}".format(x, g_variables))
    return x, g_variables
def get_data(data_path, data_name, f_start, f_end):
    root = os.path.dirname(os.path.realpath(__file__))
    log.debug("{}".format(root))
    data_name = os.path.join(root, data_path, data_name)
    data = pd.read_csv(data_name)
    train_data_x = data.loc[:, f_start:f_end]
    train_data_x_columns_name = train_data_x.columns
    save_path = os.path.join(root, 'data_pre/columns_name_for_test.csv')
    df = pd.Series(train_data_x_columns_name)
    df.to_csv(save_path, index=None)
    train_data_x = train_data_x.loc[:len(train_data_x) - 2, :]
    shape = train_data_x.shape
    fenge = int(shape[0] * 0.9)  # fenge: the 90%/10% train/test split point
    train_x = train_data_x.loc[:fenge, :]
    train_y = data.loc[:fenge, "class_Normal":"class_Spam"]
    test_x = train_data_x.loc[fenge:, :]
    test_y = data.loc[fenge:len(data) - 2, "class_Normal":"class_Spam"]
    # assert len(test_x) == len(test_y)
    return train_x, train_y, test_x, test_y
def compute_date_interval_for_timelist(time_list, top_k=5):
    if len(time_list) < top_k:
        return -1
    if not isinstance(time_list, list):
        log.debug("time list type: {}, {}".format(type(time_list), time_list))
        return -1
    results = []
    time1 = datetime.strptime(time_list[1], "%Y-%m-%d %H:%M:%S")
    time2 = datetime.strptime(time_list[2], "%Y-%m-%d %H:%M:%S")
    # If the timestamps ascend, reverse the list so the most recently sent
    # message comes first; the original duplicated the loop in both branches.
    new_time_list = time_list[::-1] if time2 > time1 else time_list
    length = len(new_time_list)
    for i in range(length - 1):
        time_interval = compute_date_interval(new_time_list[i], new_time_list[i + 1])
        results.append(time_interval)
        if len(results) >= top_k:
            break
    mean_time = sum(results) / top_k
    log.debug("average interval: {}".format(mean_time))
    return mean_time
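# Usage sketch for compute_date_interval_for_timelist. The timestamps are
# illustrative; compute_date_interval is assumed to return a numeric interval
# in whatever unit the surrounding module uses.
def mean_interval_demo():
    times = ["2018-01-01 10:00:00", "2018-01-01 10:05:00",
             "2018-01-01 10:20:00", "2018-01-01 10:30:00",
             "2018-01-01 10:45:00", "2018-01-01 11:00:00"]
    log.debug("mean interval: {}".format(
        compute_date_interval_for_timelist(times, top_k=5)))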
def input_setup(sess, config):
    if config.is_train:
        data = prepare_data(sess, dataset="Train\\{}".format(config.train_data))
    else:
        data = prepare_data(sess, dataset="Test\\{}".format(config.test_data))
    sub_input_sequence = []
    sub_label_sequence = []
    padding = abs(config.image_size - config.label_size) / 2  # 6
    if config.is_train:
        for i in range(len(data)):
            input_, label_ = preprocess(data[i], config.scale)
            if len(input_.shape) == 3:
                h, w, _ = input_.shape
            else:
                h, w = input_.shape
            log.debug("{}, {}".format(h, w))
            for x in range(0, h - config.image_size + 1, config.stride):
                for y in range(0, w - config.image_size + 1, config.stride):
                    sub_input = input_[x:x + config.image_size,
                                       y:y + config.image_size]
                    sub_label = label_[x + int(padding):x + int(padding) + config.label_size,
                                       y + int(padding):y + int(padding) + config.label_size]
                    # Make channel value.
                    sub_input = sub_input.reshape(
                        [config.image_size, config.image_size, 1])
                    sub_label = sub_label.reshape(
                        [config.label_size, config.label_size, 1])
                    sub_input_sequence.append(sub_input)
                    sub_label_sequence.append(sub_label)
    else:
        input_, label_ = preprocess(data[2], config.scale)
        if len(input_.shape) == 3:
            h, w, _ = input_.shape
        else:
            h, w = input_.shape
        nx = ny = 0
        for x in range(0, h - config.image_size + 1, config.stride):
            nx += 1
            ny = 0
            for y in range(0, w - config.image_size + 1, config.stride):
                ny += 1
                sub_input = input_[x:x + config.image_size,
                                   y:y + config.image_size]  # [33 x 33]
                sub_label = label_[x + int(padding):x + int(padding) + config.label_size,
                                   y + int(padding):y + int(padding) + config.label_size]  # [21 x 21]
                sub_input = sub_input.reshape(
                    [config.image_size, config.image_size, 1])
                sub_label = sub_label.reshape(
                    [config.label_size, config.label_size, 1])
                sub_input_sequence.append(sub_input)
                sub_label_sequence.append(sub_label)
    arrdata = np.asarray(sub_input_sequence)
    arrlabel = np.asarray(sub_label_sequence)
    make_data(arrdata, arrlabel)
    if not config.is_train:
        return nx, ny
        # (continued from the Word2Vec(...) constructor call above)
        null_word=0, trim_rule=None, sorted_vocab=1,
        batch_words=max_words, compute_loss=False)
    logging.info(model)
    save_dir = 'word2vector_code/datasets/results/vector'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    total_path = os.path.join(root_path, "datasets/results/vector/vectors_train.csv")
    fvocab_path = os.path.join(root_path, "datasets/results/vector/fvocab_train.csv")
    log.debug("save path: {}".format(total_path))
    model.wv.save_word2vec_format(total_path, fvocab=fvocab_path, binary=False)
    model.save(root_path + "/datasets/train.model")
    log.info(" ! Build Success ! ")
if method == 'test':
    # distance = model.wmdistance(sentence_obama, sentence_president)
    total_path = os.path.join(root_path,
                              "datasets/results/vector/vectors_traintest.csv")
    word_vectors = KeyedVectors.load_word2vec_format(total_path, binary=False)
    with open("word2vector_code/datasets/results/vector/fvocab_traintest.csv",
              'r') as fr:
    # Despite the surrounding function's name, this returns the normalized
    # RMSE (skimage's compare_nrmse), not SSIM.
    ssim = measure.compare_nrmse(img1, img2, norm_type='Euclidean')
    return ssim


if __name__ == "__main__":
    method = 'test'
    if method == 'train':
        train()
    if method == 'test':
        modeltest()
    if method == 'stat':
        origin = './results/yaogan100_1'
        results = './results/yaogan100'
        length = len(os.listdir(origin))
        nrmseall = 0
        for i in range(length):
            log.debug("{}, {}".format(
                os.path.join(origin, os.listdir(origin)[i]),
                os.path.join(results, os.listdir(results)[i])))
            originimg = cv2.imread(os.path.join(origin, os.listdir(origin)[i]))
            resultsimg = cv2.imread(os.path.join(results, os.listdir(results)[i]))
            res = compare_ssim(originimg, resultsimg)
            nrmseall += res
        print(nrmseall / length)
'卡萨帝还邀请不同年龄的两位参展者亲身体验天玺空调的“定制送风”,年长的参展者表示感觉风力柔和温度舒适,年轻的参展者表示感觉风力舒适且出风凉爽;据了解,此智慧识别效果源自于卡萨帝智慧人体温冷感知系统,' \ '其能智能检测人体、环境温度等参数,形成红外热图像,并通过大数据计算出人体主观舒适度从而获得室内人员的冷热感受,进而通过独立的双循环送风系统,根据不同人的体感需求实现“定制送风”。 ' \ '近两年来全球正在悄悄燃起一场智能化革命,从此次CES展会上就可以看出,智能化已经深入到了人类社会的各个领域。而真正的智能应该是饱含人类智慧的人工替代者,具体落地在家电即是让用户随时体验所想,目前在空调领域,' \ '卡萨帝坚守“人单合一”模式,让员工与用户融为一体,员工深度了解用户诉求,让用户直接产于到产品的研发,打造天玺空调以“定制风”颠覆传统空调对人体舒适度的调节标准。可以说,' \ '天玺空调是卡萨帝“人单合一”模式中迭代出的代表性产品,为全人类智慧家庭提供更高端选择的同时,更为空调行业未来在智能领域的发展划定了方向。 ' vecots = '0.0016817 -0.0023771 0.0014308 0.0017345 0.00036935 -0.002784 -0.0011625 0.0013753 0.00041258 -0.0002469 -0.0006787 -0.00034368 0.0014742 0.0012733 -0.0020595 -0.00027367 -0.002166 0.0044171 0.0041103 0.0025185 6.2085e-06 -0.0013929 0.0028849 0.0045143 0.0018157 0.0034017 0.0010038 -0.0016331 -0.0020036 0.00040291 -0.0028853 0.00028505 -0.0035071 0.000594 -0.0075253 -0.0022002 0.00015122 -0.00096553 -0.006448 -0.0020763 -0.0011463 -0.0020786 -0.00045267 -0.00020631 0.0038685 0.002328 0.0029051 -0.0045597 0.0011096 -0.0031578 -0.00055255 7.3881e-05 -0.0010105 0.0041443 -0.0014362 -0.00074608 -0.0022296 0.0071468 0.00026954 0.0048568 0.0046115 0.0070269 0.0014227 0.0013971 0.00089506 0.00089894 -0.00015574 0.0033562 -0.0024287 0.0010246 -0.0033144 -0.0031511 -0.0033345 0.004288 -0.00052171 -0.00089313 0.0047684 -0.0010132 -0.0024283 -0.005237 -0.0036746 0.001436 0.0061823 -0.0055095 0.005296 -0.0035991 -0.00093066 0.0038196 0.00032246 -0.0039256 0.0072522 -0.0022805 0.0055718 -0.0035988 -0.0032735 -0.0014788 0.0024037 0.0026939 6.2561e-06 -0.0011473 0.0017805 -0.0029199 -2.6602e-05 -0.0051363 0.00049094 1.6564e-05 -0.0048527 0.0039031 0.0012706 -0.0042202 0.0026411 -0.0013751 -0.0040701 -0.0011444 -0.0026219 0.00075352 0.0012023 0.00225 -0.0028171 -0.00066409 -0.0005664 0.0040051 0.00075355 0.0010557 -0.0011771 -0.00097568 -0.00024623 0.0053235 0.0030382 -0.0017315 0.0024207 -0.0025765 0.0023139 -0.0032092 -0.003548 0.0022881 -0.0061837 0.00029765 0.00062331 0.00068086 -0.0027166 0.0031863 -0.0065737 0.0034293 0.0060602 0.004504 0.0053295 0.0024079 0.00070902 0.0028966 -0.0045681 0.0015634 -0.0014613 0.0069561 -0.0070769 0.0023017 -0.0011064 -0.00053212 0.0032405 -0.0035337 0.0026694 0.0035651 0.0047409 -0.0012295 -0.00032469 0.0025948 -0.0032963 0.00096035 -0.0043975 -0.0009855 -0.0035192 -0.00027234 -0.0030548 -0.0012547 -0.001251 -0.00011277 -0.0015221 -0.00045698 -0.00010189 -0.0018511 -0.0018755 -0.00048976 -0.0012928 0.001183 0.00031565 -0.00019164 -0.00089451 -0.00032064 -0.00039411 -0.0010326 -0.0011004 -0.00057266 -0.00022796 -0.0045244 0.0018536 3.0427e-05 0.001012 0.00053805 -0.0029383 0.0014684 -0.00035749 0.0012045 -0.0016012 -4.948e-05 -0.001409 -0.00033843 -0.0012289 -0.0013084 0.0019772 0.0028715 -0.0031593 -0.00010765 -0.0026751 -0.0010606 -0.0035321 0.0012284 0.0014916 -4.2898e-05 -0.00025318 0.00037125 -0.00074562 -0.00032884 0.00075165 0.0013371 0.00042652 -0.00058873 0.00061513 -0.00090333 -0.0022328 0.0010914 0.0002528 0.002822 -0.0016027 -1.5497e-05 -0.0014826 0.00015337 0.00052575 0.0024139 -0.0025401 0.0021926 -0.0013335 0.0019168 0.0030731 0.0024576 0.0012196 0.00036149 -0.00027879 0.0049065 0.00053997 0.0039811 -0.0024202 0.00069778 -0.0026459 -0.0019546 -0.0068449 0.0020488 0.00086165 -0.0018356 -0.0021088 0.0020403 0.0036333 -0.00074625 0.001589 0.0024911 -0.00043104 0.0016868 -0.0001082 0.00086151 -0.00018678 -0.0032375 0.0018129 -0.00011978 0.0058609 -0.00019446 -0.0026387 0.0016337 0.0039428 -0.00054639 0.0018987 -0.001241 0.0042978 0.0018963 -0.0015826 -0.0016058 -0.0006198 -0.00096858 
0.0004066 0.0042636 0.0014089 -0.00094127 -0.001492 0.00016004 0.0027676 0.00023191 0.00017472 -0.00060181 0.00038404 0.000846 0.001399 0.0017932 '
v = []
for x in vecots.split():
    v.append(float(x))
vnew = np.array(v)
is_training = tf.placeholder(tf.bool, [])
image_holder = tf.placeholder(dtype=tf.float32, shape=[None, 256, 256, 3],
                              name='input')
# VGG19.__init__ takes (x, t, is_training); no labels are needed here.
vgg = VGG19(image_holder, None, is_training)
with tf.Session() as sess:
    batch_size = 1
    imagename = '2.png'
    image = scipy.misc.imread(imagename)
    image = scipy.misc.imresize(image, (256, 256))
    image = image.reshape((batch_size, 256, 256, 3))
    init = tf.global_variables_initializer()
    sess.run(init)
    # Run the network's output tensor, not the VGG19 instance itself.
    res = sess.run(vgg.out, feed_dict={image_holder: image, is_training: True})
    log.debug("{}, {}".format(vnew.shape, res[0].shape))
    print(cosdistance(vnew, res[0]))
def train_nnmodel(epoch, learning_rate, batch_size, data_path='datasets/results',
                  data_name="train.csv", class_number=2,
                  checkpoint_dir="datasets/results/models"):
    root = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(root, data_path, data_name)
    df_ohe = pd.read_csv(data_path)
    log.info("{}".format(df_ohe.shape))
    df_ohe = shuffle(df_ohe)
    train_y = df_ohe['label']
    train_y = pd.get_dummies(train_y)
    del df_ohe['label']
    train_x = df_ohe
    x_data_holder = tf.placeholder(tf.float32, [None, train_x.shape[1]],
                                   name='inputs_x')
    y_data_holder = tf.placeholder(tf.float32, [None, class_number],
                                   name='inputs_y')
    y_prediction = neural_networks(x_data_holder, train_x.shape[1], class_number)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_data_holder,
                                                logits=y_prediction))
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    y_pre_max = tf.argmax(y_prediction, axis=1)     # index of the max predicted value
    y_train_max = tf.argmax(y_data_holder, axis=1)  # index of the max true value
    correct_prediction = tf.equal(y_pre_max, y_train_max)  # boolean tensor
    bool2float = tf.cast(correct_prediction, tf.float32)   # cast bool to float32
    accuracy = tf.reduce_mean(bool2float)  # accuracy
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=5)
        for e in range(epoch):
            counter = 0
            batch_count = len(train_x) // batch_size
            for batch_x, batch_y in minibatches(inputs=train_x, targets=train_y,
                                                batch_size=batch_size, shuffle=False):
                sess.run(train_step, feed_dict={x_data_holder: batch_x,
                                                y_data_holder: batch_y})
                train_loss = sess.run(loss, feed_dict={x_data_holder: batch_x,
                                                       y_data_holder: batch_y})
                train_acc = sess.run(accuracy, feed_dict={x_data_holder: batch_x,
                                                          y_data_holder: batch_y})
                if np.mod(counter, 10) == 1:
                    log_out = ("Epoch: {} Batch Count: {}/{}, "
                               "Train Accuracy: {:06f}; Loss: {:06f}")
                    log.info(log_out.format(e, counter, batch_count,
                                            train_acc, train_loss))
                counter += 1
                if np.mod(counter, 10) == 1:
                    if not os.path.exists(checkpoint_dir):
                        os.makedirs(checkpoint_dir)
                    checkpoint_name = os.path.join(root, checkpoint_dir)
                    saver.save(sess, save_path=os.path.join(
                        checkpoint_name, "{}.model".format(counter)))
                    log.debug("Model {} saved successfully ...".format(checkpoint_name))
args = get_dictionary()
not_cuts = re.compile(u'([\da-zA-Z \.]+)|《(.*?)》|“(.{1,10})”')
re_replace = re.compile(u'[^\u4e00-\u9fa50-9a-zA-Z《》\(\)()“”·\.]')
jieba.load_userdict(os.path.join(args.get('path'), 'jiebadict.csv'))
jieba.analyse.set_stop_words(os.path.join(args.get('path'), 'stopwords_zh.csv'))
sw = pd.read_csv("pyduyp/dictionary/stopwords_zh.csv",
                 lineterminator="\n").values.tolist()
sw2list = [j for i in sw for j in i]
dict_name = os.path.join(args.get('path'), 'jiebadict.csv')
dict_data = pd.read_csv(dict_name).values.tolist()
dict_data2list = [j for i in dict_data for j in i]
log.debug("dict load success")


def isindict(inputs):
    out = True
    for x in inputs:
        if x not in sw2list and x not in dict_data2list:
            out = False
    return out


def cut(s, add_stopwords=True):
    out = []
    scut = jieba.lcut(s)
    for x in scut:
if not os.path.exists('logs'):
    os.mkdir('logs')
merged = tf.summary.merge_all()
file_writer = tf.summary.FileWriter('logs', sess.graph)
tf.initialize_all_variables().run()
for epoch in trange(0, max_epoch):
    log.info(" ................... Start Training ...................")
    batch_count = train_list_length // batch_size
    log.info("{}".format(batch_count))
    for bc in range(batch_count):
        offset = bc * batch_size
        for hr, lr in get_image_batch_forpng(bc, batch_size):
            input_data, gt_data = read_data2arr(lr), read_data2arr(hr)
            log.debug("{}, {}".format(input_data.shape, gt_data.shape))
            feed_dict = {train_input: input_data, train_gt: gt_data}
            run_obj = [opt, loss, train_output, learning_rate, global_step]
            # Note: this rebinds `lr` from the low-res batch to the current
            # learning rate; the loop variable is reassigned next iteration.
            _, l, output, lr, g_step = sess.run(run_obj, feed_dict=feed_dict)
            loginfo = "epoch/bc: {}/{}, loss: {}, lr: {}".format(
                epoch, bc, np.sum(l) / batch_size, lr)
            log.info("{}".format(loginfo))
            if bc % 80 == 1:
    os.mkdir('logs')
merged = tf.summary.merge_all()
file_writer = tf.summary.FileWriter('logs', sess.graph)
tf.initialize_all_variables().run()
for epoch in trange(0, max_epoch):
    log.info(" ................... Start Training ...................")
    batch_count = train_list_length // batch_size
    log.info("{}".format(batch_count))
    for bc in range(batch_count):
        offset = bc * batch_size
        for hr, lr in get_image_batch_forpng(bc, batch_size,
                                             data_path="{}.txt".format(data_sets)):
            input_data, gt_data = read_lf2arr(lr), read_hf2arr(hr)
            log.debug("{}, {}".format(input_data.shape, gt_data.shape))
            feed_dict = {train_input: input_data, train_gt: gt_data}
            run_obj = [opt, loss, train_output, learning_rate, global_step]
            _, l, output, lr, g_step = sess.run(run_obj, feed_dict=feed_dict)
            loginfo = "epoch/bc: {}/{}, loss: {}, lr: {}".format(
                epoch, bc, np.sum(l) / batch_size, lr)
            log.info("{}".format(loginfo))
            if bc % 90 == 1:
class VGG19:
    log.debug("=========================== Start VGG19 ===========================")

    def __init__(self, x, t, is_training):
        if x is None:
            return
        # Keep the output tensor and feature maps on the instance so callers
        # can feed them to sess.run(); the original discarded them as locals.
        self.out, self.phi = self.build_model(x, is_training)
        self.loss = self.inference_loss(self.out, t) if t is not None else None

    def build_model(self, x, is_training, reuse=False):
        with tf.variable_scope('vgg19', reuse=reuse):
            phi = []
            with tf.variable_scope('conv1a'):
                x = conv_layer(x, [3, 3, 3, 64], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv1b'):
                x = conv_layer(x, [3, 3, 64, 64], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            phi.append(x)
            x = max_pooling_layer(x, 2, 2)
            with tf.variable_scope('conv2a'):
                x = conv_layer(x, [3, 3, 64, 128], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv2b'):
                x = conv_layer(x, [3, 3, 128, 128], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            phi.append(x)
            x = max_pooling_layer(x, 2, 2)
            with tf.variable_scope('conv3a'):
                x = conv_layer(x, [3, 3, 128, 256], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv3b'):
                x = conv_layer(x, [3, 3, 256, 256], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv3c'):
                x = conv_layer(x, [3, 3, 256, 256], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv3d'):
                x = conv_layer(x, [3, 3, 256, 256], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            phi.append(x)
            x = max_pooling_layer(x, 2, 2)
            with tf.variable_scope('conv4a'):
                x = conv_layer(x, [3, 3, 256, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv4b'):
                x = conv_layer(x, [3, 3, 512, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv4c'):
                x = conv_layer(x, [3, 3, 512, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv4d'):
                x = conv_layer(x, [3, 3, 512, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            phi.append(x)
            x = max_pooling_layer(x, 2, 2)
            with tf.variable_scope('conv5a'):
                x = conv_layer(x, [3, 3, 512, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv5b'):
                x = conv_layer(x, [3, 3, 512, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv5c'):
                x = conv_layer(x, [3, 3, 512, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            with tf.variable_scope('conv5d'):
                x = conv_layer(x, [3, 3, 512, 512], 1)
                x = batch_normalize(x, is_training)
                x = lrelu(x)
            phi.append(x)
            x = max_pooling_layer(x, 2, 2)
            x = flatten_layer(x)
            with tf.variable_scope('fc1'):
                x = full_connection_layer(x, 4096)
                x = lrelu(x)
            with tf.variable_scope('fc2'):
                x = full_connection_layer(x, 4096)
                x = lrelu(x)
            with tf.variable_scope('softmax'):
                x = full_connection_layer(x, 100)
        return x, phi

    def inference_loss(self, out, t):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(t, 100), logits=out)
        return tf.reduce_mean(cross_entropy)
def sql(sql):
    url = "{}://{}:{}/_sql?sql={}".format(es_args.get('schema'), es_args.get('host'),
                                          es_args.get('port'), sql)
    log.debug("request sql url: {}".format(url))
    response = requests.get(url)
    return response
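# Minimal sketch of the raw _sql helper above (assumes a reachable ES node;
# unlike sqlresponse, `sql` does not URL-encode its argument, so quote
# statements containing spaces or non-ASCII text first):
def sql_demo():
    response = sql(quote("SELECT count(*) FROM question_cd_update"))
    log.debug("status: {}".format(response.status_code))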