# coding: utf-8
''' CNN for single-character training on HCL keywords '''
import sys
sys.path.append('../data_set/hcl/')
from hcl import input_data

hcl = input_data([1] + [i for i in range(3755)], 20, 2, True, True, (28, 28), False)

import tensorflow as tf
sess = tf.InteractiveSession()

# placeholders
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 3755])

# variable initializers
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
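# The graph construction below is a minimal sketch of how such a TF 1.x script
# typically continues (conv/pool layers, dropout, softmax readout, Adam step).
# The layer sizes, variable names, and optimizer are assumptions, not code from
# the original file.
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

x_image = tf.reshape(x, [-1, 28, 28, 1])

# conv layer 1: 28x28x1 -> 14x14x32 after pooling
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_pool1 = max_pool_2x2(tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1))

# conv layer 2: 14x14x32 -> 7x7x64 after pooling
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_pool2 = max_pool_2x2(tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2))

# fully connected layer with dropout
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_fc1 = tf.nn.relu(tf.matmul(tf.reshape(h_pool2, [-1, 7 * 7 * 64]), W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# softmax readout over the 3755 classes
W_fc2 = weight_variable([1024, 3755])
b_fc2 = bias_variable([3755])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)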
from PIL import Image
import numpy as np
# `hcl` and `small_char_set` are used below but their imports are not shown in
# this excerpt; they are assumed to be importable project modules.
import hcl
import small_char_set

gb2312_path = '../data/hcl/GB2312_3755.txt'
keyword_list = [
    '省', '市', '县', '区', '乡', '镇', '村', '巷', '弄', '路',
    '街', '社', '组', '队', '州', 'X'
]
full_list = [i.rstrip('\n') for i in open(gb2312_path, 'r').readlines()]
# restrict the full GB2312 list to the small character set
# (the attribute name `samll_char_set` is kept as spelled in the source module)
small_list = list(
    set(small_char_set.samll_char_set).intersection(set(full_list)))
full_list = small_list

# per-model settings: data set, input shape, class count, result directory,
# weight/history file names, and training parameters
arguments = {
    'kwd': [
        hcl.input_data(keyword_list, raw_data=False, direct_info=False),
        (1, 32, 32), 16, '../data/result/', 'kwd_weights.hdf5',
        'kwd_history.txt', 100, 10, 1000000, False
    ],
    'full': [
        hcl.input_data(full_list, raw_data=False, direct_info=False),
        (1, 32, 32), len(full_list), '../data/result/', 'full_weights.hdf5',
        'full_history.txt', 1200, 10, 12000000, False
    ]
}


def proba_to_char(probas, word_list=keyword_list):
    kps = []
    probas = list(probas)
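    # The rest of proba_to_char is not shown in this excerpt. A minimal sketch of
    # one plausible completion (an assumption, not the original code): keep the
    # most probable character and its confidence for every probability vector.
    for proba in probas:
        idx = int(np.argmax(proba))
        kps.append((word_list[idx], float(proba[idx])))
    return kps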
# coding: utf-8
# keyword_recognizer.py
from cnn import cnn  # the cnn class defined in cnn.py
import sys
sys.path.append('../data_set/hcl/')
from hcl import input_data

hcl = input_data(['省', '市', '县', '区', '乡', '镇', '村', '巷', '弄', '路',
                  '街', '社', '组', '队', '州', 'X'],
                 500, 200, True, True, (32, 32), False)

x_shape = 1024
cnn_reshape = [-1, 32, 32, 1]
y_shape = 16
cnn_layer_n = 2
cnn_weights = [[3, 3, 1, 32], [3, 3, 32, 64]]
keep_prob = [1, 1, 1, 1, 0.5]
# two 2x2 max-pools reduce 32x32 to 8x8, with 64 feature maps from the last conv layer
fnn_reshape = [-1, 8 * 8 * 64]
fnn_layer_n = 1
fnn_weights = [[8 * 8 * 64, 1024]]
softmax_weight = [1024, 16]

a = cnn(x_shape, cnn_reshape, y_shape, cnn_layer_n, cnn_weights, keep_prob,
        fnn_reshape, fnn_layer_n, fnn_weights, softmax_weight)
a.train(hcl, 2000, 50)
a.test(hcl, 200)
# coding: utf-8
''' CNN for single-character training on HCL keywords '''
import sys
sys.path.append('../data_set/hcl/')
from hcl import input_data

hcl = input_data(['省', '市', '县', '区', '乡', '镇', '村', '巷', '弄', 'X'],
                 20, 20, True, True, (28, 28), False)

import tensorflow as tf
sess = tf.InteractiveSession()

# placeholders
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# variable initializers
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
# coding: utf-8
# batches_saver.py
# Save the frequently used batches (3755 classes, batch size = 1000, image size = 32 x 32).
import hcl
import numpy as np

a = hcl.input_data([i for i in range(3755)], direct_info=True)


def batches_saver(ty, base=0, num_batch=100, batch_size=1000):
    for i in range(base, base + num_batch):
        filename = 'hcl/' + ty + '/bin/' + str(i)
        x, y = a.next_batch(batch_size, ty)
        x = np.array(x)
        y = np.array(y)
        x.tofile(filename + '.binx')
        y.tofile(filename + '.biny')
        print('saved batch %d successfully!' % i)


batches_saver('train', 69)
batches_saver('test', num_batch=1, batch_size=10000)
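# A minimal sketch of how a saved batch could be loaded back with np.fromfile.
# The dtype (float64, numpy's default when converting Python lists of floats)
# and the one-hot label width of 3755 are assumptions; adjust them to whatever
# a.next_batch actually returns.
def batch_loader(ty, index, batch_size=1000, img_size=32 * 32, n_classes=3755):
    filename = 'hcl/' + ty + '/bin/' + str(index)
    x = np.fromfile(filename + '.binx', dtype=np.float64).reshape(batch_size, img_size)
    y = np.fromfile(filename + '.biny', dtype=np.float64).reshape(batch_size, n_classes)
    return x, y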
# coding: utf-8
''' CNN for single-character training on HCL keywords '''
import sys
sys.path.append('../data_set/hcl/')
from hcl import input_data
import pickle
import os

hcl = input_data([1] + [i for i in range(3755)], 50, 20, True, True, (32, 32), False)
keep_prob = [0.1, 0.1, 0.2, 0.2, 0.3, 0.3, 0.4, 0.5]

import tensorflow as tf
sess = tf.InteractiveSession()

# placeholders
x = tf.placeholder(tf.float32, shape=[None, 1024])
y_ = tf.placeholder(tf.float32, shape=[None, 3755])

# variable initializers
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
# coding: utf-8
''' CNN for single-character training on HCL keywords '''
import sys
sys.path.append('../data_set/hcl/')
from hcl import input_data

# hcl = input_data(['省', '市', '县', '区', '乡', '镇', '村', '巷', '弄', 'X'], 20, 20, True, True, (28, 28), False)
hcl = input_data([
    '上', '云', '内', '北', '台', '吉', '四', '天', '宁', '安', '山', '广', '新', '江', '河',
    '浙', '海', '湖', '澳', '甘', '福', '西', '贵', '辽', '重', '陕', '青', '香', '黑', 'X'
], 20, 20, True, True, (28, 28), False)

import tensorflow as tf
sess = tf.InteractiveSession()

# placeholders
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 30])

# variable initializers
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
    @staticmethod
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    @staticmethod
    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    @staticmethod
    def make_softmax(x, weight):
        return tf.nn.softmax(
            tf.matmul(x, cnn.weight_variable(weight)) + cnn.bias_variable([weight[-1]]))


if __name__ == '__main__':
    x_shape = 1024
    cnn_reshape = [-1, 32, 32, 1]
    y_shape = 10
    cnn_layer_n = 2
    cnn_weights = [[3, 3, 1, 32], [3, 3, 32, 64]]
    keep_prob = [1, 1, 1, 1, 0.5]
    fnn_reshape = [-1, 8 * 8 * 64]
    fnn_layer_n = 1
    fnn_weights = [[8 * 8 * 64, 1024]]
    softmax_weight = [1024, 10]

    a = cnn(x_shape, cnn_reshape, y_shape, cnn_layer_n, cnn_weights, keep_prob,
            fnn_reshape, fnn_layer_n, fnn_weights, softmax_weight)

    import sys
    sys.path.append('../data_set/hcl/')
    from hcl import input_data
    b = input_data(['省', '市', '县', '区', '乡', '镇', '村', '巷', '弄', 'X'],
                   50, 50, True, True, (32, 32), False)
    a.train(b, 2000, 50)
    a.test(b, 200)

    x = b.test.next_batch(20)
    print(a.predict(x[0]))
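    # A minimal sketch of interpreting the prediction, assuming a.predict returns
    # one row of per-class probabilities per input image (the cnn class body is
    # not shown in this excerpt, so that return shape is an assumption).
    import numpy as np
    char_list = ['省', '市', '县', '区', '乡', '镇', '村', '巷', '弄', 'X']
    probas = a.predict(x[0])
    for row in probas:
        idx = int(np.argmax(row))
        print('%s (p=%.3f)' % (char_list[idx], float(row[idx])))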