def __init__(self):
    """Set up the answer dictionary, the word vectors and the base class.

    Raises:
        SystemExit: if ``LSTM_MODEL_PATH`` does not exist.
    """
    # Answer dictionary.
    self.dealer = DataDealer(ANSWERS_DICT_PATH)
    if not os.path.exists(LSTM_MODEL_PATH):
        # The bare ``exit()`` helper comes from the ``site`` module and ends
        # the process silently with status 0; raise SystemExit directly so
        # the reason is reported and the exit status is non-zero.
        raise SystemExit('LSTM model path not found: ' + str(LSTM_MODEL_PATH))
    self.model_word2vec = gensim.models.KeyedVectors.load_word2vec_format(
        GLOVE_WIKI_GENSIM_DATA_PATH)
    # No image is opened yet; ``path`` is the current browsing directory.
    self.img = None
    self.path = '.\\'
    YukiVisual.__init__(self)
def __init__(self):
    """Create the dictionary, sample reader and word-vector model.

    The VGG weights and biases start out as ``None``.
    """
    # Answer dictionary.
    self.dealer = DataDealer(ANSWERS_DICT_PATH)

    # Sample-set reader, positioned with its default ``set_pos()``.
    self.reader = sample_reader = DataReader(TRAIN_DATA_TYPE)
    sample_reader.set_pos()

    self.weight_vgg = None
    self.biase_vgg = None

    word_vectors = gensim.models.KeyedVectors.load_word2vec_format(
        GLOVE_WIKI_GENSIM_DATA_PATH)
    self.model_word2vec = word_vectors
def createQuestionsDict():
    """
    Build the question dictionary (it also contains the answer dictionary).

    Starts from the dictionary at ANSWERS_DICT_PATH, feeds every question of
    every picture returned by the reader into it until the reader comes back
    to the first picture id, then saves the result to QUESTIONS_DICT_PATH.
    """
    reader = DataReader()
    reader.set_pos()
    dealer = DataDealer(ANSWERS_DICT_PATH)

    def register_questions(pic_id):
        # Feed each question text of one picture into the dictionary.
        for entry in reader.get_pic_qa(pic_id):
            dealer.deal(entry['question'])

    first_id = reader.get_next_pic_id()
    register_questions(first_id)

    visited = 0
    current_id = reader.get_next_pic_id()
    while current_id != first_id:
        register_questions(current_id)
        current_id = reader.get_next_pic_id()
        visited += 1
        # Progress marker every 1000 pictures.
        if visited % 1000 == 0:
            print('*', end='')

    dealer.saveData(QUESTIONS_DICT_PATH)
    print('over!')
def createAnswersDict():
    """
    Build the answer dictionary.

    For every question of every picture, each distinct answer text collects a
    score: 1 per vote with confidence 'yes', 0.5 per 'maybe', 0 otherwise.
    Answers scoring at least 3 are fed into the dictionary. The walk stops
    when the reader returns to the first picture id; the dictionary is then
    saved with the dealer's default ``saveData()``.
    """
    confidence_weight = {'yes': 1, 'maybe': 0.5}

    reader = DataReader()
    reader.set_pos()
    dealer = DataDealer(ANSWERS_DICT_PATH)

    def register_answers(pic_id):
        # Score the answers of each question and keep the confident ones.
        for q in reader.get_pic_qa(pic_id):
            scores = {}
            for a in q['answers']:
                answer = a['answer']
                weight = confidence_weight.get(a['answer_confidence'], 0)
                scores[answer] = scores.get(answer, 0) + weight
            for answer, score in scores.items():
                if score >= 3:
                    dealer.deal(answer)

    start_id = reader.get_next_pic_id()
    register_answers(start_id)

    processed = 0
    now_id = reader.get_next_pic_id()
    while now_id != start_id:
        register_answers(now_id)
        now_id = reader.get_next_pic_id()
        processed += 1
        # Progress marker every 1000 pictures.
        if processed % 1000 == 0:
            print('*', end='')

    dealer.saveData()
    print('over!')
def __init__(self):
    """Attach a DataDealer built from ANSWERS_DICT_PATH as ``self.dealer``."""
    answer_dictionary = DataDealer(ANSWERS_DICT_PATH)
    self.dealer = answer_dictionary
class TrainNetForVQA:
    """Build and train the TF1 graph that maps (image features, question) to answers."""

    def __init__(self):
        # Answer dictionary; its word count fixes the width of the output layer.
        self.dealer = DataDealer(ANSWERS_DICT_PATH)

    def __loadBatch(self, capacity, batch_size, file_name):
        """
        Read training samples from a TFRecord file.

        One file is picked at random from the directory that contains
        ``file_name``.

        Args:
            capacity: buffer size passed to ``shuffle``.
            batch_size: number of samples per batch.
            file_name: path whose directory holds the TFRecord files.

        Returns:
            Tensors ``(label_batch, question_batch, img_batch, question_len_batch)``.

        Raises:
            FileNotFoundError: if the directory is missing or holds no files.
        """
        def parse(example):
            features = tf.parse_single_example(example, features={
                'answer': tf.FixedLenFeature([self.dealer.getWordNum()], tf.int64),
                'question': tf.FixedLenFeature([QUESTION_MAX_LEN * 300], tf.float32),
                'img': tf.FixedLenFeature([7 * 7 * 512], tf.float32),
                'question_len': tf.FixedLenFeature([1], tf.int64),
            })
            answer = tf.cast(features['answer'], dtype=tf.float32)
            question = tf.cast(features['question'], dtype=tf.float32)
            img = tf.cast(features['img'], dtype=tf.float32)
            question_len = tf.cast(features['question_len'], dtype=tf.int64)
            return answer, question, img, question_len

        # List the directory directly. The previous ``os.walk`` loop kept only
        # the file list of the last directory visited (wrong when there are
        # subdirectories) and left ``files`` unbound when nothing was walked.
        record_dir = os.path.dirname(file_name)
        files = [
            os.path.join(record_dir, entry)
            for entry in os.listdir(record_dir)
            if os.path.isfile(os.path.join(record_dir, entry))
        ]
        if not files:
            raise FileNotFoundError(
                'no TFRecord files found in ' + repr(record_dir))

        dataset = tf.data.TFRecordDataset([random.choice(files)])
        dataset = dataset.map(parse).repeat().batch(batch_size).shuffle(
            buffer_size=capacity)
        iterator = dataset.make_one_shot_iterator()
        label_batch, question_batch, img_batch, question_len_batch = \
            iterator.get_next()
        return label_batch, question_batch, img_batch, question_len_batch

    def __get_variables_to_restore(self, str):
        """Return the trainable variables whose name contains ``str``."""
        # The parameter name shadows the builtin; it is kept for compatibility.
        return [v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                if str in v.name]

    def train(self, batch_size, iterate_time, learning_rate):
        """
        Train the network.

        Args:
            batch_size: samples per training step.
            iterate_time: number of training steps (also used as the shuffle
                buffer size of the input pipeline).
            learning_rate: learning rate of the gradient-descent optimizer.
        """
        model_path = LSTM_MODEL_PATH + LSTM_MODEL_NAME
        word_num = self.dealer.getWordNum()

        # Randomly initialised weights and biases.
        weights = {
            'w_pic': tf.get_variable(
                'weight_of_pic', initializer=tf.random_normal([512, 512])),
            'w_q': tf.get_variable(
                'weight_of_question', initializer=tf.random_normal([300, 300])),
            'w_q_out': tf.get_variable(
                'weight_of_question_output',
                initializer=tf.random_normal([300, 512])),
            'w_pos': tf.get_variable(
                'pos_weight_of_question_output',
                initializer=tf.random_normal([300, 49])),
            'w_pic_in': tf.get_variable(
                'weight_of_pic_in_lstm',
                initializer=tf.random_normal([512, 300])),
            'w_q_in': tf.get_variable(
                'weight_of_question_in_lstm',
                initializer=tf.random_normal([300, 300])),
            'out': tf.get_variable(
                'wo', initializer=tf.random_normal([300, word_num])),
        }
        biases = {
            'b_pic': tf.get_variable(
                'biase_of_pic', initializer=tf.random_normal([512, ])),
            'b_q': tf.get_variable(
                'biase_of_question', initializer=tf.random_normal([300, ])),
            'b_q_out': tf.get_variable(
                'biase_of_question_output',
                initializer=tf.random_normal([512, ])),
            'b_pos': tf.get_variable(
                'pos_biase_of_question_output',
                initializer=tf.random_normal([49, ])),
            'b_pic_in': tf.get_variable(
                'biase_of_pic_in_lstm', initializer=tf.random_normal([300, ])),
            'b_q_in': tf.get_variable(
                'biase_of_question_in_lstm',
                initializer=tf.random_normal([300, ])),
            'out': tf.get_variable(
                'bo', initializer=tf.random_normal([word_num, ])),
        }

        # Input pipeline.
        label_batch, question_batch, img_batch, question_len_batch = \
            self.__loadBatch(iterate_time, batch_size, TRAIN_BATCH_PATH)

        # Image features: dense layer applied to every spatial cell.
        img_batch = tf.reshape(img_batch, [batch_size * 7 * 7, 512])
        img_batch = tf.add(tf.matmul(img_batch, weights['w_pic']),
                           biases['b_pic'])
        img_batch = tf.nn.leaky_relu(img_batch)
        # Spatial sequence: [batch_size, 49, 512].
        img_batch = tf.reshape(img_batch, [batch_size, 7 * 7, 512])

        # Question lengths and labels.
        question_len_batch = tf.reshape(question_len_batch, [batch_size])
        label_batch = tf.reshape(label_batch, [batch_size, word_num])

        # Question word sequence.
        question_batch = tf.reshape(
            question_batch, [batch_size * QUESTION_MAX_LEN, 300])
        data = tf.add(tf.matmul(question_batch, weights['w_q']), biases['b_q'])
        data_batch = tf.reshape(data, [batch_size, -1, 300])
        data_batch = tf.nn.leaky_relu(data_batch)

        # LSTM that extracts the question feature (with dropout).
        question_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            300, name='QuestionLSTMCell')
        question_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            question_lstm_cell, input_keep_prob=0.7, output_keep_prob=0.7)
        q_init_state = question_lstm_cell.zero_state(
            batch_size, dtype=tf.float32)
        _, question_states = tf.nn.dynamic_rnn(
            question_lstm_cell, data_batch, initial_state=q_init_state,
            dtype=tf.float32, sequence_length=question_len_batch)
        question_output = tf.reshape(question_states.h, (batch_size, 300))

        # Spatial gate derived from the question: [batch_size, 49] in (0, 1),
        # meant to suppress the spatial cells the question does not need.
        pos_weight = tf.add(tf.matmul(question_output, weights['w_pos']),
                            biases['b_pos'])
        pos_weight = tf.reshape(pos_weight, [1, batch_size * 49])
        pos_weight = tf.sigmoid(pos_weight)
        img_batch = tf.transpose(img_batch, (2, 0, 1))
        img_batch = tf.reshape(img_batch, (512, batch_size * 7 * 7))
        img_batch = img_batch * pos_weight
        img_batch = tf.reshape(img_batch, (512, batch_size, 7 * 7))
        img_batch = tf.transpose(img_batch, (1, 2, 0))

        # Channel gate derived from the question: [batch_size, 512] in (0, 1),
        # meant to suppress the object features the question does not need.
        question_weight = tf.add(
            tf.matmul(question_output, weights['w_q_out']), biases['b_q_out'])
        question_weight = tf.reshape(question_weight, [1, batch_size * 512])
        question_weight = tf.sigmoid(question_weight)
        img_batch = tf.transpose(img_batch, (1, 0, 2))
        img_batch = tf.reshape(img_batch, (7 * 7, batch_size * 512))
        img_batch = img_batch * question_weight
        img_batch = tf.reshape(img_batch, [7 * 7, batch_size, 512])
        img_batch = tf.transpose(img_batch, (1, 0, 2))

        # LSTM over the gated spatial sequence (with dropout).
        img_lstm_cell = tf.nn.rnn_cell.LSTMCell(512, name='PicLSTMCell')
        img_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            img_lstm_cell, input_keep_prob=0.7, output_keep_prob=0.7)
        img_init_state = img_lstm_cell.zero_state(batch_size, dtype=tf.float32)
        _, img_states = tf.nn.dynamic_rnn(
            img_lstm_cell, img_batch, initial_state=img_init_state,
            dtype=tf.float32)
        img_output = img_states.h

        # Final LSTM input: the image summary followed by the question words.
        img_in = tf.add(tf.matmul(img_output, weights['w_pic_in']),
                        biases['b_pic_in'])
        img_in = tf.reshape(img_in, [batch_size, 1, 300])
        q_in = tf.add(tf.matmul(question_batch, weights['w_q_in']),
                      biases['b_q_in'])
        q_in = tf.reshape(q_in, [batch_size, QUESTION_MAX_LEN, 300])
        data_add = tf.concat(axis=1, values=[img_in, q_in])
        # Bound the magnitude.
        data_add = tf.sigmoid(data_add)

        lstm_cell = tf.nn.rnn_cell.LSTMCell(300, name='LSTMCell')
        init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
        # One extra step for the image vector prepended above.
        question_len_batch = question_len_batch + 1
        _, states = tf.nn.dynamic_rnn(
            lstm_cell, data_add, initial_state=init_state, dtype=tf.float32,
            sequence_length=question_len_batch)

        pred = tf.add(tf.matmul(states.h, weights['out']), biases['out'])
        pred = tf.sigmoid(pred)
        # Keep log() finite.
        pred = tf.clip_by_value(pred, 1e-7, 1.0 - 1e-7)

        # Cross entropy; positives are far rarer than negatives, so the
        # positive term is up-weighted.
        up = 5
        loss = -(tf.log(pred) * label_batch * up
                 + (1 - label_batch) * tf.log(1 - pred))
        loss = tf.reduce_mean(loss, name='loss')

        # "Accuracy" here is 1 - mean absolute error of the predictions.
        accuracy = tf.abs(pred - label_batch)
        accuracy = 1 - tf.reduce_mean(accuracy, name='accuracy')

        # Plain gradient descent.
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        train = optimizer.minimize(loss)

        saver = tf.train.Saver(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))
        with tf.Session(config=session_config) as sess:
            sess.run(tf.global_variables_initializer())
            if os.path.exists(LSTM_MODEL_PATH):
                # Overwrite the fresh initial values with the saved ones.
                saver.restore(sess, model_path)
            for m in range(iterate_time):
                # Fetch the metrics in the same run as the update. A second
                # sess.run would pull another batch from the iterator, so the
                # reported numbers would describe an unrelated batch and every
                # step would cost two forward passes.
                _, ac, lo = sess.run([train, accuracy, loss])
                # Report only on steps whose 1-based index divides
                # iterate_time; the final step always qualifies.
                if iterate_time % (m + 1) != 0:
                    continue
                print('loss:', end='')
                print(lo)
                print('accuracy:', end='')
                print(ac)
                # NOTE(review): checkpointing is done on reporting steps, which
                # include the last step - confirm this matches the intended
                # save cadence.
                if not os.path.exists(LSTM_MODEL_PATH):
                    os.makedirs(LSTM_MODEL_PATH)
                save_path = saver.save(sess, model_path)
                print("Model saved in file: %s" % save_path)
class Tester(YukiVisual):
    """Answer questions about an opened image with the trained network.

    ``send_message`` understands the commands ``cd <dir>``, ``op <image>`` and
    ``ls``; any other text is treated as a question about the opened image.
    """

    def __init__(self):
        """Load the answer dictionary and word vectors, then init the base.

        Raises:
            SystemExit: if ``LSTM_MODEL_PATH`` does not exist.
        """
        # Answer dictionary.
        self.dealer = DataDealer(ANSWERS_DICT_PATH)
        if not os.path.exists(LSTM_MODEL_PATH):
            # The bare ``exit()`` helper ends the process silently with
            # status 0; report the reason and exit non-zero instead.
            raise SystemExit(
                'LSTM model path not found: ' + str(LSTM_MODEL_PATH))
        self.model_word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            GLOVE_WIKI_GENSIM_DATA_PATH)
        self.img = None
        self.path = '.\\'
        YukiVisual.__init__(self)

    def __cd(self, path_):
        """
        Enter the sub-directory ``path_`` of the current path.

        Returns True on success, False if the target does not exist.
        """
        if not os.path.exists(self.path + path_):
            return False
        self.path = self.path + path_ + '\\'
        return True

    def __op(self, image_name):
        """
        Open ``image_name`` from the current path, show it, and store it as a
        224x224 RGB array in ``self.img``.

        Returns True on success, False if the file does not exist.
        """
        if not os.path.exists(self.path + image_name):
            return False
        self.img = Image.open(self.path + image_name)
        self.img.show()
        # The network reshapes the image to [1, 224, 224, 3], so greyscale,
        # palette or RGBA files must be converted to three channels first.
        self.img = self.img.convert('RGB').resize((224, 224))
        self.img = numpy.array(self.img)
        return True

    def __getAnswer(self, question):
        """
        Return the answer scores for ``question`` about the opened image.

        Returns a 1-D numpy array with one sigmoid score per dictionary word,
        or an error string when no image has been opened.
        """
        if not hasattr(self.img, 'shape'):
            return "未打开有效图片!"

        # Fresh graph for the VGG19 feature extraction.
        tf.reset_default_graph()
        img_batch = tf.Variable(self.img, dtype=tf.float32)
        img_batch = tf.reshape(img_batch, [1, 224, 224, 3])
        img_batch = self.__getVGG19Result(img_batch)

        # Fresh graph for the answer network.
        tf.reset_default_graph()
        img_batch = tf.cast(img_batch, tf.float32)

        question = question.replace('\n', '')
        question = question.replace('?', '')
        question = question.replace(',', ' ,')
        question = question.replace('.', ' .')
        question = question.split(' ')
        data = []
        for word in question:
            # data has shape (len(question), 300).
            try:
                data.append(list(self.model_word2vec[word]))
            except KeyError:
                # Words missing from the embedding vocabulary become zeros.
                data.append([0] * 300)
        question_batch = tf.cast(data, tf.float32)

        model_path = LSTM_MODEL_PATH + LSTM_MODEL_NAME
        word_num = self.dealer.getWordNum()

        # Variables are created with random values and then overwritten by the
        # checkpoint restore below.
        weights = {
            'w_pic': tf.get_variable(
                'weight_of_pic', initializer=tf.random_normal([512, 512])),
            'w_q': tf.get_variable(
                'weight_of_question', initializer=tf.random_normal([300, 300])),
            'w_q_out': tf.get_variable(
                'weight_of_question_output',
                initializer=tf.random_normal([300, 512])),
            'w_pos': tf.get_variable(
                'pos_weight_of_question_output',
                initializer=tf.random_normal([300, 49])),
            'w_pic_in': tf.get_variable(
                'weight_of_pic_in_lstm',
                initializer=tf.random_normal([512, 300])),
            'w_q_in': tf.get_variable(
                'weight_of_question_in_lstm',
                initializer=tf.random_normal([300, 300])),
            'out': tf.get_variable(
                'wo', initializer=tf.random_normal([300, word_num])),
        }
        biases = {
            'b_pic': tf.get_variable(
                'biase_of_pic', initializer=tf.random_normal([512, ])),
            'b_q': tf.get_variable(
                'biase_of_question', initializer=tf.random_normal([300, ])),
            'b_q_out': tf.get_variable(
                'biase_of_question_output',
                initializer=tf.random_normal([512, ])),
            'b_pos': tf.get_variable(
                'pos_biase_of_question_output',
                initializer=tf.random_normal([49, ])),
            'b_pic_in': tf.get_variable(
                'biase_of_pic_in_lstm', initializer=tf.random_normal([300, ])),
            'b_q_in': tf.get_variable(
                'biase_of_question_in_lstm',
                initializer=tf.random_normal([300, ])),
            'out': tf.get_variable(
                'bo', initializer=tf.random_normal([word_num, ])),
        }

        # Image features: dense layer applied to every spatial cell.
        img_batch = tf.reshape(img_batch, [7 * 7, 512])
        img_batch = tf.add(tf.matmul(img_batch, weights['w_pic']),
                           biases['b_pic'])
        img_batch = tf.nn.leaky_relu(img_batch)
        # Spatial sequence: [1, 49, 512].
        img_batch = tf.reshape(img_batch, [1, 7 * 7, 512])

        question_batch = tf.reshape(question_batch, [1 * len(question), 300])
        data = tf.add(tf.matmul(question_batch, weights['w_q']), biases['b_q'])
        data_batch = tf.reshape(data, [1, -1, 300])
        data_batch = tf.nn.leaky_relu(data_batch)

        # LSTM that extracts the question feature. The wrapper is kept so the
        # graph is built the same way as before, but keep_prob is 1.0: dropout
        # must be inactive at inference, otherwise every call returns
        # different scores for the same input.
        question_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            300, name='QuestionLSTMCell')
        question_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            question_lstm_cell, input_keep_prob=1.0, output_keep_prob=1.0)
        q_init_state = question_lstm_cell.zero_state(1, dtype=tf.float32)
        _, question_states = tf.nn.dynamic_rnn(
            question_lstm_cell, data_batch, initial_state=q_init_state,
            dtype=tf.float32)
        question_output = tf.reshape(question_states.h, (1, 300))

        # Spatial gate derived from the question: [1, 49] in (0, 1).
        pos_weight = tf.add(tf.matmul(question_output, weights['w_pos']),
                            biases['b_pos'])
        pos_weight = tf.reshape(pos_weight, [1, 1 * 49])
        pos_weight = tf.sigmoid(pos_weight)
        img_batch = tf.transpose(img_batch, (2, 0, 1))
        img_batch = tf.reshape(img_batch, (512, 1 * 7 * 7))
        img_batch = img_batch * pos_weight
        img_batch = tf.reshape(img_batch, (512, 1, 7 * 7))
        img_batch = tf.transpose(img_batch, (1, 2, 0))

        # Channel gate derived from the question: [1, 512] in (0, 1).
        question_weight = tf.add(
            tf.matmul(question_output, weights['w_q_out']), biases['b_q_out'])
        question_weight = tf.reshape(question_weight, [1, 1 * 512])
        question_weight = tf.sigmoid(question_weight)
        img_batch = tf.transpose(img_batch, (1, 0, 2))
        img_batch = tf.reshape(img_batch, (7 * 7, 1 * 512))
        img_batch = img_batch * question_weight
        img_batch = tf.reshape(img_batch, [7 * 7, 1, 512])
        img_batch = tf.transpose(img_batch, (1, 0, 2))

        # LSTM over the gated spatial sequence (dropout disabled, see above).
        img_lstm_cell = tf.nn.rnn_cell.LSTMCell(512, name='PicLSTMCell')
        img_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            img_lstm_cell, input_keep_prob=1.0, output_keep_prob=1.0)
        img_init_state = img_lstm_cell.zero_state(1, dtype=tf.float32)
        _, img_states = tf.nn.dynamic_rnn(
            img_lstm_cell, img_batch, initial_state=img_init_state,
            dtype=tf.float32)
        img_output = img_states.h

        # Final LSTM input: the image summary followed by the question words.
        img_in = tf.add(tf.matmul(img_output, weights['w_pic_in']),
                        biases['b_pic_in'])
        img_in = tf.reshape(img_in, [1, 1, 300])
        q_in = tf.add(tf.matmul(question_batch, weights['w_q_in']),
                      biases['b_q_in'])
        q_in = tf.reshape(q_in, [1, len(question), 300])
        data_add = tf.concat(axis=1, values=[img_in, q_in])
        # Bound the magnitude.
        data_add = tf.sigmoid(data_add)

        lstm_cell = tf.nn.rnn_cell.LSTMCell(300, name='LSTMCell')
        init_state = lstm_cell.zero_state(1, dtype=tf.float32)
        _, states = tf.nn.dynamic_rnn(
            lstm_cell, data_add, initial_state=init_state, dtype=tf.float32)
        pred = tf.add(tf.matmul(states.h, weights['out']), biases['out'])
        pred = tf.sigmoid(pred)

        saver = tf.train.Saver(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))
        with tf.Session(config=session_config) as sess:
            sess.run(tf.global_variables_initializer())
            # Replace the random initial values with the trained ones.
            saver.restore(sess, model_path)
            result = sess.run(pred)
        return list(result)[0]

    def __getVGG19Result(self, img_batch):
        """
        Run the VGG19 network on ``img_batch`` (a [1, 224, 224, 3] tensor).

        Returns the first (only) item of the evaluated network output.
        """
        model = VGG19model()
        weight_vgg, biase_vgg = model.loadWeightsAndBiases(
            VGG19_WEIGHTS_AND_BIASE_PATH, False)
        # NOTE(review): per the original comment this yields the last hidden
        # layer; the meaning of the 0.2 / True arguments is defined by
        # VGG19model.getNet - confirm there.
        out = model.getNet(img_batch, weight_vgg, biase_vgg, 0.2, True)
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            data = sess.run(out)
        return data[0]

    def send_message(self, text):
        """
        Handle one line of user input and send the reply via the base class.

        NOTE(review): the input is read from ``self.received_text``; the
        ``text`` parameter is not used - confirm this is intended.
        """
        text_list = self.received_text.split()
        command = text_list[0] if text_list else None
        # Empty when the command was given without an argument.
        argument = text_list[1:2]

        if command is None:
            # Blank input previously raised IndexError.
            YukiVisual.send_message(self, 'wrong!\n')
        elif command == 'cd':
            if argument and self.__cd(argument[0]):
                YukiVisual.send_message(self, 'now in ' + self.path + '\n')
            else:
                YukiVisual.send_message(self, 'wrong!\n')
        elif command == 'op':
            if argument and self.__op(argument[0]):
                YukiVisual.send_message(
                    self, 'success!\n Please ask me questions.\n')
            else:
                YukiVisual.send_message(self, 'wrong!\n')
        elif command == 'ls':
            listing = ''.join(
                entry + '\n ' for entry in os.listdir(self.path))
            YukiVisual.send_message(self, listing)
        else:
            data = self.__getAnswer(self.received_text)
            if not isinstance(data, numpy.ndarray):
                YukiVisual.send_message(self, 'wrong!\n')
                return
            data = list(data)
            # Select the five best answers by index, so that tied scores
            # cannot make more than five rows appear; rows are still printed
            # in dictionary order.
            top_indices = sorted(
                range(len(data)), key=data.__getitem__)[-5:]
            YukiVisual.send_message(self, 'Top Five Answer:\n')
            for i in sorted(top_indices):
                YukiVisual.send_message(
                    self,
                    '*' * int(1 + data[i] * 10)
                    + ' ' * int(11 - data[i] * 10)
                    + self.dealer.getWord(i) + ' ' + str(data[i]) + '\n')
def trainOne(self, batch_size, learning_rate, rounds=10):
    """
    Build the detection graph for one random batch and train it.

    Args:
        batch_size: number of images in the batch.
        learning_rate: learning rate of the gradient-descent optimizer.
        rounds: number of optimisation steps to run.

    Returns:
        ``[rpn_accuracy, bbox_accuracy]`` evaluated before the first
        optimisation step. Both values belong to the last image of the batch,
        because the tensors are rebound for every image.

    Raises:
        ValueError: if the batch is empty.
    """
    img_batch, label_batch, bbox_batch = self.dealer.getRandomTrainBatch(
        batch_size)
    if len(img_batch) == 0:
        # Without at least one image no loss or accuracy tensor exists.
        raise ValueError('trainOne needs a non-empty batch')

    # A missing model directory means this is the first run.
    model_dir = os.path.dirname(self.model_path)
    RESTORE = os.path.exists(model_dir)

    weights = {
        # Down-sampling 1x1 convolution.
        'down': tf.compat.v1.get_variable(
            name='w_down', shape=[1, 1, 2048, 1024]),
        'feature': tf.compat.v1.get_variable(
            name='w_feature', shape=[1, 1, 1024, K * K * 2]),
    }
    biases = {
        # Down-sampling 1x1 convolution.
        'down': tf.compat.v1.get_variable(name='b_down', shape=[1024, ]),
        'feature': tf.compat.v1.get_variable(
            name='b_feature', shape=[K * K * 2, ]),
    }

    # Anchor sizes scaled by NET_SCALE and K; identical for every image.
    anchor_type = [
        [int(x[0] / NET_SCALE / K), int(x[1] / NET_SCALE / K)]
        for x in ANTHORS_TYPIES
    ]

    for index in range(len(img_batch)):
        img = img_batch[index]
        label = label_batch[index]
        bbox = bbox_batch[index]

        # One label tensor per anchor (aspect ratios 1:1, 1:2, 2:1).
        rpn_view = [tf.constant(label[i], dtype=tf.float32)
                    for i in range(9)]

        h, w, mod = numpy.shape(img)
        # Raw image data as a batch of one.
        img = tf.constant(img, shape=(1, h, w, mod), dtype=tf.float32)

        # ResNet-101 without pool1 and pool5:
        # the output width/height is the original width/height / 8.
        net, endpoints = my_resnet(img,
                                   global_pool=False,
                                   num_classes=None,
                                   is_training=True,
                                   reuse=tf.compat.v1.AUTO_REUSE)
        net = tf.nn.conv2d(input=net,
                           filter=weights['down'],
                           strides=[1, 1, 1, 1],
                           padding='VALID')
        net = tf.add(net, biases['down'])

        # RPN branch.
        rpn_accuracy, rpn_result = trainRPN(net, rpn_view)

        # Feature map.
        feature_map = tf.nn.conv2d(input=net,
                                   filter=weights['feature'],
                                   strides=[1, 1, 1, 1],
                                   padding='VALID')
        feature_map = tf.add(feature_map, biases['feature'])

        # Selected anchors; the code indexes each entry as
        # (anchor index, row, column).
        select = DataDealer.chooseClassficationData(label)
        select_img = []
        select_label = []
        select_bbox = []
        for s in select:
            anchor_w, anchor_h = anchor_type[s[0]]
            img_ = feature_map[0,
                               s[1]:(s[1] + anchor_h * K),
                               s[2]:(s[2] + anchor_w * K)]
            img_ = tf.expand_dims(img_, 0)
            select_img.append(
                tf.nn.avg_pool2d(img_,
                                 [1, anchor_h, anchor_w, 1],
                                 [1, anchor_h, anchor_w, 1],
                                 padding='VALID'))
            select_label.append(label[s[0]][s[1]][s[2]])
            select_bbox.append(bbox[s[0]][s[1]][s[2]])

        # Bounding-box branch.
        bbox_accuracy = trainBbox(select, select_img, select_bbox,
                                  select_label)

    # Plain gradient descent on the accumulated total loss.
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(
        learning_rate=learning_rate)
    train = optimizer.minimize(tf.compat.v1.losses.get_total_loss())

    saver = tf.compat.v1.train.Saver(
        tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES))
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        if RESTORE:
            saver.restore(sess, self.model_path)
        else:
            # First run: start from the checkpoint at init_model_path.
            # assign_from_checkpoint_fn only *returns* a loader, so it has to
            # be called with the session; the result used to be discarded and
            # nothing was loaded. Variables that are not in that checkpoint
            # (the heads created in this graph) must be skipped, hence
            # ignore_missing_vars=True.
            init_fn = slim.assign_from_checkpoint_fn(
                self.init_model_path,
                slim.get_variables_to_restore(),
                ignore_missing_vars=True,
                reshape_variables=False)
            # None is returned when no variable matches the checkpoint.
            if init_fn is not None:
                init_fn(sess)

        # Accuracy before any update; evaluated up front so that rounds == 0
        # no longer ends in an unbound local.
        ac = sess.run([rpn_accuracy, bbox_accuracy])
        for _ in range(rounds):
            sess.run(train)

        if not RESTORE and model_dir:
            os.makedirs(model_dir, exist_ok=True)
        saver.save(sess, self.model_path)
    return ac
def __init__(self, model_path=RPN_BATCH_PATH, init_model_path=RES_NET_101):
    """
    Store the checkpoint locations and create the data dealer.

    model_path: where this object's own checkpoint is read and written.
    init_model_path: checkpoint used when no own checkpoint exists yet.
    """
    self.dealer = DataDealer()
    self.model_path, self.init_model_path = model_path, init_model_path
class BatchMaker:
    """Convert (image, question, answers) samples into TFRecord training files."""

    def __init__(self):
        # Answer dictionary.
        self.dealer = DataDealer(ANSWERS_DICT_PATH)
        # Sample-set reader.
        self.reader = DataReader(TRAIN_DATA_TYPE)
        self.reader.set_pos()
        # VGG19 parameters, loaded on first use by __getVGG19Result.
        self.weight_vgg = None
        self.biase_vgg = None
        self.model_word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            GLOVE_WIKI_GENSIM_DATA_PATH)

    def __getOnehot(self, pos, value, len_):
        """
        Build a label list of length ``len_``.

        Every index contained in ``pos`` (a list, or a single index) is set
        to ``value``; all other entries are 0.
        """
        if not isinstance(pos, list):
            pos = [pos]
        # A set gives constant-time membership tests for long label vectors.
        positions = set(pos)
        return [value if i in positions else 0 for i in range(len_)]

    def __getVGG19Result(self, img_batch):
        """
        Run the VGG19 network on ``img_batch`` (a [1, 224, 224, 3] tensor).

        Returns the first (only) item of the evaluated network output.
        """
        model = VGG19model()
        # ``is None`` instead of ``== None``: an equality test can be
        # evaluated element-wise by array-like objects.
        if self.weight_vgg is None and self.biase_vgg is None:
            self.weight_vgg, self.biase_vgg = model.loadWeightsAndBiases(
                VGG19_WEIGHTS_AND_BIASE_PATH, False)
        # NOTE(review): per the original comment this yields the last hidden
        # layer; the meaning of the 0.2 / True arguments is defined by
        # VGG19model.getNet - confirm there.
        out = model.getNet(img_batch, self.weight_vgg, self.biase_vgg, 0.2,
                           True)
        init = tf.global_variables_initializer()
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))
        with tf.Session(config=session_config) as sess:
            sess.run(init)
            data = sess.run(out)
        return data[0]

    def makeTrainBatch(self, start_pos=0, end_pos=0):
        """
        Write the samples in reader positions [start_pos, end_pos) to a
        TFRecord file.

        The file is TRAIN_BATCH_PATH with ``<start_pos>_<end_pos>`` inserted
        before the extension. Greyscale images and questions without a
        sufficiently confident answer are skipped. This is slow.
        """
        batch_dir = os.path.dirname(TRAIN_BATCH_PATH)
        if batch_dir:
            os.makedirs(batch_dir, exist_ok=True)
        # splitext only splits off the final extension, so dots elsewhere in
        # the path (for example a leading '.\\') no longer corrupt the name.
        root, ext = os.path.splitext(TRAIN_BATCH_PATH)
        writer = tf.python_io.TFRecordWriter(
            root + str(start_pos) + '_' + str(end_pos) + ext)
        try:
            self.reader.set_pos(start_pos)
            model = VGG19model()
            # Placeholder for one image.
            img_batch = tf.placeholder(dtype=tf.float32,
                                       shape=[224, 224, 3],
                                       name='IMG')
            img_batch_1 = tf.reshape(img_batch, [1, 224, 224, 3])
            weight_vgg, biase_vgg = model.loadWeightsAndBiases(
                VGG19_WEIGHTS_AND_BIASE_PATH, False)
            # Per the original comment this is the final pooling layer.
            out = model.getNet(img_batch_1, weight_vgg, biase_vgg, 0, True)
            out = tf.reshape(out, [1, 7 * 7 * 512])
            init = tf.global_variables_initializer()
            session_config = tf.ConfigProto(
                gpu_options=tf.GPUOptions(allow_growth=True))
            with tf.Session(config=session_config) as sess:
                sess.run(init)
                print('running...')
                while self.reader.get_pos() < end_pos:
                    now_id = self.reader.get_next_pic_id()
                    img = self.reader.get_pic_data(now_id)
                    # Skip greyscale images (no channel axis).
                    if len(img.shape) == 2:
                        continue
                    img_data = sess.run(out, feed_dict={img_batch: img})
                    img_data = img_data.tolist()[0]
                    for q in self.reader.get_pic_qa(now_id):
                        example = self.__makeExample(q, img_data)
                        if example is not None:
                            writer.write(example.SerializeToString())
        finally:
            # Close the file even when a sample raises.
            writer.close()
        print('over!')

    def __makeExample(self, q, img_data):
        """Build the tf.train.Example for one question, or None to skip it."""
        question = q['question']
        question = question.replace('?', '')
        question = question.replace(',', ' ,')
        question = question.replace('.', ' .')
        # Records hold exactly QUESTION_MAX_LEN * 300 question floats (see the
        # padding below); without truncation a longer question would produce
        # an over-long record.
        question = question.split(' ')[:QUESTION_MAX_LEN]

        # Score each distinct answer: 'yes' counts 1, 'maybe' counts 0.5;
        # an answer is accepted from a score of 3.
        confidence_weight = {'yes': 1, 'maybe': 0.5}
        answers = {}
        for a in q['answers']:
            answer = a['answer']
            weight = confidence_weight.get(a['answer_confidence'], 0)
            answers[answer] = answers.get(answer, 0) + weight
        answers_list = [self.dealer.deal(key)[1]
                        for key, score in answers.items() if score >= 3]
        # Skip questions that have no accepted answer.
        if not answers_list:
            return None
        label = self.__getOnehot(answers_list, 1, self.dealer.getWordNum())

        data = []
        for word in question:
            try:
                vector = self.model_word2vec[word]
            except KeyError:
                # Words missing from the embedding vocabulary become zeros.
                data.extend([0] * 300)
            else:
                data.extend(vector)
        # Zero-pad to QUESTION_MAX_LEN words.
        data.extend([0] * ((QUESTION_MAX_LEN - len(question)) * 300))

        return tf.train.Example(features=tf.train.Features(
            feature={
                # len = self.dealer.getWordNum()
                "answer": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=label)),
                # len = QUESTION_MAX_LEN * 300
                "question": tf.train.Feature(
                    float_list=tf.train.FloatList(value=data)),
                # len = 7 * 7 * 512
                'img': tf.train.Feature(
                    float_list=tf.train.FloatList(value=img_data)),
                # len = 1
                'question_len': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[len(question)])),
            }))