Exemplo n.º 1
0
	def run_validation(self, session, tg, num_classifiers_to_test, feed_dict, specific_classifiers=None):
		"""Run validation for one or more classifiers and log per-classifier
		and overall f1/precision/recall summaries to the validation writer.

		Args:
			session: active TF session used to evaluate summary ops.
			tg: training graph object exposing a ``classifiers`` list.
			num_classifiers_to_test: number of classifiers to validate when no
				explicit list is given.
			feed_dict: base feed dict forwarded to ``executeTrainStep``.
			specific_classifiers: optional list of classifier indices; when
				empty/None, classifiers ``0..num_classifiers_to_test-1`` are run
				and a cross-classifier average summary is also written.
		"""
		avg_f1, avg_prec, avg_rec = 0.0, 0.0, 0.0
		# NOTE: default changed from a mutable `[]` to None (same behavior,
		# avoids the shared-mutable-default pitfall).
		if specific_classifiers is None:
			specific_classifiers = []
		run_on_classifiers = range(num_classifiers_to_test) if len(specific_classifiers) == 0 else specific_classifiers
		for j in run_on_classifiers:
			print("Running validation tests on classifier " + str(j))
			clsfr = tg.classifiers[j]
			# Validate on the whole validation split in a single batch.
			self.valid_batch.batch_size = len(self.valid_batch.dataset[id2label(j)])
			num_iters = 1  # len(self.valid_batch.dataset[id2label(j)]) // self.valid_batch.batch_size
			self.valid_batch.resetCursors(j)
			# Average f1, precision, recall across all batches.
			f1_s, prec_s, rec_s = 0.0, 0.0, 0.0
			for i in range(num_iters):
				batch = self.valid_batch.classifier_next_batch(j)
				op = executeTrainStep(session, j, tg, batch['nodes'], batch['labels'], feed_dict, True)
				pred = hotEncodeDistribution(op['classifier_ops'])

				f1, prec, rec = get_accuracy([pred], [batch])
				f1_summ, prec_summ, rec_summ = session.run([clsfr.f1_summary, clsfr.precision_summary, clsfr.recall_summary],
															feed_dict={clsfr.precision:prec, clsfr.f1:f1, clsfr.recall:rec})
				self.valid_summary_writer.add_summary(f1_summ, i)
				self.valid_summary_writer.add_summary(prec_summ, i)
				self.valid_summary_writer.add_summary(rec_summ, i)
				f1_s += f1
				prec_s += prec
				rec_s += rec
			self.precision_accum += prec_s
			# BUGFIX: was `+= rec` (only the last batch's recall); accumulate the
			# batch-summed recall to mirror the precision accumulator above.
			self.recall_accum += rec_s
			f1_s /= num_iters
			prec_s /= num_iters
			rec_s /= num_iters
			f1_summ, prec_summ, rec_summ = session.run([clsfr.avg_f1_summary, clsfr.avg_precision_summary, clsfr.avg_recall_summary],
														feed_dict={clsfr.avg_f1:f1_s, clsfr.avg_precision:prec_s, clsfr.avg_recall:rec_s})
			print("f1: {}, precision: {}, recall: {}".format(f1_s, prec_s, rec_s))
			self.valid_summary_writer.add_summary(f1_summ, self.global_counter)
			self.valid_summary_writer.add_summary(prec_summ, self.global_counter)
			self.valid_summary_writer.add_summary(rec_summ, self.global_counter)
			avg_f1 += f1_s
			avg_prec += prec_s
			avg_rec += rec_s
		# Only emit the cross-classifier average when we ran the full sweep.
		if len(specific_classifiers) == 0:
			avg_f1 = avg_f1 / num_classifiers_to_test
			avg_prec = avg_prec / num_classifiers_to_test
			avg_rec = avg_rec / num_classifiers_to_test
			f1_summ, prec_summ, rec_summ = session.run([self.avg_f1_summary, self.avg_prec_summary, self.avg_rec_summary],
														feed_dict={self.avg_f1:avg_f1, self.avg_prec:avg_prec, self.avg_rec:avg_rec})
			self.valid_summary_writer.add_summary(f1_summ, self.global_counter)
			self.valid_summary_writer.add_summary(prec_summ, self.global_counter)
			self.valid_summary_writer.add_summary(rec_summ, self.global_counter)
			print("avg f1: {}, avg prec: {}, avg rec: {}".format(avg_f1, avg_prec, avg_rec))
		self.global_counter += 1
Exemplo n.º 2
0
	def create_record(self, index):
		"""Assemble one training batch for classifier ``index``.

		Reads ``self.batch_size`` entries from the dataset split for the
		classifier's label, advancing (and wrapping) the per-classifier cursor,
		and returns a dict with node ids and one-hot labels
		(``[1,0]`` = positive, ``[0,1]`` = negative).
		"""
		label_key = id2label(index)	# dataset is keyed by label name
		records = self.dataset[label_key]
		nodes, labels = [], []
		for _ in range(self.batch_size):
			entry = records[self.cursors[index]]
			nodes.append(node2id(entry[0]))
			labels.append([1, 0] if entry[1] == 1 else [0, 1])
			# Advance cursor, wrapping around the split so batches can repeat.
			self.cursors[index] = (self.cursors[index] + 1) % len(records)
		return {'nodes': nodes, 'labels': labels}
Exemplo n.º 3
0
def trainSingleClassifier(classifierId, graph, session, trainingGraph, dataset, batch_size, embeddings, batchGen, num_epochs, 
						  train_summary_writer, saver, train_model_file, is_training, valid_test, summary_frequency=-1):
	"""Train one classifier head for ``num_epochs`` passes over its dataset.

	Runs ``executeTrainStep`` per batch, writes per-step summaries, periodically
	checkpoints and validates (every ``summary_frequency`` steps when > 0), and
	finally logs the macro-F1 computed from the averaged precision/recall.

	Args:
		classifierId: index of the classifier head to train.
		graph: the tf.Graph holding the model; summary ops are created in it.
		session: active TF session.
		trainingGraph: training-graph object (placeholders, classifier ops).
		dataset: mapping label-name -> records; sized via ``id2label``.
		batch_size: records per training batch.
		embeddings: embedding matrix fed into ``trainingGraph.embeddings``.
		batchGen: batch generator exposing ``classifier_next_batch``.
		num_epochs: number of passes over the classifier's records.
		train_summary_writer: writer for training summaries.
		saver / train_model_file: checkpointing.
		is_training: flag forwarded to ``executeTrainStep``.
		valid_test: validation helper exposing ``run_validation``.
		summary_frequency: checkpoint/validation interval; <= 0 disables.
	"""
	tg = trainingGraph
	with graph.as_default():
		# BUGFIX: the summary ops below were previously created *outside* this
		# context, attaching them to TF's default graph while their placeholder
		# inputs live in `graph` — broken whenever `graph` is not the default.
		precision_tf = tf.placeholder(shape=[], dtype=tf.float32, name='precision')
		recall_tf = tf.placeholder(shape=[], dtype=tf.float32, name='recall')
		f1_tf = tf.placeholder(shape=[], dtype=tf.float32, name='f1')
		with tf.variable_scope('label_classifier-' + str(classifierId)):
			macro_f1_tf = tf.placeholder(shape=[], dtype=tf.float32, name='macro_f1')
		precision_summary = tf.summary.scalar('precision_summary', precision_tf)
		recall_summary = tf.summary.scalar('recall_summary', recall_tf)
		f1_summary = tf.summary.scalar('f1_summary', f1_tf)
		macro_f1_summary = tf.summary.scalar('macro_f1_summary', macro_f1_tf)
		stat_summary = tf.summary.merge([precision_summary, recall_summary, f1_summary])
	total_prec = 0.0
	total_rec = 0.0

	recordLength = len(dataset[id2label(classifierId)])
	num_iters = (recordLength // batch_size) * num_epochs
	# BUGFIX: guard against ZeroDivisionError in the averages below when the
	# classifier has fewer records than one full batch.
	if num_iters == 0:
		print("Classifier {} has no full batch of training data; skipping".format(classifierId))
		return
	feed_dict = {tg.embeddings: embeddings}
	print("Classifier {} will take {} iters".format(classifierId, num_iters))
	for i in range(num_iters):
		batch = batchGen.classifier_next_batch(classifierId)
		op = executeTrainStep(session, classifierId, tg, batch['nodes'], batch['labels'], feed_dict, is_training)
		net_loss = op['net_loss']
		pred = [hotEncodeDistribution(op['classifier_ops'])]

		f1, prec, rec = get_accuracy(pred, [batch])
		total_rec += rec
		total_prec += prec
		print("step: {} loss:{} f1:{}".format(i, net_loss, f1))
		# Forward the per-step summaries computed inside the train step.
		for summ in op['summaries_calculated']:
			train_summary_writer.add_summary(summ, i)
		if summary_frequency > 0 and i % summary_frequency == 0:
			saver.save(session, train_model_file, global_step=i)
			valid_test.run_validation(session, tg, 1, feed_dict, [classifierId])
			print(pred, batch['labels'])
	valid_test.run_validation(session, tg, 1, feed_dict, [classifierId])
	avg_prec = total_prec / num_iters
	avg_rec = total_rec / num_iters
	# Epsilon keeps the harmonic mean defined when both averages are zero.
	macro_f1 = 2 * avg_prec * avg_rec / (avg_prec + avg_rec + 1e-7)
	macro_f1_summ = session.run([macro_f1_summary], feed_dict={macro_f1_tf: macro_f1})
	train_summary_writer.add_summary(macro_f1_summ[0], 0)
Exemplo n.º 4
0
def hotDecode(hotVec):
    """Decode a (multi-)hot vector into label names.

    Every position whose score is >= 0.5 is treated as active and mapped to
    its label via ``id2label``; the active labels are returned as a list.
    """
    labels = []
    for idx, score in enumerate(hotVec):
        if score >= 0.5:
            labels.append(id2label(idx))
    return labels