Пример #1
0
	def create_datasets(self, db_name, dir_prefix, train_percent=0.6, validation_percent=0.2, test_percent=0.2):
		"""
		Splits into train, test and validation datasets and builds them.
		From the given tegaki database name.
		@precondition(train_percent + validation_percent + test_percent == 1.0)
		"""
		db_file = "unipen_db/" + db_name + ".chardb"
		charcol = CharacterCollection(db_file)

		num_chars = charcol.get_total_n_characters()
		print "total chars", num_chars
		chars = charcol.get_random_characters_gen(num_chars)

		train_size = int(num_chars * train_percent)
		validation_size = int(num_chars * validation_percent)
		if (train_percent + validation_percent + test_percent) == 1.0:
			# all the db is used
			test_size = num_chars - train_size - validation_size
		else:
			# only a fraction of the db is used
			test_size = int(num_chars * test_percent)

		print 'train set size:', train_size
		self._create_dataset(chars, train_size, dir_prefix + '_train_' + str(int(train_percent * 100)) + '.nc')
		print 'validation set size:', validation_size
		if validation_percent != 0.0:
			self._create_dataset(chars, validation_size,
			                     dir_prefix + '_validation_' + str(int(validation_percent * 100)) + '.nc')
		print 'test set size:', test_size
		if test_percent != 0.0:
			self._create_dataset(chars, test_size, dir_prefix + '_test_' + str(int(test_percent * 100)) + '.nc')
Пример #2
0
	def changeDatabase(self):
		db_file = QtGui.QFileDialog.getOpenFileName(self, "Open database",
		                                            QtCore.QDir.currentPath())
		db_file = str(db_file)
		if db_file and os.path.splitext(db_file)[1] == '.chardb':
			charcol = CharacterCollection(db_file);
			print "chars in db:", charcol.get_total_n_characters()
			self.char_gen = charcol.get_random_characters_gen(charcol.get_total_n_characters())
			self.random()
		else:
			self.char_gen = None