def create_datasets(self, db_name, dir_prefix, train_percent=0.6, validation_percent=0.2, test_percent=0.2): """ Splits into train, test and validation datasets and builds them. From the given tegaki database name. @precondition(train_percent + validation_percent + test_percent == 1.0) """ db_file = "unipen_db/" + db_name + ".chardb" charcol = CharacterCollection(db_file) num_chars = charcol.get_total_n_characters() print "total chars", num_chars chars = charcol.get_random_characters_gen(num_chars) train_size = int(num_chars * train_percent) validation_size = int(num_chars * validation_percent) if (train_percent + validation_percent + test_percent) == 1.0: # all the db is used test_size = num_chars - train_size - validation_size else: # only a fraction of the db is used test_size = int(num_chars * test_percent) print 'train set size:', train_size self._create_dataset(chars, train_size, dir_prefix + '_train_' + str(int(train_percent * 100)) + '.nc') print 'validation set size:', validation_size if validation_percent != 0.0: self._create_dataset(chars, validation_size, dir_prefix + '_validation_' + str(int(validation_percent * 100)) + '.nc') print 'test set size:', test_size if test_percent != 0.0: self._create_dataset(chars, test_size, dir_prefix + '_test_' + str(int(test_percent * 100)) + '.nc')
def changeDatabase(self): db_file = QtGui.QFileDialog.getOpenFileName(self, "Open database", QtCore.QDir.currentPath()) db_file = str(db_file) if db_file and os.path.splitext(db_file)[1] == '.chardb': charcol = CharacterCollection(db_file); print "chars in db:", charcol.get_total_n_characters() self.char_gen = charcol.get_random_characters_gen(charcol.get_total_n_characters()) self.random() else: self.char_gen = None