def make_character_datasets_and_id_maps(self, sent_sets, id_maps, text_or_pickle,
                                            add_word_end_marker=True, add_word_start_marker=False,
                                            add_sentence_end_marker=False, add_sentence_start_marker=False):
        """Build a character-level counterpart of every sentence set and save them.

        ``sent_sets`` and ``id_maps`` are walked in parallel by index. For each
        pair a fresh ``SubwordMap`` is created, a character-level ID map is
        obtained from it via ``convert_to_characters``, the sentences are
        converted with ``self.convert_sents_to_subwords`` and the result is
        passed through ``self.subword_sort``. Once every set is processed the
        collected sentences and maps are handed to ``self.save_sets`` together
        with ``self.data_out_char_level``.

        Args:
            sent_sets: Indexable collection of sentence sets.
            id_maps: ID maps, one per entry of ``sent_sets`` (matched by index).
            text_or_pickle: Forwarded unchanged to ``self.save_sets``.
            add_word_end_marker: Forwarded to ``self.convert_sents_to_subwords``.
            add_word_start_marker: Forwarded to ``self.convert_sents_to_subwords``.
            add_sentence_end_marker: Accepted but not read by this method.
            add_sentence_start_marker: Accepted but not read by this method.

        Returns:
            None. The results are only passed on to ``self.save_sets``.
        """
        print('PennTreeBankParser: Making character level datasets')
        converted_sets = []
        converted_maps = []

        for idx, sentences in enumerate(sent_sets):
            # Every set gets its own SubwordMap instance.
            subword_map = SubwordMap(self.special_list)
            word_ids = id_maps[idx]
            char_ids = subword_map.convert_to_characters(word_ids)
            converted_maps.append(char_ids)

            subword_sents = self.convert_sents_to_subwords(
                sentences, word_ids, char_ids, subword_map,
                add_word_end_marker, add_word_start_marker)
            self.subword_sort(subword_sents, char_ids)
            converted_sets.append(subword_sents)

            # Presumably a diagnostic dump of the entries for IDs 0..19 -- confirm
            # against SubwordMap before relying on its output.
            char_ids.print_IDs_range(range(20))

        self.save_sets(converted_sets, converted_maps, self.data_out_char_level, text_or_pickle)
    def make_morphological_datasets_and_id_maps(self, sent_sets, id_maps, text_or_pickle,
                                                add_word_end_marker=True, add_word_start_marker=False,
                                                add_sentence_end_marker=False, add_sentence_start_marker=False,
                                                morph_train_params=None):
        """Build a morpheme-level counterpart of every sentence set and save them.

        A single ``SubwordMap`` is created and ``train_morph_parser`` is called
        on it with ``id_maps[0]`` only (presumably the training split --
        confirm with the caller). That same map is then used for every set:
        a morpheme-level ID map is obtained via ``convert_to_morphemes``, the
        sentences are converted with ``self.convert_sents_to_subwords`` and
        the result is passed through ``self.subword_sort``. Finally the
        collected sentences and maps are handed to ``self.save_sets`` together
        with ``self.data_out_morph_level``.

        Args:
            sent_sets: Indexable collection of sentence sets.
            id_maps: ID maps, one per entry of ``sent_sets`` (matched by
                index). Must contain at least one entry, since ``id_maps[0]``
                is always read.
            text_or_pickle: Forwarded unchanged to ``self.save_sets``.
            add_word_end_marker: Forwarded to ``self.convert_sents_to_subwords``.
            add_word_start_marker: Forwarded to ``self.convert_sents_to_subwords``.
            add_sentence_end_marker: Accepted but not read by this method.
            add_sentence_start_marker: Accepted but not read by this method.
            morph_train_params: Passed as ``train_params`` to
                ``convert_to_morphemes``. ``None`` (the default) means
                ``{'count_func': 'log'}``.

        Returns:
            None. The results are only passed on to ``self.save_sets``.

        Raises:
            IndexError: If ``id_maps`` is an empty sequence or has fewer
                entries than ``sent_sets``.
        """
        # Build the default per call: a dict literal in the signature would be
        # one shared object, so a mutation made downstream would silently
        # carry over into every later call.
        if morph_train_params is None:
            morph_train_params = {'count_func': 'log'}

        # NOTE(review): 'morpholigical' is misspelled, but the runtime message
        # is kept as-is in case anything matches on the existing output.
        print('PennTreeBankParser: Making morpholigical level datasets')
        morph_sent_sets = []
        morph_id_maps = []

        # One shared map, trained once on the first ID map only.
        morph_map = SubwordMap(self.special_list)
        morph_map.train_morph_parser(id_maps[0])

        for idx, sentences in enumerate(sent_sets):
            word_ids = id_maps[idx]
            morph_ids = morph_map.convert_to_morphemes(word_ids, train_params=morph_train_params)
            morph_id_maps.append(morph_ids)

            morph_sents = self.convert_sents_to_subwords(sentences, word_ids, morph_ids, morph_map,
                                                         add_word_end_marker, add_word_start_marker)
            self.subword_sort(morph_sents, morph_ids)
            morph_sent_sets.append(morph_sents)
        self.save_sets(morph_sent_sets, morph_id_maps, self.data_out_morph_level, text_or_pickle)