def clear_modules_caching(self):
    Grammar.clear_caching()
    ConstraintSet.clear_caching()
    Constraint.clear_caching()
    Word.clear_caching()
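# Note: the classes above memoize expensive computations at module level, so
# stale entries can leak between test cases. A typical call site (sketch only,
# assuming a unittest-style TestCase) would be:
#
#     def setUp(self):
#         self.clear_modules_caching()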
file_log_formatter = logging.Formatter(
    '%(asctime)s %(levelname)s %(name)s %(message)s', "%Y-%m-%d %H:%M:%S")
file_log_handler = logging.FileHandler(log_file_path, mode='w')
file_log_handler.setFormatter(file_log_formatter)
logger.addHandler(file_log_handler)

feature_tables_dir_path = join(dir_name, "tests/fixtures/feature_tables")
constraint_sets_dir_path = join(dir_name, "tests/fixtures/constraint_sets")
feature_table_file_path = join(feature_tables_dir_path,
                               current_simulation.feature_table_file_name)
feature_table = FeatureTable.load(feature_table_file_path)
constraint_set_file_path = join(constraint_sets_dir_path,
                                current_simulation.constraint_set_file_name)
constraint_set = ConstraintSet.load(constraint_set_file_path)

corpus = Corpus(current_simulation.corpus)
data = corpus.get_words()
max_word_length_in_data = max(len(word) for word in data)
lexicon = Lexicon(data, max_word_length_in_data)
grammar = Grammar(constraint_set, lexicon)
hypothesis = Hypothesis(grammar, data)

if hasattr(current_simulation, "target_energy"):
    target_energy = current_simulation.target_energy
else:
    target_energy = None
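# target_energy, when a simulation module provides it, is presumably the known
# energy of the target hypothesis. A typical use (sketch only, assuming a
# get_energy() accessor on Hypothesis) is a convergence check such as:
#
#     if target_energy is not None and hypothesis.get_energy() <= target_energy:
#         ...  # the learner has reached (or beaten) the target grammar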
configurations_dict = {
    "ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS": True,
    "DATA_ENCODING_LENGTH_MULTIPLIER": 100,
    "GRAMMAR_ENCODING_LENGTH_MULTIPLIER": 1,
    "CORPUS_DUPLICATION_FACTOR": 1,
}

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("vowel_harmony_simple_feature_table.json"))
print(feature_table)
initial_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("vowel_harmony_permuted_constraint_set.json"))
target_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("vowel_harmony_simple_constraint_set.json"))

target_stems = ["unu", "uku", "nunu", "kunu", "nuku", "kuku",
                "ini", "iki", "nini", "kini", "niki", "kiki"]
underlying_suffixes = ["kun"]
surface_suffixes = ["kun", "kin"]


def _get_anchor_vowel(word):
    if "i" in word:
        return "i"
    elif "u" in word:
        return "u"
    else:
        return None  # assumed fallback when no harmonizing vowel is present
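# Sketch (illustration only, not part of the original file): the surface
# suffix follows the stem's anchor vowel -- underlying /kun/ surfaces as "kin"
# after i-stems and as "kun" after u-stems. The helper name is hypothetical.
def _surface_suffix_for_stem(stem, underlying_suffix="kun"):
    anchor = _get_anchor_vowel(stem)
    if anchor == "i":
        return underlying_suffix.replace("u", "i")  # "kun" -> "kin"
    return underlying_suffix

# e.g. _surface_suffix_for_stem("nini") == "kin"
#      _surface_suffix_for_stem("kuku") == "kun"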
from grammar import Grammar
from feature_table import FeatureTable
from constraint_set import ConstraintSet
from lexicon import Lexicon
from lexicon import Word
from configuration import Configuration
from tests.persistence_tools import get_feature_table_fixture, get_constraint_set_fixture
from simulations.dag_zook import configurations_dict
from constraint import VowelHarmonyConstraint, PhonotacticConstraint
from transducer import Transducer
from debug_tools import write_to_dot as dot

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("vowel_harmony_simple_feature_table.json"))
constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("vowel_harmony_simple_constraint_set.json"))

data = ["unu", "uku", "nunu", "kunu", "nuku", "kuku",
        "ini", "iki", "nini", "kini", "niki", "kiki",
        "unukun", "ukukun", "nunukun", "kunukun", "nukukun", "kukukun",
        "inikin", "ikikin", "ninikin", "kinikin", "nikikin", "kikikin"]
max_word_length_in_data = max(len(word) for word in data)
lexicon = Lexicon(data, max_word_length_in_data)
grammar = Grammar(constraint_set, lexicon)
grammar_transducer = grammar.get_transducer()

# Underlying forms: every suffixed word takes the uniform suffix /kun/; the
# surface "kin" forms are derived by vowel harmony.
# NOTE: the six /...kun/ entries for the i-stems are assumed from that pattern.
underlying_forms_list = ["unu", "uku", "nunu", "kunu", "nuku", "kuku",
                         "ini", "iki", "nini", "kini", "niki", "kiki",
                         "unukun", "ukukun", "nunukun", "kunukun", "nukukun", "kukukun",
                         "inikun", "ikikun", "ninikun", "kinikun", "nikikun", "kikikun"]
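# The imported write_to_dot helper can dump the transducer for visual
# inspection with Graphviz (sketch only; the exact signature is assumed):
#
#     dot(grammar_transducer, "vowel_harmony_grammar_transducer")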
configurations_dict = {
    "ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS": True,
    "DATA_ENCODING_LENGTH_MULTIPLIER": 25,
    "GRAMMAR_ENCODING_LENGTH_MULTIPLIER": 1,
    "CORPUS_DUPLICATION_FACTOR": 1,
}

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("plural_english_feature_table.json"))
initial_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture(
        "dag_zook_devoicing_permutations_constraint_set.json"))
target_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("dag_zook_devoicing_constraint_set.json"))

target_stems = ['dag', 'kat', 'dot', 'kod', 'gas', 'toz', 'ata', 'aso']

# Suffixes -- underlying representations "zook", "gos", and "dod"
# (surface forms after devoicing: "sook", "kos", and "tod").
target_stems_and_suffixes = [
    'dagdod', 'daggos', 'dagzook',
    'kattod', 'katkos', 'katsook',
    'dottod', 'dotkos', 'dotsook',
    'koddod', 'kodgos', 'kodzook',
    'gastod', 'gaskos', 'gassook',
    'tozdod', 'tozgos', 'tozzook',
    'atadod', 'atagos', 'atazook',
    'asodod', 'asogos', 'asozook',
]
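# Sketch (illustration only, not part of the original file): the list above is
# the cross product of target_stems with the three underlying suffixes, where
# a suffix-initial voiced obstruent devoices after a stem-final voiceless
# segment (kat + dod -> kattod, kat + zook -> katsook). Names are hypothetical.
_DEVOICE = {"d": "t", "g": "k", "z": "s"}
_VOICELESS = set("kts")

def _attach_suffix(stem, suffix):
    if stem[-1] in _VOICELESS and suffix[0] in _DEVOICE:
        return stem + _DEVOICE[suffix[0]] + suffix[1:]
    return stem + suffix

# e.g. _attach_suffix("kat", "zook") == "katsook"
#      _attach_suffix("dag", "zook") == "dagzook"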
from grammar import Grammar
from feature_table import FeatureTable
from constraint_set import ConstraintSet
from lexicon import Lexicon
from lexicon import Word
from configuration import Configuration
from tests.persistence_tools import get_feature_table_fixture, get_constraint_set_fixture
from simulations.dag_zook import configurations_dict
from constraint import VowelHarmonyConstraint, PhonotacticConstraint
from transducer import Transducer
from debug_tools import write_to_dot as dot

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("tuvan_feature_table.json"))
constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("tuvan_constraint_set.json"))

data = [
    'maslo', 'maslolar', 'buga', 'bugalar', 'ygy', 'ygyler',
    'teve', 'teveler', 'orun', 'orunnar', 'sivi', 'siviler',
    'ygyner', 'ygybygyler'
]
max_word_length_in_data = max(len(word) for word in data)
lexicon = Lexicon(data, max_word_length_in_data)
grammar = Grammar(constraint_set, lexicon)
grammar_transducer = grammar.get_transducer()

# Underlying forms: the plural suffix is uniformly /-lar/; the surface "-ler"
# variants are derived by backness harmony.
underlying_forms_list = [
    'maslo', 'maslolar', 'buga', 'bugalar', 'ygy', 'ygylar',
    'teve', 'tevelar', 'orun', 'orunnar', 'sivi', 'sivilar',
    'ygynar', 'ygybygylar'
]
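# Sketch (illustration only, not part of the original file): the alternations
# above follow two processes -- the suffix vowel agrees in backness with the
# stem's last vowel (maslolar vs. teveler), and suffix-initial /l/ surfaces as
# [n] after a stem-final nasal (orunnar). The helper name is hypothetical and
# it assumes the stem contains at least one vowel.
def _tuvan_plural(stem):
    last_vowel = next(c for c in reversed(stem) if c in "aoueiy")
    vowel = "e" if last_vowel in "eiy" else "a"
    onset = "n" if stem.endswith("n") else "l"
    return stem + onset + vowel + "r"

# e.g. _tuvan_plural("maslo") == "maslolar"
#      _tuvan_plural("teve") == "teveler"
#      _tuvan_plural("orun") == "orunnar"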
from debug_tools import write_to_dot as dot, print_empty_line

configurations_dict = {
    "ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS": False,
    "DATA_ENCODING_LENGTH_MULTIPLIER": 25,
    "GRAMMAR_ENCODING_LENGTH_MULTIPLIER": 1,
    "CORPUS_DUPLICATION_FACTOR": 1,
}

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("abnese_feature_table.json"))
faith_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("faith_constraint_set.json"))
target_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("abnese_target_constraint_set.json"))

prefix = "aab"
target_stems = [
    'baabab', 'babaa', 'babaab', 'bababa', 'bababab', 'baabaab',
    'baaabaa', 'babaaaab', 'bababaa', 'babababaa', "aaabab", "ababaa"
]
target_prefix_and_stem_concat_only = [(prefix + stem) for stem in target_stems]
# Apply the epenthesis rule: an illicit "bb" cluster created at the
# prefix-stem boundary is broken up by inserting "a" ("bb" -> "bab").
target_prefix_and_stem = [
    word.replace("bb", "bab") for word in target_prefix_and_stem_concat_only
]
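# Worked example: prefix "aab" + stem "baabab" concatenates to "aabbaabab",
# and the repair yields "aababaabab":
#
#     >>> "aabbaabab".replace("bb", "bab")
#     'aababaabab'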