def clear_modules_caching(self):
    # Reset the cached state of the core modules between runs.
    Grammar.clear_caching()
    ConstraintSet.clear_caching()
    Constraint.clear_caching()
    Word.clear_caching()
    # Write this run's log to a dedicated file.
    file_log_formatter = logging.Formatter(
        '%(asctime)s %(levelname)s %(name)s %(message)s', "%Y-%m-%d %H:%M:%S")
    file_log_handler = logging.FileHandler(log_file_path, mode='w')
    file_log_handler.setFormatter(file_log_formatter)
    logger.addHandler(file_log_handler)

    feature_tables_dir_path = join(dir_name, "tests/fixtures/feature_tables")
    constraint_sets_dir_path = join(dir_name, "tests/fixtures/constraint_sets")

    feature_table_file_path = join(feature_tables_dir_path,
                                   current_simulation.feature_table_file_name)
    feature_table = FeatureTable.load(feature_table_file_path)

    constraint_set_file_path = join(
        constraint_sets_dir_path, current_simulation.constraint_set_file_name)
    constraint_set = ConstraintSet.load(constraint_set_file_path)

    corpus = Corpus(current_simulation.corpus)

    data = corpus.get_words()
    max_word_length_in_data = max(len(word) for word in data)
    lexicon = Lexicon(data, max_word_length_in_data)

    grammar = Grammar(constraint_set, lexicon)
    hypothesis = Hypothesis(grammar, data)

    # Use the simulation's target energy if one is declared.
    target_energy = getattr(current_simulation, "target_energy", None)
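
    # A follow-up sketch (assumes Hypothesis exposes a get_energy() method,
    # which is not shown in this excerpt): log the starting energy next to
    # the simulation's declared target, if any.
    initial_energy = hypothesis.get_energy()
    if target_energy is not None:
        logger.info("initial energy: %s, target energy: %s",
                    initial_energy, target_energy)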
Example #3
# Imports assumed from the sibling examples; the FeatureTable / ConstraintSet
# module paths follow the flat layout used there.
from configuration import Configuration
from feature_table import FeatureTable
from constraint_set import ConstraintSet
from tests.persistence_tools import get_feature_table_fixture, get_constraint_set_fixture

configurations_dict = {
    "ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS": True,
    "DATA_ENCODING_LENGTH_MULTIPLIER": 100,
    "GRAMMAR_ENCODING_LENGTH_MULTIPLIER": 1,
    "CORPUS_DUPLICATION_FACTOR": 1,

}

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(get_feature_table_fixture("vowel_harmony_simple_feature_table.json"))
print(feature_table)

initial_constraint_set = ConstraintSet.load(get_constraint_set_fixture("vowel_harmony_permuted_constraint_set.json"))
target_constraint_set = ConstraintSet.load(get_constraint_set_fixture("vowel_harmony_simple_constraint_set.json"))

target_stems = ["unu", "uku", "nunu", "kunu", "nuku", "kuku",
                "ini", "iki", "nini", "kini", "niki", "kiki"]

underlying_suffixes = ["kun"]
surface_suffixes = ["kun", "kin"]


def _get_anchor_vowel(word):
    if "i" in word:
        return "i"
    elif "u" in word:
        return "u"
    else:
        return None  # assumed fallback: the word contains no anchor vowel
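
# Examples of the anchor-vowel helper above:
# _get_anchor_vowel("kunu") -> "u"; _get_anchor_vowel("kini") -> "i".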
Example #4
from lexicon import Lexicon
from lexicon import Word
from configuration import Configuration
from tests.persistence_tools import get_feature_table_fixture, get_constraint_set_fixture
from simulations.dag_zook import configurations_dict
from constraint import VowelHarmonyConstraint, PhonotacticConstraint
from transducer import Transducer
from debug_tools import write_to_dot as dot
# Grammar, FeatureTable, and ConstraintSet are used below; the latter two
# import paths are assumed, following the flat layout above.
from grammar import Grammar
from feature_table import FeatureTable
from constraint_set import ConstraintSet

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(get_feature_table_fixture("vowel_harmony_simple_feature_table.json"))
constraint_set = ConstraintSet.load(get_constraint_set_fixture("vowel_harmony_simple_constraint_set.json"))

data = ["unu", "uku", "nunu", "kunu", "nuku", "kuku",
        "ini", "iki", "nini", "kini", "niki", "kiki",
        "unukun", "ukukun", "nunukun", "kunukun", "nukukun", "kukukun",
        "inikin", "ikikin", "ninikin", "kinikin", "nikikin", "kikikin"]
max_word_length_in_data = max(len(word) for word in data)
lexicon = Lexicon(data, max_word_length_in_data)

grammar = Grammar(constraint_set, lexicon)
grammar_transducer = grammar.get_transducer()
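
# write_to_dot is imported above but never called in this excerpt; a plausible
# use (argument signature assumed) is dumping the transducer for inspection:
dot(grammar_transducer, "grammar_transducer")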


# Final line assumed: surface "-kin" corresponds to underlying /-kun/ under
# vowel harmony, matching the surface data above.
underlying_forms_list = ["unu", "uku", "nunu", "kunu", "nuku", "kuku",
        "ini", "iki", "nini", "kini", "niki", "kiki",
        "unukun", "ukukun", "nunukun", "kunukun", "nukukun", "kukukun",
        "inikun", "ikikun", "ninikun", "kinikun", "nikikun", "kikikun"]
Example #5

# Imports assumed from the sibling examples (module paths as above);
# Lexicon is used by the sketch at the end of this example.
from configuration import Configuration
from feature_table import FeatureTable
from constraint_set import ConstraintSet
from lexicon import Lexicon
from tests.persistence_tools import get_feature_table_fixture, get_constraint_set_fixture

configurations_dict = {
    "ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS": True,
    "DATA_ENCODING_LENGTH_MULTIPLIER": 25,
    "GRAMMAR_ENCODING_LENGTH_MULTIPLIER": 1,
    "CORPUS_DUPLICATION_FACTOR": 1,
}

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("plural_english_feature_table.json"))

initial_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture(
        "dag_zook_devoicing_permutations_constraint_set.json"))
target_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("dag_zook_devoicing_constraint_set.json"))

target_stems = ['dag', 'kat', 'dot', 'kod', 'gas', 'toz', 'ata', 'aso']

# Suffixes: underlying representations "zook", "gos", and "dod"
# (surface forms: "sook", "kos", and "tod").

target_stems_and_suffixes = [
    'dagdod', 'daggos', 'dagzook', 'kattod', 'katkos', 'katsook', 'dottod',
    'dotkos', 'dotsook', 'koddod', 'kodgos', 'kodzook', 'gastod', 'gaskos',
    'gassook', 'tozdod', 'tozgos', 'tozzook', 'atadod', 'atagos', 'atazook',
    'asodod', 'asogos', 'asozook'
]
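
# A minimal continuation sketch, mirroring the data/lexicon setup of the
# neighbouring examples; the variable names below are this example's own.
data = target_stems + target_stems_and_suffixes
max_word_length_in_data = max(len(word) for word in data)
lexicon = Lexicon(data, max_word_length_in_data)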
Example #6
from grammar import Grammar
from lexicon import Lexicon
from lexicon import Word
from configuration import Configuration
from tests.persistence_tools import get_feature_table_fixture, get_constraint_set_fixture
from simulations.dag_zook import configurations_dict
from constraint import VowelHarmonyConstraint, PhonotacticConstraint
from transducer import Transducer
from debug_tools import write_to_dot as dot
# FeatureTable / ConstraintSet import paths assumed, as in Example #4.
from feature_table import FeatureTable
from constraint_set import ConstraintSet

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("tuvan_feature_table.json"))
constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("tuvan_constraint_set.json"))

data = [
    'maslo', 'maslolar', 'buga', 'bugalar', 'ygy', 'ygyler', 'teve', 'teveler',
    'orun', 'orunnar', 'sivi', 'siviler', 'ygyner', 'ygybygyler'
]
max_word_length_in_data = max(len(word) for word in data)
lexicon = Lexicon(data, max_word_length_in_data)

grammar = Grammar(constraint_set, lexicon)
grammar_transducer = grammar.get_transducer()

underlying_forms_list = [
    'maslo', 'maslolar', 'buga', 'bugalar', 'ygy', 'ygylar', 'teve', 'tevelar',
    'orun', 'orunnar', 'sivi', 'sivilar', 'ygynar', 'ygybygylar'
]
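
# Sketch, assuming the Word(word_string, feature_table) constructor used
# elsewhere in the project: wrap each underlying form for later comparison
# against the surface data above.
underlying_words = [Word(w, feature_table) for w in underlying_forms_list]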
Example #7

# Imports assumed from the sibling examples; module paths follow the flat
# layout used above.
from configuration import Configuration
from feature_table import FeatureTable
from constraint_set import ConstraintSet
from tests.persistence_tools import get_feature_table_fixture, get_constraint_set_fixture
from debug_tools import write_to_dot as dot, print_empty_line

configurations_dict = {
    "ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS": False,
    "DATA_ENCODING_LENGTH_MULTIPLIER": 25,
    "GRAMMAR_ENCODING_LENGTH_MULTIPLIER": 1,
    "CORPUS_DUPLICATION_FACTOR": 1,
}

configuration = Configuration()
configuration.load_configurations_from_dict(configurations_dict)

feature_table = FeatureTable.load(
    get_feature_table_fixture("abnese_feature_table.json"))

faith_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("faith_constraint_set.json"))
target_constraint_set = ConstraintSet.load(
    get_constraint_set_fixture("abnese_target_constraint_set.json"))

prefix = "aab"

target_stems = [
    'baabab', 'babaa', 'babaab', 'bababa', 'bababab', 'baabaab', 'baaabaa',
    'babaaaab', 'bababaa', 'babababaa', "aaabab", "ababaa"
]

target_prefix_and_stem_concat_only = [(prefix + stem) for stem in target_stems]
target_prefix_and_stem = [
    word.replace("bb", "bab") for word in target_prefix_and_stem_concat_only
]  # apply the "bb" -> "bab" repair rule (epenthesis of "a" between adjacent b's)
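
# Worked example: "aab" + "baabab" -> "aabbaabab"; the "bb" -> "bab" repair
# then yields "aababaabab".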