Пример #1
0
def min_consonant_count_transducer(min_consonant_count=3, add_meta_arc=True):
  """Accepts only strings with at least |min_consonant_count| consonants.

  States 0..min_consonant_count form a chain. Every state carries an identity
  self-loop for each symbol of pt.abc.ALL_SYMS, and the only arcs leading from
  state k-1 to state k are identity arcs on symbols of pt.abc.CONSONANTS.
  Only the last state of the chain is marked final.

  Args:
    min_consonant_count: Index of the final state, i.e. the number of
      consonant steps required to reach it.
    add_meta_arc: When true, pt.AddPassThroughArcs is applied to the result.

  Returns:
    The constructed pt.Transducer.
  """
  t = pt.Transducer()
  last_state = min_consonant_count
  for state in range(last_state + 1):
    # Self-loops: any symbol may be read without leaving this state.
    for sym in pt.abc.ALL_SYMS:
      t.add_arc(state, state, sym, sym)
    if state == 0:
      continue  # The first state has no predecessor to link from.
    # Forward arcs: a consonant may advance from the previous state.
    for sym in pt.abc.CONSONANTS:
      t.add_arc(state - 1, state, sym, sym)
  t[last_state].final = True
  if add_meta_arc:
    pt.AddPassThroughArcs(t)
  return t
Пример #2
0
  def ApplyLoanwords(self, ar_vocab_groups, loanwords_transducer,
                     sw_pre_transducer, add_meta_arc, with_syllabification):
    """Builds self.t_all and self.t_correct, printing per-stage timings.

    Stages, in execution order:
      1. Union of self.sw_pron_list, optionally extended with pass-through
         and syllabification arcs.
      2. Union of self.ar_word_list.
      3. sw_pre_transducer >> (result of stage 1).
      4. loanwords_transducer >> (result of stage 3).
      5. For every group in ar_vocab_groups, group >> (result of stage 4),
         unioned into self.t_all.
      6. self.t_correct = (result of stage 2) >> self.t_all.

    NOTE(review): `>>` is presumably transducer composition as defined by the
    pt library, and the arc sorts are presumably there to satisfy it; confirm
    against pt before reordering anything.

    Args:
      ar_vocab_groups: Iterable of transducers applied in front of the
        combined loanword/SW transducer.
      loanwords_transducer: Transducer placed in front of the SW vocabulary.
      sw_pre_transducer: Transducer placed in front of the SW word union.
      add_meta_arc: When true, pt.AddPassThroughArcs is applied to the SW
        word union.
      with_syllabification: When true, pt.AddSyllabificationArcs is applied
        to the SW word union.
    """
    started = time.time()

    # Stage 1: SW side word list.
    sw_words = pt.UnionLinearChains(self.sw_pron_list)
    if add_meta_arc:
      pt.AddPassThroughArcs(sw_words)
    if with_syllabification:
      pt.AddSyllabificationArcs(sw_words)
    sw_words.arc_sort_input()
    sw_built = time.time()
    print("    building SW transducer took:", sw_built - started, "sec")

    # Stage 2: AR side word list.
    ar_words = pt.UnionLinearChains(self.ar_word_list)
    ar_words.arc_sort_output()
    ar_built = time.time()
    print("    building AR transducer took:", ar_built - sw_built, "sec")

    # Stage 3: put the SW pre-processing in front of the SW words.
    print("  sw_pre_transducer")
    sw_side = sw_pre_transducer >> sw_words
    sw_side.arc_sort_input()
    sw_pre_applied = time.time()
    print("    applying sw_pre_transducer took:", sw_pre_applied - ar_built,
          "sec")

    # Stage 4: put the loanword rules in front of the SW side.
    print("  loanwords")
    loans_to_sw = loanwords_transducer >> sw_side
    loans_to_sw.arc_sort_input()
    loans_applied = time.time()
    print("    applying loanwords took:", loans_applied - sw_pre_applied, "sec")

    # Stage 5: one composition per AR vocabulary group, merged by union.
    # A dot is emitted (and flushed) per group as a progress indicator.
    print("  ar_vocab")
    self.t_all = pt.Transducer()
    for group in ar_vocab_groups:
      print(".", sep="", end="")
      sys.stdout.flush()
      self.t_all.set_union(group >> loans_to_sw)
    print()
    self.t_all.arc_sort_input()
    groups_applied = time.time()
    print("    ar_vocab >> combined took:", groups_applied - loans_applied,
          "sec")

    # Stage 6: restrict to the known AR words; both results end up
    # output-sorted.
    print("  t_correct")
    self.t_correct = ar_words >> self.t_all
    self.t_correct.arc_sort_output()
    self.t_all.arc_sort_output()
    finished = time.time()
    print("    building t_correct took:", finished - groups_applied, "sec")
    print("    total ApplyLoanwords took:", finished - started, "sec")
Пример #3
0
def ar_morphology_transducer(add_meta_arc=True, with_syllabification=False):
    """Removes one AR prefix and one suffix (optionally).

    The result is the concatenation of three parts: a strip_transducer over
    morphemes.AR_PREFIXES, pt.accept_all_transducer(), and a strip_transducer
    over morphemes.AR_SUFFIXES.

    Args:
        add_meta_arc: When true, no explicit weight is handed to
            strip_transducer (None is passed) and pt.AddPassThroughArcs is
            applied to the result. Otherwise the weight is looked up in
            pt.abc.OT_CONSTRAINTS under the rule name.
        with_syllabification: When true, pt.AddSyllabificationArcs is applied
            to the result.

    Returns:
        The concatenated transducer.
    """
    rule_name = "<<IT_MORPH>>"
    operation_weight = (
        None if add_meta_arc else pt.abc.OT_CONSTRAINTS[rule_name])

    # Prefix stripper, then an arbitrary middle part, then the suffix stripper.
    t = strip_transducer(
        morphemes.AR_PREFIXES, operation_weight, add_meta_arc, rule_name)
    middle = pt.accept_all_transducer()
    t.concatenate(middle)
    suffix_stripper = strip_transducer(
        morphemes.AR_SUFFIXES, operation_weight, add_meta_arc, rule_name)
    t.concatenate(suffix_stripper)

    if add_meta_arc:
        pt.AddPassThroughArcs(t)
    if with_syllabification:
        pt.AddSyllabificationArcs(t)
    # NOTE: t.arc_sort_input() is disabled; no arc sort is applied here.
    return t
Пример #4
0
def sw_morphology_transducer(add_meta_arc=True, with_syllabification=False):
    """Appends SW prefixes and suffixes (optionally).

    The result is the concatenation of three parts: an append_transducer over
    morphemes.SW_PREFIXES, pt.accept_all_transducer(), and an
    append_transducer over morphemes.SW_SUFFIXES.

    Args:
        add_meta_arc: When true, no explicit weight is handed to
            append_transducer (None is passed) and pt.AddPassThroughArcs is
            applied to the result. Otherwise the weight is looked up in
            pt.abc.OT_CONSTRAINTS under the rule name.
        with_syllabification: When true, pt.AddSyllabificationArcs is applied
            to the result.

    Returns:
        The concatenated transducer.
    """
    rule_name = "<<MT_MORPH>>"
    operation_weight = (
        None if add_meta_arc else pt.abc.OT_CONSTRAINTS[rule_name])

    # Prefix appender, then an arbitrary middle part, then the suffix appender.
    t = append_transducer(
        morphemes.SW_PREFIXES, operation_weight, add_meta_arc, rule_name)
    # NOTE: a second append_transducer over SW_PREFIXES used to be
    # concatenated at this point; that call is disabled.
    middle = pt.accept_all_transducer()
    t.concatenate(middle)
    suffix_appender = append_transducer(
        morphemes.SW_SUFFIXES, operation_weight, add_meta_arc, rule_name)
    t.concatenate(suffix_appender)

    if add_meta_arc:
        pt.AddPassThroughArcs(t)
    if with_syllabification:
        pt.AddSyllabificationArcs(t)
    # NOTE: t.arc_sort_output() is disabled; no arc sort is applied here.
    return t
Пример #5
0
def vowel_deletion_transducer(add_meta_arc=True):
  """Deletion of vowels.

  State 0 is the only final state and loops on every symbol of
  pt.abc.ALL_SYMS unchanged. Each vowel additionally gets its own detour
  state (numbered from 1): state 0 goes there on the vowel paired with
  pt.abc.EPSILON, and an epsilon arc leads straight back to state 0.

  Args:
    add_meta_arc: When true, the arc back to state 0 carries the rule name
      "<<MAX-V>>" as its fourth add_arc argument and pt.AddPassThroughArcs is
      applied to the result. Otherwise that arc is epsilon:epsilon and gets
      pt.abc.OT_CONSTRAINTS["<<MAX-V>>"] as a fifth add_arc argument
      (presumably the arc weight; confirm against pt).

  Returns:
    The constructed pt.Transducer.
  """
  rule_name = "<<MAX-V>>"
  t = pt.Transducer()

  # Identity loops: every symbol may pass through untouched.
  for sym in pt.abc.ALL_SYMS:
    t.add_arc(0, 0, sym, sym)

  # One detour state per vowel, numbered 1, 2, ...
  for node, vowel in enumerate(pt.abc.VOWELS, start=1):
    t.add_arc(0, node, vowel, pt.abc.EPSILON)
    if add_meta_arc:
      t.add_arc(node, 0, pt.abc.EPSILON, rule_name)
    else:
      weight = pt.abc.OT_CONSTRAINTS[rule_name]
      t.add_arc(node, 0, pt.abc.EPSILON, pt.abc.EPSILON, weight)

  t[0].final = True
  if add_meta_arc:
    pt.AddPassThroughArcs(t)
  return t
Пример #6
0
def AssertReachable(ar_str, sw_str, ar_post_transducer, loanwords_transducer,
                    sw_pre_transducer, add_meta_arc=True, with_syllabification=False):
  """Prints whether the pipeline from ar_str to sw_str yields a non-empty result.

  Despite the name, nothing is asserted, raised or returned: each check only
  prints a "NOT REACHABLE" line when the corresponding result has length 0.
  pt.PrintFullPaths is called on the end-to-end result in every case.

  Args:
    ar_str: Input word; turned into a linear chain and followed by
      ar_post_transducer.
    sw_str: Output word; turned into a linear chain and preceded by
      sw_pre_transducer.
    ar_post_transducer: Transducer applied after the input chain.
    loanwords_transducer: Transducer placed between the input and output
      sides.
    sw_pre_transducer: Transducer applied in front of the output chain.
    add_meta_arc: When true, pass-through arcs are added to the output chain
      and pt.weights_transducer() is appended to the output side.
    with_syllabification: When true, syllabification arcs are added to the
      output chain.
  """
  print("Constructing input transducer for word:", ar_str)
  source_t = pt.linear_chain(ar_str) >> ar_post_transducer

  print("Constructing output transducer for word:", sw_str)
  sw_chain = pt.linear_chain(sw_str)
  if add_meta_arc:
    pt.AddPassThroughArcs(sw_chain)
  if with_syllabification:
    pt.AddSyllabificationArcs(sw_chain)
  target_t = sw_pre_transducer >> sw_chain
  if add_meta_arc:
    target_t = target_t >> pt.weights_transducer()

  target_t.arc_sort_input()
  source_t.arc_sort_output()

  # Check 1: loanword rules against the output side only.
  print("Combining loanwords with output")
  loans_to_target = loanwords_transducer >> target_t
  if len(loans_to_target) == 0:
    print("NOT REACHABLE: loanwords_transducer >> out_t")

  # Check 2: input side against the loanword rules only.
  print("Testing input with loanwords")
  source_to_loans = source_t >> loanwords_transducer
  if len(source_to_loans) == 0:
    print("NOT REACHABLE: in_t >> loanwords_transducer")

  # Check 3: the full input -> loanwords -> output chain.
  print("Combining with input")
  end_to_end = source_t >> loans_to_target
  if len(end_to_end) > 0:
    print("Reachable!")
  else:
    print("NOT REACHABLE: in_t >> test_t1")

  print("Printing full paths")
  pt.PrintFullPaths(end_to_end, num_shortest=1)
  print("AssertReachable done.")