예제 #1
0
def getRealCosts():
  """Return the cached ``(unigramCost, bigramCost, possibleFills)`` triple.

  On the first call (detected by ``_realUnigramCost`` still being ``None``)
  the language models and the inverse-removal dictionary are built from
  ``CORPUS`` and stored in module-level globals; later calls return the
  stored objects without retraining.

  Returns:
    A 3-tuple ``(_realUnigramCost, _realBigramCost, _possibleFills)``.
  """
  global _realUnigramCost, _realBigramCost, _possibleFills

  if _realUnigramCost is None:
    # The message has no trailing newline, so it must be flushed explicitly;
    # otherwise buffered stdout would only show it after training finished.
    print(f'Training language cost functions [corpus: {CORPUS}]... ',
          end='', flush=True)

    _realUnigramCost, _realBigramCost = wordsegUtil.makeLanguageModels(CORPUS)
    _possibleFills = wordsegUtil.makeInverseRemovalDictionary(CORPUS, 'aeiou')

    print('Done!')
    print('')

  return _realUnigramCost, _realBigramCost, _possibleFills
예제 #2
0
def getRealCosts():
    """Return the cached ``(unigramCost, bigramCost, possibleFills)`` triple.

    On the first call (detected by ``_realUnigramCost`` still being ``None``)
    the language models and the inverse-removal dictionary are built from
    ``CORPUS`` and stored in module-level globals; later calls return the
    stored objects without retraining.

    Returns:
        A 3-tuple ``(_realUnigramCost, _realBigramCost, _possibleFills)``.
    """
    global _realUnigramCost, _realBigramCost, _possibleFills

    if _realUnigramCost is None:
        # Written and flushed by hand so the message (which has no trailing
        # newline) is visible while training is still running.
        sys.stdout.write('Training language cost functions [corpus: %s]... ' % CORPUS)
        sys.stdout.flush()

        _realUnigramCost, _realBigramCost = wordsegUtil.makeLanguageModels(CORPUS)
        _possibleFills = wordsegUtil.makeInverseRemovalDictionary(CORPUS, 'aeiou')

        # Single-argument parenthesized print: valid on Python 3 and prints
        # exactly the same text as the old print statement on Python 2.
        print('Done!')
        print('')

    return _realUnigramCost, _realBigramCost, _possibleFills
예제 #3
0
def getRealCosts():
    """Lazily train and return the shared language-model objects.

    The module-level globals ``_realUnigramCost``, ``_realBigramCost`` and
    ``_possibleFills`` act as a cache: they are filled from ``CORPUS`` the
    first time this function runs (while ``_realUnigramCost`` is ``None``)
    and simply returned on every later call.

    Returns:
        A 3-tuple ``(_realUnigramCost, _realBigramCost, _possibleFills)``.
    """
    global _realUnigramCost, _realBigramCost, _possibleFills

    # Fast path: an earlier call already populated the cache.
    if _realUnigramCost is not None:
        return _realUnigramCost, _realBigramCost, _possibleFills

    progress = 'Training language cost functions [corpus: %s]... ' % CORPUS
    sys.stdout.write(progress)
    # No newline was written, so push the message out before the slow part.
    sys.stdout.flush()

    trained_models = wordsegUtil.makeLanguageModels(CORPUS)
    _realUnigramCost, _realBigramCost = trained_models
    _possibleFills = wordsegUtil.makeInverseRemovalDictionary(CORPUS, 'aeiou')

    # Finish the progress line, then leave one blank line after it.
    for line in ('Done!', ''):
        print(line)

    return _realUnigramCost, _realBigramCost, _possibleFills
예제 #4
0
def main():
    """Parse the command line, train the cost functions and start the REPL.

    Exits with status 1 when ``args.model`` is set to something other than
    ``'seg'``, ``'ins'`` or ``'both'``. The corpus defaults to
    ``'leo-will.txt'`` when ``args.text_corpus`` is empty or unset.
    """
    args = parseArgs()
    if args.model and args.model not in ('seg', 'ins', 'both'):
        # Pass two separate arguments: wrapping them in a tuple would print
        # the tuple's repr, e.g. "('Unrecognized model:', 'foo')".
        print('Unrecognized model:', args.model)
        sys.exit(1)

    corpus = args.text_corpus or 'leo-will.txt'

    # Written and flushed by hand so the message (which has no trailing
    # newline) is visible while training is still running.
    sys.stdout.write('Training language cost functions [corpus: %s]... ' % corpus)
    sys.stdout.flush()

    unigramCost, bigramCost = wordsegUtil.makeLanguageModels(corpus)
    possibleFills = wordsegUtil.makeInverseRemovalDictionary(corpus, 'aeiou')

    print('Done!')
    print('')

    repl(unigramCost, bigramCost, possibleFills, command=args.model)
예제 #5
0
def main():
    """Parse the command line, train the cost functions and start the REPL.

    Exits with status 1 when ``args.model`` is set to something other than
    ``'seg'``, ``'ins'`` or ``'both'``. The corpus defaults to
    ``'leo-will.txt'`` when ``args.text_corpus`` is empty or unset.
    """
    args = parseArgs()
    if args.model and args.model not in ('seg', 'ins', 'both'):
        # One pre-formatted string keeps the output identical on Python 2
        # and Python 3 (a multi-argument print(...) would print a tuple on
        # Python 2 without the print_function import).
        print('Unrecognized model: %s' % (args.model,))
        sys.exit(1)

    corpus = args.text_corpus or 'leo-will.txt'

    # Written and flushed by hand so the message (which has no trailing
    # newline) is visible while training is still running.
    sys.stdout.write('Training language cost functions [corpus: %s]... ' % corpus)
    sys.stdout.flush()

    unigramCost, bigramCost = wordsegUtil.makeLanguageModels(corpus)
    possibleFills = wordsegUtil.makeInverseRemovalDictionary(corpus, 'aeiou')

    # Single-argument parenthesized print: valid on Python 3 and prints
    # exactly the same text as the old print statement on Python 2.
    print('Done!')
    print('')

    repl(unigramCost, bigramCost, possibleFills, command=args.model)
예제 #6
0
        self.query = query
        self.bigramCost = bigramCost
        self.possibleFills = possibleFills

    def start_state(self):
        """Return the initial search state.

        A state is a pair: the position before which the text has been
        reconstructed, and the previous word. The search starts at
        position 0 with ``wordsegUtil.SENTENCE_BEGIN`` as the previous word.
        """
        initial_position = 0
        return (initial_position, wordsegUtil.SENTENCE_BEGIN)

    def is_end(self, state):
        """Return True when the state's position equals ``len(self.query)``.

        Only the first element of ``state`` (the position) is inspected.
        """
        position = state[0]
        query_length = len(self.query)
        return position == query_length

    def succ_and_cost(self, state):
        """Placeholder for the successor function; not implemented yet.

        Raises:
            NotImplementedError: always, regardless of ``state``.
        """
        # NOTE(review): presumably meant to produce the successors of
        # `state` with their costs -- confirm against the search API.
        raise NotImplementedError()


# Demo script: build the cost functions from the 'leo-will.txt' corpus and
# solve one joint segmentation/insertion problem with uniform cost search.
unigramCost, bigramCost = wordsegUtil.makeLanguageModels('leo-will.txt')
# NOTE(review): the meaning of the 0.2 argument is defined by
# wordsegUtil.smoothUnigramAndBigram -- presumably a smoothing weight; confirm.
smoothCost = wordsegUtil.smoothUnigramAndBigram(unigramCost, bigramCost, 0.2)
# Built from the same corpus with the character set 'aeiou'.
possibleFills = wordsegUtil.makeInverseRemovalDictionary(
    'leo-will.txt', 'aeiou')
# The smoothed cost is passed as the second argument (presumably the
# constructor's bigramCost parameter -- its signature is not visible here).
problem = JointSegmentationInsertionProblem('mgnllthppl', smoothCost,
                                            possibleFills)

# NOTE(review): `dps` is constructed but never used; the call that would
# run it is commented out below.
import dynamic_programming_search
dps = dynamic_programming_search.DynamicProgrammingSearch(verbose=1)
# dps = dynamic_programming_search.DynamicProgrammingSearch(memory_use=False, verbose=1)
# print(dps.solve(problem))

# Solve with uniform cost search and print whatever solve() returns.
import uniform_cost_search
ucs = uniform_cost_search.UniformCostSearch(verbose=0)
print(ucs.solve(problem))