Пример #1
0
def score(s, produced):
  global counter
  global cache
  #if counter == 20:
  #  exit()
  print "score called"
  print s
  counter += 1
  subexps = sexp.subexps(s)
  #if s in subexps:
  #  subexps.remove(s)
  #print subexps
  splits = sum([sexp.split(s, subexp) for subexp in subexps], [])
  #print splits
  #for split in splits:
  #  print split
  #print
  if not splits:
    #print "hit bottom"
    #print
    #print 1
    return 1
  #print
  print sexp.pretty_lambda(s)
  for x in splits:
    print "  ", sexp.pretty_lambda(x[0]), "/", sexp.pretty_lambda(x[1])
  print
  scr = max([score_one_split(s, x[0], x[1], produced) for x in splits])
  #print scr
  return scr
Пример #2
0
def random_derivation(s, depth=0, category='S', productions_above=None):
  """Build one randomly chosen derivation of expression `s`, or return False.

  Parameters:
    s: the expression to derive; it is passed to `sexp.pretty_lambda`,
      `sexp.totally_vacuous`, `sexp.subexps` and `sexp.split`.
    depth: current recursion depth; expansion stops once it exceeds 3.
    category: category string the derivation must produce.
    productions_above: list of the splits chosen by the enclosing calls.
      It is only extended and passed down, never read here.  Defaults to a
      fresh empty list.

  Returns:
    * `[key, entry[0], entry[1]]` when the pretty-printed form of `s` is a
      key of the module-level `lexicon` and at least one of its entries
      matches `category` (slashes in the entry's category are compared as
      '|'); the entry is picked uniformly at random.
    * `[key, category, d1, d2]` when `s` is split into two parts that both
      have derivations (`d1` for the first part, `d2` for the second).
    * `False` when no derivation is found.
  """
  # A None sentinel instead of a shared mutable default list.
  if productions_above is None:
    productions_above = []

  key = sexp.pretty_lambda(s)

  # Lexical case: the expression itself is a lexicon entry.
  if key in lexicon:
    options = [entry for entry in lexicon[key]
               if entry[0].replace('/', '|').replace('\\', '|') == category]
    if not options:
      return False
    picked = random.choice(options)
    return [key, picked[0], picked[1]]

  if sexp.totally_vacuous(s):
    return False

  # Bound the recursion.
  if depth > 3:
    return False

  # All ways of splitting `s` on any of its sub-expressions, in random order.
  splits = [split for sub in sexp.subexps(s) for split in sexp.split(s, sub)]
  if not splits:
    return False
  random.shuffle(splits)

  for split in splits:
    fcat = catf(split[0])
    # Parenthesise a complex category before embedding it in a larger one.
    if '|' in fcat:
      fcat2 = '(%s)' % fcat
    else:
      fcat2 = fcat
    gcat = '%s|%s' % (category, fcat2)

    trail = productions_above + [split]
    d1 = random_derivation(split[0], depth + 1, fcat, trail)
    if not d1:
      # The split cannot succeed without its first half; skip the (possibly
      # expensive) derivation of the second half.
      continue
    d2 = random_derivation(split[1], depth + 1, gcat, trail)
    if d2:
      return [key, category, d1, d2]
  return False
Пример #3
0
def best_derivation(sent, category, cky=None, depth=0):
  """Return the highest-scoring derivation of `sent` as `category`, or False.

  Parameters:
    sent: the expression to derive; it is passed to `sexp.pretty_lambda`,
      `sexp.totally_vacuous`, `sexp.subexps` and `sexp.split`.
    category: category string the derivation must produce.
    cky: list forwarded to `lm_score`; each split works on its own shallow
      copy.  Defaults to a fresh empty list.
    depth: current recursion depth; expansion stops at depth 3.

  Returns:
    * For an expression whose pretty-printed form is in the module-level
      `lexicon`: `{'key', 'score', 'scored'}` where `scored` is a list of
      `(terminal, lm_score(terminal, cky))` pairs for every entry returned
      by `all_lex_entries`, or False if there is no such entry.
    * Otherwise: `{'key', 'score', 'left', 'right'}` for the split whose
      `left score + right score + split_potential(sent, split)` is largest,
      or False if no split has derivations for both halves.

  Side effects: increments the module-level `counter` once per call and
  stores results in the module-level `cache` under "<pretty sent> <category>"
  (except for depth cut-offs and the no-derivation case).  The cache is only
  written here; this function never reads it back.
  """
  global counter

  if cky is None:
    cky = []

  lkey = sexp.pretty_lambda(sent)
  key = lkey + ' ' + category

  counter += 1

  # Lexical case: the expression itself is a lexicon entry.
  if lkey in lexicon:
    terminals = all_lex_entries(lkey, category)
    if terminals:
      scored = [(terminal, lm_score(terminal, cky)) for terminal in terminals]
      # The recursive case below reads result['score'] from both children,
      # so a lexical node must carry one too or every derivation reaching a
      # leaf fails with KeyError.
      # NOTE(review): the best lm_score is used as the node's score; this
      # assumes lm_score returns a number on the same scale as
      # split_potential -- confirm.
      r = {'key': key,
           'score': max(value for _, value in scored),
           'scored': scored}
    else:
      r = False
    cache[key] = r
    return r

  if sexp.totally_vacuous(sent):
    cache[key] = False
    return False

  # Depth cut-off.  Not cached: the outcome depends on `depth`, which is not
  # part of the cache key.  `>=` also stops calls that start past the limit.
  if depth >= 3:
    return False

  # All ways of splitting `sent` on any of its sub-expressions.
  splits = [split
            for sub in sexp.subexps(sent)
            for split in sexp.split(sent, sub)]

  candidates = []
  for split in splits:
    ncky = list(cky)
    fcat, gcat = make_categories(split, category)
    left = best_derivation(split[0], fcat, ncky, depth + 1)
    if not left:
      continue
    right = best_derivation(split[1], gcat, ncky, depth + 1)
    if not right:
      continue
    total = left['score'] + right['score'] + split_potential(sent, split)
    candidates.append({'key': key,
                       'score': total,
                       'left': left,
                       'right': right})

  if not candidates:
    return False
  r = max(candidates, key=lambda x: x['score'])
  cache[key] = r
  return r