Example #1
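A driver script that evaluates a series of feature-space combinations with a repeated LIBLINEAR learner over the ALTA2012Full dataset, recording one result per combination in an HDF5 store. The imports and the feats_* feature-group definitions (feats_content, feats_local, feats_struct, feats_prev, feats_post, feats_window) belong to the full script and are not shown in this excerpt.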
features = [
  ('nltkword_unigram',),
  feats_content,
  feats_content + ['headingord', 'headingvec'],
  feats_local,
  ['headingord', 'headingvec'] + feats_struct,
  feats_struct,
  feats_local + feats_prev,
  feats_local + feats_post,
  feats_local + feats_window,
  feats_local + feats_prev + feats_post,
  feats_local + feats_prev + feats_post + feats_window,
]

features = [ tuple(sorted(x)) for x in features ]
  
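# Evaluate each feature-space combination: wrap the dataset in a StackingProxy,
# run an Experiment with the repeated LIBLINEAR learner, and store the result.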
if __name__ == "__main__":
  fallback = Store('store/features.h5')
  store = Store.from_caller(fallback=fallback)

  learner = repeat.RepeatLearner(liblinear.liblinearL(svm_type=0, output_probability=True))
  ds = ALTA2012Full()

  proxy = StackingProxy(ds, learner, store=store)
  proxy.class_space = 'ebmcat'
  for feats in features:
    print "DOING:", len(feats), feats
    proxy.feature_spaces = feats
    e = Experiment(proxy, learner)
    proxy.store.new_TaskSetResult(e)

Example #2
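A preprocessing script for the same ALTA2012Full dataset: it induces the full set of feature spaces and then builds bigram and trigram token streams from the TreeTagger outputs. The imports and the remaining feats_* lists (feats_bow, feats_pos, feats_struct, feats_heading) are not shown in this excerpt.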
feats_position = ['positionabs','positionrel','positionrelbyte']


core = ( 'nltkword_unigram', 'treetaggerpos_bigram', 'treetaggerlemmapos_bigram', )
core += ('headingprev', 'headingvec', 'positionrel')
struct_best = core + ('bowpost1', 'bowprev3', 'headingpost', 'isstructured', 'sentlenrel',)
unstruct_best = core + ('bowprev','treetaggerpos_trigram','ttbprev')

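# Union of every feature-space name used below: the best structured/unstructured
# sets plus the full bow, pos, struct, heading and position groups.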
feats_all = tuple(sorted(set(
  sum(map(tuple, [struct_best, unstruct_best, feats_bow, feats_pos,
                  feats_struct, feats_heading, feats_position]), tuple())
)))

datasets = [
  ALTA2012Full(),
  ]

if __name__ == "__main__":
  store = Store.from_caller()

  for ds in datasets:
    proxy = DataProxy(ds, store=store)
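    # Induce the requested feature spaces (fms), the 'ebmcat' class space (cms)
    # and the 'abstract' sequence (sqs) for this dataset.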
    proxy.inducer.process(proxy.dataset, 
      fms=feats_all,
      cms=['ebmcat',], 
      sqs=['abstract',],
    )

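    # Derive bigram (and trigram) token streams from the TreeTagger token
    # streams; these back the *_bigram / *_trigram feature spaces listed above.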
    proxy.tokenstream_name = 'treetaggerlemmapos'
    proxy.tokenize(ext.bigram)

    proxy.tokenstream_name = 'treetaggerpos'
    proxy.tokenize(ext.bigram)
    proxy.tokenize(ext.trigram)