Exemplo n.º 1
0
    ## Tail of a timing-report helper — the enclosing function header is above
    ## this chunk and not visible here.
    ## NOTE(review): Python 2 style — `run_times.values()` is assumed to be a
    ## list; on Python 3 it is a dict view and scipy.array() on it would not
    ## build a numeric array. Confirm target interpreter before porting.
    # Mean wall-clock run time across all entries (1.0* forces float division
    # under Python 2 integer semantics).
    avg_run_time = 1.0*sum(run_times.values())/len(run_times)
    # Population standard deviation of the run times via scipy.
    std_run_time = scipy.array(run_times.values()).std()
    # Emit a human-readable timing summary on stderr so it does not pollute
    # any result data written to stdout.
    sys.stderr.write('\nTiming results\n')
    sys.stderr.write('Mapper time:  total [%2.2fs]  min [%1.2fs]  max [%1.2fs]\n' %(sum(map_times.values()), min(map_times.values()), max(map_times.values())))
    sys.stderr.write('Run time:     total [%2.2fs]  min [%1.2fs]  max [%1.2fs] avg [%1.4f] std [%1.4f]\n' %(sum(run_times.values()), min(run_times.values()), max(run_times.values()), avg_run_time, std_run_time))
    sys.stderr.write('------\n')    


# Script entry point: parse options, build the summarization problems for the
# selected task, optionally run the constituency parser, then run and time the
# standard pipeline.
if __name__ == '__main__':
    
    options, task = parse_options()

    ## create SummaryProblem instances
    # Time the whole setup phase (problem creation + sentence preparation).
    setup_start_time = time.time()
    # 'u08' selects the TAC 2008 corpus layout; every other task id falls back
    # to the basic DUC layout with update subsets included (skip_updates=False).
    if options.task == 'u08': framework.setup_TAC08(task)
    else: framework.setup_DUC_basic(task, skip_updates=False)

    ## only run the parser if compression is required (this is not known by the pickle stuff)
    parser = None
    if options.compress:
        parser = berkeleyparser.CommandLineParser(BERKELEY_PARSER_CMD)
    # Split/tokenize documents into sentences; reload bypasses any cached
    # pickle when requested on the command line.
    framework.setup_DUC_sentences(task, parser, reload=options.reload)

    setup_time = time.time() - setup_start_time

    ## go!
    run_standard(options)
    sys.stderr.write('Setup time [%1.2fs]\n' %setup_time)    

    ## evaluate
    # NOTE(review): this chunk is truncated here — the body of the `if` below
    # (presumably the evaluation step when no manual-summary path is given)
    # is not visible in this excerpt.
    if not options.manpath:
Exemplo n.º 2
0
  # Interior of a task-setup routine (the enclosing `def` is above this
  # chunk): builds a Task, loads or constructs its SummaryProblem instances,
  # then prepares a tokenizer for downstream parsing.
  # Feature/bigram output location derived from the task name.
  bigram_path = 'dat/%s/features' % (task_name)
  task = Task(task_name, topic_file, doc_path, man_path)

  # Get documents, split into sentences, tokenize and stem
  if args.load is not None:
    # Fast path: restore previously pickled problem data instead of re-reading
    # and re-processing the raw corpus.
    start_time = time.time()
    sys.stderr.write('Loading [%s] problem data in [%s]\n' %(task.name, task.data_pickle))
    task.problems = util.load_pickle(args.load)
    sys.stderr.write('Done [%.2f s]\n' % (time.time() - start_time))
  else:
    # Slow path: run the full setup. Splitta provides the sentence-boundary
    # model used by the text processor.
    text.text_processor.load_splitta_model('lib/splitta/model_nb/')
    # Skip update data
    # Dispatch on the task-name prefix: 'tac' / 'duc' / 'new' select different
    # corpus layouts; the True flag skips the update-summarization subsets.
    if task_name[:3] == 'tac':
      framework.setup_TAC08(task, True)
    elif task_name[:3] == 'duc':
      framework.setup_DUC_basic(task, True)
    elif task_name[:3] == 'new':
      framework.setup_news(task)
    else:
      raise Exception('Unknown task %s' % task)
    # News tasks load their documents inside setup_news; the others load here.
    if task_name[:3] != 'new':
      for problem in task.problems:
        problem.load_documents()
    ## save pickled version for faster loading later
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %(task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)

  # Tokenize for parser
  tokenizer = nltk.tokenize.treebank.TreebankWordTokenizer()

  # NOTE(review): chunk is truncated here — the body of this loop is not
  # visible in this excerpt.
  for problem in task.problems:
Exemplo n.º 3
0
        # Tail of parse_options (function header above this chunk): cache-file
        # locations for problem data and the punkt sentence-splitter model,
        # both rooted at the user-supplied data directory.
        task.data_pickle = '%s/%s_data.pickle' % (options.dataroot, task.name)
        task.punkt_pickle = '%s/%s_punkt.pickle' % (options.dataroot,
                                                    task.name)

    # Hand back both the parsed options and the configured Task.
    return options, task


# Script entry point: parse options, build the summarization problems for the
# selected task, optionally run the constituency parser, then (re)create the
# output directory.
if __name__ == '__main__':

    options, task = parse_options()

    ## create SummaryProblem instances
    # 'u08' selects the TAC 2008 corpus layout; anything else uses the basic
    # DUC layout with its default update handling.
    if options.task == 'u08':
        framework.setup_TAC08(task)
    else:
        framework.setup_DUC_basic(task)

    # only run the parser if compression is required (this is not known by the pickle stuff)
    parser = None
    if options.compress:
        parser = berkeleyparser.CommandLineParser(BERKELEY_PARSER_CMD)
    framework.setup_DUC_sentences(task, parser, reload=options.reload)

    #for problem in task.problems:
    #    for sentence in problem.get_new_sentences():
    #        print sentence.parsed
    #sys.exit(0)

    ## create output directory
    # HACK: shells out to `rm -rf` with the path interpolated into the command
    # string — unsafe if options.output contains shell metacharacters; a
    # subprocess list call (or shutil.rmtree) would be safer.
    # NOTE(review): chunk is truncated here — the matching except/cleanup for
    # this `try` is not visible in this excerpt.
    try:
        os.popen('rm -rf %s' % options.output)
Exemplo n.º 4
0
    # Tail of parse_options (function header above this chunk): when a data
    # root is supplied, ensure it exists and derive cache-file locations for
    # problem data and the punkt sentence-splitter model.
    if options.dataroot:
        # HACK: shells out for mkdir with the path concatenated into the
        # command string — os.makedirs would avoid the shell entirely.
        os.popen("mkdir -p " + options.dataroot)
        task.data_pickle = '%s/%s_data.pickle' %(options.dataroot, task.name)
        task.punkt_pickle = '%s/%s_punkt.pickle' %(options.dataroot, task.name)

    # Hand back both the parsed options and the configured Task.
    return options, task

# Script entry point: parse options, build the summarization problems for the
# selected task, optionally run the constituency parser, then (re)create the
# output directory. The script presumably continues beyond this excerpt.
if __name__ == '__main__':
    
    options, task = parse_options()

    ## create SummaryProblem instances
    # 'u08' selects the TAC 2008 corpus layout; anything else uses the basic
    # DUC layout with its default update handling.
    if options.task == 'u08':
        framework.setup_TAC08(task)
    else:
        framework.setup_DUC_basic(task)

    # only run the parser if compression is required (this is not known by the pickle stuff)
    parser = None
    if options.compress:
        parser = berkeleyparser.CommandLineParser(BERKELEY_PARSER_CMD)
    framework.setup_DUC_sentences(task, parser, reload=options.reload)

    #for problem in task.problems:
    #    for sentence in problem.get_new_sentences():
    #        print sentence.parsed
    #sys.exit(0)

    ## create output directory
    # HACK: shells out to `rm -rf` with the path interpolated into the command
    # string — unsafe if options.output contains shell metacharacters; the
    # bare `except: pass` also swallows every error (including KeyboardInterrupt
    # under Python 2). shutil.rmtree(options.output, ignore_errors=True) would
    # express the same best-effort intent safely.
    try: os.popen('rm -rf %s' %options.output)
    except: pass