Пример #1
0
 def get(self):
   """Render the wordcount overview page.

   Requires a logged-in user (redirects to the login URL otherwise), then
   shows the most recent WCDataSet entities and the most recent wordcount
   Record entities, newest first.
   """
   user = users.get_current_user()
   if not user:
     self.redirect(users.create_login_url(dest_url="/"))
     return
   # Most recent datasets first; fetch() already returns a list, so no
   # extra copy is needed (the original re-copied it with a comprehension).
   q = WCDataSet.all()
   q.order('-start')
   datasets = q.fetch(1000)
   # Most recent wordcount benchmark records first.
   q = Record.all()
   q.filter('benchmark =', "wordcount")
   q.order('-start')
   records = q.fetch(1000)
   self.response.out.write(template.render("templates/wordcount.html",
                                           {"user": user.email(),
                                            "datasets_len" : len(datasets),
                                            "datasets" : datasets,
                                            "records": records,
                                            "records_len" : len(records)}))
Пример #2
0
  def post(self):
    """Handle benchmark form submissions.

    Two mutually exclusive form fields drive the behavior:
      * "fsm_cleanup" -- recompute FSM run-time stats via
        fsm_calculate_run_time() and redirect to /wc on success.
      * "compute"     -- create a Record for the chosen dataset and start a
        wordcount job on the selected engine ("fsm", "pipeline" or "mr"),
        then redirect to /wc.
    """
    if self.request.get("fsm_cleanup"):
      if fsm_calculate_run_time():
        self.redirect('/wc')
      else:
        self.response.out.write("Error calculating fsm/wordcount")
      return 
 
    if self.request.get("compute"):
      engine = self.request.get("engine")
      dataset = self.request.get("dataset")
      user = self.request.get('user')
      # Datasets are keyed by name; assumes the form value is a valid key --
      # no None check before data.num_entries below, TODO confirm upstream
      # validation.
      data = WCDataSet.get_by_key_name(dataset)
      
      # Record captures this run's metadata; created in "Running" state
      # (presumably flipped to done by the job's completion path -- verify).
      record = Record(engine_type=engine, 
                      dataset=dataset,
                      benchmark="wordcount",
                      num_entities=data.num_entries,
                      #shard_count=data.num_pipelines,
                      entries_per_pipe=data.entries_per_pipe,
                      user=user,
                      char_per_word=data.char_per_word,
                      state="Running")
      if engine == "fsm":
        # Fantasm-style state machine: a single context dict is passed in.
        record.put()
        context = {}
        context['user'] = str(user)
        context['num_entries'] = int(data.num_entries)
        fsm.startStateMachine('WordCount', [context])
        self.redirect('/wc')
      elif engine == "pipeline":
        mypipeline = WordCountPipelineLoop(data.num_entries)
        mypipeline.start()
        # Persist the pipeline id so the run's status can be looked up later.
        record.pipeline_id = mypipeline.pipeline_id
        record.put()
        self.redirect('/wc')
        #self.redirect(mypipeline.base_path + "/status?root=" + mypipeline.pipeline_id)
        logging.info("wordcount job running")
      elif engine == "mr":
        # Why 1k each per shard or less? is this ideal?
        # Target roughly 1000 entities per shard, with a floor of 10 shards.
        # NOTE(review): integer division here -- Python 2 semantics assumed.
        shards = 10
        if data.num_entries > 1000:
          shards = data.num_entries/1000
        kind = getKindString(data.num_entries)
        # Mapper-only MapReduce over the dataset's entity kind; completion is
        # reported to the /wc/mr/callback done-callback.
        mapreduce_id = control.start_map(
            name="Wordcount with just mappers",
            handler_spec="wordcount.mr.wordcount_mapper",
            reader_spec="mapreduce.input_readers.DatastoreInputReader",
            mapper_parameters={
                "entity_kind": "data.wordcount."+kind,
                "processing_rate": 500
            },
            mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                       '/wc/mr/callback'},
            shard_count=shards,
            queue_name="default",
          )

        # Persist the MapReduce id so the run can be tracked by its Record.
        record.mr_id = mapreduce_id
        record.put()
        self.redirect('/wc')