def testSuccessfulRun(self):
    """Run the shuffle-service pipeline to completion and check its results.

    Starts the pipeline with two input paths, inspects the shuffle request
    read from ``self.file_service``, replays the request's callback as a
    task, and then verifies that the reloaded pipeline has finalized with
    two output files whose names start with ``/blobstore/``.
    """
    p = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    # assertEqual is used throughout instead of the deprecated assertEquals
    # alias, which newer unittest versions no longer provide.
    request = self.file_service.shuffle_request
    self.assertTrue(request)
    self.assertTrue(request.shuffle_name().startswith("testjob-"))
    self.assertEqual(2, len(request.input_list()))
    self.assertEqual(1, request.input(0).format())
    self.assertEqual("file1", request.input(0).path())
    self.assertEqual(1, request.input(1).format())
    self.assertEqual("file2", request.input(1).path())
    self.assertEqual(2, len(request.output().path_list()))

    callback = request.callback()
    self.assertTrue(callback.url().startswith(
        "/mapreduce/pipeline/callback?pipeline_id="))
    self.assertEqual(self.version_id, callback.app_version_id())
    self.assertEqual("GET", callback.method())
    self.assertEqual("default", callback.queue())

    # Replay the callback the request asked for, then drain the queue.
    callback_task = {
        "url": callback.url(),
        "method": callback.method(),
        }
    test_support.execute_task(callback_task)
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id to observe its persisted state.
    p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
    self.assertTrue(p.has_finalized)
    output_files = p.outputs.default.value
    self.assertEqual(2, len(output_files))
    self.assertTrue(output_files[0].startswith("/blobstore/"))
    self.assertTrue(output_files[1].startswith("/blobstore/"))
    def testSuccessfulRun(self):
        """Run the shuffle-service pipeline to completion and check results.

        Starts the pipeline with two input paths, inspects the shuffle
        request read from ``self.file_service``, replays the request's
        callback as a task, and then verifies that the reloaded pipeline has
        finalized with two output files whose names start with
        ``/blobstore/``.
        """
        p = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
        p.start()
        test_support.execute_until_empty(self.taskqueue)

        # assertEqual is used throughout instead of the deprecated
        # assertEquals alias, which newer unittest versions no longer provide.
        request = self.file_service.shuffle_request
        self.assertTrue(request)
        self.assertTrue(request.shuffle_name().startswith("testjob-"))
        self.assertEqual(2, len(request.input_list()))
        self.assertEqual(1, request.input(0).format())
        self.assertEqual("file1", request.input(0).path())
        self.assertEqual(1, request.input(1).format())
        self.assertEqual("file2", request.input(1).path())
        self.assertEqual(2, len(request.output().path_list()))

        callback = request.callback()
        self.assertTrue(callback.url().startswith(
            "/mapreduce/pipeline/callback?pipeline_id="))
        self.assertEqual(self.version_id, callback.app_version_id())
        self.assertEqual("GET", callback.method())
        self.assertEqual("default", callback.queue())

        # Replay the callback the request asked for, then drain the queue.
        callback_task = {
            "url": callback.url(),
            "method": callback.method(),
        }
        test_support.execute_task(callback_task)
        test_support.execute_until_empty(self.taskqueue)

        # Reload the pipeline by id to observe its persisted state.
        p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
        self.assertTrue(p.has_finalized)
        output_files = p.outputs.default.value
        self.assertEqual(2, len(output_files))
        self.assertTrue(output_files[0].startswith("/blobstore/"))
        self.assertTrue(output_files[1].startswith("/blobstore/"))
Пример #3
0
  def testNoInputFile(self):
    """Check that an empty input list produces no shuffle request.

    The pipeline is started with no input files. Afterwards the file service
    must hold no shuffle request, and the reloaded pipeline's default output
    must be an empty list.
    """
    empty_job = shuffler._ShuffleServicePipeline("testjob", [])
    empty_job.start()
    test_support.execute_until_empty(self.taskqueue)

    # Nothing should have been submitted for shuffling.
    self.assertEqual(None, self.file_service.shuffle_request)

    reloaded = shuffler._ShuffleServicePipeline.from_id(empty_job.pipeline_id)
    self.assertEqual([], reloaded.outputs.default.value)
    def testNoInputFile(self):
        """Check that an empty input list produces no shuffle request.

        The pipeline is started with no input files. Afterwards the file
        service must hold no shuffle request, and the reloaded pipeline's
        default output must be an empty list.
        """
        empty_job = shuffler._ShuffleServicePipeline("testjob", [])
        empty_job.start()
        test_support.execute_until_empty(self.taskqueue)

        # Nothing should have been submitted for shuffling.
        self.assertEqual(None, self.file_service.shuffle_request)

        job_id = empty_job.pipeline_id
        reloaded = shuffler._ShuffleServicePipeline.from_id(job_id)
        self.assertEqual([], reloaded.outputs.default.value)
Пример #5
0
  def testSuccessfulRun_CallbackOnDefaultVersion(self):
    """Check the callback's app version when CURRENT_MODULE_ID is "default".

    With the module id set to "default", the shuffle request's callback is
    expected to carry ``self.major_version_id`` as its app version, use GET,
    and target the "default" queue.
    """
    # NOTE(review): this environment change is not undone within this test;
    # confirm that the fixture's teardown restores it.
    os.environ["CURRENT_MODULE_ID"] = "default"
    input_file1 = self._CreateInputFile()
    input_file2 = self._CreateInputFile()
    p = shuffler._ShuffleServicePipeline("testjob", [input_file1, input_file2])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    request = self.file_service.shuffle_request
    callback = request.callback()
    self.assertTrue(callback.url().startswith(
        "/mapreduce/pipeline/callback?pipeline_id="))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self.major_version_id, callback.app_version_id())
    self.assertEqual("GET", callback.method())
    self.assertEqual("default", callback.queue())
Пример #6
0
  def testNoData(self):
    """Check that inputs with no data produce no shuffle request.

    A blobstore file is created and finalized with nothing written to it in
    this test, then passed twice as the pipeline input. No shuffle request
    may be issued, and the pipeline's default output must be an empty list.
    """
    blob_file = files.blobstore.create()
    files.finalize(blob_file)
    blob_key = files.blobstore.get_blob_key(blob_file)
    readable_name = files.blobstore.get_file_name(blob_key)

    job = shuffler._ShuffleServicePipeline(
        "testjob", [readable_name, readable_name])
    job.start()
    test_support.execute_until_empty(self.taskqueue)

    # Nothing should have been submitted for shuffling.
    self.assertEqual(None, self.file_service.shuffle_request)

    reloaded = shuffler._ShuffleServicePipeline.from_id(job.pipeline_id)
    self.assertEqual([], reloaded.outputs.default.value)
  def run(self, job_name, shuffler_params, filenames, shards=None):
    """Yield the child pipelines that shuffle ``filenames``.

    When ``files.shuffler.available()`` is true, the work is handed to a
    single ``_ShuffleServicePipeline``. Otherwise the files pass through
    ``_HashGSPipeline``, ``_SortChunksPipeline`` and ``_MergeGSPipeline``;
    once the merge is done the hashed and sorted intermediates are given to
    ``mapper_pipeline._CleanupPipeline``, and the merged files are yielded
    wrapped in ``pipeline_common.Return``.

    Args:
      job_name: name forwarded to every child pipeline.
      shuffler_params: accepted but not read in this method body.
      filenames: input file names to shuffle.
      shards: forwarded to ``_HashGSPipeline`` as ``shards``.
    """
    if files.shuffler.available():
      yield _ShuffleServicePipeline(job_name, filenames)
      return

    hashed = yield _HashGSPipeline(job_name, filenames, shards=shards)
    sorted_chunks = yield _SortChunksPipeline(job_name, hashed)
    merged = yield _MergeGSPipeline(job_name, sorted_chunks)

    # Intermediate files are cleaned up only after the merge stage.
    with pipeline.After(merged):
      intermediates = yield pipeline_common.Extend(hashed, sorted_chunks)
      yield mapper_pipeline._CleanupPipeline(intermediates)

    yield pipeline_common.Return(merged)
    def testSuccessfulRun_CallbackOnDefaultVersion(self):
        """Check the callback's app version when the module id is "default".

        With CURRENT_MODULE_ID set to "default", the shuffle request's
        callback is expected to carry ``self.major_version_id`` as its app
        version, use GET, and target the "default" queue.
        """
        # NOTE(review): this environment change is not undone within this
        # test; confirm that the fixture's teardown restores it.
        os.environ["CURRENT_MODULE_ID"] = "default"
        input_file1 = self._CreateInputFile()
        input_file2 = self._CreateInputFile()
        p = shuffler._ShuffleServicePipeline("testjob",
                                             [input_file1, input_file2])
        p.start()
        test_support.execute_until_empty(self.taskqueue)

        request = self.file_service.shuffle_request
        callback = request.callback()
        self.assertTrue(callback.url().startswith(
            "/mapreduce/pipeline/callback?pipeline_id="))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(self.major_version_id, callback.app_version_id())
        self.assertEqual("GET", callback.method())
        self.assertEqual("default", callback.queue())
    def testNoData(self):
        """Check that inputs with no data produce no shuffle request.

        A blobstore file is created and finalized with nothing written to it
        in this test, then passed twice as the pipeline input. No shuffle
        request may be issued, and the pipeline's default output must be an
        empty list.
        """
        blob_file = files.blobstore.create()
        files.finalize(blob_file)
        blob_key = files.blobstore.get_blob_key(blob_file)
        readable_name = files.blobstore.get_file_name(blob_key)

        job = shuffler._ShuffleServicePipeline(
            "testjob", [readable_name, readable_name])
        job.start()
        test_support.execute_until_empty(self.taskqueue)

        # Nothing should have been submitted for shuffling.
        self.assertEqual(None, self.file_service.shuffle_request)

        reloaded = shuffler._ShuffleServicePipeline.from_id(job.pipeline_id)
        self.assertEqual([], reloaded.outputs.default.value)
    def testError(self):
        """Check that an error callback leaves the pipeline aborted.

        The pipeline is started, the shuffle request's callback is replayed
        with ``&error=1`` appended to its URL, and the reloaded pipeline must
        report ``was_aborted``.
        """
        job = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
        job.start()
        test_support.execute_until_empty(self.taskqueue)

        shuffle_callback = self.file_service.shuffle_request.callback()

        # Replay the callback with the error flag appended to its URL.
        failing_url = shuffle_callback.url() + "&error=1"
        test_support.execute_task(
            {"url": failing_url, "method": shuffle_callback.method()})
        test_support.execute_until_empty(self.taskqueue)

        reloaded = shuffler._ShuffleServicePipeline.from_id(job.pipeline_id)
        self.assertTrue(reloaded.was_aborted)
Пример #11
0
  def testError(self):
    """Check that an error callback leaves the pipeline aborted.

    The pipeline is started, the shuffle request's callback is replayed with
    ``&error=1`` appended to its URL, and the reloaded pipeline must report
    ``was_aborted``.
    """
    job = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
    job.start()
    test_support.execute_until_empty(self.taskqueue)

    shuffle_callback = self.file_service.shuffle_request.callback()

    # Replay the callback with the error flag appended to its URL.
    failing_url = shuffle_callback.url() + "&error=1"
    test_support.execute_task(
        {"url": failing_url, "method": shuffle_callback.method()})
    test_support.execute_until_empty(self.taskqueue)

    reloaded = shuffler._ShuffleServicePipeline.from_id(job.pipeline_id)
    self.assertTrue(reloaded.was_aborted)
Пример #12
0
  def testError(self):
    """Check that an error callback moves the pipeline to a second attempt.

    The pipeline starts at ``current_attempt == 1``. After the shuffle
    request's callback is replayed with ``&error=1`` appended to its URL, the
    reloaded pipeline must report ``current_attempt == 2``.
    """
    input_file1 = self._CreateInputFile()
    input_file2 = self._CreateInputFile()

    p = shuffler._ShuffleServicePipeline("testjob", [input_file1, input_file2])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, p.current_attempt)
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    request = self.file_service.shuffle_request
    callback = request.callback()

    # Replay the callback with the error flag appended to its URL.
    callback_task = {
        "url": callback.url() + "&error=1",
        "method": callback.method(),
        }
    test_support.execute_task(callback_task)
    test_support.execute_until_empty(self.taskqueue)

    p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
    self.assertEqual(2, p.current_attempt)
    def testError(self):
        """Check that an error callback moves the pipeline to attempt two.

        The pipeline starts at ``current_attempt == 1``. After the shuffle
        request's callback is replayed with ``&error=1`` appended to its URL,
        the reloaded pipeline must report ``current_attempt == 2``.
        """
        input_file1 = self._CreateInputFile()
        input_file2 = self._CreateInputFile()

        p = shuffler._ShuffleServicePipeline("testjob",
                                             [input_file1, input_file2])
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(1, p.current_attempt)
        p.start()
        test_support.execute_until_empty(self.taskqueue)

        request = self.file_service.shuffle_request
        callback = request.callback()

        # Replay the callback with the error flag appended to its URL.
        callback_task = {
            "url": callback.url() + "&error=1",
            "method": callback.method(),
        }
        test_support.execute_task(callback_task)
        test_support.execute_until_empty(self.taskqueue)

        p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
        self.assertEqual(2, p.current_attempt)