Example #1
    def _DispatchTracesAndWaitForResult(self, job, traces, num_instances):
        def _slice_it(li, cols=2):
            start = 0
            for i in xrange(cols):
                stop = start + len(li[i::cols])
                yield li[start:stop]
                start = stop

        # TODO(simonhatch): In the future it might be possible to specify only
        # a reducer and no mapper. Revisit this.
        bucket_path = cloud_config.Get().control_bucket_path + "/jobs/"
        mapper_url = '%s%s.mapper' % (bucket_path, job.key.id())
        mapper_text = job.mapper.encode('ascii', 'ignore')
        cloud_helper.WriteGCS(mapper_url, mapper_text)

        version = self._GetVersion()

        tasks = {}

        # Split the traces up into N buckets.
        for current_traces in _slice_it(traces, num_instances):
            task_id = str(uuid.uuid4())

            payload = {
                'revision': job.revision,
                'traces': json.dumps(current_traces),
                'result': '%s%s.result' % (bucket_path, task_id),
                'mapper': mapper_url,
                'mapper_function': job.mapper_function
            }
            taskqueue.add(queue_name='mapper-queue',
                          url='/cloud_worker/task',
                          target=version,
                          name=task_id,
                          params=payload)
            tasks[task_id] = {'status': 'IN_PROGRESS'}

        job.running_tasks = [task_id for task_id, _ in tasks.iteritems()]
        job.put()

        # On production servers, we could just sit and wait for the results, but
        # dev_server is single threaded and won't run any other tasks until the
        # current one is finished. We'll just do the easy thing for now and
        # queue a task to check for the result.
        timeout = (
            datetime.datetime.now() +
            datetime.timedelta(minutes=10)).strftime('%Y-%m-%d %H:%M:%S')
        taskqueue.add(queue_name='default',
                      url='/cloud_mapper/task',
                      target=version,
                      countdown=1,
                      params={
                          'jobid': job.key.id(),
                          'type': 'check',
                          'tasks': json.dumps(tasks),
                          'timeout': timeout
                      })
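
A minimal standalone sketch of the _slice_it idiom above, using made-up data: len(li[i::cols]) is the size of the i-th of cols round-robin slices, so summing those sizes walks contiguous, roughly equal buckets across the list.

def slice_it(li, cols=2):
    # Yield `cols` contiguous chunks whose lengths differ by at most one.
    start = 0
    for i in range(cols):
        stop = start + len(li[i::cols])
        yield li[start:stop]
        start = stop

print(list(slice_it([0, 1, 2, 3, 4, 5, 6], 3)))  # [[0, 1, 2], [3, 4], [5, 6]]
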
Example #2
    def get(self):
        form_html = _FORM_HTML.format(
            mapper=_DEFAULT_MAPPER,
            mapper_function=_DEFAULT_FUNCTION,
            reducer=_DEFAULT_REDUCER,
            reducer_function=_DEFAULT_REDUCER_FUNCTION,
            query='MAX_TRACE_HANDLES=10',
            corpus=cloud_config.Get().default_corpus)
        self.response.out.write(form_html)
Example #3
    def _QueryForTraces(self, corpus, query):
        payload = urllib.urlencode({'q': query})
        query_url = '%s/query?%s' % (corpus, payload)

        headers = {
            'X-URLFetch-Service-Id': cloud_config.Get().urlfetch_service_id
        }

        result = urlfetch.fetch(url=query_url,
                                payload=payload,
                                method=urlfetch.GET,
                                headers=headers,
                                follow_redirects=False,
                                deadline=10)
        return json.loads(result.content)
Example #4
    def get(self):
        self.response.headers['Content-Type'] = 'text/plain'
        raw_query = self.request.get('q')
        if not raw_query:
            raw_query = 'MAX_TRACE_HANDLES=100'

        query = corpus_query.CorpusQuery.FromString(raw_query)
        (gql, args) = query.AsGQLWhereClause()
        reports = trace_info.TraceInfo.gql(gql, *args)
        reports_json = json.dumps([
            'gs://%s/%s.gz' %
            (cloud_config.Get().trace_upload_bucket, i.key.string_id())
            for i in reports
        ])

        self.response.out.write(reports_json)
Example #5
  def _delete_traces(self):
    trace_bucket = cloud_config.Get().trace_upload_bucket
    deleted_traces = 0

    oldest_time = datetime.datetime.now() - datetime.timedelta(days=MAX_DAYS)
    q = TraceInfo.query(TraceInfo.date < oldest_time)

    for key in q.fetch(BATCH_SIZE, keys_only=True):
      gcs_path = '/%s/%s.gz' % (trace_bucket, key.id())
      try:
        gcs.delete(gcs_path, retry_params=default_retry_params)
      except gcs.NotFoundError:
        pass

      key.delete()
      deleted_traces += 1

    return deleted_traces
Example #6
    def post(self):
        trace_uuid = str(uuid.uuid4())

        gcs_path = '/%s/%s.gz' % (cloud_config.Get().trace_upload_bucket,
                                  trace_uuid)
        gcs_file = gcs.open(gcs_path,
                            'w',
                            content_type='application/octet-stream',
                            options={},
                            retry_params=default_retry_params)
        gcs_file.write(self.request.get('trace'))
        gcs_file.close()

        trace_object = trace_info.TraceInfo(id=trace_uuid)
        trace_object.remote_addr = os.environ["REMOTE_ADDR"]

        for arg in self.request.arguments():
            arg_key = arg.replace('-', '_').lower()
            if arg_key in trace_object._properties:
                try:
                    setattr(trace_object, arg_key, self.request.get(arg))
                except datastore_errors.BadValueError:
                    pass

        scenario_config = self.request.get('config')
        if scenario_config:
            config_json = json.loads(scenario_config)
            if 'scenario_name' in config_json:
                trace_object.scenario_name = config_json['scenario_name']

        tags_string = self.request.get('tags')
        if tags_string:
            # Tags are comma separated and should only include alphanumeric + '-'.
            if re.match('^[a-zA-Z0-9-,]+$', tags_string):
                trace_object.tags = tags_string.split(',')
            else:
                logging.warning(
                    'The provided tags string includes one or more invalid'
                    ' characters and will be ignored')

        trace_object.ver = self.request.get('product-version')
        trace_object.put()

        self.response.write(trace_uuid)
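
A hedged sketch of the tag validation used above, with made-up tag strings: the character class allows only letters, digits, '-' and ',', so a tags value containing spaces or other punctuation is ignored with a warning.

import re

_TAGS_RE = re.compile('^[a-zA-Z0-9-,]+$')  # same pattern as the handler above

for tags_string in ('smoke,nightly-run', 'bad tag,ok'):
    if _TAGS_RE.match(tags_string):
        print(tags_string.split(','))            # ['smoke', 'nightly-run']
    else:
        print('ignoring invalid tags: %s' % tags_string)
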
Example #7
    def _CheckOnReduceResults(self, job):
        if job.status != 'IN_PROGRESS':
            return

        tasks = json.loads(self.request.get('tasks'))

        # TODO: There's really only one reducer job at the moment
        results = None
        for task_id, _ in tasks.iteritems():
            task_results_path = '%s/jobs/%s.result' % (
                cloud_config.Get().control_bucket_path, task_id)
            stat_result = cloud_helper.StatGCS(task_results_path)
            if stat_result is not None:
                tasks[task_id]['status'] = 'DONE'
                results = task_results_path

        logging.info("Reduce results: %s" % str(tasks))

        if not results:
            timeout = datetime.datetime.strptime(self.request.get('timeout'),
                                                 '%Y-%m-%d %H:%M:%S')
            if datetime.datetime.now() > timeout:
                self._CancelTasks(tasks)
                job.status = 'ERROR'
                job.put()
                logging.error('Task timed out waiting for results.')
                return
            taskqueue.add(url='/cloud_mapper/task',
                          target=self._GetVersion(),
                          countdown=1,
                          params={
                              'jobid': job.key.id(),
                              'type': 'check_reduce_results',
                              'tasks': json.dumps(tasks),
                              'timeout': self.request.get('timeout')
                          })
            return

        logging.info("Finished all tasks.")

        job.status = 'COMPLETE'
        job.results = results
        job.put()
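
A small sketch of the timeout round-trip these check tasks rely on (the ten-minute deadline is illustrative): the deadline is serialized with strftime when the polling task is enqueued and parsed back with strptime on every check.

import datetime

_FMT = '%Y-%m-%d %H:%M:%S'

# When the job is dispatched: serialize a deadline some minutes out.
deadline_str = (datetime.datetime.now() +
                datetime.timedelta(minutes=10)).strftime(_FMT)

# Inside each polling task: parse it back and compare against now().
timed_out = datetime.datetime.now() > datetime.datetime.strptime(
    deadline_str, _FMT)
print('deadline=%s timed_out=%s' % (deadline_str, timed_out))
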
Example #8
    def _CheckOnMapResults(self, job):
        if job.status != 'IN_PROGRESS':
            return

        tasks = json.loads(self.request.get('tasks'))
        reducer_url = self.request.get('reducer')
        reducer_function = job.reducer_function
        revision = job.revision
        timeout = datetime.datetime.strptime(self.request.get('timeout'),
                                             '%Y-%m-%d %H:%M:%S')

        # TODO: There's no reducer yet, so we can't actually collapse multiple
        # results into one results file.
        mappers_done = True
        for task_id, task_values in tasks.iteritems():
            if task_values['status'] == 'DONE':
                continue
            task_results_path = '%s/jobs/%s.result' % (
                cloud_config.Get().control_bucket_path, task_id)
            stat_result = cloud_helper.StatGCS(task_results_path)
            if stat_result is not None:
                logging.info(str(stat_result))
                tasks[task_id]['status'] = 'DONE'
            else:
                mappers_done = False

        logging.info("Tasks: %s" % str(tasks))

        if not mappers_done and datetime.datetime.now() < timeout:
            taskqueue.add(url='/cloud_mapper/task',
                          target=self._GetVersion(),
                          countdown=1,
                          params={
                              'jobid': job.key.id(),
                              'type': 'check_map_results',
                              'reducer': reducer_url,
                              'tasks': json.dumps(tasks),
                              'timeout': self.request.get('timeout')
                          })
            return

        # Clear out any leftover tasks in case we just hit the timeout.
        self._CancelTasks(tasks)

        map_results = []
        for task_id, _ in tasks.iteritems():
            if tasks[task_id]['status'] != 'DONE':
                continue
            task_results_path = '%s/jobs/%s.result' % (
                cloud_config.Get().control_bucket_path, task_id)
            map_results.append(task_results_path)

        # We'll only do 1 reduce job for now, maybe shard it better later
        logging.info("Kicking off reduce.")
        task_id = str(uuid.uuid4())
        payload = {
            'revision': revision,
            'traces': json.dumps(map_results),
            'result': '%s/jobs/%s.result' % (
                cloud_config.Get().control_bucket_path, task_id),
            'reducer': reducer_url,
            'reducer_function': reducer_function,
            'timeout': job.function_timeout,
        }
        taskqueue.add(queue_name='mapper-queue',
                      url='/cloud_worker/task',
                      target=self._GetVersion(),
                      name=task_id,
                      params=payload)

        tasks = {}
        tasks[task_id] = {'status': 'IN_PROGRESS'}

        job.running_tasks = [task_id for task_id, _ in tasks.iteritems()]
        job.put()

        reduce_tasks = {}
        reduce_tasks[task_id] = {'status': 'IN_PROGRESS'}

        # On production servers, we could just sit and wait for the results, but
        # dev_server is single threaded and won't run any other tasks until the
        # current one is finished. We'll just do the easy thing for now and
        # queue a task to check for the result.
        reducer_timeout = int(job.function_timeout)
        timeout = (datetime.datetime.now() + datetime.timedelta(
            seconds=reducer_timeout)).strftime('%Y-%m-%d %H:%M:%S')
        taskqueue.add(queue_name='default',
                      url='/cloud_mapper/task',
                      target=self._GetVersion(),
                      countdown=1,
                      params={
                          'jobid': job.key.id(),
                          'type': 'check_reduce_results',
                          'tasks': json.dumps(reduce_tasks),
                          'timeout': timeout
                      })
Example #9
    def post(self):
        os.putenv('PI_CLOUD_WORKER', '1')
        try:
            traces = json.loads(self.request.get('traces'))
            mapper = self.request.get('mapper')
            map_function = self.request.get('mapper_function')
            revision = self.request.get('revision')
            result_path = self.request.get('result')

            config = cloud_config.Get()

            if not _is_devserver():
                subprocess.call(['git', 'pull'], cwd=config.catapult_path)
                subprocess.call(['git', 'checkout', revision],
                                cwd=config.catapult_path)
                job_path = os.path.join(config.catapult_path, 'perf_insights',
                                        'bin', 'map_traces')
                cwd = config.catapult_path
            else:
                job_path = os.path.join('perf_insights', 'bin', 'map_traces')
                cwd = os.path.abspath(
                    os.path.join(os.path.dirname(__file__), '../../../..'))

            # Download all the traces
            temp_directory = _DownloadTraces(traces)

            # Download the mapper
            map_file_handle, map_file_name = tempfile.mkstemp()
            with open(map_file_name, 'w') as f:
                f.write(cloud_helper.ReadGCS(mapper))

            # Output goes here.
            output_handle, output_name = tempfile.mkstemp()

            try:
                map_handle = '%s:%s' % (map_file_name, map_function)
                args = [
                    job_path, '--jobs=-1', '--corpus=local-directory',
                    map_handle, '--trace_directory', temp_directory,
                    '--output-file', output_name
                ]
                logging.info("Executing map job: %s" % args)

                map_job = subprocess.Popen(args,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           cwd=cwd)
                stdout, stderr = map_job.communicate()

                logging.info('stdout:\n' + stdout)
                logging.info('stderr:\n' + stderr)

                with open(output_name, 'r') as f:
                    cloud_helper.WriteGCS(result_path, f.read())
            finally:
                os.close(output_handle)
                os.unlink(output_name)
                os.close(map_file_handle)
                os.unlink(map_file_name)
                shutil.rmtree(temp_directory)
        except Exception:
            logging.info(traceback.format_exc())
Example #10
    def post(self):
        os.putenv('PI_CLOUD_WORKER', '1')
        try:
            traces = json.loads(self.request.get('traces'))
            mapper = self.request.get('mapper')
            map_function = self.request.get('mapper_function')
            reducer = self.request.get('reducer')
            reducer_function = self.request.get('reducer_function')
            revision = self.request.get('revision')
            result_path = self.request.get('result')
            timeout = self.request.get('timeout')
            if timeout:
                timeout = int(timeout)

            config = cloud_config.Get()

            if not _is_devserver():
                logging.info("Updating catapult checkout to: %s" % revision)
                subprocess.call(['git', 'checkout', revision],
                                cwd=config.catapult_path)
                job_path = os.path.join(config.catapult_path, 'perf_insights',
                                        'bin', 'gce_instance_map_job')
                cwd = config.catapult_path
            else:
                logging.info("DevServer: Ignoring update step.")
                job_path = os.path.join('perf_insights', 'bin',
                                        'gce_instance_map_job')
                cwd = os.path.abspath(
                    os.path.join(os.path.dirname(__file__), '../../../..'))

            # Download all the traces
            temp_directory = _DownloadTraces(traces)

            # Output goes here.
            output_handle, output_name = tempfile.mkstemp()

            try:
                args = [
                    job_path, '--corpus=local-directory', '--trace_directory',
                    temp_directory, '--output-file', output_name
                ]
                if mapper:
                    # Download the mapper
                    _, map_file_name = tempfile.mkstemp()
                    with open(map_file_name, 'w') as f:
                        f.write(cloud_helper.ReadGCS(mapper))
                    map_handle = '%s:%s' % (map_file_name, map_function)
                    args.extend(['--map_function_handle', map_handle])
                if reducer:
                    # Download the reducer
                    _, reducer_file_name = tempfile.mkstemp()
                    with open(reducer_file_name, 'w') as f:
                        f.write(cloud_helper.ReadGCS(reducer))
                    reducer_handle = '%s:%s' % (reducer_file_name,
                                                reducer_function)
                    args.extend(['--reduce_function_handle', reducer_handle])
                logging.info("Executing map job: %s" % ' '.join(args))

                map_job = subprocess.Popen(args,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           cwd=cwd,
                                           preexec_fn=os.setsid)
                start_time = datetime.datetime.now()
                while (datetime.datetime.now() - start_time <
                       datetime.timedelta(seconds=timeout)):
                    time.sleep(1)
                    # poll() returns the exit code (possibly 0) once the job
                    # finishes, so compare against None, not truthiness.
                    if map_job.poll() is not None:
                        break

                if map_job.poll() is None:
                    logging.warning('Job timed out, terminating.')
                    # TODO: Kill child processes.
                    os.killpg(os.getpgid(map_job.pid), signal.SIGTERM)

                stdout = ''
                stderr = ''
                if map_job.stdout:
                    stdout = map_job.stdout.read()
                if map_job.stderr:
                    stderr = map_job.stderr.read()

                logging.info('stdout:\n' + stdout)
                logging.info('stderr:\n' + stderr)

                with open(output_name, 'r') as f:
                    logging.info('Writing result to: %s' % result_path)
                    cloud_helper.WriteGCS(result_path, f.read())
            finally:
                os.close(output_handle)
                os.unlink(output_name)
                shutil.rmtree(temp_directory)
        except Exception:
            logging.info(traceback.format_exc())
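
A standalone sketch of the poll-with-deadline loop above (the child command and timings are arbitrary): Popen.poll() returns None while the process is still running and its exit code once it finishes, which is why the loop should compare against None rather than rely on truthiness, since a clean exit code of 0 is falsy.

import datetime
import subprocess
import time

proc = subprocess.Popen(['sleep', '2'])
deadline = datetime.datetime.now() + datetime.timedelta(seconds=30)
while datetime.datetime.now() < deadline:
    time.sleep(1)
    if proc.poll() is not None:  # finished; exit code may legitimately be 0
        break

if proc.poll() is None:          # deadline hit: stop the child
    proc.terminate()
    proc.wait()
print('exit code: %r' % proc.returncode)
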