def run(self, build_id, release_name, release_number, run_name,
        url=None, config_data=None, ref_url=None, ref_config_data=None):
    post = {
        'build_id': build_id,
        'release_name': release_name,
        'release_number': release_number,
        'run_name': run_name,
        'url': url,
        'config': config_data,
    }
    if ref_url and ref_config_data:
        post.update(
            ref_url=ref_url,
            ref_config=ref_config_data)

    call = yield fetch_worker.FetchItem(
        FLAGS.release_server_prefix + '/request_run',
        post=post,
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)

    if call.json and call.json.get('error'):
        raise RequestRunError(call.json.get('error'))

    if not call.json or not call.json.get('success'):
        raise RequestRunError('Bad response: %r' % call)
def testForbiddenScheme(self):
    """Tests that some schemes are not allowed."""
    self.worker.start()
    self.input_queue.put(fetch_worker.FetchItem('file:///etc/passwd'))
    time.sleep(0.1)
    result = self.output_queue.get()
    self.assertEquals(403, result.status_code)
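# A minimal sketch of the scheme guard the test above assumes: before
# fetching, the worker rejects anything that isn't http or https and
# reports a synthetic 403 status. The actual check inside fetch_worker may
# be structured differently; this only illustrates the behavior under test.
import urlparse

ALLOWED_SCHEMES = frozenset(['http', 'https'])


def scheme_is_allowed(url):
    """Returns True if the URL uses a fetchable scheme."""
    return urlparse.urlsplit(url).scheme in ALLOWED_SCHEMES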
def run(self, build_id, sha1sum, result_path):
    download_url = '%s/download?sha1sum=%s&build_id=%s' % (
        FLAGS.release_server_prefix, sha1sum, build_id)
    call = yield fetch_worker.FetchItem(
        download_url,
        result_path=result_path,
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)
    if call.status_code != 200:
        raise DownloadArtifactError('Bad response: %r' % call)
def run(self, queue_name, local_queue_workflow,
        max_tasks=1, wait_seconds=0):
    queue_url = '%s/%s' % (FLAGS.queue_server_prefix, queue_name)
    outstanding = []

    while True:
        next_count = max_tasks - len(outstanding)
        next_tasks = []

        if next_count > 0:
            logging.info(
                'Fetching %d tasks from queue_url=%r for workflow=%r',
                next_count, queue_url, local_queue_workflow)
            try:
                next_item = yield fetch_worker.FetchItem(
                    queue_url + '/lease',
                    post={'count': next_count},
                    username=FLAGS.release_client_id,
                    password=FLAGS.release_client_secret)
            except Exception, e:
                logging.error(
                    'Could not fetch work from queue_url=%r. %s: %s',
                    queue_url, e.__class__.__name__, e)
            else:
                if next_item.json:
                    if next_item.json.get('error'):
                        logging.error(
                            'Could not fetch work from queue_url=%r. %s',
                            queue_url, next_item.json['error'])
                    elif next_item.json['tasks']:
                        next_tasks = next_item.json['tasks']

        for index, task in enumerate(next_tasks):
            item = yield DoTaskWorkflow(
                queue_url, local_queue_workflow, task,
                wait_seconds=index * wait_seconds)
            outstanding.append(item)

        # Poll for new tasks frequently when we're currently handling
        # task load. Poll infrequently when there hasn't been anything
        # to do recently.
        poll_time = FLAGS.queue_idle_poll_seconds
        if outstanding:
            poll_time = FLAGS.queue_busy_poll_seconds

        yield timer_worker.TimerItem(poll_time)

        outstanding[:] = [x for x in outstanding if not x.done]
        logging.debug('%d items for %r still outstanding: %r',
                      len(outstanding), local_queue_workflow, outstanding)
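# Hypothetical bootstrap sketch showing how a client process might attach
# this queue poller to a local workflow. The queue name, workflow class,
# and coordinator wiring below are assumptions for illustration, not part
# of this module.
def register_pdiff_worker(coordinator):
    item = RemoteQueueWorkflow(
        'run-pdiff',              # assumed queue name
        DoPdiffQueueWorkflow,     # assumed local WorkflowItem sub-class
        max_tasks=4)
    item.root = True              # assumed flag marking a top-level workflow
    coordinator.input_queue.put(item)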
def run(self, queue_url, task_id, message, index):
    call = yield fetch_worker.FetchItem(
        queue_url + '/heartbeat',
        post={
            'task_id': task_id,
            'message': message,
            'index': index,
        },
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)

    if call.json and call.json.get('error'):
        raise HeartbeatError(call.json.get('error'))

    if not call.json or not call.json.get('success'):
        raise HeartbeatError('Bad response: %r' % call)
def run(self, build_id, run_name):
    call = yield fetch_worker.FetchItem(
        FLAGS.release_server_prefix + '/find_run',
        post={
            'build_id': build_id,
            'run_name': run_name,
        },
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)

    if call.json and call.json.get('error'):
        raise FindRunError(call.json.get('error'))

    if not call.json:
        raise FindRunError('Bad response: %r' % call)

    raise workers.Return(call.json)
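# How a caller consumes the workers.Return value above (a sketch): inside
# another WorkflowItem's run() generator, the yield expression evaluates to
# whatever the child workflow raised via workers.Return, mirroring how
# ReportRunWorkflow below collects upload results. The caller class and
# run name here are illustrative only.
class ExampleFindCaller(workers.WorkflowItem):
    def run(self, build_id):
        # 'found' receives the call.json dict from FindRunWorkflow.
        found = yield FindRunWorkflow(build_id, 'homepage')
        logging.info('Existing run: %r', found)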
def run(self, build_id, release_name, release_number):
    call = yield fetch_worker.FetchItem(
        FLAGS.release_server_prefix + '/runs_done',
        post={
            'build_id': build_id,
            'release_name': release_name,
            'release_number': release_number,
        },
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)

    if call.json and call.json.get('error'):
        raise RunsDoneError(call.json.get('error'))

    if not call.json or not call.json.get('success'):
        raise RunsDoneError('Bad response: %r' % call)

    raise workers.Return(call.json['results_url'])
def run(self, build_id, release_name, url):
    call = yield fetch_worker.FetchItem(
        FLAGS.release_server_prefix + '/create_release',
        post={
            'build_id': build_id,
            'release_name': release_name,
            'url': url,
        },
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)

    if call.json and call.json.get('error'):
        raise CreateReleaseError(call.json.get('error'))

    if not call.json or not call.json.get('release_number'):
        raise CreateReleaseError('Bad response: %r' % call)

    raise workers.Return(call.json['release_number'])
def run(self, build_id, release_name, release_number, run_name,
        diff_path=None, log_path=None, diff_failed=False, distortion=None):
    diff_id = None
    log_id = None
    if (isinstance(diff_path, basestring) and os.path.isfile(diff_path) and
            isinstance(log_path, basestring) and os.path.isfile(log_path)):
        diff_id, log_id = yield [
            UploadFileWorkflow(build_id, diff_path),
            UploadFileWorkflow(build_id, log_path),
        ]
    elif isinstance(log_path, basestring) and os.path.isfile(log_path):
        log_id = yield UploadFileWorkflow(build_id, log_path)

    post = {
        'build_id': build_id,
        'release_name': release_name,
        'release_number': release_number,
        'run_name': run_name,
    }
    if diff_id:
        post.update(diff_image=diff_id)
    if log_id:
        post.update(diff_log=log_id)
    if diff_failed:
        post.update(diff_failed='yes')
    if distortion:
        post.update(distortion=distortion)

    call = yield fetch_worker.FetchItem(
        FLAGS.release_server_prefix + '/report_run',
        post=post,
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)

    if call.json and call.json.get('error'):
        raise ReportPdiffError(call.json.get('error'))

    if not call.json or not call.json.get('success'):
        raise ReportPdiffError('Bad response: %r' % call)
def run(self, build_id, file_path):
    try:
        handle = StreamingSha1File(file_path, 'rb')
        upload = yield fetch_worker.FetchItem(
            FLAGS.release_server_prefix + '/upload',
            post={'build_id': build_id, 'file': handle},
            timeout_seconds=120,
            username=FLAGS.release_client_id,
            password=FLAGS.release_client_secret)

        if upload.json and upload.json.get('error'):
            raise UploadFileError(upload.json.get('error'))

        sha1sum = handle.hexdigest()
        if not upload.json or upload.json.get('sha1sum') != sha1sum:
            raise UploadFileError('Bad response: %r' % upload)

        raise workers.Return(sha1sum)
    except IOError:
        raise workers.Return(None)
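# A minimal sketch of the StreamingSha1File helper assumed above: a file
# object that folds every read() into a SHA-1 digest, so the checksum is
# ready for comparison once the upload has streamed the whole file. The
# real class may differ; this only shows the contract run() relies on
# (read() plus hexdigest()).
import hashlib


class StreamingSha1File(file):
    """File object that computes a SHA-1 hash of all data read from it."""

    def __init__(self, path, mode):
        file.__init__(self, path, mode)
        self.sha1 = hashlib.sha1()

    def read(self, size=-1):
        data = file.read(self, size)
        self.sha1.update(data)
        return data

    def hexdigest(self):
        return self.sha1.hexdigest()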
def run(self, build_id, release_name, release_number, run_name,
        image_path=None, log_path=None, url=None, config_path=None,
        ref_url=None, ref_image=None, ref_log=None, ref_config=None,
        baseline=None, run_failed=False):
    if baseline and (ref_url or ref_image or ref_log or ref_config):
        raise ReportRunError(
            'Cannot specify "baseline" along with any "ref_*" arguments.')

    upload_jobs = [
        UploadFileWorkflow(build_id, log_path),
    ]
    if image_path:
        image_index = len(upload_jobs)
        upload_jobs.append(UploadFileWorkflow(build_id, image_path))
    if config_path:
        config_index = len(upload_jobs)
        upload_jobs.append(UploadFileWorkflow(build_id, config_path))

    results = yield upload_jobs

    log_id = results[0]
    image_id = None
    config_id = None
    if image_path:
        image_id = results[image_index]
    if config_path:
        config_id = results[config_index]

    post = {
        'build_id': build_id,
        'release_name': release_name,
        'release_number': release_number,
        'run_name': run_name,
    }

    if baseline:
        # For a baseline run, the just-uploaded artifacts become the
        # reference side of the comparison instead of the candidate side.
        ref_url = url
        ref_log = log_id
        ref_image = image_id
        ref_config = config_id
        url = None
        log_id = None
        image_id = None
        config_id = None

    if url:
        post.update(url=url)
    if image_id:
        post.update(image=image_id)
    if log_id:
        post.update(log=log_id)
    if config_id:
        post.update(config=config_id)
    if run_failed:
        post.update(run_failed='yes')

    if ref_url:
        post.update(ref_url=ref_url)
    if ref_image:
        post.update(ref_image=ref_image)
    if ref_log:
        post.update(ref_log=ref_log)
    if ref_config:
        post.update(ref_config=ref_config)

    call = yield fetch_worker.FetchItem(
        FLAGS.release_server_prefix + '/report_run',
        post=post,
        username=FLAGS.release_client_id,
        password=FLAGS.release_client_secret)

    if call.json and call.json.get('error'):
        raise ReportRunError(call.json.get('error'))

    if not call.json or not call.json.get('success'):
        raise ReportRunError('Bad response: %r' % call)
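# Illustrative invocation (all parameter values below are made up): report
# a baseline run first, then a candidate run with the same run_name so the
# server can pair the two for diffing. Both calls would be yielded from
# within another WorkflowItem.run() generator, as sketched here.
class ExampleReportCaller(workers.WorkflowItem):
    def run(self, build_id):
        yield ReportRunWorkflow(
            build_id, 'v2-release', 12, '/home',
            image_path='before.png', log_path='before.log',
            url='http://live.example.com/', baseline=True)
        yield ReportRunWorkflow(
            build_id, 'v2-release', 12, '/home',
            image_path='after.png', log_path='after.log',
            url='http://staging.example.com/')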
class DoTaskWorkflow(workers.WorkflowItem):
    """Runs a local workflow for a task and marks it done in the remote queue.

    Args:
        queue_url: Base URL of the work queue.
        local_queue_workflow: WorkflowItem sub-class to create using
            parameters from the remote work payload that will execute the
            task.
        task: JSON payload of the task.
        wait_seconds: Wait this many seconds before starting work.
            Defaults to zero.
    """

    fire_and_forget = True

    def run(self, queue_url, local_queue_workflow, task, wait_seconds=0):
        LOGGER.info('Starting work item from queue_url=%r, '
                    'task=%r, workflow=%r, wait_seconds=%r',
                    queue_url, task, local_queue_workflow, wait_seconds)

        if wait_seconds > 0:
            yield timer_worker.TimerItem(wait_seconds)

        # Define a heartbeat closure that will return a workflow for
        # reporting status. This will auto-increment the index on each
        # call, so only the latest update will be saved.
        index = [0]
        task_id = task['task_id']

        def heartbeat(message):
            next_index = index[0]
            index[0] = next_index + 1
            return HeartbeatWorkflow(
                queue_url, task_id, message, next_index)

        payload = task['payload']
        payload.update(heartbeat=heartbeat)

        error = False
        try:
            yield local_queue_workflow(**payload)
        except Exception, e:
            LOGGER.exception('Exception while processing work from '
                             'queue_url=%r, task=%r', queue_url, task)
            yield heartbeat('%s: %s' % (e.__class__.__name__, str(e)))

            if (isinstance(e, GiveUpAfterAttemptsError) and
                    task['lease_attempts'] >= e.max_attempts):
                LOGGER.warning(
                    'Hit max attempts on task=%r, marking task as error',
                    task)
                error = True
            else:
                # The task has legitimately failed. Do not mark the task as
                # finished. Let it retry in the queue again.
                return

        finish_params = {'task_id': task_id}
        if error:
            finish_params['error'] = '1'

        try:
            finish_item = yield fetch_worker.FetchItem(
                queue_url + '/finish',
                post=finish_params,
                username=FLAGS.release_client_id,
                password=FLAGS.release_client_secret)
        except Exception, e:
            LOGGER.error('Could not finish work with '
                         'queue_url=%r, task=%r. %s: %s',
                         queue_url, task, e.__class__.__name__, e)
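# Hypothetical sketch of a local_queue_workflow consuming the injected
# heartbeat closure: DoTaskWorkflow adds a 'heartbeat' key to the task
# payload, so the local workflow can yield status updates that flow back to
# the remote queue as HeartbeatWorkflow items. The class name and steps
# below are illustrative, not part of this module.
class ExampleCaptureWorkflow(workers.WorkflowItem):
    def run(self, url, heartbeat=None):
        yield heartbeat('Fetching %s' % url)
        item = yield fetch_worker.FetchItem(url)
        yield heartbeat('Fetched %d bytes' % len(item.data or ''))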
def run(self, start_url, ignore_prefixes, upload_build_id,
        upload_release_name=None, heartbeat=None):
    if not ignore_prefixes:
        ignore_prefixes = []

    pending_urls = set([clean_url(start_url)])
    seen_urls = set()
    good_urls = set()

    yield heartbeat('Scanning for content')

    limit_depth = FLAGS.crawl_depth >= 0
    depth = 0
    while (not limit_depth or depth <= FLAGS.crawl_depth) and pending_urls:
        # TODO: Enforce a job-wide timeout on the whole process of
        # URL discovery, to make sure infinitely deep sites do not
        # cause this job to never stop.
        seen_urls.update(pending_urls)
        yield heartbeat(
            'Scanning %d pages for good urls' % len(pending_urls))
        output = yield [fetch_worker.FetchItem(u) for u in pending_urls]
        pending_urls.clear()

        for item in output:
            if not item.data:
                logging.debug('No data from url=%r', item.url)
                continue

            if item.headers.gettype() != 'text/html':
                logging.debug('Skipping non-HTML document url=%r', item.url)
                continue

            good_urls.add(item.url)
            found = extract_urls(item.url, item.data)
            pruned = prune_urls(
                found, start_url, [start_url], ignore_prefixes)
            new = pruned - seen_urls
            pending_urls.update(new)
            yield heartbeat(
                'Found %d new URLs from %s' % (len(new), item.url))

        yield heartbeat('Finished crawl at depth %d' % depth)
        depth += 1

    yield heartbeat('Found %d total URLs, %d good HTML pages; starting '
                    'screenshots' % (len(seen_urls), len(good_urls)))

    # TODO: Make the default release name prettier.
    if not upload_release_name:
        upload_release_name = str(datetime.datetime.utcnow())

    release_number = yield release_worker.CreateReleaseWorkflow(
        upload_build_id, upload_release_name, start_url)

    run_requests = []
    for url in good_urls:
        yield heartbeat('Requesting run for %s' % url)
        parts = urlparse.urlparse(url)
        run_name = parts.path

        config_dict = {
            'viewportSize': {
                'width': 1280,
                'height': 1024,
            }
        }
        if FLAGS.inject_css:
            config_dict['injectCss'] = FLAGS.inject_css
        if FLAGS.inject_js:
            config_dict['injectJs'] = FLAGS.inject_js
        if FLAGS.cookies:
            config_dict['cookies'] = json.loads(open(FLAGS.cookies).read())
        config_data = json.dumps(config_dict)

        run_requests.append(release_worker.RequestRunWorkflow(
            upload_build_id, upload_release_name, release_number,
            run_name, url, config_data))

    yield run_requests

    yield heartbeat('Marking runs as complete')
    release_url = yield release_worker.RunsDoneWorkflow(
        upload_build_id, upload_release_name, release_number)

    yield heartbeat('Results viewable at: %s' % release_url)
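# Rough sketches of the URL helpers the crawl above relies on; the real
# implementations live elsewhere and may differ. Assumed behavior only:
# clean_url() normalizes a URL by dropping its fragment, and prune_urls()
# keeps URLs under an allowed prefix while dropping ignored prefixes.
import urlparse


def clean_url(url):
    """Normalizes a URL by stripping its fragment."""
    scheme, netloc, path, query, _ = urlparse.urlsplit(url)
    return urlparse.urlunsplit((scheme, netloc, path, query, ''))


def prune_urls(url_set, start_url, allowed_list, ignored_list):
    """Keeps URLs under an allowed prefix that match no ignored prefix.

    start_url is accepted to mirror the call site above; this sketch does
    not use it.
    """
    result = set()
    for url in url_set:
        if not any(url.startswith(allowed) for allowed in allowed_list):
            continue
        if any(url.startswith(ignored) for ignored in ignored_list):
            continue
        result.add(clean_url(url))
    return result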