def send_to_api(self, path, post, access_token):
    ctx = ndb.get_context()
    try:
        resp = yield ctx.urlfetch(
            'https://alpha-api.app.net/stream/0/%s' % path,
            payload=json.dumps(post),
            deadline=30,
            method='POST',
            headers={
                'Authorization': 'Bearer %s' % access_token,
                'Content-Type': 'application/json',
            })
    except Exception:
        logger.exception('Failed to post path: %s data: %s' % (path, post))
        raise deferred.SingularTaskFailure()

    parsed_resp = json.loads(resp.content)
    if resp.status_code == 401:
        logger.info('unauthorized')
        yield self.handle_unauthorized(parsed_resp, post)
        raise deferred.PermanentTaskFailure()
    elif resp.status_code == 200:
        self.handle_success(parsed_resp, post)
    elif resp.status_code == 400:
        yield self.handle_bad_response(parsed_resp, post)
        raise deferred.PermanentTaskFailure()
    elif resp.status_code == 403:
        yield self.handle_forbidden(parsed_resp, post)
        raise deferred.PermanentTaskFailure()
    else:
        logger.warn("Couldn't post entry key=%s. Error: %s Post:%s",
                    self.entry_key, parsed_resp, post)
        raise deferred.SingularTaskFailure()

def deferred_ratings():
    """ This is the deferred ratings table calculation process """
    # Disable the in-context cache to save memory
    # (it doesn't give any speed advantage for this processing)
    Context.disable_cache()

    t0 = time.time()

    try:
        _create_ratings()
    except DeadlineExceededError:
        # Hit deadline: save the stuff we already have and
        # defer a new task to continue where we left off
        logging.error(u"Deadline exceeded in ratings, failing permanently")
        # Raising PermanentTaskFailure prevents this task from being run again
        raise deferred.PermanentTaskFailure()
    except Exception as ex:
        logging.error(
            u"Exception in ratings, failing permanently: {0}".format(ex))
        # Avoid having the task retried
        raise deferred.PermanentTaskFailure()

    t1 = time.time()
    logging.info(
        u"Ratings calculation finished in {0:.2f} seconds".format(t1 - t0))

    StatsModel.log_cache_stats()
    # Do not maintain the cache in memory between runs
    StatsModel.clear_cache()

def _put_annotations_batch(annotation_data):
    try:
        annotation_objects = []
        qry = BookRecord.query(
            BookRecord.item_id_array.IN(annotation_data.keys()))
        keys = qry.fetch(MAX_OBJECTS_PER_BATCH, keys_only=True)

        for key in keys:
            first_item_id = key.string_id().split('|')[0]
            if first_item_id not in annotation_data:
                continue
            short_text = annotation_data[first_item_id][0]
            long_text = annotation_data[first_item_id][1]
            anno_key = ndb.Key(BookAnnotation, key.string_id())
            annotation = BookAnnotation(key=anno_key,
                                        short=short_text,
                                        long=long_text)
            annotation_objects.append(annotation)

        ndb.put_multi(annotation_objects)
        logging.info("{} annotations put into datastore".format(
            len(annotation_objects)))
    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()

def main(self, sequence_num): """Main method of the deferred task.""" with Namespace(self._namespace): logging.info('Job started: %s w/ sequence number %d', self._job_name, sequence_num) time_started = time.time() try: db.run_in_transaction(DurableJobEntity._start_job, self._job_name, sequence_num) result = self.run() db.run_in_transaction(DurableJobEntity._complete_job, self._job_name, sequence_num, transforms.dumps(result), long(time.time() - time_started)) logging.info('Job completed: %s', self._job_name) except (Exception, runtime.DeadlineExceededError) as e: logging.error(traceback.format_exc()) logging.error('Job failed: %s\n%s', self._job_name, e) db.run_in_transaction(DurableJobEntity._fail_job, self._job_name, sequence_num, traceback.format_exc(), long(time.time() - time_started)) raise deferred.PermanentTaskFailure(e)
def main(self): """Main method of the deferred task.""" logging.info('Job started: %s', self._job_name) time_started = time.time() old_namespace = namespace_manager.get_namespace() try: namespace_manager.set_namespace(self._namespace) try: db.run_in_transaction(DurableJobEntity._start_job, self._job_name) result = self.run() db.run_in_transaction(DurableJobEntity._complete_job, self._job_name, transforms.dumps(result), long(time.time() - time_started)) logging.info('Job completed: %s', self._job_name) except (Exception, runtime.DeadlineExceededError) as e: logging.error(traceback.format_exc()) logging.error('Job failed: %s\n%s', self._job_name, e) db.run_in_transaction(DurableJobEntity._fail_job, self._job_name, traceback.format_exc(), long(time.time() - time_started)) raise deferred.PermanentTaskFailure(e) finally: namespace_manager.set_namespace(old_namespace)
def deferred_update():
    """ Update all users in the datastore with lowercase nick and full name """
    logging.info("Deferred user update starting")
    CHUNK_SIZE = 200
    count = 0
    offset = 0
    Context.disable_cache()
    try:
        q = UserModel.query()
        while True:
            ulist = []
            chunk = 0
            for um in q.fetch(CHUNK_SIZE, offset=offset):
                chunk += 1
                if um.nick_lc is None:
                    try:
                        um.nick_lc = um.nickname.lower()
                        um.name_lc = um.prefs.get("full_name", "").lower() \
                            if um.prefs else ""
                        ulist.append(um)
                    except Exception as e:
                        logging.info(
                            "Exception in deferred_update() when setting "
                            "nick_lc: {0}".format(e))
            if ulist:
                try:
                    ndb.put_multi(ulist)
                    count += len(ulist)
                except Exception as e:
                    logging.info(
                        "Exception in deferred_update() when updating "
                        "ndb: {0}".format(e))
            if chunk < CHUNK_SIZE:
                break
            offset += CHUNK_SIZE
    except Exception as e:
        logging.info(
            "Exception in deferred_update(): {0}, already updated "
            "{1} records".format(e, count))
        # Do not retry the task
        raise deferred.PermanentTaskFailure()
    logging.info("Completed updating {0} user records".format(count))

def ExtractDetailsCrawlerIssueTracker(project_name, bug_id):
    """Extract useful information for a given bug."""
    logging.debug('Scraping details for bug %s in project %s.',
                  bug_id, project_name)
    phclient = gdata.projecthosting.client.ProjectHostingClient()
    try:
        query = gdata.projecthosting.client.Query(issue_id=bug_id)
        feed = phclient.get_issues(project_name, query=query)
    except gdata.client.RequestError as e:
        if ('HTTP_X_APPENGINE_TASKRETRYCOUNT' in environ and
                int(environ['HTTP_X_APPENGINE_TASKRETRYCOUNT']) < _MAX_RETRIES):
            if e.status == 403:
                # Skip 403 (Unauthorized) errors.
                logging.info(
                    'Unauthorized to access this issue, skipping: %s, %s',
                    bug_id, project_name)
                # Nuke cache data for private bugs.
                url_bug_map.DeleteBugAndMappings(
                    bug_id, project_name, bugs_util.Provider.ISSUETRACKER)
                return
            else:
                raise BugCrawlerError(
                    'Error while trying to get details for %s. Error %s' %
                    (str(bug_id), str(e)))
        else:
            raise deferred.PermanentTaskFailure(
                'Error hit too many times, aborting '
                'extracting details for bug %s on project %s. Error: %s' %
                (str(bug_id), str(project_name), str(e)))

def main(self, sequence_num): """Main method of the deferred task.""" with Namespace(self._namespace): logging.info('Job started: %s w/ sequence number %d', self._job_name, sequence_num) time_started = time.time() try: # Check we haven't been canceled before we start. if self._already_finished(sequence_num): logging.info( 'Job %s sequence %d already canceled or subsequent ' 'run completed; not running this version.', self._job_name, sequence_num) return db.run_in_transaction(DurableJobEntity._start_job, self._job_name, sequence_num) result = self.run() db.run_in_transaction(DurableJobEntity._complete_job, self._job_name, sequence_num, transforms.dumps(result), long(time.time() - time_started)) logging.info('Job completed: %s', self._job_name) except (Exception, runtime.DeadlineExceededError) as e: logging.error(traceback.format_exc()) logging.error('Job failed: %s\n%s', self._job_name, e) db.run_in_transaction(DurableJobEntity._fail_job, self._job_name, sequence_num, traceback.format_exc(), long(time.time() - time_started)) raise deferred.PermanentTaskFailure(e)
def deferred_check_user(user_id):
    user = ModelProxy.User.get_by_id(user_id)
    if not user:
        raise deferred.PermanentTaskFailure(
            "User id {} is invalid.".format(user_id))
    unique_email_address(user)
    unique_final_submission(user)
    unique_group(user)

def insert_gtest_results(build_step_key):
    """Inserts GTest results into the datastore, replacing any existing ones.

    Also records the parser version used.
    """
    step = BuildStep.get(build_step_key)

    log_contents = ''
    if step.log_gs:
        with cloudstorage.open(step.log_gs) as gs_file:
            log_contents = html2text(gs_file.read().decode('utf-8', 'replace'))
    else:
        try:
            blob_reader = blobstore.BlobReader(step.log_stdio)
            log_contents = html2text(blob_reader.read().decode(
                'utf-8', 'replace'))
        except (ValueError, blobstore.BlobNotFoundError) as e:
            raise deferred.PermanentTaskFailure(e)

    gtest_results = gtest_parser.parse(log_contents)

    to_put = []
    for fullname, result in gtest_results.iteritems():
        # Only store failure results.
        if result['is_successful']:
            continue
        if isinstance(result['log'], unicode):
            log = db.Text(result['log'])
        else:
            log = db.Text(result['log'], encoding='utf-8')
        result_entity = GTestResult(
            parent=db.Key.from_path('GTestResult', str(step.key())),
            build_step=step,
            time_finished=step.time_finished,
            gtest_parser_version=gtest_parser.VERSION,
            is_crash_or_hang=result['is_crash_or_hang'],
            fullname=fullname,
            run_time_ms=result['run_time_ms'],
            log=log)
        to_put.append(result_entity)
    for chunk in chunks(to_put, BATCH_SIZE):
        db.put(chunk)

    def tx_parser_version():
        step = BuildStep.get(build_step_key)
        orig_parser_version = step.gtest_parser_version
        if step.gtest_parser_version < gtest_parser.VERSION:
            step.gtest_parser_version = gtest_parser.VERSION
            step.put()
        return (orig_parser_version, step.gtest_parser_version)

    _, parser_version = db.run_in_transaction_custom_retries(
        10, tx_parser_version)

    query = GTestResult.all(keys_only=True)
    query.filter('build_step =', build_step_key)
    query.filter('gtest_parser_version <', parser_version)
    db.delete(query)

def main(self, sequence_num):
    logging.info('Drive job waking up')

    job = self.load()
    if not job:
        raise deferred.PermanentTaskFailure(
            'Job object for {} not found!'.format(self._job_name))
    if job.has_finished:
        return

    try:
        # pylint: disable=protected-access
        drive_manager = drive_api_manager._DriveManager.from_app_context(
            self._app_context)
        # pylint: enable=protected-access
    except errors.NotConfigured:
        self.complete(sequence_num)
        return
    except errors.Misconfigured as error:
        logging.error('%s: Drive is misconfigured in %s: %s',
                      self._job_name, self._app_context.get_title(), error)
        raise deferred.PermanentTaskFailure('Job {} failed: {}'.format(
            self._job_name, error))

    try:
        dto = self.get_a_valid_work_item()
    except IndexError:
        self.complete(sequence_num)
        return

    try:
        logging.info('Starting download of %s', dto.title)
        drive_manager.download_file(dto)
        logging.info('Finished download of %s', dto.title)
    except Exception as error:  # pylint: disable=broad-except
        # Normally errors.Error covers everything, but this covers the
        # possibility of an unexpected parse error.
        logging.info('Failed to sync %s (%s) from drive: %s',
                     dto.title, dto.key, error)

    deferred.defer(self.main, sequence_num)

def update_majors():
    try:
        ndb.delete_multi(Major.get_majors().iter(keys_only=True))
        all_majors = get_majors()
        logging.info("All majors extracted: " + str(len(all_majors)))
        for major in all_majors:
            logging.info(major['discipline'] + " was created")
            deferred.defer(create_major, major)
    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()

def reparse_suppression_results(build_step_key, _build_step_name):
    step = BuildStep.get(build_step_key)

    log_contents = ''
    if step.log_gs:
        with cloudstorage.open(step.log_gs) as gs_file:
            log_contents = html2text(gs_file.read().decode('utf-8', 'replace'))
    else:
        try:
            blob_reader = blobstore.BlobReader(step.log_stdio)
            log_contents = html2text(blob_reader.read().decode(
                'utf-8', 'replace'))
        except (ValueError, blobstore.BlobNotFoundError) as e:
            raise deferred.PermanentTaskFailure(e)

def _run(self):
    self.job_dump.update_status(self.MAPPING)
    try:
        self.job_mapper.run()
        self.job_dump.update_status(self.REDUCING)
        self.job_reducer.run(self.job_dump.map_result)
        if not self.save_output:
            self.clean_up()
        self.job_dump.update_status(self.SUCCESS)
    except JobException as e:
        self.clean_up()
        self.job_dump.error = '%s: %s' % (e.job_type, e.message)
        self.job_dump.update_status(self.FAILURE)
        raise deferred.PermanentTaskFailure()

def create_suggestions_json(suggestions_key):
    suggestions = suggestions_key.get()
    item_ids = suggestions.key.string_id()
    assert isinstance(suggestions, SuggestionsRecord)
    if not suggestions.completed:
        logging.warning(
            "create_suggestions_json for {} called although "
            "suggestions are still not completed.".format(item_ids))
        # deferred.defer(create_suggestions_json, suggestions_key,
        #                _countdown=3)
        return

    json_object = {
        'version': Suggester.VERSION,
        'item_ids': item_ids,
        'job_started': suggestions.job_started.isoformat()
    }
    original_book = suggestions.original_book.get()
    if not original_book:
        raise deferred.PermanentTaskFailure(
            "Original book not found for {} "
            "when creating JSON.".format(item_ids))
    assert isinstance(original_book, BookRecord)
    json_object['original_book'] = {
        'author': original_book.author,
        'title': original_book.title,
        'year': original_book.year
    }
    json_object['status'] = 'completed'
    json_object['suggestions'] = []
    book_records = ndb.get_multi(suggestions.books)
    assert len(book_records) == len(suggestions.books_prediction)
    for i in xrange(len(book_records)):
        book = book_records[i]
        assert isinstance(book, BookRecord)
        json_object['suggestions'].append({
            'author': book.author,
            'title': book.title,
            'item_ids': book.key.string_id(),
            'prediction': suggestions.books_prediction[i]
        })

    precomputed_json = json.dumps(json_object, indent=2)
    suggestions.json = precomputed_json
    suggestions.put()
    logging.info("Created and saved JSON for {}".format(item_ids))

def post(self): """Adds a BigQuery row to Datastore and streams it using a deferred task. Raises: deferred.PermanentTaskFailure: if we encounter any exception to avoid adding duplicate rows during task retries. """ payload = pickle.loads(self.request.body) bigquery_row_model.BigQueryRow.add(**payload) try: if bigquery_row_model.BigQueryRow.threshold_reached(): deferred.defer(bigquery_row_model.BigQueryRow.stream_rows) else: logging.info('Not streaming rows, thresholds not met.') except Exception as e: # pylint: disable=broad-except raise deferred.PermanentTaskFailure( 'Exception caught for BigQuery streaming: %s.' % e)
def wrapper(*args, **kwargs): """Wrapper for managed task decorator.""" status_entity = bootstrap_status_model.BootstrapStatus.get_or_insert( task_function.__name__) status_entity.description = _TASK_DESCRIPTIONS.get( task_function.__name__, task_function.__name__) status_entity.timestamp = datetime.datetime.utcnow() try: task_function(*args, **kwargs) status_entity.success = True status_entity.details = None status_entity.put() except Exception as e: status_entity.success = False status_entity.details = '{} {}'.format(str(type(e)), str(e)) status_entity.put() raise deferred.PermanentTaskFailure( 'Task {} failed; error: {}'.format(task_function.__name__, status_entity.details))
def SpawnDetailsCrawlersIssueTracker(recent_issues, project_name,
                                     skip_recent_check=False):
    """Queues the tasks to do the actual crawling for recent updates."""
    count = 0
    try:
        for issue in recent_issues:
            bug_id = issue['id']
            logging.info(
                'Adding crawler to the queue for issue_id %s, project: %s.',
                bug_id, project_name)
            end = bug_id.find('/')
            if end > 0:
                bug_id = bug_id[0:end]

            bug = bugs.GetBug(bug_id=bug_id, project=project_name,
                              provider=bugs_util.Provider.ISSUETRACKER)
            if bug:
                if not skip_recent_check and bug.last_update == issue['updated']:
                    logging.info('Bug %s is up-to-date.',
                                 bug.key().id_or_name())
                    count += 1
                    continue
                else:
                    logging.info('Bug %s needs to be updated.',
                                 bug.key().id_or_name())
            else:
                logging.info('Bug %s seems to be a new issue.', bug_id)

            deferred.defer(ExtractDetailsCrawlerIssueTracker, project_name,
                           bug_id, _queue='find-bugs-queue')
            count += 1
    except DeadlineExceededError:
        remaining = recent_issues[count:]
        deferred.defer(SpawnDetailsCrawlersIssueTracker, remaining,
                       project_name)
        # Stop this task from being retried; the deferred task above
        # continues the remaining work.
        raise deferred.PermanentTaskFailure(
            'Deadline exceeded, started a new SpawnDetailsCrawler'
            ' for the remaining %d urls.' % len(remaining))

def wrapper(*args, **kwargs):
    status_entity = bootstrap_status_model.BootstrapStatus.get_or_insert(
        task_function.__name__)
    docstring_first_line = re.search('^.*', task_function.__doc__)
    if docstring_first_line:
        status_entity.description = docstring_first_line.group(0)
    else:
        status_entity.description = task_function.__name__
    status_entity.timestamp = datetime.datetime.utcnow()
    try:
        task_function(*args, **kwargs)
        status_entity.success = True
        status_entity.details = None
        status_entity.put()
    except Exception as e:
        status_entity.success = False
        status_entity.details = '{} {}'.format(str(type(e)), str(e))
        status_entity.put()
        raise deferred.PermanentTaskFailure(
            'Task {} failed; error: {}'.format(task_function.__name__,
                                               status_entity.details))

def update_annotations_from_csv(filename="anotace.csv"):
    logging.info("Starting to update from CSV")
    my_default_retry_params = gcs.RetryParams(initial_delay=0.2,
                                              max_delay=5.0,
                                              backoff_factor=2,
                                              max_retry_period=15)
    gcs.set_default_retry_params(my_default_retry_params)
    # bucket_name = os.environ.get('BUCKET_NAME',
    #                              app_identity.get_default_gcs_bucket_name())
    bucket_name = "tisic-knih.appspot.com"
    bucket = '/' + bucket_name
    filename = bucket + '/' + filename
    try:
        if is_dev_server():
            gcs_file = open("anotace.csv")
        else:
            gcs_file = gcs.open(filename)
            # gcs_file.seek(1000)
        r = ucsv.reader(gcs_file, encoding='utf-8')
        r.next()  # Skip first line (header).
        annotation_data = {}
        for row_number, row in enumerate(r):
            if row_number % 10000 == 0:
                logging.info("Processing row number {} of CSV file.".format(
                    row_number))
            item_id = row[0]
            short_text = row[1]
            long_text = row[2]
            annotation_data[item_id] = (short_text, long_text)
            if len(annotation_data) > MAX_OBJECTS_PER_BATCH:
                deferred.defer(_put_annotations_batch, annotation_data)
                annotation_data = {}
        # Put the last, partially filled batch as well
        deferred.defer(_put_annotations_batch, annotation_data)
        gcs_file.close()
    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()

def __email_task():
    try:
        logging.info("Email task started")
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        stale_game_q = Game.query(Game.active == True,
                                  Game.updated < yesterday)
        stale_games = {}
        for game in stale_game_q.iter():
            player_key = game.active_player_key
            if player_key not in stale_games:
                stale_games[player_key] = []
            stale_games[player_key].append(game.key.id())
        __send_emails(stale_games)
        logging.info("Email task complete")
    except Exception:
        # It can be dangerous to allow exceptions to bubble up to the job
        # scheduler in this case -- depending on what happens, it can get
        # stuck in a retry loop and start spamming our users.
        # Instead, log the problem and raise a "stop doing that!" exception.
        msg = "Unhandled exception during email task, aborting"
        logging.exception(msg)
        raise deferred.PermanentTaskFailure(msg)

def CommitChangeSet(change_key):
    """Attempts to commit and delete a given RuleChangeSet."""
    change = change_key.get()
    if change is None:
        logging.info('Change no longer exists. (already committed?)')
        return

    logging.info('Committing a %s change set of %s rules for blockable %s',
                 change.change_type, len(change.rule_keys),
                 change.blockable_key.id())

    blockable = change.blockable_key.get()
    rules = ndb.get_multi(change.rule_keys)

    if change.change_type == constants.RULE_POLICY.WHITELIST:
        change_func = _Whitelist
    elif change.change_type == constants.RULE_POLICY.BLACKLIST:
        change_func = _Blacklist
    elif change.change_type == constants.RULE_POLICY.REMOVE:
        change_func = _Remove
    elif change.change_type in constants.RULE_POLICY.SET_INSTALLER:
        change_func = _ChangeInstallerState
    else:
        raise NotImplementedError

    try:
        change_func(blockable, rules)
    except api.RequestError:
        # For normal request errors, rely on the builtin task queue retry
        # settings.
        raise
    except Exception as e:  # pylint: disable=broad-except
        # For all other (likely fatal) errors, make sure the task doesn't
        # retry.
        raise deferred.PermanentTaskFailure(repr(e))
    else:
        for rule in rules:
            rule.is_committed = True
        ndb.put_multi(rules)
        change.key.delete()

def create_major(obj):
    try:
        major_info = parse_url(obj['link'])
        parsed = True
        if (len(major_info['first_year']['first_semester']) == 0 and
                len(major_info['first_year']['second_semester']) == 0 and
                len(major_info['second_year']['first_semester']) == 0 and
                len(major_info['second_year']['second_semester']) == 0 and
                len(major_info['third_year']['first_semester']) == 0 and
                len(major_info['third_year']['second_semester']) == 0 and
                len(major_info['fourth_year']['first_semester']) == 0 and
                len(major_info['fourth_year']['second_semester']) == 0):
            parsed = False
        obj['major_description'] = json.dumps(major_info)
        obj['parsed'] = parsed
        Major.add_new(obj)
    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()

def read_ophan(section_id=None):
    client = memcache.Client()
    last_read = client.get(section_id + ".epoch_seconds")
    if last_read and fresh(last_read):
        return

    if section_id == 'all':
        ophan_json = ophan.popular()
    else:
        ophan_json = ophan.popular(section_id=section_id)

    if not ophan_json:
        raise deferred.PermanentTaskFailure()

    ophan_data = json.loads(ophan_json)
    resolved_stories = [resolve_content(entry['url']) for entry in ophan_data]
    resolved_stories = [story for story in resolved_stories
                        if story is not None]

    base_key = 'all'
    if section_id:
        base_key = section_id

    if len(resolved_stories) > 0:
        client.set(base_key, json.dumps(resolved_stories))
        client.set(base_key + '.epoch_seconds', time.time())
        logging.info("Updated data for section %s; listing %d stories" %
                     (section_id, len(resolved_stories)))

def main(self): """Main method of the deferred task.""" logging.info('Job started: %s', self._job_name) time_started = time.time() old_namespace = namespace_manager.get_namespace() try: namespace_manager.set_namespace(self._namespace) try: result = self.run() DurableJobEntity.complete_job(self._job_name, json.dumps(result), long(time.time() - time_started)) logging.info('Job completed: %s', self._job_name) except Exception as e: logging.error(traceback.format_exc()) logging.error('Job failed: %s\n%s', self._job_name, e) DurableJobEntity.fail_job(self._job_name, traceback.format_exc(), long(time.time() - time_started)) raise deferred.PermanentTaskFailure(e) finally: namespace_manager.set_namespace(old_namespace)
                # Nuke cache data for private bugs.
                url_bug_map.DeleteBugAndMappings(
                    bug_id, project_name, bugs_util.Provider.ISSUETRACKER)
                return
            else:
                raise BugCrawlerError(
                    'Error while trying to get details for %s. Error %s' %
                    (str(bug_id), str(e)))
        else:
            raise deferred.PermanentTaskFailure(
                'Error hit too many times, aborting '
                'extracting details for bug %s on project %s. Error: %s' %
                (str(bug_id), str(project_name), str(e)))

    if not feed or not feed.entry:
        raise deferred.PermanentTaskFailure(
            'Failed to fetch full details for bug %s' % bug_id)

    entry = feed.entry[0]
    urls = []
    if entry.title.text:
        urls = [(u, url_bug_map.UrlPosition.TITLE)
                for u in url_util.ExtractUrls(entry.title.text)]
    if entry.content.text:
        urls.extend([(u, url_bug_map.UrlPosition.MAIN)
                     for u in url_util.ExtractUrls(entry.content.text)])

    comments = GetComments(project_name, bug_id, phclient)
    comments_text = GetTextInComments(comments)
    if comments_text:
        urls.extend([(u, url_bug_map.UrlPosition.COMMENTS)
                     for u in url_util.ExtractUrls(comments_text)])

def _send_to_keen(survey_key, course_key, answer_keys):
    events = []
    student = Student.query(
        Student.user == survey_key.get().participant).get()
    course = course_key.get()
    answers = ndb.get_multi(answer_keys)
    for answer in answers:
        question = answer.question.get()
        if answer.string_value != '':
            response = urlfetch.fetch(
                'http://text-processing.com/api/sentiment',
                payload=urllib.urlencode({'text': answer.string_value}),
                method=urlfetch.POST)
            # If we've been throttled, just give up and die
            if response.status_code == 503:
                continue
            elif response.status_code != 200:
                raise deferred.PermanentTaskFailure()
            sentiment = json.loads(response.content)
            answer.sentiment = sentiment['label']
            answer.put()
        lecturer = course.lecturer.get()
        event = {
            'question_key': question.key.urlsafe(),
            'survey_key': answer.key.parent().urlsafe(),
            'course_key': course.key.urlsafe(),
            'course': {
                'name': course.course.get().name,
                'department': course.course.get().department.urlsafe(),
                'faculty': course.course.get().faculty.urlsafe(),
                'school': course.course.get().faculty.get().school.urlsafe(),
            },
            'question_number': question.number,
            'question_text': question.question,
            'lecturer': {
                'key': lecturer.key.urlsafe(),
                'name': lecturer.name,
                'department': lecturer.department.get().name,
                'faculty': lecturer.department.get().faculty.get().name,
            },
            'student': {
                'key': student.key.urlsafe(),
                'age': student.calculate_age(),
                'gender': student.gender,
                'status': student.status,
                'year': student.year,
            },
        }
        if question.question_type == 'closed':
            event['response'] = answer.int_value
        else:
            event['sentiment'] = answer.sentiment
        events.append(event)
    keen.add_events({'answers': events})

def _send_mail_task(cls, notification_key, payload_key,
                    test_send_mail_fn=None):
    exception = None
    failed_permanently = False
    now = datetime.datetime.utcnow()
    # pylint: disable=unbalanced-tuple-unpacking,unpacking-non-sequence
    notification, payload = db.get([notification_key, payload_key])
    # pylint: enable=unbalanced-tuple-unpacking,unpacking-non-sequence
    send_mail_fn = (test_send_mail_fn if test_send_mail_fn
                    else mail.send_mail)
    sent = False

    COUNTER_SEND_MAIL_TASK_STARTED.inc()

    if not notification:
        COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
        raise deferred.PermanentTaskFailure(
            'Notification missing: ' + str(notification_key))

    if not payload:
        COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
        raise deferred.PermanentTaskFailure(
            'Payload missing: ' + str(payload_key))

    policy = _RETENTION_POLICIES.get(notification._retention_policy)
    if not policy:
        COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
        raise deferred.PermanentTaskFailure(
            'Unknown retention policy: ' + notification._retention_policy)

    if (cls._done(notification) or cls._failed(notification) or
            cls._sent(notification)):
        COUNTER_SEND_MAIL_TASK_SKIPPED.inc()
        COUNTER_SEND_MAIL_TASK_SUCCESS.inc()
        return

    if notification._recoverable_failure_count > _RECOVERABLE_FAILURE_CAP:
        message = (
            'Recoverable failure cap (%s) exceeded for notification with '
            'key %s') % (_RECOVERABLE_FAILURE_CAP, str(notification.key()))
        _LOG.error(message)
        permanent_failure = deferred.PermanentTaskFailure(message)

        try:
            COUNTER_SEND_MAIL_TASK_RECORD_FAILURE_CALLED.inc()
            cls._record_failure(
                notification, payload, permanent_failure, dt=now,
                permanent=True, policy=policy)
            COUNTER_SEND_MAIL_TASK_RECORD_FAILURE_SUCCESS.inc()
        # Must be vague. pylint: disable=broad-except
        except Exception as e:
            _LOG.error(
                cls._get_record_failure_error_message(
                    notification, payload, e))
            COUNTER_SEND_MAIL_TASK_RECORD_FAILURE_FAILED.inc()

        COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
        COUNTER_SEND_MAIL_TASK_FAILURE_CAP_EXCEEDED.inc()
        raise permanent_failure

def _run_stats(from_time, to_time):
    """ Runs a process to update user statistics and Elo ratings """

    logging.info(u"Generating stats from {0} to {1}".format(
        from_time, to_time))

    if from_time is None or to_time is None:
        # Time range must be specified
        return

    if from_time >= to_time:
        # Null time range
        return

    # Iterate over all finished games within the time span in temporal order
    q = GameModel.query(ndb.AND(GameModel.ts_last_move > from_time,
                                GameModel.ts_last_move <= to_time)) \
        .order(GameModel.ts_last_move) \
        .filter(GameModel.over == True)

    # The accumulated user statistics
    users = dict()

    def _init_stat(user_id, robot_level):
        """ Returns the newest StatsModel instance available for the given user """
        return StatsModel.newest_before(from_time, user_id, robot_level)

    cnt = 0
    ts_last_processed = None

    try:
        # Use i as a progress counter
        i = 0
        for gm in iter_q(q, chunk_size=250):
            i += 1
            lm = Alphabet.format_timestamp(gm.ts_last_move or gm.timestamp)
            p0 = None if gm.player0 is None else gm.player0.id()
            p1 = None if gm.player1 is None else gm.player1.id()
            robot_game = (p0 is None) or (p1 is None)
            if robot_game:
                rl = gm.robot_level
            else:
                rl = 0
            s0 = gm.score0
            s1 = gm.score1

            if (s0 == 0) and (s1 == 0):
                # When a game ends by resigning immediately,
                # make sure that the weaker player doesn't get
                # Elo points for a draw; in fact, ignore such
                # a game altogether in the statistics
                continue

            if p0 is None:
                k0 = "robot-" + str(rl)
            else:
                k0 = p0
            if p1 is None:
                k1 = "robot-" + str(rl)
            else:
                k1 = p1

            if k0 in users:
                urec0 = users[k0]
            else:
                users[k0] = urec0 = _init_stat(p0, rl if p0 is None else 0)
            if k1 in users:
                urec1 = users[k1]
            else:
                users[k1] = urec1 = _init_stat(p1, rl if p1 is None else 0)

            # Number of games played
            urec0.games += 1
            urec1.games += 1
            if not robot_game:
                urec0.human_games += 1
                urec1.human_games += 1

            # Total scores
            urec0.score += s0
            urec1.score += s1
            urec0.score_against += s1
            urec1.score_against += s0
            if not robot_game:
                urec0.human_score += s0
                urec1.human_score += s1
                urec0.human_score_against += s1
                urec1.human_score_against += s0

            # Wins and losses
            if s0 > s1:
                urec0.wins += 1
                urec1.losses += 1
            elif s1 > s0:
                urec1.wins += 1
                urec0.losses += 1
            if not robot_game:
                if s0 > s1:
                    urec0.human_wins += 1
                    urec1.human_losses += 1
                elif s1 > s0:
                    urec1.human_wins += 1
                    urec0.human_losses += 1

            # Find out whether players are established or beginners
            est0 = urec0.games > ESTABLISHED_MARK
            est1 = urec1.games > ESTABLISHED_MARK

            # Save the Elo point state used in the calculation
            gm.elo0, gm.elo1 = urec0.elo, urec1.elo

            # Compute the Elo points of both players
            adj = _compute_elo((urec0.elo, urec1.elo), s0, s1, est0, est1)

            # When an established player is playing a beginning (provisional)
            # player, leave the Elo score of the established player unchanged

            # Adjust player 0
            if est0 and not est1:
                adj = (0, adj[1])
            gm.elo0_adj = adj[0]
            urec0.elo += adj[0]

            # Adjust player 1
            if est1 and not est0:
                adj = (adj[0], 0)
            gm.elo1_adj = adj[1]
            urec1.elo += adj[1]

            # If not a robot game, compute the human-only Elo
            if not robot_game:
                gm.human_elo0, gm.human_elo1 = urec0.human_elo, urec1.human_elo
                adj = _compute_elo((urec0.human_elo, urec1.human_elo),
                                   s0, s1, est0, est1)
                # Adjust player 0
                if est0 and not est1:
                    adj = (0, adj[1])
                gm.human_elo0_adj = adj[0]
                urec0.human_elo += adj[0]
                # Adjust player 1
                if est1 and not est0:
                    adj = (adj[0], 0)
                gm.human_elo1_adj = adj[1]
                urec1.human_elo += adj[1]

            # Save the game object with the new Elo adjustment statistics
            gm.put()

            # Save the last processed timestamp
            ts_last_processed = lm
            cnt += 1

            # Report on our progress
            if i % 1000 == 0:
                logging.info(u"Processed {0} games".format(i))

    except DeadlineExceededError:
        # Hit deadline: save the stuff we already have and
        # defer a new task to continue where we left off
        logging.info(
            u"Deadline exceeded in stats loop after {0} games and {1} users"
            .format(cnt, len(users)))
        logging.info(u"Resuming from timestamp {0}".format(ts_last_processed))
        if ts_last_processed is not None:
            _write_stats(ts_last_processed, users)
        deferred.defer(deferred_stats,
                       from_time=ts_last_processed or from_time,
                       to_time=to_time)
        # Normal return prevents this task from being run again
        return

    except Exception as ex:
        logging.info(u"Exception in stats loop: {0}".format(ex))
        # Avoid having the task retried
        raise deferred.PermanentTaskFailure()

    # Completed without incident
    logging.info(
        u"Normal completion of stats for {1} games and {0} users".format(
            len(users), cnt))
    _write_stats(to_time, users)

def check_bq_job(job_id, item_ids, suggestions_key, page_token):
    bq = BigQueryClient()
    logging.info("Polling suggestion job {}.".format(job_id))
    # TODO: catch 404 errors for jobs created 24+ hours ago, retry with new
    # job id
    try:
        bq_json = bq.get_async_job_results(job_id, page_token,
                                           MAX_RESULTS_PER_SUGGESTIONS_QUERY)
    except Exception as e:
        logging.error("Error from BigQuery with item_id={}.".format(item_ids))
        raise deferred.PermanentTaskFailure(e)

    if not bq_json['jobComplete']:
        logging.info("- job not completed yet.")
        deferred.defer(check_bq_job, job_id, item_ids, suggestions_key, "",
                       _countdown=5)
        return

    if 'rows' not in bq_json:
        logging.error(
            u"Invalid json for BigQueryTable. Job for {} is probably "
            u"invalid (bad item_id?).\n"
            u"JSON:\n"
            u"{}".format(item_ids, bq_json))
        raise deferred.PermanentTaskFailure(
            "No rows in BigQuery response for {}.".format(item_ids))

    table = BigQueryTable(bq_json)
    item_ids_array = item_ids.split('|')

    # Get the consolidated book for each item_id
    suggestions = suggestions_key.get()
    assert isinstance(suggestions, SuggestionsRecord)
    for row in table.data:
        item_id = row[0]
        prediction = float(row[1])
        if item_id in item_ids_array:
            continue  # Original book.
        consolidated_book_key = BookRecord.query(
            BookRecord.item_id_array == item_id).get(keys_only=True)
        if not consolidated_book_key:
            logging.info(
                "No consolidated book with item_id '{}' found.".format(
                    item_id))
            continue
        if consolidated_book_key not in suggestions.books:
            suggestions.books.append(consolidated_book_key)
            suggestions.books_prediction.append(prediction)
        if len(suggestions.books) >= 1000:
            break

    next_page_token = bq_json.get('pageToken', "")
    if next_page_token != "" and len(suggestions.books) < 1000:
        suggestions.put()
        deferred.defer(check_bq_job, job_id, item_ids, suggestions_key,
                       next_page_token)
        logging.info("Suggestions for item_ids '{}' partly fetched. "
                     "Running again.".format(item_ids))
    else:
        suggestions.completed = True
        suggestions.json_generation_started = True
        suggestions.put()
        logging.info(
            "Suggestions for item_ids '{}' completed and saved.".format(
                item_ids))
        deferred.defer(create_suggestions_json, suggestions.key)