def run_task_for_repo(self, repo):
    """Exports Person records (with notes) of one repository to CSV files.

    Two CSV variants are written per scan: a 'full' one containing every
    field and a 'filtered' one with sensitive fields removed.  Records are
    scanned in entry_date order, resuming from self.params.cursor when set.
    Output is appended to per-kind CSV objects in self.storage; once the
    whole repository has been scanned, the final object names are published
    in the per-repo config.  If the time budget runs out first, a follow-up
    task is scheduled to continue from the current cursor.
    """
    start_time = utils.get_utcnow()
    # Reuse the timestamp chosen by the first task in the chain (if any) so
    # every continuation appends to the same CSV object names.
    timestamp = self.params.timestamp or start_time
    is_first = not self.params.cursor
    query = model.Person.all_in_repo(repo).order('entry_date')
    if self.params.cursor:
        query.with_cursor(self.params.cursor)
    # Only the first task in the chain writes the CSV header row.
    filtered_writer = record_writer.PersonWithNoteCsvWriter(
        StringIO.StringIO(), write_header=is_first)
    full_writer = record_writer.PersonWithNoteCsvWriter(
        StringIO.StringIO(), write_header=is_first)
    has_data = False
    scan_completed = False
    while True:
        persons = query.fetch(limit=FETCH_LIMIT)
        if persons:
            has_data = True
        else:
            # Nothing left to fetch: the scan of this repo is complete.
            scan_completed = True
            break
        full_records = self.get_person_records_with_notes(repo, persons)
        full_writer.write(full_records)
        # Deep-copy before filtering so the full records stay intact.
        filtered_records = copy.deepcopy(full_records)
        utils.filter_sensitive_fields(filtered_records)
        filtered_writer.write(filtered_records)
        # Stop before the task deadline; a continuation is scheduled below.
        if utils.get_utcnow() >= start_time + self.MAX_FETCH_TIME:
            break
        query.with_cursor(query.cursor())
    for kind, writer in [
            ('filtered', filtered_writer), ('full', full_writer)]:
        base_name = '%s-persons-%s-%s' % (
            repo, kind, timestamp.strftime('%Y-%m-%d-%H%M%S'))
        final_csv_name = '%s.csv' % base_name
        temp_csv_name = '%s.temp.csv' % base_name
        if is_first:
            self.storage.insert_object(
                final_csv_name, 'text/csv', writer.io.getvalue())
        elif has_data:
            # Creates a temporary CSV file with new records, and append it to
            # the final CSV file.
            self.storage.insert_object(
                temp_csv_name, 'text/csv', writer.io.getvalue())
            self.storage.compose_objects(
                [final_csv_name, temp_csv_name], final_csv_name, 'text/csv')
        if scan_completed:
            # Publish the finished CSV object name via per-repo config,
            # e.g. latest_filtered_csv_object_name.
            key = 'latest_%s_csv_object_name' % kind
            config.set_for_repo(repo, **{key: final_csv_name})
    if not scan_completed:
        self.schedule_next_task(query.cursor(), timestamp)
def run_task_for_repo(self, repo):
    """Exports Person records (with notes) of one repository to CSV files.

    Produces a 'full' CSV with every field and a 'filtered' CSV with
    sensitive fields removed.  Records are scanned in entry_date order,
    resuming from self.params.cursor when set.  Each chunk is appended to
    the final per-kind CSV objects in self.storage; when the scan finishes,
    the object names are recorded in the per-repo config.  If the time
    budget is exhausted first, a continuation task is scheduled.
    """
    start_time = utils.get_utcnow()
    # Keep using the first task's timestamp so continuations append to the
    # same CSV object names.
    timestamp = self.params.timestamp or start_time
    is_first = not self.params.cursor
    query = model.Person.all_in_repo(repo).order('entry_date')
    if self.params.cursor:
        query.with_cursor(self.params.cursor)
    # Only the first task in the chain writes the header row.
    filtered_writer = record_writer.PersonWithNoteCsvWriter(
        StringIO.StringIO(), write_header=is_first)
    full_writer = record_writer.PersonWithNoteCsvWriter(
        StringIO.StringIO(), write_header=is_first)
    has_data = False
    scan_completed = False
    while True:
        persons = query.fetch(limit=FETCH_LIMIT)
        if persons:
            has_data = True
        else:
            # No more records: the repository scan is complete.
            scan_completed = True
            break
        full_records = self.get_person_records_with_notes(repo, persons)
        full_writer.write(full_records)
        # Copy first so filtering does not mutate the full records.
        filtered_records = copy.deepcopy(full_records)
        utils.filter_sensitive_fields(filtered_records)
        filtered_writer.write(filtered_records)
        # Respect the per-task time budget; continuation scheduled below.
        if utils.get_utcnow() >= start_time + self.MAX_FETCH_TIME:
            break
        query.with_cursor(query.cursor())
    for kind, writer in [('filtered', filtered_writer),
                         ('full', full_writer)]:
        base_name = '%s-persons-%s-%s' % (
            repo, kind, timestamp.strftime('%Y-%m-%d-%H%M%S'))
        final_csv_name = '%s.csv' % base_name
        temp_csv_name = '%s.temp.csv' % base_name
        if is_first:
            self.storage.insert_object(final_csv_name, 'text/csv',
                                       writer.io.getvalue())
        elif has_data:
            # Creates a temporary CSV file with new records, and append it to
            # the final CSV file.
            self.storage.insert_object(temp_csv_name, 'text/csv',
                                       writer.io.getvalue())
            self.storage.compose_objects([final_csv_name, temp_csv_name],
                                         final_csv_name, 'text/csv')
        if scan_completed:
            # Publish the finished CSV name in the repo's config.
            key = 'latest_%s_csv_object_name' % kind
            config.set_for_repo(repo, **{key: final_csv_name})
    if not scan_completed:
        self.schedule_next_task(query.cursor(), timestamp)
def setUp(self):
    """Creates two Person fixtures and two Notes on the first person.

    The clock is pinned to 2010-01-01 so entry dates and note ordering
    are deterministic.  p1 gets a fully-populated record with two notes
    (one 'believed_missing' linking to p2, one marking found=True); p2 is
    a minimal record.  p1 is then updated from both notes so its latest
    status/found flags reflect them.
    """
    # Pin "now" so get_utcnow()-based fields are deterministic in tests.
    set_utcnow_for_test(datetime(2010, 1, 1))
    self.p1 = model.Person.create_original(
        'haiti',
        first_name='John',
        last_name='Smith',
        home_street='Washington St.',
        home_city='Los Angeles',
        home_state='California',
        home_postal_code='11111',
        home_neighborhood='Good Neighborhood',
        author_name='Alice Smith',
        author_phone='111-111-1111',
        author_email='*****@*****.**',
        source_url='https://www.source.com',
        source_date=datetime(2010, 1, 1),
        source_name='Source Name',
        entry_date=datetime(2010, 1, 1),
        expiry_date=datetime(2010, 2, 1),
        other='')
    self.p2 = model.Person.create_original(
        'haiti',
        first_name='Tzvika',
        last_name='Hartman',
        home_street='Herzl St.',
        home_city='Tel Aviv',
        home_state='Israel',
        entry_date=datetime(2010, 1, 1),
        expiry_date=datetime(2010, 3, 1),
        other='')
    self.key_p1 = db.put(self.p1)
    self.key_p2 = db.put(self.p2)
    self.n1_1 = model.Note.create_original(
        'haiti',
        person_record_id=self.p1.record_id,
        linked_person_record_id=self.p2.record_id,
        status=u'believed_missing',
        found=False,
        entry_date=get_utcnow(),
        source_date=datetime(2000, 1, 1))
    self.n1_2 = model.Note.create_original(
        'haiti',
        person_record_id=self.p1.record_id,
        found=True,
        entry_date=get_utcnow(),
        source_date=datetime(2000, 2, 2))
    self.key_n1_1 = db.put(self.n1_1)
    self.key_n1_2 = db.put(self.n1_2)
    # Update the Person entity according to the Note.
    self.p1.update_from_note(self.n1_1)
    self.p1.update_from_note(self.n1_2)
    db.put(self.p1)
def get(self):
    """Overrides the fake clock used by get_utcnow(), in test mode only.

    Reads the new time from self.params.utcnow, installs it via
    set_utcnow_for_test(), and renders a page showing both the old and the
    new value.  A bad timestamp parameter yields a 400 error.

    NOTE(review): when is_test_mode() is false this handler silently does
    nothing (no render, no error) — confirm that is intentional.
    """
    utcnow_before_change = get_utcnow()
    utcnow = self.params.utcnow
    if self.is_test_mode():
        try:
            logging.info('Setting utcnow to %r' % utcnow)
            set_utcnow_for_test(utcnow)
            self.render('templates/set_utcnow.html', utcnow=get_utcnow(),
                        utcbefore=utcnow_before_change)
        except Exception, e:
            # bad param.
            return self.error(400, 'bad timestamp %s, e=%s' % (utcnow, e))
def test_set_utcnow_for_test(self):
    """Checks that set_utcnow_for_test() overrides and restores the clock."""
    max_delta = datetime.timedelta(0, 0, 100)
    utcnow = datetime.datetime.utcnow()
    utilsnow = utils.get_utcnow()
    # make sure we're getting the current time.
    assert (utilsnow - utcnow) < max_delta
    # now set the utils time.
    test_time = datetime.datetime(2011, 1, 1, 0, 0)
    utils.set_utcnow_for_test(test_time)
    assert utils.get_utcnow() == test_time
    # now unset: the clock should run again and no longer equal test_time.
    utils.set_utcnow_for_test(None)
    assert utils.get_utcnow()
    assert utils.get_utcnow() != test_time
def test_set_utcnow_for_test(self):
    """Checks that set_utcnow_for_test() overrides and restores the clock."""
    max_delta = datetime.timedelta(0,0,100)
    utcnow = datetime.datetime.utcnow()
    utilsnow = utils.get_utcnow()
    # make sure we're getting the current time.
    assert (utilsnow - utcnow) < max_delta
    # now set the utils time.
    test_time = datetime.datetime(2011, 1, 1, 0, 0)
    utils.set_utcnow_for_test(test_time)
    assert utils.get_utcnow() == test_time
    # now unset: the clock should run again and no longer equal test_time.
    utils.set_utcnow_for_test(None)
    assert utils.get_utcnow()
    assert utils.get_utcnow() != test_time
def run_task_for_repo(self, repo):
    """Exports one repository's Person records (with notes) to a CSV file.

    Scans records in entry_date order, resuming from self.params.cursor
    when set, filters out sensitive fields, and appends the chunk to the
    final CSV object in self.storage.  When the whole repository has been
    scanned the object name is published in the per-repo config; otherwise
    a continuation task is scheduled from the current cursor.
    """
    start_time = utils.get_utcnow()
    # Reuse the first task's timestamp so every continuation appends to
    # the same CSV object name.
    timestamp = self.params.timestamp or start_time
    base_name = '%s-persons-%s' % (repo, timestamp.strftime('%Y-%m-%d-%H%M%S'))
    is_first = not self.params.cursor
    query = model.Person.all_in_repo(repo).order('entry_date')
    if self.params.cursor:
        query.with_cursor(self.params.cursor)
    csv_io = StringIO.StringIO()
    # Only the first task in the chain writes the header row.
    writer = record_writer.PersonWithNoteCsvWriter(csv_io, write_header=is_first)
    has_data = False
    scan_completed = False
    while True:
        persons = query.fetch(limit=FETCH_LIMIT)
        if persons:
            has_data = True
        else:
            # No more records: the repository scan is complete.
            scan_completed = True
            break
        records = self.get_person_records_with_notes(repo, persons)
        # So far it only supports dump of records without sensitive fields.
        utils.filter_sensitive_fields(records)
        writer.write(records)
        # Respect the per-task time budget; continuation scheduled below.
        if utils.get_utcnow() >= start_time + self.MAX_FETCH_TIME:
            break
        query.with_cursor(query.cursor())
    final_csv_name = '%s.csv' % base_name
    temp_csv_name = '%s.temp.csv' % base_name
    if is_first:
        self.storage.insert_object(final_csv_name, 'text/csv', csv_io.getvalue())
    elif has_data:
        # Creates a temporary CSV file with new records, and append it to
        # the final CSV file.
        self.storage.insert_object(temp_csv_name, 'text/csv', csv_io.getvalue())
        self.storage.compose_objects([final_csv_name, temp_csv_name],
                                     final_csv_name, 'text/csv')
    if scan_completed:
        config.set_for_repo(repo, latest_csv_object_name=final_csv_name)
    else:
        self.schedule_next_task(query.cursor(), timestamp)
def past_due_records(repo):
    """Builds a query for every Person in the given repo whose expiry_date
    is in the past (or now), regardless of the is_expired flag."""
    import utils
    cutoff = utils.get_utcnow()
    # Include already-expired records too; filter_expired=False disables
    # the default expiry filtering.
    query = Person.all(filter_expired=False)
    query = query.filter('expiry_date <=', cutoff)
    return query.filter('repo =', repo)
def create_note(repo, fields):
    """Creates a Note entity in the given repository with the given field
    values.  If 'fields' contains a 'note_record_id', calling put() on the
    resulting entity will overwrite any existing (original or clone) record
    with the same note_record_id.  Otherwise, a new original note record is
    created in the given repository.

    Raises AssertionError if person_record_id or source_date is missing.
    """
    assert strip(fields.get('person_record_id')), 'person_record_id is required'
    assert strip(fields.get('source_date')), 'source_date is required'
    # Normalize/validate each incoming field before constructing the Note.
    note_fields = dict(
        person_record_id=strip(fields['person_record_id']),
        linked_person_record_id=strip(fields.get('linked_person_record_id')),
        author_name=strip(fields.get('author_name')),
        author_email=strip(fields.get('author_email')),
        author_phone=strip(fields.get('author_phone')),
        source_date=validate_datetime(fields.get('source_date')),
        status=validate_status(fields.get('status')),
        author_made_contact=validate_boolean(fields.get('author_made_contact')),
        email_of_found_person=strip(fields.get('email_of_found_person')),
        phone_of_found_person=strip(fields.get('phone_of_found_person')),
        last_known_location=strip(fields.get('last_known_location')),
        text=fields.get('text'),
        photo_url=fields.get('photo_url'),
        entry_date=get_utcnow(),
    )
    record_id = strip(fields.get('note_record_id'))
    if record_id:
        # create a record that might overwrite an existing one
        if is_clone(repo, record_id):
            return Note.create_clone(repo, record_id, **note_fields)
        else:
            return Note.create_original_with_record_id(
                repo, record_id, **note_fields)
    else:
        # create a new original record
        return Note.create_original(repo, **note_fields)
def add(self, key, value, time_to_live_in_seconds):
    """Adds the key/value pair to cache and updates the expiry time.

    If key already exists, its value and expiry are updated.

    Args:
        key: cache key (any hashable).
        value: value to store.
        time_to_live_in_seconds: lifetime of the entry from now.
    """
    expiry = utils.get_utcnow() + timedelta(seconds=time_to_live_in_seconds)
    # Bug fix: the counters were previously incremented unconditionally,
    # so overwriting an existing key inflated items_count/max_items even
    # though no new entry was added.  Only count genuinely new keys.
    if key not in self.storage:
        self.items_count += 1
        self.max_items += 1
    self.storage[key] = (value, expiry)
def create_note(subdomain, fields):
    """Creates a Note entity in the given subdomain's repository with the
    given field values.  If 'fields' contains a 'note_record_id', calling
    put() on the resulting entity will overwrite any existing (original or
    clone) record with the same note_record_id.  Otherwise, a new original
    note record is created in the given subdomain.

    Raises AssertionError if person_record_id or source_date is missing.
    """
    assert strip(
        fields.get('person_record_id')), 'person_record_id is required'
    assert strip(fields.get('source_date')), 'source_date is required'
    # Normalize/validate each incoming field before constructing the Note.
    note_fields = dict(
        person_record_id=strip(fields['person_record_id']),
        linked_person_record_id=strip(fields.get('linked_person_record_id')),
        author_name=strip(fields.get('author_name')),
        author_email=strip(fields.get('author_email')),
        author_phone=strip(fields.get('author_phone')),
        source_date=validate_datetime(fields.get('source_date')),
        status=validate_status(fields.get('status')),
        found=validate_boolean(fields.get('found')),
        email_of_found_person=strip(fields.get('email_of_found_person')),
        phone_of_found_person=strip(fields.get('phone_of_found_person')),
        last_known_location=strip(fields.get('last_known_location')),
        text=fields.get('text'),
        entry_date=get_utcnow(),
    )
    record_id = strip(fields.get('note_record_id'))
    if record_id:
        # create a record that might overwrite an existing one
        if is_clone(subdomain, record_id):
            return Note.create_clone(subdomain, record_id, **note_fields)
        else:
            return Note.create_original_with_record_id(subdomain, record_id,
                                                       **note_fields)
    else:
        # create a new original record
        return Note.create_original(subdomain, **note_fields)
def get(self):
    """Cron/task handler that updates expiry state for Person records.

    With a repo: iterates the query, refreshes each record's is_expired
    flags, wipes records expired longer than EXPIRED_TTL, and treats a
    newly-expired record as a regular deletion.  On deadline or datastore
    timeout, reschedules itself from the last fully-processed cursor.
    Without a repo: fans out one task per repository.
    """
    if self.repo:
        query = self.query()
        if self.params.cursor:
            query.with_cursor(self.params.cursor)
        # 'cursor' always points just after the last fully-processed
        # person, so a retry never skips or double-processes a record.
        cursor = self.params.cursor
        try:
            for person in query:
                # query.cursor() returns a cursor which returns the entity
                # next to this "person" as the first result.
                next_cursor = query.cursor()
                was_expired = person.is_expired
                person.put_expiry_flags()
                if (utils.get_utcnow() - person.get_effective_expiry_date() >
                        EXPIRED_TTL):
                    # Past the retention window: scrub all personal data.
                    person.wipe_contents()
                else:
                    # treat this as a regular deletion.
                    if person.is_expired and not was_expired:
                        delete.delete_person(self, person)
                cursor = next_cursor
        except runtime.DeadlineExceededError:
            self.schedule_next_task(cursor)
        except datastore_errors.Timeout:
            # This exception is sometimes raised, maybe when the query
            # object live too long?
            self.schedule_next_task(cursor)
    else:
        # No repo given: fan out one task per repository.
        for repo in model.Repo.list():
            self.add_task_for_repo(repo, self.task_name(), self.ACTION)
def post(self):
    """Toggles the 'hidden' flag on a note (flag/unflag as spam).

    Unhiding a hidden note requires a valid captcha (bypassed in test
    mode).  A successful toggle refreshes the note's dates, logs the user
    action, updates the person's latest status, and redirects back to the
    view page.  A failed captcha re-renders the flag form.
    """
    note = model.Note.get(self.repo, self.params.id)
    if not note:
        return self.error(400, 'No note with ID: %r' % self.params.id)
    # Captcha is only required when revealing a currently-hidden note.
    captcha_response = note.hidden and self.get_captcha_response()
    if not note.hidden or captcha_response.is_valid or self.env.test_mode:
        note.hidden = not note.hidden
        # When "hidden" changes, update source_date and entry_date (melwitt)
        # http://code.google.com/p/googlepersonfinder/issues/detail?id=58
        now = utils.get_utcnow()
        note.source_date = now
        note.entry_date = now
        db.put(note)
        model.UserActionLog.put_new(
            (note.hidden and 'hide') or 'unhide', note,
            self.request.get('reason_for_report', ''))
        # Hiding/unhiding a note may change the person's latest status.
        person = model.Person.get(self.repo, note.person_record_id)
        if person:
            person.update_latest_status(note)
        self.redirect(self.get_url('/view', id=note.person_record_id,
                                   signature=self.params.signature))
    elif not captcha_response.is_valid:
        # Wrong captcha: show the form again with a fresh challenge.
        captcha_html = self.get_captcha_html(captcha_response.error_code)
        self.render('flag_note.html', note=note, captcha_html=captcha_html,
                    signature=self.params.signature)
def validate_expired_records_removed(self):
    """Validates that if the current time is at least one day greater than
    any person's expiry_date, all fields other than person_record_id,
    expiry_date, source_date, and entry_date must be empty or omitted.
    Also, source_date and entry_date must be the time that the placeholder
    was created.  Returns a list with the person_record_ids of any persons
    that violate those conditions."""
    messages = []
    # Expiry semantics only exist from PFIF 1.3 onward.
    if self.version >= 1.3:
        persons = self.tree.get_all_persons()
        top_level_notes_by_person = self.get_top_level_notes_by_person()
        for person in persons:
            expiry_date = self.get_expiry_datetime(person)
            curr_date = utils.get_utcnow()
            # if the record is expired
            if expiry_date != None and expiry_date < curr_date:
                # the person itself can't have data
                messages.extend(
                    self.validate_personal_data_removed(person))
                # the placeholder dates must match
                messages.extend(
                    self.validate_placeholder_dates(person, expiry_date))
                # top level notes associated with the expired person can't
                # have data
                associated_notes = top_level_notes_by_person.get(
                    self.tree.get_field_text(person, 'person_record_id'),
                    [])
                for note in associated_notes:
                    messages.extend(
                        self.validate_personal_data_removed(note))
    return messages
def put_expiry_flags(self):
    """Updates the is_expired flags on this Person and related Notes to
    make them consistent with the effective_expiry_date() on this Person,
    and commits the changes to the datastore."""
    import utils
    now = utils.get_utcnow()
    expired = self.get_effective_expiry_date() <= now
    if self.is_expired != expired:
        # NOTE: This should be the ONLY code that modifies is_expired.
        self.is_expired = expired
        # if we neglected to capture the original_creation_date,
        # make a best effort to grab it now, for posterity.
        if not self.original_creation_date:
            self.original_creation_date = self.source_date
        # If the record is expiring (being replaced with a placeholder,
        # see http://zesty.ca/pfif/1.3/#data-expiry) or un-expiring (being
        # restored from deletion), we want the source_date and entry_date
        # updated so downstream clients will see this as the newest state.
        self.source_date = now
        self.entry_date = now
        # All the Notes on the Person also expire or unexpire, to match.
        notes = self.get_notes(filter_expired=False)
        for note in notes:
            note.is_expired = expired
        # Store these changes in the datastore.
        db.put(notes + [self])
def record_action(
        repo,
        api_key,
        version,
        action,
        person_records,
        note_records,
        people_skipped,
        notes_skipped,
        user_agent,
        ip_address,
        request_url,
        timestamp=None,
):
    """Writes one ApiActionLog entry describing an API call.

    Logging is strictly best-effort: any failure is swallowed so the API
    action itself can never be broken by its own audit trail.
    """
    import utils
    try:
        log_entry = ApiActionLog(
            repo=repo,
            api_key=api_key,
            action=action,
            person_records=person_records,
            note_records=note_records,
            people_skipped=people_skipped,
            notes_skipped=notes_skipped,
            user_agent=user_agent,
            ip_address=ip_address,
            request_url=request_url,
            version=version,
            timestamp=timestamp or utils.get_utcnow())
        log_entry.put()
    except Exception:
        # Deliberately swallow everything: see docstring.
        pass
def validate_expired_records_removed(self):
    """Validates that if the current time is at least one day greater than
    any person's expiry_date, all fields other than person_record_id,
    expiry_date, source_date, and entry_date must be empty or omitted.
    Also, source_date and entry_date must be the time that the placeholder
    was created.  Returns a list with the person_record_ids of any persons
    that violate those conditions."""
    messages = []
    # Expiry semantics only exist from PFIF 1.3 onward.
    if self.version >= 1.3:
        persons = self.tree.get_all_persons()
        top_level_notes_by_person = self.get_top_level_notes_by_person()
        for person in persons:
            expiry_date = self.get_expiry_datetime(person)
            curr_date = utils.get_utcnow()
            # if the record is expired
            if expiry_date != None and expiry_date < curr_date:
                # the person itself can't have data
                messages.extend(self.validate_personal_data_removed(person))
                # the placeholder dates must match
                messages.extend(self.validate_placeholder_dates(person, expiry_date))
                # top level notes associated with the expired person can't have data
                associated_notes = top_level_notes_by_person.get(
                    self.tree.get_field_text(person, 'person_record_id'), [])
                for note in associated_notes:
                    messages.extend(self.validate_personal_data_removed(note))
    return messages
def put_new(cls, action, entity, detail='', ip_address='',
            copy_properties=True):
    """Adds an entry to the UserActionLog.  'action' is the action that
    the user performed, 'entity' is the entity that was operated on, and
    'detail' is a string containing any other details.

    When copy_properties is true, every property of 'entity' is snapshotted
    onto the log entry under a '<kind>_<property>' attribute.
    """
    import utils
    kind = entity.kind()
    entry = cls(time=utils.get_utcnow(), repo=entity.repo, action=action,
                entity_kind=kind, entity_key_name=entity.key().name(),
                detail=detail, ip_address=ip_address)
    # copy the properties of the entity
    if copy_properties:
        for name in entity.properties():
            value = getattr(entity, name)
            if isinstance(value, db.Model):
                # Referenced entities are stored by key, not inline.
                value = value.key()
            setattr(entry, kind + '_' + name, value)
    entry.put()
def add_feed_elements(self, root):
    """Populates the Atom feed 'root' with id/title/updated and one entry
    per active repository.

    For the 'global' pseudo-repo every non-staging repository is listed;
    for a specific repo only that repo (and only if it is ACTIVE,
    otherwise 404).
    """
    ET.SubElement(root, 'id').text = self.build_absolute_uri()
    ET.SubElement(root, 'title').text = RepoFeedView._TITLE
    if self.env.repo == 'global':
        repos = model.Repo.all().filter(
            'activation_status !=', model.Repo.ActivationStatus.STAGING)
    else:
        repo = model.Repo.get(self.env.repo)
        if repo.activation_status == model.Repo.ActivationStatus.ACTIVE:
            repos = [repo]
        else:
            # Non-active repos are not exposed through the feed.
            raise django.http.Http404()
    # Cache each repo's configuration; reused for the 'updated' element
    # and for the per-repo entries below.
    repo_confs = {}
    for repo in repos:
        repo_id = repo.key().name()
        repo_conf = config.Configuration(repo_id, include_global=False)
        repo_confs[repo_id] = repo_conf
    updated_dates = [conf.updated_date for conf in repo_confs.values()]
    # If there's no non-staging repositories, it's not really clear what
    # updated_date should be; we just use the current time.
    latest_updated_date = (max(updated_dates) if updated_dates
                           else utils.get_utcnow())
    ET.SubElement(
        root, 'updated').text = utils.format_utc_timestamp(latest_updated_date)
    for repo in repos:
        if repo.activation_status == model.Repo.ActivationStatus.ACTIVE:
            self._add_repo_entry(root, repo, repo_confs[repo.key().name()])
def record_action(repo, api_key, version, action, person_records,
                  note_records, people_skipped, notes_skipped, user_agent,
                  ip_address, request_url, timestamp=None):
    """Records one ApiActionLog entry for an API call, best-effort.

    Any exception while building or saving the entry is suppressed so the
    caller's main action is never disrupted by audit logging.
    """
    import utils
    try:
        fields = {
            'repo': repo,
            'api_key': api_key,
            'action': action,
            'person_records': person_records,
            'note_records': note_records,
            'people_skipped': people_skipped,
            'notes_skipped': notes_skipped,
            'user_agent': user_agent,
            'ip_address': ip_address,
            'request_url': request_url,
            'version': version,
            'timestamp': timestamp or utils.get_utcnow(),
        }
        ApiActionLog(**fields).put()
    except Exception:
        # Intentional: logging failures must not fail the main action.
        pass
def get(self):
    """Deletes aged Person records from repositories that are in test mode.

    With a repo: deletes every Person whose entry_date is older than
    DELETION_AGE_SECONDS, rescheduling itself before the CPU limit is hit.
    Without a repo: fans out one task per repo currently in test mode.
    """
    if self.repo:
        # To reuse the cursor from the previous task, we need to apply
        # exactly the same filter. So we use utcnow previously used
        # instead of the current time.
        utcnow = self.params.utcnow or utils.get_utcnow()
        max_entry_date = (
            utcnow - datetime.timedelta(
                seconds=CleanUpInTestMode.DELETION_AGE_SECONDS))
        query = model.Person.all_in_repo(self.repo)
        query.filter('entry_date <=', max_entry_date)
        if self.params.cursor:
            query.with_cursor(self.params.cursor)
        # Uses query.get() instead of "for person in query".
        # If we use for-loop, query.cursor() points to an unexpected
        # position.
        person = query.get()
        # When the repository is no longer in test mode, aborts the
        # deletion.
        while person and self.in_test_mode(self.repo):
            person.delete_related_entities(delete_self=True)
            if quota.get_request_cpu_usage() > CPU_MEGACYCLES_PER_REQUEST:
                # Stop before running into the hard limit on CPU time per
                # request, to avoid aborting in the middle of an operation.
                # Add task back in, restart at current spot:
                self.schedule_next_task(query, utcnow)
                break
            person = query.get()
    else:
        # No repo given: fan out one task per test-mode repository.
        for repo in model.Repo.list():
            if self.in_test_mode(repo):
                self.add_task_for_repo(repo, self.task_name(), self.ACTION)
def put_expiry_flags(self):
    """Updates the is_expired flags on this Person and related Notes to
    make them consistent with the expiry_date on this Person, and commits
    these changes to the datastore."""
    now = utils.get_utcnow()
    # NOTE(review): 'expired' is None when expiry_date is unset (not
    # False), so the != comparison below relies on is_expired never being
    # None — confirm against the model definition.
    expired = self.expiry_date and now >= self.expiry_date
    if self.is_expired != expired:
        # NOTE: This should be the ONLY code that modifies is_expired.
        self.is_expired = expired
        # If the record is expiring (being replaced with a placeholder,
        # see http://zesty.ca/pfif/1.3/#data-expiry) or un-expiring (being
        # restored from deletion), we want the source_date and entry_date
        # updated so downstream clients will see this as the newest state.
        self.source_date = now
        self.entry_date = now
        # All the Notes on the Person also expire or unexpire, to match.
        notes = self.get_notes(filter_expired=False)
        for note in notes:
            note.is_expired = expired
        # Store these changes in the datastore.
        db.put(notes + [self])
def post(self):
    """Toggles the 'hidden' flag on a note (flag/unflag as spam).

    Unhiding a hidden note requires a valid captcha.  A successful toggle
    refreshes the note's dates, logs the user action, updates the person's
    latest status, and redirects back to the view page.  A failed captcha
    re-renders the flag form with a fresh challenge.
    """
    note = model.Note.get(self.repo, self.params.id)
    if not note:
        return self.error(400, 'No note with ID: %r' % self.params.id)
    # Captcha is only required when revealing a currently-hidden note.
    captcha_response = note.hidden and self.get_captcha_response()
    if not note.hidden or captcha_response.is_valid:
        note.hidden = not note.hidden
        # When "hidden" changes, update source_date and entry_date (melwitt)
        # http://code.google.com/p/googlepersonfinder/issues/detail?id=58
        now = utils.get_utcnow()
        note.source_date = now
        note.entry_date = now
        db.put(note)
        model.UserActionLog.put_new(
            (note.hidden and 'hide') or 'unhide', note,
            self.request.get('reason_for_report', ''))
        # Hiding/unhiding a note may change the person's latest status.
        person = model.Person.get(self.repo, note.person_record_id)
        if person:
            person.update_latest_status(note)
        self.redirect(self.get_url('/view', id=note.person_record_id,
                                   signature=self.params.signature))
    elif not captcha_response.is_valid:
        captcha_html = self.get_captcha_html(captcha_response.error_code)
        self.render('flag_note.html', note=note, captcha_html=captcha_html,
                    signature=self.params.signature)
def potentially_expired_records(repo, days_to_expire=DEFAULT_EXPIRATION_DAYS):
    """Builds a query for Person records in 'repo' whose source_date is at
    least days_to_expire days old (or unset), ignoring is_expired flags."""
    import utils
    cutoff_date = utils.get_utcnow() - timedelta(days_to_expire)
    # filter_expired=False so already-expired records are included too.
    query = Person.all(filter_expired=False)
    query = query.filter("source_date <=", cutoff_date)
    return query.filter("repo =", repo)
def add_task(self, subdomain):
    """Queues up a task for an individual subdomain."""
    # Timestamp in the name keeps repeated scans from colliding on task
    # names, which must be unique.
    stamp = utils.get_utcnow().strftime('%Y%m%d-%H%M%S')
    taskqueue.add(
        name='%s-%s-%s' % (subdomain, self.SCAN_NAME, stamp),
        method='GET',
        url=self.URL,
        params={'subdomain': subdomain})
def get(self, request, *args, **kwargs): del request, args, kwargs # Unused. if not self.params.id: return self.error(400) person = model.Person.get(self.env.repo, self.params.id) if not person: return self.error(404) # Check if it's expired, just in case the expiry cron hasn't gotten to # it yet. if person.expiry_date and person.expiry_date < utils.get_utcnow(): return self.error(404) notes = [{ 'note_record_id': note.record_id, 'source_date': self._js_date(note.source_date), 'author_name': note.author_name, 'author_made_contact': note.author_made_contact, 'status': note.status, 'text': note.text, } for note in person.unexpired_notes] data = { 'name': person.full_name, # TODO(nworden): maybe change the UI to handle an empty string 'sex': person.sex or None, 'age': person.fuzzified_age, 'home_city': person.home_city, 'home_state': person.home_state, 'home_country': person.home_country, 'description': person.description, 'profile_pages': view.get_profile_pages(person.profile_urls, self.env.config, self.build_absolute_uri), 'author_name': person.author_name, 'author_email': person.author_email, 'author_phone': person.author_phone, 'source_date': self._js_date(person.source_date), 'source_name': person.source_name, 'notes': notes, } if person.photo: data['localPhotoUrl'] = person.photo_url elif person.photo_url: data['externalPhotoUrl'] = person.photo_url return self._json_response(data)
def potentially_expired_records(repo, days_to_expire=DEFAULT_EXPIRATION_DAYS):
    """Query over Person records in 'repo' whose source_date is older than
    days_to_expire days, regardless of the is_expired flag."""
    import utils
    threshold = utils.get_utcnow() - timedelta(days_to_expire)
    base = Person.all(filter_expired=False)
    return base.filter('source_date <=', threshold).filter('repo =', repo)
def _get_user_admin_permission(self):
    """Returns the strongest unexpired AdminPermission for the current user.

    Fetches both the per-repo and the global permission objects, treats
    expired ones as absent, and returns whichever grants the higher access
    level (the global one wins ties).  Returns None when neither applies.
    """
    # Read the clock once so both permissions are judged at the same
    # instant (the original called get_utcnow() twice).
    now = utils.get_utcnow()

    def unexpired(permission):
        # An expired permission is treated the same as no permission.
        if permission and permission.expiration_date < now:
            return None
        return permission

    user_repo_admin_object = unexpired(admin_acls_model.AdminPermission.get(
        self.env.repo, self.env.user.email()))
    user_global_admin_object = unexpired(admin_acls_model.AdminPermission.get(
        'global', self.env.user.email()))
    if user_repo_admin_object is None:
        return user_global_admin_object
    if user_global_admin_object is None:
        return user_repo_admin_object
    # Both exist: prefer the repo-level permission only when it is
    # strictly stronger.
    if user_repo_admin_object.compare_level_to(
            user_global_admin_object.access_level) > 0:
        return user_repo_admin_object
    return user_global_admin_object
def create_person(repo, fields):
    """Creates a Person entity in the given repository with the given field
    values.  If 'fields' contains a 'person_record_id', calling put() on
    the resulting entity will overwrite any existing (original or clone)
    record with the same person_record_id.  Otherwise, a new original
    person record is created in the given repository."""
    # Normalize/validate each incoming field before constructing the entity.
    person_fields = dict(
        entry_date=get_utcnow(),
        expiry_date=validate_datetime(fields.get('expiry_date')),
        author_name=strip(fields.get('author_name')),
        author_email=strip(fields.get('author_email')),
        author_phone=strip(fields.get('author_phone')),
        source_name=strip(fields.get('source_name')),
        source_url=strip(fields.get('source_url')),
        source_date=validate_datetime(fields.get('source_date')),
        full_name=strip(fields.get('full_name')),
        given_name=strip(fields.get('given_name')),
        family_name=strip(fields.get('family_name')),
        alternate_names=strip(fields.get('alternate_names')),
        description=strip(fields.get('description')),
        sex=validate_sex(fields.get('sex')),
        date_of_birth=validate_approximate_date(fields.get('date_of_birth')),
        age=validate_age(fields.get('age')),
        home_street=strip(fields.get('home_street')),
        home_neighborhood=strip(fields.get('home_neighborhood')),
        home_city=strip(fields.get('home_city')),
        home_state=strip(fields.get('home_state')),
        home_postal_code=strip(fields.get('home_postal_code')),
        home_country=strip(fields.get('home_country')),
        photo_url=strip(fields.get('photo_url')),
        profile_urls=strip(fields.get('profile_urls')),
    )
    # For PFIF 1.3 or older, populate full_name (it was an optional field
    # before), using given_name and family_name if it is empty.
    if not person_fields['full_name'].strip():
        person_fields['full_name'] = get_full_name(
            person_fields['given_name'],
            person_fields['family_name'],
            config.Configuration(repo))
    # TODO(liuhsinwen): Separate existed and non-existed record id and
    # increment person counter for new records
    record_id = strip(fields.get('person_record_id'))
    if record_id:
        # create a record that might overwrite an existing one
        if is_clone(repo, record_id):
            return Person.create_clone(repo, record_id, **person_fields)
        else:
            return Person.create_original_with_record_id(
                repo, record_id, **person_fields)
    else:
        # create a new original record
        # TODO(liuhsinwen): fix performance problem by incrementing the counter
        # by the number of upload records
        # UsageCounter.increment_person_counter(repo)
        return Person.create_original(repo, **person_fields)
def setUp(self):
    """Creates a photo, two Person fixtures, and one linking Note.

    The clock is pinned to 2010-01-01 for deterministic dates.  p1 is a
    fully-populated record carrying the photo; p2 is minimal; n1_1 is a
    'believed_missing' note on p1 linking to p2.  Everything created is
    tracked in self.to_delete for teardown.
    """
    logging.basicConfig(level=logging.INFO, stream=sys.stderr)
    # Tests that need mox create it lazily; None means "not in use".
    self.mox = None
    # Setup cheerfully stolen from test_model.
    set_utcnow_for_test(datetime.datetime(2010, 1, 1))
    self.photo = model.Photo.create('haiti', image_data='xyz')
    self.photo.put()
    self.photo_key = self.photo.key()
    self.p1 = model.Person.create_original(
        'haiti',
        given_name='John',
        family_name='Smith',
        home_street='Washington St.',
        home_city='Los Angeles',
        home_state='California',
        home_postal_code='11111',
        home_neighborhood='Good Neighborhood',
        author_name='Alice Smith',
        author_phone='111-111-1111',
        author_email='*****@*****.**',
        photo_url='',
        photo=self.photo,
        source_url='https://www.source.com',
        source_date=datetime.datetime(2010, 1, 1),
        source_name='Source Name',
        entry_date=datetime.datetime(2010, 1, 1),
        expiry_date=datetime.datetime(2010, 2, 1),
        other='')
    self.p2 = model.Person.create_original(
        'haiti',
        given_name='Tzvika',
        family_name='Hartman',
        home_street='Herzl St.',
        home_city='Tel Aviv',
        home_state='Israel',
        source_date=datetime.datetime(2010, 1, 1),
        entry_date=datetime.datetime(2010, 1, 1),
        expiry_date=datetime.datetime(2010, 3, 1),
        other='')
    self.key_p1 = db.put(self.p1)
    self.key_p2 = db.put(self.p2)
    self.n1_1 = model.Note.create_original(
        'haiti',
        person_record_id=self.p1.record_id,
        linked_person_record_id=self.p2.record_id,
        status=u'believed_missing',
        author_made_contact=False,
        entry_date=get_utcnow(),
        source_date=datetime.datetime(2010, 1, 2))
    self.note_id = self.n1_1.note_record_id
    db.put(self.n1_1)
    # Entities to clean up in tearDown.
    self.to_delete = [self.p1, self.p2, self.n1_1, self.photo]
def _check_person(self, person):
    """Alerts if an expired person record still carries personal data."""
    # Look back a full day so this check tolerates running ahead of the
    # deletion job.
    cutoff = utils.get_utcnow() - datetime.timedelta(days=1)
    if not person.expiry_date or person.expiry_date >= cutoff:
        return
    # Placeholder records legitimately keep these bookkeeping fields.
    preserved = ('repo', 'is_expired', 'original_creation_date',
                 'source_date', 'entry_date', 'expiry_date',
                 'last_modified')
    for name, prop in person.properties().items():
        if name in preserved:
            continue
        if getattr(person, name) != prop.default:
            self.alert(
                'An expired person record still has data (%s, %s).'
                % (person.record_id, name))
def get(self):
    """Cron handler that expires past-due Person records.

    Walks all past-due records, refreshing each one's is_expired flags,
    and wipes the contents of records whose expiry is older than
    EXPIRED_TTL.  Stops early when the CPU budget is nearly spent.
    """
    query = model.Person.past_due_records()
    for person in query:
        if quota.get_request_cpu_usage() > CPU_MEGACYCLES_PER_REQUEST:
            # Stop before running into the hard limit on CPU time per
            # request, to avoid aborting in the middle of an operation.
            # TODO(kpy): Figure out whether to queue another task here.
            # Is it safe for two tasks to run in parallel over the same
            # set of records returned by the query?
            break
        person.put_expiry_flags()
        # Past the retention window: scrub all personal data.
        if (person.expiry_date and
                utils.get_utcnow() - person.expiry_date > EXPIRED_TTL):
            person.wipe_contents()
def _check_person(self, person):
    """Alerts when an expired person record still carries personal data."""
    yesterday = utils.get_utcnow() - datetime.timedelta(days=1)
    # Only inspect records whose expiry passed more than a day ago, so we
    # never race the deletion job.
    if person.expiry_date and person.expiry_date < yesterday:
        # Bookkeeping fields that a placeholder record is allowed to keep.
        skip = frozenset([
            'repo', 'is_expired', 'original_creation_date', 'source_date',
            'entry_date', 'expiry_date', 'last_modified'])
        for name, prop in person.properties().items():
            if name not in skip and getattr(person, name) != prop.default:
                self.alert(
                    'An expired person record still has data (%s, %s).'
                    % (person.record_id, name))
def sign_url(self, object_name, url_lifetime):
    """ Generates Cloud Storage signed URL to download Google Cloud Storage
    object without sign in.

    See: https://cloud.google.com/storage/docs/access-control/signed-urls

    This only works on a real App Engine app, not in a dev app server.

    Args:
        object_name (str): The name of the object which is signed.
        url_lifetime (datetime.timedelta): Lifetime of the signed URL. The
            server rejects any requests received after this time from now.

    Returns:
        str: the signed download URL.

    Raises:
        Exception: when called on a dev app server.
    """
    if utils.is_dev_app_server():
        # Not working on a dev app server because it doesn't support
        # app_identity.sign_blob(). An alternative implementation would
        # be needed to make it work on a dev app server.
        raise Exception(
            'sign_url only works on a real App Engine app, not on a dev '
            'app server.')
    method = 'GET'
    expiration_time = utils.get_utcnow() + url_lifetime
    # NOTE(review): time.mktime() interprets the time tuple in the server's
    # *local* timezone, while expiration_time is UTC; this only yields the
    # intended epoch seconds when the server runs in UTC. Confirm, or
    # consider calendar.timegm() which always treats the tuple as UTC.
    expiration_sec = int(time.mktime(expiration_time.timetuple()))
    path = '/%s/%s' % (self.bucket_name, object_name)
    # These are unused in our use case.
    content_md5 = ''
    content_type = ''
    # The V2 signing format: newline-joined request attributes.
    signed_text = '\n'.join([
        method,
        content_md5,
        content_type,
        str(expiration_sec),
        path,
    ])
    (_, signature) = app_identity.sign_blob(signed_text.encode('utf-8'))
    query_params = {
        'GoogleAccessId': app_identity.get_service_account_name(),
        'Expires': str(expiration_sec),
        'Signature': base64.b64encode(signature),
    }
    return 'https://storage.googleapis.com%s?%s' % (
        path, urllib.urlencode(query_params))
def expire_person(person):
    """Expires a person record and associated data."""
    person_text = person_to_text(person)
    if not person.is_original():
        # For a clone record, we don't have authority to change the
        # expiry_date, so we just delete the record now. (The externally
        # visible result will be as if we had never received a copy of it.)
        person.delete_related_entities(delete_self=True)
        logging.info('Deleted completely: %s' % person_text)
        return
    # Set the expiry_date to now, and set is_expired flags to match.
    # (The externally visible result will be as if we overwrote the
    # record with an expiry date and blank fields.)
    person.expiry_date = utils.get_utcnow()
    person.put_expiry_flags()
    logging.info('Expired: %s' % person_text)
def get_effective_expiry_date(self):
    """Returns the record's expiry_date, or a computed default.

    When expiry_date is unset, the result is source_date (falling back to
    original_creation_date, then the current time) plus the repository's
    configurable default_expiration_days interval.

    Returns:
        A datetime date (not None).
    """
    if self.expiry_date:
        return self.expiry_date
    expiration_days = (
        config.get_for_repo(self.repo, "default_expiration_days")
        or DEFAULT_EXPIRATION_DAYS)
    # original_creation_date was only added recently, so legacy records
    # may lack it; fall back to "now" as a last resort.
    base_date = (self.source_date or self.original_creation_date
                 or utils.get_utcnow())
    return base_date + timedelta(expiration_days)
def sign_url(self, object_name, url_lifetime):
    """ Generates Cloud Storage signed URL to download Google Cloud Storage
    object without sign in.

    See: https://cloud.google.com/storage/docs/access-control/signed-urls

    This only works on a real App Engine app, not in a dev app server.

    Args:
        object_name (str): The name of the object which is signed.
        url_lifetime (datetime.timedelta): Lifetime of the signed URL. The
            server rejects any requests received after this time from now.

    Returns:
        str: the signed download URL.

    Raises:
        Exception: when called on a dev app server, which doesn't support
            app_identity.sign_blob().
    """
    # Local import: only this function needs it.
    import calendar
    if utils.is_dev_app_server():
        # Not working on a dev app server because it doesn't support
        # app_identity.sign_blob(). An alternative implementation would
        # be needed to make it work on a dev app server.
        raise Exception(
            'sign_url only works on a real App Engine app, not on a dev '
            'app server.')
    method = 'GET'
    expiration_time = utils.get_utcnow() + url_lifetime
    # Bug fix: the 'Expires' value must be epoch seconds in UTC.
    # time.mktime() interprets the tuple in the server's *local* timezone,
    # which is only correct when that zone happens to be UTC;
    # calendar.timegm() always treats the tuple as UTC.
    expiration_sec = int(calendar.timegm(expiration_time.timetuple()))
    path = '/%s/%s' % (self.bucket_name, object_name)
    # These are unused in our use case, but the signing string still
    # requires their (empty) slots.
    content_md5 = ''
    content_type = ''
    signed_text = '\n'.join([
        method,
        content_md5,
        content_type,
        str(expiration_sec),
        path,
    ])
    (_, signature) = app_identity.sign_blob(signed_text.encode('utf-8'))
    query_params = {
        'GoogleAccessId': app_identity.get_service_account_name(),
        'Expires': str(expiration_sec),
        'Signature': base64.b64encode(signature),
    }
    return 'https://storage.googleapis.com%s?%s' % (
        path, urllib.urlencode(query_params))
def delete_person(self, person):
    """Delete a person record and associated data.  If it's an original
    record, deletion can be undone within EXPIRED_TTL_DAYS days.

    Args:
        person: the Person entity to delete (original or clone).
    """
    if person.is_original():
        # For an original record, set the expiry date and send notifiations
        # to all the related e-mail addresses offering an undelete link.
        # (The externally visible result will be as if we overwrote the
        # record with an expiry date and blank fields.)

        # i18n: Subject line of an e-mail message notifying a user
        # i18n: that a person record has been deleted
        subject = _(
            '[Person Finder] Deletion notice for '
            '"%(first_name)s %(last_name)s"'
        ) % {'first_name': person.first_name,
             'last_name': person.last_name}

        # Send e-mail to all the addresses notifying them of the deletion.
        for email in person.get_associated_emails():
            # The record's author gets a different message body than the
            # authors of notes on the record.
            if email == person.author_email:
                template_name = 'deletion_email_for_person_author.txt'
            else:
                template_name = 'deletion_email_for_note_author.txt'
            self.send_mail(
                subject=subject,
                to=email,
                body=self.render_to_string(
                    template_name,
                    first_name=person.first_name,
                    last_name=person.last_name,
                    site_url=self.get_url('/'),
                    days_until_deletion=EXPIRED_TTL_DAYS,
                    restore_url=self.get_restore_url(person)
                )
            )

        # Set the expiry_date to now, and set is_expired flags to match.
        person.expiry_date = utils.get_utcnow()
        person.put_expiry_flags()
    else:
        # For a clone record, we don't have authority to change the
        # expiry_date, so we just delete the record now. (The externally
        # visible result will be as if we had never received a copy of it.)
        db.delete([person] + person.get_notes(filter_expired=False))
def read(self, key, default=None):
    """Returns the cached value for key, or default on a cache miss.

    A stale entry (one whose expiry time has passed) is evicted from the
    cache and treated as a miss.
    """
    # Entries are stored as (value, expiry) pairs; absent keys map to
    # (None, 0) so they fall through to the miss path.
    value, expiry = self.storage.get(key, (None, 0))
    if value is None:
        self.miss_count += 1
        return default
    now = utils.get_utcnow()
    if expiry <= now:
        # Stale cache entry. Evicting from cache
        self.delete(key)
        self.evict_count += 1
        self.miss_count += 1
        return default
    self.hit_count += 1
    return value
def read(self, key, default=None):
    """Gets the value corresponding to the key from cache.  If the cache
    entry has expired, it is deleted from the cache and `default` is
    returned (as it is for a key that is not cached at all).

    Args:
        key: the cache key to look up.
        default: the value returned on a miss.

    Returns:
        The cached value, or `default` on a miss.
    """
    # Entries are stored as (value, expiry) pairs; (None, 0) means absent.
    value, expiry = self.storage.get(key, (None, 0))
    if value is None:
        self.miss_count += 1
        return default
    now = utils.get_utcnow()
    if (expiry > now):
        self.hit_count += 1
        return value
    else:
        # Stale cache entry. Evicting from cache
        self.delete(key)
        self.evict_count += 1
        # A stale entry counts as both an eviction and a miss.
        self.miss_count += 1
        return default
def get_effective_expiry_date(self):
    """Gets the expiry_date, or if no expiry_date is present, returns the
    source_date plus the configurable default_expiration_days interval.

    If there's no source_date, we use original_creation_date.

    Returns:
        A datetime date (not None).
    """
    if self.expiry_date:
        return self.expiry_date
    else:
        expiration_days = config.get_for_repo(
            self.repo, 'default_expiration_days') or (
                DEFAULT_EXPIRATION_DAYS)
        # Bug fix: fall back to source_date first, as the docstring above
        # promises (and as the sibling implementation does); previously
        # source_date was skipped entirely.
        # In theory, we should always have original_creation_date, but
        # since it was only added recently, we might have legacy records
        # without it; use the current time as a last resort.
        start_date = (self.source_date or self.original_creation_date
                      or utils.get_utcnow())
        return start_date + timedelta(expiration_days)
def put_new(cls, action, entity, detail='', ip_address='',
            copy_properties=True):
    """Adds an entry to the UserActionLog.  'action' is the action that the
    user performed, 'entity' is the entity that was operated on, and
    'detail' is a string containing any other details.

    Args:
        action: the action the user performed.
        entity: the datastore entity that was operated on.
        detail: optional string with any other details.
        ip_address: optional requester IP address to record.
        copy_properties: when True, snapshot the entity's property values
            into the log entry.
    """
    # Function-level import — presumably to avoid an import cycle with
    # this module; confirm before moving it to the top of the file.
    import utils
    kind = entity.kind()
    entry = cls(
        time=utils.get_utcnow(),
        repo=entity.repo,
        action=action,
        entity_kind=kind,
        entity_key_name=entity.key().name(),
        detail=detail,
        ip_address=ip_address)
    # copy the properties of the entity
    if copy_properties:
        for name in entity.properties():
            value = getattr(entity, name)
            # Referenced entities are stored by key rather than by value.
            if isinstance(value, db.Model):
                value = value.key()
            # Prefix property names with the entity kind so properties of
            # different kinds don't collide (e.g. 'Person_author_name').
            setattr(entry, kind + '_' + name, value)
    entry.put()
def delete_person(self, person):
    """Delete a person record and associated data.  If it's an original
    record, deletion can be undone within EXPIRED_TTL_DAYS days.

    Args:
        person: the Person entity to delete (original or clone).
    """
    if person.is_original():
        # For an original record, set the expiry date and send notifiations
        # to all the related e-mail addresses offering an undelete link.
        # (The externally visible result will be as if we overwrote the
        # record with an expiry date and blank fields.)

        # i18n: Subject line of an e-mail message notifying a user
        # i18n: that a person record has been deleted
        subject = _('[Person Finder] Deletion notice for '
                    '"%(first_name)s %(last_name)s"') % {
                        'first_name': person.first_name,
                        'last_name': person.last_name
                    }

        # Send e-mail to all the addresses notifying them of the deletion.
        for email in person.get_associated_emails():
            # The record's author gets a different message body than the
            # authors of notes on the record.
            if email == person.author_email:
                template_name = 'deletion_email_for_person_author.txt'
            else:
                template_name = 'deletion_email_for_note_author.txt'
            self.send_mail(subject=subject,
                           to=email,
                           body=self.render_to_string(
                               template_name,
                               first_name=person.first_name,
                               last_name=person.last_name,
                               site_url=self.get_url('/'),
                               days_until_deletion=EXPIRED_TTL_DAYS,
                               restore_url=self.get_restore_url(person)))

        # Set the expiry_date to now, and set is_expired flags to match.
        person.expiry_date = utils.get_utcnow()
        person.put_expiry_flags()
    else:
        # For a clone record, we don't have authority to change the
        # expiry_date, so we just delete the record now. (The externally
        # visible result will be as if we had never received a copy of it.)
        db.delete([person] + person.get_notes(filter_expired=False))
def create_person(subdomain, fields):
    """Creates a Person entity in the given subdomain's repository with the
    given field values.  If 'fields' contains a 'person_record_id', calling
    put() on the resulting entity will overwrite any existing (original or
    clone) record with the same person_record_id.  Otherwise, a new
    original person record is created in the given subdomain.

    Args:
        subdomain: the repository subdomain to create the record in.
        fields: a dict of raw field values; each value is validated and
            normalized before being stored.

    Returns:
        An unsaved Person entity (the caller is expected to put() it).
    """
    # Validate/normalize each incoming field before constructing the
    # entity; entry_date is always stamped with the current time.
    person_fields = dict(
        entry_date=get_utcnow(),
        expiry_date=validate_datetime(fields.get('expiry_date')),
        author_name=strip(fields.get('author_name')),
        author_email=strip(fields.get('author_email')),
        author_phone=strip(fields.get('author_phone')),
        source_name=strip(fields.get('source_name')),
        source_url=strip(fields.get('source_url')),
        source_date=validate_datetime(fields.get('source_date')),
        full_name=strip(fields.get('full_name')),
        first_name=strip(fields.get('first_name')),
        last_name=strip(fields.get('last_name')),
        sex=validate_sex(fields.get('sex')),
        date_of_birth=validate_approximate_date(fields.get('date_of_birth')),
        age=validate_age(fields.get('age')),
        home_street=strip(fields.get('home_street')),
        home_neighborhood=strip(fields.get('home_neighborhood')),
        home_city=strip(fields.get('home_city')),
        home_state=strip(fields.get('home_state')),
        # Fall back to 'home_zip' for backward compatibility with PFIF 1.1.
        home_postal_code=strip(
            fields.get('home_postal_code', fields.get('home_zip'))),
        home_country=strip(fields.get('home_country')),
        photo_url=strip(fields.get('photo_url')),
        other=fields.get('other')
    )

    record_id = strip(fields.get('person_record_id'))
    if record_id:  # create a record that might overwrite an existing one
        if is_clone(subdomain, record_id):
            return Person.create_clone(subdomain, record_id, **person_fields)
        else:
            return Person.create_original_with_record_id(
                subdomain, record_id, **person_fields)
    else:  # create a new original record
        return Person.create_original(subdomain, **person_fields)
def delete_person(handler, person, send_notices=True):
    """Delete a person record and associated data.  If it's an original
    record, deletion can be undone within EXPIRED_TTL_DAYS days."""
    if not person.is_original():
        # For a clone record, we don't have authority to change the
        # expiry_date, so we just delete the record now. (The externally
        # visible result will be as if we had never received a copy of it.)
        person.delete_related_entities(delete_self=True)
        return
    if send_notices:
        # For an original record, send notifiations to all the related
        # e-mail addresses offering an undelete link.
        send_delete_notice(handler, person)
    # Set the expiry_date to now, and set is_expired flags to match.
    # (The externally visible result will be as if we overwrote the
    # record with an expiry date and blank fields.)
    person.expiry_date = utils.get_utcnow()
    person.put_expiry_flags()
def _render_form(self):
    """Renders the admin ACLs form for the current repository.

    Splits the repository's existing ACLs into those the signed-in admin
    may edit (at or below their own access level) and those shown as
    fixed (above their level).
    """
    existing_acls = admin_acls_model.AdminPermission.get_for_repo(
        self.env.repo)
    # Editable: the current user's level compares >= the ACL's level.
    editable_acls = [
        acl for acl in existing_acls
        if self.env.user_admin_permission.compare_level_to(
            acl.access_level) >= 0]
    # Fixed (read-only): ACLs above the current user's own level.
    fixed_acls = [
        acl for acl in existing_acls
        if self.env.user_admin_permission.compare_level_to(
            acl.access_level) < 0]
    # New entries default to expiring one year from now.
    default_expiration_date = (
        utils.get_utcnow() + datetime.timedelta(days=365))
    return self.render(
        'admin_acls.html',
        editable_acls=editable_acls,
        fixed_acls=fixed_acls,
        default_expiration_date=default_expiration_date,
        xsrf_token=self.xsrf_tool.generate_token(self.env.user.user_id(),
                                                 self.ACTION_ID))
def create_person(subdomain, fields):
    """Creates a Person entity in the given subdomain's repository with the
    given field values.  If 'fields' contains a 'person_record_id', calling
    put() on the resulting entity will overwrite any existing (original or
    clone) record with the same person_record_id.  Otherwise, a new
    original person record is created in the given subdomain.

    Args:
        subdomain: the repository subdomain to create the record in.
        fields: a dict of raw field values; each value is validated and
            normalized before being stored.

    Returns:
        An unsaved Person entity (the caller is expected to put() it).
    """
    # Validate/normalize each incoming field before constructing the
    # entity; entry_date is always stamped with the current time.
    person_fields = dict(
        entry_date=get_utcnow(),
        expiry_date=validate_datetime(fields.get('expiry_date')),
        author_name=strip(fields.get('author_name')),
        author_email=strip(fields.get('author_email')),
        author_phone=strip(fields.get('author_phone')),
        source_name=strip(fields.get('source_name')),
        source_url=strip(fields.get('source_url')),
        source_date=validate_datetime(fields.get('source_date')),
        full_name=strip(fields.get('full_name')),
        first_name=strip(fields.get('first_name')),
        last_name=strip(fields.get('last_name')),
        sex=validate_sex(fields.get('sex')),
        date_of_birth=validate_approximate_date(fields.get('date_of_birth')),
        age=validate_age(fields.get('age')),
        home_street=strip(fields.get('home_street')),
        home_neighborhood=strip(fields.get('home_neighborhood')),
        home_city=strip(fields.get('home_city')),
        home_state=strip(fields.get('home_state')),
        # Fall back to 'home_zip' for backward compatibility with PFIF 1.1.
        home_postal_code=strip(
            fields.get('home_postal_code', fields.get('home_zip'))),
        home_country=strip(fields.get('home_country')),
        photo_url=strip(fields.get('photo_url')),
        other=fields.get('other'))

    record_id = strip(fields.get('person_record_id'))
    if record_id:  # create a record that might overwrite an existing one
        if is_clone(subdomain, record_id):
            return Person.create_clone(subdomain, record_id, **person_fields)
        else:
            return Person.create_original_with_record_id(
                subdomain, record_id, **person_fields)
    else:  # create a new original record
        return Person.create_original(subdomain, **person_fields)
def post(self, request, *args, **kwargs): del request, args, kwargs # unused q = self.get_query() cursor = self.params.get('cursor', '') if cursor: q.with_cursor(cursor) try: now = utils.get_utcnow() for item in q: next_cursor = q.cursor() associated_person = model.Person.get( self.env.repo, self.get_person_record_id(item)) if not associated_person: if now - self.get_base_timestamp(item) > _STRAY_CLEANUP_TTL: db.delete(item) cursor = next_cursor except runtime.DeadlineExceededError: self.schedule_task(self.env.repo, cursor=cursor) except datastore_errors.Timeout: self.schedule_task(self.env.repo, cursor=cursor) return django.http.HttpResponse('')
def post(self):
    """Creates a new original Person record from the posted form fields
    and returns its record ID as JSON ({'personId': ...})."""
    # TODO: factor all this out somewhere shared
    person = Person.create_original(
        self.repo,
        entry_date=utils.get_utcnow(),
        family_name=self.params.family_name,
        given_name=self.params.given_name,
        age=self.params.age,
        sex=self.params.sex,
        home_city=self.params.home_city,
        home_state=self.params.home_state,
        home_country=self.params.home_country,
    )
    if self.params.photo:
        # Store the uploaded photo and attach it to the record.
        p, photo_url = photo.create_photo(self.params.photo, self)
        p.put()
        person.photo = p
        person.photo_url = photo_url
    # Index the record under both the 'old' and 'new' index variants.
    person.update_index(['old', 'new'])
    person.put_new()
    json = {'personId': person.record_id}
    self._return_json(json)