def test_estimate_spam_score(self):
    """Check estimate_spam_score() on texts with and without bad words.

    The detector is configured with the bad-word list 'foo, BAR'.  The
    expected values below imply that matching ignores letter case.
    """
    d = SpamDetector('foo, BAR')

    # Texts containing bad words get a positive score.
    assert d.estimate_spam_score('a sentence with foo, bar') == 0.4
    assert d.estimate_spam_score("It's a Foo day.") == 0.25

    # Texts without any bad word score zero.
    assert d.estimate_spam_score('x') == 0
    assert d.estimate_spam_score('123') == 0

    # Blank or punctuation-only text yields no score at all.  None is a
    # singleton, so compare by identity rather than with '=='.
    assert d.estimate_spam_score(' ,') is None
    assert d.estimate_spam_score('') is None
class Handler(BaseHandler):
    """Request handler for the page that creates a new person record."""

    def get(self):
        """Render 'create.html' with the configured profile websites."""
        self.params.create_mode = True
        profile_websites = [
            add_profile_icon_url(website, self)
            for website in self.config.profile_websites or []]
        self.render('create.html',
                    profile_websites=profile_websites,
                    profile_websites_json=simplejson.dumps(profile_websites),
                    onload_function='view_page_loaded()')

    def post(self):
        """Validate the submitted form, then store the person (and note).

        Validation failures are reported through self.error(400, ...).
        On success the request is redirected to '/post_flagged_note' (when
        the note has a positive spam score), to '/subscribe' (when the user
        asked to subscribe), or to '/view' for the new record.
        """
        now = get_utcnow()

        # Several messages here exceed the 80-column limit because django's
        # makemessages script can't handle messages split across lines. :(
        if self.config.use_family_name:
            if not (self.params.given_name and self.params.family_name):
                return self.error(400, _('The Given name and Family name are both required. Please go back and try again.'))
        else:
            if not self.params.given_name:
                return self.error(400, _('Name is required. Please go back and try again.'))

        # If user is inputting his/her own information, set some params
        # automatically.
        if self.params.own_info == 'yes':
            self.params.author_name = self.params.given_name
            self.params.status = 'is_note_author'
            self.params.author_made_contact = 'yes'
            if self.params.your_own_email:
                self.params.author_email = self.params.your_own_email
            if self.params.your_own_phone:
                self.params.author_phone = self.params.your_own_phone
        else:
            if not self.params.author_name:
                if self.params.clone:
                    return self.error(400, _('The Original author\'s name is required. Please go back and try again.'))
                else:
                    return self.error(400, _('Your name is required in the "Source" section. Please go back and try again.'))

        if self.params.add_note:
            if not self.params.text:
                return self.error(400, _('Message is required. Please go back and try again.'))
            if (self.params.status == 'is_note_author' and
                    not self.params.author_made_contact):
                return self.error(400, _('Please check that you have been in contact with the person after the earthquake, or change the "Status of this person" field.'))
            if (self.params.status == 'believed_dead' and
                    not self.config.allow_believed_dead_via_ui):
                return self.error(400, _('Not authorized to post notes with the status "I have received information that this person is dead".'))

        source_date = None
        if self.params.source_date:
            try:
                source_date = validate_date(self.params.source_date)
            except ValueError:
                return self.error(400, _('Original posting date is not in YYYY-MM-DD format, or is a nonexistent date. Please go back and try again.'))
            if source_date > now:
                return self.error(400, _('Date cannot be in the future. Please go back and try again.'))

        expiry_date = days_to_date(self.params.expiry_option or
                                   self.config.default_expiry_days)

        # If nothing was uploaded, just use the photo_url that was provided.
        photo, photo_url = (None, self.params.photo_url)
        note_photo, note_photo_url = (None, self.params.note_photo_url)
        try:
            # If a photo was uploaded, create a Photo entry and get the URL
            # where we serve it.
            if self.params.photo is not None:
                photo, photo_url = create_photo(self.params.photo, self)
            if self.params.note_photo is not None:
                note_photo, note_photo_url = \
                    create_photo(self.params.note_photo, self)
        except PhotoError as e:
            # 'as' form: valid on Python 2.6+ and Python 3 alike.
            return self.error(400, e.message)

        # Finally, store the Photo. Past this point, we should NOT self.error.
        if photo:
            photo.put()
        if note_photo:
            note_photo.put()

        # Keep only the profile URL fields that were actually filled in.
        profile_urls = [
            url for url in (self.params.profile_url1,
                            self.params.profile_url2,
                            self.params.profile_url3)
            if url]

        # Person records have to have a source_date; if none entered, use now.
        source_date = source_date or now

        # Determine the source name, or fill it in if the record is original
        # (i.e. created for the first time here, not copied from elsewhere).
        source_name = self.params.source_name
        if not self.params.clone:
            # record originated here
            if self.params.referrer:
                source_name = "%s (referred by %s)" % (
                    self.env.netloc, self.params.referrer)
            else:
                source_name = self.env.netloc

        person = Person.create_original(
            self.repo,
            entry_date=now,
            expiry_date=expiry_date,
            given_name=self.params.given_name,
            family_name=self.params.family_name,
            full_name=get_full_name(self.params.given_name,
                                    self.params.family_name,
                                    self.config),
            alternate_names=get_full_name(self.params.alternate_given_names,
                                          self.params.alternate_family_names,
                                          self.config),
            description=self.params.description,
            sex=self.params.sex,
            date_of_birth=self.params.date_of_birth,
            age=self.params.age,
            home_street=self.params.home_street,
            home_city=self.params.home_city,
            home_state=self.params.home_state,
            home_postal_code=self.params.home_postal_code,
            home_neighborhood=self.params.home_neighborhood,
            home_country=self.params.home_country,
            profile_urls='\n'.join(profile_urls),
            author_name=self.params.author_name,
            author_phone=self.params.author_phone,
            author_email=self.params.author_email,
            source_url=self.params.source_url,
            source_date=source_date,
            source_name=source_name,
            photo=photo,
            photo_url=photo_url
        )
        person.update_index(['old', 'new'])

        if self.params.add_note:
            spam_detector = SpamDetector(self.config.bad_words)
            spam_score = spam_detector.estimate_spam_score(self.params.text)

            # Fields shared by the flagged and the regular note.
            note_fields = dict(
                entry_date=get_utcnow(),
                person_record_id=person.record_id,
                author_name=self.params.author_name,
                author_email=self.params.author_email,
                author_phone=self.params.author_phone,
                source_date=source_date,
                author_made_contact=bool(self.params.author_made_contact),
                status=self.params.status,
                email_of_found_person=self.params.email_of_found_person,
                phone_of_found_person=self.params.phone_of_found_person,
                last_known_location=self.params.last_known_location,
                text=self.params.text,
                photo=note_photo,
                photo_url=note_photo_url)

            # The score may be None; test for that explicitly instead of
            # relying on Python 2's ordering of None against numbers (the
            # comparison raises TypeError on Python 3).
            if spam_score is not None and spam_score > 0:
                note = NoteWithBadWords.create_original(
                    self.repo,
                    spam_score=spam_score,
                    confirmed=False,
                    **note_fields)
                # Write the new NoteWithBadWords to the datastore
                note.put_new()
                # Write the person record to datastore before redirect
                db.put(person)
                UserActionLog.put_new('add', person, copy_properties=False)

                # When the note is detected as spam, we do not update person
                # record with this note or log action. We ask the note author
                # for confirmation first.
                return self.redirect('/post_flagged_note',
                                     id=note.get_record_id(),
                                     author_email=note.author_email,
                                     repo=self.repo)

            note = Note.create_original(self.repo, **note_fields)
            # Write the new Note to the datastore
            note.put_new()
            person.update_from_note(note)

            # Specially log 'believed_dead'.
            if note.status == 'believed_dead':
                UserActionLog.put_new(
                    'mark_dead', note, person.primary_full_name,
                    self.request.remote_addr)

        # Write the person record to datastore
        person.put_new()

        # TODO(ryok): we could do this earlier so we don't need to db.put twice.
        if not person.source_url and not self.params.clone:
            # Put again with the URL, now that we have a person_record_id.
            person.source_url = self.get_url('/view', id=person.record_id)
            db.put(person)

        # TODO(ryok): batch-put person, note, photo, note_photo here.

        # If the "subscribe to updates about your own record" box was left
        # unchecked, skip the subscribe page.
        if not self.params.subscribe_own_info:
            self.params.subscribe = False

        # If user wants to subscribe to updates, redirect to the subscribe page
        if self.params.subscribe:
            return self.redirect('/subscribe',
                                 id=person.record_id,
                                 subscribe_email=self.params.author_email,
                                 context='create_person')

        self.redirect('/view', id=person.record_id)
home_postal_code=home_postal_code, home_neighborhood=home_neighborhood, home_country=home_country, profile_urls='\n'.join(profile_urls), author_name=author_name, author_phone=author_phone, author_email=author_email, source_url=source_url, source_date=source_date, source_name=source_name, photo=photo, photo_url=photo_url) person.update_index(['old', 'new']) if add_note: spam_detector = SpamDetector(config.bad_words) spam_score = spam_detector.estimate_spam_score(text) if (spam_score > 0): note = NoteWithBadWords.create_original( repo, entry_date=get_utcnow(), person_record_id=person.record_id, author_name=author_name, author_email=author_email, author_phone=author_phone, source_date=source_date, author_made_contact=bool(author_made_contact), status=status, email_of_found_person=email_of_found_person, phone_of_found_person=phone_of_found_person, last_known_location=last_known_location,
def test_init(self):
    """The constructor should expose the bad words as a lowercase set."""
    detector = SpamDetector('foo, BAR')
    expected_words = set(['foo', 'bar'])
    assert expected_words == detector.bad_words_set
def create_note(
        repo, person, config, user_ip_address, status, source_date,
        author_name, author_email, author_phone, author_made_contact,
        note_photo, note_photo_url, text, email_of_found_person,
        phone_of_found_person, last_known_location, validate_data=True):
    """Create and store a note attached to the given person.

    Args:
        repo: Repository passed through to create_original().
        person: Person the note refers to; its record_id and latest_status
            are read here.
        config: Configuration object; config.bad_words feeds the spam
            detector and the object is also passed to validate_note_data().
        user_ip_address: Recorded with the 'mark_dead' action log entry.
        status, source_date, author_name, author_email, author_phone,
        author_made_contact, text, email_of_found_person,
        phone_of_found_person, last_known_location: Note field values,
            stored as given.
        note_photo, note_photo_url: Stored as the note's photo / photo_url.
        validate_data: When true (the default), validate_note_data() is
            called before anything is created.

    Returns:
        The stored Note.

    Raises:
        FlaggedNoteException: When the text has a positive spam score.  A
            NoteWithBadWords (confirmed=False) is stored first and carried
            by the exception; no action log entry is written in this case.
    """
    if validate_data:
        validate_note_data(
            config=config,
            status=status,
            author_name=author_name,
            author_email=author_email,
            author_made_contact=author_made_contact,
            text=text)

    spam_detector = SpamDetector(config.bad_words)
    spam_score = spam_detector.estimate_spam_score(text)

    # Fields shared by the flagged and the regular note.
    note_fields = dict(
        entry_date=get_utcnow(),
        person_record_id=person.record_id,
        author_name=author_name,
        author_email=author_email,
        author_phone=author_phone,
        source_date=source_date,
        author_made_contact=author_made_contact,
        status=status,
        email_of_found_person=email_of_found_person,
        phone_of_found_person=phone_of_found_person,
        last_known_location=last_known_location,
        text=text,
        photo=note_photo,
        photo_url=note_photo_url)

    # The score may be None (e.g. no scorable text); test for that explicitly
    # instead of relying on Python 2's ordering of None against numbers (the
    # comparison raises TypeError on Python 3).
    if spam_score is not None and spam_score > 0:
        note = NoteWithBadWords.create_original(
            repo,
            spam_score=spam_score,
            confirmed=False,
            **note_fields)
        note.put_new()
        raise FlaggedNoteException(note)

    note = Note.create_original(repo, **note_fields)
    note.put_new()

    # Specially log notes that make a person dead or switch to an alive status.
    if status == 'believed_dead':
        UserActionLog.put_new(
            'mark_dead', note, person.record_id, user_ip_address)
    if (status in ['believed_alive', 'is_note_author'] and
            person.latest_status not in ['believed_alive', 'is_note_author']):
        UserActionLog.put_new('mark_alive', note, person.record_id)

    # TODO(nworden): add sending subscription notifications here
    return note
class Handler(BaseHandler):
    """Request handler for the page that adds a note to a person record."""

    def get(self):
        """Render 'add_note.html' for the person given by the 'id' param."""
        # Check the request parameters.
        if not self.params.id:
            return self.error(404, _('No person id was specified.'))
        try:
            person = Person.get(self.repo, self.params.id)
        # TODO(ichikawa) Consider removing this "except" clause.
        #     I don't think ValueError is thrown here.
        except ValueError:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        if not person:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))

        # Render the page.
        enable_notes_url = self.get_url('/enable_notes', id=self.params.id)
        self.render('add_note.html',
                    person=person,
                    enable_notes_url=enable_notes_url)

    def post(self):
        """Post a note in person's record view page"""
        if not self.params.text:
            return self.error(
                400, _('Message is required. Please go back and try again.'))

        if not self.params.author_name:
            return self.error(
                400, _('Your name is required in the "About you" section. '
                       'Please go back and try again.'))

        if (self.params.status == 'is_note_author' and
                not self.params.author_made_contact):
            return self.error(
                400, _('Please check that you have been in contact with '
                       'the person after the disaster, or change the '
                       '"Status of this person" field.'))

        if (self.params.status == 'believed_dead' and
                not self.config.allow_believed_dead_via_ui):
            return self.error(
                400, _('Not authorized to post notes with the status '
                       '"believed_dead".'))

        if (self.params.author_email and
                not utils.validate_email(self.params.author_email)):
            return self.error(400, _(
                'The email address you entered appears to be invalid.'))

        person = Person.get(self.repo, self.params.id)
        # Bail out before touching any attribute of a missing record; get()
        # reports the same condition with the same message.
        if not person:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        if person.notes_disabled:
            return self.error(
                400, _('The author has disabled status updates '
                       'on this record.'))

        # If a photo was uploaded, create and store a new Photo entry and get
        # the URL where it's served; otherwise, use the note_photo_url provided.
        photo, photo_url = (None, self.params.note_photo_url)
        if self.params.note_photo is not None:
            try:
                photo, photo_url = create_photo(
                    self.params.note_photo, self.repo,
                    self.transitionary_get_url)
            except PhotoError as e:
                # 'as' form: valid on Python 2.6+ and Python 3 alike.
                return self.error(400, e.message)
            photo.put()

        spam_detector = SpamDetector(self.config.bad_words)
        spam_score = spam_detector.estimate_spam_score(self.params.text)

        # Fields shared by the flagged and the regular note.
        note_fields = dict(
            entry_date=get_utcnow(),
            person_record_id=self.params.id,
            author_name=self.params.author_name,
            author_email=self.params.author_email,
            author_phone=self.params.author_phone,
            source_date=get_utcnow(),
            author_made_contact=bool(self.params.author_made_contact),
            status=self.params.status,
            email_of_found_person=self.params.email_of_found_person,
            phone_of_found_person=self.params.phone_of_found_person,
            last_known_location=self.params.last_known_location,
            text=self.params.text,
            photo=photo,
            photo_url=photo_url)

        # The score may be None; test for that explicitly instead of relying
        # on Python 2's ordering of None against numbers (the comparison
        # raises TypeError on Python 3).
        if spam_score is not None and spam_score > 0:
            note = NoteWithBadWords.create_original(
                self.repo,
                spam_score=spam_score,
                confirmed=False,
                **note_fields)
            # Write the new NoteWithBadWords to the datastore
            note.put_new()
            # When the note is detected as spam, we do not update person record
            # or log action. We ask the note author for confirmation first.
            return self.redirect('/post_flagged_note',
                                 id=note.get_record_id(),
                                 author_email=note.author_email,
                                 repo=self.repo)

        note = Note.create_original(self.repo, **note_fields)
        # Write the new regular Note to the datastore
        note.put_new()

        # Specially log 'believed_dead'.
        if note.status == 'believed_dead':
            UserActionLog.put_new(
                'mark_dead', note, person.primary_full_name,
                self.request.remote_addr)

        # Specially log a switch to an alive status.
        if (note.status in ['believed_alive', 'is_note_author'] and
                person.latest_status not in ['believed_alive',
                                             'is_note_author']):
            UserActionLog.put_new('mark_alive', note, person.primary_full_name)

        # Update the Person based on the Note.  (person is known to exist
        # here thanks to the early check above.)
        person.update_from_note(note)
        # Send notification to all people
        # who subscribed to updates on this person
        subscribe.send_notifications(self, person, [note])
        # write the updated person record to datastore
        db.put(person)

        # If user wants to subscribe to updates, redirect to the subscribe page
        if self.params.subscribe:
            return self.redirect('/subscribe',
                                 id=person.record_id,
                                 subscribe_email=self.params.author_email,
                                 context='add_note')

        # Redirect to view page so the browser's back button works properly.
        self.redirect('/view', id=self.params.id, query=self.params.query)
class Handler(BaseHandler):
    """Request handler for the page that shows one person record."""

    def get(self):
        """Render 'view.html' for the person given by the 'id' param.

        Gathers the person's notes, the linked (duplicate) persons with
        their notes, the action URLs used by the template, and the
        expiry/extension values, then renders the page.
        """
        # Check the request parameters.
        if not self.params.id:
            # Wrapped in _() like every other user-facing error in this class.
            return self.error(404, _('No person id was specified.'))
        try:
            person = Person.get(self.repo, self.params.id)
        except ValueError:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        if not person:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        standalone = self.request.get('standalone')

        # Check if private info should be revealed.
        content_id = 'view:' + self.params.id
        reveal_url = reveal.make_reveal_url(self, content_id)
        show_private_info = reveal.verify(content_id, self.params.signature)

        # Compute the local times for the date fields on the person.
        person.source_date_local_string = self.to_formatted_local_time(
            person.source_date)
        person.expiry_date_local_string = self.to_formatted_local_time(
            person.get_effective_expiry_date())

        # Get the notes and duplicate links.  A missing datastore index is
        # treated as "no results" rather than as an error.
        try:
            notes = person.get_notes()
        except datastore_errors.NeedIndexError:
            notes = []
        person.sex_text = get_person_sex_text(person)
        for note in notes:
            self.add_fields_to_notes(note)
        try:
            linked_persons = person.get_all_linked_persons()
        except datastore_errors.NeedIndexError:
            linked_persons = []
        linked_person_info = []
        for linked_person in linked_persons:
            try:
                linked_notes = linked_person.get_notes()
            except datastore_errors.NeedIndexError:
                linked_notes = []
            for note in linked_notes:
                self.add_fields_to_notes(note)
            linked_person_info.append(
                dict(id=linked_person.record_id,
                     name=linked_person.primary_full_name,
                     view_url=self.get_url('/view', id=linked_person.record_id),
                     notes=linked_notes))

        # Render the page.
        dupe_notes_url = self.get_url(
            '/view', id=self.params.id, dupe_notes='yes')
        results_url = self.get_url(
            '/results',
            role=self.params.role,
            query=self.params.query,
            given_name=self.params.given_name,
            family_name=self.params.family_name)
        feed_url = self.get_url(
            '/feeds/note',
            person_record_id=self.params.id,
            repo=self.repo)
        subscribe_url = self.get_url('/subscribe', id=self.params.id)
        delete_url = self.get_url('/delete', id=self.params.id)
        disable_notes_url = self.get_url('/disable_notes', id=self.params.id)
        enable_notes_url = self.get_url('/enable_notes', id=self.params.id)

        # Expiry-related values are only filled in for non-clone records
        # that have an effective expiry date.
        extend_url = None
        extension_days = 0
        expiration_days = None
        expiry_date = person.get_effective_expiry_date()
        if expiry_date and not person.is_clone():
            expiration_delta = expiry_date - get_utcnow()
            extend_url = self.get_url('/extend', id=self.params.id)
            extension_days = extend.get_extension_days(self)
            if expiration_delta.days < EXPIRY_WARNING_THRESHOLD:
                # round 0 up to 1, to make the msg read better.
                expiration_days = expiration_delta.days + 1

        if person.is_clone():
            person.provider_name = person.get_original_domain()

        sanitize_urls(person)
        for note in notes:
            sanitize_urls(note)

        if person.profile_urls:
            person.profile_pages = get_profile_pages(person.profile_urls, self)

        self.render('view.html',
                    person=person,
                    notes=notes,
                    linked_person_info=linked_person_info,
                    standalone=standalone,
                    onload_function='view_page_loaded()',
                    show_private_info=show_private_info,
                    admin=users.is_current_user_admin(),
                    dupe_notes_url=dupe_notes_url,
                    results_url=results_url,
                    reveal_url=reveal_url,
                    feed_url=feed_url,
                    subscribe_url=subscribe_url,
                    delete_url=delete_url,
                    disable_notes_url=disable_notes_url,
                    enable_notes_url=enable_notes_url,
                    extend_url=extend_url,
                    extension_days=extension_days,
                    expiration_days=expiration_days)

    def post(self):
        """Validate and store a note posted from the view page.

        Form validation problems are reported through self.error(200, ...).
        On success the request is redirected to '/post_flagged_note' (when
        the note has a positive spam score), to '/subscribe' (when the user
        asked to subscribe), or back to '/view'.
        """
        if not self.params.text:
            return self.error(
                200, _('Message is required. Please go back and try again.'))

        if not self.params.author_name:
            return self.error(
                200, _('Your name is required in the "About you" section. '
                       'Please go back and try again.'))

        if (self.params.status == 'is_note_author' and
                not self.params.author_made_contact):
            return self.error(
                200, _('Please check that you have been in contact with '
                       'the person after the earthquake, or change the '
                       '"Status of this person" field.'))

        if (self.params.status == 'believed_dead' and
                not self.config.allow_believed_dead_via_ui):
            return self.error(
                200, _('Not authorized to post notes with the status '
                       '"believed_dead".'))

        person = Person.get(self.repo, self.params.id)
        # Bail out before touching any attribute of a missing record; get()
        # reports the same condition with the same status and message.
        if not person:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        if person.notes_disabled:
            return self.error(
                200, _('The author has disabled status updates '
                       'on this record.'))

        # If a photo was uploaded, create and store a new Photo entry and get
        # the URL where it's served; otherwise, use the note_photo_url provided.
        photo, photo_url = (None, self.params.note_photo_url)
        if self.params.note_photo is not None:
            try:
                photo, photo_url = create_photo(self.params.note_photo, self)
            except PhotoError as e:
                # 'as' form: valid on Python 2.6+ and Python 3 alike.
                return self.error(400, e.message)
            photo.put()

        spam_detector = SpamDetector(self.config.bad_words)
        spam_score = spam_detector.estimate_spam_score(self.params.text)

        # Fields shared by the flagged and the regular note.
        note_fields = dict(
            entry_date=get_utcnow(),
            person_record_id=self.params.id,
            author_name=self.params.author_name,
            author_email=self.params.author_email,
            author_phone=self.params.author_phone,
            source_date=get_utcnow(),
            author_made_contact=bool(self.params.author_made_contact),
            status=self.params.status,
            email_of_found_person=self.params.email_of_found_person,
            phone_of_found_person=self.params.phone_of_found_person,
            last_known_location=self.params.last_known_location,
            text=self.params.text,
            photo=photo,
            photo_url=photo_url)

        # The score may be None; test for that explicitly instead of relying
        # on Python 2's ordering of None against numbers (the comparison
        # raises TypeError on Python 3).
        if spam_score is not None and spam_score > 0:
            note = NoteWithBadWords.create_original(
                self.repo,
                spam_score=spam_score,
                confirmed=False,
                **note_fields)
            # Write the new NoteWithBadWords to the datastore
            db.put(note)
            UserActionLog.put_new('add', note, copy_properties=False)
            # When the note is detected as spam, we do not update person record
            # or log action. We ask the note author for confirmation first.
            return self.redirect('/post_flagged_note',
                                 id=note.get_record_id(),
                                 author_email=note.author_email,
                                 repo=self.repo)

        note = Note.create_original(self.repo, **note_fields)
        # Write the new regular Note to the datastore
        db.put(note)
        UserActionLog.put_new('add', note, copy_properties=False)

        # Specially log 'believed_dead'.
        if note.status == 'believed_dead':
            UserActionLog.put_new('mark_dead', note,
                                  person.primary_full_name,
                                  self.request.remote_addr)

        # Specially log a switch to an alive status.
        if (note.status in ['believed_alive', 'is_note_author'] and
                person.latest_status not in ['believed_alive',
                                             'is_note_author']):
            UserActionLog.put_new('mark_alive', note, person.primary_full_name)

        # Update the Person based on the Note.  (person is known to exist
        # here thanks to the early check above.)
        person.update_from_note(note)
        # Send notification to all people
        # who subscribed to updates on this person
        subscribe.send_notifications(self, person, [note])
        # write the updated person record to datastore
        db.put(person)

        # If user wants to subscribe to updates, redirect to the subscribe page
        if self.params.subscribe:
            return self.redirect('/subscribe',
                                 id=person.record_id,
                                 subscribe_email=self.params.author_email,
                                 context='add_note')

        # Redirect to this page so the browser's back button works properly.
        self.redirect('/view', id=self.params.id, query=self.params.query)