def test_estimate_spam_score(self):
    """Check spam scores for matching, non-matching and word-free text.

    The detector is built from the bad-word list 'foo, BAR'; the inputs
    below exercise mixed-case matches, text without bad words, and text
    for which no score is produced at all.
    """
    d = SpamDetector('foo, BAR')
    assert d.estimate_spam_score('a sentence with foo, bar') == 0.4
    assert d.estimate_spam_score("It's a Foo day.") == 0.25
    assert d.estimate_spam_score('x') == 0
    assert d.estimate_spam_score('123') == 0
    # No score is expected for blank / punctuation-only text.  None is a
    # singleton, so compare by identity rather than with ==.
    assert d.estimate_spam_score('  ,') is None
    assert d.estimate_spam_score('') is None
Пример #2
0
 def test_estimate_spam_score(self):
     """Check spam scores for matching, non-matching and word-free text.

     The detector is built from the bad-word list 'foo, BAR'; the inputs
     below exercise mixed-case matches, text without bad words, and text
     for which no score is produced at all.
     """
     d = SpamDetector('foo, BAR')
     assert d.estimate_spam_score('a sentence with foo, bar') == 0.4
     assert d.estimate_spam_score("It's a Foo day.") == 0.25
     assert d.estimate_spam_score('x') == 0
     assert d.estimate_spam_score('123') == 0
     # No score is expected for blank / punctuation-only text.  None is a
     # singleton, so compare by identity rather than with ==.
     assert d.estimate_spam_score('  ,') is None
     assert d.estimate_spam_score('') is None
Пример #3
0
class Handler(BaseHandler):
    def get(self):
        """Render the record-creation page ('create.html').

        Marks the request as being in create mode and passes the configured
        profile websites to the template, both as a list and as JSON.
        """
        self.params.create_mode = True

        # The config value may be unset; treat that as "no websites".
        configured_sites = self.config.profile_websites or []
        profile_websites = []
        for site in configured_sites:
            profile_websites.append(add_profile_icon_url(site, self))

        websites_json = simplejson.dumps(profile_websites)
        self.render('create.html',
                    profile_websites=profile_websites,
                    profile_websites_json=websites_json,
                    onload_function='view_page_loaded()')

    def post(self):
        """Validate the submitted form and create a Person (and optional Note).

        Flow:
          1. Validate names, author, note fields and the source date; any
             failure returns a 400 error page.
          2. Create and store uploaded photos.
          3. Build the Person record, filling in source_name/source_url for
             records that originate here (i.e. are not clones).
          4. If a note was requested, score its text with SpamDetector.  A
             note with a positive score is stored as a NoteWithBadWords and
             the user is redirected to '/post_flagged_note'; otherwise a
             regular Note is stored and folded into the Person.
          5. Store the Person and redirect to '/subscribe' or '/view'.
        """
        now = get_utcnow()

        # Several messages here exceed the 80-column limit because django's
        # makemessages script can't handle messages split across lines. :(
        if self.config.use_family_name:
            if not (self.params.given_name and self.params.family_name):
                return self.error(400, _('The Given name and Family name are both required.  Please go back and try again.'))
        else:
            if not self.params.given_name:
                return self.error(400, _('Name is required.  Please go back and try again.'))

        # If user is inputting his/her own information, set some params
        # automatically.
        if self.params.own_info == 'yes':
            self.params.author_name = self.params.given_name
            self.params.status = 'is_note_author'
            self.params.author_made_contact = 'yes'
            if self.params.your_own_email:
                self.params.author_email = self.params.your_own_email
            if self.params.your_own_phone:
                self.params.author_phone = self.params.your_own_phone

        else:
            if not self.params.author_name:
                if self.params.clone:
                    return self.error(400, _('The Original author\'s name is required.  Please go back and try again.'))
                else:
                    return self.error(400, _('Your name is required in the "Source" section.  Please go back and try again.'))

        if self.params.add_note:
            if not self.params.text:
                return self.error(400, _('Message is required. Please go back and try again.'))
            if (self.params.status == 'is_note_author' and
                not self.params.author_made_contact):
                return self.error(400, _('Please check that you have been in contact with the person after the earthquake, or change the "Status of this person" field.'))
            if (self.params.status == 'believed_dead' and
                not self.config.allow_believed_dead_via_ui):
                return self.error(400, _('Not authorized to post notes with the status "I have received information that this person is dead".'))

        source_date = None
        if self.params.source_date:
            try:
                source_date = validate_date(self.params.source_date)
            except ValueError:
                return self.error(400, _('Original posting date is not in YYYY-MM-DD format, or is a nonexistent date.  Please go back and try again.'))
            if source_date > now:
                return self.error(400, _('Date cannot be in the future.  Please go back and try again.'))

        expiry_date = days_to_date(self.params.expiry_option or
                                   self.config.default_expiry_days)

        # If nothing was uploaded, just use the photo_url that was provided.
        photo, photo_url = (None, self.params.photo_url)
        note_photo, note_photo_url = (None, self.params.note_photo_url)
        try:
            # If a photo was uploaded, create a Photo entry and get the URL
            # where we serve it.
            if self.params.photo is not None:
                photo, photo_url = create_photo(self.params.photo, self)
            if self.params.note_photo is not None:
                note_photo, note_photo_url = \
                    create_photo(self.params.note_photo, self)
        except PhotoError as e:
            return self.error(400, e.message)
        # Finally, store the Photo. Past this point, we should NOT self.error.
        if photo:
            photo.put()
        if note_photo:
            note_photo.put()

        profile_urls = []
        if self.params.profile_url1:
            profile_urls.append(self.params.profile_url1)
        if self.params.profile_url2:
            profile_urls.append(self.params.profile_url2)
        if self.params.profile_url3:
            profile_urls.append(self.params.profile_url3)

        # Person records have to have a source_date; if none entered, use now.
        source_date = source_date or now

        # Determine the source name, or fill it in if the record is original
        # (i.e. created for the first time here, not copied from elsewhere).
        source_name = self.params.source_name
        if not self.params.clone:
            # record originated here
            if self.params.referrer:
                source_name = "%s (referred by %s)" % (self.env.netloc,
                                                       self.params.referrer)
            else:
                source_name = self.env.netloc

        person = Person.create_original(
            self.repo,
            entry_date=now,
            expiry_date=expiry_date,
            given_name=self.params.given_name,
            family_name=self.params.family_name,
            full_name=get_full_name(self.params.given_name,
                                    self.params.family_name,
                                    self.config),
            alternate_names=get_full_name(self.params.alternate_given_names,
                                          self.params.alternate_family_names,
                                          self.config),
            description=self.params.description,
            sex=self.params.sex,
            date_of_birth=self.params.date_of_birth,
            age=self.params.age,
            home_street=self.params.home_street,
            home_city=self.params.home_city,
            home_state=self.params.home_state,
            home_postal_code=self.params.home_postal_code,
            home_neighborhood=self.params.home_neighborhood,
            home_country=self.params.home_country,
            profile_urls='\n'.join(profile_urls),
            author_name=self.params.author_name,
            author_phone=self.params.author_phone,
            author_email=self.params.author_email,
            source_url=self.params.source_url,
            source_date=source_date,
            source_name=source_name,
            photo=photo,
            photo_url=photo_url
        )
        person.update_index(['old', 'new'])

        # The record_id is already available on the unsaved entity (it is used
        # for the note below), so fill in the view URL before any datastore
        # write.  This way every code path -- including the flagged-note early
        # return -- stores the URL, and the person only has to be put once.
        if not person.source_url and not self.params.clone:
            person.source_url = self.get_url('/view', id=person.record_id)

        if self.params.add_note:
            spam_detector = SpamDetector(self.config.bad_words)
            spam_score = spam_detector.estimate_spam_score(self.params.text)

            # Fields shared by regular and flagged notes.
            note_fields = dict(
                entry_date=get_utcnow(),
                person_record_id=person.record_id,
                author_name=self.params.author_name,
                author_email=self.params.author_email,
                author_phone=self.params.author_phone,
                source_date=source_date,
                author_made_contact=bool(self.params.author_made_contact),
                status=self.params.status,
                email_of_found_person=self.params.email_of_found_person,
                phone_of_found_person=self.params.phone_of_found_person,
                last_known_location=self.params.last_known_location,
                text=self.params.text,
                photo=note_photo,
                photo_url=note_photo_url)

            # The score can be None (e.g. for text without any words); treat
            # that as "not spam" explicitly instead of relying on how None
            # compares with a number.
            if spam_score is not None and spam_score > 0:
                note = NoteWithBadWords.create_original(
                    self.repo,
                    spam_score=spam_score,
                    confirmed=False,
                    **note_fields)

                # Write the new NoteWithBadWords to the datastore
                note.put_new()
                # Write the person record to datastore before redirect
                db.put(person)
                UserActionLog.put_new('add', person, copy_properties=False)

                # When the note is detected as spam, we do not update person
                # record with this note or log action. We ask the note author
                # for confirmation first.
                return self.redirect('/post_flagged_note',
                                     id=note.get_record_id(),
                                     author_email=note.author_email,
                                     repo=self.repo)

            note = Note.create_original(self.repo, **note_fields)

            # Write the new Note to the datastore
            note.put_new()
            person.update_from_note(note)

            # Specially log 'believed_dead'.
            if note.status == 'believed_dead':
                UserActionLog.put_new(
                    'mark_dead', note, person.primary_full_name,
                    self.request.remote_addr)

        # Write the person record to datastore
        person.put_new()

        # TODO(ryok): batch-put person, note, photo, note_photo here.

        # If the "subscribe to updates about your own record" box is
        # unchecked, skip the subscribe page.
        if not self.params.subscribe_own_info:
            self.params.subscribe = False

        # If user wants to subscribe to updates, redirect to the subscribe page
        if self.params.subscribe:
            return self.redirect('/subscribe',
                                 id=person.record_id,
                                 subscribe_email=self.params.author_email,
                                 context='create_person')

        self.redirect('/view', id=person.record_id)
Пример #4
0
        home_postal_code=home_postal_code,
        home_neighborhood=home_neighborhood,
        home_country=home_country,
        profile_urls='\n'.join(profile_urls),
        author_name=author_name,
        author_phone=author_phone,
        author_email=author_email,
        source_url=source_url,
        source_date=source_date,
        source_name=source_name,
        photo=photo,
        photo_url=photo_url)
    person.update_index(['old', 'new'])

    if add_note:
        spam_detector = SpamDetector(config.bad_words)
        spam_score = spam_detector.estimate_spam_score(text)
        if (spam_score > 0):
            note = NoteWithBadWords.create_original(
                repo,
                entry_date=get_utcnow(),
                person_record_id=person.record_id,
                author_name=author_name,
                author_email=author_email,
                author_phone=author_phone,
                source_date=source_date,
                author_made_contact=bool(author_made_contact),
                status=status,
                email_of_found_person=email_of_found_person,
                phone_of_found_person=phone_of_found_person,
                last_known_location=last_known_location,
 def test_init(self):
     """The constructor should expose the bad words as a lower-cased set."""
     detector = SpamDetector('foo, BAR')
     expected_words = set(['foo', 'bar'])
     assert expected_words == detector.bad_words_set
Пример #6
0
def create_note(
        repo,
        person,
        config,
        user_ip_address,
        status,
        source_date,
        author_name,
        author_email,
        author_phone,
        author_made_contact,
        note_photo,
        note_photo_url,
        text,
        email_of_found_person,
        phone_of_found_person,
        last_known_location,
        validate_data=True):
    """Create and store a note on a person, flagging it if it looks spammy.

    Args:
        repo: Repository passed through to the note's create_original().
        person: The person the note is attached to; its record_id and
            latest_status are read.
        config: Configuration object; config.bad_words feeds the
            SpamDetector and config is handed to validate_note_data().
        user_ip_address: Recorded with the 'mark_dead' action log entry.
        status, source_date, author_name, author_email, author_phone,
        author_made_contact, text, email_of_found_person,
        phone_of_found_person, last_known_location: Note field values,
            stored as given.
        note_photo, note_photo_url: Photo and its URL, stored on the note as
            `photo` / `photo_url`.
        validate_data: When true, validate_note_data() is called first.

    Returns:
        The newly stored Note.

    Raises:
        FlaggedNoteException: If the text has a positive spam score.  The
            note has then been stored as a NoteWithBadWords (unconfirmed)
            and is carried by the exception.
    """
    if validate_data:
        validate_note_data(
            config=config,
            status=status,
            author_name=author_name,
            author_email=author_email,
            author_made_contact=author_made_contact,
            text=text)
    spam_detector = SpamDetector(config.bad_words)
    spam_score = spam_detector.estimate_spam_score(text)

    # Fields shared by regular and flagged notes.
    note_fields = dict(
        entry_date=get_utcnow(),
        person_record_id=person.record_id,
        author_name=author_name,
        author_email=author_email,
        author_phone=author_phone,
        source_date=source_date,
        author_made_contact=author_made_contact,
        status=status,
        email_of_found_person=email_of_found_person,
        phone_of_found_person=phone_of_found_person,
        last_known_location=last_known_location,
        text=text,
        photo=note_photo,
        photo_url=note_photo_url)

    # The score can be None (e.g. for text without any words); treat that as
    # "not spam" explicitly, since comparing None with a number is an error
    # on Python 3.
    if spam_score is not None and spam_score > 0:
        note = NoteWithBadWords.create_original(
            repo, spam_score=spam_score, confirmed=False, **note_fields)
        note.put_new()
        raise FlaggedNoteException(note)

    note = Note.create_original(repo, **note_fields)
    note.put_new()
    # Specially log notes that make a person dead or switch to an alive status.
    if status == 'believed_dead':
        UserActionLog.put_new(
            'mark_dead', note, person.record_id, user_ip_address)
    if (status in ['believed_alive', 'is_note_author'] and
            person.latest_status not in ['believed_alive', 'is_note_author']):
        UserActionLog.put_new('mark_alive', note, person.record_id)
    # TODO(nworden): add sending subscription notifications here
    return note
Пример #7
0
class Handler(BaseHandler):

    def get(self):
        """Render the add-note page ('add_note.html') for one person.

        Responds with a 404 error page when no id was given or when the
        person cannot be loaded.
        """
        # Check the request parameters.
        if not self.params.id:
            return self.error(404, _('No person id was specified.'))

        # TODO(ichikawa) Consider removing this "except" clause.
        #     I don't think ValueError is thrown here.
        try:
            person = Person.get(self.repo, self.params.id)
        except ValueError:
            # Handled exactly like a record that was not found.
            person = None
        if not person:
            return self.error(404,
                _("This person's entry does not exist or has been deleted."))

        # Render the page.
        self.render('add_note.html',
                    person=person,
                    enable_notes_url=self.get_url('/enable_notes',
                                                  id=self.params.id))

    def post(self):
        """Post a note in person's record view page.

        Validates the form (400 on failure), loads the person (404 if the
        record is missing), stores an optional uploaded photo and scores the
        note text with SpamDetector.  A note with a positive spam score is
        stored as a NoteWithBadWords and the user is redirected to
        '/post_flagged_note'.  Otherwise a regular Note is stored, the person
        is updated from it, subscribers are notified, and the user is
        redirected to '/subscribe' or back to '/view'.
        """
        if not self.params.text:
            return self.error(
                400, _('Message is required. Please go back and try again.'))

        if not self.params.author_name:
            return self.error(
                400, _('Your name is required in the "About you" section.  '
                       'Please go back and try again.'))

        if (self.params.status == 'is_note_author' and
            not self.params.author_made_contact):
            return self.error(
                400, _('Please check that you have been in contact with '
                       'the person after the disaster, or change the '
                       '"Status of this person" field.'))
        if (self.params.status == 'believed_dead' and
            not self.config.allow_believed_dead_via_ui):
            return self.error(
                400, _('Not authorized to post notes with the status '
                       '"believed_dead".'))

        if (self.params.author_email and
            not utils.validate_email(self.params.author_email)):
            return self.error(400, _(
                'The email address you entered appears to be invalid.'))

        person = Person.get(self.repo, self.params.id)
        # The record may have been deleted (or the id may be bogus); bail out
        # with the same message the GET handler uses instead of crashing on
        # the attribute access below.
        if not person:
            return self.error(404,
                _("This person's entry does not exist or has been deleted."))
        if person.notes_disabled:
            return self.error(
                400, _('The author has disabled status updates '
                       'on this record.'))

        # If a photo was uploaded, create and store a new Photo entry and get
        # the URL where it's served; otherwise, use the note_photo_url provided.
        photo, photo_url = (None, self.params.note_photo_url)
        if self.params.note_photo is not None:
            try:
                photo, photo_url = create_photo(
                    self.params.note_photo, self.repo,
                    self.transitionary_get_url)
            except PhotoError as e:
                return self.error(400, e.message)
            photo.put()

        spam_detector = SpamDetector(self.config.bad_words)
        spam_score = spam_detector.estimate_spam_score(self.params.text)

        # Fields shared by regular and flagged notes.
        note_fields = dict(
            entry_date=get_utcnow(),
            person_record_id=self.params.id,
            author_name=self.params.author_name,
            author_email=self.params.author_email,
            author_phone=self.params.author_phone,
            source_date=get_utcnow(),
            author_made_contact=bool(self.params.author_made_contact),
            status=self.params.status,
            email_of_found_person=self.params.email_of_found_person,
            phone_of_found_person=self.params.phone_of_found_person,
            last_known_location=self.params.last_known_location,
            text=self.params.text,
            photo=photo,
            photo_url=photo_url)

        # The score can be None (e.g. for text without any words); treat that
        # as "not spam" explicitly instead of relying on how None compares
        # with a number.
        if spam_score is not None and spam_score > 0:
            note = NoteWithBadWords.create_original(
                self.repo,
                spam_score=spam_score,
                confirmed=False,
                **note_fields)
            # Write the new NoteWithBadWords to the datastore
            note.put_new()
            # When the note is detected as spam, we do not update person record
            # or log action. We ask the note author for confirmation first.
            return self.redirect('/post_flagged_note', id=note.get_record_id(),
                                 author_email=note.author_email,
                                 repo=self.repo)

        note = Note.create_original(self.repo, **note_fields)
        # Write the new regular Note to the datastore
        note.put_new()

        # Specially log 'believed_dead'.
        if note.status == 'believed_dead':
            UserActionLog.put_new(
                'mark_dead', note, person.primary_full_name,
                self.request.remote_addr)

        # Specially log a switch to an alive status.
        if (note.status in ['believed_alive', 'is_note_author'] and
            person.latest_status not in ['believed_alive', 'is_note_author']):
            UserActionLog.put_new('mark_alive', note, person.primary_full_name)

        # Update the Person based on the Note.
        person.update_from_note(note)
        # Send notification to all people
        # who subscribed to updates on this person
        subscribe.send_notifications(self, person, [note])
        # write the updated person record to datastore
        db.put(person)

        # If user wants to subscribe to updates, redirect to the subscribe page
        if self.params.subscribe:
            return self.redirect('/subscribe',
                                 id=person.record_id,
                                 subscribe_email=self.params.author_email,
                                 context='add_note')

        # Redirect to view page so the browser's back button works properly.
        self.redirect('/view', id=self.params.id, query=self.params.query)
Пример #8
0
class Handler(BaseHandler):
    def get(self):
        """Render the record view page ('view.html') for one person.

        Responds with a 404 error page when no id was given or the person
        cannot be loaded.  Otherwise gathers the person's notes, the linked
        (duplicate) persons with their notes, the various action URLs and the
        expiry information, and hands everything to the template.
        """
        # Check the request parameters.
        if not self.params.id:
            # Wrapped in _() like every other user-facing message here.
            return self.error(404, _('No person id was specified.'))
        try:
            person = Person.get(self.repo, self.params.id)
        except ValueError:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        if not person:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        standalone = self.request.get('standalone')

        # Check if private info should be revealed.
        content_id = 'view:' + self.params.id
        reveal_url = reveal.make_reveal_url(self, content_id)
        show_private_info = reveal.verify(content_id, self.params.signature)

        # Compute the local times for the date fields on the person.
        person.source_date_local_string = self.to_formatted_local_time(
            person.source_date)
        person.expiry_date_local_string = self.to_formatted_local_time(
            person.get_effective_expiry_date())

        # Get the notes and duplicate links.  A missing datastore index is
        # tolerated by showing no notes / no linked persons.
        try:
            notes = person.get_notes()
        except datastore_errors.NeedIndexError:
            notes = []
        person.sex_text = get_person_sex_text(person)
        for note in notes:
            self.add_fields_to_notes(note)
        try:
            linked_persons = person.get_all_linked_persons()
        except datastore_errors.NeedIndexError:
            linked_persons = []
        linked_person_info = []
        for linked_person in linked_persons:
            try:
                linked_notes = linked_person.get_notes()
            except datastore_errors.NeedIndexError:
                linked_notes = []
            for note in linked_notes:
                self.add_fields_to_notes(note)
            linked_person_info.append(
                dict(id=linked_person.record_id,
                     name=linked_person.primary_full_name,
                     view_url=self.get_url('/view',
                                           id=linked_person.record_id),
                     notes=linked_notes))

        # Render the page.
        dupe_notes_url = self.get_url('/view',
                                      id=self.params.id,
                                      dupe_notes='yes')
        results_url = self.get_url('/results',
                                   role=self.params.role,
                                   query=self.params.query,
                                   given_name=self.params.given_name,
                                   family_name=self.params.family_name)
        feed_url = self.get_url('/feeds/note',
                                person_record_id=self.params.id,
                                repo=self.repo)
        subscribe_url = self.get_url('/subscribe', id=self.params.id)
        delete_url = self.get_url('/delete', id=self.params.id)
        disable_notes_url = self.get_url('/disable_notes', id=self.params.id)
        enable_notes_url = self.get_url('/enable_notes', id=self.params.id)

        # Expiry extension is only offered for records that have an expiry
        # date and are not clones.
        extend_url = None
        extension_days = 0
        expiration_days = None
        expiry_date = person.get_effective_expiry_date()
        if expiry_date and not person.is_clone():
            expiration_delta = expiry_date - get_utcnow()
            extend_url = self.get_url('/extend', id=self.params.id)
            extension_days = extend.get_extension_days(self)
            if expiration_delta.days < EXPIRY_WARNING_THRESHOLD:
                # round 0 up to 1, to make the msg read better.
                expiration_days = expiration_delta.days + 1

        if person.is_clone():
            person.provider_name = person.get_original_domain()

        sanitize_urls(person)
        for note in notes:
            sanitize_urls(note)

        if person.profile_urls:
            person.profile_pages = get_profile_pages(person.profile_urls, self)

        self.render('view.html',
                    person=person,
                    notes=notes,
                    linked_person_info=linked_person_info,
                    standalone=standalone,
                    onload_function='view_page_loaded()',
                    show_private_info=show_private_info,
                    admin=users.is_current_user_admin(),
                    dupe_notes_url=dupe_notes_url,
                    results_url=results_url,
                    reveal_url=reveal_url,
                    feed_url=feed_url,
                    subscribe_url=subscribe_url,
                    delete_url=delete_url,
                    disable_notes_url=disable_notes_url,
                    enable_notes_url=enable_notes_url,
                    extend_url=extend_url,
                    extension_days=extension_days,
                    expiration_days=expiration_days)

    def post(self):
        """Post a note from the record view page.

        Validates the form, loads the person (404 if the record is missing),
        stores an optional uploaded photo and scores the note text with
        SpamDetector.  A note with a positive spam score is stored as a
        NoteWithBadWords and the user is redirected to '/post_flagged_note'.
        Otherwise a regular Note is stored, the person is updated from it,
        subscribers are notified, and the user is redirected to '/subscribe'
        or back to '/view'.

        NOTE(review): validation failures below are reported with HTTP
        status 200 rather than 400 -- presumably deliberate; confirm.
        """
        if not self.params.text:
            return self.error(
                200, _('Message is required. Please go back and try again.'))

        if not self.params.author_name:
            return self.error(
                200,
                _('Your name is required in the "About you" section.  '
                  'Please go back and try again.'))

        if (self.params.status == 'is_note_author'
                and not self.params.author_made_contact):
            return self.error(
                200,
                _('Please check that you have been in contact with '
                  'the person after the earthquake, or change the '
                  '"Status of this person" field.'))

        if (self.params.status == 'believed_dead'
                and not self.config.allow_believed_dead_via_ui):
            return self.error(
                200,
                _('Not authorized to post notes with the status '
                  '"believed_dead".'))

        person = Person.get(self.repo, self.params.id)
        # The record may have been deleted (or the id may be bogus); bail out
        # with the same message the GET handler uses instead of crashing on
        # the attribute access below.
        if not person:
            return self.error(
                404,
                _("This person's entry does not exist or has been deleted."))
        if person.notes_disabled:
            return self.error(
                200,
                _('The author has disabled status updates '
                  'on this record.'))

        # If a photo was uploaded, create and store a new Photo entry and get
        # the URL where it's served; otherwise, use the note_photo_url provided.
        photo, photo_url = (None, self.params.note_photo_url)
        if self.params.note_photo is not None:
            try:
                photo, photo_url = create_photo(self.params.note_photo, self)
            except PhotoError as e:
                return self.error(400, e.message)
            photo.put()

        spam_detector = SpamDetector(self.config.bad_words)
        spam_score = spam_detector.estimate_spam_score(self.params.text)

        # Fields shared by regular and flagged notes.
        note_fields = dict(
            entry_date=get_utcnow(),
            person_record_id=self.params.id,
            author_name=self.params.author_name,
            author_email=self.params.author_email,
            author_phone=self.params.author_phone,
            source_date=get_utcnow(),
            author_made_contact=bool(self.params.author_made_contact),
            status=self.params.status,
            email_of_found_person=self.params.email_of_found_person,
            phone_of_found_person=self.params.phone_of_found_person,
            last_known_location=self.params.last_known_location,
            text=self.params.text,
            photo=photo,
            photo_url=photo_url)

        # The score can be None (e.g. for text without any words); treat that
        # as "not spam" explicitly instead of relying on how None compares
        # with a number.
        if spam_score is not None and spam_score > 0:
            note = NoteWithBadWords.create_original(
                self.repo,
                spam_score=spam_score,
                confirmed=False,
                **note_fields)
            # Write the new NoteWithBadWords to the datastore
            db.put(note)
            UserActionLog.put_new('add', note, copy_properties=False)
            # When the note is detected as spam, we do not update the person
            # record or send notifications; only the 'add' action above is
            # logged. We ask the note author for confirmation first.
            return self.redirect('/post_flagged_note',
                                 id=note.get_record_id(),
                                 author_email=note.author_email,
                                 repo=self.repo)

        note = Note.create_original(self.repo, **note_fields)
        # Write the new regular Note to the datastore
        db.put(note)
        UserActionLog.put_new('add', note, copy_properties=False)

        # Specially log 'believed_dead'.
        if note.status == 'believed_dead':
            UserActionLog.put_new('mark_dead', note, person.primary_full_name,
                                  self.request.remote_addr)

        # Specially log a switch to an alive status.
        if (note.status in ['believed_alive', 'is_note_author']
                and person.latest_status
                not in ['believed_alive', 'is_note_author']):
            UserActionLog.put_new('mark_alive', note, person.primary_full_name)

        # Update the Person based on the Note.
        person.update_from_note(note)
        # Send notification to all people
        # who subscribed to updates on this person
        subscribe.send_notifications(self, person, [note])
        # write the updated person record to datastore
        db.put(person)

        # If user wants to subscribe to updates, redirect to the subscribe page
        if self.params.subscribe:
            return self.redirect('/subscribe',
                                 id=person.record_id,
                                 subscribe_email=self.params.author_email,
                                 context='add_note')

        # Redirect to this page so the browser's back button works properly.
        self.redirect('/view', id=self.params.id, query=self.params.query)