def submit_extract_keywords_hit(note):
    """Create a Mechanical Turk HIT that asks a worker to choose keywords
    and definitions from the given note.

    The HIT title names the note's course and school, the overview links to
    the note, and the form pairs one keyword question with one definition
    question for each configured field. The id of the created HIT is stored
    in a ``HIT`` row linked to the note with ``processed=False``.

    If the ``MTURK_HOST`` environment variable is not set, a warning is
    logged and nothing else happens.

    :param note: the note to extract keywords from; its ``course``, ``id``
        and ``get_absolute_url()`` are read.
    """
    try:
        mturk_host = os.environ['MTURK_HOST']
    except KeyError:
        logger.warning('Could not find Mechanical Turk secrets, not running submit_extract_keywords_hit')
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                                 settings.AWS_SECRET_ACCESS_KEY,
                                 host=mturk_host)

    # The school comes from the course when it has one, otherwise from the
    # course's department.
    course = note.course
    school = course.school if course.school else course.department.school
    title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=course.name,
                                               school=school.name)

    overview = Overview()
    overview.append(FormattedContent(
        KEYWORDS_HIT_OVERVIEW_TEMPLATE.format(domain=Site.objects.get_current(),
                                              link=note.get_absolute_url())))

    # One-line answers for keywords, three-line answers for definitions.
    keyword_fta = FreeTextAnswer()
    keyword_fta.num_lines = 1
    definition_fta = FreeTextAnswer()
    definition_fta.num_lines = 3

    question_form = QuestionForm()
    question_form.append(overview)

    # zip() stops at the shorter of the two field lists, so every keyword
    # question is followed by its matching definition question.
    field_pairs = zip(KEYWORDS_HIT_KEYWORD_FIELDS, KEYWORDS_HIT_DEFINITION_FIELDS)
    for i, (keyword_field, definition_field) in enumerate(field_pairs):
        keyword_content = QuestionContent()
        keyword_content.append_field('Title', keyword_field[1])
        # Keyword questions at index 0 through 10 are required.
        question_form.append(Question(
            identifier=keyword_field[0],
            content=keyword_content,
            answer_spec=AnswerSpecification(keyword_fta),
            is_required=i <= 10))

        definition_content = QuestionContent()
        definition_content.append_field('Title', definition_field[1])
        # Definitions are always optional.
        question_form.append(Question(
            identifier=definition_field[0],
            content=definition_content,
            answer_spec=AnswerSpecification(definition_fta),
            is_required=False))

    # The note id travels with the HIT as its annotation.
    hit = connection.create_hit(questions=question_form,
                                max_assignments=1,
                                title=title,
                                description=KEYWORDS_HIT_DESCRIPTION,
                                keywords=KEYWORDS_HIT_KEYWORDS,
                                duration=KEYWORDS_HIT_DURATION,
                                reward=KEYWORDS_HIT_REWARD,
                                qualifications=KEYWORDS_HIT_QUALIFICATION,
                                annotation=str(note.id))[0]

    HIT.objects.create(HITId=hit.HITId, note=note, processed=False)
def check_notes_mailbox():
    """Create a Mechanical Turk HIT for every multipart email in the notes mailbox.

    For each message retrieved over POP3 from ``pop.gmail.com``:

    * messages that are not multipart are skipped with a warning;
    * the plain-text body and every part whose Content-Disposition contains
      ``attachment;`` are collected, and each attachment is uploaded to
      filepicker;
    * a HIT is created asking whether the email contains course notes, for
      the course/instructor/school/department names, and for a title and
      category for each uploaded attachment.

    Returns ``None`` in all cases. Nothing is done when ``run_mturk`` returns
    a falsy host or when any of the ``NOTES_MAILBOX_USERNAME``,
    ``NOTES_MAILBOX_PASSWORD`` or ``FILEPICKER_API_KEY`` environment
    variables is missing.
    """
    # NOTE(review): the name passed here does not match this function's name,
    # while submit_extract_keywords_hit passes its own name -- presumably this
    # should be 'check_notes_mailbox'; confirm against run_mturk.
    mturk_host = run_mturk('get_extract_keywords_results')
    if not mturk_host:
        return

    try:
        mailbox_user = os.environ['NOTES_MAILBOX_USERNAME']
        mailbox_password = os.environ['NOTES_MAILBOX_PASSWORD']
        filepicker_api_key = os.environ['FILEPICKER_API_KEY']
    except KeyError:
        logger.warning(
            'Could not find notes mailbox secrets, not running check_notes_mailbox'
        )
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                                 settings.AWS_SECRET_ACCESS_KEY,
                                 host=mturk_host)

    # NOTE(review): this POP3 session is never closed (no quit()) and no
    # message is deleted here -- confirm that messages are meant to stay on
    # the server after a HIT has been created for them.
    mailbox = poplib.POP3_SSL('pop.gmail.com', 995)
    mailbox.user(mailbox_user)
    mailbox.pass_(mailbox_password)
    message_count = len(mailbox.list()[1])

    # POP3 message numbers start at 1.
    for message_number in range(1, message_count + 1):
        # construct message object from raw message
        raw_message_string = '\n'.join(mailbox.retr(message_number)[1])
        message = email.message_from_string(raw_message_string)

        if not message.is_multipart():
            logger.warning('Got an email with no attachments')
            continue

        attachments = []  # (signed filepicker URL, filename) pairs
        message_body = ''

        for part in message.get_payload():
            disposition = part['Content-Disposition']

            # Look for the message's plain text body
            if part.get_content_type() == 'text/plain' and disposition is None:
                message_body = part.get_payload()
                continue

            # Look for attachments
            if not (disposition and 'attachment;' in disposition):
                continue

            attachment_mimetype = part.get_content_type()
            filename_match = re.search(CONTENT_DISPOSITION_REGEX, disposition)
            if filename_match is None:
                # One malformed header must not abort the whole mailbox run.
                logger.warning('Could not parse an attachment filename, skipping it')
                continue
            attachment_filename = filename_match.group('filename')

            if part['Content-Transfer-Encoding'] == 'base64':
                attachment_data = base64.b64decode(part.get_payload())
            else:
                attachment_data = part.get_payload()

            # Upload attachment to filepicker. The filename and MIME type come
            # from the email, so requests builds (and URL-encodes) the query
            # string instead of them being interpolated into the URL.
            resp = requests.post(
                'https://www.filepicker.io/api/store/S3',
                params=[('key', filepicker_api_key),
                        ('policy', FP_POLICY_READ_WRITE),
                        ('signature', FP_SIGNATURE_READ_WRITE),
                        ('mimetype', attachment_mimetype),
                        ('filename', attachment_filename)],
                data=attachment_data)

            if resp.status_code == 200:
                url = json.loads(resp.text)['url']
                # Workers get a link signed with the read-only policy.
                url = url + '?policy={policy}&signature={signature}'.format(
                    policy=FP_POLICY_READ, signature=FP_SIGNATURE_READ)
                attachments.append((url, attachment_filename))
            else:
                logger.warning('Could not upload an attachment to filepicker')

        overview = Overview()
        overview.append(FormattedContent(
            EMAIL_HIT_OVERVIEW_TEMPLATE.format(subject=message['Subject'],
                                               body=message_body,
                                               attachments='')))

        single_line_answer = FreeTextAnswer()
        single_line_answer.num_lines = 1

        question_form = QuestionForm()
        question_form.append(overview)

        course_spam_content = QuestionContent()
        course_spam_content.append_field(
            'Title',
            'Does the email contain course notes (check attachments below)?')
        spam_answer = SelectionAnswer(style='dropdown',
                                      selections=[('No', 'no'), ('Yes', 'yes')])
        question_form.append(Question(
            identifier=COURSE_SPAM_QID,
            content=course_spam_content,
            answer_spec=AnswerSpecification(spam_answer),
            is_required=True))

        # Single-line free-text questions: (identifier, title, required).
        free_text_questions = (
            (COURSE_NAME_QID, 'Course Name', True),
            (INSTRUCTOR_NAMES_QID, 'Instructor Name(s)', False),
            (SCHOOL_NAME_QID, 'School Name', True),
            (DEPARTMENT_NAME_QID, 'Department Name', False),
        )
        for question_id, question_title, required in free_text_questions:
            content = QuestionContent()
            content.append_field('Title', question_title)
            question_form.append(Question(
                identifier=question_id,
                content=content,
                answer_spec=AnswerSpecification(single_line_answer),
                is_required=required))

        # Each uploaded attachment gets a link plus a title and a category
        # question; question identifiers are suffixed with the attachment index.
        for index, (link, name) in enumerate(attachments):
            attachment_overview = Overview()
            attachment_overview.append(FormattedContent(
                EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE.format(link=link,
                                                              name=name)))
            question_form.append(attachment_overview)

            note_title_content = QuestionContent()
            note_title_content.append_field('Title', 'Note Title')
            question_form.append(Question(
                identifier=NOTE_TITLE_QID_TEMPLATE + str(index),
                content=note_title_content,
                answer_spec=AnswerSpecification(single_line_answer),
                is_required=True))

            note_category_content = QuestionContent()
            note_category_content.append_field('Title', 'Note Category')
            category_answer = SelectionAnswer(style='dropdown',
                                              selections=NOTE_CATEGORIES_FOR_MTURK)
            question_form.append(Question(
                identifier=NOTE_CATEGORY_QID_TEMPLATE + str(index),
                content=note_category_content,
                answer_spec=AnswerSpecification(category_answer),
                is_required=True))

        # The created HIT is not recorded anywhere by this function.
        connection.create_hit(questions=question_form,
                              max_assignments=1,
                              title=EMAIL_HIT_TITLE,
                              description=EMAIL_HIT_DESCRIPTION,
                              keywords=EMAIL_HIT_KEYWORDS,
                              duration=EMAIL_HIT_DURATION,
                              reward=EMAIL_HIT_REWARD,
                              qualifications=EMAIL_HIT_QUALIFICATION)
def check_notes_mailbox():
    """Create a Mechanical Turk HIT for every multipart email in the notes mailbox.

    For each message retrieved over POP3 from ``pop.gmail.com``:

    * messages that are not multipart are skipped with a warning;
    * the plain-text body and every part whose Content-Disposition contains
      ``attachment;`` are collected, and each attachment is uploaded to
      filepicker;
    * a HIT is created asking whether the email contains course notes, for
      the course/instructor/school/department names, and for a title and
      category for each uploaded attachment.

    Returns ``None`` in all cases. Nothing is done (beyond logging a warning)
    when any of the ``NOTES_MAILBOX_USERNAME``, ``NOTES_MAILBOX_PASSWORD``,
    ``FILEPICKER_API_KEY`` or ``MTURK_HOST`` environment variables is missing.
    """
    try:
        mailbox_user = os.environ['NOTES_MAILBOX_USERNAME']
        mailbox_password = os.environ['NOTES_MAILBOX_PASSWORD']
        filepicker_api_key = os.environ['FILEPICKER_API_KEY']
        mturk_host = os.environ['MTURK_HOST']
    except KeyError:
        logger.warning('Could not find notes mailbox secrets, not running check_notes_mailbox')
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                                 settings.AWS_SECRET_ACCESS_KEY,
                                 host=mturk_host)

    # NOTE(review): this POP3 session is never closed (no quit()) and no
    # message is deleted here -- confirm that messages are meant to stay on
    # the server after a HIT has been created for them.
    mailbox = poplib.POP3_SSL('pop.gmail.com', 995)
    mailbox.user(mailbox_user)
    mailbox.pass_(mailbox_password)
    message_count = len(mailbox.list()[1])

    # POP3 message numbers start at 1.
    for message_number in range(1, message_count + 1):
        # construct message object from raw message
        raw_message_string = '\n'.join(mailbox.retr(message_number)[1])
        message = email.message_from_string(raw_message_string)

        if not message.is_multipart():
            logger.warning('Got an email with no attachments')
            continue

        attachments = []  # (signed filepicker URL, filename) pairs
        message_body = ''

        for part in message.get_payload():
            disposition = part['Content-Disposition']

            # Look for the message's plain text body
            if part.get_content_type() == 'text/plain' and disposition is None:
                message_body = part.get_payload()
                continue

            # Look for attachments
            if not (disposition and 'attachment;' in disposition):
                continue

            attachment_mimetype = part.get_content_type()
            filename_match = re.search(CONTENT_DISPOSITION_REGEX, disposition)
            if filename_match is None:
                # One malformed header must not abort the whole mailbox run.
                logger.warning('Could not parse an attachment filename, skipping it')
                continue
            attachment_filename = filename_match.group('filename')

            if part['Content-Transfer-Encoding'] == 'base64':
                attachment_data = base64.b64decode(part.get_payload())
            else:
                attachment_data = part.get_payload()

            # Upload attachment to filepicker. The filename and MIME type come
            # from the email, so requests builds (and URL-encodes) the query
            # string instead of them being interpolated into the URL.
            resp = requests.post(
                'https://www.filepicker.io/api/store/S3',
                params=[('key', filepicker_api_key),
                        ('policy', FP_POLICY_READ_WRITE),
                        ('signature', FP_SIGNATURE_READ_WRITE),
                        ('mimetype', attachment_mimetype),
                        ('filename', attachment_filename)],
                data=attachment_data)

            if resp.status_code == 200:
                url = json.loads(resp.text)['url']
                # Workers get a link signed with the read-only policy.
                url = url + '?policy={policy}&signature={signature}'.format(
                    policy=FP_POLICY_READ, signature=FP_SIGNATURE_READ)
                attachments.append((url, attachment_filename))
            else:
                logger.warning('Could not upload an attachment to filepicker')

        overview = Overview()
        overview.append(FormattedContent(
            EMAIL_HIT_OVERVIEW_TEMPLATE.format(subject=message['Subject'],
                                               body=message_body,
                                               attachments='')))

        single_line_answer = FreeTextAnswer()
        single_line_answer.num_lines = 1

        question_form = QuestionForm()
        question_form.append(overview)

        course_spam_content = QuestionContent()
        course_spam_content.append_field(
            'Title',
            'Does the email contain course notes (check attachments below)?')
        spam_answer = SelectionAnswer(style='dropdown',
                                      selections=[('No', 'no'), ('Yes', 'yes')])
        question_form.append(Question(
            identifier=COURSE_SPAM_QID,
            content=course_spam_content,
            answer_spec=AnswerSpecification(spam_answer),
            is_required=True))

        # Single-line free-text questions: (identifier, title, required).
        free_text_questions = (
            (COURSE_NAME_QID, 'Course Name', True),
            (INSTRUCTOR_NAMES_QID, 'Instructor Name(s)', False),
            (SCHOOL_NAME_QID, 'School Name', True),
            (DEPARTMENT_NAME_QID, 'Department Name', False),
        )
        for question_id, question_title, required in free_text_questions:
            content = QuestionContent()
            content.append_field('Title', question_title)
            question_form.append(Question(
                identifier=question_id,
                content=content,
                answer_spec=AnswerSpecification(single_line_answer),
                is_required=required))

        # Each uploaded attachment gets a link plus a title and a category
        # question; question identifiers are suffixed with the attachment index.
        for index, (link, name) in enumerate(attachments):
            attachment_overview = Overview()
            attachment_overview.append(FormattedContent(
                EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE.format(link=link,
                                                              name=name)))
            question_form.append(attachment_overview)

            note_title_content = QuestionContent()
            note_title_content.append_field('Title', 'Note Title')
            question_form.append(Question(
                identifier=NOTE_TITLE_QID_TEMPLATE + str(index),
                content=note_title_content,
                answer_spec=AnswerSpecification(single_line_answer),
                is_required=True))

            note_category_content = QuestionContent()
            note_category_content.append_field('Title', 'Note Category')
            category_answer = SelectionAnswer(style='dropdown',
                                              selections=NOTE_CATEGORIES_FOR_MTURK)
            question_form.append(Question(
                identifier=NOTE_CATEGORY_QID_TEMPLATE + str(index),
                content=note_category_content,
                answer_spec=AnswerSpecification(category_answer),
                is_required=True))

        # The created HIT is not recorded anywhere by this function.
        connection.create_hit(questions=question_form,
                              max_assignments=1,
                              title=EMAIL_HIT_TITLE,
                              description=EMAIL_HIT_DESCRIPTION,
                              keywords=EMAIL_HIT_KEYWORDS,
                              duration=EMAIL_HIT_DURATION,
                              reward=EMAIL_HIT_REWARD,
                              qualifications=EMAIL_HIT_QUALIFICATION)
def submit_extract_keywords_hit(note):
    """Create a Mechanical Turk HIT that asks a worker to choose keywords
    and definitions from the given note.

    The form alternates keyword and definition questions, one pair for each
    configured field, and the id of the created HIT is saved in a
    ``KeywordExtractionHIT`` row linked to the note. Returns early, doing
    nothing, when ``run_mturk`` yields a falsy host.
    """
    host = run_mturk('submit_extract_keywords_hit')
    if not host:
        return

    mturk = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                            settings.AWS_SECRET_ACCESS_KEY,
                            host=host)

    # Use the course's own school when set, otherwise the department's.
    if note.course.school:
        school_name = note.course.school.name
    else:
        school_name = note.course.department.school.name
    title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=note.course.name,
                                               school=school_name)

    intro_text = KEYWORDS_HIT_OVERVIEW_TEMPLATE.format(
        domain=Site.objects.get_current(),
        link=note.get_absolute_url())
    intro = Overview()
    intro.append(FormattedContent(intro_text))

    # Keywords are answered on one line, definitions on three.
    one_line = FreeTextAnswer()
    one_line.num_lines = 1
    three_lines = FreeTextAnswer()
    three_lines.num_lines = 3

    form = QuestionForm()
    form.append(intro)

    # Pairing stops at the shorter of the two field lists.
    paired_fields = zip(KEYWORDS_HIT_KEYWORD_FIELDS,
                        KEYWORDS_HIT_DEFINITION_FIELDS)
    for position, (keyword_field, definition_field) in enumerate(paired_fields):
        keyword_title = QuestionContent()
        keyword_title.append_field('Title', keyword_field[1])
        # Keyword questions at positions 0 through 10 are mandatory.
        form.append(Question(identifier=keyword_field[0],
                             content=keyword_title,
                             answer_spec=AnswerSpecification(one_line),
                             is_required=(position <= 10)))

        definition_title = QuestionContent()
        definition_title.append_field('Title', definition_field[1])
        form.append(Question(identifier=definition_field[0],
                             content=definition_title,
                             answer_spec=AnswerSpecification(three_lines),
                             is_required=False))

    # The note id is attached to the HIT as its annotation.
    created = mturk.create_hit(questions=form,
                               max_assignments=1,
                               title=title,
                               description=KEYWORDS_HIT_DESCRIPTION,
                               keywords=KEYWORDS_HIT_KEYWORDS,
                               duration=KEYWORDS_HIT_DURATION,
                               reward=KEYWORDS_HIT_REWARD,
                               qualifications=KEYWORDS_HIT_QUALIFICATION,
                               annotation=str(note.id))
    new_hit = created[0]

    KeywordExtractionHIT.objects.create(HITId=new_hit.HITId,
                                        note=note,
                                        processed=False)