def backfill_mturk_batch(mturk_batch_id, only_incomplete_hits=True, chain=True):
    """Gets all the missing HITs for an mturk batch."""
    mturk_connection = connect(MTURK_TARGET)
    retrieve_assignments_for_mturk_batch(mturk_connection, mturk_batch_id, only_incomplete_hits)
    if chain:
        process_responses.delay()
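# Usage note (hedged): the .delay() calls in this module suggest these
# functions are registered as Celery tasks. Assuming that registration, a
# backfill could be queued asynchronously:
#
#     backfill_mturk_batch.delay(mturk_batch_id, only_incomplete_hits=True)
#
# or run synchronously (e.g. from a shell) without chaining into
# process_responses:
#
#     backfill_mturk_batch(mturk_batch_id, chain=False)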
def turk_submission(submission_id, sample_batch_id=None, chain=True):
    """Handles turking for a submission sampled by _sample."""
    assert Submission.objects.filter(id=submission_id).count() > 0, \
        "Submission {} does not exist!".format(submission_id)
    assert SubmissionState.objects.filter(submission_id=submission_id).count() > 0, \
        "SubmissionState {} does not exist!".format(submission_id)

    submission = Submission.objects.get(id=submission_id)
    state = SubmissionState.objects.get(submission_id=submission_id)
    logger.info("Turking submission %s", submission.id)

    if state.status != 'pending-turking':
        logger.warning("Trying to turk submission %s, but state is %s", submission.id, state.status)
        return

    try:
        sample_batches = api.get_submission_sample_batches(submission_id)
        assert len(sample_batches) > 0, \
            "No sample batches to turk for submission {}".format(submission_id)
        if sample_batch_id is not None:
            assert any(batch == sample_batch_id for batch in sample_batches), \
                "Sample batch {} is not part of submission {}".format(sample_batch_id, submission_id)
        else:
            # Pick the most recent sample batch.
            sample_batch_id = sample_batches[0]

        evaluation_batch_id = create_evaluation_batch_for_submission_sample(submission_id, sample_batch_id)
        if evaluation_batch_id is None:
            logger.warning("Evaluation batch not created because all possible questions have been asked!")
        else:
            evaluation_batch = api.get_question_batch(evaluation_batch_id)
            questions = api.get_questions(evaluation_batch_id)
            mturk_connection = connect(MTURK_TARGET)
            create_batch(mturk_connection, evaluation_batch.id, evaluation_batch.batch_type, questions)

        # Move state forward.
        state.status = 'pending-annotation'
        state.save()
    except Exception as e:
        logger.exception(e)
        state.status = 'error'
        state.message = traceback.format_exc()
        state.save()
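# Submission state transitions implied by the tasks in this module (other
# states may be set elsewhere; this reflects only what is visible here):
#
#     pending-turking    --turk_submission-->      pending-annotation
#     (after annotation) --process_mturk_batch-->  pending-scoring
#
# On an unhandled exception, turk_submission sets the state to 'error' and
# stores the traceback in state.message.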
def process_mturk_batch(mturk_batch_id, forced=False, chain=True):
    """
    First verifies that the responses for a HIT are sane, then aggregates
    them to fill the evaluation_* tables.
    """
    logger.info("Running process_mturk_batch")
    # Actually merge all our tables.
    merge_evaluation_tables(mode='mturk_batch', mturk_batch_id=mturk_batch_id)

    # verify_evaluation_relation_response depends directly on the majority
    # relation, and verify_evaluation_mention_response looks at deviation from
    # the median, so it depends on majority counts. Hence it doesn't seem like
    # we can actually benefit from merging only validated responses. The point
    # of validation is then simply to discourage spammers in the long run; it
    # doesn't help much with the correctness of the current batch. The only
    # other alternative is to create a new mturk batch to cover the rejected
    # assignments.
    # TODO: Create a new mturk batch to cover rejected assignments.
    verify_evaluation_mention_response()
    verify_evaluation_relation_response()

    mturk_connection = connect(MTURK_TARGET, forced=forced)
    mturk_batch_payments(mturk_connection, mturk_batch_id)
    db.execute(
        "UPDATE mturk_hit SET state = 'done' WHERE batch_id = %(mturk_batch_id)s",
        mturk_batch_id=mturk_batch_id)

    submission_id = db.get("""
        SELECT DISTINCT submission_id
        FROM mturk_hit
        LEFT JOIN evaluation_batch ON evaluation_batch.id = mturk_hit.question_batch_id
        LEFT JOIN submission_sample ON submission_sample.batch_id = evaluation_batch.sample_batch_id
        WHERE mturk_hit.batch_id = %(mturk_batch_id)s;
        """, mturk_batch_id=mturk_batch_id).submission_id

    assert Submission.objects.filter(id=submission_id).count() > 0, \
        "Submission {} does not exist!".format(submission_id)
    assert SubmissionState.objects.filter(submission_id=submission_id).count() > 0, \
        "SubmissionState {} does not exist!".format(submission_id)
    submission = Submission.objects.get(id=submission_id)
    state = SubmissionState.objects.get(submission_id=submission_id)
    state.status = 'pending-scoring'
    state.save()

    if chain:
        score_submission.delay(submission_id)
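# A hedged sketch of one possible way to drive a full round through these
# tasks by hand, assuming Celery registration as noted above; the exact
# triggering of process_mturk_batch in production may differ (backfill chains
# into process_responses, which is defined elsewhere):
#
#     turk_submission.delay(submission_id)         # publish HITs; -> pending-annotation
#     # ... workers complete HITs on MTurk ...
#     backfill_mturk_batch.delay(mturk_batch_id)   # pull down finished assignments
#     process_mturk_batch.delay(mturk_batch_id)    # verify, pay, mark done; -> pending-scoring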
def renew_mturk_hit(_, __, queryset):
    conn = turk.connect()
    for row in queryset:
        turk.renew_hit(conn, row.id)


def increment_assignments(_, __, queryset):
    conn = turk.connect()
    for row in queryset:
        turk.increment_assignments(conn, row.id)


def backfill_mturk_batch(_, __, queryset):
    conn = turk.connect()
    for row in queryset:
        turk.retrieve_assignments_for_mturk_batch(conn, row.id, only_incomplete_hits=True)


def handle_mturk_batch_payments(_, __, queryset):
    conn = turk.connect()
    for row in queryset:
        turk.mturk_batch_payments(conn, row.id)


def revoke_mturk_batch(_, __, queryset):
    conn = turk.connect()
    for row in queryset:
        turk.revoke_batch(conn, row.id)
def revoke_evaluation_question(_, __, queryset):
    mturk_conn = turk.connect()
    for row in queryset:
        questions.revoke_question(row.batch_id, row.id, mturk_conn=mturk_conn)
def revoke_evaluation_batch(_, __, queryset):
    mturk_conn = turk.connect()
    for row in queryset:
        questions.revoke_question_batch(row.id, mturk_conn=mturk_conn)
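# The (_, __, queryset) signatures above match Django admin actions, which
# receive (modeladmin, request, queryset). A minimal sketch of how they could
# be registered; MturkHit, MturkBatch, the Admin classes, and the import path
# are hypothetical names for illustration, not taken from this module.
from django.contrib import admin

from .models import MturkBatch, MturkHit  # hypothetical import path


class MturkHitAdmin(admin.ModelAdmin):
    # Per-HIT maintenance actions available from the change list.
    actions = [renew_mturk_hit, increment_assignments]


class MturkBatchAdmin(admin.ModelAdmin):
    # Batch-level maintenance actions: backfill, payment, and revocation.
    actions = [backfill_mturk_batch, handle_mturk_batch_payments, revoke_mturk_batch]


admin.site.register(MturkHit, MturkHitAdmin)
admin.site.register(MturkBatch, MturkBatchAdmin)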