def external_task_prepare_context(request, experiment_id, hit, override):
    """
    Fetch hit, experiment, assignment, worker, etc.  Returns either a
    dictionary on success, or a response (or exception) if there is some
    error.
    """

    # obtain HIT
    if hit is None:
        if 'hitId' not in request.GET:
            if request.user.is_staff:
                return html_error_response(
                    request, 'HIT ID missing from GET parameters')
            else:
                raise Http404
        hit_id = request.GET['hitId']
        try:
            hit = MtHit.objects \
                .select_related(
                    'hit_type__experiment',
                    'hit_type__experiment_settings',
                    'hit_type__requirements') \
                .get(id=hit_id)
        except MtHit.DoesNotExist:
            # if this HIT cannot be found, tell Amazon about it
            if (override is None and
                    not request.user.is_staff and
                    'assignmentId' in request.GET and
                    'workerId' in request.GET and
                    'turkSubmitTo' in request.GET):
                expire_hit_task.delay(hit_id)
            raise Http404

    # obtain experiment
    experiment = hit.hit_type.experiment
    if experiment.id != int(experiment_id):
        if request.user.is_staff:
            return html_error_response(
                request, 'Experiment ID (%s) does not match HIT (%s)' % (
                    experiment_id, experiment.id))
        else:
            raise Http404

    # obtain worker and assignment
    worker = get_or_create_mturk_worker_from_request(request)
    assignment_dirty = False
    if worker and 'assignmentId' in request.GET:
        assignment, _ = MtAssignment.objects.get_or_create(
            id=request.GET['assignmentId'],
            defaults={'hit': hit, 'worker': worker})
        if assignment.hit != hit or assignment.worker != worker:
            assignment.hit = hit
            assignment.worker = worker
            assignment_dirty = True
    else:
        assignment = None
        worker = None

    # obtain worker info specific to the experiment and worker
    if experiment and worker:
        experiment_worker, _ = ExperimentWorker.objects.get_or_create(
            experiment=experiment, worker=worker)
    else:
        experiment_worker = None

    # don't let blocked workers perform our tasks
    if (worker and worker.blocked) or \
            (experiment_worker and experiment_worker.blocked):
        message = ("Your submissions are too low quality.  "
                   "Please stop doing our tasks.")
        if experiment_worker and experiment_worker.blocked_reason:
            message += "<br/><br/>" + experiment_worker.blocked_reason
        elif worker and worker.blocked_reason:
            message += "<br/><br/>" + worker.blocked_reason
        return html_error_response(request, message)

    # fetch contents
    hit_contents = fetch_hit_contents(hit)
    if override and 'publishable' in request.GET:
        hit_contents = [x for x in hit_contents if x and x.publishable()]
    if not hit.num_contents or not hit_contents:
        # (also test hit.num_contents since it is only set after the last
        # content is added)
        return html_error_response(
            request, "Somehow there are no items in this HIT.")

    # fetch test (sentinel) contents
    if experiment_worker:
        if assignment.num_test_contents is None:
            n = experiment.test_contents_per_assignment
            if n > 0:
                # select new test contents from the set of possible contents
                # (that the user has not already answered)
                test_content_wrappers = experiment.test_contents.all() \
                    .exclude(responses__experiment_worker=experiment_worker) \
                    .order_by('-priority')[:n]
                # register chosen items with assignment
                assignment.test_contents.add(*test_content_wrappers)
            else:
                test_content_wrappers = []
            assignment.num_test_contents = len(test_content_wrappers)
            assignment_dirty = True
        elif assignment.num_test_contents > 0:
            # re-fetch existing contents
            test_content_wrappers = assignment.test_contents.all()
        else:
            test_content_wrappers = []

        # fetch objects from inside the wrappers
        if test_content_wrappers:
            test_contents = fetch_content_tuples([
                (x.content_type_id, x.object_id)
                for x in test_content_wrappers
            ])
        else:
            test_contents = []
    else:
        test_contents = []
        test_content_wrappers = []

    # shuffle together (some tasks may sort contents again in javascript)
    contents = hit_contents + test_contents
    random.shuffle(contents)

    # prepare context data
    context = {
        'hit': hit,
        'assignment': assignment,
        'worker': worker,
        'experiment': experiment,
        'experiment_id': experiment_id,
        'experiment_worker': experiment_worker,
        'slug': experiment.slug,
        'hit_contents': hit_contents,
        'test_content_wrappers': test_content_wrappers,
        'test_contents': test_contents,
        'contents': contents,
        'num_contents': len(contents),
        'num_contents_predicted': (
            len(hit_contents) + experiment.test_contents_per_assignment),
        'override': override,
    }

    if len(contents) == 1:
        context['content'] = contents[0]

    if experiment.version >= 2:
        # old experiments (version 1) don't use this
        context['contents_json'] = json.dumps(
            [c.get_entry_dict() for c in contents])

    # list of ids as json
    context['content_id_json'] = json.dumps(
        [{'id': c.id} for c in contents])

    # requirements
    for req in hit.hit_type.requirements.values('name', 'value'):
        context[req['name']] = req['value']

    if assignment_dirty:
        assignment.save()

    return context
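# Usage sketch (illustrative, not part of the original module): a minimal
# view that delegates to external_task_prepare_context().  The function
# returns either a context dict or an error response, so callers must check
# the type before rendering.  The view name and template path below are
# assumptions for illustration only.
from django.shortcuts import render  # assumed import for this sketch


def external_task_example_view(request, experiment_id, hit=None,
                               override=None):
    context = external_task_prepare_context(
        request, experiment_id, hit, override)
    if not isinstance(context, dict):
        # an error response (e.g. from html_error_response) was returned
        return context
    return render(request, 'mturk/external_task.html', context)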
def mturk_submit_impl(**kwargs):
    #slug = kwargs['experiment'].slug
    #print '%s time_ms: %s, time_active_ms: %s, time_load_ms: %s' % (
        #slug, kwargs['time_ms'], kwargs['time_active_ms'],
        #kwargs['time_load_ms'])
    #print '%s results: %s' % (slug, kwargs['results'])
    #if kwargs['mturk_assignment'].feedback:
        #print '%s feedback: %s' % (slug, kwargs['mturk_assignment'].feedback)

    # fetch objects if passed by ID
    if 'user_id' in kwargs:
        kwargs['user'] = UserProfile.objects.get(user_id=kwargs['user_id'])
    if 'mturk_hit_id' in kwargs:
        kwargs['mturk_hit'] = MtHit.objects.get(id=kwargs['mturk_hit_id'])
    if 'mturk_assignment_id' in kwargs:
        kwargs['mturk_assignment'] = MtAssignment.objects.get(
            id=kwargs['mturk_assignment_id'])
    if 'experiment_id' in kwargs:
        kwargs['experiment'] = Experiment.objects.get(
            id=kwargs['experiment_id'])

    # fetch experiment settings
    hit_type = kwargs['mturk_hit'].hit_type
    exp_settings = hit_type.experiment_settings
    if not exp_settings:
        # if the settings are somehow missing, update all records with the
        # newest experiment settings
        exp_settings = kwargs['experiment'].new_hit_settings
        MtHitType.objects.filter(id=hit_type.id) \
            .update(experiment_settings=exp_settings)

    # fetch hit contents
    if 'hit_contents' not in kwargs:
        kwargs['hit_contents'] = fetch_hit_contents(kwargs['mturk_hit'])
    hit_contents = kwargs['hit_contents']

    # new_objects_dict: {(content_type_id, content_id): [created items]}
    # (if [created items] is empty, the entry may be omitted)
    if hit_contents:
        new_objects_dict = exp_settings.out_content_model() \
            .mturk_submit(**kwargs)
    else:
        print "WARNING: no hit_contents in %s" % kwargs['mturk_hit'].id
        new_objects_dict = {}

    # sanity check
    if not all(isinstance(k, tuple) for k in new_objects_dict):
        raise ValueError(
            "Invalid new_objects_dict: %s" % repr(new_objects_dict))

    # flatten all items into one list
    new_objects_list = []
    for obj_list in new_objects_dict.values():
        new_objects_list += obj_list

    # attach objects to assignment
    for obj in new_objects_list:
        MtSubmittedContent.objects.get_or_create(
            assignment=kwargs['mturk_assignment'],
            object_id=obj.id,
            content_type=ContentType.objects.get_for_model(obj),
        )

    for content in hit_contents:
        # content_tuple: (content type id, object id)
        content_tuple = get_content_tuple(content)
        if content_tuple not in new_objects_dict:
            # print '%s: no new objects generated' % repr(content_tuple)
            continue

        delta_completed = len(new_objects_dict[content_tuple])
        delta_scheduled = exp_settings.out_count_ratio

        # update the fact that some outputs have been completed
        PendingContent.objects \
            .filter(
                experiment=kwargs['experiment'],
                content_type=ContentType.objects.get_for_id(content_tuple[0]),
                object_id=content_tuple[1],
            ).update(
                num_outputs_completed=F(
                    'num_outputs_completed') + delta_completed,
                num_outputs_scheduled=F(
                    'num_outputs_scheduled') - delta_scheduled,
            )

    # consider all affected objects for new experiments
    pending_objects = list(set(hit_contents + new_objects_list))
    add_pending_objects_task.delay(
        [get_content_tuple(c) for c in pending_objects])

    # mark experiment as dirty
    Experiment.objects.filter(id=kwargs['experiment'].id) \
        .update(cubam_dirty=True)

    # here, "complete" means that the user actually submitted (and is not a
    # "partial submission", i.e. a background auto-submit performed by the
    # experiment script)
    if not kwargs['complete']:
        return

    # sync with mturk 30 minutes from now (it can take a while for Amazon to
    # update the status; 1 minute is not enough)
    sync_hit_task.apply_async(
        args=[kwargs['mturk_hit'].id], countdown=30 * 60)

    # mark as done
    MtAssignment.objects.filter(id=kwargs['mturk_assignment'].id) \
        .update(submission_complete=True)
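# Usage sketch (illustrative, not part of the original module): because
# mturk_submit_impl() resolves 'user_id', 'mturk_hit_id',
# 'mturk_assignment_id', and 'experiment_id' into model instances itself, a
# Celery wrapper can pass plain IDs, which keeps the task arguments
# JSON-serializable.  The task name and decorator below are assumptions.
from celery import shared_task  # assumed import for this sketch


@shared_task
def mturk_submit_task(**kwargs):
    # e.g. kwargs = {'mturk_hit_id': ..., 'mturk_assignment_id': ...,
    #                'experiment_id': ..., 'results': ..., 'complete': True}
    mturk_submit_impl(**kwargs)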