def expire_hit_task(hit_id):
    """
    Atomically expire a HIT (with respect to the other tasks described as
    "atomic").  If there is no local record of the HIT, fall back to
    expiring it directly on Amazon.
    """
    try:
        with transaction.atomic():
            # select_for_update() row-locks the HIT so the other "atomic"
            # tasks cannot modify it concurrently while we expire it
            hit = MtHit.objects.select_for_update().get(id=hit_id)
            hit.expire()
    except MtHit.DoesNotExist:
        # no local copy -- expire it remotely instead
        get_mturk_connection().expire_hit(hit_id)
def handle(self, *args, **options):
    """
    Print every MTURK* / *DEBUG* setting, then fetch and print the current
    MTurk account balance.
    """
    print >>self.stdout, 'MTurk info:'
    for key in dir(settings):
        if key.startswith('MTURK') or 'DEBUG' in key:
            # consistency fix: route through self.stdout like the rest of
            # this command's output (previously went to sys.stdout)
            print >>self.stdout, ' %s: %s' % (key, getattr(settings, key))
    print >>self.stdout, '\nFetching account balance...'
    print >>self.stdout, 'Account balance:', get_mturk_connection().get_account_balance()
def handle(self, *args, **options):
    """
    Two-way sync between the local MtHit table and Amazon MTurk:
    local HITs with no remote copy are marked disposed (and their
    submitted assignments approved), then remote HIT status is pulled
    back into the local records.
    """
    print >> self.stdout, 'MTurk info:'
    for key in dir(settings):
        if key.startswith('MTURK') or 'DEBUG' in key:
            print ' %s: %s' % (key, getattr(settings, key))

    print >> self.stdout, '\nDownloading list of hits...'
    connection = get_mturk_connection()

    # repeatedly try and download list
    # (retries forever, sleeping 5s between failed attempts)
    while True:
        try:
            all_hits = list(connection.get_all_hits())
            break
        except MTurkRequestError as e:
            print e
            sleep(5)

    # LOCAL
    all_hit_ids = set(
        extract_mturk_attr(data, 'HITId') for data in all_hits)
    print >> self.stdout, '\nSyncing: local --> Amazon...'
    # any non-disposed local HIT that Amazon no longer lists is assumed
    # disposed on the remote side
    num_updated = MtHit.objects \
        .filter(sandbox=settings.MTURK_SANDBOX) \
        .exclude(hit_status='D') \
        .exclude(id__in=all_hit_ids) \
        .update(hit_status='D', expired=True)
    if num_updated:
        print 'No remote copy of %s hits -- marked them as disposed' % num_updated
    # submitted assignments on disposed HITs can no longer be rejected,
    # so record them as approved
    num_updated = MtAssignment.objects \
        .filter(hit__hit_status='D', status='S') \
        .update(status='A')
    if num_updated:
        print '%s assignments pending with disposed hits -- marked them as approved' % num_updated

    # REMOTE
    # first pass syncs HIT status only, second pass also syncs assignments
    for sync_assignments in [False, True]:
        print >> self.stdout, '\nSyncing: Amazon --> local... (sync asst: %s)' % (
            sync_assignments)
        for data in progress_bar(all_hits):
            hit_id = extract_mturk_attr(data, 'HITId')
            try:
                hit = MtHit.objects.get(id=hit_id)
                # retry each HIT up to 5 times on transient MTurk errors
                for _ in xrange(5):
                    try:
                        hit.sync_status(data, sync_assignments=sync_assignments)
                        break
                    except MTurkRequestError as e:
                        print e
                        sleep(5)
            except MtHit.DoesNotExist:
                # remote HIT we have no record of: disable it on Amazon
                # (best-effort; errors are printed and ignored)
                print 'No local copy of %s -- approving and deleting from Amazon (disabling)' % hit_id
                try:
                    connection.disable_hit(hit_id)
                except Exception as exc:
                    print exc

    print >> self.stdout, '\nFetching account balance...'
    print >> self.stdout, 'Account balance:', connection.get_account_balance()
    print >> self.stdout, '\nDone'
def consume_pending_objects_task( scan_for_pending_objects=True, clean_up_invalid=True, show_progress=False): """ IMPORTANT: only one instance of this function can be running at once. This uses both cache locking and filesystem locking to make sure. Lock-directory: .consume_pending_objects_task """ if not os.path.isfile('manage.py'): raise RuntimeError('Worker not in server directory') # use a lock directory to ensure only one thread is running try: os.mkdir('.consume_pending_objects_task') except: print ("Already running! If you are *sure* that " + "consume_pending_objects_task is not running, " + "delete the .consume_pending_objects_task directory") return try: # might as well scan again since the rest of this function is optimized if scan_for_pending_objects: scan_all_for_pending_objects_task(show_progress=show_progress) total_reward = Decimal('0.00') commission = Decimal(str(settings.MTURK_COMMISSION)) # check all experiments for pending_contents pending_experiments = Experiment.objects \ .filter(new_hit_settings__auto_add_hits=True) \ .annotate(num=Count('pending_contents')) \ .filter(num__gt=0) # make sure we are within budget balance = get_mturk_balance() print 'balance: %s' % balance for experiment in pending_experiments: exp_settings = experiment.new_hit_settings # (double filter since the keyword is the same) get_pending_contents = lambda: experiment.pending_contents \ .filter(num_outputs_max__gt=0) \ .filter(num_outputs_max__gt=( F('num_outputs_completed') + F('num_outputs_scheduled'))) \ .order_by('-num_outputs_completed', '-priority') pending_contents = get_pending_contents() num_pending_contents = pending_contents.count() if num_pending_contents < 1: continue if clean_up_invalid: if show_progress: print '%s: clean up invalid or deleted content...' 
% experiment.slug pending_contents_dirty = False tuples = pending_contents.values_list('object_id', 'content_type') content_type_ids = set(t[1] for t in tuples) for ct_id in content_type_ids: ct = ContentType.objects.get_for_id(id=ct_id) pending_object_ids = [ t[0] for t in tuples if t[1] == ct_id] model = ct.model_class() existing_qset = model.objects.filter(id__in=pending_object_ids) if hasattr(model, 'invalid'): existing_qset = existing_qset.filter(invalid=False) existing_object_ids = set( existing_qset.values_list('id', flat=True)) to_delete = [ id for id in pending_object_ids if id not in existing_object_ids] if to_delete: print 'Deleting: %s dangling pending contents' % len(to_delete) pending_contents.filter( content_type=ct, object_id__in=to_delete).delete() pending_contents_dirty = True if pending_contents_dirty: pending_contents = get_pending_contents() num_pending_contents = pending_contents.count() if num_pending_contents < 1: continue if show_progress: print '%s: clean up invalid or deleted content... 
done' % experiment.slug # keep track of hit counts num_active_hits = MtHit.objects.filter( sandbox=settings.MTURK_SANDBOX, hit_type__experiment=experiment, all_submitted_assignments=False, expired=False, ).count() num_total_hits = MtHit.objects.filter( sandbox=settings.MTURK_SANDBOX, hit_type__experiment=experiment, ).count() print 'Experiment %s: %s/%s pending contents, %s/%s active HITs, %s/%s total HITs' % ( experiment, num_pending_contents, exp_settings.contents_per_hit, num_active_hits, exp_settings.max_active_hits, num_total_hits, exp_settings.max_total_hits ) hit_type = None while (num_active_hits < exp_settings.max_active_hits and num_total_hits < exp_settings.max_total_hits and pending_contents.count() >= exp_settings.contents_per_hit and total_reward + settings.MTURK_MIN_BALANCE < balance): # transaction to ensure that if this fails, the pending_content # list is still consistent with transaction.atomic(): # lazily create hit_type if not hit_type: hit_type = get_or_create_hit_type_from_experiment( experiment) # attach contents to hit cur_pending_contents = list( pending_contents[:exp_settings.contents_per_hit]) num_to_schedule = None if cur_pending_contents: num_to_schedule = max( [c.num_to_schedule() for c in cur_pending_contents]) if not num_to_schedule: num_to_schedule = exp_settings.num_outputs_max max_assignments = ( (num_to_schedule + exp_settings.out_count_ratio - 1) / exp_settings.out_count_ratio ) if max_assignments < 1: continue # create hit (also sends to amazon) hit = MtHit.objects.create( hit_type=hit_type, lifetime=exp_settings.lifetime, max_assignments=max_assignments) total_reward += (hit_type.reward * hit.max_assignments * (1 + commission)) num_active_hits += 1 num_total_hits += 1 for pending_content in cur_pending_contents: hit.contents.create( content_type=pending_content.content_type, object_id=pending_content.object_id) # link HIT pending_content.hits.add(hit) # update scheduling count 
PendingContent.objects.filter(id=pending_content.id).update( num_outputs_scheduled=( F('num_outputs_scheduled') + max_assignments * exp_settings.out_count_ratio), ) hit.num_contents = hit.contents.count() hit.save() print '%s: create hit: %s (%s assignments, %s contents)' % ( experiment, hit, max_assignments, hit.num_contents) # refresh for next loop pending_contents = get_pending_contents() finally: os.rmdir('.consume_pending_objects_task') if total_reward > 0: print 'added reward: %s' % total_reward print 'account balance: %s' % get_mturk_connection().get_account_balance()
def handle(self, *args, **options): if not settings.MTURK_SANDBOX or 'sandbox' not in settings.MTURK_HOST: print "Permanent delete is only allowed in sandbox (MTURK_SANDBOX) mode" return experiment = None delete_empty = False if len(args) == 2: task, target = args[0], args[1] experiment = Experiment.objects.get(task=task, target=target) print 'Finding experiment: task:', task, 'target:', target elif len(args) == 1 and args[0] == "all": print 'Finding all sandbox experiments' elif len(args) == 1 and args[0] == "empty": delete_empty = True print 'Finding all empty sandbox experiments' else: print "Usage: <task> <target> or all" return delete_count = 0 ignore_count = 0 missing_count = 0 connection = get_mturk_connection() all_aws_hits = list(connection.get_all_hits()) to_delete = [] for aws_hit in progress.bar(all_aws_hits): hit_id = extract_mturk_attr(aws_hit, 'HITId') try: hit = MtHit.objects.get(id=hit_id) if not hit.sandbox: ignore_count += 1 continue except ObjectDoesNotExist: print 'Warning: no local copy of HIT', hit_id, '(deleting anyway)' connection.disable_hit(hit_id) delete_count += 1 missing_count += 1 continue if not hit: continue delete = False if delete_empty: if hit.contents.count() == 0: to_delete.append(hit) else: if not experiment or hit.hit_type.experiment == experiment: to_delete.append(hit) if len(to_delete) > 0: print 'Will delete:' for hit in to_delete: print ' %s (%s, %s content(s))' % ( hit, hit.hit_type.experiment, hit.contents.count()) if raw_input('Okay? [y/n]: ').lower() != 'y': print 'exiting' return print 'Deleting...' 
with transaction.atomic(): for hit in progress.bar(to_delete): try: connection.disable_hit(hit.id) except Exception as e: print 'Problem deleting: %s' % e hit.delete() delete_count += 1 else: print 'No HITs to delete' if experiment: local = MtHit.objects.filter(sandbox=True, hit_type__experiment=experiment) else: local = MtHit.objects.filter(sandbox=True) local_count = local.count() local.delete() print 'Deleted %d sandbox HITs' % delete_count if ignore_count > 0: print 'Note: ignored %d non-sandbox HITs' % ignore_count if missing_count > 0: print 'Note: deleted %d HITs missing from local database' % ignore_count if local_count > 0: print 'Note: deleted local %d HITs missing from AWS database' % ignore_count
def handle(self, *args, **options): if not settings.MTURK_SANDBOX or 'sandbox' not in settings.MTURK_HOST: print "Permanent delete is only allowed in sandbox (MTURK_SANDBOX) mode" return experiment = None delete_empty = False if len(args) == 2: task, target = args[0], args[1] experiment = Experiment.objects.get(task=task, target=target) print 'Finding experiment: task:', task, 'target:', target elif len(args) == 1 and args[0] == "all": print 'Finding all sandbox experiments' elif len(args) == 1 and args[0] == "empty": delete_empty = True print 'Finding all empty sandbox experiments' else: print "Usage: <task> <target> or all" return delete_count = 0 ignore_count = 0 missing_count = 0 connection = get_mturk_connection() all_aws_hits = list(connection.get_all_hits()) to_delete = [] for aws_hit in progress.bar(all_aws_hits): hit_id = extract_mturk_attr(aws_hit, 'HITId') try: hit = MtHit.objects.get(id=hit_id) if not hit.sandbox: ignore_count += 1 continue except ObjectDoesNotExist: print 'Warning: no local copy of HIT', hit_id, '(deleting anyway)' connection.disable_hit(hit_id) delete_count += 1 missing_count += 1 continue if not hit: continue delete = False if delete_empty: if hit.contents.count() == 0: to_delete.append(hit) else: if not experiment or hit.hit_type.experiment == experiment: to_delete.append(hit) if len(to_delete) > 0: print 'Will delete:' for hit in to_delete: print ' %s (%s, %s content(s))' % (hit, hit.hit_type.experiment, hit.contents.count()) if raw_input('Okay? [y/n]: ').lower() != 'y': print 'exiting' return print 'Deleting...' 
with transaction.atomic(): for hit in progress.bar(to_delete): try: connection.disable_hit(hit.id) except Exception as e: print 'Problem deleting: %s' % e hit.delete() delete_count += 1 else: print 'No HITs to delete' if experiment: local = MtHit.objects.filter(sandbox=True, hit_type__experiment=experiment) else: local = MtHit.objects.filter(sandbox=True) local_count = local.count() local.delete() print 'Deleted %d sandbox HITs' % delete_count if ignore_count > 0: print 'Note: ignored %d non-sandbox HITs' % ignore_count if missing_count > 0: print 'Note: deleted %d HITs missing from local database' % ignore_count if local_count > 0: print 'Note: deleted local %d HITs missing from AWS database' % ignore_count
def handle(self, *args, **options):
    """
    Two-way sync between the local MtHit table and Amazon MTurk:
    local HITs with no remote copy are marked disposed (and their
    submitted assignments approved), then remote HIT status is pulled
    back into the local records.
    """
    print >>self.stdout, 'MTurk info:'
    for key in dir(settings):
        if key.startswith('MTURK') or 'DEBUG' in key:
            print ' %s: %s' % (key, getattr(settings, key))

    print >>self.stdout, '\nDownloading list of hits...'
    connection = get_mturk_connection()

    # repeatedly try and download list
    # (retries forever, sleeping 5s between failed attempts)
    while True:
        try:
            all_hits = list(connection.get_all_hits())
            break
        except MTurkRequestError as e:
            print e
            sleep(5)

    # LOCAL
    all_hit_ids = set(extract_mturk_attr(data, 'HITId') for data in all_hits)
    print >>self.stdout, '\nSyncing: local --> Amazon...'
    # any non-disposed local HIT that Amazon no longer lists is assumed
    # disposed on the remote side
    num_updated = MtHit.objects \
        .filter(sandbox=settings.MTURK_SANDBOX) \
        .exclude(hit_status='D') \
        .exclude(id__in=all_hit_ids) \
        .update(hit_status='D', expired=True)
    if num_updated:
        print 'No remote copy of %s hits -- marked them as disposed' % num_updated
    # submitted assignments on disposed HITs can no longer be rejected,
    # so record them as approved
    num_updated = MtAssignment.objects \
        .filter(hit__hit_status='D', status='S') \
        .update(status='A')
    if num_updated:
        print '%s assignments pending with disposed hits -- marked them as approved' % num_updated

    # REMOTE
    # first pass syncs HIT status only, second pass also syncs assignments
    for sync_assignments in [False, True]:
        print >>self.stdout, '\nSyncing: Amazon --> local... (sync asst: %s)' % (
            sync_assignments)
        for data in progress_bar(all_hits):
            hit_id = extract_mturk_attr(data, 'HITId')
            try:
                hit = MtHit.objects.get(id=hit_id)
                # retry each HIT up to 5 times on transient MTurk errors
                for _ in xrange(5):
                    try:
                        hit.sync_status(
                            data, sync_assignments=sync_assignments)
                        break
                    except MTurkRequestError as e:
                        print e
                        sleep(5)
            except MtHit.DoesNotExist:
                # remote HIT we have no record of: disable it on Amazon
                # (best-effort; errors are printed and ignored)
                print 'No local copy of %s -- approving and deleting from Amazon (disabling)' % hit_id
                try:
                    connection.disable_hit(hit_id)
                except Exception as exc:
                    print exc

    print >>self.stdout, '\nFetching account balance...'
    print >>self.stdout, 'Account balance:', connection.get_account_balance()
    print >>self.stdout, '\nDone'