Example #1
def expire_hit_task(hit_id):
    """ Atomically expire a HIT (with respect to the other tasks described as
    "atomic") """
    try:
        with transaction.atomic():
            # Note: select_for_update() locks the object for modification
            MtHit.objects.select_for_update().get(id=hit_id).expire()
    except MtHit.DoesNotExist:
        get_mturk_connection().expire_hit(hit_id)
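Every example on this page calls a project helper, get_mturk_connection(), which returns a boto MTurkConnection; the helper itself is not shown here. A minimal sketch of what such a helper might look like, assuming the credentials live in Django settings (the two credential setting names below are hypothetical; MTURK_SANDBOX and MTURK_HOST do appear in the examples), is:

# Sketch only: a plausible get_mturk_connection() helper, not the project's actual code.
# MTURK_AWS_ACCESS_KEY_ID / MTURK_AWS_SECRET_ACCESS_KEY are hypothetical setting names.
from boto.mturk.connection import MTurkConnection
from django.conf import settings

_connection = None

def get_mturk_connection():
    """ Return a cached boto MTurkConnection pointed at the sandbox or
    production endpoint, depending on settings.MTURK_HOST
    (e.g. 'mechanicalturk.sandbox.amazonaws.com' for the sandbox). """
    global _connection
    if _connection is None:
        _connection = MTurkConnection(
            aws_access_key_id=settings.MTURK_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.MTURK_AWS_SECRET_ACCESS_KEY,
            host=settings.MTURK_HOST)
    return _connection

Caching the connection in a module-level variable is just one reasonable choice; the examples only require that repeated calls return a usable connection.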
Example #2
    def handle(self, *args, **options):
        print >>self.stdout, 'MTurk info:'
        for key in dir(settings):
            if key.startswith('MTURK') or 'DEBUG' in key:
                print '  %s: %s' % (key, getattr(settings, key))

        print '\nFetching account balance...'
        print 'Account balance:', get_mturk_connection().get_account_balance()
Example #3
    def handle(self, *args, **options):
        print >> self.stdout, 'MTurk info:'
        for key in dir(settings):
            if key.startswith('MTURK') or 'DEBUG' in key:
                print '  %s: %s' % (key, getattr(settings, key))

        print >> self.stdout, '\nDownloading list of hits...'
        connection = get_mturk_connection()

        # repeatedly try to download the HIT list
        while True:
            try:
                all_hits = list(connection.get_all_hits())
                break
            except MTurkRequestError as e:
                print e
                sleep(5)

        # LOCAL
        all_hit_ids = set(
            extract_mturk_attr(data, 'HITId') for data in all_hits)
        print >> self.stdout, '\nSyncing: local --> Amazon...'
        num_updated = MtHit.objects \
            .filter(sandbox=settings.MTURK_SANDBOX) \
            .exclude(hit_status='D') \
            .exclude(id__in=all_hit_ids) \
            .update(hit_status='D', expired=True)
        if num_updated:
            print 'No remote copy of %s hits -- marked them as disposed' % num_updated

        num_updated = MtAssignment.objects \
            .filter(hit__hit_status='D', status='S') \
            .update(status='A')
        if num_updated:
            print '%s assignments pending with disposed hits -- marked them as approved' % num_updated

        # REMOTE
        for sync_assignments in [False, True]:
            print >> self.stdout, '\nSyncing: Amazon --> local... (sync asst: %s)' % (
                sync_assignments)
            for data in progress_bar(all_hits):
                hit_id = extract_mturk_attr(data, 'HITId')

                try:
                    hit = MtHit.objects.get(id=hit_id)
                    for _ in xrange(5):
                        try:
                            hit.sync_status(data,
                                            sync_assignments=sync_assignments)
                            break
                        except MTurkRequestError as e:
                            print e
                            sleep(5)
                except MtHit.DoesNotExist:
                    print 'No local copy of %s -- approving and deleting from Amazon (disabling)' % hit_id
                    try:
                        connection.disable_hit(hit_id)
                    except Exception as exc:
                        print exc

        print >> self.stdout, '\nFetching account balance...'
        print >> self.stdout, 'Account balance:', connection.get_account_balance()
        print >> self.stdout, '\nDone'
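This sync command retries failed MTurk calls in a sleep loop in two places: once when downloading the full HIT list and once per HIT when syncing its status. As a sketch, that pattern could be pulled into a small helper; retry_mturk is a hypothetical name and not part of the project:

# Sketch only: factor the retry-on-MTurkRequestError pattern into a helper.
from time import sleep
from boto.mturk.connection import MTurkRequestError

def retry_mturk(func, attempts=5, delay=5):
    """ Call func(); on MTurkRequestError, wait `delay` seconds and retry,
    up to `attempts` times, re-raising the last error. """
    for i in xrange(attempts):
        try:
            return func()
        except MTurkRequestError as e:
            print(e)
            if i + 1 == attempts:
                raise
            sleep(delay)

# usage sketch, replacing the inner retry loops above:
#   all_hits = retry_mturk(lambda: list(connection.get_all_hits()))
#   retry_mturk(lambda: hit.sync_status(data, sync_assignments=sync_assignments))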
Example #4
def consume_pending_objects_task(
        scan_for_pending_objects=True, clean_up_invalid=True, show_progress=False):
    """ IMPORTANT: only one instance of this function can be running at once.
    This uses both cache locking and filesystem locking to make sure.
    Lock-directory: .consume_pending_objects_task """

    if not os.path.isfile('manage.py'):
        raise RuntimeError('Worker not in server directory')

    # use a lock directory to ensure only one thread is running
    try:
        os.mkdir('.consume_pending_objects_task')
    except OSError:  # the directory already exists: another worker holds the lock
        print ("Already running!  If you are *sure* that " +
               "consume_pending_objects_task is not running, " +
               "delete the .consume_pending_objects_task directory")
        return

    try:
        # might as well scan again since the rest of this function is optimized
        if scan_for_pending_objects:
            scan_all_for_pending_objects_task(show_progress=show_progress)

        total_reward = Decimal('0.00')
        commission = Decimal(str(settings.MTURK_COMMISSION))

        # check all experiments for pending_contents
        pending_experiments = Experiment.objects \
            .filter(new_hit_settings__auto_add_hits=True) \
            .annotate(num=Count('pending_contents')) \
            .filter(num__gt=0)

        # make sure we are within budget
        balance = get_mturk_balance()
        print 'balance: %s' % balance

        for experiment in pending_experiments:
            exp_settings = experiment.new_hit_settings

            # (double filter since the keyword is the same)
            get_pending_contents = lambda: experiment.pending_contents \
                .filter(num_outputs_max__gt=0) \
                .filter(num_outputs_max__gt=(
                        F('num_outputs_completed') + F('num_outputs_scheduled'))) \
                .order_by('-num_outputs_completed', '-priority')

            pending_contents = get_pending_contents()
            num_pending_contents = pending_contents.count()

            if num_pending_contents < 1:
                continue

            if clean_up_invalid:
                if show_progress:
                    print '%s: clean up invalid or deleted content...' % experiment.slug

                pending_contents_dirty = False
                tuples = pending_contents.values_list('object_id', 'content_type')
                content_type_ids = set(t[1] for t in tuples)
                for ct_id in content_type_ids:
                    ct = ContentType.objects.get_for_id(id=ct_id)
                    pending_object_ids = [
                        t[0] for t in tuples if t[1] == ct_id]
                    model = ct.model_class()
                    existing_qset = model.objects.filter(id__in=pending_object_ids)
                    if hasattr(model, 'invalid'):
                        existing_qset = existing_qset.filter(invalid=False)
                    existing_object_ids = set(
                        existing_qset.values_list('id', flat=True))
                    to_delete = [
                        id for id in pending_object_ids if id not in existing_object_ids]
                    if to_delete:
                        print 'Deleting: %s dangling pending contents' % len(to_delete)
                        pending_contents.filter(
                            content_type=ct, object_id__in=to_delete).delete()
                        pending_contents_dirty = True
                if pending_contents_dirty:
                    pending_contents = get_pending_contents()
                    num_pending_contents = pending_contents.count()
                    if num_pending_contents < 1:
                        continue

                if show_progress:
                    print '%s: clean up invalid or deleted content... done' % experiment.slug

            # keep track of hit counts
            num_active_hits = MtHit.objects.filter(
                sandbox=settings.MTURK_SANDBOX,
                hit_type__experiment=experiment,
                all_submitted_assignments=False,
                expired=False,
            ).count()

            num_total_hits = MtHit.objects.filter(
                sandbox=settings.MTURK_SANDBOX,
                hit_type__experiment=experiment,
            ).count()

            print 'Experiment %s: %s/%s pending contents, %s/%s active HITs, %s/%s total HITs' % (
                experiment, num_pending_contents, exp_settings.contents_per_hit,
                num_active_hits, exp_settings.max_active_hits,
                num_total_hits, exp_settings.max_total_hits
            )

            hit_type = None
            while (num_active_hits < exp_settings.max_active_hits and
                   num_total_hits < exp_settings.max_total_hits and
                   pending_contents.count() >= exp_settings.contents_per_hit and
                   total_reward + settings.MTURK_MIN_BALANCE < balance):

                # transaction to ensure that if this fails, the pending_content
                # list is still consistent
                with transaction.atomic():

                    # lazily create hit_type
                    if not hit_type:
                        hit_type = get_or_create_hit_type_from_experiment(
                            experiment)

                    # attach contents to hit
                    cur_pending_contents = list(
                        pending_contents[:exp_settings.contents_per_hit])

                    num_to_schedule = None
                    if cur_pending_contents:
                        num_to_schedule = max(
                            [c.num_to_schedule() for c in cur_pending_contents])
                    if not num_to_schedule:
                        num_to_schedule = exp_settings.num_outputs_max

                    max_assignments = (
                        (num_to_schedule + exp_settings.out_count_ratio - 1) /
                        exp_settings.out_count_ratio
                    )
                    if max_assignments < 1:
                        continue

                    # create hit (also sends to amazon)
                    hit = MtHit.objects.create(
                        hit_type=hit_type,
                        lifetime=exp_settings.lifetime,
                        max_assignments=max_assignments)

                    total_reward += (hit_type.reward *
                                     hit.max_assignments * (1 + commission))
                    num_active_hits += 1
                    num_total_hits += 1

                    for pending_content in cur_pending_contents:

                        hit.contents.create(
                            content_type=pending_content.content_type,
                            object_id=pending_content.object_id)

                        # link HIT
                        pending_content.hits.add(hit)

                        # update scheduling count
                        PendingContent.objects.filter(id=pending_content.id).update(
                            num_outputs_scheduled=(
                                F('num_outputs_scheduled') +
                                max_assignments * exp_settings.out_count_ratio),
                        )

                    hit.num_contents = hit.contents.count()
                    hit.save()

                    print '%s: create hit: %s (%s assignments, %s contents)' % (
                        experiment, hit, max_assignments, hit.num_contents)

                # refresh for next loop
                pending_contents = get_pending_contents()

    finally:
        os.rmdir('.consume_pending_objects_task')

    if total_reward > 0:
        print 'added reward: %s' % total_reward
        print 'account balance: %s' % get_mturk_connection().get_account_balance()
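The docstring above relies on a lock directory to keep this task single-instance: os.mkdir either atomically creates .consume_pending_objects_task or fails because another worker already created it, and the finally block removes it when the work is done. The same guard can be expressed as a reusable context manager; this is only a sketch, and lock_dir is a hypothetical name:

# Sketch only: a lock-directory guard as a context manager.
import os
from contextlib import contextmanager

@contextmanager
def lock_dir(path):
    """ Acquire a filesystem lock by creating a directory; yield True if the
    lock was obtained, False if another process already holds it. """
    try:
        os.mkdir(path)
    except OSError:
        yield False
        return
    try:
        yield True
    finally:
        os.rmdir(path)

# usage sketch:
#   with lock_dir('.consume_pending_objects_task') as acquired:
#       if not acquired:
#           return
#       ... do the work ...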
Example #5
    def handle(self, *args, **options):
        if not settings.MTURK_SANDBOX or 'sandbox' not in settings.MTURK_HOST:
            print "Permanent delete is only allowed in sandbox (MTURK_SANDBOX) mode"
            return

        experiment = None
        delete_empty = False
        if len(args) == 2:
            task, target = args[0], args[1]
            experiment = Experiment.objects.get(task=task, target=target)
            print 'Finding experiment: task:', task, 'target:', target
        elif len(args) == 1 and args[0] == "all":
            print 'Finding all sandbox experiments'
        elif len(args) == 1 and args[0] == "empty":
            delete_empty = True
            print 'Finding all empty sandbox experiments'
        else:
            print "Usage: <task> <target> or all"
            return

        delete_count = 0
        ignore_count = 0
        missing_count = 0
        connection = get_mturk_connection()
        all_aws_hits = list(connection.get_all_hits())
        to_delete = []

        for aws_hit in progress.bar(all_aws_hits):
            hit_id = extract_mturk_attr(aws_hit, 'HITId')

            try:
                hit = MtHit.objects.get(id=hit_id)
                if not hit.sandbox:
                    ignore_count += 1
                    continue
            except ObjectDoesNotExist:
                print 'Warning: no local copy of HIT', hit_id, '(deleting anyway)'
                connection.disable_hit(hit_id)
                delete_count += 1
                missing_count += 1
                continue

            if delete_empty:
                if hit.contents.count() == 0:
                    to_delete.append(hit)
            else:
                if not experiment or hit.hit_type.experiment == experiment:
                    to_delete.append(hit)

        if len(to_delete) > 0:
            print 'Will delete:'
            for hit in to_delete:
                print '    %s (%s, %s content(s))' % (
                    hit, hit.hit_type.experiment, hit.contents.count())

            if raw_input('Okay? [y/n]: ').lower() != 'y':
                print 'exiting'
                return

            print 'Deleting...'
            with transaction.atomic():
                for hit in progress.bar(to_delete):
                    try:
                        connection.disable_hit(hit.id)
                    except Exception as e:
                        print 'Problem deleting: %s' % e
                    hit.delete()
                    delete_count += 1
        else:
            print 'No HITs to delete'

        if experiment:
            local = MtHit.objects.filter(sandbox=True,
                                         hit_type__experiment=experiment)
        else:
            local = MtHit.objects.filter(sandbox=True)

        local_count = local.count()
        local.delete()

        print 'Deleted %d sandbox HITs' % delete_count
        if ignore_count > 0:
            print 'Note: ignored %d non-sandbox HITs' % ignore_count
        if missing_count > 0:
            print 'Note: deleted %d HITs missing from local database' % missing_count
        if local_count > 0:
            print 'Note: deleted %d local HITs missing from AWS' % local_count