Python get_mturk_client示例，mturk_utils.get_mturk_client Python示例

示例#1

0

显示文件

文件： mturk.py 项目： wuxf-ml/ImageNetV2

def _delete_hits(hits, live):
    assert not live
    live_data = mturk_data.MTurkData(live=True,
                                     load_assignments=False,
                                     verbose=False)
    live_hit_ids = []
    for hit in live_data.hits.values():
        live_hit_ids.append(hit['hit_id'])
    live_hit_ids = set(live_hit_ids)
    for hit_id in hits:
        assert hit_id not in live_hit_ids
    client = mturk_utils.get_mturk_client(live)
    num_deleted = 0
    for hit_id in tqdm.tqdm(hits, desc='Deleting HITs'):
        try:
            client.update_expiration_for_hit(HITId=hit_id,
                                             ExpireAt=datetime(2018, 1, 1))
            response = client.delete_hit(HITId=hit_id)
            assert response['ResponseMetadata']['HTTPStatusCode'] == 200
            num_deleted += 1
        except Exception as e:
            print('ERROR while deleting the HIT with id {} (type {})'.format(
                hit_id, type(e)))
            print('    ' + str(e))
    print('Deleted {} HITs'.format(num_deleted))

示例#2

0

显示文件

文件： expire_hits.py 项目： stanfordnlp/plot-data

def main():
    """Expire every HIT whose id is listed, one per line, in ``OPTS.hits``.

    Stores the MTurk client in the module-level ``client`` global
    (presumably read by ``expire`` -- confirm against its definition) and
    prints the account's available balance before doing any work.
    """
    is_sandbox = OPTS.is_sandbox
    global client
    client = m.get_mturk_client(is_sandbox=is_sandbox)
    print(client.get_account_balance()['AvailableBalance'])
    # Read the ids inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(OPTS.hits, 'r') as f:
        hit_ids = [line.strip() for line in f]
    for hit_id in hit_ids:
        # Skip blank lines (e.g. a trailing newline at the end of the file)
        # rather than sending an empty HIT id to the API.
        if not hit_id:
            continue
        expire(hit_id)

示例#3

0

显示文件

文件： unreject.py 项目： stanfordnlp/plot-data

def main():
    """Approve a fixed list of assignments.

    Stores the MTurk client in the module-level ``client`` global, prints the
    account's available balance, then calls ``approve`` on each hard-coded
    assignment id in order.
    """
    global client
    client = m.get_mturk_client(is_sandbox=OPTS.is_sandbox)
    balance = client.get_account_balance()
    print(balance['AvailableBalance'])

    assignment_ids = (
        '304SM51WA4Y9ILKTOP0M9ZKPOXPSBM',  # server issues
        '30BUDKLTXEP6JMY2MKP4HWGIDAG5EJ',
        '3VAR3R6G1QVBIVUTEWZ3YSKNJ9FO8E',
        '3QHK8ZVMINCME8P34G3LXD6UW3LBLV',
    )
    for assignment_id in assignment_ids:
        approve(assignment_id)

示例#4

0

显示文件

文件： mturk.py 项目： wuxf-ml/ImageNetV2

def _list_hits(live, max_results):
    """Return every HIT of the account as a list of dicts.

    Pages through ``client.list_hits`` with ``max_results`` items per request
    until an empty page is returned or the service stops handing out a
    continuation token.

    The ``Expiration`` and ``CreationTime`` fields of each HIT are converted
    to strings in place before the list is returned.
    """
    client = mturk_utils.get_mturk_client(live)
    all_hits = []
    hit_response = client.list_hits(MaxResults=max_results)
    while hit_response['NumResults'] > 0:
        all_hits += hit_response["HITs"]
        # A page without a continuation token is the last one; indexing
        # 'NextToken' unconditionally would raise KeyError in that case.
        next_token = hit_response.get('NextToken')
        if not next_token:
            break
        hit_response = client.list_hits(MaxResults=max_results,
                                        NextToken=next_token)
    for hit in all_hits:
        hit['Expiration'] = str(hit['Expiration'])
        hit['CreationTime'] = str(hit['CreationTime'])
    return all_hits

示例#5

0

显示文件

文件： review.py 项目： stanfordnlp/plot-data

def main():
    """Approve or reject every assignment listed in ``OPTS.assignments``.

    Decisions come from the JSON file ``OPTS.status``, keyed by
    ``(WorkerId, AssignmentId)``; an assignment without a status entry is
    rejected and the lookup failure is recorded on its record.  Each record
    is annotated with its previous status, the new status and any request
    error, printed, and finally the whole list is written as JSON to
    ``OPTS.review_out``.  Per-worker accept/reject counts are printed last.
    """
    is_sandbox = OPTS.is_sandbox
    client = m.get_mturk_client(is_sandbox=is_sandbox)
    print(client.get_account_balance()['AvailableBalance'])
    by_worker = collections.defaultdict(lambda: {'accepted': 0, 'rejected': 0})

    # Context managers close the input files promptly; the original left
    # both handles open until garbage collection.
    with open(OPTS.assignments, 'r') as f:
        assignments = json.load(f)
    with open(OPTS.status, 'r') as f:
        statuses = json.load(f)
    status_dict = {(s['WorkerId'], s['AssignmentId']): s for s in statuses}

    for s in assignments:
        worker_id, assignment_id = s['WorkerId'], s['AssignmentId']
        try:
            response = client.get_assignment(AssignmentId=assignment_id)
            s['prev_status'] = response['Assignment']['AssignmentStatus']

            try:
                accept = status_dict[(worker_id, assignment_id)]['accept']
            except KeyError as e:
                # No decision recorded (or no 'accept' field): reject.
                accept = False
                s['KeyError'] = 'KeyError: ' + str(e)

            # NOTE(review): only the literal ``False`` rejects; any other
            # value (None, 0, a string) is treated as an approval.
            if accept is False:
                by_worker[worker_id]['rejected'] += 1
                client.reject_assignment(
                    AssignmentId=assignment_id,
                    RequesterFeedback=(
                        'Thanks for trying our task, but your assignment has been rejected due to spamming. '
                        'If you believe this is in error, message me and explain what you did, '
                        'and I will reverse the rejection ASAP. Thanks for your understanding.'
                    ))
                s['next_status'] = 'Rejected'
            else:
                by_worker[worker_id]['accepted'] += 1
                client.approve_assignment(
                    AssignmentId=assignment_id,
                    RequesterFeedback=
                    'thanks for trying our task, your assignment has been approved.',
                    OverrideRejection=True)
                s['next_status'] = 'Accepted'
        except client.exceptions.RequestError as e:
            # Record the failure on the assignment and keep going.
            s['RequestError'] = 'RequestError: ' + str(e)

        print(s)

    with open(OPTS.review_out, 'w') as f:
        # write() emits the document in one call; writelines() on a str
        # iterated over it one character at a time.
        f.write(json.dumps(assignments))

    for k, v in by_worker.items():
        print(k, v)

示例#6

0

显示文件

文件： mturk.py 项目： wuxf-ml/ImageNetV2

def _submit_hits(
        hit_data,
        hit_htmls,
        live=False,
        auto_approval_delay=24 * 3600 * 3,
        assignment_duration=3600,
        reward='0.30',
        title='Label new images for machine learning dataset',
        keywords='image, label, machine learning, artificial intelligence',
        description='Select images containing objects of the appropriate type',
        max_assignments=10,
        life_time=999999):
    """Create one HIT per entry of ``hit_data`` under a single HIT type.

    Args:
        hit_data: iterable of dicts, each with a ``"uuid"`` key; entries are
            updated in place with submission metadata on success.
        hit_htmls: mapping from uuid to the question payload for that HIT.
        live: when truthy, the user must type ``LIVE`` at a prompt or the
            process exits.
        auto_approval_delay, assignment_duration, reward, title, keywords,
            description: forwarded to ``create_hit_type``.
        max_assignments, life_time: forwarded to ``create_hit_with_hit_type``.

    Returns:
        ``hit_data`` itself.  Entries whose creation failed are left without
        the submission metadata; the error and traceback are printed.
    """
    client = mturk_utils.get_mturk_client(live)
    if live:
        confirmation = input(
            "You are about to submit a live hit, please type in the word LIVE (in all capitals): "
        )
        if confirmation.strip() != "LIVE":
            exit()

    hit_type_id = client.create_hit_type(
        AutoApprovalDelayInSeconds=auto_approval_delay,
        AssignmentDurationInSeconds=assignment_duration,
        Reward=reward,
        Title=title,
        Keywords=keywords,
        Description=description)["HITTypeId"]

    for entry in hit_data:
        try:
            # The uuid doubles as annotation and as the request token.
            created = client.create_hit_with_hit_type(
                HITTypeId=hit_type_id,
                RequesterAnnotation=entry["uuid"],
                UniqueRequestToken=entry["uuid"],
                MaxAssignments=max_assignments,
                LifetimeInSeconds=life_time,
                Question=hit_htmls[entry['uuid']])['HIT']
            entry["hit_id"] = created['HITId']
            entry["hit_type_id"] = created['HITTypeId']
            entry["user"] = getpass.getuser()
            entry["uuid"] = str(entry['uuid'])
            entry["time"] = str(datetime.now(tzlocal()))
            entry["submitted"] = True
            print("\nCreated HIT: {}".format(created['HITId']))
        except Exception as err:
            print(
                "hit {0} failed with error {1}, perhaps this hit already exists?"
                .format(entry["uuid"], err))
            traceback.print_exc()
    return hit_data

示例#7

0

显示文件

文件： hits_to_assignments.py 项目： stanfordnlp/plot-data

def main():
    """Fetch the assignments of every HIT in ``OPTS.hits`` and dump them.

    For each HIT id (one per line in ``OPTS.hits``) the first page of up to
    100 assignments is requested.  Each assignment's answer is passed through
    ``get_code`` / ``check_code``, and a reduced record (worker, assignment
    and HIT ids plus accept/submit times) is written as JSON to
    ``OPTS.assignments``.
    """
    is_sandbox = OPTS.is_sandbox
    client = m.get_mturk_client(is_sandbox=is_sandbox)
    print('balance', client.get_account_balance()['AvailableBalance'])

    # Close the HIT list promptly instead of leaking the handle.
    with open(OPTS.hits, 'r') as f:
        hits = f.readlines()

    assignments = []
    for h in hits:
        # NOTE(review): only one page is requested per HIT; a HIT with more
        # than 100 assignments would need NextToken pagination.
        response = client.list_assignments_for_hit(HITId=h.strip('\n'), MaxResults=100)
        assignments += response['Assignments']

    # A tuple (not a set) keeps the key order of the emitted JSON stable
    # from run to run.
    kept_fields = ('WorkerId', 'AssignmentId', 'HITId', 'AcceptTime', 'SubmitTime')
    processed = []
    for a in assignments:
        code = get_code(a['Answer'])
        # The result is not used here; the call is kept in case check_code
        # has side effects (e.g. logging) -- confirm against its definition.
        check_code(a['WorkerId'] + '_' + a['AssignmentId'], code)
        processed.append({k: a[k] for k in kept_fields})

    with open(OPTS.assignments, 'w') as f:
        # default=str deliberately stringifies the non-JSON values
        # (the accept/submit timestamps).
        f.write(json.dumps(processed, default=str))

示例#8

0

显示文件

def main():
    """Grant the bonus qualification to every worker listed in ``OPTS.input``.

    Prints the account's available balance, looks up (or creates) the
    ``vlspeakerlistener_bonus`` qualification, then associates it with each
    worker id (one per line) with value 20 and a notification.  The response
    or the raised exception is printed for each worker.
    """
    sandbox = OPTS.is_sandbox
    client = m.get_mturk_client(is_sandbox=sandbox)
    balance = client.get_account_balance()
    print(balance['AvailableBalance'])

    with open(OPTS.input, 'r') as handle:
        worker_lines = handle.readlines()

    qual_id = m.find_or_create_qualification(
        'vlspeakerlistener_bonus', 'qualification for paying bonuses',
        sandbox)

    for line in worker_lines:
        try:
            result = client.associate_qualification_with_worker(
                QualificationTypeId=qual_id,
                WorkerId=line.strip(),
                IntegerValue=20,
                SendNotification=True)
            print(result)
        except Exception as err:
            # Best effort: report the failure and move on to the next worker.
            print(err)

示例#9

0

显示文件

文件： mturk.py 项目： wuxf-ml/ImageNetV2

def show_hit_progress(ctx, hit_file, live):
    """Print a histogram of submitted-assignment counts over the given HITs.

    Args:
        ctx: unused in this function.
        hit_file: sequence of paths to JSON files, each holding a list of
            HIT records with ``uuid`` and ``hit_id`` keys.
        live: forwarded to ``mturk_utils.get_mturk_client``.

    An assignment counts as submitted when its ``AssignmentStatus`` is
    ``Approved`` or ``Submitted``.
    """
    hits = []
    for path in hit_file:
        with open(path, 'r') as handle:
            hits.extend(json.load(handle))
    print('Loaded {} HITs from {} files'.format(len(hits), len(hit_file)))

    client = mturk_utils.get_mturk_client(live=live)
    assignments = {}
    for hit in tqdm.tqdm(hits, desc='Querying HITs'):
        uuid = hit['uuid']
        assignments[uuid] = mturk_utils.get_assignments_for_hit_from_aws(
            hit['hit_id'], client, uuid)

    counted_statuses = ('Approved', 'Submitted')
    counter = Counter(
        sum(1 for x in per_hit.values()
            if x['AssignmentStatus'] in counted_statuses)
        for per_hit in assignments.values())
    for count, freq in counter.most_common():
        print('{} HITs have {} submitted assignments'.format(freq, count))

示例#10

0

显示文件

文件： backup_mturk.py 项目： wuxf-ml/ImageNetV2

import datetime
import getpass
import json
import time
from timeit import default_timer as timer

import tqdm

import mturk_utils

# Client for the live MTurk account; this script always runs with live=True.
client = mturk_utils.get_mturk_client(live=True)
# Page size passed as MaxResults to list_hits below.
max_num_results = 100

# Log of API calls made by this script: each entry records the method name,
# the arguments and the raw result.
api_results = []

# Not populated in this part of the script.
all_hit_ids = []

print('Retrieving HITs ...')
# Progress-reporting state; not read in this part of the script.
last_hit_count_print = 0
last_hit_count_print_time = timer()

# First page of HITs; the call and its result are recorded in api_results.
res = client.list_hits(MaxResults=max_num_results)
cur_call = {
    'method': 'list_hits',
    'arguments': {
        'MaxResults': max_num_results
    },
    'result': res
}
api_results.append(cur_call)

示例#11

0

显示文件

文件： test.py 项目： stanfordnlp/plot-data

import mturk_utils as m
import boto3
import json, csv
import functools

# Ad-hoc script: looks up (or creates) the 'plot-diff' qualification, grants
# it to one worker, prints the account balance and lists reviewable HITs.
# NOTE(review): is_sandbox is False, so these calls presumably hit the live
# MTurk account -- confirm before running.
is_sandbox = False
qualification_name = 'plot-diff'
description = 'reasonable turkers who succeeded in both generation and differences'

client = m.get_mturk_client(is_sandbox=is_sandbox)
qual_id = m.find_or_create_qualification(qualification_name, description,
                                         is_sandbox)
print(qual_id)

m.give_worker_qualification('AM2KK02JXXW48',
                            qual_id,
                            value=1,
                            is_sandbox=is_sandbox)
# This will return $10,000.00 in the MTurk Developer Sandbox
print(client.get_account_balance()['AvailableBalance'])
# client.create_worker_block(WorkerId=turkerid, Reason='spamming on plot-diff')

response = client.list_reviewable_hits(
    HITTypeId='34A9VSEXPEVEYWXV71ERMOWJL3L9JI',
    # Status='Reviewable',
    MaxResults=30)
# boto3 returns timestamps as datetime objects, which json cannot encode on
# its own; default=str stringifies them instead of raising TypeError.
print(json.dumps(response, default=str))

response = client.list_hits(MaxResults=10)
selected = map(
    lambda h: {

示例#12

0

显示文件

文件： create_speaker_hit.py 项目： stanfordnlp/plot-data

def main():
    """Create the speaker HITs and record them under ``OPTS.dir``.

    Refuses to run when ``OPTS.dir`` already exists.  Otherwise it creates
    the directory, writes ``speaker.exec``, builds the question from
    ``amt.xml`` and ``speaker.html`` next to this script, creates one HIT
    type and ``OPTS.num_hit`` HITs with ``OPTS.num_assignment`` assignments
    each, and writes the first response to ``speaker.sample_hit`` and all HIT
    ids (one per line) to ``speaker.HITs.txt``.
    """
    if os.path.exists(OPTS.dir):
        # str.format, not %: the '{}' placeholder combined with the %
        # operator raised TypeError instead of printing the message.
        print('{} already exists'.format(OPTS.dir))
        return
    os.makedirs(OPTS.dir)

    with open(os.path.join(OPTS.dir, 'speaker.exec'), 'w') as f:
        f.write(SPEAKER_EXEC)

    is_sandbox = OPTS.is_sandbox
    client = m.get_mturk_client(is_sandbox=is_sandbox)
    print('balance', client.get_account_balance()['AvailableBalance'])

    # The question we ask the workers is contained in these files; read them
    # through context managers so the handles are closed promptly.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(script_dir, "amt.xml"), "r") as f:
        xml_template = f.read()
    with open(os.path.join(script_dir, "speaker.html"), "r") as f:
        content = f.read()
    question = xml_template.format(content)

    environments = {
        "live": {
            "endpoint": "https://mturk-requester.us-east-1.amazonaws.com",
            "preview": "https://www.mturk.com/mturk/preview",
            "manage": "https://requester.mturk.com/mturk/manageHITs",
            "reward": "0.25"
        },
        "sandbox": {
            "endpoint":
            "https://mturk-requester-sandbox.us-east-1.amazonaws.com",
            "preview": "https://workersandbox.mturk.com/mturk/preview",
            "manage": "https://requestersandbox.mturk.com/mturk/manageHITs",
            "reward": "0.25"
        },
    }
    mturk_environment = environments[
        "sandbox"] if is_sandbox else environments["live"]

    print('about to submit {} assignments for a total payout of ${}'.format(
        OPTS.num_hit * OPTS.num_assignment,
        OPTS.num_hit * OPTS.num_assignment *
        float(mturk_environment["reward"])))
    # Qualification requirements restricting who may preview/accept the HIT.
    # The ids below are MTurk system qualification types; see:
    # http://docs.aws.amazon.com/AWSMechTurk/latest/AWSMturkAPI/ApiReference_QualificationRequirementDataStructureArticle.html#ApiReference_QualificationType-IDs
    # qual_id = m.find_or_create_qualification('vlspeaker_tag', 'tag for speaker hits', is_sandbox)
    # print('tagged as', qual_id)
    worker_requirements = [
        # {
        #     'QualificationTypeId': qual_id,
        #     'Comparator': 'DoesNotExist',
        #     'RequiredToPreview': False,
        # },
        {
            # Percentage of assignments approved: at least 95.
            'QualificationTypeId': '000000000000000000L0',
            'Comparator': 'GreaterThanOrEqualTo',
            'IntegerValues': [95],
            'RequiredToPreview': True,
        },
        {
            # Number of HITs approved: at least 3.
            'QualificationTypeId': '00000000000000000040',
            'Comparator': 'GreaterThanOrEqualTo',
            'IntegerValues': [3],
            'RequiredToPreview': True,
        },
        {
            # Worker locale: one of the listed countries.
            'QualificationTypeId':
            '00000000000000000071',
            'Comparator':
            'In',
            'LocaleValues': [{
                'Country': 'US'
            }, {
                'Country': 'CA'
            }, {
                'Country': 'GB'
            }, {
                'Country': 'AU'
            }, {
                'Country': 'NZ'
            }, {
                'Country': 'IN'
            }],
            'RequiredToPreview':
            True
        }
    ]

    # print(question)
    # Create the HIT type; the requirements are dropped in the sandbox.
    response = client.create_hit_type(
        AutoApprovalDelayInSeconds=3 * 24 * 3600,
        AssignmentDurationInSeconds=30 * 60,
        Title='write a command for producing the new plot',
        Keywords='vlspeaker percy plotting nlp language visualization',
        Description=
        'give a command for producing the new plot based on the old one',
        Reward=mturk_environment['reward'],
        QualificationRequirements=worker_requirements
        if not is_sandbox else [],
    )
    hit_type_id = response['HITTypeId']
    hit_ids = []

    for i in range(OPTS.num_hit):
        response = client.create_hit_with_hit_type(
            HITTypeId=hit_type_id,
            LifetimeInSeconds=24 * 3600,
            MaxAssignments=OPTS.num_assignment,
            Question=question,
            RequesterAnnotation='vlspeaker-diff',
            # UniqueRequestToken='string',
        )
        # The response included several fields that will be helpful later
        hit_type_id = response['HIT']['HITTypeId']
        hit_id = response['HIT']['HITId']
        hit_ids.append(hit_id)

        print("\nCreated HIT {}: {}".format(i, hit_id))
        print("\nYou can work the HIT here:")
        print(mturk_environment['preview'] + "?groupId={}".format(hit_type_id))

        if i == 0:
            # Keep the first raw response as a sample for later inspection.
            with open(os.path.join(OPTS.dir, 'speaker.sample_hit'), 'a') as f:
                f.write(str(response))

    with open(os.path.join(OPTS.dir, 'speaker.HITs.txt'), 'w') as f:
        for h in hit_ids:
            f.write("%s\n" % h)

示例#13

0

显示文件

文件： generate_mturk_data_pickle.py 项目： wuxf-ml/ImageNetV2

    pass

# Passed as live= to every mturk_utils call below.
live = True

# Not referenced in this part of the script.
bucket = 'imagenet2datav2'

print('Running consistency check:')
num_errors, num_warnings, local_hit_ids_missing_remotely = mturk_utils.mturk_vs_local_consistency_check(
    live=live)
# Abort on any error, and require that the number of warnings equals the
# number of local HIT ids that are missing remotely.
assert num_errors == 0
assert num_warnings == len(local_hit_ids_missing_remotely)

# TODO: handle the blacklist correctly (do not include in the HITs)
hits, mturk_ids_to_uuid, json_dir, json_filenames, blacklisted_hits = mturk_utils.load_local_hit_data(
    live=live, verbose=True, include_blacklisted_hits=True)
client = mturk_utils.get_mturk_client(live=live)

# Earlier pickled snapshot, fetched via utils.get_s3_file_bytes.
backup_s3_key = 'mturk_results/data_live_2018-12-04_17-24-42_UTC.pickle'
backup_bytes = utils.get_s3_file_bytes(backup_s3_key, verbose=True)
# NOTE(review): pickle.loads executes arbitrary code from its input; this is
# only safe if the backup object is fully trusted -- confirm.
backup_data = pickle.loads(backup_bytes)

# For every HIT that is missing remotely, take its assignment data from the
# backup, keyed by the HIT's uuid.
backup_assignments = {}
for hit_id in local_hit_ids_missing_remotely:
    cur_uuid = mturk_ids_to_uuid[hit_id]
    backup_assignments[cur_uuid] = backup_data['assignments'][cur_uuid]
print(
    f'Took assignment data for {len(backup_assignments)} HITs from the backup {backup_s3_key}'
)

assignments = mturk_utils.get_all_hit_assignments(
    live=live,