Exemplo n.º 1
0
def getConnection(is_prod=False):
    """Open an MTurk connection and print the account balance.

    The connection targets ``HOST`` when ``is_prod`` is truthy and
    ``SANDBOX_HOST`` otherwise; credentials come from the module-level
    ``ACCESS_ID`` and ``SECRET_KEY``.

    Returns the ``MTurkConnection`` that was created.
    """
    # Only the endpoint differs between the two modes.
    endpoint = HOST if is_prod else SANDBOX_HOST
    connection = MTurkConnection(host=endpoint,
                                 aws_access_key_id=ACCESS_ID,
                                 aws_secret_access_key=SECRET_KEY)

    print(connection.get_account_balance())
    return connection
Exemplo n.º 2
0
class MTurk(object):
    """Small convenience wrapper around a single ``MTurkConnection``."""

    def __init__(self):
        # Credentials and endpoint are taken from module-level constants.
        self.mtc = MTurkConnection(host=HOST,
                                   aws_access_key_id=ACCESS_ID,
                                   aws_secret_access_key=SECRET_KEY)

    def balance(self):
        """Return whatever the connection reports as the account balance."""
        connection = self.mtc
        return connection.get_account_balance()

    def create_question(self, id, title):
        """Build a free-text ``Question``.

        :param id: identifier assigned to the question
        :param title: text placed in the question's 'Title' field
        :return: the assembled ``Question``
        """
        body = QuestionContent()
        body.append_field('Title', title)
        spec = AnswerSpecification(FreeTextAnswer())
        return Question(identifier=id, content=body, answer_spec=spec)

    def create_hit(self, title, description=''):
        """Post a HIT consisting of two free-text comment questions.

        :param title: HIT title
        :param description: HIT description (empty by default)
        """
        form = QuestionForm()
        for question_id, question_title in (('1', 'Comments'),
                                            ('2', 'More Comments')):
            form.append(self.create_question(id=question_id,
                                             title=question_title))
        hit_settings = dict(
            questions=form,
            max_assignments=1,
            title=title,
            description=description,
            duration=60*5,   # seconds
            reward=0.01,
        )
        self.mtc.create_hit(**hit_settings)

    def external(self):
        """Post a test HIT backed by an external URL and assert it succeeded."""
        question = ExternalQuestion(
            external_url="http://mturk-hit-wizard.herokuapp.com/view/2",
            frame_height=800)
        hit_settings = dict(
            question=question,
            lifetime=60*65,
            max_assignments=2,
            title="Boto External Question Test",
            keywords=['boto', 'test', 'doctest'],
            reward=0.05,
            duration=60*6,
            approval_delay=60*60,
            annotation='An annotation from boto external question test',
            response_groups=['Minimal', 'HITDetail', 'HITQuestion',
                             'HITAssignmentSummary'],
        )
        outcome = self.mtc.create_hit(**hit_settings)
        assert(outcome.status == True)
Exemplo n.º 3
0
def con(HOST=HOST):
    """Connect to MTurk at ``HOST`` and print the account balance.

    Credentials come from the module-level ``ACCESS_ID`` and ``SECRET_KEY``.

    :param HOST: MTurk endpoint host name (defaults to the module-level HOST)
    :return: the ``MTurkConnection`` on success, ``None`` if creating the
             connection or fetching the balance raised an error
    """
    try:
        mtc = MTurkConnection(aws_access_key_id=ACCESS_ID, aws_secret_access_key=SECRET_KEY, host=HOST)
        print('Connected to '+HOST)
        print('Balance: '+str(mtc.get_account_balance()))
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must still propagate instead of being reported as a
        # connection failure.
        print('Failed to connect to '+HOST+', plz make sure you are connecting to the correct host with correct credentials')
        return None
    return mtc
Exemplo n.º 4
0
def test():
    """Build a survey for one sample article, submit it and print the balance.

    The survey is produced by ``MTurkSurveyFactory.buildSurvey`` from a single
    ``[url, title, text]`` row and submitted with ``submitHITs`` over an HTTPS
    connection created with the module-level ``https_connection_factory``.
    """
    excerpt = (
        "Two young men with backpacks walked with purpose "
        "down Boylston Street Monday afternoon, weaving "
        "through the crowd on the sidelines of the Boston "
        "Marathon. It seemed like they'd been there before, "
        "like they knew where they were going."
    )
    headline = "From backpacks to 'flash-bangs': Boston's week of terror"
    link = "http://www.cnn.com/2013/04/21/us/boston-week-review/?hpt=hp_t1"

    factory = MTurkSurveyFactory()
    forms = factory.buildSurvey([[link, headline, excerpt]])
    print('getting account balance')

    connection = MTurkConnection(
        aws_access_key_id=ACCESS_ID,
        aws_secret_access_key=SECRET_KEY,
        host=HOST,
        is_secure=True,
        https_connection_factory=(https_connection_factory, ()),
    )
    factory.submitHITs(mtc=connection, questionForms=forms)
    print(connection.get_account_balance())
Exemplo n.º 5
0
def test():
    """Submit a one-article sample survey to MTurk and print the balance."""
    # Sample article used as the only row of the survey.
    sample_url = "http://www.cnn.com/2013/04/21/us/boston-week-review/?hpt=hp_t1"
    sample_title = "From backpacks to 'flash-bangs': Boston's week of terror"
    sample_text = (
        "Two young men with backpacks walked with purpose down Boylston "
        "Street Monday afternoon, weaving through the crowd on the "
        "sidelines of the Boston Marathon. It seemed like they'd been "
        "there before, like they knew where they were going."
    )
    rows = [[sample_url, sample_title, sample_text]]

    survey_factory = MTurkSurveyFactory()
    question_forms = survey_factory.buildSurvey(rows)
    print('getting account balance')

    # HTTPS connection built through the module-level connection factory.
    connection_settings = dict(
        aws_access_key_id=ACCESS_ID,
        aws_secret_access_key=SECRET_KEY,
        host=HOST,
        is_secure=True,
        https_connection_factory=(https_connection_factory, ()),
    )
    turk = MTurkConnection(**connection_settings)
    survey_factory.submitHITs(mtc=turk, questionForms=question_forms)
    print(turk.get_account_balance())
Exemplo n.º 6
0
def connect_AMT():
    """Open a connection to ``HOST`` and verify that it works.

    Uses the module-level ``ACCESS_ID`` and ``SECRET_KEY``, which can be
    obtained from
    https://portal.aws.amazon.com/gp/aws/securityCredentials#access_credentials

    Returns the ``MTurkConnection``. If creating the connection or the
    balance request raises, the error is printed and the process exits with
    status 1 (``SystemExit``).
    """
    # Local import: the bare ``exit`` name is only injected by the ``site``
    # module and is not guaranteed to exist; ``sys.exit`` always is.
    import sys

    try:
        mturk_conn = MTurkConnection(aws_access_key_id=ACCESS_ID,
                                     aws_secret_access_key=SECRET_KEY,
                                     host=HOST)
        # Will throw if not connected; the returned balance is not needed.
        mturk_conn.get_account_balance()
        print("Connection to amt established.")
    except Exception as e1:
        print("[Error Connecting] %s" % (e1,))
        print("[Exiting]")
        sys.exit(1)
    return mturk_conn
Exemplo n.º 7
0
def connect_AMT():
    """Connect to MTurk at ``HOST`` and check the connection with a request.

    Credentials are the module-level ``ACCESS_ID`` and ``SECRET_KEY``
    (obtainable from
    https://portal.aws.amazon.com/gp/aws/securityCredentials#access_credentials).

    Returns:
        The ``MTurkConnection`` once a balance request has succeeded.

    Raises:
        SystemExit: with status 1, after printing the error, when the
            connection cannot be created or the balance request fails.
    """
    # ``exit`` is a convenience injected by ``site``; use ``sys.exit`` so the
    # function also works where ``site`` is not loaded.
    import sys

    try:
        mturk_conn = MTurkConnection(aws_access_key_id=ACCESS_ID, aws_secret_access_key=SECRET_KEY, host=HOST)
        # will throw if not connected (result intentionally discarded)
        mturk_conn.get_account_balance()
        print("Connection to amt established.")
    except Exception as e1:
        print("[Error Connecting] %s" % (e1,))
        print("[Exiting]")
        sys.exit(1)
    return mturk_conn
Exemplo n.º 8
0
class TurkConnect:
    """MTurk connection plus the HIT parameters read from ``config``.

    All settings are read from the module-level ``config`` object: AWS
    credentials from the 'AWS Access' section and everything else from the
    'HIT Configuration' section.
    """

    def __init__(self, host, n=10):
        """Connect to ``host`` and prepare the keyword arguments for a HIT.

        :param host: MTurk endpoint host name
        :param n: not used by this class; kept for interface compatibility
        """
        mturkparams = dict(
            aws_access_key_id=config.get('AWS Access', 'aws_access_key_id'),
            aws_secret_access_key=config.get('AWS Access',
                                             'aws_secret_access_key'),
            host=host)
        self.mtc = MTurkConnection(**mturkparams)

        # Configure portal: the HIT is an external question served from
        # the configured URL in a frame of fixed height.
        experimentPortalURL = config.get('HIT Configuration', 'question_url')
        frameheight = 600
        mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

        # Qualification: minimum approval percentage, optionally US only.
        quals = Qualifications()
        approve_requirement = config.getint('HIT Configuration',
                                            'Approve_Requirement')
        quals.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo",
                                                  approve_requirement))
        if config.getboolean('HIT Configuration', 'US_only'):
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters; lifetime and duration are
        # configured in hours and converted to timedeltas.
        self.paramdict = dict(
            hit_type=None,
            question=mturkQuestion,
            lifetime=datetime.timedelta(
                hours=config.getfloat('HIT Configuration', 'HIT_lifetime')),
            max_assignments=config.getint('HIT Configuration',
                                          'max_assignments'),
            title=config.get('HIT Configuration', 'title'),
            description=config.get('HIT Configuration', 'description'),
            keywords=config.get('HIT Configuration', 'keywords'),
            reward=config.getfloat('HIT Configuration', 'reward'),
            duration=datetime.timedelta(
                hours=config.getfloat('HIT Configuration', 'duration')),
            approval_delay=None,
            questions=None,
            qualifications=quals)

    def checkbalance(self):
        """Return the account balance; also serves to test the connection."""
        return self.mtc.get_account_balance()  # Tests the connection

    def createhit(self):
        """Create a HIT from ``self.paramdict`` and return its HITId.

        The id used to be computed and then dropped, leaving callers with
        no way to refer to the HIT they had just created.
        """
        myhit = self.mtc.create_hit(**self.paramdict)[0]
        return myhit.HITId
Exemplo n.º 9
0
class TurkConnect:
    """Holds an MTurk connection and config-driven HIT creation parameters.

    Credentials are read from the 'AWS Access' section of the module-level
    ``config``; HIT settings from its 'HIT Configuration' section.
    """

    def __init__(self, host, n=10):
        """Open the connection and assemble ``self.paramdict``.

        :param host: MTurk endpoint host name
        :param n: accepted but not used anywhere in this class
        """
        mturkparams = dict(
            aws_access_key_id=config.get('AWS Access', 'aws_access_key_id'),
            aws_secret_access_key=config.get('AWS Access', 'aws_secret_access_key'),
            host=host)
        self.mtc = MTurkConnection(**mturkparams)

        # Configure portal (external question shown in a 600px frame)
        experimentPortalURL = config.get('HIT Configuration', 'question_url')
        frameheight = 600
        mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

        # Qualification: approval-rate floor and optional US-only locale
        quals = Qualifications()
        approve_requirement = config.getint('HIT Configuration',
                                            'Approve_Requirement')
        quals.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo",
                                                  approve_requirement))
        if config.getboolean('HIT Configuration', 'US_only'):
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters (lifetime/duration given in hours)
        self.paramdict = dict(
            hit_type=None,
            question=mturkQuestion,
            lifetime=datetime.timedelta(
                hours=config.getfloat('HIT Configuration', 'HIT_lifetime')),
            max_assignments=config.getint('HIT Configuration', 'max_assignments'),
            title=config.get('HIT Configuration', 'title'),
            description=config.get('HIT Configuration', 'description'),
            keywords=config.get('HIT Configuration', 'keywords'),
            reward=config.getfloat('HIT Configuration', 'reward'),
            duration=datetime.timedelta(
                hours=config.getfloat('HIT Configuration', 'duration')),
            approval_delay=None,
            questions=None,
            qualifications=quals,
        )

    def checkbalance(self):
        """Return the account balance (doubles as a connection test)."""
        return self.mtc.get_account_balance()  # Tests the connection

    def createhit(self):
        """Create the configured HIT and return its HITId.

        Previously the id was assigned to a local and discarded, so the
        caller could not track the HIT that was posted.
        """
        myhit = self.mtc.create_hit(**self.paramdict)[0]
        hitid = myhit.HITId
        return hitid
Exemplo n.º 10
0
def check_account_balance(num_instances, price):
    """
    Checks if the requester's mturk account has enough balance to post hits.

    Args:
        num_instances (int) : Number of instances that will be posted on Mturk
        price (float) :  Reward for each correct answer

    Returns:
        1 : If account has enough balance
        -1 : If account has insufficient balance
    """

    mtc = MTurkConnection(aws_access_key_id=aws_parameters['access_key'],
                          aws_secret_access_key=aws_parameters['secret_key'],
                          debug=1,
                          host=DEV_HOST)

    # get_account_balance() returns a result list, not a number, so comparing
    # it directly with a float never reports a shortfall on Python 2 (and
    # raises TypeError on Python 3). Compare the numeric amount of the first
    # entry (the available balance, a boto Price) instead.
    available = mtc.get_account_balance()[0].amount
    required = num_instances * price

    if available < required:
        return -1
    return 1
Exemplo n.º 11
0
def new_rate_hit(PIN_IMAGE_URL, PIN_IMAGE_TITLE, MACYS_IMAGE_URL,
                 MACYS_IMAGE_TITLE):
    """Create a HIT asking a worker to rate how well two images match.

    The HIT shows both images side by side with their titles, a required
    1-5 rating dropdown and an optional free-text comment.

    :param PIN_IMAGE_URL: URL of the first image
    :param PIN_IMAGE_TITLE: caption shown under the first image
    :param MACYS_IMAGE_URL: URL of the Macy's product image
    :param MACYS_IMAGE_TITLE: caption shown under the Macy's image

    NOTE(review): the URLs and titles are concatenated into the markup
    unescaped -- confirm that callers only pass trusted, markup-safe text.
    """
    mtc = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY_ID,
                          aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                          host=HOST)

    if debug:
        print(mtc.get_account_balance())

    title = 'Match these Pictures to Macy\'s Products'
    description = 'Look at this photo and match it to Macy\'s products'
    keywords = 'clothing, rating, opinions, easy, quick, macys'

    # (label shown to the worker, value recorded in the answer)
    ratings = [('Very Bad', '1'), ('Bad', '2'), ('OK', '3'), ('Good', '4'),
               ('Very Good', '5')]

    # make overview: a two-column table, images on top, titles below.
    # The opening tag used to be followed by a stray '>' that ended up as
    # literal text in the rendered HIT.
    comparison_table = (
        '<table border="1"><tr><td width="50%"><img src="' +
        PIN_IMAGE_URL + '" alt="Pintrest Image" /></td>'
        '<td width="50%"><img src="' + MACYS_IMAGE_URL +
        '" alt="Macys Image" /></td></tr><tr>'
        '<td width="50%">' + PIN_IMAGE_TITLE +
        '</td><td width="50%">' + MACYS_IMAGE_TITLE +
        '</td></tr></table>')

    overview = Overview()
    overview.append_field('Title', 'Rank how these two images match.')
    overview.append(FormattedContent(comparison_table))

    # make q1: required rating, exactly one selection from the dropdown
    qc1 = QuestionContent()
    qc1.append_field('Title', 'Rank the match between these two')

    fta1 = SelectionAnswer(min=1,
                           max=1,
                           style='dropdown',
                           selections=ratings,
                           type='text',
                           other=False)

    q1 = Question(identifier='rating',
                  content=qc1,
                  answer_spec=AnswerSpecification(fta1),
                  is_required=True)

    # make q2: optional free-text comments
    qc2 = QuestionContent()
    qc2.append_field('Title', 'Comments about the HIT (Optional)')

    fta2 = FreeTextAnswer()

    q2 = Question(identifier="comments",
                  content=qc2,
                  answer_spec=AnswerSpecification(fta2))

    # make question form
    question_form = QuestionForm()
    question_form.append(overview)
    question_form.append(q1)
    question_form.append(q2)

    # --------------- CREATE THE HIT -------------------

    mtc.create_hit(questions=question_form,
                   max_assignments=1,
                   title=title,
                   description=description,
                   keywords=keywords,
                   duration=60 * 5,
                   reward=0.05)
Exemplo n.º 12
0
# Post one HIT per question; a request error is reported and that question
# is skipped.
created_hits = []
for q in questions:
    try:
        hit = mtc.create_hit(
            question=q,
            max_assignments=hitdata['assignments'],
            title=hitdata['title'],
            description=hitdata['description'],
            keywords=hitdata['keywords'],
            duration=duration,
            lifetime=lifetime,
            approval_delay=approvaldelay,
            reward=reward,
            qualifications=quals,
        )
    except MTurkRequestError as e:
        print('{}: {}\n{}'.format(e.status, e.reason, e.body))
    else:
        created_hits.append(hit)

# Each create_hit result is indexable; its first element carries the ids.
hit_list = []
for result in created_hits:
    first = result[0]
    hit_list.append({'HITId': first.HITId, 'HITTypeId': first.HITTypeId})

# Write the ids next to the input file: "name.ext" -> "name.success.ext".
name_parts = hitfile_name.split('.')
outfilename = '.'.join(name_parts[:-1] + ['success'] + name_parts[-1:])
with open(outfilename, 'w') as successfile:
    safe_dump(hit_list, stream=successfile, default_flow_style=False)

if args.sandbox:
    preview_url = 'https://workersandbox.mturk.com/mturk/preview?groupId={}'
else:
    preview_url = 'https://www.mturk.com/mturk/preview?groupId={}'

# NOTE(review): the balance is fetched and printed once per distinct HIT
# type because it sits inside the loop -- confirm whether that is intended.
for hittypeid in set(entry['HITTypeId'] for entry in hit_list):
    link = preview_url.format(hittypeid)
    print('You can preview your new HIT at:\n\t{}'.format(link))
    print('{0} is the final balance'.format(mtc.get_account_balance()))
Exemplo n.º 13
0
class MTurkClient:
    """Thin wrapper around a boto ``MTurkConnection`` for external-URL HITs."""

    # SETUP
    # ===========

    def __init__(self, aws_access_key, aws_secret_key, aws_mode):
        """Connect to the sandbox when ``aws_mode == 'sandbox'``, else to production.

        :param aws_access_key: AWS access key id
        :param aws_secret_key: AWS secret access key
        :param aws_mode: 'sandbox' selects the sandbox host; any other value
                         selects the production host
        """
        self.mode = aws_mode
        if aws_mode == 'sandbox':
            self.host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            self.host = 'mechanicalturk.amazonaws.com'

        self.c = MTurkConnection(
            aws_access_key,
            aws_secret_key,
            host=self.host)

    # Defaults for create_hit(); callers override individual entries through
    # ``extra_settings``. The dict is copied before use and never mutated.
    default_settings = {
        'lifetime': DAY,
        'duration': 10 * MINUTE,
        'approval_delay': DAY,

        'title': "[title]",
        'description': "[description]",
        'keywords': [],

        'reward': 0.01,
        'max_assignments': 1,

        'height': 700,

        'qualifications': [],
    }

    # HITS
    # ===========
    def create_hit(self, url, extra_settings):
        """Create an external-question HIT pointing at ``url``.

        :param url: URL of the page shown to the worker
        :param extra_settings: dict overriding entries of ``default_settings``
        :return: ``(HITId, HITTypeId)`` of the created HIT

        TODO: Eventually, this should take a TEMPLATE and a dictionary of
        INPUT data that's put into that template. This function would then
        create an HTML file locally (assuming we're running on a web server)
        by replacing template {tags} with input values, and then send the URL
        to the newly created page to MTurk.
        """
        settings = self.default_settings.copy()
        settings.update(extra_settings)

        # Convert the plain values into the forms passed on to boto.
        settings['reward'] = Price(settings['reward'])
        settings['qualifications'] = qualification.Qualifications(settings['qualifications'])
        settings['keywords'] = ','.join(settings['keywords'])
        # 'height' belongs to the ExternalQuestion, not to create_hit itself.
        height = settings.pop('height')

        hit = self.c.create_hit(question=ExternalQuestion(url, height), **settings)[0]
        return hit.HITId, hit.HITTypeId

        # create_hit arguments deliberately left at their defaults:
        #hit_type=None, # Let Amazon do this automatically
        #annotation=None, # Optional annotation for our system to use
        #questions=None, # If you want to create multiple HITs at a time? Probably irrelevant for External
        #response_groups=None, # Unclear what this does

    def get_hit(self, hit_id):
        """Return the first entry of the ``get_hit`` result for ``hit_id``."""
        return self.c.get_hit(hit_id)[0]

    def hit_results(self, hit_id, type=None):  # type in ['Submitted','Approved','Rejected',None]
        """Collect the answers of a HIT's assignments.

        :param hit_id: id of the HIT
        :param type: assignment status to filter by ('Submitted', 'Approved',
                     'Rejected') or None for no filter
        :return: dict mapping AssignmentId to a dict holding one entry per
                 question id (first answer field) plus 'worker_id',
                 'accept_time' and 'submit_time' (parsed datetimes)

        Only a single page of up to 100 assignments is requested.
        """
        results = {}

        # The ``type`` argument used to be ignored (status=None was always
        # sent), so callers could not filter by assignment status.
        assignments = self.c.get_assignments(hit_id, status=type, page_size=100)
        for asst in assignments:
            entry = results.setdefault(asst.AssignmentId, {})
            answers = asst.answers[0]
            for qfa in answers:
                entry[qfa.qid] = qfa.fields[0]

            entry['worker_id'] = asst.WorkerId

            entry['accept_time'] = datetime.strptime(asst.AcceptTime, "%Y-%m-%dT%H:%M:%SZ")
            entry['submit_time'] = datetime.strptime(asst.SubmitTime, "%Y-%m-%dT%H:%M:%SZ")

        return results

    # URL of a HIT on MTurk
    def hit_url_turk(self, hit_id):
        """Not implemented yet; returns None."""
        pass

    def hit_url_external(self, hit_id):
        """Not implemented yet; returns None."""
        pass

    def extend_hit(self, hit_id, extras):
        """Forward to ``extend_hit`` with ``extras`` as the second positional argument."""
        return self.c.extend_hit(hit_id, extras)

    @catcherror
    def delete_hit(self, hit_id):
        """Disable the HIT with the given id."""
        self.c.disable_hit(hit_id)

    # Deletes all the HITS on the server. Risky!
    def cleanup(self):
        """Disable every HIT returned by ``get_all_hits``."""
        for hit in self.c.get_all_hits():
            self.delete_hit(hit.HITId)

    # ASSIGNMENTS
    # ===========
    @catcherror
    def approve(self, asst_id, feedback=None):
        """Approve an assignment, optionally with feedback for the worker."""
        return self.c.approve_assignment(asst_id, feedback)

    @catcherror
    def reject(self, asst_id, feedback=None):
        """Reject an assignment, optionally with feedback for the worker."""
        return self.c.reject_assignment(asst_id, feedback)

    def block(self, worker_id, feedback=None):
        """Block a worker; ``feedback`` is passed on as the reason."""
        return self.c.block_worker(worker_id, feedback)

    def unblock(self, worker_id, feedback=None):
        """Unblock a worker; ``feedback`` is passed on as the reason."""
        return self.c.unblock_worker(worker_id, feedback)

    def bonus(self, asst, amount, feedback):
        """Grant a bonus of ``amount`` for ``asst``.

        ``asst`` must expose ``worker`` and ``asst_id`` attributes.
        """
        return self.c.grant_bonus(asst.worker, asst.asst_id, Price(amount), feedback)

    # STATUS / DIAGNOSTICS
    # --------------------
    def balance(self):
        """Return the first entry of the account balance result."""
        return self.c.get_account_balance()[0]
Exemplo n.º 14
0
import os

print os.getcwd()
import sys

import boto
from boto.mturk.connection import MTurkConnection

SAND = 0
# SECURITY: AWS credentials must never be committed to source control. The
# key pair that used to be hard-coded here has to be treated as compromised
# and rotated. Credentials are now taken from the environment; when a
# variable is unset, None is passed and boto falls back to its own
# credential lookup (environment / boto config file).
ACCESS_ID = os.environ.get('AWS_ACCESS_KEY_ID')
SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
# Id of the HIT that receives one more assignment below.
HIT = "2JNL8I9NZW6HG96GKYHWCT87ATCVL9"

# https://mechanicalturk.amazonaws.com/?Service=AWSMechanicalTurkRequester
HOST = 'mechanicalturk.amazonaws.com'

mtc = MTurkConnection(aws_access_key_id=ACCESS_ID,
                      aws_secret_access_key=SECRET_KEY,
                      host=HOST)

print(mtc.get_account_balance())
# Allow one additional worker to take the HIT.
mtc.extend_hit(hit_id=HIT, assignments_increment=1)
Exemplo n.º 15
0
class MTurk(object):
    """
    A class that wraps a boto.mturk.connection object and provides methods for
    the most common AI2 use cases
    """
    def __init__(self,
                 aws_access_key_id,
                 aws_secret_access_key,
                 host=SANDBOX_HOST):
        """
        initializes the instance with AWS credentials and a host
        :param aws_access_key_id the access key id.
        :param aws_secret_access_key the secret access key.
        :param host the mturk host to connect to
        """
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=host)
        self.host = host

    def __del__(self):
        """
        close the connection whenever this object is garbage collected
        """
        # __del__ also runs when __init__ raised before the connection was
        # assigned; guard so that case does not raise AttributeError.
        connection = getattr(self, "connection", None)
        if connection is not None:
            connection.close()

    def get_account_balance(self):
        """
        :return the balance on the mturk account (first entry of the result)
        """
        return self.connection.get_account_balance()[0]

    def _create_hit(self, params, **kwargs):
        """
        internal helper function for creating a HIT
        :param params the parameters (required and optional) common to all
               HITs: "title", "description", "keywords", "max_assignments",
               "amount", "qualifications", "lifetime" are required,
               "annotation" is optional
        :param **kwargs any other parameters needed for a specific HIT type
        :return the created HIT object
        """
        return self.connection.create_hit(
            title=params["title"],
            description=params["description"],
            keywords=params["keywords"],
            max_assignments=params["max_assignments"],
            reward=Price(amount=params["amount"]),
            qualifications=params["qualifications"],
            lifetime=params["lifetime"],
            # optional params below
            annotation=params.get("annotation"),
            **kwargs)

    def create_url_hit(self, params):
        """
        creates a HIT for an external question with a specified URL
        :param params a dict of the HIT parameters. must contain "url" and
               "frame_height" parameters
        :return the created HIT object
        """
        question = ExternalQuestion(params["url"], params["frame_height"])
        return self._create_hit(params, question=question)

    def create_html_hit(self, params):
        """
        creates a HIT for a question with the specified HTML
        :param params a dict of the HIT parameters, must contain "html" and
               "frame_height" parameters
        :return the created HIT object
        """
        question = HTMLQuestion(params["html"], params["frame_height"])
        return self._create_hit(params, question=question)

    def create_layout_hit(self, params):
        """
        creates a HIT for a question using the supplied layout id
        :param params a dict of the HIT parameters, must contain a "hit_layout"
               parameters with the layout id, and a "layout_params" parameter
               that's the dict of parameters to feed to the layout (an
               iterable of (name, value) pairs is accepted as well).
        :return the created HIT object
        """
        # Iterating a dict yields only its keys, so the documented dict input
        # could not be unpacked into (name, value); use items() for mappings
        # and keep accepting an iterable of pairs.
        raw_params = params["layout_params"]
        pairs = raw_params.items() if hasattr(raw_params, "items") else raw_params

        # create the LayoutParameters object from the supplied params
        layout_params = LayoutParameters([
            LayoutParameter(name, value) for name, value in pairs
        ])

        return self._create_hit(params,
                                hit_layout=params["hit_layout"],
                                layout_params=layout_params)

    def delete_all_hits(self):
        """
        Disables every HIT returned by get_all_hits().
        :return: None
        """
        for hit in self.get_all_hits():
            self.connection.disable_hit(hit.HITId)

    def get_assignments_object_list(self, assignment_dict):
        """
        Returns a list of "<boto.mturk.connection.Assignment object at...>" objects
        assignment_dict: a dictionary of HITId-assignment object pairs
        """
        # Flatten the per-HIT lists into one list, in dict iteration order.
        return [
            assignment_object
            for entry in assignment_dict
            for assignment_object in assignment_dict[entry]
        ]

    def get_results_dict(self, HIT_assignments):
        """
        Takes a list of HIT assignment objects as input.
        Returns a list of dictionaries, one per assignment, containing:
        assignment_object: the assignment itself
        HIT_id: the HIT ID
        worker_Id: the worker ID of the Turker who completed the HIT
        answers: a dictionary of qid-answer field value pairs (empty when
                 the assignment carries no answers)
        """
        assignment_results = []
        for assignment in HIT_assignments:
            # Guard against an assignment without any answer set.
            answer_set = assignment.answers[0] if assignment.answers else []
            # "answers" is always present, even when there is nothing to
            # report; it used to be set only from inside the answer loop.
            assignment_results.append({
                "assignment_object": assignment,
                "worker_Id": assignment.WorkerId,
                "HIT_id": assignment.HITId,
                "answers": {answer.qid: answer.fields
                            for answer in answer_set},
            })
        return assignment_results

    def get_all_results(self, hits):
        """
        Fetch assignments for the given HITs and convert them to result dicts
        :param hits the HITs to get results for
        :return dict from HITId to the list produced by get_results_dict
        """
        all_results = {}
        for hid, assignments in self.get_assignments(hits).items():
            all_results[hid] = self.get_results_dict(assignments)
        return all_results

    def get_reviewable_hits(self, annotations=None, detailed=False):
        """
        Get all the reviewable HITs. By default returns minimal HIT objects, but
        will return detailed ones (by necessity) if annotations is specified or
        if detailed is True
        :param annotations an optional set of annotations to retrieve HITs for
        :param detailed do you want detailed HIT objects or minimal ones
        :return a list of HIT objects
        """
        # Page through the results until an empty page comes back.
        minimal_hits = []
        page_num = 1
        while True:
            more_hits = self.connection.get_reviewable_hits(
                page_size=100, page_number=page_num)
            if not more_hits:
                break
            minimal_hits.extend(more_hits)
            page_num += 1

        if not detailed and annotations is None:
            return minimal_hits

        # Filtering by annotation needs the detailed objects, fetched one
        # request per HIT.
        detailed_hits = [
            self.connection.get_hit(hit.HITId,
                                    response_groups=('Minimal',
                                                     'HITDetail'))
            for hit in minimal_hits
        ]
        return [
            hit for hit in detailed_hits
            if annotation_filter(annotations, hit)
        ]

    def get_all_hits(self, annotations=None):
        """
        Get all the HITs.
        :param annotations a set of annotations to get HITs for, all HITs if
               not specified
        :return a list of HIT objects
        """

        return [
            hit for hit in self.connection.get_all_hits()
            if annotation_filter(annotations, hit)
        ]

    def get_assignments(self, hits=None, hit_ids=None, status=None):
        """
        Retrieves individual assignments associated with the supplied HITs.
        One of `hits` or `hit_ids` must be given; `hit_ids` wins if both are.
        :param hits the HITs to get assignments for
        :param hit_ids the HITIds to get assignments for
        :param status assignment status to filter by
        :return dict from HITId to lists of assignments

        NOTE(review): a single get_assignments request is made per HIT with
        the library's default paging -- confirm that one page is enough.
        """
        if hit_ids is None:
            hit_ids = [hit.HITId for hit in hits]
        return {
            hit_id: self.connection.get_assignments(hit_id, status=status)
            for hit_id in hit_ids
        }

    def disable_hit(self, hit=None, hit_id=None):
        """
        disable the specified hit (or the hit with the specified id). must
        specify either `hit` or `hit_id`
        :param hit a HIT object to disable
        :param hit_id a HITId to disable
        """
        hit_id = hit.HITId if hit is not None else hit_id
        return self.connection.disable_hit(hit_id)

    def approve_assignment(self,
                           assignment=None,
                           assignment_id=None,
                           feedback=None):
        """
        approve the specified assignment (or the assignment with the specified id)
        must specify either `assignment` or `assignment_id`
        :param assignment an assignment object to approve
        :param assignment_id an AssignmentId to approve
        :param feedback optional feedback for the worker
        """
        assignment_id = assignment.AssignmentId if assignment is not None else assignment_id
        return self.connection.approve_assignment(assignment_id, feedback)

    def reject_assignment(self,
                          assignment=None,
                          assignment_id=None,
                          feedback=None):
        """
        reject the specified assignment (or the assignment with the specified id)
        must specify either `assignment` or `assignment_id`
        :param assignment an assignment object to reject
        :param assignment_id an AssignmentId to reject
        :param feedback optional feedback for the worker
        """
        assignment_id = assignment.AssignmentId if assignment is not None else assignment_id
        return self.connection.reject_assignment(assignment_id, feedback)
Exemplo n.º 16
0
class MTurkProvider(object):
    """
    Helper that wraps an `MTurkConnection` and keeps the local MTurk models
    (HITs, HIT types, qualifications) in step with Mechanical Turk.
    """
    # Description text passed to `create_hit_type` for every HIT type.
    description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \
                  'Mechanical Turk workers are welcome to do it'
    # Keywords passed to `create_hit_type` for every HIT type.
    keywords = ['daemo']
    # NOTE(review): not referenced by the methods of this class; presumably
    # default locale / approved-HIT requirements -- confirm against callers.
    countries = ['US', 'CA']
    min_hits = 1000

    def __init__(self, host, aws_access_key_id, aws_secret_access_key):
        self.host = host
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=settings.MTURK_HOST)
        self.connection.APIVersion = "2014-08-15"
        if not self.host:
            raise ValueError("Please provide a host url")

    def get_connection(self):
        return self.connection

    @staticmethod
    def _mturk_system_qualifications(qualification):
        requirements = []
        for item in qualification.items.all():
            if item.expression['attribute'] not in [
                    'location', 'approval_rate', 'total_tasks'
            ]:
                continue
            requirement = None
            if item.expression['attribute'] == 'location':
                op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN
                requirement = MultiLocaleRequirement(op, [
                    val.strip() for val in item.expression['value']
                    if val is not None and val != ''
                ])
            elif item.expression['attribute'] == 'approval_rate':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = PercentAssignmentsApprovedRequirement(
                    op, item.expression['value'])
            elif item.expression['attribute'] == 'total_tasks':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = NumberHitsApprovedRequirement(
                    op, item.expression['value'])

            requirements.append(requirement)
        return requirements

    def get_qualifications(self, project, boomerang_threshold, add_boomerang):
        """
        Build the qualification requirements for a project's HITs.

        The project's own qualification (if any) is translated first. A
        'Boomerang Score' qualification type is then created or fetched.
        When the threshold is at or below the configured midpoint, one
        deny-list ('Waitlist') qualification is created per wait-list bucket
        whose upper bound is at or below the threshold; otherwise a single
        minimum-score requirement on the boomerang qualification is used.

        Args:
            project: project whose HITs are being created.
            boomerang_threshold: integer threshold (rating scaled by 100).
            add_boomerang: when false, boomerang qualification types are
                still created but no boomerang requirement is attached.

        Returns:
            tuple of (`Qualifications` holding the requirements, the
            boomerang qualification or None if it could not be created).
        """
        requirements = []
        if project.qualification is not None:
            requirements += self._mturk_system_qualifications(
                project.qualification)
        boomerang_qual, success = self.create_qualification_type(
            owner_id=project.owner_id,
            project_id=project.group_id,
            name='Boomerang Score #{}'.format(project.group_id),
            flag=FLAG_Q_BOOMERANG,
            description='No description available')
        if boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100):
            for i, bucket in enumerate(WAIT_LIST_BUCKETS):
                if int(bucket[1] * 100) > boomerang_threshold:
                    continue
                # Separate flag name: the outcome of the score qualification
                # above must not be overwritten by the per-bucket outcome.
                boomerang_blacklist, blacklist_created = \
                    self.create_qualification_type(
                        owner_id=project.owner_id,
                        name='Boomerang Waitlist #{}-{}'.format(
                            project.group_id, len(WAIT_LIST_BUCKETS) - i),
                        flag=FLAG_Q_BOOMERANG,
                        description='No description available',
                        deny=True,
                        project_id=project.group_id,
                        bucket=bucket)
                if blacklist_created and add_boomerang:
                    requirements.append(BoomerangRequirement(
                        qualification_type_id=boomerang_blacklist.type_id,
                        comparator=OP_DNE,
                        integer_value=None))
        elif success and add_boomerang:
            # Only touch boomerang_qual after confirming it was created:
            # create_qualification_type returns (None, False) on failure.
            requirements.append(BoomerangRequirement(
                qualification_type_id=boomerang_qual.type_id,
                comparator=OP_GTEQ,
                integer_value=boomerang_threshold))
        return Qualifications(requirements), boomerang_qual

    def create_hits(self, project, tasks=None, repetition=None):
        """
        Create, or re-type, the Mechanical Turk HITs for a project's tasks.

        Args:
            project: project whose tasks should be published.
            tasks: optional rows to process; each row is indexed
                positionally as (id, repetition, group_id,
                remaining_assignments, min_rating). When falsy, the rows are
                loaded with the SQL query below.
            repetition: not used in this method body.

        Returns:
            'SUCCESS' when every row was processed, 'FAILURE' when a HIT
            type could not be obtained, 'FAILED' when creating a HIT raised
            `MTurkRequestError`.
            NOTE(review): the two failure strings differ -- confirm callers
            handle both.
        """
        # if project.min_rating > 0:
        #     return 'NOOP'
        if not tasks:
            cursor = connection.cursor()
            # One row per task group that still has fewer counted assignments
            # than the project's repetition; skipped, rejected and expired
            # assignments are not counted.
            # noinspection SqlResolve
            query = '''
                SELECT
                  max(id)                   id,
                  repetition,
                  group_id,
                  repetition - sum(existing_assignments) remaining_assignments,
                  min_rating
                FROM (
                       SELECT
                         t_rev.id,
                         t.group_id,
                         t.min_rating,
                         p.repetition,
                         CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s)
                           THEN 0
                         ELSE 1 END existing_assignments
                       FROM crowdsourcing_task t
                         INNER JOIN crowdsourcing_project p ON t.project_id = p.id
                         INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id
                         LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id
                         LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id
                       WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL
                       AND t_rev.deleted_at IS NULL
                ) t
                GROUP BY group_id, repetition, min_rating HAVING sum(existing_assignments) < repetition;
            '''
            cursor.execute(
                query, {
                    'skipped': TaskWorker.STATUS_SKIPPED,
                    'rejected': TaskWorker.STATUS_REJECTED,
                    'expired': TaskWorker.STATUS_EXPIRED,
                    'project_id': project.id
                })
            tasks = cursor.fetchall()

        # Boomerang requirements are attached only once at least one
        # requester-originated rating exists.
        rated_workers = Rating.objects.filter(
            origin_type=Rating.RATING_REQUESTER).count()
        add_boomerang = rated_workers > 0

        # Defaults: 24 hours to complete an assignment, 7 days of HIT lifetime.
        duration = project.timeout if project.timeout is not None else datetime.timedelta(
            hours=24)
        lifetime = project.deadline - timezone.now(
        ) if project.deadline is not None else datetime.timedelta(days=7)

        for task in tasks:
            # task[0] is the task id, task[3] the remaining assignments and
            # task[4] the minimum rating (see the SELECT list above).
            question = self.create_external_question(task[0])
            mturk_hit = MTurkHIT.objects.filter(task_id=task[0]).first()
            # NOTE(review): int() truncates, so a float such as 0.29 gives 28
            # here (0.29 * 100 == 28.999999999999996) -- confirm the type of
            # min_rating.
            qualifications, boomerang_qual = self.get_qualifications(
                project=project,
                boomerang_threshold=int(round(task[4], 2) * 100),
                add_boomerang=add_boomerang)
            qualifications_mask = 0
            if qualifications is not None:
                qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG
            hit_type, success = self.create_hit_type(
                title=project.name,
                description=self.description,
                price=project.price,
                duration=duration,
                keywords=self.keywords,
                approval_delay=datetime.timedelta(days=2),
                qual_req=qualifications,
                qualifications_mask=qualifications_mask,
                boomerang_threshold=int(round(task[4], 2) * 100),
                owner_id=project.owner_id,
                boomerang_qual=boomerang_qual)
            if not success:
                return 'FAILURE'

            if mturk_hit is None:
                # No HIT recorded for this task yet: create it on MTurk and
                # register for assignment notifications on its HIT type.
                try:
                    hit = self.connection.create_hit(
                        hit_type=hit_type.string_id,
                        max_assignments=task[3],
                        lifetime=lifetime,
                        question=question)[0]
                    self.set_notification(hit_type_id=hit.HITTypeId)
                    mturk_hit = MTurkHIT(hit_id=hit.HITId,
                                         hit_type=hit_type,
                                         task_id=task[0])
                except MTurkRequestError as e:
                    error = e.errors[0][0]
                    if error == 'AWS.MechanicalTurk.InsufficientFunds':
                        # Tell the project owner about the funding problem.
                        message = {
                            "type": "ERROR",
                            "detail":
                            "Insufficient funds on your Mechanical Turk account!",
                            "code": error
                        }

                        redis_publisher = RedisPublisher(facility='bot',
                                                         users=[project.owner])
                        message = RedisMessage(json.dumps(message))
                        redis_publisher.publish_message(message)
                    # Any request error aborts the whole run.
                    return 'FAILED'
            else:
                # A HIT already exists: move it to the current HIT type when
                # that differs from the recorded one.
                if mturk_hit.hit_type_id != hit_type.id:
                    result, success = self.change_hit_type_of_hit(
                        hit_id=mturk_hit.hit_id,
                        hit_type_id=hit_type.string_id)
                    if success:
                        mturk_hit.hit_type = hit_type
            mturk_hit.save()
        return 'SUCCESS'

    def create_hit_type(self,
                        owner_id,
                        title,
                        description,
                        price,
                        duration,
                        boomerang_threshold,
                        keywords=None,
                        approval_delay=None,
                        qual_req=None,
                        qualifications_mask=0,
                        boomerang_qual=None):
        """
        Fetch the matching local HIT type, registering a new one if needed.

        A local `MTurkHITType` is looked up by owner, title, description,
        price, duration, qualification mask and boomerang threshold. When
        none exists, the HIT type is registered on Mechanical Turk and a
        local record carrying the returned HITTypeId is saved.

        Returns:
            tuple of (hit type, True) on success, or (None, False) when the
            registration raised `MTurkRequestError`.
        """
        price_decimal = Decimal(str(price))
        existing = MTurkHITType.objects.filter(
            owner_id=owner_id,
            name=title,
            description=description,
            price=price_decimal,
            duration=duration,
            qualifications_mask=qualifications_mask,
            boomerang_threshold=boomerang_threshold).first()
        if existing is not None:
            return existing, True

        reward = Price(price)
        try:
            registered = self.connection.register_hit_type(
                title=title,
                description=description,
                reward=reward,
                duration=duration,
                keywords=keywords,
                approval_delay=approval_delay,
                qual_req=qual_req)[0]
            created = MTurkHITType(owner_id=owner_id,
                                   name=title,
                                   description=description,
                                   price=price_decimal,
                                   keywords=keywords,
                                   duration=duration,
                                   qualifications_mask=qualifications_mask,
                                   boomerang_qualification=boomerang_qual,
                                   boomerang_threshold=boomerang_threshold)
            # The MTurk-side identifier is stored on the local record.
            created.string_id = registered.HITTypeId
            created.save()
        except MTurkRequestError:
            return None, False
        return created, True

    def create_external_question(self, task, frame_height=800):
        """
        Build the `ExternalQuestion` pointing at this application's task page.

        Args:
            task: value passed to `Hashids.encode` to form the `taskId`
                query parameter (callers in this class pass a task id).
            frame_height: frame height handed to `ExternalQuestion`.

        Returns:
            the `ExternalQuestion` for the task URL.
        """
        hasher = Hashids(salt=settings.SECRET_KEY,
                         min_length=settings.ID_HASH_MIN_LENGTH)
        encoded_id = hasher.encode(task)
        return ExternalQuestion(
            external_url=self.host + '/mturk/task/?taskId=' + encoded_id,
            frame_height=frame_height)

    def update_max_assignments(self, task):
        """
        Grow, expire or revive a task's HIT to match remaining repetitions.

        Args:
            task: mapping with an 'id' key identifying the `Task`.

        Returns:
            'SUCCESS'

        Raises:
            MTurkHIT.DoesNotExist: if the task has no associated HIT.
        """
        task = Task.objects.get(id=task['id'])
        mturk_hit = task.mturk_hit
        if not mturk_hit:
            raise MTurkHIT.DoesNotExist(
                "This task is not associated to any mturk hit")
        # Rejected, skipped and expired task workers do not count as done.
        assignments_completed = task.task_workers.filter(~Q(status__in=[
            TaskWorker.STATUS_REJECTED, TaskWorker.STATUS_SKIPPED,
            TaskWorker.STATUS_EXPIRED
        ])).count()
        remaining_assignments = task.project.repetition - assignments_completed
        if remaining_assignments > 0 and mturk_hit.num_assignments == mturk_hit.mturk_assignments. \
            filter(status=TaskWorker.STATUS_SUBMITTED).count() and \
                mturk_hit.mturk_assignments.filter(status=TaskWorker.STATUS_IN_PROGRESS).count() == 0:
            # Every slot is submitted and nothing is in progress, yet more
            # work is needed: open one more assignment and extend the HIT.
            self.add_assignments(hit_id=mturk_hit.hit_id, increment=1)
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            mturk_hit.num_assignments += 1
            mturk_hit.save()
        elif remaining_assignments == 0:
            self.expire_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_EXPIRED
            mturk_hit.save()
        elif remaining_assignments > 0 and \
                mturk_hit.status == MTurkHIT.STATUS_EXPIRED:
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            # Persist the revived status, as the other branches do;
            # without this the change was lost.
            mturk_hit.save()
        return 'SUCCESS'

    def get_assignment(self, assignment_id):
        try:
            return self.connection.get_assignment(assignment_id)[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.MechanicalTurk.InvalidAssignmentState':
                return assignment_id, False
            return None, False

    def set_notification(self, hit_type_id):
        self.connection.set_rest_notification(
            hit_type=hit_type_id,
            url=self.host + '/api/mturk/notification',
            event_types=[
                'AssignmentReturned', 'AssignmentAbandoned',
                'AssignmentAccepted', 'AssignmentSubmitted'
            ])

    def approve_assignment(self, task_worker):
        """
        Approve the MTurk assignment linked to a task worker, if there is one.

        Args:
            task_worker: mapping with an 'id' key identifying the TaskWorker.

        Returns:
            bool: False when Mechanical Turk rejected the request, True
            otherwise (including when no MTurk assignment is linked).
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if not hasattr(task_worker_obj, 'mturk_assignments'):
            return True
        # Fetch once: the original queried `.first()` for the check and again
        # for the use, so the second lookup could return None.
        mturk_assignment = task_worker_obj.mturk_assignments.first()
        if mturk_assignment is None:
            return True
        try:
            self.connection.approve_assignment(mturk_assignment.assignment_id)
        except MTurkRequestError:
            return False
        return True

    def reject_assignment(self, task_worker):
        """
        Reject the MTurk assignment linked to a task worker, if there is one.

        Args:
            task_worker: mapping with an 'id' key identifying the TaskWorker.

        Returns:
            bool: False when Mechanical Turk rejected the request, True
            otherwise (including when no MTurk assignment is linked).
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if not hasattr(task_worker_obj, 'mturk_assignments'):
            return True
        # Fetch once: the original queried `.first()` for the check and again
        # for the use, so the second lookup could return None.
        mturk_assignment = task_worker_obj.mturk_assignments.first()
        if mturk_assignment is None:
            return True
        try:
            self.connection.reject_assignment(mturk_assignment.assignment_id)
        except MTurkRequestError:
            return False
        return True

    def expire_hit(self, hit_id):
        try:
            self.connection.expire_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def disable_hit(self, hit_id):
        try:
            self.connection.disable_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def extend_hit(self, hit_id):
        try:
            self.connection.extend_hit(hit_id=hit_id,
                                       expiration_increment=604800)  # 7 days
        except MTurkRequestError:
            return False
        return True

    def add_assignments(self, hit_id, increment=1):
        try:
            self.connection.extend_hit(hit_id=hit_id,
                                       assignments_increment=increment)
        except MTurkRequestError:
            return False
        return True

    def test_connection(self):
        try:
            return self.connection.get_account_balance()[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.NotAuthorized':
                return None, False
            return None, False

    def get_account_balance(self):
        try:
            return self.connection.get_account_balance()[0]
        except MTurkRequestError:
            return None

    def create_qualification_type(self,
                                  owner_id,
                                  name,
                                  flag,
                                  description,
                                  project_id,
                                  auto_granted=False,
                                  auto_granted_value=None,
                                  deny=False,
                                  bucket=None):
        """
        Get or create a qualification type and sync worker scores into it.

        Worker ratings given by `owner_id` for the project group are loaded
        with the SQL below. The qualification is looked up locally by owner,
        flag and name and created on Mechanical Turk when missing. Each rated
        worker whose username has the form 'mturk.<id>' is then assigned the
        qualification (unless already recorded) and the local score record
        is created or updated.

        Args:
            owner_id: id of the requester whose ratings are used.
            name: qualification name, also used for the local lookup.
            flag: flag stored on and used to look up the local record.
            description: description sent to Mechanical Turk.
            project_id: project group id the ratings are restricted to.
            auto_granted: passed through to Mechanical Turk and stored.
            auto_granted_value: passed through to Mechanical Turk and stored.
            deny: together with `bucket`, restricts ratings to the bucket
                range and marks a newly created record as a blacklist.
            bucket: pair indexed as (lower bound, upper bound) of ratings.

        Returns:
            tuple of (qualification, True), or (None, False) when creating
            the qualification type raised `MTurkRequestError`.
        """
        # Weighted average of the owner's ratings per worker: each rating is
        # weighted by BOOMERANG_TASK_ALPHA raised to its zero-based position
        # when ordered by task-worker creation time, newest first.
        # noinspection SqlResolve
        query = '''
            SELECT * FROM (
                SELECT
                  task.target_id,
                  task.username,
                  round(task.task_w_avg::NUMERIC, 2) rating
                  --round(coalesce(task.task_w_avg, requester.requester_w_avg,
                  --  platform.platform_w_avg)::NUMERIC, 2) rating
                FROM (
                               SELECT
                                 target_id,
                                 origin_id,
                                 project_id,
                                 username,
                                 sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number))
                                 / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg
                               FROM (

                                      SELECT
                                        r.id,
                                        r.origin_id,
                                        p.group_id                              project_id,
                                        weight,
                                        r.target_id,
                                        -1 + row_number()
                                        OVER (PARTITION BY target_id
                                          ORDER BY tw.created_at DESC) AS row_number,
                                          u.username username

                                      FROM crowdsourcing_rating r
                                        INNER JOIN crowdsourcing_task t ON t.id = r.task_id
                                        INNER JOIN crowdsourcing_project p ON p.id = t.project_id
                                        INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id
                                          AND tw.worker_id=r.target_id
                                        INNER JOIN auth_user u ON u.id = r.target_id
                                      WHERE origin_id = (%(origin_id)s) AND origin_type = (%(origin_type)s)) t
                               GROUP BY origin_id, target_id, project_id, username)
                             task WHERE task.project_id = (%(project_id)s)
            ) r
        '''
        # Appended only for deny-list qualifications (see below).
        extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);'
        # NOTE(review): the REQUESTER/PLATFORM alpha entries have no matching
        # placeholder in the query text above.
        params = {
            'origin_type': Rating.RATING_REQUESTER,
            'origin_id': owner_id,
            'project_id': project_id,
            'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA,
            'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA,
            'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA
        }
        # Extra fields stored on a newly created MTurkQualification.
        obj_params = {'upper_bound': 300, 'lower_bound': 100}
        if deny and bucket is not None:
            query += extra_query
            # The query uses the raw bucket bounds; the stored record uses
            # the bounds scaled by 100.
            params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]})
            obj_params.update({
                'upper_bound': bucket[1] * 100,
                'lower_bound': bucket[0] * 100,
                'is_blacklist': True
            })
        cursor = connection.cursor()
        cursor.execute(query, params=params)
        worker_ratings_raw = cursor.fetchall()
        worker_ratings = [{
            "worker_id": r[0],
            "worker_username": r[1],
            "rating": r[2]
        } for r in worker_ratings_raw]

        qualification = MTurkQualification.objects.filter(owner_id=owner_id,
                                                          flag=flag,
                                                          name=name).first()
        assigned_workers = []
        if qualification is None:
            # First use of this name: create the type on Mechanical Turk,
            # then mirror it locally with the returned QualificationTypeId.
            try:
                qualification_type = self.connection. \
                    create_qualification_type(name=name, description=description,
                                              status='Active',
                                              auto_granted=auto_granted,
                                              auto_granted_value=auto_granted_value)[0]
                qualification = MTurkQualification.objects.create(
                    owner_id=owner_id,
                    flag=flag,
                    name=name,
                    description=description,
                    auto_granted=auto_granted,
                    auto_granted_value=auto_granted_value,
                    type_id=qualification_type.QualificationTypeId,
                    **obj_params)
            except MTurkRequestError:
                return None, False
        else:
            # Workers that already have a local record for this qualification.
            assigned_workers = MTurkWorkerQualification.objects.values(
                'worker').filter(qualification=qualification).values_list(
                    'worker', flat=True)

        for rating in worker_ratings:
            # Only usernames of the form 'mturk.<id>' are synced; the part
            # after the dot, upper-cased, is used as the MTurk worker id.
            user_name = rating["worker_username"].split('.')
            if len(user_name) == 2 and user_name[0] == 'mturk':
                mturk_worker_id = user_name[1].upper()
                if mturk_worker_id not in assigned_workers:
                    self.assign_qualification(
                        qualification_type_id=qualification.type_id,
                        worker_id=mturk_worker_id,
                        value=int(rating['rating'] * 100))
                # The local score is refreshed whether or not the worker was
                # already assigned.
                defaults = {
                    'qualification': qualification,
                    'worker': mturk_worker_id,
                    'score': int(rating['rating'] * 100)
                }
                MTurkWorkerQualification.objects.update_or_create(
                    qualification=qualification,
                    worker=mturk_worker_id,
                    defaults=defaults)
        return qualification, True

    def change_hit_type_of_hit(self, hit_id, hit_type_id):
        try:
            result = self.connection.change_hit_type_of_hit(
                hit_id=hit_id, hit_type=hit_type_id)
        except MTurkRequestError:
            return None, False
        return result, True

    def update_worker_boomerang(self, project_id, worker_id, task_avg,
                                requester_avg):
        """
        Push a worker's task-level score into the project's boomerang
        qualification.

        Args:
            project_id: project group id used to find one of its HITs.
            worker_id: MTurk worker id.
            task_avg: average rating; stored as int(task_avg * 100).
            requester_avg: accepted but not used by this method.

        Returns:
            str: always 'SUCCESS', including when the project has no HIT.
        """
        hit = MTurkHIT.objects.select_related(
            'hit_type__boomerang_qualification').filter(
                task__project__group_id=project_id).first()
        if hit is None:
            return 'SUCCESS'

        qualification = hit.hit_type.boomerang_qualification
        existing = MTurkWorkerQualification.objects.filter(
            qualification=qualification, worker=worker_id).first()
        score = int(task_avg * 100)

        if existing is not None:
            # Worker already holds the qualification: overwrite the score.
            self.update_score(existing, score=score, override=True)
            return 'SUCCESS'

        # First score for this worker: record it locally, then assign the
        # qualification on Mechanical Turk.
        MTurkWorkerQualification.objects.create(
            qualification=qualification,
            worker=worker_id,
            score=score,
            overwritten=True)
        self.assign_qualification(
            qualification_type_id=qualification.type_id,
            worker_id=worker_id,
            value=score)
        return 'SUCCESS'

    def update_score(self, worker_qual, score, override=False):
        if worker_qual is None:
            return False
        try:
            self.connection.update_qualification_score(
                worker_qual.qualification.type_id, worker_qual.worker, score)
            worker_qual.overwritten = override
            worker_qual.score = score
            worker_qual.save()
        except MTurkRequestError:
            return False
        return True

    def assign_qualification(self, qualification_type_id, worker_id, value=1):
        """
        Revoke a qualification from a WorkerId
        Args:
            qualification_type_id:
            worker_id:
            value

        Returns:
            bool
        """
        try:
            self.connection.assign_qualification(qualification_type_id,
                                                 worker_id,
                                                 value,
                                                 send_notification=False)
            return True
        except MTurkRequestError:
            return False

    def revoke_qualification(self, qualification_type_id, worker_id):
        try:
            self.connection.revoke_qualification(
                qualification_type_id=qualification_type_id,
                subject_id=worker_id)
            return True
        except MTurkRequestError:
            return False

    def notify_workers(self, worker_ids, subject, message_text):
        try:
            self.connection.notify_workers(worker_ids, subject, message_text)
            return True
        except MTurkRequestError:
            return False
Exemplo n.º 17
0
#!/bin/python
"""
Very simple example of using boto API to get your account balance.
This code is based on/shamelessly stolen from the tutorial at
http://www.toforge.com/2011/04/boto-mturk-tutorial-create-hits/
"""

from boto.mturk.connection import MTurkConnection

#These keys are unique to your account, and can be found on Amazon Web Services under 'My Account' -> 'Security Credentials' 
ACCESS_ID = 'YOUR KEY'
SECRET_KEY = 'YOUR KEY'
HOST = 'mechanicalturk.sandbox.amazonaws.com'

#Set up a connection with MTurk 
conn = MTurkConnection(aws_access_key_id=ACCESS_ID, aws_secret_access_key=SECRET_KEY, host=HOST)
 
print 'Your current balance: ',  conn.get_account_balance()
Exemplo n.º 18
0
class MTurkServices(object):
    ''' MTurk services

    Convenience layer over boto's MTurkConnection.  Most methods open a new
    connection through connect_to_turk() and report failure with a falsy
    return value (usually False) instead of raising MTurkRequestError.
    '''

    _SANDBOX_HOST = 'mechanicalturk.sandbox.amazonaws.com'
    _PRODUCTION_HOST = 'mechanicalturk.amazonaws.com'

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        ''' Store the credentials and sandbox flag, then verify the login.

        The verification result is kept in self.valid_login; a warning is
        printed when it is False.
        '''
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.valid_login = self.verify_aws_login()

        if not self.valid_login:
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        ''' Update credentials (does not re-run verify_aws_login) '''
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        ''' Set sandbox; read by connect_to_turk() to choose the host '''
        self.is_sandbox = is_sandbox

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _open_connection(self, host):
        ''' Store a new MTurkConnection for `host` on self.mtc '''
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)

    def _list_hits(self):
        ''' Return every HIT as a list, or None if the connection or the
        request failed '''
        if not self.connect_to_turk():
            return None
        try:
            # Materialised inside the try so that a request error raised
            # while iterating (should the result be lazy) is caught too.
            return list(self.mtc.get_all_hits())
        except MTurkRequestError:
            return None

    @staticmethod
    def _hit_summary(hit):
        ''' Copy the reported fields of a boto HIT into an MTurkHIT '''
        return MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        })

    @staticmethod
    def _assignment_summary(assignment):
        ''' Copy the reported fields of a boto assignment into a dict '''
        return {
            'hitId': assignment.HITId,
            'assignmentId': assignment.AssignmentId,
            'workerId': assignment.WorkerId,
            'submit_time': assignment.SubmitTime,
            'accept_time': assignment.AcceptTime,
            'status': assignment.AssignmentStatus,
        }

    def _get_hit_assignments(self, hit_id, assignment_status, page_size=100):
        ''' Fetch every page of assignments for one HIT as a flat list.

        May raise MTurkRequestError; the caller handles it.
        '''
        def fetch_page(page_number):
            return self.mtc.get_assignments(
                hit_id,
                status=assignment_status,
                sort_by='SubmitTime',
                page_size=page_size,
                page_number=page_number)

        first_page = fetch_page(1)
        total_results = int(first_page.TotalNumResults)
        # integer division rounded up
        total_pages = (total_results + page_size - 1) // page_size

        assignments = list(first_page)
        for page_number in range(2, total_pages + 1):
            assignments.extend(fetch_page(page_number))
        return assignments

    # ------------------------------------------------------------------
    # HIT queries
    # ------------------------------------------------------------------
    def get_reviewable_hits(self):
        ''' Get reviewable HITs (status "Reviewable" or "Reviewing").

        Returns a list of MTurkHIT, or False on failure.
        '''
        hits = self._list_hits()
        if hits is None:
            return False
        return [self._hit_summary(hit) for hit in hits
                if hit.HITStatus in ("Reviewable", "Reviewing")]

    def get_all_hits(self):
        ''' Get all HITs as a list of MTurkHIT, or False on failure '''
        hits = self._list_hits()
        if hits is None:
            return False
        return [self._hit_summary(hit) for hit in hits]

    def get_active_hits(self):
        ''' Get active HITs (those whose `expired` attribute is falsy).

        Returns a list of MTurkHIT, or False on failure.
        '''
        hits = self._list_hits()
        if hits is None:
            return False
        return [self._hit_summary(hit) for hit in hits if not hit.expired]

    # ------------------------------------------------------------------
    # worker / assignment queries and actions
    # ------------------------------------------------------------------
    def get_workers(self, assignment_status=None, chosen_hit=None):
        ''' Get workers

        Collects the assignments of `chosen_hit`, or of every HIT when no
        HIT is chosen, restricted to `assignment_status` when given.

        Returns a list of dicts (hitId, assignmentId, workerId, submit_time,
        accept_time, status), or False on failure.
        '''
        if not self.connect_to_turk():
            return False
        try:
            if chosen_hit:
                hit_ids = [chosen_hit]
            else:
                hit_ids = [hit.HITId for hit in self.mtc.get_all_hits()]

            assignments = []
            for hit_id in hit_ids:
                assignments.extend(
                    self._get_hit_assignments(hit_id, assignment_status))
        except MTurkRequestError:
            return False
        return [self._assignment_summary(assignment)
                for assignment in assignments]

    def get_worker(self, assignment_id):
        ''' Get the worker data of one assignment.

        Returns a one-element list holding the same kind of dict that
        get_workers() produces, or False on failure.
        '''
        if not self.connect_to_turk():
            return False
        try:
            assignment = self.mtc.get_assignment(assignment_id)[0]
        except MTurkRequestError:
            return False
        return [self._assignment_summary(assignment)]

    def bonus_worker(self, assignment_id, amount, reason=""):
        ''' Bonus worker

        Looks up the worker of `assignment_id` and grants `amount` as a
        bonus.  Returns True on success; on MTurkRequestError the error is
        printed and False is returned.
        '''
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            worker_id = assignment.WorkerId
            self.mtc.grant_bonus(worker_id, assignment_id, bonus, reason)
            return True
        except MTurkRequestError as exception:
            print(exception)
            return False

    def approve_worker(self, assignment_id):
        ''' Approve worker; returns True on success, False otherwise '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        ''' Reject worker; returns True on success, False otherwise '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        ''' Unreject worker; returns True on success, False otherwise '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    # ------------------------------------------------------------------
    # connection handling
    # ------------------------------------------------------------------
    def verify_aws_login(self):
        ''' Verify AWS login

        Returns False right away for the placeholder credentials.  Otherwise
        asks for the account balance and returns whether that request
        succeeded; the error message of a failed request is printed.
        '''
        if ((self.aws_access_key_id == 'YourAccessKeyId') or
                (self.aws_secret_access_key == 'YourSecretAccessKey')):
            return False

        # The check always goes to the production host, whatever is_sandbox
        # says.
        self._open_connection(self._PRODUCTION_HOST)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as exception:
            print(exception.error_message)
            return False
        return True

    def connect_to_turk(self):
        ''' Connect to turk

        Replaces self.mtc with a new connection to the sandbox or production
        host according to self.is_sandbox.  Returns False (after printing a
        message) when the stored login is not valid, True otherwise.
        '''
        if not self.valid_login:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS '
                  'credentials invalid.')
            return False
        if self.is_sandbox:
            host = self._SANDBOX_HOST
        else:
            host = self._PRODUCTION_HOST
        self._open_connection(host)
        return True

    # ------------------------------------------------------------------
    # HIT creation and lifecycle
    # ------------------------------------------------------------------
    def configure_hit(self, hit_config):
        ''' Configure HIT

        Registers a HIT type from `hit_config` and stores the create_hit()
        keyword arguments in self.param_dict.  Uses self.mtc, so a
        connection must already be open (create_hit() takes care of that).

        Keys read from hit_config: ad_location, approve_requirement,
        number_hits_approved, require_master_workers, us_only, title,
        description, reward, duration, keywords, lifetime, max_assignments.
        '''
        # configure question_url based on the id
        experiment_portal_url = hit_config['ad_location']
        frame_height = 600
        mturk_question = ExternalQuestion(experiment_portal_url, frame_height)

        # Qualification:
        quals = Qualifications()
        approve_requirement = hit_config['approve_requirement']
        quals.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo",
                                                  approve_requirement))
        number_hits_approved = hit_config['number_hits_approved']
        quals.add(
            NumberHitsApprovedRequirement("GreaterThanOrEqualTo",
                                          number_hits_approved))

        require_master_workers = hit_config['require_master_workers']
        if require_master_workers:
            quals.add(MasterRequirement(sandbox=self.is_sandbox))

        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Create a HIT type for this HIT.
        hit_type = self.mtc.register_hit_type(
            hit_config['title'],
            hit_config['description'],
            hit_config['reward'],
            hit_config['duration'],
            keywords=hit_config['keywords'],
            approval_delay=None,
            qual_req=quals)[0]

        # Check the config file to see if notifications are wanted.
        config = PsiturkConfig()
        config.load_config()

        try:
            url = config.get('Server Parameters', 'notification_url')

            all_event_types = [
                "AssignmentAccepted",
                "AssignmentAbandoned",
                "AssignmentReturned",
                "AssignmentSubmitted",
                "HITReviewable",
                "HITExpired",
            ]

            self.mtc.set_rest_notification(
                hit_type.HITTypeId,
                url,
                event_types=all_event_types)

        except Exception:
            # Notifications are best effort: any failure here (reading the
            # option or registering the notification) is ignored so the HIT
            # can still be created.  Unlike a bare `except:`, this lets
            # KeyboardInterrupt and SystemExit through.
            pass

        # Specify all the HIT parameters
        self.param_dict = dict(
            hit_type=hit_type.HITTypeId,
            question=mturk_question,
            lifetime=hit_config['lifetime'],
            max_assignments=hit_config['max_assignments'],
            questions=None,
            response_groups=[
                'Minimal',
                'HITDetail',
                'HITQuestion',
                'HITAssignmentSummary'
            ])

    def check_balance(self):
        ''' Check balance

        Returns the first entry of the account balance result, or '-' when
        no connection can be made.
        '''
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    # TODO (if valid AWS credentials haven't been provided then
    # connect_to_turk() will fail, not error checking here and elsewhere)
    def create_hit(self, hit_config):
        ''' Create HIT

        Returns the id of the new HIT (also stored in self.hitid), or False
        when the connection fails or MTurk rejects a request (the error is
        printed).
        '''
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.param_dict)[0]
            self.hitid = myhit.HITId
        except MTurkRequestError as exception:
            print(exception)
            return False
        else:
            return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        ''' Expire HIT; returns True on success, False otherwise '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        ''' Dispose HIT

        Returns True on success and False otherwise, matching expire_hit().
        Any exception from the request is reported with a printed message.
        '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
        except Exception:
            print("Failed to dispose of HIT %s. Make sure there are no "
                  "assignments remaining to be reviewed." % hitid)
            return False
        return True
Exemplo n.º 19
0
def createhitsubmit(request):
    '''
    Create a free-text Mechanical Turk HIT from the posted task settings.

    The HIT shows the task title and description, the text of every selected
    document as a bullet list, and a single free-text question
    ("What do you find?").  It is always sent to the sandbox host (see
    `mode` below).

    @param request: Django http request whose JSON body carries the keys
        aws_secret_key, aws_access_key_id, task_selected_docs (Doc primary
        keys), task_title, task_description, task_duration,
        task_max_assignment and task_reward.
    @return: HttpResponse with the JSON object {'data': <account balance>}.
        A failure while creating the HIT is only printed; the balance is
        returned either way.
    '''
    from xml.sax.saxutils import escape
    from boto.mturk.connection import MTurkConnection
    from boto.mturk.question import (QuestionContent, Question, QuestionForm,
                                     Overview, AnswerSpecification,
                                     FormattedContent, FreeTextAnswer)
    print('Ok')
    # Get request data from the front-end
    requestJson = json.loads(request.body)

    user_aws_secret_key = requestJson['aws_secret_key']
    user_aws_access_key_id = requestJson['aws_access_key_id']
    task_selected_docs = requestJson['task_selected_docs']  # Doc ids
    task_title = requestJson['task_title']
    task_description = requestJson['task_description']
    task_duration = requestJson['task_duration']
    task_max_assignment = requestJson['task_max_assignment']
    task_reward = requestJson['task_reward']

    # Adjust host setting, depending on whether HIT is live (production) or
    # in testing mode (sandbox).  Change `mode` to "production" to go live.
    mode = "sandbox"

    if mode == "production":
        HOST = 'mechanicalturk.amazonaws.com'
    else:
        HOST = 'mechanicalturk.sandbox.amazonaws.com'

    mtc = MTurkConnection(aws_access_key_id=user_aws_access_key_id,
                          aws_secret_access_key=user_aws_secret_key,
                          host=HOST)

    # --------------- BUILD THE OVERVIEW -------------------
    # The description and the document texts come from the request / the
    # database and are embedded in markup, so &, < and > are escaped first.
    overview = Overview()
    overview.append_field('Title', task_title)
    overview.append(
        FormattedContent('<b>' + escape(task_description) + '</b><p></p>'))

    doc_items = ''.join(
        '<li>' + escape(Doc.objects.get(pk=doc_id).text) + '</li>'
        for doc_id in task_selected_docs)
    overview.append(FormattedContent('<ul>' + doc_items + '</ul>'))

    # --------------- BUILD THE QUESTION -------------------
    comments_content = QuestionContent()
    comments_content.append_field('Title', 'What do you find?')

    comments_question = Question(
        identifier="comments",
        content=comments_content,
        answer_spec=AnswerSpecification(FreeTextAnswer()))

    # --------------- BUILD THE QUESTION FORM -------------------
    question_form = QuestionForm()
    question_form.append(overview)
    question_form.append(comments_question)
    print('Before create hit')

    # --------------- CREATE THE HIT -------------------
    try:
        mtc.create_hit(questions=question_form,
                       max_assignments=task_max_assignment,
                       title=task_title,
                       description=task_description,
                       keywords='SomeKeywords',
                       duration=task_duration,
                       reward=task_reward)
    except Exception as e:
        # Best effort: report the problem and still answer with the balance.
        print(e)

    print('after crate hit')

    # default=str only kicks in for values json cannot encode itself, so a
    # balance made of library objects is rendered as text instead of making
    # the response fail after the HIT has already been created.
    response_body = json.dumps({'data': mtc.get_account_balance()},
                               default=str)
    return HttpResponse(response_body, content_type="application/json")
Exemplo n.º 20
0
class MyMTurk:
    """Demo of common boto MTurk calls against the sandbox host.

    Typical use: create an instance, call register_hit_type(), then one of
    the HIT-creating methods (question_form, question_form_formatted_content,
    external_question).  Results and errors are printed, not returned.
    """

    AWS_ACCESS_KEY_ID = 'skip'
    AWS_SECRET_ACCESS_KEY = 'skip'
    HOST_SANDBOX = 'mechanicalturk.sandbox.amazonaws.com'
    HOST_MTURK = 'mechanicalturk.amazonaws.com'

    EXTERNAL_URL = 'http://redbug0314.blogspot.com/p/imcrowd.html'

    # Overwritten per instance by register_hit_type() on success.  The class
    # level default means the HIT-creating methods see None (and do nothing)
    # instead of raising AttributeError when no HIT type has been registered.
    hit_type_id = None

    def __init__(self):
        """Connect to the sandbox host and build the qualification set."""
        # connect to MTurk
        self.connect = MTurkConnection(self.AWS_ACCESS_KEY_ID,
                                       self.AWS_SECRET_ACCESS_KEY,
                                       host=self.HOST_SANDBOX)

        # Qualification setting
        q = self.qualifications = Qualifications()

        # if required_to_preview == True unqualified user even can't view the hit.
        # q.add( PercentAssignmentsApprovedRequirement( comparator="GreaterThan", integer_value="95" ) )
        q.add(AdultRequirement(comparator="EqualTo", integer_value="1"))

    def register_hit_type(self):
        """Register the demo HIT type and remember the result.

        On success the value returned by the connection is stored in
        self.hit_type_id and printed; on MTurkRequestError the error details
        are printed and self.hit_type_id is left untouched.
        """
        try:
            reg_hit_type = self.connect.register_hit_type(
                title="Nine Picture!",
                description="Choose some best pictures which you think is the best from following pictures.",
                reward=0.01,
                duration=60 * 30,
                keywords="steak, photo",
                approval_delay=datetime.timedelta(days=1),
                qual_req=self.qualifications)
        except MTurkRequestError as e:
            print("register hit type error:\n status: %s reason: %s\n body: %s" % (e.status, e.reason, e.body))
        else:
            # NOTE(review): the raw return value is stored and later passed
            # as create_hit(hit_type=...); confirm it is the form create_hit
            # expects.
            self.hit_type_id = reg_hit_type
            print("hit type id %s" % reg_hit_type)

    def _create_hit(self, question, max_assignments, error_label):
        """Create a HIT of the registered type; return the result or None.

        On MTurkRequestError the details are printed after `error_label`
        and None is returned.
        """
        try:
            return self.connect.create_hit(
                hit_type=self.hit_type_id,
                question=question,
                lifetime=datetime.timedelta(days=14),
                max_assignments=max_assignments,
                annotation="This is a annotation")
        except MTurkRequestError as e:
            print("%s:\n status: %s reason: %s\n body: %s" % (error_label, e.status, e.reason, e.body))
            return None

    def question_form(self):
        """Create a HIT with one image question answered in free text.

        Does nothing when no HIT type has been registered.
        """
        if not self.hit_type_id:
            return

        qc = QuestionContent()
        # qc.append_field( 'Title', 'Is she hot?' )
        qc.append(Binary('image', 'jpg', 'http://www.miranchomeatmarket.com/images/T-%20bone%20steak.jpg', 'steak'))
        q = Question(identifier="This is the first girl!",
                     content=qc,
                     answer_spec=AnswerSpecification(FreeTextAnswer()),
                     is_required=True,
                     display_name="This is display name")
        qf = QuestionForm()
        qf.append(q)

        create_hit_rs = self._create_hit(qf, 10, "create hit type error")
        if create_hit_rs is not None:
            print("success!! key: %s" % create_hit_rs)

    def question_form_formatted_content(self):
        """Create a HIT whose question shows an XHTML table and offers nine
        image checkboxes (1 to 5 may be selected).

        Does nothing when no HIT type has been registered.
        """
        if not self.hit_type_id:
            return

        qc = QuestionContent()
        formatted_xhtml = """\
<table border="1">
  <tr>
    <td></td>
    <td align="center">1</td>
    <td align="center">2</td>
    <td align="center">3</td>
  </tr>
  <tr>
    <td align="right">A</td>
    <td align="center"><b>X</b></td>
    <td align="center">&nbsp;</td>
    <td align="center"><b>O</b></td>
  </tr>
  <tr>
    <td align="right">B</td>
    <td align="center">&nbsp;</td>
    <td align="center"><b>O</b></td>
    <td align="center">&nbsp;</td>
  </tr>
  <tr>
    <td align="right">C</td>
    <td align="center">&nbsp;</td>
    <td align="center">&nbsp;</td>
    <td align="center"><b>X</b></td>
  </tr>
  <tr>
    <td align="center" colspan="4">It is <b>X</b>'s turn.</td>
  </tr>
</table>
"""
        qc.append(FormattedContent(formatted_xhtml))

        # (image url, alternate text) of the five distinct pictures
        steak_images = [
            ('http://images.google.com/images?q=tbn:ANd9GcSh1HXq3WyOvvG7-AgvNugKC2LzImMUvUDNTuDAPwVKuw8NZzvLN62pGYhX:farm1.static.flickr.com/21/24204504_e143536a2e.jpg', 'steak1'),
            ('http://images.google.com/images?q=tbn:ANd9GcTkMoChevUBvQfmfksKDBM5oj4V2ruj6riqv7kC-_6qf9MR0igeBlJLkSI:www.miranchomeatmarket.com/images/T-%2520bone%2520steak.jpg', 'steak2'),
            ('http://images.google.com/images?q=tbn:ANd9GcSttsqT7kj9siDKZg1p4fU6W9IFlMZHCFSxFd49ECJR1Bu_1QlHQwmH1DU:img4.myrecipes.com/i/recipes/ck/06/08/grilled-steak-ck-1215910-l.jpg', 'steak3'),
            ('http://images.google.com/images?q=tbn:ANd9GcRfdQ-vuNt-W4W7JZRkAmbZpE6LLA0puCQs5erSzrGtsOY8H8t-vgEzqA:www.greendiamondgrille.com/images/new/NewYorkStripSteak.jpg', 'steak4'),
            ('http://images.google.com/images?q=tbn:ANd9GcTsJzCp6En1R9yvFQw7bGsSxiiQCqlMrFg7XCbcJ13G39Aa3e6ZilWW34oI:www.bunrab.com/dailyfeed/dailyfeed_images_jan-07/df07_01-08_steak.jpg', 'steak5'),
        ]
        # Nine options img1..img9: the five pictures, then pictures 2-5 again.
        options = steak_images + steak_images[1:]
        selections = [
            (Binary('image', 'jpg', url, alt_text).get_as_xml(), 'img%d' % number)
            for number, (url, alt_text) in enumerate(options, 1)
        ]

        q = Question(identifier="Formatted content test!",
                     content=qc,
                     answer_spec=AnswerSpecification(
                         SelectionAnswer(min=1,
                                         max=5,
                                         style='checkbox',
                                         selections=selections,
                                         type='binary')),
                     is_required=True,
                     display_name="This is display name")

        qf = QuestionForm()
        qf.append(q)

        create_hit_rs = self._create_hit(qf, 1, "create hit type error")
        if create_hit_rs is not None:
            print("success!! key: %s" % create_hit_rs)

    def external_question(self):
        """Create a HIT that embeds an external page and print its ids.

        Does nothing when no HIT type has been registered.
        """
        # create hit with id
        if not self.hit_type_id:
            return

        q = ExternalQuestion(external_url="http://www.kernel.org/pub/software/scm/git/docs/everyday.html", frame_height=200)
        # keywords = ['image', 'filter', 'google']
        # #create hit without id
        # create_hit_rs = self.connect.create_hit( question=q, lifetime=60 * 65, max_assignments=2, title="Google Image Filter", keywords=keywords, reward=0.05, duration=60 * 6, approval_delay=60 * 60, annotation='An annotation from boto external question test', response_groups=['Minimal', 'HITDetail', 'HITQuestion', 'HITAssignmentSummary', ], qualifications=self.qualifications )

        hit = self._create_hit(q, 1, "register hit type error")
        if hit is not None:
            print("hit id: %s " % hit[0].HITId)
            print("hit type id: %s " % hit[0].HITTypeId)

    def get_account_balance(self):
        """Print the account balance."""
        print(self.connect.get_account_balance())

    def getHits(self):
        """Print the result of get_all_hits()."""
        print(self.connect.get_all_hits())

    def getHit(self, hit_id):
        """Print every attribute of the first result for `hit_id`."""
        hit_rs = self.connect.get_hit(hit_id)
        hit = hit_rs[0]
        for k, v in hit.__dict__.items():
            print("%s: %s" % (k, v))

    def searchHits(self):
        """Print the result of search_hits()."""
        print(self.connect.search_hits())

    def getAssignments(self, hit_id):
        """Print the assignments of `hit_id`."""
        print(self.connect.get_assignments(hit_id))

    def getReviewableHits(self):
        """Print the result of get_reviewable_hits()."""
        print(self.connect.get_reviewable_hits())
Exemplo n.º 21
0
def new_sugg_hit(PIN_IMAGE_URL, PIN_IMAGE_TITLE):
    """Create a HIT asking for three Macys.com web ids matching a picture.

    The HIT shows the picture with its title underneath, followed by three
    single-line free-text questions (FirstWebCode, SecondWebCode,
    ThirdWebCode).  The connection uses the module-level
    AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and HOST; the balance is
    printed first when the module-level `debug` flag is set.

    Args:
        PIN_IMAGE_URL: address of the picture to show.
        PIN_IMAGE_TITLE: text displayed below the picture.

    Returns:
        Whatever the connection's create_hit() call returns.
    """
    from xml.sax.saxutils import escape, quoteattr

    mtc = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY_ID,
                          aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                          host=HOST)

    if debug:
        print(mtc.get_account_balance())

    title = 'Match these Pictures to Macy\'s Products'
    description = 'Look at this photo and match it to Macy\'s products'
    keywords = 'clothing, rating, opinions, easy, quick, macys'

    # make overview

    overview = Overview()
    overview.append_field('Title',
                          'Find three Macys.com Product Web IDs That Match')
    # Both values are embedded in markup: quoteattr() quotes and escapes the
    # address for use as an attribute value, escape() protects &, < and > in
    # the title.  Values without such characters produce the same markup as
    # plain concatenation would.
    overview.append(
        FormattedContent('<img src=' + quoteattr(PIN_IMAGE_URL) +
                         ' alt="Pintrest Image" />'
                         '<br />' + escape(PIN_IMAGE_TITLE)))

    # make question form: overview first, then webid1, webid2, webid3

    question_form = QuestionForm()
    question_form.append(overview)

    web_id_fields = (
        ('FirstWebCode', 'First WebID Code'),
        ('SecondWebCode', 'Second WebID Code'),
        ('ThirdWebCode', 'Third WebID Code'),
    )
    for identifier, field_title in web_id_fields:
        content = QuestionContent()
        content.append_field('Title', field_title)
        answer = FreeTextAnswer(num_lines=1)
        question_form.append(
            Question(identifier=identifier,
                     content=content,
                     answer_spec=AnswerSpecification(answer)))

    # --------------- CREATE THE HIT -------------------

    return mtc.create_hit(questions=question_form,
                          max_assignments=1,
                          title=title,
                          description=description,
                          keywords=keywords,
                          duration=60 * 5,
                          reward=0.05)
Exemplo n.º 22
0
#!/bin/python
"""
Very simple example of using boto API to get your account balance.
This code is based on/shamelessly stolen from the tutorial at
http://www.toforge.com/2011/04/boto-mturk-tutorial-create-hits/
"""

from boto.mturk.connection import MTurkConnection

#These keys are unique to your account, and can be found on Amazon Web Services under 'My Account' -> 'Security Credentials'
ACCESS_ID = 'YOUR KEY'
SECRET_KEY = 'YOUR KEY'
HOST = 'mechanicalturk.sandbox.amazonaws.com'

#Set up a connection with MTurk
conn = MTurkConnection(aws_access_key_id=ACCESS_ID,
                       aws_secret_access_key=SECRET_KEY,
                       host=HOST)

print 'Your current balance: ', conn.get_account_balance()
Exemplo n.º 23
0
class MyMTurk:
    """Demo wrapper around a boto ``MTurkConnection`` pointed at the sandbox host.

    Call :meth:`register_hit_type` first.  The HIT-creating methods
    (:meth:`question_form`, :meth:`question_form_formatted_content` and
    :meth:`external_question`) silently do nothing until a HIT type id has
    been stored by a successful registration.
    """

    AWS_ACCESS_KEY_ID = 'skip'
    AWS_SECRET_ACCESS_KEY = 'skip'
    HOST_SANDBOX = 'mechanicalturk.sandbox.amazonaws.com'
    HOST_MTURK = 'mechanicalturk.amazonaws.com'

    EXTERNAL_URL = 'http://redbug0314.blogspot.com/p/imcrowd.html'

    # Overwritten per instance by register_hit_type() on success.  The None
    # default keeps the ``hit_type_id`` guards below from raising
    # AttributeError when registration failed or was never attempted.
    hit_type_id = None

    def __init__(self):
        """Connect to the MTurk sandbox and prepare the worker qualifications."""
        self.connect = MTurkConnection(self.AWS_ACCESS_KEY_ID,
                                       self.AWS_SECRET_ACCESS_KEY,
                                       host=self.HOST_SANDBOX)

        # Qualification setting: only the adult-content requirement is active.
        # With required_to_preview=True an unqualified worker cannot even
        # preview the HIT.  An approval-rate requirement could be added the
        # same way, e.g.
        #   PercentAssignmentsApprovedRequirement(comparator="GreaterThan",
        #                                         integer_value="95")
        self.qualifications = Qualifications()
        self.qualifications.add(
            AdultRequirement(comparator="EqualTo", integer_value="1"))

    def register_hit_type(self):
        """Register the "Nine Picture!" HIT type and remember its id.

        On success the value returned by boto is stored in
        ``self.hit_type_id`` and printed.  On ``MTurkRequestError`` the error
        details are printed and ``hit_type_id`` is left unchanged.
        """
        try:
            reg_hit_type = self.connect.register_hit_type(
                title="Nine Picture!",
                description=
                "Choose some best pictures which you think is the best from following pictures.",
                reward=0.01,
                duration=60 * 30,
                keywords="steak, photo",
                approval_delay=datetime.timedelta(days=1),
                qual_req=self.qualifications)
        except MTurkRequestError as e:
            print("register hit type error:\n status: %s reason: %s\n body: %s" % (
                e.status, e.reason, e.body))
        else:
            self.hit_type_id = reg_hit_type
            print("hit type id %s" % reg_hit_type)

    def _create_typed_hit(self, question, max_assignments):
        """Create a HIT of the registered type; return boto's result or None.

        A ``MTurkRequestError`` is reported on stdout and turned into a None
        return so callers only have to handle the success case.
        """
        try:
            return self.connect.create_hit(
                hit_type=self.hit_type_id,
                question=question,
                lifetime=datetime.timedelta(days=14),
                max_assignments=max_assignments,
                annotation="This is a annotation")
        except MTurkRequestError as e:
            print("create hit error:\n status: %s reason: %s\n body: %s" % (
                e.status, e.reason, e.body))
            return None

    def question_form(self):
        """Publish a free-text question about one steak picture (10 assignments)."""
        if not self.hit_type_id:
            return  # no HIT type registered yet; nothing to attach the HIT to

        qc = QuestionContent()
        qc.append(
            Binary(
                'image', 'jpg',
                'http://www.miranchomeatmarket.com/images/T-%20bone%20steak.jpg',
                'steak'))
        q = Question(identifier="This is the first girl!",
                     content=qc,
                     answer_spec=AnswerSpecification(FreeTextAnswer()),
                     is_required=True,
                     display_name="This is display name")
        qf = QuestionForm()
        qf.append(q)

        create_hit_rs = self._create_typed_hit(qf, max_assignments=10)
        if create_hit_rs is not None:
            print("success!! key: %s" % create_hit_rs)

    def question_form_formatted_content(self):
        """Publish a checkbox question whose body is an XHTML table (1 assignment).

        The question offers nine image selections (img1..img9) built from five
        distinct pictures; between one and five may be ticked.
        """
        if not self.hit_type_id:
            return  # no HIT type registered yet; nothing to attach the HIT to

        qc = QuestionContent()
        formatted_xhtml = """\
<table border="1">
  <tr>
    <td></td>
    <td align="center">1</td>
    <td align="center">2</td>
    <td align="center">3</td>
  </tr>
  <tr>
    <td align="right">A</td>
    <td align="center"><b>X</b></td>
    <td align="center">&nbsp;</td>
    <td align="center"><b>O</b></td>
  </tr>
  <tr>
    <td align="right">B</td>
    <td align="center">&nbsp;</td>
    <td align="center"><b>O</b></td>
    <td align="center">&nbsp;</td>
  </tr>
  <tr>
    <td align="right">C</td>
    <td align="center">&nbsp;</td>
    <td align="center">&nbsp;</td>
    <td align="center"><b>X</b></td>
  </tr>
  <tr>
    <td align="center" colspan="4">It is <b>X</b>'s turn.</td>
  </tr>
</table>
"""
        qc.append(FormattedContent(formatted_xhtml))

        # (url, alt text) of the five distinct pictures.
        steak_pictures = [
            ('http://images.google.com/images?q=tbn:ANd9GcSh1HXq3WyOvvG7-AgvNugKC2LzImMUvUDNTuDAPwVKuw8NZzvLN62pGYhX:farm1.static.flickr.com/21/24204504_e143536a2e.jpg',
             'steak1'),
            ('http://images.google.com/images?q=tbn:ANd9GcTkMoChevUBvQfmfksKDBM5oj4V2ruj6riqv7kC-_6qf9MR0igeBlJLkSI:www.miranchomeatmarket.com/images/T-%2520bone%2520steak.jpg',
             'steak2'),
            ('http://images.google.com/images?q=tbn:ANd9GcSttsqT7kj9siDKZg1p4fU6W9IFlMZHCFSxFd49ECJR1Bu_1QlHQwmH1DU:img4.myrecipes.com/i/recipes/ck/06/08/grilled-steak-ck-1215910-l.jpg',
             'steak3'),
            ('http://images.google.com/images?q=tbn:ANd9GcRfdQ-vuNt-W4W7JZRkAmbZpE6LLA0puCQs5erSzrGtsOY8H8t-vgEzqA:www.greendiamondgrille.com/images/new/NewYorkStripSteak.jpg',
             'steak4'),
            ('http://images.google.com/images?q=tbn:ANd9GcTsJzCp6En1R9yvFQw7bGsSxiiQCqlMrFg7XCbcJ13G39Aa3e6ZilWW34oI:www.bunrab.com/dailyfeed/dailyfeed_images_jan-07/df07_01-08_steak.jpg',
             'steak5'),
        ]
        # img1..img5 show every picture once; img6..img9 repeat pictures 2-5.
        picture_order = [0, 1, 2, 3, 4, 1, 2, 3, 4]
        selections = []
        for position, picture_index in enumerate(picture_order, 1):
            url, alt_text = steak_pictures[picture_index]
            selections.append(
                (Binary('image', 'jpg', url, alt_text).get_as_xml(),
                 'img%d' % position))

        q = Question(
            identifier="Formatted content test!",
            content=qc,
            answer_spec=AnswerSpecification(
                SelectionAnswer(
                    min=1,
                    max=5,
                    style='checkbox',
                    selections=selections,
                    type='binary')),
            is_required=True,
            display_name="This is display name")

        qf = QuestionForm()
        qf.append(q)

        create_hit_rs = self._create_typed_hit(qf, max_assignments=1)
        if create_hit_rs is not None:
            print("success!! key: %s" % create_hit_rs)

    def external_question(self):
        """Publish a HIT that frames an external web page (1 assignment).

        On success the HIT id and HIT type id of the first result are printed.
        """
        if not self.hit_type_id:
            return  # no HIT type registered yet; nothing to attach the HIT to

        q = ExternalQuestion(
            external_url=
            "http://www.kernel.org/pub/software/scm/git/docs/everyday.html",
            frame_height=200)

        # The HIT is created from the registered HIT type, so title, reward,
        # keywords and qualifications are not passed here.
        hit = self._create_typed_hit(q, max_assignments=1)
        if hit is not None:
            print("hit id: %s " % hit[0].HITId)
            print("hit type id: %s " % hit[0].HITTypeId)

    def get_account_balance(self):
        """Print the account balance reported by the connection."""
        print(self.connect.get_account_balance())

    def getHits(self):
        """Print the result of ``get_all_hits()``."""
        print(self.connect.get_all_hits())

    def getHit(self, hit_id):
        """Print every attribute of the first HIT returned for ``hit_id``."""
        hit_rs = self.connect.get_hit(hit_id)
        hit = hit_rs[0]
        for k, v in hit.__dict__.items():
            print("%s: %s" % (k, v))

    def searchHits(self):
        """Print the result of ``search_hits()``."""
        print(self.connect.search_hits())

    def getAssignments(self, hit_id):
        """Print the assignments returned for ``hit_id``."""
        print(self.connect.get_assignments(hit_id))

    def getReviewableHits(self):
        """Print the result of ``get_reviewable_hits()``."""
        print(self.connect.get_reviewable_hits())
Exemplo n.º 24
0
class HaCRSTurker:
    """Pushes HaCRS tasklets to Mechanical Turk and inspects the resulting HITs.

    Connection settings are read from the ``mturk`` section of
    ``../config.ini``; bookkeeping goes through a ``HaCRSDB`` instance.
    """

    def __init__(self):
        """Load the configuration, open the MTurk connection and the database."""
        self.config = HaCRSUtil.get_config('../config.ini')
        host = self.config.get('mturk', 'host')
        access_key_id = self.config.get('mturk', 'access_key_id')
        secret_access_key = self.config.get('mturk', 'secret_access_key')

        self.MTconnection = MTurkConnection(
            aws_access_key_id=access_key_id,
            aws_secret_access_key=secret_access_key,
            host=host)

        self.db = HaCRSDB()

    def get_balance(self):
        """Print the account balance reported by the connection."""
        print(self.MTconnection.get_account_balance())

    def expire_all_hits(self):
        """Expire every HIT that is not already expired, reporting each outcome."""
        for hit in self.MTconnection.get_all_hits():
            if hit.expired:
                continue
            try:
                self.MTconnection.expire_hit(hit.HITId)
                print('Expired HIT')
            except Exception as e:
                # Best effort: one failing HIT must not stop the sweep.
                print('Could not expire: {}'.format(e))

    def delete_all_mturk_hits(self):
        """Expire and then dispose of every HIT; errors propagate to the caller."""
        for hit in self.MTconnection.get_all_hits():
            print('expire/dispose')
            self.MTconnection.expire_hit(hit.HITId)
            self.MTconnection.dispose_hit(hit.HITId)

    def get_all_mturk_hits(self):
        """Return whatever ``get_all_hits()`` yields for this account."""
        return self.MTconnection.get_all_hits()

    # TODO: HITs available via API, but not via Amazon Web Sandbox
    def push_tasklet_mturk(self, keywords):
        """Create one HIT that lets a worker pick a tasklet of one difficulty.

        ``keywords`` is the difficulty label.  It selects the reward
        ('easy' 1.00; 'medium', 'hard', 'very_hard' 2.00; 'priority' 4.00)
        and is also used in the URL, the title prefix and the HIT keywords.
        Any other value prints 'Error' and exits the process with status 1.

        Returns ``(mturkid, hit_result)`` where ``mturkid`` is the value
        returned by ``db.create_mturk_resource``.
        """
        frame_height = self.config.get('mturk', 'frameheight')
        url = "https://cgcturk.hacked.jp/pick_tasklet/{}/".format(keywords)
        if keywords == 'easy':
            amount = 1.00
        elif keywords in ['medium', 'hard', 'very_hard']:
            amount = 2.00
        elif keywords == 'priority':
            amount = 4.00
        else:
            print('Error')
            sys.exit(1)

        questionform = ExternalQuestion(url, frame_height)

        # NOTE: this HIT uses a fixed title and description; the configured
        # 'shortdescr' value is only used by push_tasks_mturk().
        title = 'HELP AN AI!!! We are students building an artificial intelligence to find bugs in programs to keep the internet safe'
        sdescription = 'We are students building an artificial intelligence system that finds bugs in programs and keeps the internet safe from malware. BUT IT NEEDS YOUR HELP! Play with programs to find functions that it missed, and get $$$!'

        hit_result = self.MTconnection.create_hit(
            title='[{}] {}'.format(keywords, title),
            description=sdescription,
            keywords=keywords,
            max_assignments=1,
            question=questionform,
            reward=Price(amount=amount),
            response_groups=('Minimal', 'HITDetail'),  # ?
        )
        assert len(hit_result) == 1
        mturkid = self.db.create_mturk_resource(hit_result[0].HITId,
                                                hit_result[0].HITGroupId)
        return mturkid, hit_result

    def push_tasks_mturk(self):
        """Create one 0.01-reward HIT per unassigned tasklet and record each.

        Every created HIT is stored via ``db.create_mturk_resource`` and
        linked to its tasklet; the database is committed once at the end.
        """
        frame_height = self.config.get('mturk', 'frameheight')
        amount = 0.01
        tasklets = self.db.get_unassigned_tasklets()
        sdescription = self.config.get('mturk', 'shortdescr')

        for tasklet in tasklets:
            print('pushing!')

            url = "https://cgcturk.hacked.jp/tasklet/{}/".format(tasklet['id'])
            keywords = ["easy"]
            questionform = ExternalQuestion(url, frame_height)

            hit_result = self.MTconnection.create_hit(
                title=HaCRSUtil.get_tasklet_name(tasklet),
                description=sdescription,
                keywords=keywords,
                max_assignments=1,
                question=questionform,
                reward=Price(amount=amount),
                response_groups=('Minimal', 'HITDetail'),  # ?
            )
            assert len(hit_result) == 1
            mturkid = self.db.create_mturk_resource(hit_result[0].HITId,
                                                    hit_result[0].HITGroupId)
            self.db.add_mturk_tasklet_association(tasklet['id'], mturkid)
        self.db.commit()

    def show_seed_tasklets(self):
        """Pretty-print the seed tasklets stored in the database."""
        pprint(self.db.get_seed_tasklets())

    def get_hit(self, hitid):
        """Return the first HIT record for ``hitid``, or None.

        None is returned when the lookup raises or yields nothing, matching
        the behavior of :meth:`get_assignment_from_hit`.
        """
        try:
            hit = self.MTconnection.get_hit(hitid)
        except Exception:
            return None
        if not hit:  # None or an empty result set
            return None
        return hit[0]

    def get_assignment_from_hit(self, hitid):
        """Return the first assignment of ``hitid``, or None if unavailable."""
        try:
            assignments = self.MTconnection.get_assignments(hitid)
            return assignments[0]
        except Exception:
            return None

    def get_approved_seeding_tasklets(self):
        """Return the programs whose latest seed tasklet has an approved assignment.

        Tasklets whose MTurk lookup fails are skipped.  The result is a list
        of distinct ``db.get_tasklet_program`` values in no particular order.
        """
        # ``with`` closes the programs file instead of leaking the handle.
        with open(self.config.get('general', 'programsjson')) as programs_file:
            programs = json.load(programs_file)
        for program in programs:
            # The return value was never used; the call is kept in case
            # lookup_program has side effects -- TODO confirm.
            self.db.lookup_program(program)

        approved = set()
        for tasklet in self.db.get_latest_seed_tasklets():
            turkinfos = self.db.get_mturk_infos(tasklet['id'])
            try:
                assignments = self.MTconnection.get_assignments(
                    turkinfos['hitid'])
                if len(assignments) == 0:
                    continue
                if assignments[0].AssignmentStatus == 'Approved':
                    approved.add(self.db.get_tasklet_program(tasklet['id']))
            except Exception:
                # Deliberate best effort: a tasklet that cannot be checked
                # is treated as not approved.
                continue
        return list(approved)
Exemplo n.º 25
0
class Mturk():
    """Creates and manages HITs through boto, configured from a YAML file.

    The configuration supplies the AWS credentials and host as well as the
    title, description and keywords used for every created HIT.
    """

    def __init__(self):
        """Load ``config.yml`` and open the MTurk connection it describes."""
        self.config = self.set_config()
        self.mturk = MTurkConnection(
            aws_access_key_id=self.config['aws_access_key_id'],
            aws_secret_access_key=self.config['aws_secret_access_key'],
            host=self.config['host'])
        self.mturk_tmpl = MturkTmpl()

    def set_config(self, config_path="config.yml"):
        """Parse the YAML file at ``config_path`` and return its contents."""
        with open(config_path, 'r') as config_file:
            # safe_load: plain ``yaml.load`` without a Loader can construct
            # arbitrary Python objects and is rejected by recent PyYAML.
            return yaml.safe_load(config_file)

    def account_balance(self):
        """Print the account balance as a quick connection check."""
        account_balance = self.mturk.get_account_balance()
        print("Testing connection: You have a balance of: {}".format(
            account_balance))

    def get_hits(self):
        """Return whatever ``get_all_hits()`` yields for this account."""
        return self.mturk.get_all_hits()

    def get_all_assignments(self, hit_id):
        """Return a lazy iterator over every assignment of ``hit_id``.

        A first request (page size 100) is made only to learn the total
        number of results; the pages themselves are fetched one by one as
        the returned iterator is consumed.
        """
        page_size = 100
        first_page = self.mturk.get_assignments(hit_id, page_size=page_size)
        total_records = int(first_page.TotalNumResults)
        page_nums = self.mturk._get_pages(page_size=page_size,
                                          total_records=total_records)
        # Generator expression instead of itertools.imap: equally lazy, and
        # available on both Python 2 and Python 3.
        assignments_sets = (
            self.mturk.get_assignments(
                hit_id, page_size=page_size, page_number=page)
            for page in page_nums)
        return itertools.chain.from_iterable(assignments_sets)

    def remove_old_hits(self):
        """Disable every HIT of the account, reporting each one."""
        for hit in self.get_hits():
            # Report only after the request went through, so a failing call
            # is not preceded by a misleading success message.
            self.mturk.disable_hit(hit.HITId)
            print("Hit {} has been removed.".format(hit.HITId))

    def cal_reward(self, data):
        """Return the reward for a HIT, rounded to two decimals.

        The value is ``(3.0 + len(data['ents']) / 30.0) / 60.0 * 6.0``: a
        fixed 3.0 for reading the instructions plus 1/30 per entity.
        """
        read_instruction = 3.0
        word_count = len(data['ents']) / 30.0
        return round((read_instruction + word_count) / 60.0 * 6.0, 2)

    def create_hit(self, data):
        """Create one HIT for ``data`` and return boto's response.

        The question comes from ``mturk_tmpl.html_question(data)``; title,
        description and keywords come from the configuration.  One
        assignment is requested, workers get 120 seconds, and the reward is
        :meth:`cal_reward` of ``data``.
        """
        response = self.mturk.create_hit(
            question=self.mturk_tmpl.html_question(data),
            max_assignments=1,
            title=self.config['title'],
            description=self.config['description'],
            keywords=self.config['keywords'],
            duration=120,
            reward=self.cal_reward(data))
        return response
"""

import csv
import argparse

from boto.mturk.connection import MTurkConnection
from boto.mturk.price import Price
from boto.exception import AWSConnectionError, EC2ResponseError

parser = argparse.ArgumentParser(description='Grant bonuses for HITs on Amazon Mechanical Turk')
# dest='expt_name' because the script reads args.expt_name when it opens the
# bonus CSV; without it argparse stores the value as args.experiment and that
# lookup raises AttributeError.  The command-line flag itself is unchanged.
parser.add_argument('-experiment', dest='expt_name', required=True, help='(required) The name of the experiment you are granting bonuses for')
args = parser.parse_args()

try:
    conn = MTurkConnection(is_secure=True)
    mtbal = conn.get_account_balance()
    if len(mtbal) == 1:
        print "Available balance: ", mtbal[0]
    elif len(mtbal) == 2:
        print "Available balance: ", mtbal[0]
        print "OnHold balance: ", mtbal[1]
    avbal = mtbal[0]

    #if (bonuscost > avbal.amount):
    #    print "Insufficient funds to pay bonuses! Add $%.2f to your account before proceeding" % (bonuscost - avbal.amount)
    #else:
    with open('bonus.' + args.expt_name + '.csv', 'r') as csvinfile:
        fields = ('worker', 'trials', 'bonus', 'assignment')
        bonuses = csv.DictReader(csvinfile, fieldnames=fields)
        bonuses.next() # pop off the header row
        for row in bonuses:
Exemplo n.º 27
0
import boto
from boto.mturk.connection import MTurkConnection

# Create the connection to MTurk
mtc = MTurkConnection(aws_access_key_id='MYACCESSKEYID',
                      aws_secret_access_key='MYSECRETACCESSKEY',
                      host='mechanicalturk.amazonaws.com')

# Print account balance to show connection is working
account_balance = mtc.get_account_balance()[0]
print("You have a balance of: {}".format(account_balance))

# Create the qualification.  "NAME" and "DESCRIPTION" are placeholders to
# replace; the remaining arguments show one valid choice each, with the
# alternative noted alongside.
response = mtc.create_qualification_type(
    name="NAME",
    description="DESCRIPTION",
    status="Active",  # or "Inactive"
    auto_granted=True,  # or False
    auto_granted_value=1,  # value given to workers when auto_granted is True
)
Exemplo n.º 28
0
class MTurkServices:
    """Convenience layer over boto's ``MTurkConnection``.

    Credentials are checked once at construction.  Most methods open a fresh
    connection via :meth:`connect_to_turk` and return False when the
    credentials are invalid or the request fails.
    """

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        """Store the credentials and sandbox flag, then verify the login."""
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.validLogin = self.verify_aws_login()
        if not self.validLogin:
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        """Replace the stored AWS key pair (does not re-verify the login)."""
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        """Choose between the sandbox host (truthy) and the production host."""
        self.is_sandbox = is_sandbox

    @staticmethod
    def _describe_hit(hit):
        """Wrap the summary fields of a boto HIT record in an ``MTurkHIT``."""
        return MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        })

    def _collect_hits(self, keep=None):
        """Return ``MTurkHIT`` summaries of all HITs accepted by ``keep``.

        ``keep`` is an optional predicate on the boto HIT record; None keeps
        every HIT.  Returns False when connecting or listing fails.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return [self._describe_hit(hit) for hit in hits
                if keep is None or keep(hit)]

    def get_reviewable_hits(self):
        """Return summaries of HITs in status Reviewable or Reviewing, or False."""
        return self._collect_hits(
            lambda hit: hit.HITStatus in ("Reviewable", "Reviewing"))

    def get_all_hits(self):
        """Return summaries of every HIT of the account, or False."""
        return self._collect_hits()

    def get_active_hits(self):
        """Return summaries of the HITs that are not expired, or False."""
        return self._collect_hits(lambda hit: not hit.expired)

    def get_workers(self, assignmentStatus=None):
        """Return assignment records for the HITs on the first search page.

        Up to 20 HITs (descending sort) are searched and up to 100
        assignments per HIT are fetched, optionally filtered by
        ``assignmentStatus``.  Each record is a dict with the keys hitId,
        assignmentId, workerId, submit_time, accept_time and status.
        Returns False when connecting or any request fails.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.search_hits(sort_direction='Descending',
                                        page_size=20)
            workers = []
            for hit in hits:
                workers.extend(
                    self.mtc.get_assignments(hit.HITId,
                                             status=assignmentStatus,
                                             sort_by='SubmitTime',
                                             page_size=100))
        except MTurkRequestError:
            return False
        return [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus
        } for worker in workers]

    def bonus_worker(self, assignment_id, amount, reason=""):
        """Grant ``amount`` as a bonus for ``assignment_id``; return success.

        The worker id is looked up from the assignment.  A request error is
        printed and reported as False.
        """
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            workerId = assignment.WorkerId
            self.mtc.grant_bonus(workerId, assignment_id, bonus, reason)
            return True
        except MTurkRequestError as e:
            print(e)
            return False

    def approve_worker(self, assignment_id):
        """Approve the assignment; return True on success, False otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        """Reject the assignment; return True on success, False otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        """Approve a previously rejected assignment; return success as a bool."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    def verify_aws_login(self):
        """Return True if the stored credentials can query the account balance.

        The placeholder values 'YourAccessKeyId' / 'YourSecretAccessKey' are
        rejected without any network request.  The check always goes to the
        production host, whatever the sandbox setting is.
        """
        if (self.aws_access_key_id
                == 'YourAccessKeyId') or (self.aws_secret_access_key
                                          == 'YourSecretAccessKey'):
            return False
        host = 'mechanicalturk.amazonaws.com'
        mturkparams = dict(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        self.mtc = MTurkConnection(**mturkparams)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as e:
            print(e.error_message)
            return False
        return True

    def connect_to_turk(self):
        """(Re)create ``self.mtc`` for the selected host; return success.

        Returns False, after printing a notice, when the login check at
        construction time failed.
        """
        if not self.validLogin:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS credentials invalid.')
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'

        mturkparams = dict(aws_access_key_id=self.aws_access_key_id,
                           aws_secret_access_key=self.aws_secret_access_key,
                           host=host)
        self.mtc = MTurkConnection(**mturkparams)
        return True

    def configure_hit(self, hit_config):
        """Build ``self.paramdict``, the keyword arguments for ``create_hit``.

        ``hit_config`` must provide ad_location, approve_requirement,
        us_only, lifetime, max_assignments, title, description, keywords,
        reward and duration.  The HIT is an external question (frame height
        600) requiring a minimum approval percentage and, when ``us_only``
        is set, a US locale.
        """
        # configure question_url based on the id
        experimentPortalURL = hit_config['ad_location']
        frameheight = 600
        mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

        # Qualification:
        quals = Qualifications()
        approve_requirement = hit_config['approve_requirement']
        quals.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo",
                                                  approve_requirement))

        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters
        self.paramdict = dict(hit_type=None,
                              question=mturkQuestion,
                              lifetime=hit_config['lifetime'],
                              max_assignments=hit_config['max_assignments'],
                              title=hit_config['title'],
                              description=hit_config['description'],
                              keywords=hit_config['keywords'],
                              reward=hit_config['reward'],
                              duration=hit_config['duration'],
                              approval_delay=None,
                              questions=None,
                              qualifications=quals)

    def check_balance(self):
        """Return the first balance entry, or '-' when not connected."""
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    # TODO (if valid AWS credentials haven't been provided then connect_to_turk() will
    # fail, not error checking here and elsewhere)
    def create_hit(self, hit_config):
        """Create a HIT from ``hit_config``; return its id, or False on failure.

        Any ordinary exception (bad configuration, request error) yields
        False.  The id is also stored in ``self.hitid``.
        """
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.paramdict)[0]
            self.hitid = myhit.HITId
        except Exception:
            # ``Exception`` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            return False
        return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        """Expire the HIT; return True on success, False otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        """Dispose of the HIT; return True on success, False otherwise.

        Mirrors :meth:`expire_hit` so callers can tell whether the HIT was
        actually disposed of.
        """
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
            return True
        except Exception:
            print('Failed to dispose of HIT %s. Make sure there are no assignments remaining to be reviewed' % hitid)
            return False
Exemplo n.º 29
0
import cgi

from boto.mturk.connection import MTurkConnection

# CGI response header followed by the mandatory blank line.
print("Content-type: text/html")
print("")


form = cgi.FieldStorage()
HIT = form.getfirst("ID", "empty")
SAND = form.getfirst("sandbox", "empty")

# SECURITY NOTE(review): credentials are hard-coded in source.  They should be
# rotated and loaded from the environment or a protected config file instead.
ACCESS_ID = "AKIAIM5D5I7RUTGYNI7A"
SECRET_KEY = "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"

# HIT="2JNL8I9NZW6HG96GKYHWCT87ATCVL9"
# HOST = 'mechanicalturk.amazonaws.com'


# https://mechanicalturk.amazonaws.com/?Service=AWSMechanicalTurkRequester
# Form values arrive as strings, so the flag must be compared with "1"; the
# previous comparison with the integer 1 was never true and every request
# went to the production host.
if SAND == "1":
    print("sandbox")
    HOST = "mechanicalturk.sandbox.amazonaws.com"
else:
    print("not Sandbox")
    HOST = "mechanicalturk.amazonaws.com"


mtc = MTurkConnection(aws_access_key_id=ACCESS_ID, aws_secret_access_key=SECRET_KEY, host=HOST)

print(mtc.get_account_balance())
if HIT == "empty":
    # "empty" is the default used when the request carried no ID field.
    print("No HIT ID supplied; nothing extended.")
else:
    mtc.extend_hit(hit_id=HIT, assignments_increment=1)
Exemplo n.º 30
0
def get_account_balance():
    """Open a new MTurk connection and return the balance it reports.

    The connection uses the module-level ACCESS_ID, SECRET_KEY and HOST.
    """
    connection = MTurkConnection(
        host=HOST,
        aws_secret_access_key=SECRET_KEY,
        aws_access_key_id=ACCESS_ID,
    )
    balance = connection.get_account_balance()
    return balance
Exemplo n.º 31
0
class MTurkServices:
    """Convenience wrapper around boto's ``MTurkConnection``.

    Operations reconnect through ``connect_to_turk`` on every call and, for
    the most part, signal failure by returning ``False`` instead of raising.
    """

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        """Store credentials and the sandbox flag, then verify the login.

        A warning is printed when verification fails; the instance is still
        created, with ``validLogin`` set to ``False``.
        """
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.validLogin = self.verify_aws_login()
        if not self.validLogin:
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        """Replace the stored AWS key pair (does not re-verify the login)."""
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        """Choose whether later connections target the sandbox host."""
        self.is_sandbox = is_sandbox

    @staticmethod
    def _summarize_hits(hits):
        """Wrap each boto HIT record in an ``MTurkHIT`` summary object."""
        return [MTurkHIT({'hitid': hit.HITId,
                          'title': hit.Title,
                          'status': hit.HITStatus,
                          'max_assignments': hit.MaxAssignments,
                          'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
                          'number_assignments_pending': hit.NumberOfAssignmentsPending,
                          'number_assignments_available': hit.NumberOfAssignmentsAvailable,
                          'creation_time': hit.CreationTime,
                          'expiration': hit.Expiration,
                          }) for hit in hits]

    def _fetch_all_hits(self):
        """Connect and request every HIT; return False if either step fails."""
        if not self.connect_to_turk():
            return False
        try:
            return self.mtc.get_all_hits()
        except MTurkRequestError:
            return False

    def get_reviewable_hits(self):
        """Return summaries of HITs whose status is Reviewable or Reviewing.

        Returns False when the connection or the request fails.
        """
        hits = self._fetch_all_hits()
        if hits is False:
            return False
        reviewable_hits = [hit for hit in hits
                           if hit.HITStatus in ("Reviewable", "Reviewing")]
        return self._summarize_hits(reviewable_hits)

    def get_all_hits(self):
        """Return summaries of every HIT, or False on failure."""
        hits = self._fetch_all_hits()
        if hits is False:
            return False
        return self._summarize_hits(hits)

    def get_active_hits(self):
        """Return summaries of HITs that are not expired, or False on failure."""
        hits = self._fetch_all_hits()
        if hits is False:
            return False
        active_hits = [hit for hit in hits if not hit.expired]
        return self._summarize_hits(active_hits)

    def get_workers(self, assignmentStatus=None):
        """List assignments for the HITs returned by one ``search_hits`` page.

        Args:
            assignmentStatus: passed to boto as the assignment ``status``
                filter; ``None`` applies no filter.

        Returns:
            A list of dicts (hitId, assignmentId, workerId, submit_time,
            accept_time, status), or False on connection/request failure.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.search_hits(sort_direction='Descending', page_size=20)
            hit_ids = [hit.HITId for hit in hits]
            workers_nested = [self.mtc.get_assignments(
                                hit_id,
                                status=assignmentStatus,
                                sort_by='SubmitTime',
                                page_size=100
                              ) for hit_id in hit_ids]
            # Flatten the per-HIT assignment lists into one list.
            workers = [val for subl in workers_nested for val in subl]
        except MTurkRequestError:
            return False
        return [{'hitId': worker.HITId,
                 'assignmentId': worker.AssignmentId,
                 'workerId': worker.WorkerId,
                 'submit_time': worker.SubmitTime,
                 'accept_time': worker.AcceptTime,
                 'status': worker.AssignmentStatus
                 } for worker in workers]

    def bonus_worker(self, assignment_id, amount, reason=""):
        """Grant ``amount`` as a bonus to the worker of ``assignment_id``.

        Returns True on success, False on connection/request failure.
        """
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            workerId = assignment.WorkerId
            self.mtc.grant_bonus(workerId, assignment_id, bonus, reason)
            return True
        except MTurkRequestError as e:
            print(e)
            return False

    def approve_worker(self, assignment_id):
        """Approve an assignment; True on success, False on failure."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        """Reject an assignment; True on success, False on failure."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        """Approve a previously rejected assignment; True on success."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    def verify_aws_login(self):
        """Check the stored credentials by requesting the account balance.

        Placeholder credentials are rejected without contacting AWS. The
        check always targets the production host, whatever ``is_sandbox`` is.
        """
        if (self.aws_access_key_id == 'YourAccessKeyId') or (self.aws_secret_access_key == 'YourSecretAccessKey'):
            return False
        host = 'mechanicalturk.amazonaws.com'
        mturkparams = dict(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        self.mtc = MTurkConnection(**mturkparams)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as e:
            print(e.error_message)
            return False
        return True

    def connect_to_turk(self):
        """Create ``self.mtc`` for the sandbox or production host.

        Returns False (after printing a message) when the login was not
        verified at construction time; True otherwise.
        """
        if not self.validLogin:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS credentials invalid.')
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'

        mturkparams = dict(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        self.mtc = MTurkConnection(**mturkparams)
        return True

    def configure_hit(self, hit_config):
        """Build ``self.paramdict``, the keyword arguments for ``create_hit``.

        Args:
            hit_config: mapping that must provide 'ad_location',
                'approve_requirement', 'us_only', 'lifetime',
                'max_assignments', 'title', 'description', 'keywords',
                'reward' and 'duration'.
        """
        # The HIT is an external question pointing at the ad location.
        experimentPortalURL = hit_config['ad_location']
        frameheight = 600
        mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

        # Qualifications: minimum approval rate, optionally US-only.
        quals = Qualifications()
        approve_requirement = hit_config['approve_requirement']
        quals.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo",
                                                  approve_requirement))

        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters
        self.paramdict = dict(
            hit_type=None,
            question=mturkQuestion,
            lifetime=hit_config['lifetime'],
            max_assignments=hit_config['max_assignments'],
            title=hit_config['title'],
            description=hit_config['description'],
            keywords=hit_config['keywords'],
            reward=hit_config['reward'],
            duration=hit_config['duration'],
            approval_delay=None,
            questions=None,
            qualifications=quals
        )

    def check_balance(self):
        """Return the first account-balance entry, or '-' when not connected."""
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    def create_hit(self, hit_config):
        """Create a HIT from ``hit_config`` and return its id.

        Returns False when the connection, configuration or request fails.
        """
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.paramdict)[0]
            self.hitid = myhit.HITId
        except Exception:
            # Best effort by design, but no longer a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            return False
        else:
            return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        """Expire a HIT; True on success, False on failure."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        """Dispose of a HIT; True on success, False on failure."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
        except Exception:
            print('Failed to dispose of HIT %s. Make sure there are no assignments remaining to be reviewed' % hitid)
            return False
        return True
Exemplo n.º 32
0

# https://mechanicalturk.amazonaws.com/?Service=AWSMechanicalTurkRequester
# Pick the MTurk endpoint, open the shared connection and show the balance.
# NOTE(review): SAND is defined outside this excerpt; the sandbox is only
# selected when it equals the integer 1 -- confirm it is not a string.
if SAND == 1:
    print("sandbox")
    HOST = 'mechanicalturk.sandbox.amazonaws.com'
else:
    print("not Sandbox")
    HOST = 'mechanicalturk.amazonaws.com'


conn = MTurkConnection(aws_access_key_id=ACCESS_ID,
                       aws_secret_access_key=SECRET_KEY,
                       host=HOST)

# Single-argument print call: same output on Python 2, valid on Python 3.
print(conn.get_account_balance())


def accept_and_pay(worker_id, assign_id, bonus_price, reason):
    """Grant a bonus for an assignment through the module-level ``conn``.

    The assignment itself is not approved here (that call is disabled); only
    the bonus is granted, and only when ``bonus_price`` is positive.

    Returns:
        False if the bonus request raised ``MTurkRequestError``, else True
        (including when no bonus was due).
    """
    try:
        # TODO: make sure to avoid the possibility of paying the same bonus twice
        if bonus_price > 0:
            conn.grant_bonus(worker_id, assign_id, Price(amount=bonus_price), reason)
    except MTurkRequestError:
        # TODO: less embarrassing error handling
        print("looks like this one was already paid for. or, any other error")
        return False  # no bonus if already paid for
    return True
Exemplo n.º 33
0
class TranscriptionPipelineHandler():
    def __init__(self):
        """Connect to MTurk and build the handlers the pipeline relies on.

        Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_ACCESS_KEY`` environment variables (KeyError when missing).
        The account balance is fetched once and cached in ``self.balance``.
        """
        access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        secret_key = os.environ['AWS_ACCESS_KEY']

        connection = MTurkConnection(aws_access_key_id=access_key_id,
                                     aws_secret_access_key=secret_key,
                                     host=HOST)
        self.conn = connection

        # Helpers that talk to MTurk share the single connection.
        self.ah = AssignmentHandler(connection)
        self.th = TurkerHandler(connection)
        self.hh = HitHandler(connection, TEMPLATE_DIR)

        # Local helpers.
        self.mh = MongoTranscriptionHandler()
        self.wh = WavHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)

        first_balance = connection.get_account_balance()[0]
        self.balance = first_balance.amount
        logger_name = "transcription_engine.transcription_pipeline_handler"
        self.logger = logging.getLogger(logger_name)

    def audio_clip_referenced_to_hit(self, priority=1, max_queue_size=10):
        for audio_clip in self.mh.get_artifacts_by_state(
                "audio_clips", "Referenced"):
            audio_clip_id = audio_clip["_id"]
            self.mh.queue_clip(audio_clip_id, priority, max_queue_size)
            response = self.audio_clip_queue_to_hit()

    def audio_clip_queued_to_hit(self, priority=1, max_queue_size=10):
        for audio_clip in self.mh.get_artifacts("audio_clips",
                                                {"state": "Queued"}):
            audio_clip_id = audio_clip["_id"]
            response = self.audio_clip_queue_to_hit()
            #===================================================================
            # elif state == "Hit":
            #     print("In hit: %s"%audio_clip_url)
            #===================================================================

    def audio_clip_queue_to_hit(self, cost_sensitive=True):
        """Take queued audio clips from the audio clip queue
            put them in a hit and create the hit.
            If successful, update the audio clip state."""
        clip_queue = self.mh.get_audio_clip_queue()
        clip_pairs = self.mh.get_audio_clip_pairs(clip_queue)
        if clip_pairs:
            hit_title = "Audio Transcription"
            question_title = "List and Transcribe"
            description = "Transcribe the audio clip by typing the words the person says in order."
            keywords = "audio, transcription, audio transcription"
            if cost_sensitive:
                reward_per_clip = 0.02
                max_assignments = 3
                estimated_cost = self.hh.estimate_html_HIT_cost(
                    clip_pairs, reward_per_clip, max_assignments)
                clips_in_hits = self.mh.clips_already_in_hit(clip_pairs)
                if clips_in_hits:
                    #If one or more clips are already in a HIT, remove it from the queue
                    self.mh.remove_audio_clips_from_queue(clips_in_hits)
                elif self.balance - estimated_cost >= 250:
                    #if we have enough money, create the HIT
                    response = self.hh.make_html_transcription_HIT(
                        clip_pairs, hit_title, question_title, description,
                        keywords)
                    self.balance = self.balance - estimated_cost
                    if type(response) == ResultSet and len(
                            response) == 1 and response[0].IsValid:
                        response = response[0]
                        self.mh.remove_audio_clips_from_queue(clip_queue)
                        audio_clip_ids = [
                            w["audio_clip_id"] for w in clip_queue
                        ]
                        hit_id = response.HITId
                        hit_type_id = response.HITTypeId
                        self.mh.create_transcription_hit_artifact(
                            hit_id, hit_type_id, clip_queue, "New")
                        self.logger.info("Successfully created HIT: %s" %
                                         hit_id)
                        return self.mh.update_audio_clips_state(
                            audio_clip_ids, "Hit")
                else:
                    pass
        return False

    def load_assignments_hit_to_submitted(self):
        """Record the assignments of every HIT as "Submitted" artifacts.

        For each HIT returned by MTurk, every assignment that does not yet
        have an "assignments" artifact has its submitted transcriptions
        read; the matching transcriptions and audio clips are moved to the
        "Submitted" state and an assignment artifact is created. An
        assignment that references an unknown audio clip is logged and gets
        no artifact. One summary line is printed per HIT.
        """
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            assignments = self.conn.get_assignments(hit_id)
            # Turns False as soon as one assignment has no artifact yet.
            have_all_assignments = True
            assignment_ids = []
            for assignment in assignments:
                assignment_ids.append(assignment.AssignmentId)
                if self.mh.get_artifact("assignments",
                                        {"_id": assignment.AssignmentId}):
                    # Assignment artifacts are created below, so an existing
                    # one means this assignment was already handled: skip it.
                    continue
                else:
                    have_all_assignments = False
                transcription_ids = []
                transcription_dicts = self.ah.get_assignment_submitted_transcriptions(
                    assignment)
                # No-op placeholder: nothing happens for a 10-item result.
                if transcription_dicts and len(transcription_dicts) == 10:
                    pass
                for transcription in transcription_dicts:
                    if not self.mh.get_artifact_by_id(
                            "audio_clips", transcription["audio_clip_id"]):
                        self.logger.info("Assignment(%s) with unknown audio clip(%s) skipped"%\
                                    (assignment.AssignmentId,transcription["audio_clip_id"]))
                        # Leaving via break skips the loop's else clause, so
                        # no assignment artifact is created for this one.
                        break
                    self.mh.update_transcription_state(transcription,
                                                       "Submitted")
                    self.mh.update_audio_clips_state(
                        [transcription["audio_clip_id"]], "Submitted")
                    transcription_ids.append(
                        self.mh.get_artifact(
                            "transcriptions", {
                                "audio_clip_id":
                                transcription["audio_clip_id"],
                                "assignment_id": transcription["assignment_id"]
                            }, "_id"))
                else:
                    # Runs only when the loop finished without a break,
                    # i.e. every referenced audio clip was known.
                    self.mh.create_assignment_artifact(assignment,
                                                       transcription_ids,
                                                       "Submitted")
            # Mark the HIT only when it had at least one new assignment.
            if assignments and not have_all_assignments:
                self.mh.update_transcription_hit_state(hit_id, "Submitted")
            print("Transcriptions HIT(%s) submitted assignments: %s " %
                  (hit_id, assignment_ids))

    def assignment_submitted_approved(self):
        """For all submitted assignments,
            if an answered question has a reference transcription,
            check the WER.
            If all the answered questions with reference transcriptions
            have an acceptable WER, approve the assignment and update
            the audio clips and transcriptions."""
        assignments = self.mh.get_artifacts_by_state("assignments",
                                                     "Submitted")
        rejected_feedback = "I'm sorry but your work in assignment(%s) was rejected because" +\
                            " one or more of your transcriptions " +\
                            " had a word error rate above the maximum acceptable"+\
                            " word error rate of %s. Omitted words and words that "+\
                            " differed by more than %s "+\
                            " characters were counted as an error."
        accepted_feedback = "Your average word error rate on assignment(%s) was %s."+\
                            " Assignment accepted! Thanks for your hard work."
        for assignment in assignments:
            assignment_id = assignment["_id"]
            transcription_ids = assignment["transcriptions"]
            transcriptions = self.mh.get_artifacts("transcriptions", "_id",
                                                   transcription_ids)

            worker_id = assignment["worker_id"]
            worker_id = self.mh.create_worker_artifact(worker_id)

            approved, average_wer = self.filter.approve_assignment(
                transcriptions)
            if approved:
                try:
                    self.conn.approve_assignment(
                        assignment_id,
                        accepted_feedback % (assignment_id, average_wer))
                except MTurkRequestError as e:
                    print(e)
                else:
                    self.mh.update_assignment_state(assignment, "Approved")
                    for transcription in transcriptions:
                        #Approve transcriptions without references in the same assignment
                        reference_id = self.mh.get_artifact_by_id(
                            "audio_clips", transcription["audio_clip_id"],
                            "reference_transcription_id")
                        if not reference_id:
                            self.mh.update_transcription_state(
                                transcription, "Approved")
                    print("Approved transcription ids: %s" % transcription_ids)
            else:
                #Don't deny for now
                feedback = rejected_feedback % (assignment_id,
                                                self.filter.WER_THRESHOLD,
                                                self.filter.CER_THRESHOLD)
                self.logger.info(feedback)
                self.conn.reject_assignment(assignment_id, feedback)
                self.mh.update_assignment_state(assignment, "Denied")
                #print("Assignments not aproved %s "%denied)
            #Update the worker
            if approved:
                self.mh.add_assignment_to_worker(worker_id,
                                                 (assignment_id, average_wer))

    def _load_rm_audio_source_file_to_clipped(
            self,
            file_dir,
            prompt_file_uri,
            base_clip_dir,
            sample_rate=16000,
            http_base_url="http://www.cis.upenn.edu/~tturpen/wavs/",
            init_clip_count=200):
        """For an audio directory,
            see which files are new and not an audio source already
            """
        prompt_dict = self.ph.get_prompts(prompt_file_uri)
        count = 0
        for root, dirs, files in os.walk(file_dir):
            for f in files:
                if count == init_clip_count:
                    return
                system_uri = os.path.join(root, f)
                out_uri = system_uri.strip(".sph") + ".wav"
                out_uri = os.path.basename(out_uri)
                out_uri = os.path.join(root, (out_uri))
                spkr_id = str(os.path.relpath(root, file_dir))
                #sph to wav
                if not f.endswith(".wav") and not os.path.exists(out_uri):
                    try:
                        self.wh.sph_to_wav(system_uri, out_uri=out_uri)
                    except WavHandlerException as e:
                        self.logger.error("Unable to create wav from sph: " +
                                          str(e))

                if os.path.exists(out_uri) and out_uri.endswith(".wav"):
                    #create audio source artifact
                    count += 1
                    wav_filename = os.path.basename(out_uri)
                    prompt_id = os.path.basename(out_uri).strip(".wav").upper()
                    encoding = ".wav"
                    sample_rate = 16000
                    disk_space = os.stat(out_uri).st_size
                    length_seconds = self.wh.get_audio_length(out_uri)
                    if prompt_id in prompt_dict:
                        transcription_prompt = prompt_dict[prompt_id]
                    else:
                        #No prompt found
                        raise PromptNotFound
                    source_id = self.mh.create_audio_source_artifact(
                        out_uri, disk_space, length_seconds, sample_rate,
                        spkr_id, encoding)
                    #create audio clip artifact
                    audio_clip_uri = os.path.join(base_clip_dir, spkr_id,
                                                  wav_filename)
                    clip_dir = os.path.dirname(audio_clip_uri)
                    if not os.path.exists(clip_dir):
                        os.makedirs(clip_dir)
                    if not os.path.exists(audio_clip_uri):
                        copyfile(out_uri, audio_clip_uri)
                    #http_url
                    http_url = os.path.join(http_base_url, spkr_id,
                                            wav_filename)
                    clip_id = self.mh.create_audio_clip_artifact(
                        source_id, 0, -1, audio_clip_uri, http_url,
                        length_seconds, disk_space)

                    #Update the audio source, updates state too
                    self.mh.update_audio_source_audio_clip(source_id, clip_id)

                    #Create the reference transcription artifact
                    transcription_id = self.mh.create_reference_transcription_artifact(
                        clip_id, transcription_prompt, "Gold")
                    #Completes audio clip to Referenced
                    self.mh.update_audio_clip_reference_transcription(
                        clip_id, transcription_id)

    def all_workers_liveness(self):
        workers = self.mh.get_all_workers()
        for worker in workers:
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments(worker)
            print("Worker(%s) assignments, approved(%s) denied(%s)" %
                  (worker["_id"], approved, denied))
            selection = input(
                "1. Show denied transcriptions and references.\n" +
                "2. Show accepted transcriptions and references.\n" +
                "3. Show both denied and accepted transcriptions.")
            if selection == 1 or selection == 3:
                print("Approved transcriptions")
                for assignment_id in approved:
                    transcription_pairs = self.mh.get_transcription_pairs(
                        assignment_id)
                    for pair in transcription_pairs:
                        print("Reference:\n\t%s\nHypothesis:\n\t%s\n" %
                              (pair[0], pair[1]))
            if selection == 2 or selection == 3:
                print("Denied transcriptions")
                for assignment_id in denied:
                    transcription_pairs = self.mh.get_transcription_pairs(
                        assignment_id)
                    for pair in transcription_pairs:
                        print("Reference:\n\t%s\nHypothesis:\n\t%s\n" %
                              (pair[0], pair[1]))

    def stats(self):
        workers = self.mh.get_all_workers()
        all_wer_per_approved_assignment = 0.0
        total_accepted = 0.0
        for worker in workers:
            worker_wer = 0.0
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments_wer(worker)
            for w in approved:
                all_wer_per_approved_assignment += float(w[1])
                worker_wer += float(w[1])
                total_accepted += 1
            if approved:
                worker_average_wer = worker_wer / len(approved)
                print("%s,%s" % (len(approved), worker_average_wer))
            #print("Worker(%s) approved assignments(%s)\n denied assignments(%s)"%(worker_id,approved,denied))
        av = all_wer_per_approved_assignment / total_accepted
        print("Average WER per assignment(%s)" % (av))

    def get_assignment_stats(self):
        self.effective_hourly_wage_for_approved_assignments(.20)

    def effective_hourly_wage_for_approved_assignments(self,
                                                       reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""
        approved_assignments = self.mh.get_artifacts_by_state(
            "assignments", "Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            if "SubmitTime" in assignment:
                accepted = datetime.datetime.strptime(assignment["AcceptTime"],
                                                      "%Y-%m-%dT%H:%M:%SZ")
                submitted = datetime.datetime.strptime(
                    assignment["SubmitTime"], "%Y-%m-%dT%H:%M:%SZ")
            else:
                pass
            total += submitted - accepted
            count += 1
        seconds_per_assignment = total.total_seconds() / count
        effective_hourly_wage = 60.0 * 60.0 / seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s" %
              (seconds_per_assignment, reward_per_assignment,
               effective_hourly_wage))

    def allhits_liveness(self):
        #allassignments = self.conn.get_assignments(hit_id)
        #first = self.ah.get_submitted_transcriptions(hit_id,str(clipid))

        hits = self.conn.get_all_hits()
        for hit in hits:
            hit_id = hit.HITId
            print("HIT ID: %s" % hit_id)
            assignments = self.conn.get_assignments(hit_id)
            if len(assignments) == 0:
                if raw_input("Remove hit with no submitted assignments?(y/n)"
                             ) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                        clips = self.mh.get_artifact("transcription_hits",
                                                     {"_id": hit_id}, "clips")
                        self.mh.remove_transcription_hit(hit_id)
                        self.mh.update_audio_clips_state(clips, "Referenced")
                    except MTurkRequestError as e:
                        raise e
            else:
                if raw_input("Remove hit with %s submitted assignments?(y/n)" %
                             len(assignments)) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                    except MTurkRequestError as e:
                        raise e

    def run(self):
        audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/ind_trn"
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        base_clip_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/clips"
        selection = 0
        init_clip_count = 10000
        while selection != "11":
            selection = raw_input(
                """Audio Source file to Audio Clip Approved Pipeline:\n
                                     1: AudioSource-FileToClipped: Initialize Resource Management audio source files to %d queueable(Referenced) clips
                                     2: AudioClip-ReferencedToHit: Queue all referenced audio clips and create a HIT if the queue is full.
                                     3: AudioClip-HitToSubmitted: Check all submitted assignments for Transcriptions.
                                     4: AudioClip-SubmittedToApproved: Check all submitted clips against their reference.
                                     5: Review Current Hits
                                     6: Worker liveness
                                     7: Account balance
                                     8: Worker stats
                                     9: Recalculate worker WER                                     
                                     10: Assignment Stats
                                     11: Exit
                                    """ % init_clip_count)
            #selection = "5"
            if selection == "1":
                self._load_rm_audio_source_file_to_clipped(
                    audio_file_dir,
                    prompt_file_uri,
                    base_clip_dir,
                    init_clip_count=init_clip_count)
            elif selection == "2":
                self.audio_clip_referenced_to_hit()
            elif selection == "3":
                self.load_assignments_hit_to_submitted()
            elif selection == "4":
                self.assignment_submitted_approved()
            elif selection == "5":
                self.allhits_liveness()
            elif selection == "6":
                self.all_workers_liveness()
            elif selection == "7":
                print("Account balance: %s" % self.balance)
            elif selection == "8":
                self.stats()
            elif selection == "9":
                self.recalculate_worker_assignment_wer()
            elif selection == "10":
                self.get_assignment_stats()


#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)

#     def recalculate_worker_assignment_wer(self):
#         """For all submitted assignments,
#             if an answered question has a reference transcription,
#             check the WER.
#             If all the answered questions with reference transcriptions
#             have an acceptable WER, approve the assignment and update
#             the audio clips and transcriptions."""
#         assignments = self.mh.get_artifacts("assignments",{"state":"Approved"})
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             denied = []
#             #If no transcriptions have references then we automatically approve the HIT
#             approved = True
#             transcription_ids = assignment["transcriptions"]
#             transcriptions = self.mh.get_transcriptions("_id",transcription_ids)
#             worker_id = assignment["worker_id"]
#             worker_id = self.mh.create_worker_artifact(worker_id)
#
#             max_rej_wer = (0.0,0.0)
#             total_wer = 0.0
#             for transcription in transcriptions:
#                 #Normalize the transcription
#                 #self.mh.normalize_transcription
#                 reference_id = self.mh.get_audio_clip_by_id(transcription["audio_clip_id"],"reference_transcription_id")
#                 if reference_id:
#                     reference_transcription = self.mh.get_reference_transcription({"_id": reference_id},
#                                                                                   "transcription")
#                     new_transcription = transcription["transcription"].split(" ")
#                     if reference_transcription:
#                         transcription_wer = cer_wer(reference_transcription,new_transcription)
#                         total_wer += transcription_wer
#                         if transcription_wer < WER_THRESHOLD:
#                             self.logger.info("WER for transcription(%s) %d"%(transcription["transcription"],transcription_wer))
#                         else:
#                             max_rej_wer = (transcription_wer,WER_THRESHOLD)
#                             denied.append((reference_transcription,new_transcription))
#                             approved = False
#             average_wer = total_wer/len(transcriptions)
#             #Update the worker
#             self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
class ElicitationPipelineHandler(object):
    def __init__(self):
        """Connect to MTurk and build the helpers used by the pipeline.

        The AWS credentials are read from the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_ACCESS_KEY`` environment variables.  The spendable balance is
        capped at ``batch_cost`` so a single run cannot spend more than one
        batch.

        Raises:
            KeyError: if either credential environment variable is unset.
            IOError: if the account balance does not exceed ``batch_cost``.
            Exception: any error raised while opening the MTurk connection
                is printed and re-raised.
        """
        # Created first so logging is available even if setup fails part-way.
        self.logger = logging.getLogger(
            "transcription_engine.elicitation_pipeline_handler")

        aws_id = os.environ['AWS_ACCESS_KEY_ID']
        aws_k = os.environ['AWS_ACCESS_KEY']

        try:
            self.conn = MTurkConnection(aws_access_key_id=aws_id,
                                        aws_secret_access_key=aws_k,
                                        host=HOST)
        except Exception as e:
            # Swallowing the error here only deferred the failure to an
            # AttributeError on self.conn a few lines below, hiding the
            # real cause; report it and let it propagate instead.
            print(e)
            raise

        self.ah = AssignmentHandler(self.conn)
        self.th = TurkerHandler(self.conn)
        self.hh = HitHandler(self.conn, TEMPLATE_DIR)
        self.mh = MongoElicitationHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)
        self.balance = self.conn.get_account_balance()[0].amount
        self.batch_cost = 1
        if self.balance > self.batch_cost:
            # Never spend more than one batch, whatever the account holds.
            self.balance = self.batch_cost
        else:
            raise IOError(
                "Account balance (%s) does not exceed the batch cost (%s)" %
                (self.balance, self.batch_cost))

    def load_PromptSource_RawToList(self, prompt_file_uri):
        """Register a prompt source file and every prompt read from it.

        A prompt-source artifact is stored with the file's size on disk and
        the number of prompts, followed by one prompt artifact per entry
        holding both the raw and the normalized prompt text.
        """
        prompts_by_key = self.ph.get_prompts(prompt_file_uri)
        source_size = os.stat(prompt_file_uri).st_size
        prompt_total = len(prompts_by_key)
        source_id = self.mh.create_prompt_source_artifact(
            prompt_file_uri, source_size, prompt_total)

        text_normalizer = Normalize()
        for prompt_key in prompts_by_key:
            raw_prompt, line_no = prompts_by_key[prompt_key]
            cleaned_prompt = text_normalizer.rm_prompt_normalization(
                raw_prompt)
            self.mh.create_prompt_artifact(source_id, raw_prompt,
                                           cleaned_prompt, line_no,
                                           prompt_key, len(raw_prompt))

    def load_assignment_hit_to_submitted(self):
        """Check all assignments for audio clip IDs.
            Update the audio clips.
            This is a non-destructive load of the assignments from MTurk"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            if self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                assignments = self.conn.get_assignments(hit_id)
                have_all_assignments = True
                assignment_ids = []
                for assignment in assignments:
                    assignment_id = assignment.AssignmentId
                    assignment_ids.append(assignment_id)
                    if self.mh.get_artifact("elicitation_assignments",
                                            {"_id": assignment.AssignmentId}):
                        #We create assignments here, so if we already have it, skip
                        continue
                        #pass
                    else:
                        have_all_assignments = False
                    recording_ids = []
                    prompt_id_tag = "prompt_id"
                    recording_url_tag = "recording_url"
                    worker_id_tag = "worker_id"
                    recording_dict = self.ah.get_assignment_submitted_text_dict(
                        assignment, prompt_id_tag, recording_url_tag)
                    worker_oid = self.mh.create_worker_artifact(
                        assignment.WorkerId)
                    zipcode = None
                    for recording in recording_dict:
                        if recording[prompt_id_tag] == "zipcode":
                            zipcode = recording[recording_url_tag]
                            continue
                        if not self.mh.get_artifact_by_id(
                                "prompts", recording[prompt_id_tag]):
                            self.logger.info("Assignment(%s) with unknown %s(%s) skipped"%\
                                        (assignment_id,prompt_id_tag,recording[prompt_id_tag]))
                            break
                        recording_id = self.mh.create_recording_source_artifact(
                            recording[prompt_id_tag],
                            recording[recording_url_tag],
                            recording[worker_id_tag])
                        if not recording_id:
                            self.mh.create_assignment_artifact(assignment,
                                                               recording_ids,
                                                               zipcode=zipcode,
                                                               incomplete=True)
                            break

                        self.mh.add_item_to_artifact_set(
                            "prompts", recording[prompt_id_tag],
                            "recording_sources", recording_id)
                        recording_ids.append(recording_id)
                    else:
                        self.mh.create_assignment_artifact(assignment,
                                                           recording_ids,
                                                           zipcode=zipcode)
                        self.mh.add_item_to_artifact_set(
                            "elicitation_hits", hit_id,
                            "submitted_assignments", assignment_id)
                        self.mh.add_item_to_artifact_set(
                            "workers", worker_oid, "submitted_assignments",
                            assignment_id)
                print("Elicitation HIT(%s) submitted assignments: %s " %
                      (hit_id, assignment_ids))

    def approve_assignment_submitted_to_approved(self):
        """Approve all submitted assignments"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            if self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                assignments = self.conn.get_assignments(hit_id)
                have_all_assignments = True
                assignment_ids = []
                for assignment in assignments:
                    assignment_id = assignment.AssignmentId
                    assignment_ids.append(assignment_id)
                    if self.mh.get_artifact("elicitation_assignments", {
                            "_id": assignment_id,
                            "state": "Submitted"
                    }):
                        #WARNING: this Approves every assignment
                        self.conn.approve_assignment(
                            assignment_id,
                            "Thank you for completing this assignment!")
                        self.mh.update_artifact_by_id(
                            "elicitation_assignments", assignment_id,
                            "approval_time", datetime.datetime.now())

    def approve_assignment_by_worker(self):
        """Approve all submitted assignments"""
        approval_comment = "Thank you for your recordings, good work, assignment approved!"
        denial_comment = "I'm sorry but your work was denied because %s"
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            if self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                assignments = self.conn.get_assignments(hit_id)
                have_all_assignments = True
                assignment_ids = []
                for assignment in assignments:
                    assignment_id = assignment.AssignmentId
                    assignment_ids.append(assignment_id)
                    if self.mh.get_artifact("elicitation_assignments", {
                            "_id": assignment_id,
                            "state": "Submitted"
                    }):
                        #WARNING: this Approves every assignment
                        assignment_artifact = self.mh.get_artifact(
                            "elicitation_assignments", {"_id": assignment_id})
                        recording_ids = assignment_artifact["recordings"]
                        worker = self.mh.get_artifact(
                            "workers",
                            {"eid": assignment_artifact["worker_id"]})
                        if worker["state"] == "Approved":
                            #If the worker is approved, approve the assignment automatically
                            self.conn.approve_assignment(
                                assignment_id, approval_comment)
                            self.mh.update_artifact_by_id(
                                "elicitation_assignments", assignment_id,
                                "approval_time", datetime.datetime.now())
                            continue
                        elif worker["state"] == "Rejected":
                            self.conn.reject_assignment(
                                assignment_id, worker["rejection_reason"])
                            self.mh.update_artifact_by_id(
                                "elicitation_assignments", assignment_id,
                                "approval_time", datetime.datetime.now())
                            continue
                        recording_uris = []
                        for recording_id in recording_ids:
                            uri = self.mh.get_artifact_by_id(
                                "recording_sources", recording_id,
                                "recording_uri")
                            recording_uris.append(uri)
                        command = ["gnome-mplayer"] + recording_uris
                        if len(recording_uris) > 0 and recording_uris[
                                0].endswith(" .wav") or recording_uris[
                                    0].endswith(".com.wav"):
                            continue
                        print("Calling: %s" % command)
                        call(command)
                        approve_assignment = raw_input(
                            "Approve assignment(y/n/s)?")
                        if approve_assignment == "s":
                            #skip
                            continue
                        elif approve_assignment == "y":
                            #accept the assignment
                            self.conn.approve_assignment(
                                assignment_id, approval_comment)
                            self.mh.update_artifact_by_id(
                                "elicitation_assignments", assignment_id,
                                "approval_time", datetime.datetime.now())
                            approve_worker = raw_input("Approve worker(y/n)?")
                            if approve_worker == "y":
                                #approve the worker and all future assignments
                                self.mh.update_artifact_by_id(
                                    "workers", worker["_id"], "approval_time",
                                    datetime.datetime.now())
                        elif approve_assignment == "n":
                            #Reject the assignment
                            reject_worker = raw_input(
                                "Reject this worker's future work?")
                            if reject_worker == "y":
                                #Reject the worker
                                reason = raw_input(
                                    "Reason for rejecting this worker's future work:"
                                )
                                self.mh.update_artifact_by_id(
                                    "workers", worker["_id"],
                                    "rejection_reason", reason)
                                self.conn.reject_assignment(
                                    assignment_id,
                                    denial_comment % reason + ".")
                            else:
                                reason = raw_input(
                                    "Why reject the assignment?")
                                self.conn.reject_assignment(
                                    assignment_id,
                                    denial_comment % reason + ".")

    def get_assignment_stats(self):
        effective_hourly_wage = self.effective_hourly_wage_for_approved_assignments(
            .20)

    def effective_hourly_wage_for_approved_assignments(self,
                                                       reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""
        approved_assignments = self.mh.get_artifacts_by_state(
            "elicitation_assignments", "Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            accepted = datetime.datetime.strptime(assignment["AcceptTime"],
                                                  "%Y-%m-%dT%H:%M:%SZ")
            submitted = datetime.datetime.strptime(assignment["SubmitTime"],
                                                   "%Y-%m-%dT%H:%M:%SZ")
            total += submitted - accepted
            count += 1
            #self.mh.update_artifact_by_id("elicitation_assignments", assignment["_id"], "SubmitTime", completion_time)
        seconds_per_assignment = total.total_seconds() / count
        effective_hourly_wage = 60.0 * 60.0 / seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s" %
              (seconds_per_assignment, reward_per_assignment,
               effective_hourly_wage))

    def enqueue_prompts_and_generate_hits(self):
        """Queue every prompt in state "New" and create HITs from the queue.

        Each new prompt is enqueued; whenever the queue then yields prompt
        pairs, an HTML elicitation HIT is created for them provided the
        remaining batch balance covers the estimated cost.

        Returns:
            True as soon as the remaining balance cannot cover the next
            HIT; otherwise None after the remaining batch amount has been
            printed.
        """
        prompts = self.mh.get_artifacts_by_state("prompts", "New")
        for prompt in prompts:
            # NOTE(review): the meaning of the 1 and 5 arguments is defined
            # by enqueue_prompt, which is not visible here -- confirm.
            self.mh.enqueue_prompt(prompt["_id"], 1, 5)
            prompt_queue = self.mh.get_prompt_queue()
            prompt_pairs = self.mh.get_prompt_pairs(prompt_queue)
            if prompt_pairs:
                hit_title = "Audio Elicitation"
                question_title = "Speak and Record your Voice"
                hit_description = "Speak the prompt and record your voice."
                keywords = "audio, elicitation, speech, recording"
                # NOTE(review): cost_sensitive is neither a parameter nor a
                # local of this method; presumably a module-level flag --
                # confirm it exists, otherwise this raises NameError.
                if cost_sensitive:
                    reward_per_clip = 0.04
                    max_assignments = 2
                    estimated_cost = self.hh.estimate_html_HIT_cost(prompt_pairs,reward_per_clip=reward_per_clip,\
                                                                    max_assignments=max_assignments)
                    prompts_in_hits = self.mh.prompts_already_in_hit(
                        prompt_pairs)
                    if prompts_in_hits:
                        # If one or more prompts are already in a HIT, remove them from the queue
                        self.mh.remove_artifact_from_queue(prompts_in_hits)
                    elif self.balance - estimated_cost >= 0:
                        # If we have enough money, create the HIT
                        response = self.hh.make_html_elicitation_HIT(
                            prompt_pairs,
                            hit_title,
                            question_title,
                            keywords,
                            hit_description,
                            max_assignments=max_assignments,
                            reward_per_clip=reward_per_clip)
                        #                         response = self.hh.make_question_form_elicitation_HIT(prompt_pairs,hit_title,
                        #                                                      question_title, keywords)
                        # The estimated cost is deducted before the response
                        # is validated, i.e. even when the check below fails.
                        self.balance = self.balance - estimated_cost
                        if type(response) == ResultSet and len(
                                response) == 1 and response[0].IsValid:
                            response = response[0]
                            # Record the new HIT: empty the queue, store the
                            # HIT artifact and tag each prompt with its id.
                            self.mh.remove_artifacts_from_queue(
                                "prompt_queue", prompt_queue)
                            prompt_ids = [w["prompt_id"] for w in prompt_queue]
                            hit_id = response.HITId
                            hit_type_id = response.HITTypeId
                            self.mh.create_elicitation_hit_artifact(
                                hit_id, hit_type_id, prompt_ids)
                            self.mh.update_artifacts_by_id(
                                "prompts", prompt_ids, "hit_id", hit_id)
                            self.logger.info("Successfully created HIT: %s" %
                                             hit_id)
                    else:
                        # Not enough balance left for another HIT: stop early
                        # (the summary print below is skipped on this path).
                        return True
        print("Amount left in batch: %s out of %s" %
              (self.balance, self.batch_cost))

    def allhits_liveness(self):
        """Interactively disable HITs on MTurk.

        The operator is first offered a bulk removal of every HIT without
        assignments.  If that is declined, each HIT is shown in turn and
        can be removed individually.  When a HIT without assignments is
        removed, its local artifact is deleted as well and its prompts are
        passed to ``update_artifacts_state``.

        Returns:
            True after a bulk removal, None after the per-HIT walk.
        """

        def disable_and_forget(hit_id):
            # Disable the HIT remotely, then drop the local artifact and
            # update the state of the prompts it referenced.  MTurk request
            # errors simply propagate to the caller.
            self.conn.disable_hit(hit_id)
            prompts = self.mh.get_artifact("elicitation_hits",
                                           {"_id": hit_id}, "prompts")
            self.mh.remove_elicitation_hit(hit_id)
            if prompts:
                self.mh.update_artifacts_state("prompts", prompts)

        all_hits = self.conn.get_all_hits()

        if raw_input("Remove all hits with no assignments?") == "y":
            for hit in all_hits:
                hit_id = hit.HITId
                if len(self.conn.get_assignments(hit_id)) == 0:
                    disable_and_forget(hit_id)
            return True

        for hit in all_hits:
            hit_id = hit.HITId
            print("HIT ID: %s" % hit_id)
            assignment_count = len(self.conn.get_assignments(hit_id))
            if assignment_count == 0:
                question = "Remove hit with no submitted assignments?(y/n)"
                if raw_input(question) == "y":
                    disable_and_forget(hit_id)
            else:
                question = ("Remove hit with %s submitted assignments?(y/n)" %
                            assignment_count)
                if raw_input(question) == "y":
                    # HITs with submissions are only disabled remotely; the
                    # local artifacts are kept.
                    self.conn.disable_hit(hit_id)

    def run(self):
        """Show the interactive pipeline menu until the operator exits.

        Entering "1" to "7" runs the matching pipeline step and shows the
        menu again; any other input (including "8") ends the loop.
        """
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        menu = """Prompt Source raw to Elicitations-Approved Pipeline:\n
                                     1: PromptSource-Load_RawToList: Load Resource Management 1 prompt source files to queueable prompts
                                     2: Prompt-ReferencedToHit: Queue all referenced prompts and create a HIT if the queue is full.
                                     3: Prompt-HitToAssignmentSubmitted: Check all submitted assignments for Elicitations and download elicitations.
                                     4: Maintain all assignments and hits.
                                     5: (WARNING, approves all assignments) Approve all submitted assignments.
                                     6: Calculate assignment stats.
                                     7: Hand approve submitted assignments by elicitation and/or by worker. 
                                     8: Exit
                                    """
        # Menu choice -> zero-argument callable performing that step.
        steps = {
            "1": lambda: self.load_PromptSource_RawToList(prompt_file_uri),
            "2": self.enqueue_prompts_and_generate_hits,
            "3": self.load_assignment_hit_to_submitted,
            "4": self.allhits_liveness,
            "5": self.approve_assignment_submitted_to_approved,
            "6": self.get_assignment_stats,
            "7": self.approve_assignment_by_worker,
        }
        while True:
            step = steps.get(raw_input(menu))
            if step is None:
                # "8" and every unrecognised answer leave the menu.
                break
            step()


#    prompt_dict = self.ph.get_prompts(prompt_file_uri)

#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("elicitation_assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("elicitation_assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)
Exemplo n.º 35
0
class MTurk(object):
    def __init__(self, app=None):
        self.host = 'https://mechanicalturk.sandbox.amazonaws.com'
        self.secret_key = None
        self.access_id = None
        self.app = app

        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        app.config.setdefault('MTURK_SECRET_KEY', None)
        app.config.setdefault('MTURK_ACCESS_ID', None)
        app.config.setdefault('MTURK_SANDBOX', True)
        self.update_credentials(app.config['MTURK_ACCESS_ID'],
                                app.config['MTURK_SECRET_KEY'])
        self.is_sandbox = app.config['MTURK_SANDBOX']
        self.valid_login = self.verify_aws_login()

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def verify_aws_login(self):
        if ((self.aws_secret_access_key is None)
                or (self.aws_access_key_id is None)):
            logging.warning('No AWS keys found in app configuration')
        else:
            host = 'mechanicalturk.amazonaws.com'
            params = dict(aws_access_key_id=self.aws_access_key_id,
                          aws_secret_access_key=self.aws_secret_access_key,
                          host=host)
            self.mtc = MTurkConnection(**params)
            try:
                self.mtc.get_account_balance()
            except MTurkRequestError as e:
                return dict(success=False, message=e.error_message)
            else:
                return True

    def connect_to_turk(self):
        if not self.valid_login:
            logging.warning(
                'Sorry, unable to connect to Amazon Mechanical Turk. Please check your credentials'
            )
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'

        mturkparams = dict(aws_access_key_id=self.aws_access_key_id,
                           aws_secret_access_key=self.aws_secret_access_key,
                           host=host)
        self.mtc = MTurkConnection(**mturkparams)
        return True

    def get_account_balance(self):
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            balance = self.mtc.get_account_balance()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        else:
            return balance

    def get_reviewable_hits(self):
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

        reviewable_hits = [
            hit for hit in hits
            if hit.HITStatus == "Reviewable" or hit.HITStatus == "Reviewing"
        ]

        hits_data = [
            MTurkHIT({
                'hitid': hit.HITId,
                'title': hit.Title,
                'status': hit.HITStatus,
                'max_assignments': hit.MaxAssignments,
                'number_assignments_completed':
                hit.NumberOfAssignmentsCompleted,
                'number_assignments_pending': hit.NumberOfAssignmentsPending,
                'number_assignments_available':
                hit.NumberOfAssignmentsAvailable,
                'creation_time': hit.CreationTime,
                'expiration': hit.Expiration
            }) for hit in reviewable_hits
        ]

        return hits_data

    def get_all_hits(self):
        """ Get all HITs """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        hits_data = [
            MTurkHIT({
                'hitid': hit.HITId,
                'title': hit.Title,
                'status': hit.HITStatus,
                'max_assignments': hit.MaxAssignments,
                'number_assignments_completed':
                hit.NumberOfAssignmentsCompleted,
                'number_assignments_pending': hit.NumberOfAssignmentsPending,
                'number_assignments_available':
                hit.NumberOfAssignmentsAvailable,
                'creation_time': hit.CreationTime,
                'expiration': hit.Expiration,
            }) for hit in hits
        ]
        return hits_data

    def get_active_hits(self):
        """ Get active HITs """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        # hits = self.mtc.search_hits()
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        active_hits = [hit for hit in hits if not hit.expired]
        hits_data = [
            MTurkHIT({
                'hitid': hit.HITId,
                'title': hit.Title,
                'status': hit.HITStatus,
                'max_assignments': hit.MaxAssignments,
                'number_assignments_completed':
                hit.NumberOfAssignmentsCompleted,
                'number_assignments_pending': hit.NumberOfAssignmentsPending,
                'number_assignments_available':
                hit.NumberOfAssignmentsAvailable,
                'creation_time': hit.CreationTime,
                'expiration': hit.Expiration,
            }) for hit in active_hits
        ]
        return hits_data

    def get_hit(self, hit_id, response_groups=None):
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            hit = self.mtc.get_hit(hit_id, response_groups)[0]
        except MTurkRequestError as e:
            return False
        return hit

    def get_workers(self, assignment_status=None):
        """ Get workers """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            hits = self.mtc.search_hits(sort_direction='Descending',
                                        page_size=20)
        except MTurkRequestError as e:
            return False
        hit_ids = [hit.HITId for hit in hits]
        workers_nested = [
            self.mtc.get_assignments(hit_id,
                                     status=assignment_status,
                                     sort_by='SubmitTime',
                                     page_size=100) for hit_id in hit_ids
        ]

        workers = [val for subl in workers_nested
                   for val in subl]  # Flatten nested lists

        worker_data = [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus,
            'completion_code': worker.answers[0][0].fields[0]
        } for worker in workers]
        return worker_data

    def bonus_worker(self, assignment_id, amount, reason=""):
        """ Bonus worker """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            worker_id = assignment.WorkerId
            self.mtc.grant_bonus(worker_id, assignment_id, bonus, reason)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def approve_worker(self, assignment_id, feedback=None):
        """ Approve worker """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.approve_assignment(assignment_id, feedback=feedback)
            return True
        except MTurkRequestError as e:
            return False

    def reject_worker(self, assignment_id):
        """ Reject worker """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def unreject_worker(self, assignment_id):
        """ Unreject worker """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def assign_qualification(self,
                             qualification_type_id,
                             worker_id,
                             value=1,
                             send_notification=True):
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.assign_qualification(qualification_type_id, worker_id,
                                          value, send_notification)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def revoke_qualification(self,
                             subject_id,
                             qualification_type_id,
                             reason=None):
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.revoke_qualification(subject_id, qualification_type_id,
                                          reason)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def notify_worker(self, worker_id, subject, message_text):
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.notify_workers(worker_id, subject, message_text)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def list_workers_with_qualification(self, qualification_type_id):
        if not self.connect_to_turk():
            return False
        try:
            workers = self.mtc.get_all_qualifications_for_qual_type(
                qualification_type_id)
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        workers = [w.SubjectId for w in workers]
        return workers
Exemplo n.º 36
0
class mTurk():
    """Convenience wrapper around an ``MTurkConnection`` bound to the sandbox.

    NOTE(review): the methods log through a module-level ``log`` object, not
    through the ``log`` class attribute (which stays ``None``). That global is
    not defined in this excerpt -- confirm it exists at module level.
    """
    ACCESS_ID = ''
    SECRET_KEY = ''
    HOST = ''
    mtc = None
    log = None

    def __init__(self, access_id, secret_key, debug=True):
        """Store the credentials and open a connection to the sandbox host.

        Raises ``Exception`` when ``debug`` is false; only the sandbox host
        is ever configured here.
        """
        self.ACCESS_ID = access_id
        self.SECRET_KEY = secret_key
        if debug:
            self.HOST = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            raise Exception('sure you want to spend money')

        self.mtc = MTurkConnection(aws_access_key_id=self.ACCESS_ID,
                                   aws_secret_access_key=self.SECRET_KEY,
                                   host=self.HOST)

    def getBalance(self):
        """Return the raw result of ``get_account_balance()``."""
        return self.mtc.get_account_balance()

    def hasEnoughMoney(self, cost):
        """Return True when ``cost`` is strictly below the account balance."""
        # The balance call returns a result list whose first entry carries the
        # numeric balance in ``.amount``; comparing ``cost`` with the list
        # itself never gives a meaningful answer.
        return cost < self.getBalance()[0].amount

    def approveAssignemnt(self, a_id, feedback="Thanks for contribution"):
        """Approve assignment ``a_id`` unless its status is already "Approved"."""
        assignment = self.mtc.get_assignment(a_id)[0]
        log.debug("%s", assignment.__dict__)

        if assignment.AssignmentStatus != "Approved":
            ret = self.mtc.approve_assignment(a_id, feedback)
            log.debug("approve ret: %s", ret)
        else:
            log.debug("already approvred")

    def rejectAssigment(self, a_id, feedback="Sorry, but your work was not satifactory"):
        """Reject assignment ``a_id`` with the given feedback text."""
        ret = self.mtc.reject_assignment(a_id, feedback)
        log.debug("reject ret: %s", ret)

    def createExternalQuestion(self, title, description, keywords, url, duration, reward):
        """Create a single-assignment HIT for an external URL and return it.

        The question is built as ``ExternalQuestion(url, 1000)``; the first
        element of the ``create_hit`` result is returned.
        """
        ex_q = ExternalQuestion(url, 1000)
        res = self.mtc.create_hit(question=ex_q, max_assignments=1, title=title,
                                  description=description, keywords=keywords,
                                  duration=duration, reward=reward)
        log.debug("created external question %s", res[0])
        return res[0]

    def getDataFromHit(self, assignmentId, workerId):
        """Return the answers of an assignment keyed by question id.

        A question with several answer fields maps to the list of fields; a
        question with one field maps to that single value. When the
        assignment was not submitted by ``workerId`` an empty dict is
        returned.
        """
        assignment = self.mtc.get_assignment(assignmentId)[0]
        log.debug("Answers of the worker %s", assignment.WorkerId)
        # Bind the result before the worker check so a mismatching worker
        # yields an empty dict instead of an UnboundLocalError at the return.
        ret = {}
        if assignment.WorkerId == workerId:
            for question_form_answer in assignment.answers[0]:
                if len(question_form_answer.fields) > 1:
                    log.debug("answers are >1")
                    ret[question_form_answer.qid] = question_form_answer.fields
                else:
                    ret[question_form_answer.qid] = question_form_answer.fields[0]

        return ret
#                print question_form_answer.qid," - "," ".join(question_form_answer.fields)           
#        # for r in res:        
#            # print "Your hit ID is this %s -> https://workersandbox.mturk.com/mturk/preview?groupId=%s"%(r.HITId,r.HITTypeId))
#        
#    # def printAllHits(self):
#    #        hits=self.mtc.get_all_hits()
#    #        for hit in hits:
#    #            print printAtt(hit,'HITId')            
#            
#  #   def rejectAllHITs(self):
#  #         hits=self.mtc.get_all_hits()
#  #         for hit in hits:
#  #             assignements = self.mtc.get_assignments(hit.HITId)
#  #             for assignment in assignements:
#  #                 try:
#  #                     self.mtc.reject_assignment(assignment.AssignmentId, "i'm just testing this functionality")
#  #                     print "Rejected the assignment %s of HIT %s"%(assignment.AssignmentId,hit.HITId)
#  #                 except Exception:
#  #                     print "ERROR with the assignment %s of HIT %s"%(assignment.AssignmentId,hit.HITId)
#  # #           ret = self.mtc.disable_hit(hit.HITId, "HITDetail")
##            print ret   
Exemplo n.º 37
0
# Frame height handed to ExternalQuestion; defined once and reused below
# instead of repeating the literal.
frameheight = 600
mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

# Qualification:
quals = Qualifications()
quals.add(PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo", "95"))
quals.add(LocaleRequirement("EqualTo", "US"))
#quals.add( NumberHitsApprovedRequirement("GreaterThanOrEqualTo", "100") )

# Specify all the HIT parameters
paramdict = dict(
    hit_type = None,
    question = mturkQuestion,
    lifetime = datetime.timedelta(1),  # How long the HIT will be available
    max_assignments = 50, # Total times it will be assigned, not max per turker
    title = "Paid volunteers needed for an online experiment in Psychology",
    description = "Learn to categorize a set of cards over a series of training trials.",
    keywords = "New York University, psychology experiment, category learning",
    reward = 0.75,
    duration = datetime.timedelta(hours=2),
    approval_delay = None,
    annotation = None,  # Do we need this? Not clear on what it is.
    questions = None,
    qualifications = quals
)

# create_hit returns a result list; the first element is the created HIT.
myhit = mtc.create_hit(**paramdict)[0]
hitid = myhit.HITId

# Single-argument call form prints the same text under Python 2 and 3.
print(mtc.get_account_balance())  # Tests the connection
Exemplo n.º 38
0
from boto.mturk.connection import MTurkConnection
from boto.mturk.question import QuestionContent, Question, QuestionForm, Overview, AnswerSpecification, SelectionAnswer, FormattedContent, FreeTextAnswer
from keys import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY

# When true, the account balance is printed right after connecting.
debug = True

HOST = 'mechanicalturk.sandbox.amazonaws.com'

PIN_IMAGE_URL = 'http://media-cache-ak0.pinimg.com/236x/17/8f/99/178f993435fb2718ab6e22ba29d704e0.jpg'
PIN_IMAGE_TITLE = "Arnhem Clothing 'Song Bird' Kimono in Mayan Song. Via Soleilblue.."

mtc = MTurkConnection(host=HOST,
                      aws_access_key_id=AWS_ACCESS_KEY_ID,
                      aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

if debug:
    print(mtc.get_account_balance())

title = "Match these Pictures to Macy's Products"
description = "Look at this photo and match it to Macy's products"
keywords = 'clothing, rating, opinions, easy, quick, macys'

# Build the overview: a title field followed by the pin image and its caption.
overview = Overview()
overview.append_field(
    'Title', 'Find three Macys.com Product Web IDs That Match')
overview.append(
    FormattedContent(
        '<img src="%s" alt="Pintrest Image" /><br />%s'
        % (PIN_IMAGE_URL, PIN_IMAGE_TITLE)))

#make webid1
class TranscriptionPipelineHandler():
    def __init__(self):
        """Open the MTurk connection and build every pipeline helper.

        Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_ACCESS_KEY`` environment variables. The starting balance is
        taken from the ``amount`` of the first account-balance entry.
        """
        environ = os.environ
        access_key_id = environ['AWS_ACCESS_KEY_ID']
        secret_key = environ['AWS_ACCESS_KEY']

        connection = MTurkConnection(aws_access_key_id=access_key_id,
                                     aws_secret_access_key=secret_key,
                                     host=HOST)
        self.conn = connection

        # Helpers that talk to MTurk share the single connection.
        self.ah = AssignmentHandler(connection)
        self.th = TurkerHandler(connection)
        self.hh = HitHandler(connection, TEMPLATE_DIR)
        self.mh = MongoTranscriptionHandler()
        self.wh = WavHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)
        self.balance = connection.get_account_balance()[0].amount
        self.logger = logging.getLogger(
            "transcription_engine.transcription_pipeline_handler")
        
    def audio_clip_referenced_to_hit(self,priority=1,max_queue_size=10):    
        for audio_clip in self.mh.get_artifacts_by_state("audio_clips","Referenced"):
            audio_clip_id = audio_clip["_id"]
            self.mh.queue_clip(audio_clip_id, priority, max_queue_size)
            response = self.audio_clip_queue_to_hit()

    def audio_clip_queued_to_hit(self,priority=1,max_queue_size=10):    
        for audio_clip in self.mh.get_artifacts("audio_clips",{"state":"Queued"}):
            audio_clip_id = audio_clip["_id"]
            response = self.audio_clip_queue_to_hit()
            #===================================================================
            # elif state == "Hit":
            #     print("In hit: %s"%audio_clip_url)
            #===================================================================

    
    def audio_clip_queue_to_hit(self,cost_sensitive=True):
        """Take queued audio clips from the audio clip queue
            put them in a hit and create the hit.
            If successful, update the audio clip state."""
        clip_queue = self.mh.get_audio_clip_queue()
        clip_pairs = self.mh.get_audio_clip_pairs(clip_queue)
        if clip_pairs:
            hit_title = "Audio Transcription"
            question_title = "List and Transcribe" 
            description = "Transcribe the audio clip by typing the words the person says in order."
            keywords = "audio, transcription, audio transcription"
            if cost_sensitive:
                reward_per_clip = 0.02
                max_assignments = 3
                estimated_cost = self.hh.estimate_html_HIT_cost(clip_pairs,reward_per_clip,max_assignments)
                clips_in_hits = self.mh.clips_already_in_hit(clip_pairs)
                if clips_in_hits:
                    #If one or more clips are already in a HIT, remove it from the queue
                    self.mh.remove_audio_clips_from_queue(clips_in_hits)
                elif self.balance - estimated_cost >= 250:
                    #if we have enough money, create the HIT
                    response = self.hh.make_html_transcription_HIT(clip_pairs,hit_title,
                                                 question_title, description, keywords)
                    self.balance = self.balance - estimated_cost
                    if type(response) == ResultSet and len(response) == 1 and response[0].IsValid:
                        response = response[0]
                        self.mh.remove_audio_clips_from_queue(clip_queue)
                        audio_clip_ids = [w["audio_clip_id"] for w in clip_queue]    
                        hit_id = response.HITId
                        hit_type_id = response.HITTypeId
                        self.mh.create_transcription_hit_artifact(hit_id,hit_type_id,clip_queue,"New")        
                        self.logger.info("Successfully created HIT: %s"%hit_id)
                        return self.mh.update_audio_clips_state(audio_clip_ids,"Hit")
                else:
                    pass
        return False
            
    def load_assignments_hit_to_submitted(self):
        """Check all assignments for audio clip IDs.
            Update the audio clips.
            This is a non-destructive load of the assignments from MTurk"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            assignments = self.conn.get_assignments(hit_id)
            have_all_assignments = True
            assignment_ids = []
            for assignment in assignments:
                assignment_ids.append(assignment.AssignmentId)  
                if self.mh.get_artifact("assignments",{"_id":assignment.AssignmentId}):
                    #We create assignments here, so if we already have it, skip
                    continue   
                else:
                    have_all_assignments = False                                         
                transcription_ids = []                
                transcription_dicts = self.ah.get_assignment_submitted_transcriptions(assignment)   
                if transcription_dicts and len(transcription_dicts)==10:
                    pass             
                for transcription in transcription_dicts:
                    if not self.mh.get_artifact_by_id("audio_clips",transcription["audio_clip_id"]): 
                        self.logger.info("Assignment(%s) with unknown audio clip(%s) skipped"%\
                                    (assignment.AssignmentId,transcription["audio_clip_id"]))
                        break 
                    self.mh.update_transcription_state(transcription,"Submitted")
                    self.mh.update_audio_clips_state([transcription["audio_clip_id"]], "Submitted")
                    transcription_ids.append(self.mh.get_artifact("transcriptions",{"audio_clip_id" : transcription["audio_clip_id"],
                                                                        "assignment_id" : transcription["assignment_id"]},
                                                                       "_id"))
                else:
                    self.mh.create_assignment_artifact(assignment,
                                                   transcription_ids,
                                                   "Submitted")
            if assignments and not have_all_assignments:
                self.mh.update_transcription_hit_state(hit_id,"Submitted")
            print("Transcriptions HIT(%s) submitted assignments: %s "%(hit_id,assignment_ids))
            
    def assignment_submitted_approved(self):
        """For all submitted assignments,
            if an answered question has a reference transcription,
            check the WER.
            If all the answered questions with reference transcriptions
            have an acceptable WER, approve the assignment and update
            the audio clips and transcriptions."""
        assignments = self.mh.get_artifacts_by_state("assignments", "Submitted")
        rejected_feedback = "I'm sorry but your work in assignment(%s) was rejected because" +\
                            " one or more of your transcriptions " +\
                            " had a word error rate above the maximum acceptable"+\
                            " word error rate of %s. Omitted words and words that "+\
                            " differed by more than %s "+\
                            " characters were counted as an error."
        accepted_feedback = "Your average word error rate on assignment(%s) was %s."+\
                            " Assignment accepted! Thanks for your hard work."
        for assignment in assignments:
            assignment_id = assignment["_id"]
            transcription_ids = assignment["transcriptions"]
            transcriptions = self.mh.get_artifacts("transcriptions","_id",transcription_ids)

            worker_id = assignment["worker_id"]
            worker_id = self.mh.create_worker_artifact(worker_id)
            
            approved, average_wer  = self.filter.approve_assignment(transcriptions)
            if approved:
                try:
                    self.conn.approve_assignment(assignment_id, accepted_feedback%(assignment_id,average_wer))
                except MTurkRequestError as e:
                    print(e)
                else:
                    self.mh.update_assignment_state(assignment,"Approved")    
                    for transcription in transcriptions:
                        #Approve transcriptions without references in the same assignment
                        reference_id = self.mh.get_artifact_by_id("audio_clips",transcription["audio_clip_id"],"reference_transcription_id")
                        if not reference_id:
                            self.mh.update_transcription_state(transcription,"Approved")                                          
                    print("Approved transcription ids: %s"%transcription_ids)
            else:
                #Don't deny for now
                feedback = rejected_feedback%(assignment_id,self.filter.WER_THRESHOLD,self.filter.CER_THRESHOLD)
                self.logger.info(feedback)
                self.conn.reject_assignment(assignment_id,feedback)
                self.mh.update_assignment_state(assignment,"Denied")    
                #print("Assignments not aproved %s "%denied)
            #Update the worker
            if approved:
                self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
            
    def _load_rm_audio_source_file_to_clipped(self,file_dir,prompt_file_uri,
                                                   base_clip_dir,sample_rate=16000,
                                                   http_base_url = "http://www.cis.upenn.edu/~tturpen/wavs/",
                                                   init_clip_count = 200):
        """For an audio directory,
            see which files are new and not an audio source already
            """
        prompt_dict = self.ph.get_prompts(prompt_file_uri)
        count = 0
        for root, dirs, files in os.walk(file_dir):
            for f in files:
                if count == init_clip_count:
                    return
                system_uri = os.path.join(root,f)
                out_uri = system_uri.strip(".sph") + ".wav"
                out_uri = os.path.basename(out_uri)
                out_uri = os.path.join(root,(out_uri))
                spkr_id = str(os.path.relpath(root,file_dir))
                #sph to wav
                if not f.endswith(".wav") and not os.path.exists(out_uri):
                    try:
                        self.wh.sph_to_wav(system_uri,out_uri=out_uri)
                    except WavHandlerException as e:
                        self.logger.error("Unable to create wav from sph: "+str(e))
                        
                if os.path.exists(out_uri) and out_uri.endswith(".wav"):
                    #create audio source artifact
                    count += 1
                    wav_filename = os.path.basename(out_uri)
                    prompt_id = os.path.basename(out_uri).strip(".wav").upper()
                    encoding = ".wav"
                    sample_rate = 16000
                    disk_space = os.stat(out_uri).st_size
                    length_seconds = self.wh.get_audio_length(out_uri)
                    if prompt_id in prompt_dict:                        
                        transcription_prompt = prompt_dict[prompt_id]
                    else:
                        #No prompt found
                        raise PromptNotFound
                    source_id = self.mh.create_audio_source_artifact(out_uri,
                                                         disk_space,
                                                         length_seconds,
                                                         sample_rate,
                                                         spkr_id,
                                                         encoding)
                    #create audio clip artifact
                    audio_clip_uri = os.path.join(base_clip_dir,spkr_id,wav_filename)                    
                    clip_dir = os.path.dirname(audio_clip_uri)
                    if not os.path.exists(clip_dir):
                        os.makedirs(clip_dir)
                    if not os.path.exists(audio_clip_uri):
                        copyfile(out_uri,audio_clip_uri)     
                    #http_url
                    http_url = os.path.join(http_base_url,spkr_id,wav_filename)                   
                    clip_id = self.mh.create_audio_clip_artifact(source_id,
                                                       0,
                                                       -1,
                                                       audio_clip_uri,
                                                       http_url,
                                                       length_seconds,
                                                       disk_space)
                    
                    #Update the audio source, updates state too
                    self.mh.update_audio_source_audio_clip(source_id,clip_id)

                    #Create the reference transcription artifact
                    transcription_id = self.mh.create_reference_transcription_artifact(clip_id,
                                                                                       transcription_prompt,
                                                                                       "Gold")
                    #Completes audio clip to Referenced
                    self.mh.update_audio_clip_reference_transcription(clip_id,transcription_id)                    
        
    def all_workers_liveness(self):
        workers = self.mh.get_all_workers()
        for worker in workers:
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments(worker)
            print("Worker(%s) assignments, approved(%s) denied(%s)"%(worker["_id"],approved,denied))
            selection = input("1. Show denied transcriptions and references.\n"+
                                    "2. Show accepted transcriptions and references.\n"+
                                    "3. Show both denied and accepted transcriptions.")
            if selection == 1 or selection == 3:
                print("Approved transcriptions")
                for assignment_id in approved:
                    transcription_pairs = self.mh.get_transcription_pairs(assignment_id)
                    for pair in transcription_pairs:
                        print ("Reference:\n\t%s\nHypothesis:\n\t%s\n"%(pair[0],pair[1]))
            if selection == 2 or selection == 3:
                print("Denied transcriptions")
                for assignment_id in denied:
                    transcription_pairs = self.mh.get_transcription_pairs(assignment_id)
                    for pair in transcription_pairs:
                        print ("Reference:\n\t%s\nHypothesis:\n\t%s\n"%(pair[0],pair[1]))
            
    def stats(self):
        workers = self.mh.get_all_workers()
        all_wer_per_approved_assignment = 0.0
        total_accepted = 0.0
        for worker in workers:
            worker_wer = 0.0
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments_wer(worker)
            for w in approved: 
                all_wer_per_approved_assignment += float(w[1])
                worker_wer += float(w[1])
                total_accepted += 1
            if approved:
                worker_average_wer = worker_wer/len(approved)
                print("%s,%s"%(len(approved),worker_average_wer))
            #print("Worker(%s) approved assignments(%s)\n denied assignments(%s)"%(worker_id,approved,denied))
        av = all_wer_per_approved_assignment/total_accepted
        print("Average WER per assignment(%s)"%(av))
        
    def get_assignment_stats(self):
        self.effective_hourly_wage_for_approved_assignments(.20)                    
    
    def effective_hourly_wage_for_approved_assignments(self,reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""        
        approved_assignments = self.mh.get_artifacts_by_state("assignments","Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            if "SubmitTime" in assignment:
                accepted = datetime.datetime.strptime(assignment["AcceptTime"],"%Y-%m-%dT%H:%M:%SZ")
                submitted = datetime.datetime.strptime(assignment["SubmitTime"],"%Y-%m-%dT%H:%M:%SZ")
            else:
                pass
            total += submitted-accepted
            count += 1
        seconds_per_assignment = total.total_seconds()/count
        effective_hourly_wage = 60.0*60.0/seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s"%(seconds_per_assignment,reward_per_assignment,effective_hourly_wage))        
        
    def allhits_liveness(self):
        #allassignments = self.conn.get_assignments(hit_id)
        #first = self.ah.get_submitted_transcriptions(hit_id,str(clipid))

        hits = self.conn.get_all_hits()
        for hit in hits:
            hit_id = hit.HITId            
            print("HIT ID: %s"%hit_id)
            assignments = self.conn.get_assignments(hit_id)
            if len(assignments) == 0:
                if raw_input("Remove hit with no submitted assignments?(y/n)") == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                        clips = self.mh.get_artifact("transcription_hits",{"_id": hit_id},"clips")
                        self.mh.remove_transcription_hit(hit_id)
                        self.mh.update_audio_clips_state(clips, "Referenced")
                    except MTurkRequestError as e:
                        raise e
            else:
                if raw_input("Remove hit with %s submitted assignments?(y/n)"%len(assignments)) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                    except MTurkRequestError as e:
                        raise e
                    
    def run(self):
        """Run the interactive console menu of the transcription pipeline.

        The menu is shown repeatedly and the chosen step is executed until
        the user enters "11". Unknown entries simply show the menu again.
        Option 9 reports that it is unavailable when the handler has no
        ``recalculate_worker_assignment_wer`` method.
        """
        audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/ind_trn"
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        base_clip_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/clips"
        init_clip_count = 10000

        def load_audio_sources():
            self._load_rm_audio_source_file_to_clipped(audio_file_dir,
                                                       prompt_file_uri,
                                                       base_clip_dir,init_clip_count=init_clip_count)

        def show_balance():
            print("Account balance: %s"%self.balance)

        def recalculate_worker_wer():
            # Look the method up at call time so a missing implementation is
            # reported instead of ending the menu loop with AttributeError.
            recalculate = getattr(self, "recalculate_worker_assignment_wer", None)
            if recalculate is None:
                print("Recalculate worker WER is not available")
            else:
                recalculate()

        # Menu entry -> action, replacing a long if/elif chain.
        actions = {
            "1": load_audio_sources,
            "2": self.audio_clip_referenced_to_hit,
            "3": self.load_assignments_hit_to_submitted,
            "4": self.assignment_submitted_approved,
            "5": self.allhits_liveness,
            "6": self.all_workers_liveness,
            "7": show_balance,
            "8": self.stats,
            "9": recalculate_worker_wer,
            "10": self.get_assignment_stats,
        }

        selection = 0
        while selection != "11":
            selection = raw_input("""Audio Source file to Audio Clip Approved Pipeline:\n
                                     1: AudioSource-FileToClipped: Initialize Resource Management audio source files to %d queueable(Referenced) clips
                                     2: AudioClip-ReferencedToHit: Queue all referenced audio clips and create a HIT if the queue is full.
                                     3: AudioClip-HitToSubmitted: Check all submitted assignments for Transcriptions.
                                     4: AudioClip-SubmittedToApproved: Check all submitted clips against their reference.
                                     5: Review Current Hits
                                     6: Worker liveness
                                     7: Account balance
                                     8: Worker stats
                                     9: Recalculate worker WER                                     
                                     10: Assignment Stats
                                     11: Exit
                                    """%init_clip_count)
            action = actions.get(selection)
            if action is not None:
                action()

#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)
                    
#     def recalculate_worker_assignment_wer(self):
#         """For all submitted assignments,
#             if an answered question has a reference transcription,
#             check the WER.
#             If all the answered questions with reference transcriptions
#             have an acceptable WER, approve the assignment and update
#             the audio clips and transcriptions."""
#         assignments = self.mh.get_artifacts("assignments",{"state":"Approved"})        
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             denied = []
#             #If no transcriptions have references then we automatically approve the HIT
#             approved = True
#             transcription_ids = assignment["transcriptions"]
#             transcriptions = self.mh.get_transcriptions("_id",transcription_ids)
#             worker_id = assignment["worker_id"]
#             worker_id = self.mh.create_worker_artifact(worker_id)
#             
#             max_rej_wer = (0.0,0.0)
#             total_wer = 0.0
#             for transcription in transcriptions:
#                 #Normalize the transcription
#                 #self.mh.normalize_transcription
#                 reference_id = self.mh.get_audio_clip_by_id(transcription["audio_clip_id"],"reference_transcription_id")
#                 if reference_id:
#                     reference_transcription = self.mh.get_reference_transcription({"_id": reference_id},
#                                                                                   "transcription")
#                     new_transcription = transcription["transcription"].split(" ")
#                     if reference_transcription:
#                         transcription_wer = cer_wer(reference_transcription,new_transcription)
#                         total_wer += transcription_wer
#                         if transcription_wer < WER_THRESHOLD:
#                             self.logger.info("WER for transcription(%s) %d"%(transcription["transcription"],transcription_wer))
#                         else:
#                             max_rej_wer = (transcription_wer,WER_THRESHOLD)
#                             denied.append((reference_transcription,new_transcription))
#                             approved = False
#             average_wer = total_wer/len(transcriptions)
#             #Update the worker
#             self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
Exemplo n.º 40
0
#HIT="2JNL8I9NZW6HG96GKYHWCT87ATCVL9"
#HOST = 'mechanicalturk.amazonaws.com'

# https://mechanicalturk.amazonaws.com/?Service=AWSMechanicalTurkRequester
# Choose the MTurk host: SAND == 1 selects the sandbox, anything else the
# live endpoint.
if SAND != 1:
    print("not Sandbox")
    HOST = 'mechanicalturk.amazonaws.com'
else:
    print("sandbox")
    HOST = 'mechanicalturk.sandbox.amazonaws.com'

conn = MTurkConnection(host=HOST,
                       aws_access_key_id=ACCESS_ID,
                       aws_secret_access_key=SECRET_KEY)

# Print the account balance right after connecting.
print(conn.get_account_balance())


def accept_and_pay(worker_id, assign_id, bonus_price, reason):
    """Approve an assignment and, when ``bonus_price`` is positive, grant a bonus.

    Originally documented as: pays for assignment; returns False if something
    went wrong, else True.

    NOTE(review): this snippet is cut off after the second ``except`` line, so
    no return statement is visible here -- confirm the return contract
    against the complete source.
    """
    # Step 1: approve the assignment; a failure is only reported on stdout
    # and execution continues with the bonus step below.
    try:
        result = conn.approve_assignment(assign_id)
    except MTurkRequestError:
        print "Couldn't pay"
        #TODO: make sure to avoid the possibility of paying the same bonus twice
    # Step 2: grant the bonus, skipped when bonus_price is not positive.
    try:
        if bonus_price > 0:
            conn.grant_bonus(worker_id, assign_id, Price(amount=bonus_price),
                             reason)
    except MTurkRequestError:
        #TODO: less embarrasing error handling
Exemplo n.º 41
0
def Bank():
    """Print the Mechanical Turk account balance under a 'Funds: ' heading.

    Opens a new MTurkConnection from the module-level ACCESS_ID, SECRET_KEY
    and HOST settings each time it is called.
    """
    credentials = dict(aws_access_key_id=ACCESS_ID,
                       aws_secret_access_key=SECRET_KEY,
                       host=HOST)
    connection = MTurkConnection(**credentials)
    print('Funds: ')
    print(connection.get_account_balance())
Exemplo n.º 42
0
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
        Gets account balance at MTURK.

        The program reads the default config in the resources directory
        ('../resources/private/mturk.cfg') and any additional config files passed as
        an argument of a '-c'. The additional config file overwrites any
        default or previous values.

        Remember that the "private" directory is not distributed and it contains
        mainly information that you do not want to share.
      """)

    parser.add_argument('-c', "--configs", nargs='+',
                        help='additional configuration files')
    args = parser.parse_args()

    mturk_cfg_fname = as_project_path('resources/private/mturk.cfg')
    cfg = Config.load_configs([mturk_cfg_fname] + args.configs, log=False)

    print "Gets MTURK account balance"
    print "-" * 120
    print

    conn = MTurkConnection(aws_access_key_id = cfg['MTURK']['aws_access_key_id'],
                           aws_secret_access_key = cfg['MTURK']['aws_secret_access_key'],
                           host = cfg['MTURK']['host'])

    print "Account balance:", conn.get_account_balance()
Exemplo n.º 43
0
        Gets account balance at MTURK.

        The program reads the default config in the resources directory
        ('../resources/private/mturk.cfg') and any additional config files passed as
        an argument of a '-c'. The additional config file overwrites any
        default or previous values.

        Remember that the "private" directory is not distributed and it contains
        mainly information that you do not want to share.
      """)

    parser.add_argument('-c',
                        "--configs",
                        nargs='+',
                        help='additional configuration files')
    args = parser.parse_args()

    mturk_cfg_fname = as_project_path('resources/private/mturk.cfg')
    cfg = Config.load_configs([mturk_cfg_fname] + args.configs, log=False)

    print "Gets MTURK account balance"
    print "-" * 120
    print

    conn = MTurkConnection(
        aws_access_key_id=cfg['MTURK']['aws_access_key_id'],
        aws_secret_access_key=cfg['MTURK']['aws_secret_access_key'],
        host=cfg['MTURK']['host'])

    print "Account balance:", conn.get_account_balance()
Exemplo n.º 44
0
class MTurkServices(object):
    ''' Convenience wrapper around MTurkConnection for managing HITs.

    The credentials are checked once at construction time.  Most public
    operations reconnect through connect_to_turk() and signal failure with
    a return value (False, or '-' for check_balance) rather than raising.
    '''

    _LIVE_HOST = 'mechanicalturk.amazonaws.com'
    _SANDBOX_HOST = 'mechanicalturk.sandbox.amazonaws.com'

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        ''' Store credentials and sandbox flag, then verify the AWS login.

        Prints a warning when the credentials cannot be verified; the
        instance is still created with valid_login set to False.
        '''
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.valid_login = self.verify_aws_login()

        if not self.valid_login:
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        ''' Replace the stored AWS key pair (valid_login is not re-checked) '''
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        ''' Choose the sandbox (truthy) or live (falsy) endpoint for later calls '''
        self.is_sandbox = is_sandbox

    def _fetch_hits(self):
        ''' Return the result of get_all_hits(), or None on connect/request failure '''
        if not self.connect_to_turk():
            return None
        try:
            return self.mtc.get_all_hits()
        except MTurkRequestError:
            return None

    @staticmethod
    def _hits_to_data(hits):
        ''' Build one MTurkHIT per HIT from its summary fields '''
        return [MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        }) for hit in hits]

    def get_reviewable_hits(self):
        ''' Get HITs whose status is "Reviewable" or "Reviewing".

        Returns a list of MTurkHIT objects, or False on failure.
        '''
        hits = self._fetch_hits()
        if hits is None:
            return False
        return self._hits_to_data(
            hit for hit in hits
            if hit.HITStatus in ("Reviewable", "Reviewing"))

    def get_all_hits(self):
        ''' Get all HITs as a list of MTurkHIT objects, or False on failure '''
        hits = self._fetch_hits()
        if hits is None:
            return False
        return self._hits_to_data(hits)

    def get_active_hits(self):
        ''' Get HITs that are not expired.

        Returns a list of MTurkHIT objects, or False on failure.
        '''
        hits = self._fetch_hits()
        if hits is None:
            return False
        return self._hits_to_data(hit for hit in hits if not hit.expired)

    def get_workers(self, assignment_status=None):
        ''' Get the assignments of every HIT, one dict per assignment.

        assignment_status is forwarded to get_assignments() as ``status``.
        Each HIT is read page by page (100 per page).  Returns a list of
        dicts, or False on failure.
        '''
        if not self.connect_to_turk():
            return False
        page_size = 100
        workers = []
        try:
            hit_ids = [hit.HITId for hit in self.mtc.get_all_hits()]
            for hit_id in hit_ids:
                query = dict(status=assignment_status,
                             sort_by='SubmitTime',
                             page_size=page_size)
                first_page = self.mtc.get_assignments(
                    hit_id, page_number=1, **query)
                workers.extend(first_page)

                # Ceiling division: number of pages needed for all results.
                total_results = int(first_page.TotalNumResults)
                total_pages = -(-total_results // page_size)
                for page_number in range(2, total_pages + 1):
                    workers.extend(self.mtc.get_assignments(
                        hit_id, page_number=page_number, **query))
        except MTurkRequestError:
            return False
        return [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus
        } for worker in workers]

    def bonus_worker(self, assignment_id, amount, reason=""):
        ''' Grant a bonus to the worker who holds the given assignment.

        The worker id is looked up from the assignment.  Returns True on
        success; prints the error and returns False on a request failure.
        '''
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            self.mtc.grant_bonus(assignment.WorkerId, assignment_id, bonus,
                                 reason)
            return True
        except MTurkRequestError as exception:
            print(exception)
            return False

    def approve_worker(self, assignment_id):
        ''' Approve an assignment; True on success, False on failure '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        ''' Reject an assignment; True on success, False on failure '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        ''' Approve a previously rejected assignment; True/False for success '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    def verify_aws_login(self):
        ''' Verify AWS login by requesting the account balance.

        Placeholder credentials are rejected without a request.  The check
        always uses the live endpoint, regardless of the sandbox flag.
        '''
        if ((self.aws_access_key_id == 'YourAccessKeyId') or
                (self.aws_secret_access_key == 'YourSecretAccessKey')):
            return False
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=self._LIVE_HOST)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as exception:
            print(exception.error_message)
            return False
        return True

    def connect_to_turk(self):
        ''' Create self.mtc for the sandbox or live endpoint.

        Returns False (after printing a message) when the login was not
        verified, True otherwise.
        '''
        if not self.valid_login:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS '
                  'credentials invalid.')
            return False
        host = self._SANDBOX_HOST if self.is_sandbox else self._LIVE_HOST
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        return True

    def configure_hit(self, hit_config):
        ''' Register a HIT type and store the create_hit() arguments.

        Reads 'ad_location', 'approve_requirement', 'us_only', 'title',
        'description', 'reward', 'duration', 'keywords', 'lifetime' and
        'max_assignments' from hit_config and leaves the keyword arguments
        for create_hit() in self.param_dict.  Expects self.mtc to exist.
        '''
        # configure question_url based on the id
        experiment_portal_url = hit_config['ad_location']
        frame_height = 600
        mturk_question = ExternalQuestion(experiment_portal_url, frame_height)

        # Qualification:
        quals = Qualifications()
        quals.add(
            PercentAssignmentsApprovedRequirement(
                "GreaterThanOrEqualTo", hit_config['approve_requirement']))
        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Create a HIT type for this HIT.
        # NOTE(review): the qualifications built above are passed to
        # create_hit() via param_dict but not to register_hit_type();
        # confirm they are applied when a hit_type id is supplied.
        hit_type = self.mtc.register_hit_type(
            hit_config['title'],
            hit_config['description'],
            hit_config['reward'],
            hit_config['duration'],
            keywords=hit_config['keywords'],
            approval_delay=None,
            qual_req=None)[0]

        # Check the config file to see if notifications are wanted.
        config = PsiturkConfig()
        config.load_config()

        try:
            url = config.get('Server Parameters', 'notification_url')
            self.mtc.set_rest_notification(
                hit_type.HITTypeId,
                url,
                event_types=[
                    "AssignmentAccepted",
                    "AssignmentAbandoned",
                    "AssignmentReturned",
                    "AssignmentSubmitted",
                    "HITReviewable",
                    "HITExpired",
                ])
        except Exception:
            # Notifications are best-effort: a missing option or a failed
            # request must not block HIT creation.  Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            pass

        # Specify all the HIT parameters
        self.param_dict = dict(
            hit_type=hit_type.HITTypeId,
            question=mturk_question,
            lifetime=hit_config['lifetime'],
            max_assignments=hit_config['max_assignments'],
            title=hit_config['title'],
            description=hit_config['description'],
            keywords=hit_config['keywords'],
            reward=hit_config['reward'],
            duration=hit_config['duration'],
            approval_delay=None,
            questions=None,
            qualifications=quals,
            response_groups=[
                'Minimal',
                'HITDetail',
                'HITQuestion',
                'HITAssignmentSummary'
            ])

    def check_balance(self):
        ''' Return the first account balance entry, or '-' if not connected '''
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    # TODO (if valid AWS credentials haven't been provided then
    # connect_to_turk() will fail, not error checking here and elsewhere)
    def create_hit(self, hit_config):
        ''' Configure and create a HIT.

        Returns the new HIT id (also stored in self.hitid), or False if
        connecting, configuring or creating the HIT fails for any reason.
        '''
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.param_dict)[0]
            self.hitid = myhit.HITId
        except Exception:
            # Any failure is reported as False; interpreter-exit signals
            # are no longer swallowed as a bare except would.
            return False
        return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        ''' Expire a HIT; True on success, False (with a message) on failure '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        ''' Dispose of a HIT.

        Returns False when not connected.  Otherwise returns None, printing
        a message if the dispose call raises.
        '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
        except Exception:
            print("Failed to dispose of HIT %s. Make sure there are no "
                  "assignments remaining to be reviewed." % hitid)
Exemplo n.º 45
0
class MTurkProvider(object):
    """Helper that manages Mechanical Turk HITs through an ``MTurkConnection``."""

    # HIT type description; create_hits() passes it to create_hit_type().
    description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \
                  'Mechanical Turk workers are welcome to do it'
    # HIT type keywords; create_hits() passes them to create_hit_type().
    keywords = ['daemo']
    # NOTE(review): not referenced in the visible part of this class -- confirm usage.
    countries = ['US', 'CA']
    # NOTE(review): not referenced in the visible part of this class -- confirm usage.
    min_hits = 1000

    def __init__(self, host, aws_access_key_id, aws_secret_access_key):
        self.host = host
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=settings.MTURK_HOST
        )
        self.connection.APIVersion = "2014-08-15"
        if not self.host:
            raise ValueError("Please provide a host url")

    def get_connection(self):
        return self.connection

    @staticmethod
    def _mturk_system_qualifications(qualification):
        requirements = []
        for item in qualification.items.all():
            if item.expression['attribute'] not in ['location', 'approval_rate', 'total_tasks']:
                continue
            requirement = None
            if item.expression['attribute'] == 'location':
                op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN
                requirement = MultiLocaleRequirement(op, [val.strip() for val in item.expression['value'] if
                                                          val is not None and val != ''])
            elif item.expression['attribute'] == 'approval_rate':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = PercentAssignmentsApprovedRequirement(op, item.expression['value'])
            elif item.expression['attribute'] == 'total_tasks':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = NumberHitsApprovedRequirement(op, item.expression['value'])

            requirements.append(requirement)
        return requirements

    def get_qualifications(self, project, boomerang_threshold, add_boomerang):
        """Build the qualification requirements for a project's HITs.

        Starts from the project's own qualification items (if any) and then
        adds Boomerang requirements when ``add_boomerang`` is true:

        * threshold at or below ``settings.BOOMERANG_MIDPOINT * 100``: one
          "does not exist" requirement per wait-list bucket whose upper
          bound (``bucket[1] * 100``) is at or below the threshold;
        * otherwise: a single "greater than or equal" requirement on the
          project's Boomerang score qualification.

        Returns:
            ``(Qualifications, boomerang_qual)`` where ``boomerang_qual`` is
            whatever ``create_qualification_type`` returned for the score
            qualification.
        """
        requirements = []
        if project.qualification is not None:
            requirements += self._mturk_system_qualifications(project.qualification)
        boomerang_qual, success = self.create_qualification_type(owner_id=project.owner_id,
                                                                 project_id=project.group_id,
                                                                 name='Boomerang Score #{}'.format(project.group_id),
                                                                 flag=FLAG_Q_BOOMERANG,
                                                                 description='No description available')
        if boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100):
            for i, bucket in enumerate(WAIT_LIST_BUCKETS):
                if int(bucket[1] * 100) <= boomerang_threshold:
                    boomerang_blacklist, waitlist_ok = \
                        self.create_qualification_type(owner_id=project.owner_id,
                                                       name='Boomerang Waitlist #{}-{}'.format(project.group_id, len(
                                                           WAIT_LIST_BUCKETS) - i),
                                                       flag=FLAG_Q_BOOMERANG,
                                                       description='No description available',
                                                       deny=True,
                                                       project_id=project.group_id,
                                                       bucket=bucket)
                    if waitlist_ok and add_boomerang:
                        requirements.append(
                            BoomerangRequirement(qualification_type_id=boomerang_blacklist.type_id,
                                                 comparator=OP_DNE,
                                                 integer_value=None))
        elif success and add_boomerang:
            # Build the requirement only after the score qualification is
            # known to exist; reading ``type_id`` before checking ``success``
            # would fail if the creation call did not return a usable object.
            requirements.append(
                BoomerangRequirement(qualification_type_id=boomerang_qual.type_id, comparator=OP_GTEQ,
                                     integer_value=boomerang_threshold))
        return Qualifications(requirements), boomerang_qual

    def create_hits(self, project, tasks=None, repetition=None):
        """Create an MTurk HIT for each task row, or update its HIT type.

        ``tasks`` is a sequence of rows indexed as the query below selects
        them: ``[0]`` task id, ``[1]`` repetition, ``[2]`` group id, ``[3]``
        remaining assignments, ``[4]`` minimum rating.  When ``tasks`` is
        falsy the rows are loaded with that query for ``project``.
        ``repetition`` is not used in this method.

        Returns 'SUCCESS' after all rows are processed, 'FAILURE' when a HIT
        type cannot be obtained, and 'FAILED' when ``create_hit`` raises
        ``MTurkRequestError``.
        """
        # if project.min_rating > 0:
        #     return 'NOOP'
        if not tasks:
            cursor = connection.cursor()
            # noinspection SqlResolve
            query = '''
                SELECT
                  max(id)                   id,
                  repetition,
                  group_id,
                  repetition - sum(existing_assignments) remaining_assignments,
                  min_rating
                FROM (
                       SELECT
                         t_rev.id,
                         t.group_id,
                         t.min_rating,
                         p.repetition,
                         CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s)
                           THEN 0
                         ELSE 1 END existing_assignments
                       FROM crowdsourcing_task t
                         INNER JOIN crowdsourcing_project p ON t.project_id = p.id
                         INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id
                         LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id
                         LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id
                       WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL
                       AND t_rev.deleted_at IS NULL
                ) t
                GROUP BY group_id, repetition, min_rating HAVING sum(existing_assignments) < repetition;
            '''
            # Values are bound as query parameters, not interpolated into the SQL.
            cursor.execute(query, {'skipped': TaskWorker.STATUS_SKIPPED,
                                   'rejected': TaskWorker.STATUS_REJECTED,
                                   'expired': TaskWorker.STATUS_EXPIRED,
                                   'project_id': project.id})
            tasks = cursor.fetchall()

        # Boomerang requirements are only added once at least one
        # requester-originated rating exists.
        rated_workers = Rating.objects.filter(origin_type=Rating.RATING_REQUESTER).count()
        add_boomerang = rated_workers > 0

        # Defaults: 24 hours per assignment, 7 days of HIT lifetime.
        duration = project.timeout if project.timeout is not None else datetime.timedelta(hours=24)
        lifetime = project.deadline - timezone.now() if project.deadline is not None else datetime.timedelta(
            days=7)

        for task in tasks:
            question = self.create_external_question(task[0])
            # Existing HIT record for this task, if any.
            mturk_hit = MTurkHIT.objects.filter(task_id=task[0]).first()
            # task[4] (minimum rating) is rounded and scaled by 100 to get the threshold.
            qualifications, boomerang_qual = self.get_qualifications(project=project,
                                                                     boomerang_threshold=int(
                                                                         round(task[4], 2) * 100),
                                                                     add_boomerang=add_boomerang)
            qualifications_mask = 0
            if qualifications is not None:
                qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG
            hit_type, success = self.create_hit_type(title=project.name, description=self.description,
                                                     price=project.price,
                                                     duration=duration, keywords=self.keywords,
                                                     approval_delay=datetime.timedelta(days=2),
                                                     qual_req=qualifications,
                                                     qualifications_mask=qualifications_mask,
                                                     boomerang_threshold=int(round(task[4], 2) * 100),
                                                     owner_id=project.owner_id, boomerang_qual=boomerang_qual)
            if not success:
                return 'FAILURE'

            if mturk_hit is None:
                # No HIT yet for this task: create one and register notifications.
                try:
                    hit = self.connection.create_hit(hit_type=hit_type.string_id,
                                                     max_assignments=task[3],
                                                     lifetime=lifetime,
                                                     question=question)[0]
                    self.set_notification(hit_type_id=hit.HITTypeId)
                    mturk_hit = MTurkHIT(hit_id=hit.HITId, hit_type=hit_type, task_id=task[0])
                except MTurkRequestError as e:
                    error = e.errors[0][0]
                    if error == 'AWS.MechanicalTurk.InsufficientFunds':
                        # Tell the project owner about the funding problem.
                        message = {
                            "type": "ERROR",
                            "detail": "Insufficient funds on your Mechanical Turk account!",
                            "code": error
                        }

                        redis_publisher = RedisPublisher(facility='bot', users=[project.owner])
                        message = RedisMessage(json.dumps(message))
                        redis_publisher.publish_message(message)
                    # Any request error aborts the whole run, not just this task.
                    return 'FAILED'
            else:
                # A HIT already exists: move it to the new HIT type if it differs.
                if mturk_hit.hit_type_id != hit_type.id:
                    result, success = self.change_hit_type_of_hit(hit_id=mturk_hit.hit_id,
                                                                  hit_type_id=hit_type.string_id)
                    if success:
                        mturk_hit.hit_type = hit_type
            # Saved on every iteration, for new and existing HIT records alike.
            mturk_hit.save()
        return 'SUCCESS'

    def create_hit_type(self, owner_id, title, description, price, duration, boomerang_threshold, keywords=None,
                        approval_delay=None, qual_req=None,
                        qualifications_mask=0, boomerang_qual=None):
        """Return a stored HIT type matching the arguments, registering one if needed.

        A stored ``MTurkHITType`` is reused when owner, name, description,
        price, duration, qualifications mask and boomerang threshold all
        match.  Otherwise a HIT type is registered on Mechanical Turk and
        saved locally with the returned id in ``string_id``.

        Returns:
            ``(hit_type, True)`` on success, ``(None, False)`` when an
            ``MTurkRequestError`` is raised while registering or saving.
        """
        existing = MTurkHITType.objects.filter(
            owner_id=owner_id,
            name=title,
            description=description,
            price=Decimal(str(price)),
            duration=duration,
            qualifications_mask=qualifications_mask,
            boomerang_threshold=boomerang_threshold,
        ).first()
        if existing is not None:
            return existing, True

        reward = Price(price)
        try:
            registered = self.connection.register_hit_type(
                title=title,
                description=description,
                reward=reward,
                duration=duration,
                keywords=keywords,
                approval_delay=approval_delay,
                qual_req=qual_req,
            )[0]
            created = MTurkHITType(
                owner_id=owner_id,
                name=title,
                description=description,
                price=Decimal(str(price)),
                keywords=keywords,
                duration=duration,
                qualifications_mask=qualifications_mask,
                boomerang_qualification=boomerang_qual,
                boomerang_threshold=boomerang_threshold,
            )
            created.string_id = registered.HITTypeId
            created.save()
        except MTurkRequestError:
            return None, False
        return created, True

    def create_external_question(self, task, frame_height=800):
        """Build the ExternalQuestion that points at this task's page.

        ``task`` is encoded with Hashids (salted with ``settings.SECRET_KEY``)
        and appended as the ``taskId`` query parameter of
        ``<host>/mturk/task/``.
        """
        hasher = Hashids(salt=settings.SECRET_KEY, min_length=settings.ID_HASH_MIN_LENGTH)
        encoded_id = hasher.encode(task)
        task_url = self.host + '/mturk/task/?taskId=' + encoded_id
        return ExternalQuestion(external_url=task_url, frame_height=frame_height)

    def update_max_assignments(self, task):
        """Adjust a task's HIT so it matches the assignments still needed.

        ``task`` is a mapping with an ``'id'`` key.  The remaining count is
        the project's repetition minus the task workers that are not
        rejected, skipped or expired.

        * remaining > 0, every HIT assignment submitted and none in
          progress: add one assignment, extend the HIT, mark it in progress.
        * remaining > 0 and the HIT is marked expired: extend it and mark it
          in progress.
        * remaining == 0: expire the HIT.

        Every status change is saved.  Always returns 'SUCCESS'.

        Raises:
            MTurkHIT.DoesNotExist: if the task has no associated HIT.
        """
        task = Task.objects.get(id=task['id'])
        mturk_hit = task.mturk_hit
        if not mturk_hit:
            raise MTurkHIT.DoesNotExist("This task is not associated to any mturk hit")
        assignments_completed = task.task_workers.filter(~Q(status__in=[TaskWorker.STATUS_REJECTED,
                                                                        TaskWorker.STATUS_SKIPPED,
                                                                        TaskWorker.STATUS_EXPIRED])).count()
        remaining_assignments = task.project.repetition - assignments_completed
        if remaining_assignments > 0:
            # The short-circuiting ``and`` keeps the second count query from
            # running unless the first comparison holds.
            all_slots_used = \
                mturk_hit.num_assignments == mturk_hit.mturk_assignments. \
                filter(status=TaskWorker.STATUS_SUBMITTED).count() and \
                mturk_hit.mturk_assignments.filter(status=TaskWorker.STATUS_IN_PROGRESS).count() == 0
            if all_slots_used:
                self.add_assignments(hit_id=mturk_hit.hit_id, increment=1)
                self.extend_hit(hit_id=mturk_hit.hit_id)
                mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
                mturk_hit.num_assignments += 1
                mturk_hit.save()
            elif mturk_hit.status == MTurkHIT.STATUS_EXPIRED:
                self.extend_hit(hit_id=mturk_hit.hit_id)
                mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
                # Persist the status change, as the other branches do;
                # without this the HIT stays marked as expired.
                mturk_hit.save()
        elif remaining_assignments == 0:
            self.expire_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_EXPIRED
            mturk_hit.save()
        return 'SUCCESS'

    def get_assignment(self, assignment_id):
        try:
            return self.connection.get_assignment(assignment_id)[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.MechanicalTurk.InvalidAssignmentState':
                return assignment_id, False
            return None, False

    def set_notification(self, hit_type_id):
        self.connection.set_rest_notification(hit_type=hit_type_id,
                                              url=self.host + '/api/mturk/notification',
                                              event_types=['AssignmentReturned', 'AssignmentAbandoned',
                                                           'AssignmentAccepted', 'AssignmentSubmitted'])

    def approve_assignment(self, task_worker):
        """Approve the first MTurk assignment linked to a task worker.

        ``task_worker`` is a mapping with an ``'id'`` key.  Returns False
        only when the approval request raises ``MTurkRequestError``; a task
        worker without any MTurk assignment yields True.
        """
        worker = TaskWorker.objects.get(id=task_worker['id'])
        has_assignment = hasattr(worker, 'mturk_assignments') and \
            worker.mturk_assignments.first() is not None
        if not has_assignment:
            return True
        try:
            self.connection.approve_assignment(worker.mturk_assignments.first().assignment_id)
        except MTurkRequestError:
            return False
        return True

    def reject_assignment(self, task_worker):
        """Reject the first MTurk assignment linked to a task worker.

        ``task_worker`` is a mapping with an ``'id'`` key.  Returns False
        only when the rejection request raises ``MTurkRequestError``; a task
        worker without any MTurk assignment yields True.
        """
        worker = TaskWorker.objects.get(id=task_worker['id'])
        has_assignment = hasattr(worker, 'mturk_assignments') and \
            worker.mturk_assignments.first() is not None
        if not has_assignment:
            return True
        try:
            self.connection.reject_assignment(worker.mturk_assignments.first().assignment_id)
        except MTurkRequestError:
            return False
        return True

    def expire_hit(self, hit_id):
        try:
            self.connection.expire_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def disable_hit(self, hit_id):
        try:
            self.connection.disable_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def extend_hit(self, hit_id):
        try:
            self.connection.extend_hit(hit_id=hit_id, expiration_increment=604800)  # 7 days
        except MTurkRequestError:
            return False
        return True

    def add_assignments(self, hit_id, increment=1):
        try:
            self.connection.extend_hit(hit_id=hit_id, assignments_increment=increment)
        except MTurkRequestError:
            return False
        return True

    def test_connection(self):
        try:
            return self.connection.get_account_balance()[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.NotAuthorized':
                return None, False
            return None, False

    def get_account_balance(self):
        try:
            return self.connection.get_account_balance()[0]
        except MTurkRequestError:
            return None

    def create_qualification_type(self, owner_id, name, flag, description, project_id, auto_granted=False,
                                  auto_granted_value=None, deny=False, bucket=None):
        """
        Create or reuse a qualification type and assign it to the rated MTurk workers of a project.

        The ratings given by ``owner_id`` on the project are aggregated per worker with a raw SQL
        query. The local ``MTurkQualification`` (looked up by owner, flag and name) is created on
        MTurk and stored locally when it does not exist yet. Every rated worker whose username has
        the form ``mturk.<id>`` then gets the qualification with a value of ``rating * 100`` and a
        matching ``MTurkWorkerQualification`` row.

        Args:
            owner_id: rating origin used in the query; also the owner of the qualification.
            name: qualification name, sent to MTurk and stored locally.
            flag: stored on, and used to look up, the local qualification.
            description: qualification description, sent to MTurk and stored locally.
            project_id: compared against ``crowdsourcing_project.group_id`` in the query.
            auto_granted: forwarded to MTurk and stored locally.
            auto_granted_value: forwarded to MTurk and stored locally.
            deny: when true (and ``bucket`` is given) only ratings inside the bucket are used and
                the stored qualification is created with ``is_blacklist=True``.
            bucket: indexable pair ``(lower_bound, upper_bound)`` of ratings.

        Returns:
            tuple: ``(qualification, True)`` on success, or ``(None, False)`` when creating the
            qualification type on MTurk raised ``MTurkRequestError``.
        """
        # Per-worker weighted average of the rating weights: each rating is weighted by
        # BOOMERANG_TASK_ALPHA ** row_number, where row_number is 0 for the worker's most
        # recent task worker entry (ORDER BY tw.created_at DESC).
        # noinspection SqlResolve
        query = '''
            SELECT * FROM (
                SELECT
                  task.target_id,
                  task.username,
                  round(task.task_w_avg::NUMERIC, 2) rating
                  --round(coalesce(task.task_w_avg, requester.requester_w_avg,
                  --  platform.platform_w_avg)::NUMERIC, 2) rating
                FROM (
                               SELECT
                                 target_id,
                                 origin_id,
                                 project_id,
                                 username,
                                 sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number))
                                 / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg
                               FROM (

                                      SELECT
                                        r.id,
                                        r.origin_id,
                                        p.group_id                              project_id,
                                        weight,
                                        r.target_id,
                                        -1 + row_number()
                                        OVER (PARTITION BY target_id
                                          ORDER BY tw.created_at DESC) AS row_number,
                                          u.username username

                                      FROM crowdsourcing_rating r
                                        INNER JOIN crowdsourcing_task t ON t.id = r.task_id
                                        INNER JOIN crowdsourcing_project p ON p.id = t.project_id
                                        INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id
                                          AND tw.worker_id=r.target_id
                                        INNER JOIN auth_user u ON u.id = r.target_id
                                      WHERE origin_id = (%(origin_id)s) AND origin_type = (%(origin_type)s)) t
                               GROUP BY origin_id, target_id, project_id, username)
                             task WHERE task.project_id = (%(project_id)s)
            ) r
        '''
        # Optional filter appended to the query when a deny bucket is requested.
        extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);'
        # The REQUESTER/PLATFORM alphas are passed along, but only BOOMERANG_TASK_ALPHA is
        # referenced by the active (non-commented) part of the SQL above.
        params = {
            'origin_type': Rating.RATING_REQUESTER, 'origin_id': owner_id, 'project_id': project_id,
            'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA,
            'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA,
            'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA
        }
        # Extra fields stored on a newly created MTurkQualification; bounds use the same
        # "rating * 100" scale as the values assigned to workers below.
        obj_params = {'upper_bound': 300, 'lower_bound': 100}
        if deny and bucket is not None:
            query += extra_query
            params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]})
            obj_params.update({'upper_bound': bucket[1] * 100, 'lower_bound': bucket[0] * 100, 'is_blacklist': True})
        cursor = connection.cursor()
        cursor.execute(query, params=params)
        worker_ratings_raw = cursor.fetchall()
        # Row columns follow the outer SELECT: target_id, username, rating.
        worker_ratings = [{"worker_id": r[0], "worker_username": r[1], "rating": r[2]} for
                          r in worker_ratings_raw]

        qualification = MTurkQualification.objects.filter(owner_id=owner_id, flag=flag, name=name).first()
        assigned_workers = []
        if qualification is None:
            # No local record yet: create the type on MTurk first, then persist it locally.
            try:
                qualification_type = self.connection. \
                    create_qualification_type(name=name, description=description,
                                              status='Active',
                                              auto_granted=auto_granted,
                                              auto_granted_value=auto_granted_value)[0]
                qualification = MTurkQualification.objects.create(owner_id=owner_id, flag=flag, name=name,
                                                                  description=description,
                                                                  auto_granted=auto_granted,
                                                                  auto_granted_value=auto_granted_value,
                                                                  type_id=qualification_type.QualificationTypeId,
                                                                  **obj_params)
            except MTurkRequestError:
                return None, False
        else:
            # Existing qualification: collect the workers that already have it so they are
            # not assigned again on MTurk.
            assigned_workers = MTurkWorkerQualification.objects.values('worker').filter(
                qualification=qualification).values_list('worker', flat=True)

        for rating in worker_ratings:
            # Only usernames of the form 'mturk.<worker id>' are treated as MTurk workers.
            user_name = rating["worker_username"].split('.')
            if len(user_name) == 2 and user_name[0] == 'mturk':
                mturk_worker_id = user_name[1].upper()
                if mturk_worker_id not in assigned_workers:
                    # NOTE(review): the boolean result of assign_qualification is ignored, so the
                    # local record below is written even if the MTurk call failed - confirm intended.
                    self.assign_qualification(
                        qualification_type_id=qualification.type_id, worker_id=mturk_worker_id,
                        value=int(rating['rating'] * 100))
                defaults = {
                    'qualification': qualification,
                    'worker': mturk_worker_id,
                    'score': int(rating['rating'] * 100)
                }
                # The local score is refreshed for every matching worker, newly assigned or not.
                MTurkWorkerQualification.objects.update_or_create(qualification=qualification,
                                                                  worker=mturk_worker_id,
                                                                  defaults=defaults)
        return qualification, True

    def change_hit_type_of_hit(self, hit_id, hit_type_id):
        """Move a HIT to another HIT type.

        Returns:
            tuple: ``(result, True)`` with the connection's response, or
            ``(None, False)`` when it raised ``MTurkRequestError``.
        """
        try:
            response = self.connection.change_hit_type_of_hit(hit_id=hit_id, hit_type=hit_type_id)
            return response, True
        except MTurkRequestError:
            return None, False

    def update_worker_boomerang(self, project_id, worker_id, task_avg, requester_avg):
        """
        Update a worker's boomerang qualification score for a project.

        The first HIT whose task belongs to the project group is looked up; if
        there is none, nothing is changed. Otherwise the worker's record for
        that HIT type's boomerang qualification is updated to
        ``int(task_avg * 100)``, or created and assigned on MTurk when the
        worker has no record yet.

        Args:
            project_id: project group id used to find the HIT.
            worker_id: MTurk worker id.
            task_avg: rating average; stored as ``int(task_avg * 100)``.
            requester_avg: not used by the active code path.

        Returns:
            str: always ``'SUCCESS'``.
        """
        hit = MTurkHIT.objects.select_related('hit_type__boomerang_qualification').filter(
            task__project__group_id=project_id).first()
        if hit is None:
            return 'SUCCESS'

        qualification = hit.hit_type.boomerang_qualification
        existing = MTurkWorkerQualification.objects.filter(qualification=qualification,
                                                           worker=worker_id).first()
        new_score = int(task_avg * 100)
        if existing is None:
            MTurkWorkerQualification.objects.create(qualification=qualification, worker=worker_id,
                                                    score=new_score, overwritten=True)
            self.assign_qualification(qualification_type_id=qualification.type_id, worker_id=worker_id,
                                      value=new_score)
            # NOTE: propagating requester_avg to the worker's other,
            # non-overwritten qualifications is currently disabled.
        else:
            self.update_score(existing, score=new_score, override=True)
        return 'SUCCESS'

    def update_score(self, worker_qual, score, override=False):
        """Push a new qualification score to MTurk and mirror it locally.

        Args:
            worker_qual: worker-qualification record to update; ``None`` is
                accepted and reported as a failure.
            score: new score sent to MTurk and stored on the record.
            override: value stored in the record's ``overwritten`` field.

        Returns:
            bool: True when the score was updated and saved, False when
            ``worker_qual`` is None or ``MTurkRequestError`` was raised.
        """
        updated = False
        if worker_qual is not None:
            try:
                type_id = worker_qual.qualification.type_id
                self.connection.update_qualification_score(type_id, worker_qual.worker, score)
                worker_qual.overwritten = override
                worker_qual.score = score
                worker_qual.save()
                updated = True
            except MTurkRequestError:
                updated = False
        return updated

    def assign_qualification(self, qualification_type_id, worker_id,
                             value=1):
        """
        Assign a qualification to a WorkerId without sending a notification
        Args:
            qualification_type_id: id of the qualification type to assign
            worker_id: MTurk worker id receiving the qualification
            value: qualification value to assign (defaults to 1)

        Returns:
            bool: True on success, False when MTurkRequestError was raised
        """
        try:
            self.connection.assign_qualification(qualification_type_id, worker_id,
                                                 value, send_notification=False)
        except MTurkRequestError:
            return False
        return True

    def revoke_qualification(self, qualification_type_id, worker_id):
        """Revoke a qualification from a worker.

        Returns:
            bool: True on success, False when ``MTurkRequestError`` was raised.
        """
        try:
            self.connection.revoke_qualification(qualification_type_id=qualification_type_id, subject_id=worker_id)
        except MTurkRequestError:
            return False
        else:
            return True

    def notify_workers(self, worker_ids, subject, message_text):
        """Send a message to the given workers through MTurk.

        Returns:
            bool: True on success, False when ``MTurkRequestError`` was raised.
        """
        try:
            self.connection.notify_workers(worker_ids, subject, message_text)
        except MTurkRequestError:
            return False
        else:
            return True
Exemplo n.º 46
0
# When true, the account balance is printed right after connecting.
debug = True

# MTurk endpoint used for the connection below (sandbox host).
HOST = 'mechanicalturk.sandbox.amazonaws.com'

# Image and title for the Macy's product side of the comparison.
MACYS_IMAGE_URL = 'http://i.imgur.com/1f7moUz.jpg'
MACYS_IMAGE_TITLE = 'Material Girl Juniors Floral-Print Illusion Dress'

# Image and title for the pinned-photo side of the comparison.
PIN_IMAGE_URL = 'http://media-cache-ec0.pinimg.com/736x/de/21/31/de2131cb6d0fa52faaee161047194896.jpg'
PIN_IMAGE_TITLE = 'Floral print maxi dress by Marc Jacobs.'

mtc = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY_ID,
                      aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                      host=HOST)

if debug:
    # Call form of print: same output on Python 2 for a single argument,
    # and valid syntax on Python 3.
    print(mtc.get_account_balance())

# HIT metadata.
title = 'Match these Pictures to Macy\'s Products'
description = 'Look at this photo and match it to Macy\'s products'
keywords = 'clothing, rating, opinions, easy, quick, macys'

# (label, value) pairs for a five-step rating scale.
ratings = [('Very Bad', '1'),
           ('Bad', '2'),
           ('OK', '3'),
           ('Good', '4'),
           ('Very Good', '5')]

# make overview

overview = Overview()
overview.append_field('Title', 'Rank how these two images match.')
Exemplo n.º 47
0
class MTurkServices:
    def __init__(self, config):
        self.config = config

    def get_active_hits(self):
        self.connect_to_turk()
        # hits = self.mtc.search_hits()
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return(False)
        active_hits = [hit for hit in hits if not(hit.expired)]
        hits_data = [{'hitid': hit.HITId,
                      'title': hit.Title,
                      'status': hit.HITStatus,
                      'max_assignments': hit.MaxAssignments,
                      'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
                      'number_assignments_pending': hit.NumberOfAssignmentsCompleted,
                      'number_assignments_available': hit.NumberOfAssignmentsAvailable,
                      'creation_time': hit.CreationTime,
                      'expiration': hit.Expiration,
                      } for hit in active_hits]
        return(hits_data)

    def get_workers(self):
        self.connect_to_turk()
        try:
            hits = self.mtc.search_hits(sort_direction='Descending', page_size=20)
            hit_ids = [hit.HITId for hit in hits]
            workers_nested = [self.mtc.get_assignments(
                                hit_id,
                                status="Submitted",
                                sort_by='SubmitTime',
                                page_size=100
                              ) for hit_id in hit_ids]

            workers = [val for subl in workers_nested for val in subl]  # Flatten nested lists
        except MTurkRequestError:
            return(False)
        completed_workers = [worker for worker in workers if worker.AssignmentStatus == "Submitted"]
        worker_data = [{'hitId': worker.HITId,
                        'assignmentId': worker.AssignmentId,
                        'workerId': worker.WorkerId,
                        'submit_time': worker.SubmitTime,
                        'accept_time': worker.AcceptTime
                       } for worker in completed_workers]
        return(worker_data)

    def approve_worker(self, assignment_id):
        self.connect_to_turk()
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
        except MTurkRequestError:
            return(False)

    def reject_worker(self, assignment_id):
        self.connect_to_turk()
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
        except MTurkRequestError:
            return(False)

    def verify_aws_login(self, key_id, secret_key):
        """Check a pair of AWS credentials by requesting the account balance.

        A new connection built from the given credentials is stored on
        ``self.mtc``; the host is the sandbox or the production endpoint
        depending on the ``using_sandbox`` option.

        Returns:
            int: 1 when the balance request succeeded, 0 when MTurk raised
            ``MTurkRequestError``.
        """
        if self.config.getboolean('HIT Configuration', 'using_sandbox'):
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'
        self.mtc = MTurkConnection(aws_access_key_id=key_id,
                                   aws_secret_access_key=secret_key,
                                   host=host)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as e:
            print(e.error_message)
            print('AWS Credentials invalid')
            return 0
        print('AWS Credentials valid')
        return 1

    def connect_to_turk(self):
        """Create ``self.mtc`` from the credentials stored in the config.

        The host is the sandbox or the production endpoint depending on the
        ``using_sandbox`` option of the 'HIT Configuration' section.
        """
        if self.config.getboolean('HIT Configuration', 'using_sandbox'):
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'

        key_id = self.config.get('AWS Access', 'aws_access_key_id')
        secret_key = self.config.get('AWS Access', 'aws_secret_access_key')
        self.mtc = MTurkConnection(aws_access_key_id=key_id,
                                   aws_secret_access_key=secret_key,
                                   host=host)
        
    def configure_hit(self):
        """Build ``self.paramdict``, the keyword arguments used to create a HIT.

        All values are read from the 'HIT Configuration' section: the question
        URL (wrapped in an ExternalQuestion with a frame height of 600), the
        approval-rate requirement, an optional US-only locale requirement, and
        the lifetime, duration, reward, title, description, keywords and
        maximum number of assignments.
        """
        section = 'HIT Configuration'
        config = self.config

        # Configure portal
        portal_url = config.get(section, 'question_url')
        question = ExternalQuestion(portal_url, 600)

        # Qualification:
        requirements = Qualifications()
        min_approval = config.get(section, 'Approve_Requirement')
        requirements.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo", min_approval))
        if config.getboolean(section, 'US_only'):
            requirements.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters; lifetime and duration are
        # configured in hours.
        self.paramdict = {
            'hit_type': None,
            'question': question,
            'lifetime': datetime.timedelta(hours=config.getfloat(section, 'HIT_lifetime')),
            'max_assignments': config.getint(section, 'max_assignments'),
            'title': config.get(section, 'title'),
            'description': config.get(section, 'description'),
            'keywords': config.get(section, 'keywords'),
            'reward': config.getfloat(section, 'reward'),
            'duration': datetime.timedelta(hours=config.getfloat(section, 'duration')),
            'approval_delay': None,
            'questions': None,
            'qualifications': requirements,
        }
    
    def is_signed_up(self):
        access_key_id = self.config.get('AWS Access', 'aws_access_key_id')
        access_key = self.config.get('AWS Access', 'aws_secret_access_key')
        return (access_key_id != 'YourAccessKeyId') and \
               (access_key != 'YourSecreteAccessKey')

    def check_balance(self):
        if self.is_signed_up():
            self.connect_to_turk()
            return(self.mtc.get_account_balance()[0])
        else:
            return('-')

    # TODO: if valid AWS credentials haven't been provided, connect_to_turk() will
    # fail; there is no error checking for that here or elsewhere.
    def create_hit(self):
        self.connect_to_turk()
        self.configure_hit()
        myhit = self.mtc.create_hit(**self.paramdict)[0]
        self.hitid = myhit.HITId

    # TODO(Jay): Have a wrapper around functions that serializes them. 
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        self.connect_to_turk()
        self.mtc.expire_hit(hitid)

    def extend_hit(self, hitid, assignments_increment=None, expiration_increment=None):
        self.connect_to_turk()
        self.mtc.extend_hit(hitid, assignments_increment=int(assignments_increment))
        self.mtc.extend_hit(hitid, expiration_increment=int(expiration_increment)*60)

    def get_summary(self):
        """Return the account balance wrapped in a JSON response.

        Returns:
            The result of ``jsonify(balance=...)`` with the balance converted
            to a string, or False when MTurk raised ``MTurkRequestError``
            (the error message is printed first).
        """
        try:
            current_balance = self.check_balance()
            return jsonify(balance=str(current_balance))
        except MTurkRequestError as e:
            print(e.error_message)
            return False